[stdlib] Revise documentation for string-related types

This documentation revision covers a large number of types & protocols: String, its views and their indices, the Unicode codec types and protocol, as well as Character, UnicodeScalar, and StaticString, among others. This also includes a few small changes across the standard library for consistency.
2025-12-21 12:14:44 +01:00 · 2016-04-06 13:03:46 -05:00
parent 7f31d4e889
commit 44b2d56a7f
35 changed files with 2998 additions and 670 deletions
--- a/stdlib/public/core/CString.swift
+++ b/stdlib/public/core/CString.swift
@@ -16,25 +16,64 @@ import SwiftShims

 extension String {

-  /// Create a new `String` by copying the nul-terminated UTF-8 data
-  /// referenced by a `cString`.
+  /// Creates a new string by copying the null-terminated UTF-8 data referenced
+  /// by the given pointer.
  ///
-  /// If `cString` contains ill-formed UTF-8 code unit sequences, replaces them
-  /// with replacement characters (U+FFFD).
+  /// If `cString` contains ill-formed UTF-8 code unit sequences, this
+  /// initializer replaces them with the Unicode replacement character
+  /// (`"\u{FFFD}"`).
  ///
-  /// - Precondition: `cString != nil`
+  /// The following example calls this initializer with pointers to the
+  /// contents of two different `CChar` arrays---the first with well-formed
+  /// UTF-8 code unit sequences and the second with an ill-formed sequence at
+  /// the end.
+  ///
+  ///     let validUTF8: [CChar] = [67, 97, 102, -61, -87, 0]
+  ///     validUTF8.withUnsafeBufferPointer { ptr in
+  ///         let s = String(cString: ptr.baseAddress!)
+  ///         print(s)
+  ///     }
+  ///     // Prints "Café"
+  ///
+  ///     let invalidUTF8: [CChar] = [67, 97, 102, -61, 0]
+  ///     invalidUTF8.withUnsafeBufferPointer { ptr in
+  ///         let s = String(cString: ptr.baseAddress!)
+  ///         print(s)
+  ///     }
+  ///     // Prints "Caf<EFBFBD>"
+  ///
+  /// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
  public init(cString: UnsafePointer<CChar>) {
    self = String.decodeCString(UnsafePointer(cString), as: UTF8.self,
      repairingInvalidCodeUnits: true)!.result
  }

-  /// Create a new `String` by copying the nul-terminated UTF-8 data
-  /// referenced by a `cString`.
+  /// Creates a new string by copying and validating the null-terminated UTF-8
+  /// data referenced by the given pointer.
  ///
-  /// Does not try to repair ill-formed UTF-8 code unit sequences, fails if any
-  /// such sequences are found.
+  /// This initializer does not try to repair ill-formed UTF-8 code unit
+  /// sequences. If any are found, the result of the initializer is `nil`.
  ///
-  /// - Precondition: `cString != nil`
+  /// The following example calls this initializer with pointers to the
+  /// contents of two different `CChar` arrays---the first with well-formed
+  /// UTF-8 code unit sequences and the second with an ill-formed sequence at
+  /// the end.
+  ///
+  ///     let validUTF8: [CChar] = [67, 97, 102, -61, -87, 0]
+  ///     validUTF8.withUnsafeBufferPointer { ptr in
+  ///         let s = String(validatingUTF8: ptr.baseAddress!)
+  ///         print(s)
+  ///     }
+  ///     // Prints "Optional("Café")"
+  ///
+  ///     let invalidUTF8: [CChar] = [67, 97, 102, -61, 0]
+  ///     invalidUTF8.withUnsafeBufferPointer { ptr in
+  ///         let s = String(validatingUTF8: ptr.baseAddress!)
+  ///         print(s)
+  ///     }
+  ///     // Prints "nil"
+  ///
+  /// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
  public init?(validatingUTF8 cString: UnsafePointer<CChar>) {
    guard let (result, _) = String.decodeCString(
        UnsafePointer(cString),
@@ -45,12 +84,50 @@ extension String {
    self = result
  }

-  /// Create a new `String` by copying the nul-terminated data
-  /// referenced by a `cString` using `encoding`.
+  /// Creates a new string by copying the null-terminated data referenced by
+  /// the given pointer using the specified encoding.
  ///
-  /// Returns `nil` if the `cString` is `nil` or if it contains ill-formed code
-  /// units and no repairing has been requested. Otherwise replaces
-  /// ill-formed code units with replacement characters (U+FFFD).
+  /// When you pass `true` as `isRepairing`, this method replaces ill-formed
+  /// sequences with the Unicode replacement character (`"\u{FFFD}"`);
+  /// otherwise, an ill-formed sequence causes this method to stop decoding
+  /// and return `nil`.
+  ///
+  /// The following example calls this method with pointers to the contents of
+  /// two different `UInt8` arrays---the first with well-formed UTF-8 code
+  /// unit sequences and the second with an ill-formed sequence at the end.
+  ///
+  ///     let validUTF8: [UInt8] = [67, 97, 102, 195, 169, 0]
+  ///     validUTF8.withUnsafeBufferPointer { ptr in
+  ///         let s = String.decodeCString(ptr.baseAddress,
+  ///                                      as: UTF8.self,
+  ///                                      repairingInvalidCodeUnits: true)
+  ///         print(s)
+  ///     }
+  ///     // Prints "Optional((Café, false))"
+  ///
+  ///     let invalidUTF8: [UInt8] = [67, 97, 102, 195, 0]
+  ///     invalidUTF8.withUnsafeBufferPointer { ptr in
+  ///         let s = String.decodeCString(ptr.baseAddress,
+  ///                                      as: UTF8.self,
+  ///                                      repairingInvalidCodeUnits: true)
+  ///         print(s)
+  ///     }
+  ///     // Prints "Optional((Caf<EFBFBD>, true))"
+  ///
+  /// - Parameters:
+  ///   - cString: A pointer to a null-terminated code sequence encoded in
+  ///     `encoding`.
+  ///   - encoding: The Unicode encoding of the data referenced by `cString`.
+  ///   - isRepairing: Pass `true` to create a new string, even when the data
+  ///     referenced by `cString` contains ill-formed sequences. Ill-formed
+  ///     sequences are replaced with the Unicode replacement character
+  ///     (`"\u{FFFD}"`). Pass `false` to interrupt the creation of the new
+  ///     string if an ill-formed sequence is detected.
+  /// - Returns: A tuple with the new string and a Boolean value that indicates
+  ///   whether any repairs were made. Returns `nil` if `cString` is `nil` or if
+  ///   `isRepairing` is `false` and an ill-formed sequence is detected.
+  ///
+  /// - SeeAlso: `UnicodeCodec`
  public static func decodeCString<Encoding : UnicodeCodec>(
    _ cString: UnsafePointer<Encoding.CodeUnit>?,
    as encoding: Encoding.Type,