[stdlib] Revise documentation for string-related types

This documentation revision covers a large number of types & protocols:
String, its views and their indices, the Unicode codec types and protocol,
as well as Character, UnicodeScalar, and StaticString, among others.

This also includes a few small changes across the standard library for
consistency.
This commit is contained in:
Nate Cook
2016-04-06 13:03:46 -05:00
parent 7f31d4e889
commit 44b2d56a7f
35 changed files with 2998 additions and 670 deletions

View File

@@ -482,8 +482,8 @@ public struct ${Self}<Element>
%end %end
} }
/// The array's "past the end" position, or one greater than the last valid /// The array's "past the end" position---that is, the position one greater
/// subscript argument. /// than the last valid subscript argument.
/// ///
/// When you need a range that includes the last element of an array, use the /// When you need a range that includes the last element of an array, use the
/// half-open range operator (`..<`) with `endIndex`. The `..<` operator /// half-open range operator (`..<`) with `endIndex`. The `..<` operator
@@ -934,7 +934,7 @@ extension ${Self} : ArrayLiteralConvertible {
// Optimized implementation for Array // Optimized implementation for Array
/// Creates an array from the given array literal. /// Creates an array from the given array literal.
/// ///
/// Don't directly call this initializer, which is used by the compiler /// Do not call this initializer directly. It is used by the compiler
/// when you use an array literal. Instead, create a new array by using an /// when you use an array literal. Instead, create a new array by using an
/// array literal as its value. To do this, enclose a comma-separated list of /// array literal as its value. To do this, enclose a comma-separated list of
/// values in square brackets. /// values in square brackets.
@@ -951,7 +951,7 @@ extension ${Self} : ArrayLiteralConvertible {
%else: %else:
/// Creates an array from the given array literal. /// Creates an array from the given array literal.
/// ///
/// Don't directly call this initializer, which is used by the compiler when /// Do not call this initializer directly. It is used by the compiler when
/// you use an array literal. Instead, create a new array by using an array /// you use an array literal. Instead, create a new array by using an array
/// literal as its value. To do this, enclose a comma-separated list of /// literal as its value. To do this, enclose a comma-separated list of
/// values in square brackets. /// values in square brackets.

View File

@@ -79,7 +79,7 @@ extension Bool : _BuiltinBooleanLiteralConvertible, BooleanLiteralConvertible {
/// Creates an instance initialized to the specified Boolean literal. /// Creates an instance initialized to the specified Boolean literal.
/// ///
/// Don't directly call this initializer, which is used by the compiler when /// Do not call this initializer directly. It is used by the compiler when
/// you use a Boolean literal. Instead, create a new `Bool` instance by /// you use a Boolean literal. Instead, create a new `Bool` instance by
/// using one of the Boolean literals `true` and `false`. /// using one of the Boolean literals `true` and `false`.
/// ///

View File

@@ -16,25 +16,64 @@ import SwiftShims
extension String { extension String {
/// Create a new `String` by copying the nul-terminated UTF-8 data /// Creates a new string by copying the null-terminated UTF-8 data referenced
/// referenced by a `cString`. /// by the given pointer.
/// ///
/// If `cString` contains ill-formed UTF-8 code unit sequences, replaces them /// If `cString` contains ill-formed UTF-8 code unit sequences, this
/// with replacement characters (U+FFFD). /// initializer replaces them with the Unicode replacement character
/// (`"\u{FFFD}"`).
/// ///
/// - Precondition: `cString != nil` /// The following example calls this initializer with pointers to the
/// contents of two different `CChar` arrays---the first with well-formed
/// UTF-8 code unit sequences and the second with an ill-formed sequence at
/// the end.
///
/// let validUTF8: [CChar] = [67, 97, 102, -61, -87, 0]
/// validUTF8.withUnsafeBufferPointer { ptr in
/// let s = String(cString: ptr.baseAddress!)
/// print(s)
/// }
/// // Prints "Café"
///
/// let invalidUTF8: [CChar] = [67, 97, 102, -61, 0]
/// invalidUTF8.withUnsafeBufferPointer { ptr in
/// let s = String(cString: ptr.baseAddress!)
/// print(s)
/// }
/// // Prints "Caf<EFBFBD>"
///
/// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
public init(cString: UnsafePointer<CChar>) { public init(cString: UnsafePointer<CChar>) {
self = String.decodeCString(UnsafePointer(cString), as: UTF8.self, self = String.decodeCString(UnsafePointer(cString), as: UTF8.self,
repairingInvalidCodeUnits: true)!.result repairingInvalidCodeUnits: true)!.result
} }
/// Create a new `String` by copying the nul-terminated UTF-8 data /// Creates a new string by copying and validating the null-terminated UTF-8
/// referenced by a `cString`. /// data referenced by the given pointer.
/// ///
/// Does not try to repair ill-formed UTF-8 code unit sequences, fails if any /// This initializer does not try to repair ill-formed UTF-8 code unit
/// such sequences are found. /// sequences. If any are found, the result of the initializer is `nil`.
/// ///
/// - Precondition: `cString != nil` /// The following example calls this initializer with pointers to the
/// contents of two different `CChar` arrays---the first with well-formed
/// UTF-8 code unit sequences and the second with an ill-formed sequence at
/// the end.
///
/// let validUTF8: [CChar] = [67, 97, 102, -61, -87, 0]
/// validUTF8.withUnsafeBufferPointer { ptr in
/// let s = String(validatingUTF8: ptr.baseAddress!)
/// print(s)
/// }
/// // Prints "Optional(Café)"
///
/// let invalidUTF8: [CChar] = [67, 97, 102, -61, 0]
/// invalidUTF8.withUnsafeBufferPointer { ptr in
/// let s = String(validatingUTF8: ptr.baseAddress!)
/// print(s)
/// }
/// // Prints "nil"
///
/// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
public init?(validatingUTF8 cString: UnsafePointer<CChar>) { public init?(validatingUTF8 cString: UnsafePointer<CChar>) {
guard let (result, _) = String.decodeCString( guard let (result, _) = String.decodeCString(
UnsafePointer(cString), UnsafePointer(cString),
@@ -45,12 +84,50 @@ extension String {
self = result self = result
} }
/// Create a new `String` by copying the nul-terminated data /// Creates a new string by copying the null-terminated data referenced by
/// referenced by a `cString` using `encoding`. /// the given pointer using the specified encoding.
/// ///
/// Returns `nil` if the `cString` is `nil` or if it contains ill-formed code /// When you pass `true` as `isRepairing`, this method replaces ill-formed
/// units and no repairing has been requested. Otherwise replaces /// sequences with the Unicode replacement character (`"\u{FFFD}"`);
/// ill-formed code units with replacement characters (U+FFFD). /// otherwise, an ill-formed sequence causes this method to stop decoding
/// and return `nil`.
///
/// The following example calls this method with pointers to the contents of
/// two different `UInt8` arrays---the first with well-formed UTF-8 code
/// unit sequences and the second with an ill-formed sequence at the end.
///
/// let validUTF8: [UInt8] = [67, 97, 102, 195, 169, 0]
/// validUTF8.withUnsafeBufferPointer { ptr in
/// let s = String.decodeCString(ptr.baseAddress,
/// as: UTF8.self,
/// repairingInvalidCodeUnits: true)
/// print(s)
/// }
/// // Prints "Optional((Café, false))"
///
/// let invalidUTF8: [UInt8] = [67, 97, 102, 195, 0]
/// invalidUTF8.withUnsafeBufferPointer { ptr in
/// let s = String.decodeCString(ptr.baseAddress,
/// as: UTF8.self,
/// repairingInvalidCodeUnits: true)
/// print(s)
/// }
/// // Prints "Optional((Caf<EFBFBD>, true))"
///
/// - Parameters:
/// - cString: A pointer to a null-terminated code sequence encoded in
/// `encoding`.
/// - encoding: The Unicode encoding of the data referenced by `cString`.
/// - isRepairing: Pass `true` to create a new string, even when the data
/// referenced by `cString` contains ill-formed sequences. Ill-formed
/// sequences are replaced with the Unicode replacement character
/// (`"\u{FFFD}"`). Pass `false` to interrupt the creation of the new
/// string if an ill-formed sequence is detected.
/// - Returns: A tuple with the new string and a Boolean value that indicates
/// whether any repairs were made. If `isRepairing` is `false` and an
/// ill-formed sequence is detected, this method returns `nil`.
///
/// - SeeAlso: `UnicodeCodec`
public static func decodeCString<Encoding : UnicodeCodec>( public static func decodeCString<Encoding : UnicodeCodec>(
_ cString: UnsafePointer<Encoding.CodeUnit>?, _ cString: UnsafePointer<Encoding.CodeUnit>?,
as encoding: Encoding.Type, as encoding: Encoding.Type,

View File

@@ -10,9 +10,56 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
/// `Character` represents some Unicode grapheme cluster as /// A single extended grapheme cluster, which approximates a user-perceived
/// defined by a canonical, localized, or otherwise tailored /// character.
/// segmentation algorithm. ///
/// The `Character` type represents a character made up of one or more Unicode
/// scalar values, grouped by a Unicode boundary algorithm. Generally, a
/// `Character` instance matches what the reader of a string will perceive as
/// a single character. The number of visible characters is generally the most
/// natural way to count the length of a string.
///
/// let greeting = "Hello! 🐥"
/// print("Character count: \(greeting.characters.count)")
/// // Prints "Character count: 8"
///
/// Because each character in a string can be made up of one or more Unicode
/// code points, the number of characters in a string may not match the length
/// of the Unicode code point representation or the length of the string in a
/// particular binary representation.
///
/// print("Unicode code point count: \(greeting.unicodeScalars.count)")
/// // Prints "Unicode code point count: 8"
///
/// print("UTF-8 representation count: \(greeting.utf8.count)")
/// // Prints "UTF-8 representation count: 11"
///
/// Every `Character` instance is composed of one or more Unicode code points
/// that are grouped together as an *extended grapheme cluster*. The way these
/// code points are grouped is defined by a canonical, localized, or otherwise
/// tailored Unicode segmentation algorithm.
///
/// For example, a country's Unicode flag character is made up of two regional
/// indicator code points that correspond to that country's ISO 3166-1 alpha-2
/// code. The alpha-2 code for the United States is "US", so its flag
/// character is made up of the Unicode code points `"\u{1F1FA}"` (REGIONAL
/// INDICATOR SYMBOL LETTER U) and `"\u{1F1F8}"` (REGIONAL INDICATOR SYMBOL
/// LETTER S). When placed next to each other in a Swift string literal, these
/// two code points are combined into a single grapheme cluster, represented
/// by a `Character` instance in Swift.
///
/// let usFlag: Character = "\u{1F1FA}\u{1F1F8}"
/// print(usFlag)
/// // Prints "🇺🇸"
///
/// For more information about the Unicode terms used in this discussion, see
/// the [Unicode.org glossary][glossary]. In particular, this discussion
/// mentions [extended grapheme clusters][clusters] and [Unicode scalar
/// values][scalars].
///
/// [glossary]: http://www.unicode.org/glossary/
/// [clusters]: http://www.unicode.org/glossary/#extended_grapheme_cluster
/// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value
public struct Character : public struct Character :
_BuiltinExtendedGraphemeClusterLiteralConvertible, _BuiltinExtendedGraphemeClusterLiteralConvertible,
ExtendedGraphemeClusterLiteralConvertible, Equatable, Hashable, Comparable { ExtendedGraphemeClusterLiteralConvertible, Equatable, Hashable, Comparable {
@@ -33,7 +80,9 @@ public struct Character :
case small(Builtin.Int63) case small(Builtin.Int63)
} }
/// Construct a `Character` containing just the given `scalar`. /// Creates a character containing the given Unicode scalar value.
///
/// - Parameter scalar: The Unicode scalar value to convert into a character.
public init(_ scalar: UnicodeScalar) { public init(_ scalar: UnicodeScalar) {
var asInt: UInt64 = 0 var asInt: UInt64 = 0
var shift: UInt64 = 0 var shift: UInt64 = 0
@@ -55,7 +104,17 @@ public struct Character :
UTF32.self, input: CollectionOfOne(UInt32(value)))) UTF32.self, input: CollectionOfOne(UInt32(value))))
} }
/// Create an instance initialized to `value`. /// Creates a character with the specified value.
///
/// Don't call this initializer directly. It is used by the compiler when you
/// use a string literal to initialize a `Character` instance. For example:
///
/// let snowflake: Character = "❄"
/// print(snowflake)
/// // Prints "❄"
///
/// The assignment to the `snowflake` constant calls this initializer behind
/// the scenes.
public init(unicodeScalarLiteral value: Character) { public init(unicodeScalarLiteral value: Character) {
self = value self = value
} }
@@ -73,14 +132,31 @@ public struct Character :
isASCII: isASCII)) isASCII: isASCII))
} }
/// Create an instance initialized to `value`. /// Creates a character with the specified value.
///
/// Don't call this initializer directly. It is used by the compiler when you
/// use a string literal to initialize a `Character` instance. For example:
///
/// let oBreve: Character = "o\u{306}"
/// print(oBreve)
/// // Prints "ŏ"
///
/// The assignment to the `oBreve` constant calls this initializer behind the
/// scenes.
public init(extendedGraphemeClusterLiteral value: Character) { public init(extendedGraphemeClusterLiteral value: Character) {
self = value self = value
} }
/// Create an instance from a single-character `String`. /// Creates a character from a single-character string.
/// ///
/// - Precondition: `s` contains exactly one extended grapheme cluster. /// The following example creates a new character from the uppercase version
/// of a string that only holds one character.
///
/// let a = "a"
/// let capitalA = Character(a.uppercased())
///
/// - Parameter s: The single-character string to convert to a `Character`
/// instance. `s` must contain exactly one extended grapheme cluster.
public init(_ s: String) { public init(_ s: String) {
// The small representation can accept up to 8 code units as long // The small representation can accept up to 8 code units as long
// as the last one is a continuation. Since the high bit of the // as the last one is a continuation. Since the high bit of the
@@ -258,13 +334,10 @@ public struct Character :
var data: UInt64 var data: UInt64
} }
/// The hash value. /// The character's hash value.
/// ///
/// **Axiom:** `x == y` implies `x.hashValue == y.hashValue`. /// Hash values are not guaranteed to be equal across different executions of
/// /// your program. Do not save hash values to use during a future execution.
/// - Note: The hash value is not guaranteed to be stable across
/// different invocations of the same program. Do not persist the
/// hash value across program runs.
public var hashValue: Int { public var hashValue: Int {
// FIXME(performance): constructing a temporary string is extremely // FIXME(performance): constructing a temporary string is extremely
// wasteful and inefficient. // wasteful and inefficient.
@@ -281,14 +354,16 @@ public struct Character :
} }
extension Character : CustomDebugStringConvertible { extension Character : CustomDebugStringConvertible {
/// A textual representation of `self`, suitable for debugging. /// A textual representation of the character, suitable for debugging.
public var debugDescription: String { public var debugDescription: String {
return String(self).debugDescription return String(self).debugDescription
} }
} }
extension String { extension String {
/// Construct an instance containing just the given `Character`. /// Creates a string containing the given character.
///
/// - Parameter c: The character to convert to a string.
public init(_ c: Character) { public init(_ c: Character) {
switch c._representation { switch c._representation {
case let .small(_63bits): case let .small(_63bits):

View File

@@ -212,8 +212,8 @@ public struct CountableClosedRange<
return ClosedRangeIndex(lowerBound) return ClosedRangeIndex(lowerBound)
} }
/// The range's "past the end" position, or one greater than the last valid /// The range's "past the end" position---that is, the position one greater
/// subscript argument. /// than the last valid subscript argument.
public var endIndex: ClosedRangeIndex<Bound> { public var endIndex: ClosedRangeIndex<Bound> {
return ClosedRangeIndex() return ClosedRangeIndex()
} }

View File

@@ -38,8 +38,8 @@ public protocol IndexableBase {
/// If the collection is empty, `startIndex` is equal to `endIndex`. /// If the collection is empty, `startIndex` is equal to `endIndex`.
var startIndex: Index { get } var startIndex: Index { get }
/// The collection's "past the end" position, or one greater than the last /// The collection's "past the end" position---that is, the position one
/// valid subscript argument. /// greater than the last valid subscript argument.
/// ///
/// When you need a range that includes the last element of a collection, use /// When you need a range that includes the last element of a collection, use
/// the half-open range operator (`..<`) with `endIndex`. The `..<` operator /// the half-open range operator (`..<`) with `endIndex`. The `..<` operator
@@ -157,8 +157,11 @@ public protocol IndexableBase {
/// In most cases, it's best to ignore this protocol and use the `Collection` /// In most cases, it's best to ignore this protocol and use the `Collection`
/// protocol instead, because it has a more complete interface. /// protocol instead, because it has a more complete interface.
public protocol Indexable : IndexableBase { public protocol Indexable : IndexableBase {
/// A type that can represent the number of steps between a pair of /// A type used to represent the number of steps between two indices, where
/// indices. /// one value is reachable from the other.
///
/// In Swift, *reachability* refers to the ability to produce one value from
/// the other through zero or more applications of `index(after:)`.
associatedtype IndexDistance : SignedInteger = Int associatedtype IndexDistance : SignedInteger = Int
/// Returns an index that is the specified distance from the given index. /// Returns an index that is the specified distance from the given index.

View File

@@ -50,10 +50,11 @@ public struct CollectionOfOne<Element>
return 0 return 0
} }
/// The "past the end" position; always identical to /// The "past the end" position---that is, the position one greater than the
/// `index(after: startIndex)`. /// last valid subscript argument.
/// ///
/// - Note: `endIndex` is not a valid argument to `subscript`. /// In a `CollectionOfOne` instance, `endIndex` is always identical to
/// `index(after: startIndex)`.
public var endIndex: Int { public var endIndex: Int {
return 1 return 1
} }

View File

@@ -297,11 +297,31 @@ public protocol _BuiltinUnicodeScalarLiteralConvertible {
init(_builtinUnicodeScalarLiteral value: Builtin.Int32) init(_builtinUnicodeScalarLiteral value: Builtin.Int32)
} }
/// Conforming types can be initialized with string literals /// A type that can be initialized with a string literal containing a single
/// containing a single [Unicode scalar value](http://www.unicode.org/glossary/#unicode_scalar_value). /// Unicode scalar value.
///
/// The `String`, `StaticString`, `Character`, and `UnicodeScalar` types all
/// conform to the `UnicodeScalarLiteralConvertible` protocol. You can
/// initialize a variable of any of these types using a string literal that
/// holds a single Unicode scalar.
///
/// let ñ: UnicodeScalar = "ñ"
/// print(ñ)
/// // Prints "ñ"
///
/// Conforming to UnicodeScalarLiteralConvertible
/// =============================================
///
/// To add `UnicodeScalarLiteralConvertible` conformance to your custom type,
/// implement the required initializer.
public protocol UnicodeScalarLiteralConvertible { public protocol UnicodeScalarLiteralConvertible {
/// A type that can represent a Unicode scalar literal.
///
/// Valid types for `UnicodeScalarLiteralType` are `UnicodeScalar`,
/// `Character`, `String`, and `StaticString`.
associatedtype UnicodeScalarLiteralType : _BuiltinUnicodeScalarLiteralConvertible associatedtype UnicodeScalarLiteralType : _BuiltinUnicodeScalarLiteralConvertible
/// Create an instance initialized to `value`.
/// Creates an instance initialized to the given value.
init(unicodeScalarLiteral value: UnicodeScalarLiteralType) init(unicodeScalarLiteral value: UnicodeScalarLiteralType)
} }
@@ -314,14 +334,40 @@ public protocol _BuiltinExtendedGraphemeClusterLiteralConvertible
isASCII: Builtin.Int1) isASCII: Builtin.Int1)
} }
/// Conforming types can be initialized with string literals /// A type that can be initialized with a string literal containing a single
/// containing a single [Unicode extended grapheme cluster](http://www.unicode.org/glossary/#extended_grapheme_cluster). /// extended grapheme cluster.
///
/// An *extended grapheme cluster* is a group of one or more Unicode code
/// points that approximates a single user-perceived character. Many
/// individual characters, such as "é", "김", and "🇮🇳", can be made up of
/// multiple Unicode code points. These code points are combined by Unicode's
/// boundary algorithms into extended grapheme clusters.
///
/// The `String`, `StaticString`, and `Character` types conform to the
/// `ExtendedGraphemeClusterLiteralConvertible` protocol. You can initialize a
/// variable or constant of any of these types using a string literal that
/// holds a single character.
///
/// let snowflake: Character = "❄"
/// print(snowflake)
/// // Prints "❄"
///
/// Conforming to ExtendedGraphemeClusterLiteralConvertible
/// =======================================================
///
/// To add `ExtendedGraphemeClusterLiteralConvertible` conformance to your
/// custom type, implement the required initializer.
public protocol ExtendedGraphemeClusterLiteralConvertible public protocol ExtendedGraphemeClusterLiteralConvertible
: UnicodeScalarLiteralConvertible { : UnicodeScalarLiteralConvertible {
/// A type that can represent an extended grapheme cluster literal.
///
/// Valid types for `ExtendedGraphemeClusterLiteralType` are `Character`,
/// `String`, and `StaticString`.
associatedtype ExtendedGraphemeClusterLiteralType associatedtype ExtendedGraphemeClusterLiteralType
: _BuiltinExtendedGraphemeClusterLiteralConvertible : _BuiltinExtendedGraphemeClusterLiteralConvertible
/// Create an instance initialized to `value`.
/// Creates an instance initialized to the given value.
init(extendedGraphemeClusterLiteral value: ExtendedGraphemeClusterLiteralType) init(extendedGraphemeClusterLiteral value: ExtendedGraphemeClusterLiteralType)
} }
@@ -342,14 +388,30 @@ public protocol _BuiltinUTF16StringLiteralConvertible
utf16CodeUnitCount: Builtin.Word) utf16CodeUnitCount: Builtin.Word)
} }
/// Conforming types can be initialized with arbitrary string literals. /// A type that can be initialized with a string literal.
///
/// The `String` and `StaticString` types conform to the
/// `StringLiteralConvertible` protocol. You can initialize a variable or
/// constant of either of these types using a string literal of any length.
///
/// let picnicGuest = "Deserving porcupine"
///
/// Conforming to StringLiteralConvertible
/// ======================================
///
/// To add `StringLiteralConvertible` conformance to your custom type,
/// implement the required initializer.
public protocol StringLiteralConvertible public protocol StringLiteralConvertible
: ExtendedGraphemeClusterLiteralConvertible { : ExtendedGraphemeClusterLiteralConvertible {
// FIXME: when we have default function implementations in protocols, provide // FIXME: when we have default function implementations in protocols, provide
// an implementation of init(extendedGraphemeClusterLiteral:). // an implementation of init(extendedGraphemeClusterLiteral:).
/// A type that can represent a string literal.
///
/// Valid types for `StringLiteralType` are `String` and `StaticString`.
associatedtype StringLiteralType : _BuiltinStringLiteralConvertible associatedtype StringLiteralType : _BuiltinStringLiteralConvertible
/// Create an instance initialized to `value`.
/// Creates an instance initialized to the given string value.
init(stringLiteral value: StringLiteralType) init(stringLiteral value: StringLiteralType)
} }
@@ -537,12 +599,80 @@ public protocol DictionaryLiteralConvertible {
init(dictionaryLiteral elements: (Key, Value)...) init(dictionaryLiteral elements: (Key, Value)...)
} }
/// Conforming types can be initialized with string interpolations /// A type that can be initialized by string interpolation with a string
/// containing `\(`...`)` clauses. /// literal that includes expressions.
///
/// Use string interpolation to include one or more expressions in a string
/// literal, wrapped in a set of parentheses and prefixed by a backslash. For
/// example:
///
/// let price = 2
/// let number = 3
/// let message = "One cookie: $\(price), \(number) cookies: $\(price * number)."
/// print(message)
/// // Prints "One cookie: $2, 3 cookies: $6."
///
/// Conforming to the StringInterpolationConvertible Protocol
/// =========================================================
///
/// To use string interpolation to initialize instances of your custom type,
/// implement the required initializers for `StringInterpolationConvertible`
/// conformance. String interpolation is a multiple-step initialization
/// process. When you use string interpolation, the following steps occur:
///
/// 1. The string literal is broken into pieces. Each segment of the string
/// literal before, between, and after any included expressions, along with
/// the individual expressions themselves, is passed to the
/// `init(stringInterpolationSegment:)` initializer.
/// 2. The results of those calls are passed to the
/// `init(stringInterpolation:)` initializer in the order in which they
/// appear in the string literal.
///
/// In other words, initializing the `message` constant in the example above
/// using string interpolation is equivalent to the following code:
///
/// let message = String(stringInterpolation:
/// String(stringInterpolationSegment: "One cookie: $"),
/// String(stringInterpolationSegment: price),
/// String(stringInterpolationSegment: ", "),
/// String(stringInterpolationSegment: number),
/// String(stringInterpolationSegment: " cookies: $"),
/// String(stringInterpolationSegment: price * number),
/// String(stringInterpolationSegment: "."))
public protocol StringInterpolationConvertible { public protocol StringInterpolationConvertible {
/// Create an instance by concatenating the elements of `strings`. /// Creates an instance by concatenating the given values.
///
/// Do not call this initializer directly. It is used by the compiler when
/// you use string interpolation. For example:
///
/// let s = "\(5) x \(2) = \(5 * 2)"
/// print(s)
/// // Prints "5 x 2 = 10"
///
/// After calling `init(stringInterpolationSegment:)` with each segment of
/// the string literal, this initializer is called with their string
/// representations.
///
/// - Parameter strings: An array of instances of the conforming type.
init(stringInterpolation strings: Self...) init(stringInterpolation strings: Self...)
/// Create an instance containing `expr`'s `print` representation.
/// Creates an instance containing the appropriate representation for the
/// given value.
///
/// Do not call this initializer directly. It is used by the compiler for
/// each string interpolation segment when you use string interpolation. For
/// example:
///
/// let s = "\(5) x \(2) = \(5 * 2)"
/// print(s)
/// // Prints "5 x 2 = 10"
///
/// This initializer is called five times when processing the string literal
/// in the example above; once each for the following: the integer `5`, the
/// string `" x "`, the integer `2`, the string `" = "`, and the result of
/// the expression `5 * 2`.
///
/// - Parameter expr: The expression to represent.
init<T>(stringInterpolationSegment expr: T) init<T>(stringInterpolationSegment expr: T)
} }

View File

@@ -879,11 +879,11 @@ public struct ${Self}<Element>
return AnyIndex(_box: _box._startIndex) return AnyIndex(_box: _box._startIndex)
} }
/// The collection's "past the end" position. /// The collection's "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
/// ///
/// `endIndex` is not a valid argument to `subscript`, and is always /// `endIndex` is always reachable from `startIndex` by zero or more
/// reachable from `startIndex` by zero or more applications of /// applications of `index(after:)`.
/// `index(after:)`.
public var endIndex: AnyIndex { public var endIndex: AnyIndex {
return AnyIndex(_box: _box._endIndex) return AnyIndex(_box: _box._endIndex)
} }

View File

@@ -209,13 +209,11 @@ public struct ${Self}<
return LazyFilterIndex(base: index) return LazyFilterIndex(base: index)
} }
/// The collection's "past the end" position. /// The collection's "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
/// ///
/// `endIndex` is not a valid argument to `subscript`, and is always /// `endIndex` is always reachable from `startIndex` by zero or more
/// reachable from `startIndex` by zero or more applications of /// applications of `index(after:)`.
/// `index(after:)`.
///
/// - Complexity: O(1).
public var endIndex: Index { public var endIndex: Index {
return LazyFilterIndex(base: _base.endIndex) return LazyFilterIndex(base: _base.endIndex)
} }

View File

@@ -497,10 +497,10 @@ public struct Set<Element : Hashable> :
return _variantStorage.startIndex return _variantStorage.startIndex
} }
/// The "past the end" position for iterating members of the set. /// The "past the end" position for the set---that is, the position one
/// greater than the last valid subscript argument.
/// ///
/// The `endIndex` property is never a valid subscript argument. If the set /// If the set is empty, `endIndex` is equal to `startIndex`.
/// is empty, `endIndex` is equal to `startIndex`.
public var endIndex: Index { public var endIndex: Index {
return _variantStorage.endIndex return _variantStorage.endIndex
} }
@@ -681,7 +681,7 @@ public struct Set<Element : Hashable> :
// //
/// Creates a set containing the elements of the given array literal. /// Creates a set containing the elements of the given array literal.
/// ///
/// Don't directly call this initializer, which is used by the compiler when /// Do not call this initializer directly. It is used by the compiler when
/// you use an array literal. Instead, create a new set using an array /// you use an array literal. Instead, create a new set using an array
/// literal as its value by enclosing a comma-separated list of values in /// literal as its value by enclosing a comma-separated list of values in
/// square brackets. You can use an array literal anywhere a set is expected /// square brackets. You can use an array literal anywhere a set is expected
@@ -1087,9 +1087,8 @@ public struct Set<Element : Hashable> :
/// ///
/// Two sets that are equal will always have equal hash values. /// Two sets that are equal will always have equal hash values.
/// ///
/// - Note: The hash value is not guaranteed to be stable across /// Hash values are not guaranteed to be equal across different executions of
/// different invocations of the same program. Do not persist the hash value /// your program. Do not save hash values to use during a future execution.
/// across program runs.
public var hashValue: Int { public var hashValue: Int {
// FIXME: <rdar://problem/18915294> Cache Set<T> hashValue // FIXME: <rdar://problem/18915294> Cache Set<T> hashValue
var result: Int = _mixInt(0) var result: Int = _mixInt(0)
@@ -1683,7 +1682,8 @@ public struct Dictionary<Key : Hashable, Value> :
return _variantStorage.startIndex return _variantStorage.startIndex
} }
/// The dictionary's "past the end" position. /// The dictionary's "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
/// ///
/// If the collection is empty, `endIndex` is equal to `startIndex`. /// If the collection is empty, `endIndex` is equal to `startIndex`.
/// ///
@@ -1957,7 +1957,7 @@ public struct Dictionary<Key : Hashable, Value> :
/// Creates a dictionary initialized with a dictionary literal. /// Creates a dictionary initialized with a dictionary literal.
/// ///
/// Don't directly call this initializer, which is called by the compiler to /// Do not call this initializer directly. It is called by the compiler to
/// handle dictionary literals. To use a dictionary literal as the initial /// handle dictionary literals. To use a dictionary literal as the initial
/// value of a dictionary, enclose a comma-separated list of key-value pairs /// value of a dictionary, enclose a comma-separated list of key-value pairs
/// in square brackets. /// in square brackets.

View File

@@ -30,8 +30,8 @@ public enum ImplicitlyUnwrappedOptional<Wrapped> : NilLiteralConvertible {
/// Creates an instance initialized with `nil`. /// Creates an instance initialized with `nil`.
/// ///
/// Don't use this initializer directly; it is used by the compiler when you /// Do not call this initializer directly. It is used by the compiler when
/// initialize an `Optional` instance with a `nil` literal. For example: /// you initialize an `ImplicitlyUnwrappedOptional` instance with a `nil` literal. For example:
/// ///
/// let i: Index! = nil /// let i: Index! = nil
@_transparent @_transparent

View File

@@ -116,11 +116,11 @@ extension ${Self} : ${TraversalCollection} {
return _base.startIndex return _base.startIndex
} }
/// The collection's "past the end" position. /// The collection's "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
/// ///
/// `endIndex` is not a valid argument to `subscript`, and is always /// `endIndex` is always reachable from `startIndex` by zero or more
/// reachable from `startIndex` by zero or more applications of /// applications of `index(after:)`.
/// `index(after:)`.
public var endIndex: Base.Index { public var endIndex: Base.Index {
return _base.endIndex return _base.endIndex
} }

View File

@@ -30,9 +30,16 @@ public func withExtendedLifetime<T, Result>(
extension String { extension String {
/// Invoke `f` on the contents of this string, represented as /// Invokes the given closure on the contents of the string, represented as a
/// a nul-terminated array of char, ensuring that the array's /// pointer to a null-terminated sequence of UTF-8 code units.
/// lifetime extends through the execution of `f`. ///
/// The `withCString(_:)` method ensures that the sequence's lifetime extends
/// through the execution of `f`.
///
/// - Parameter f: A closure that takes a pointer to the string's UTF-8 code
/// unit sequence as its sole argument. If the closure has a return value,
/// it is used as the return value of the `withCString(_:)` method.
/// - Returns: The return value of the `f` closure, if any.
public func withCString<Result>( public func withCString<Result>(
_ f: @noescape (UnsafePointer<Int8>) throws -> Result _ f: @noescape (UnsafePointer<Int8>) throws -> Result
) rethrows -> Result { ) rethrows -> Result {

View File

@@ -815,7 +815,7 @@ extension DictionaryLiteral : RandomAccessCollection {
/// `endIndex`. /// `endIndex`.
public var startIndex: Int { return 0 } public var startIndex: Int { return 0 }
/// The collection's "past the end" position, or one /// The collection's "past the end" position---that is, the position one
/// greater than the last valid subscript argument. /// greater than the last valid subscript argument.
/// ///
/// If the `DictionaryLiteral` instance is empty, `endIndex` is equal to /// If the `DictionaryLiteral` instance is empty, `endIndex` is equal to
@@ -839,16 +839,44 @@ extension DictionaryLiteral : RandomAccessCollection {
} }
extension String { extension String {
/// Initialize `self` with the textual representation of `instance`. /// Creates a string representing the given value.
/// ///
/// * If `Subject` conforms to `Streamable`, the result is obtained by /// Use this initializer to convert an instance of any type to its preferred
/// calling `instance.write(to: s)` on an empty string `s`. /// representation as a `String` instance. The initializer creates the
/// * Otherwise, if `Subject` conforms to `CustomStringConvertible`, the /// string representation of `instance` in one of the following ways,
/// result is `instance`'s `description` /// depending on its protocol conformance:
/// * Otherwise, if `Subject` conforms to `CustomDebugStringConvertible`, ///
/// the result is `instance`'s `debugDescription` /// - If `instance` conforms to the `Streamable` protocol, the result is
/// * Otherwise, an unspecified result is supplied automatically by /// obtained by calling `instance.write(to: s)` on an empty string `s`.
/// the Swift standard library. /// - If `instance` conforms to the `CustomStringConvertible` protocol, the
/// result is `instance.description`.
/// - If `instance` conforms to the `CustomDebugStringConvertible` protocol,
/// the result is `instance.debugDescription`.
/// - Otherwise, an unspecified result is supplied automatically by the Swift
/// standard library.
///
/// For example, this custom `Point` struct uses the default representation
/// supplied by the standard library.
///
/// struct Point {
/// let x: Int, y: Int
/// }
///
/// let p = Point(x: 21, y: 30)
/// print(String(p))
/// // Prints "Point(x: 21, y: 30)"
///
/// After adding `CustomStringConvertible` conformance by implementing the
/// `description` property, `Point` provides its own custom representation.
///
/// extension Point: CustomStringConvertible {
/// var description: String {
/// return "(\(x), \(y))"
/// }
/// }
///
/// print(String(p))
/// // Prints "(21, 30)"
/// ///
/// - SeeAlso: `String.init<Subject>(reflecting: Subject)` /// - SeeAlso: `String.init<Subject>(reflecting: Subject)`
public init<Subject>(_ instance: Subject) { public init<Subject>(_ instance: Subject) {
@@ -856,20 +884,49 @@ extension String {
_print_unlocked(instance, &self) _print_unlocked(instance, &self)
} }
/// Initialize `self` with a detailed textual representation of /// Creates a string with a detailed representation of the given value,
/// `subject`, suitable for debugging. /// suitable for debugging.
/// ///
/// * If `Subject` conforms to `CustomDebugStringConvertible`, the result /// Use this initializer to convert an instance of any type to its custom
/// is `subject`'s `debugDescription`. /// debugging representation. The initializer creates the string
/// representation of `subject` in one of the following ways, depending on
/// its protocol conformance:
/// ///
/// * Otherwise, if `Subject` conforms to `CustomStringConvertible`, /// - If `subject` conforms to the `CustomDebugStringConvertible` protocol,
/// the result is `subject`'s `description`. /// the result is `subject.debugDescription`.
/// /// - If `subject` conforms to the `CustomStringConvertible` protocol, the
/// * Otherwise, if `Subject` conforms to `Streamable`, the result is /// result is `subject.description`.
/// - If `subject` conforms to the `Streamable` protocol, the result is
/// obtained by calling `subject.write(to: s)` on an empty string `s`. /// obtained by calling `subject.write(to: s)` on an empty string `s`.
/// - Otherwise, an unspecified result is supplied automatically by the Swift
/// standard library.
/// ///
/// * Otherwise, an unspecified result is supplied automatically by /// For example, this custom `Point` struct uses the default representation
/// the Swift standard library. /// supplied by the standard library.
///
/// struct Point {
/// let x: Int, y: Int
/// }
///
/// let p = Point(x: 21, y: 30)
/// print(String(reflecting: p))
/// // Prints "p: Point = {
/// // x = 21
/// // y = 30
/// // }"
///
/// After adding `CustomDebugStringConvertible` conformance by implementing
/// the `debugDescription` property, `Point` provides its own custom
/// debugging representation.
///
/// extension Point: CustomDebugStringConvertible {
/// var debugDescription: String {
/// return "Point(x: \(x), y: \(y))"
/// }
/// }
///
/// print(String(reflecting: p))
/// // Prints "Point(x: 21, y: 30)"
/// ///
/// - SeeAlso: `String.init<Subject>(Subject)` /// - SeeAlso: `String.init<Subject>(Subject)`
public init<Subject>(reflecting subject: Subject) { public init<Subject>(reflecting subject: Subject) {

View File

@@ -36,8 +36,8 @@ public protocol MutableIndexable : Indexable {
/// If the collection is empty, `startIndex` is equal to `endIndex`. /// If the collection is empty, `startIndex` is equal to `endIndex`.
var startIndex: Index { get } var startIndex: Index { get }
/// The collection's "past the end" position, or one greater than the last /// The collection's "past the end" position---that is, the position one
/// valid subscript argument. /// greater than the last valid subscript argument.
/// ///
/// When you need a range that includes the last element of a collection, use /// When you need a range that includes the last element of a collection, use
/// the half-open range operator (`..<`) with `endIndex`. The `..<` operator /// the half-open range operator (`..<`) with `endIndex`. The `..<` operator

View File

@@ -194,7 +194,7 @@ public enum Optional<Wrapped> : NilLiteralConvertible {
/// Creates an instance initialized with `nil`. /// Creates an instance initialized with `nil`.
/// ///
/// Don't use this initializer directly; it is used by the compiler when you /// Do not call this initializer directly. It is used by the compiler when you
/// initialize an `Optional` instance with a `nil` literal. For example: /// initialize an `Optional` instance with a `nil` literal. For example:
/// ///
/// var i: Index? = nil /// var i: Index? = nil

View File

@@ -16,12 +16,60 @@ import SwiftShims
// Input/Output interfaces // Input/Output interfaces
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
/// A target of text streaming operations. /// A type that can be the target of text-streaming operations.
///
/// You can send the output of the standard library's `print(_:to:)` and
/// `dump(_:to:)` functions to an instance of a type that conforms to the
/// `OutputStream` protocol instead of to standard output. Swift's `String`
/// type conforms to `OutputStream` already, so you can capture the output
/// from `print(_:to:)` and `dump(_:to:)` in a string instead of logging it to
/// standard output.
///
/// var s = ""
/// for n in 1 ... 5 {
/// print(n, terminator: "", to: &s)
/// }
/// // s == "12345"
///
/// Conforming to the OutputStream Protocol
/// =======================================
///
/// To make your custom type conform to the `OutputStream` protocol, implement
/// the required `write(_:)` method. Functions that use an `OutputStream`
/// target may call `write(_:)` multiple times per writing operation.
///
/// As an example, here's an implementation of an output stream that converts
/// any input to its plain ASCII representation before sending it to standard
/// output.
///
/// struct ASCIILogger: OutputStream {
/// mutating func write(_ string: String) {
/// let ascii = string.unicodeScalars.lazy.map { scalar in
/// scalar == "\n"
/// ? "\n"
/// : scalar.escaped(asASCII: true)
/// }
/// print(ascii.joined(separator: ""), terminator: "")
/// }
/// }
///
/// The `ASCIILogger` type's `write(_:)` method processes its string input by
/// escaping each Unicode scalar, with the exception of `"\n"` line returns.
/// By sending the output of the `print(_:to:)` function to an instance of
/// `ASCIILogger`, you invoke its `write(_:)` method.
///
/// let s = "Hearts ♡ and Diamonds ♢"
/// print(s)
/// // Prints "Hearts ♡ and Diamonds ♢"
///
/// var asciiLogger = ASCIILogger()
/// print(s, to: &asciiLogger)
/// // Prints "Hearts \u{2661} and Diamonds \u{2662}"
public protocol OutputStream { public protocol OutputStream {
mutating func _lock() mutating func _lock()
mutating func _unlock() mutating func _unlock()
/// Append the given `string` to this stream. /// Appends the given string to the stream.
mutating func write(_ string: String) mutating func write(_ string: String)
} }
@@ -30,51 +78,143 @@ extension OutputStream {
public mutating func _unlock() {} public mutating func _unlock() {}
} }
/// A source of text streaming operations. `Streamable` instances can /// A source of text-streaming operations.
/// be written to any *output stream*.
/// ///
/// For example: `String`, `Character`, `UnicodeScalar`. /// Instances of types that conform to the `Streamable` protocol can write
/// their value to instances of any type that conforms to the `OutputStream`
/// protocol. The Swift standard library's text-related types, `String`,
/// `Character`, and `UnicodeScalar`, all conform to `Streamable`.
///
/// Conforming to the Streamable Protocol
/// =====================================
///
/// To add `Streamable` conformance to a custom type, implement the required
/// `write(to:)` method. Call the given output stream's `write(_:)` method in
/// your implementation.
public protocol Streamable { public protocol Streamable {
/// Write a textual representation of `self` into `target`. /// Writes a textual representation of this instance into the given output
/// stream.
func write<Target : OutputStream>(to target: inout Target) func write<Target : OutputStream>(to target: inout Target)
} }
/// A type with a customized textual representation. /// A type with a customized textual representation.
/// ///
/// This textual representation is used when values are written to an /// Types that conform to the `CustomStringConvertible` protocol can provide
/// *output stream*, for example, by `print`. /// their own representation to be used when converting an instance to a
/// string. The `String(_:)` initializer is the preferred way to convert an
/// instance of *any* type to a string. If the passed instance conforms to
/// `CustomStringConvertible`, the `String(_:)` initializer and the
/// `print(_:)` function use the instance's custom `description` property.
/// ///
/// - Note: `String(instance)` will work for an `instance` of *any* /// Accessing a type's `description` property directly or using
/// type, returning its `description` if the `instance` happens to be /// `CustomStringConvertible` as a generic constraint is discouraged.
/// `CustomStringConvertible`. Using `CustomStringConvertible` as a ///
/// generic constraint, or accessing a conforming type's `description` /// Conforming to the CustomStringConvertible Protocol
/// directly, is therefore discouraged. /// ==================================================
///
/// Add `CustomStringConvertible` conformance to your custom types by defining
/// a `description` property.
///
/// For example, this custom `Point` struct uses the default representation
/// supplied by the standard library:
///
/// struct Point {
/// let x: Int, y: Int
/// }
///
/// let p = Point(x: 21, y: 30)
/// print(p)
/// // Prints "Point(x: 21, y: 30)"
///
/// After implementing the `description` property and declaring
/// `CustomStringConvertible` conformance, the `Point` type provides its own
/// custom representation.
///
/// extension Point: CustomStringConvertible {
/// var description: String {
/// return "(\(x), \(y))"
/// }
/// }
///
/// print(p)
/// // Prints "(21, 30)"
/// ///
/// - SeeAlso: `String.init<T>(T)`, `CustomDebugStringConvertible` /// - SeeAlso: `String.init<T>(T)`, `CustomDebugStringConvertible`
public protocol CustomStringConvertible { public protocol CustomStringConvertible {
/// A textual representation of the instance. /// A textual representation of this instance.
///
/// Instead of accessing this property directly, convert an instance of any
/// type to a string by using the `String(_:)` initializer. For example:
///
/// struct Point: CustomStringConvertible {
/// let x: Int, y: Int
///
/// var description: String {
/// return "(\(x), \(y))"
/// }
/// }
///
/// let p = Point(x: 21, y: 30)
/// let s = String(p)
/// print(s)
/// // Prints "(21, 30)"
///
/// The conversion of `p` to a string in the assignment to `s` uses the
/// `Point` type's `description` property.
var description: String { get } var description: String { get }
} }
/// A type with a customized textual representation suitable for /// A type with a customized textual representation suitable for debugging
/// debugging purposes. /// purposes.
/// ///
/// This textual representation is used when values are written to an /// Swift provides a default debugging textual representation for any type.
/// *output stream* by `debugPrint`, and is /// That default representation is used by the `String(reflecting:)`
/// typically more verbose than the text provided by a /// initializer and the `debugPrint(_:)` function for types that don't provide
/// `CustomStringConvertible`'s `description` property. /// their own. To customize that representation, make your type conform to the
/// `CustomDebugStringConvertible` protocol.
/// ///
/// - Note: `String(reflecting: instance)` will work for an `instance` /// Because the `String(reflecting:)` initializer works for instances of *any*
/// of *any* type, returning its `debugDescription` if the `instance` /// type, returning an instance's `debugDescription` if the value passed
/// happens to be `CustomDebugStringConvertible`. Using /// conforms to `CustomDebugStringConvertible`, accessing a type's
/// `CustomDebugStringConvertible` as a generic constraint, or /// `debugDescription` property directly or using
/// accessing a conforming type's `debugDescription` directly, is /// `CustomDebugStringConvertible` as a generic constraint is discouraged.
/// therefore discouraged.
/// ///
/// - SeeAlso: `String.init<T>(reflecting: T)`, /// Conforming to the CustomDebugStringConvertible Protocol
/// `CustomStringConvertible` /// =======================================================
///
/// Add `CustomDebugStringConvertible` conformance to your custom types by
/// defining a `debugDescription` property.
///
/// For example, this custom `Point` struct uses the default representation
/// supplied by the standard library:
///
/// struct Point {
/// let x: Int, y: Int
/// }
///
/// let p = Point(x: 21, y: 30)
/// print(String(reflecting: p))
/// // Prints "p: Point = {
/// // x = 21
/// // y = 30
/// // }"
///
/// After adding `CustomDebugStringConvertible` conformance by implementing the
/// `debugDescription` property, `Point` provides its own custom debugging
/// representation.
///
/// extension Point: CustomDebugStringConvertible {
/// var debugDescription: String {
/// return "Point(x: \(x), y: \(y))"
/// }
/// }
///
/// print(String(reflecting: p))
/// // Prints "Point(x: 21, y: 30)"
///
/// - SeeAlso: `String.init<T>(reflecting: T)`, `CustomStringConvertible`
public protocol CustomDebugStringConvertible { public protocol CustomDebugStringConvertible {
/// A textual representation of the instance, suitable for debugging. /// A textual representation of this instance, suitable for debugging.
var debugDescription: String { get } var debugDescription: String { get }
} }
@@ -350,7 +490,9 @@ internal struct _Stdout : OutputStream {
} }
extension String : OutputStream { extension String : OutputStream {
/// Append `other` to this stream. /// Appends the given string to this string.
///
/// - Parameter other: A string to append.
public mutating func write(_ other: String) { public mutating func write(_ other: String) {
self += other self += other
} }
@@ -361,21 +503,28 @@ extension String : OutputStream {
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
extension String : Streamable { extension String : Streamable {
/// Write a textual representation of `self` into `target`. /// Writes the string into the given output stream.
///
/// - Parameter target: An output stream.
public func write<Target : OutputStream>(to target: inout Target) { public func write<Target : OutputStream>(to target: inout Target) {
target.write(self) target.write(self)
} }
} }
extension Character : Streamable { extension Character : Streamable {
/// Write a textual representation of `self` into `target`. /// Writes the character into the given output stream.
///
/// - Parameter target: An output stream.
public func write<Target : OutputStream>(to target: inout Target) { public func write<Target : OutputStream>(to target: inout Target) {
target.write(String(self)) target.write(String(self))
} }
} }
extension UnicodeScalar : Streamable { extension UnicodeScalar : Streamable {
/// Write a textual representation of `self` into `target`. /// Writes the textual representation of the Unicode scalar into the given
/// output stream.
///
/// - Parameter target: An output stream.
public func write<Target : OutputStream>(to target: inout Target) { public func write<Target : OutputStream>(to target: inout Target) {
target.write(String(Character(self))) target.write(String(Character(self)))
} }

View File

@@ -1060,9 +1060,8 @@ public func ^= <T : BitwiseOperations>(lhs: inout T, rhs: T) {
public protocol Hashable : Equatable { public protocol Hashable : Equatable {
/// The hash value. /// The hash value.
/// ///
/// - Important: Hash values are not guaranteed to be equal across different /// Hash values are not guaranteed to be equal across different executions of
/// executions of your program. Do not save hash values to use during a /// your program. Do not save hash values to use during a future execution.
/// future execution.
var hashValue: Int { get } var hashValue: Int { get }
} }

View File

@@ -392,7 +392,7 @@ extension SetAlgebra {
/// Creates a set containing the elements of the given array literal. /// Creates a set containing the elements of the given array literal.
/// ///
/// Don't directly call this initializer, which is used by the compiler when /// Do not call this initializer directly. It is used by the compiler when
/// you use an array literal. Instead, create a new set using an array /// you use an array literal. Instead, create a new set using an array
/// literal as its value by enclosing a comma-separated list of values in /// literal as its value by enclosing a comma-separated list of values in
/// square brackets. You can use an array literal anywhere a set is expected /// square brackets. You can use an array literal anywhere a set is expected

View File

@@ -287,11 +287,11 @@ struct _SliceBuffer<Element> : _ArrayBufferProtocol, RandomAccessCollection {
/// In an empty collection, `startIndex == endIndex`. /// In an empty collection, `startIndex == endIndex`.
public var startIndex: Int public var startIndex: Int
/// The collection's "past the end" position. /// The collection's "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
/// ///
/// `endIndex` is not a valid argument to `subscript`, and is always /// `endIndex` is always reachable from `startIndex` by zero or more
/// reachable from `startIndex` by zero or more applications of /// applications of `index(after:)`.
/// `index(after:)`.
public var endIndex: Int { public var endIndex: Int {
get { get {
return Int(endIndexAndFlags >> 1) return Int(endIndexAndFlags >> 1)

View File

@@ -17,16 +17,13 @@
// are involved in its construction. This feature is crucial for // are involved in its construction. This feature is crucial for
// preventing infinite recursion even in non-asserting cases. // preventing infinite recursion even in non-asserting cases.
/// A simple string designed to represent text that is "knowable at /// A string type designed to represent text that is known at compile time.
/// compile-time".
/// ///
/// Logically speaking, each instance looks something like this: /// Instances of the `StaticString` type are immutable. `StaticString` provides
/// /// limited, pointer-based access to its contents, unlike Swift's more
/// enum StaticString { /// commonly used `String` type. A static string can store its value as a
/// case ascii(start: UnsafePointer<UInt8>, count: Int) /// pointer to an ASCII code unit sequence, as a pointer to a UTF-8 code unit
/// case utf8(start: UnsafePointer<UInt8>, count: Int) /// sequence, or as a single Unicode scalar value.
/// case scalar(UnicodeScalar)
/// }
@_fixed_layout @_fixed_layout
public struct StaticString public struct StaticString
: _BuiltinUnicodeScalarLiteralConvertible, : _BuiltinUnicodeScalarLiteralConvertible,
@@ -56,10 +53,11 @@ public struct StaticString
/// ASCII. /// ASCII.
internal var _flags: Builtin.Int8 internal var _flags: Builtin.Int8
/// A pointer to the beginning of UTF-8 code units. /// A pointer to the beginning of the string's UTF-8 encoded representation.
/// ///
/// - Precondition: `self` stores a pointer to either ASCII or UTF-8 code /// The static string must store a pointer to either ASCII or UTF-8 code
/// units. /// units. Accessing this property when `hasPointerRepresentation` is
/// `false` triggers a runtime error.
@_transparent @_transparent
public var utf8Start: UnsafePointer<UInt8> { public var utf8Start: UnsafePointer<UInt8> {
_precondition( _precondition(
@@ -70,7 +68,9 @@ public struct StaticString
/// The stored Unicode scalar value. /// The stored Unicode scalar value.
/// ///
/// - Precondition: `self` stores a single Unicode scalar value. /// The static string must store a single Unicode scalar value. Accessing
/// this property when `hasPointerRepresentation` is `true` triggers a
/// runtime error.
@_transparent @_transparent
public var unicodeScalar: UnicodeScalar { public var unicodeScalar: UnicodeScalar {
_precondition( _precondition(
@@ -79,11 +79,10 @@ public struct StaticString
return UnicodeScalar(UInt32(UInt(_startPtrOrData))) return UnicodeScalar(UInt32(UInt(_startPtrOrData)))
} }
/// If `self` stores a pointer to ASCII or UTF-8 code units, the /// The length in bytes of the static string's ASCII or UTF-8 representation.
/// length in bytes of that data.
/// ///
/// If `self` stores a single Unicode scalar value, the value of /// - Warning: If the static string stores a single Unicode scalar value, the
/// `utf8CodeUnitCount` is unspecified. /// value of `utf8CodeUnitCount` is unspecified.
@_transparent @_transparent
public var utf8CodeUnitCount: Int { public var utf8CodeUnitCount: Int {
_precondition( _precondition(
@@ -92,25 +91,38 @@ public struct StaticString
return Int(_utf8CodeUnitCount) return Int(_utf8CodeUnitCount)
} }
/// `true` iff `self` stores a pointer to ASCII or UTF-8 code units. /// A Boolean value indicating whether the static string stores a pointer to
/// ASCII or UTF-8 code units.
@_transparent @_transparent
public var hasPointerRepresentation: Bool { public var hasPointerRepresentation: Bool {
return (UInt8(_flags) & 0x1) == 0 return (UInt8(_flags) & 0x1) == 0
} }
/// `true` if `self` stores a pointer to ASCII code units. /// A Boolean value that is `true` if the static string stores a pointer to
/// ASCII code units.
/// ///
/// If `self` stores a single Unicode scalar value, the value of /// Use this property in conjunction with `hasPointerRepresentation` to
/// `isASCII` is unspecified. /// determine whether a static string with pointer representation stores an
/// ASCII or UTF-8 code unit sequence.
///
/// - Warning: If the static string stores a single Unicode scalar value, the
/// value of `isASCII` is unspecified.
@_transparent @_transparent
public var isASCII: Bool { public var isASCII: Bool {
return (UInt8(_flags) & 0x2) != 0 return (UInt8(_flags) & 0x2) != 0
} }
/// Invoke `body` with a buffer containing the UTF-8 code units of /// Invokes the given closure with a buffer containing the static string's
/// `self`. /// UTF-8 code unit sequence.
/// ///
/// This method works regardless of what `self` stores. /// This method works regardless of whether the static string stores a
/// pointer or a single Unicode scalar value.
///
/// - Parameter body: A closure that takes a buffer pointer to the static
/// string's UTF-8 code unit sequence as its sole argument. If the closure
/// has a return value, it is used as the return value of the
/// `withUTF8Buffer(invoke:)` method.
/// - Returns: The return value of the `body` closure, if any.
public func withUTF8Buffer<R>( public func withUTF8Buffer<R>(
invoke body: @noescape (UnsafeBufferPointer<UInt8>) -> R) -> R { invoke body: @noescape (UnsafeBufferPointer<UInt8>) -> R) -> R {
if hasPointerRepresentation { if hasPointerRepresentation {
@@ -130,7 +142,7 @@ public struct StaticString
} }
} }
/// Create an empty instance. /// Creates an empty static string.
@_transparent @_transparent
public init() { public init() {
self = "" self = ""
@@ -169,7 +181,10 @@ public struct StaticString
self = StaticString(unicodeScalar: value) self = StaticString(unicodeScalar: value)
} }
/// Create an instance initialized to `value`. /// Creates an instance initialized to a single Unicode scalar.
///
/// Do not call this initializer directly. It may be used by the compiler
/// when you initialize a static string with a Unicode scalar.
@effects(readonly) @effects(readonly)
@_transparent @_transparent
public init(unicodeScalarLiteral value: StaticString) { public init(unicodeScalarLiteral value: StaticString) {
@@ -190,7 +205,11 @@ public struct StaticString
) )
} }
/// Create an instance initialized to `value`. /// Creates an instance initialized to a single character that is made up of
/// one or more Unicode code points.
///
/// Do not call this initializer directly. It may be used by the compiler
/// when you initialize a static string using an extended grapheme cluster.
@effects(readonly) @effects(readonly)
@_transparent @_transparent
public init(extendedGraphemeClusterLiteral value: StaticString) { public init(extendedGraphemeClusterLiteral value: StaticString) {
@@ -210,14 +229,17 @@ public struct StaticString
isASCII: isASCII) isASCII: isASCII)
} }
/// Create an instance initialized to `value`. /// Creates an instance initialized to the value of a string literal.
///
/// Do not call this initializer directly. It may be used by the compiler
/// when you initialize a static string using a string literal.
@effects(readonly) @effects(readonly)
@_transparent @_transparent
public init(stringLiteral value: StaticString) { public init(stringLiteral value: StaticString) {
self = value self = value
} }
/// A textual representation of `self`. /// A string representation of the static string.
public var description: String { public var description: String {
return withUTF8Buffer { return withUTF8Buffer {
(buffer) in (buffer) in
@@ -225,7 +247,7 @@ public struct StaticString
} }
} }
/// A textual representation of `self`, suitable for debugging. /// A textual representation of the static string, suitable for debugging.
public var debugDescription: String { public var debugDescription: String {
return self.description.debugDescription return self.description.debugDescription
} }

View File

@@ -15,101 +15,276 @@ import SwiftShims
// FIXME: complexity documentation for most of methods on String is ought to be // FIXME: complexity documentation for most of methods on String is ought to be
// qualified with "amortized" at least, as Characters are variable-length. // qualified with "amortized" at least, as Characters are variable-length.
/// An arbitrary Unicode string value. /// A Unicode string value.
/// ///
/// Unicode-Correct /// A string is a series of characters, such as `"Swift"`. Strings in Swift are
/// =============== /// Unicode correct, locale insensitive, and designed to be efficient. The
/// `String` type bridges with the Objective-C class `NSString` and offers
/// interoperability with C functions that work with strings.
/// ///
/// Swift strings are designed to be Unicode-correct. In particular, /// You can create new strings using string literals or string interpolations.
/// the APIs make it easy to write code that works correctly, and does /// A string literal is a series of characters enclosed in quotes.
/// not surprise end-users, regardless of where you venture in the
/// Unicode character space. For example, the `==` operator checks
/// for [Unicode canonical
/// equivalence](http://www.unicode.org/glossary/#deterministic_comparison),
/// so two different representations of the same string will always
/// compare equal.
/// ///
/// Locale-Insensitive /// let greeting = "Welcome!"
/// ==================
/// ///
/// The fundamental operations on Swift strings are not sensitive to /// String interpolations are string literals that evaluate any included
/// locale settings. That's because, for example, the validity of a /// expressions and convert the results to string form. String interpolations
/// `Dictionary<String, T>` in a running program depends on a given /// are an easy way to build a string from multiple pieces. Wrap each
/// string comparison having a single, stable result. Therefore, /// expression in a string interpolation in parentheses, prefixed by a
/// Swift always uses the default, /// backslash.
/// un-[tailored](http://www.unicode.org/glossary/#tailorable) Unicode
/// algorithms for basic string operations.
/// ///
/// Importing `Foundation` endows swift strings with the full power of /// let name = "Rosa"
/// the `NSString` API, which allows you to choose more complex /// let personalizedGreeting = "Welcome, \(name)!"
/// locale-sensitive operations explicitly.
/// ///
/// Value Semantics /// let price = 2
/// =============== /// let number = 3
/// let cookiePrice = "\(number) cookies: $\(price * number)."
/// ///
/// Each string variable, `let` binding, or stored property has an /// Combine strings using the concatenation operator (`+`).
/// independent value, so mutations to the string are not observable
/// through its copies:
/// ///
/// var a = "foo" /// let longerGreeting = greeting + " We're glad you're here!"
/// var b = a /// print(longerGreeting)
/// b.append("bar") /// // Prints "Welcome! We're glad you're here!"
/// print("a=\(a), b=\(b)") // a=foo, b=foobar
/// ///
/// Strings use Copy-on-Write so that their data is only copied /// Modifying and Comparing Strings
/// lazily, upon mutation, when more than one string instance is using /// ===============================
/// the same buffer. Therefore, the first in any sequence of mutating
/// operations may cost `O(N)` time and space, where `N` is the length
/// of the string's (unspecified) underlying representation.
/// ///
/// Views /// Strings always have value semantics. Modifying a copy of a string leaves
/// ===== /// the original unaffected.
/// ///
/// `String` is not itself a collection of anything. Instead, it has /// var otherGreeting = greeting
/// properties that present the string's contents as meaningful /// otherGreeting += " Have a nice time!"
/// collections: /// print(otherGreeting)
/// // Prints "Welcome! Have a nice time!"
/// ///
/// - `characters`: a collection of `Character` ([extended grapheme /// print(greeting)
/// cluster](http://www.unicode.org/glossary/#extended_grapheme_cluster)) /// // Prints "Welcome!"
/// elements, a unit of text that is meaningful to most humans.
/// ///
/// - `unicodeScalars`: a collection of `UnicodeScalar` ([Unicode /// Comparing strings for equality using the is-equal-to operator (`==`) or a
/// scalar /// relational operator (like `<` and `>=`) is always performed using the
/// values](http://www.unicode.org/glossary/#unicode_scalar_value)) /// Unicode canonical representation. This means that different
/// the 21-bit codes that are the basic unit of Unicode. These /// representations of a string compare as being equal.
/// values are equivalent to UTF-32 code units.
/// ///
/// - `utf16`: a collection of `UTF16.CodeUnit`, the 16-bit /// let cafe1 = "Cafe\u{301}"
/// elements of the string's UTF-16 encoding. /// let cafe2 = "Café"
/// print(cafe1 == cafe2)
/// // Prints "true"
/// ///
/// - `utf8`: a collection of `UTF8.CodeUnit`, the 8-bit /// The Unicode code point `"\u{301}"` modifies the preceding character to
/// elements of the string's UTF-8 encoding. /// include an accent, so `"e\u{301}"` has the same canonical representation
/// as the single Unicode code point `"é"`.
/// ///
/// Growth and Capacity /// Basic string operations are not sensitive to locale settings. This ensures
/// =================== /// that string comparisons and other operations always have a single, stable
/// result, allowing strings to be used as keys in `Dictionary` instances and
/// for other purposes.
/// ///
/// When a string's contiguous storage fills up, new storage must be /// Representing Strings: Views
/// allocated and characters must be moved to the new storage. /// ===========================
/// `String` uses an exponential growth strategy that makes `append` a
/// constant time operation *when amortized over many invocations*.
/// ///
/// Objective-C Bridge /// A string is not itself a collection. Instead, it has properties that
/// ================== /// present its contents as meaningful collections. Each of these collections
/// is a particular type of *view* of the string's visible and data
/// representation.
/// ///
/// `String` is bridged to Objective-C as `NSString`, and a `String` /// To demonstrate the different views available for every string, the
/// that originated in Objective-C may store its characters in an /// following examples use this `String` instance:
/// `NSString`. Since any arbitrary subclass of `NSString` can ///
/// become a `String`, there are no guarantees about representation or /// let cafe = "Cafe\u{301} du 🌍"
/// efficiency in this case. Since `NSString` is immutable, it is /// print(cafe)
/// just as though the storage was shared by some copy: the first in /// // Prints "Café du 🌍"
/// any sequence of mutating operations causes elements to be copied ///
/// into unique, contiguous storage which may cost `O(N)` time and /// Character View
/// space, where `N` is the length of the string representation (or /// --------------
/// more, if the underlying `NSString` has unusual performance ///
/// characteristics). /// A string's `characters` property is a collection of *extended grapheme
/// clusters*, which approximate human-readable characters. Many individual
/// characters, such as "é", "김", and "🇮🇳", can be made up of multiple Unicode
/// code points. These code points are combined by Unicode's boundary
/// algorithms into extended grapheme clusters, represented by Swift's
/// `Character` type. Each element of the `characters` view is represented by
/// a `Character` instance.
///
/// print(cafe.characters.count)
/// // Prints "9"
/// print(Array(cafe.characters))
/// // Prints "["C", "a", "f", "é", " ", "d", "u", " ", "🌍"]"
///
/// Each visible character in the `cafe` string is a separate element of the
/// `characters` view.
///
/// Unicode Scalar View
/// -------------------
///
/// A string's `unicodeScalars` property is a collection of Unicode scalar
/// values, the 21-bit codes that are the basic unit of Unicode. Each scalar
/// value is represented by a `UnicodeScalar` instance and is equivalent to a
/// UTF-32 code unit.
///
/// print(cafe.unicodeScalars.count)
/// // Prints "10"
/// print(Array(cafe.unicodeScalars))
/// // Prints "["C", "a", "f", "e", "\u{0301}", " ", "d", "u", " ", "\u{0001F30D}"]"
/// print(cafe.unicodeScalars.map { $0.value })
/// // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 127757]"
///
/// The `unicodeScalars` view's elements comprise each Unicode scalar value in
/// the `cafe` string. In particular, because `cafe` was declared using the
/// decomposed form of the `"é"` character, `unicodeScalars` contains the code
/// points for both the letter `"e"` (101) and the accent character `"´"`
/// (769).
///
/// UTF-16 View
/// -----------
///
/// A string's `utf16` property is a collection of UTF-16 code units, the
/// 16-bit encoding form of the string's Unicode scalar values. Each code unit
/// is stored as a `UInt16` instance.
///
/// print(cafe.utf16.count)
/// // Prints "11"
/// print(Array(cafe.utf16))
/// // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 55356, 57101]"
///
/// The elements of the `utf16` view are the code units for the string when
/// encoded in UTF-16.
///
/// The elements of this collection match those accessed through indexed
/// `NSString` APIs.
///
/// let nscafe = cafe as NSString
/// print(nscafe.length)
/// // Prints "11"
/// print(nscafe.character(at: 3))
/// // Prints "101"
///
/// UTF-8 View
/// ----------
///
/// A string's `utf8` property is a collection of UTF-8 code units, the 8-bit
/// encoding form of the string's Unicode scalar values. Each code unit is
/// stored as a `UInt8` instance.
///
/// print(cafe.utf8.count)
/// // Prints "14"
/// print(Array(cafe.utf8))
/// // Prints "[67, 97, 102, 101, 204, 129, 32, 100, 117, 32, 240, 159, 140, 141]"
///
/// The elements of the `utf8` view are the code units for the string when
/// encoded in UTF-8. This representation matches the one used when `String`
/// instances are passed to C APIs.
///
/// let cLength = strlen(cafe)
/// print(cLength)
/// // Prints "14"
///
/// Counting the Length of a String
/// ===============================
///
/// When you need to know the length of a string, you must first consider what
/// you'll use the length for. Are you measuring the number of characters that
/// will be displayed on the screen, or are you measuring the amount of
/// storage needed for the string in a particular encoding? A single string
/// can have greatly differing lengths when measured by its different views.
///
/// For example, an ASCII character like the capital letter *A* is represented
/// by a single element in each of its four views. The Unicode scalar value of
/// *A* is `65`, which is small enough to fit in a single code unit in both
/// UTF-16 and UTF-8.
///
/// let capitalA = "A"
/// print(capitalA.characters.count)
/// // Prints "1"
/// print(capitalA.unicodeScalars.count)
/// // Prints "1"
/// print(capitalA.utf16.count)
/// // Prints "1"
/// print(capitalA.utf8.count)
/// // Prints "1"
///
///
/// On the other hand, an emoji flag character is constructed from a pair of
/// Unicode scalar values, like `"\u{1F1F5}"` and `"\u{1F1F7}"`. Each of
/// these scalar values, in turn, is too large to fit into a single UTF-16 or
/// UTF-8 code unit. As a result, each view of the string `"🇵🇷"` reports a
/// different length.
///
/// let flag = "🇵🇷"
/// print(flag.characters.count)
/// // Prints "1"
/// print(flag.unicodeScalars.count)
/// // Prints "2"
/// print(flag.utf16.count)
/// // Prints "4"
/// print(flag.utf8.count)
/// // Prints "8"
///
/// Accessing String View Elements
/// ==============================
///
/// To find individual elements of a string, use the appropriate view for your
/// task. For example, to retrieve the first word of a longer string, you can
/// search the `characters` view for a space and then create a new string from
/// a prefix of the `characters` view up to that point.
///
/// let name = "Marie Curie"
/// let firstSpace = name.characters.index(of: " ")!
/// let firstName = String(name.characters.prefix(upTo: firstSpace))
/// print(firstName)
/// // Prints "Marie"
///
/// You can convert an index into one of a string's views to an index into
/// another view.
///
/// let firstSpaceUTF8 = firstSpace.samePosition(in: name.utf8)
/// print(Array(name.utf8.prefix(upTo: firstSpaceUTF8)))
/// // Prints "[77, 97, 114, 105, 101]"
///
/// Performance Optimizations
/// =========================
///
/// Although strings in Swift have value semantics, strings use a copy-on-write
/// strategy to store their data in a buffer. This buffer can then be shared
/// by different copies of a string. A string's data is only copied lazily,
/// upon mutation, when more than one string instance is using the same
/// buffer. Therefore, the first in any sequence of mutating operations may
/// cost O(*n*) time and space.
///
/// When a string's contiguous storage fills up, a new buffer must be allocated
/// and data must be moved to the new storage. String buffers use an
/// exponential growth strategy that makes appending to a string a constant
/// time operation when averaged over many append operations.
///
/// Bridging between String and NSString
/// ====================================
///
/// Any `String` instance can be bridged to `NSString` using the type-cast
/// operator (`as`), and any `String` instance that originates in Objective-C
/// may use an `NSString` instance as its storage. Because any arbitrary
/// subclass of `NSString` can become a `String` instance, there are no
/// guarantees about representation or efficiency when a `String` instance is
/// backed by `NSString` storage. Because `NSString` is immutable, it is just
/// as though the storage was shared by a copy: The first in any sequence of
/// mutating operations causes elements to be copied into unique, contiguous
/// storage which may cost O(*n*) time and space, where *n* is the length of
/// the string's encoded representation (or more, if the underlying `NSString`
/// has unusual performance characteristics).
///
/// For more information about the Unicode terms used in this discussion, see
/// the [Unicode.org glossary][glossary]. In particular, this discussion
/// mentions [extended grapheme clusters][clusters],
/// [Unicode scalar values][scalars], and [canonical equivalence][equivalence].
///
/// [glossary]: http://www.unicode.org/glossary/
/// [clusters]: http://www.unicode.org/glossary/#extended_grapheme_cluster
/// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value
/// [equivalence]: http://www.unicode.org/glossary/#canonical_equivalent
///
/// - SeeAlso: `String.CharacterView`, `String.UnicodeScalarView`,
/// `String.UTF16View`, `String.UTF8View`
@_fixed_layout @_fixed_layout
public struct String { public struct String {
/// An empty `String`. /// Creates an empty string.
public init() { public init() {
_core = _StringCore() _core = _StringCore()
} }
@@ -175,7 +350,11 @@ extension String : _BuiltinUnicodeScalarLiteralConvertible {
} }
extension String : UnicodeScalarLiteralConvertible { extension String : UnicodeScalarLiteralConvertible {
/// Create an instance initialized to `value`. /// Creates an instance initialized to the given Unicode scalar value.
///
/// Do not call this initializer directly. It may be used by the compiler when
/// you initialize a string using a string literal that contains a single
/// Unicode scalar value.
public init(unicodeScalarLiteral value: String) { public init(unicodeScalarLiteral value: String) {
self = value self = value
} }
@@ -197,7 +376,12 @@ extension String : _BuiltinExtendedGraphemeClusterLiteralConvertible {
} }
extension String : ExtendedGraphemeClusterLiteralConvertible { extension String : ExtendedGraphemeClusterLiteralConvertible {
/// Create an instance initialized to `value`. /// Creates an instance initialized to the given extended grapheme cluster
/// literal.
///
/// Do not call this initializer directly. It may be used by the compiler when
/// you initialize a string using a string literal containing a single
/// extended grapheme cluster.
public init(extendedGraphemeClusterLiteral value: String) { public init(extendedGraphemeClusterLiteral value: String) {
self = value self = value
} }
@@ -247,14 +431,22 @@ extension String : _BuiltinStringLiteralConvertible {
} }
extension String : StringLiteralConvertible { extension String : StringLiteralConvertible {
/// Create an instance initialized to `value`. /// Creates an instance initialized to the given string value.
///
/// Do not call this initializer directly. It is used by the compiler when you
/// initialize a string using a string literal. For example:
///
/// let nextStop = "Clark & Lake"
///
/// This assignment to the `nextStop` constant calls this string literal
/// initializer behind the scenes.
public init(stringLiteral value: String) { public init(stringLiteral value: String) {
self = value self = value
} }
} }
extension String : CustomDebugStringConvertible { extension String : CustomDebugStringConvertible {
/// A textual representation of `self`, suitable for debugging. /// A representation of the string that is suitable for debugging.
public var debugDescription: String { public var debugDescription: String {
var result = "\"" var result = "\""
for us in self.unicodeScalars { for us in self.unicodeScalars {
@@ -294,8 +486,8 @@ extension String {
#if _runtime(_ObjC) #if _runtime(_ObjC)
/// Compare two strings using the Unicode collation algorithm in the /// Compare two strings using the Unicode collation algorithm in the
/// deterministic comparison mode. (The strings which are equivalent according /// deterministic comparison mode. (The strings which are equivalent according
/// to their NFD form are considered equal. Strings which are equivalent /// to their NFD form are considered equal. Strings which are equivalent
/// according to the plain Unicode collation algorithm are additionally ordered /// according to the plain Unicode collation algorithm are additionally ordered
/// based on their NFD.) /// based on their NFD.)
/// ///
@@ -431,14 +623,31 @@ public func <(lhs: String, rhs: String) -> Bool {
// Support for copy-on-write // Support for copy-on-write
extension String { extension String {
/// Append the elements of `other` to `self`. /// Appends the given string to this string.
///
/// The following example builds a customized greeting by using the
/// `append(_:)` method:
///
/// var greeting = "Hello, "
/// if let name = getUserName() {
/// greeting.append(name)
/// } else {
/// greeting.append("friend")
/// }
/// print(greeting)
/// // Prints "Hello, friend"
///
/// - Parameter other: Another string.
public mutating func append(_ other: String) { public mutating func append(_ other: String) {
_core.append(other._core) _core.append(other._core)
} }
/// Append `x` to `self`. /// Appends the given Unicode scalar to the string.
/// ///
/// - Complexity: Amortized O(1). /// - Parameter x: A Unicode scalar value.
///
/// - Complexity: Appending a Unicode scalar to a string averages to O(1)
/// over many additions.
public mutating func append(_ x: UnicodeScalar) { public mutating func append(_ x: UnicodeScalar) {
_core.append(x) _core.append(x)
} }
@@ -458,13 +667,10 @@ func _stdlib_NSStringHashValuePointer(_ str: OpaquePointer, _ isASCII: Bool) ->
#endif #endif
extension String : Hashable { extension String : Hashable {
/// The hash value. /// The string's hash value.
/// ///
/// **Axiom:** `x == y` implies `x.hashValue == y.hashValue`. /// Hash values are not guaranteed to be equal across different executions of
/// /// your program. Do not save hash values to use during a future execution.
/// - Note: The hash value is not guaranteed to be stable across
/// different invocations of the same program. Do not persist the
/// hash value across program runs.
public var hashValue: Int { public var hashValue: Int {
#if _runtime(_ObjC) #if _runtime(_ObjC)
// Mix random bits into NSString's hash so that clients don't rely on // Mix random bits into NSString's hash so that clients don't rely on
@@ -677,7 +883,15 @@ extension String {
} }
} }
/// Return `self` converted to lower case. /// Returns a lowercase version of the string.
///
/// Here's an example of transforming a string to all lowercase letters.
///
/// let cafe = "Café 🍵"
/// print(cafe.lowercased())
/// // Prints "café 🍵"
///
/// - Returns: A lowercase copy of the string.
/// ///
/// - Complexity: O(n) /// - Complexity: O(n)
public func lowercased() -> String { public func lowercased() -> String {
@@ -718,7 +932,15 @@ extension String {
#endif #endif
} }
/// Return `self` converted to upper case. /// Returns an uppercase version of the string.
///
/// The following example transforms a string to uppercase letters:
///
/// let cafe = "Café 🍵"
/// print(cafe.uppercased())
/// // Prints "CAFÉ 🍵"
///
/// - Returns: An uppercase copy of the string.
/// ///
/// - Complexity: O(n) /// - Complexity: O(n)
public func uppercased() -> String { public func uppercased() -> String {

View File

@@ -71,7 +71,7 @@ internal func _cocoaStringToContiguous(
source: _CocoaString, range: Range<Int>, minimumCapacity: Int source: _CocoaString, range: Range<Int>, minimumCapacity: Int
) -> _StringBuffer { ) -> _StringBuffer {
_sanityCheck(_swift_stdlib_CFStringGetCharactersPtr(source) == nil, _sanityCheck(_swift_stdlib_CFStringGetCharactersPtr(source) == nil,
"Known contiguously-stored strings should already be converted to Swift") "Known contiguously stored strings should already be converted to Swift")
let startIndex = range.lowerBound let startIndex = range.lowerBound
let count = range.upperBound - startIndex let count = range.upperBound - startIndex
@@ -107,7 +107,7 @@ internal func _cocoaStringSlice(
_sanityCheck( _sanityCheck(
_swift_stdlib_CFStringGetCharactersPtr(cfSelf) == nil, _swift_stdlib_CFStringGetCharactersPtr(cfSelf) == nil,
"Known contiguously-stored strings should already be converted to Swift") "Known contiguously stored strings should already be converted to Swift")
let cfResult: AnyObject = _swift_stdlib_CFStringCreateWithSubstring( let cfResult: AnyObject = _swift_stdlib_CFStringCreateWithSubstring(
nil, cfSelf, _swift_shims_CFRange( nil, cfSelf, _swift_shims_CFRange(
@@ -124,7 +124,7 @@ internal func _cocoaStringSubscript(
let cfSelf: _swift_shims_CFStringRef = target.cocoaBuffer.unsafelyUnwrapped let cfSelf: _swift_shims_CFStringRef = target.cocoaBuffer.unsafelyUnwrapped
_sanityCheck(_swift_stdlib_CFStringGetCharactersPtr(cfSelf) == nil, _sanityCheck(_swift_stdlib_CFStringGetCharactersPtr(cfSelf) == nil,
"Known contiguously-stored strings should already be converted to Swift") "Known contiguously stored strings should already be converted to Swift")
return _swift_stdlib_CFStringGetCharacterAtIndex(cfSelf, position) return _swift_stdlib_CFStringGetCharacterAtIndex(cfSelf, position)
} }

View File

@@ -19,13 +19,42 @@
// allow performance optimizations of linear traversals. // allow performance optimizations of linear traversals.
extension String { extension String {
/// A `String`'s collection of `Character`s ([extended grapheme /// A view of a string's contents as a collection of characters.
/// clusters](http://www.unicode.org/glossary/#extended_grapheme_cluster)) ///
/// elements. /// In Swift, every string provides a view of its contents as characters. In
/// this view, many individual characters---for example, "é", "김", and
/// "🇮🇳"---can be made up of multiple Unicode code points. These code points
/// are combined by Unicode's boundary algorithms into *extended grapheme
/// clusters*, represented by the `Character` type. Each element of a
/// `CharacterView` collection is a `Character` instance.
///
/// let flowers = "Flowers 💐"
/// for c in flowers.characters {
/// print(c)
/// }
/// // F
/// // l
/// // o
/// // w
/// // e
/// // r
/// // s
/// //
/// // 💐
///
/// You can convert a `String.CharacterView` instance back into a string
/// using the `String` type's `init(_:)` initializer.
///
/// let name = "Marie Curie"
/// if let firstSpace = name.characters.index(of: " ") {
/// let firstName = String(name.characters.prefix(upTo: firstSpace))
/// print(firstName)
/// }
/// // Prints "Marie"
public struct CharacterView { public struct CharacterView {
internal var _core: _StringCore internal var _core: _StringCore
/// Create a view of the `Character`s in `text`. /// Creates a view of the given string.
public init(_ text: String) { public init(_ text: String) {
self._core = text._core self._core = text._core
} }
@@ -36,9 +65,7 @@ extension String {
} }
} }
/// A collection of `Characters` representing the `String`'s /// A view of the string's contents as a collection of characters.
/// [extended grapheme
/// clusters](http://www.unicode.org/glossary/#extended_grapheme_cluster).
public var characters: CharacterView { public var characters: CharacterView {
get { get {
return CharacterView(self) return CharacterView(self)
@@ -48,12 +75,34 @@ extension String {
} }
} }
/// Efficiently mutate `self` by applying `body` to its `characters`. /// Applies the given closure to a mutable view of the string's characters.
/// ///
/// - Warning: Do not rely on anything about `self` (the `String` /// Do not use the string that is the target of this method inside the
/// that is the target of this method) during the execution of /// closure passed to `body`, as it may not have its correct value.
/// `body`: it may not appear to have its correct value. Instead, /// Instead, use the closure's `String.CharacterView` argument.
/// use only the `String.CharacterView` argument to `body`. ///
/// The example below uses the `withMutableCharacters(_:)` method to truncate
/// the string `str` at the first space and to return the remainder of the
/// string.
///
/// var str = "All this happened, more or less."
/// let afterSpace = str.withMutableCharacters { chars -> String.CharacterView in
/// if let i = chars.index(of: " ") {
/// let result = chars.suffix(from: chars.index(after: i))
/// chars.removeSubrange(i..<chars.endIndex)
/// return result
/// }
/// return String.CharacterView()
/// }
///
/// print(str)
/// // Prints "All"
/// print(String(afterSpace))
/// // Prints "this happened, more or less."
///
/// - Parameter body: A closure that takes a character view as its argument.
/// - Returns: The return value of the `body` closure, if any, is the return
/// value of this method.
public mutating func withMutableCharacters<R>(_ body: (inout CharacterView) -> R) -> R { public mutating func withMutableCharacters<R>(_ body: (inout CharacterView) -> R) -> R {
// Naively mutating self.characters forces multiple references to // Naively mutating self.characters forces multiple references to
// exist at the point of mutation. Instead, temporarily move the // exist at the point of mutation. Instead, temporarily move the
@@ -65,8 +114,20 @@ extension String {
return r return r
} }
/// Construct the `String` corresponding to the given sequence of /// Creates a string from the given character view.
/// Unicode scalars. ///
/// Use this initializer to recover a string after performing a collection
/// slicing operation on a character view.
///
/// let poem = "'Twas brillig, and the slithy toves / " +
/// "Did gyre and gimbal in the wabe: / " +
/// "All mimsy were the borogoves / " +
/// "And the mome raths outgrabe."
/// let excerpt = String(poem.characters.prefix(22)) + "..."
/// print(excerpt)
/// // Prints "'Twas brillig, and the..."
///
/// - Parameter characters: A character view to convert to a string.
public init(_ characters: CharacterView) { public init(_ characters: CharacterView) {
self.init(characters._core) self.init(characters._core)
} }
@@ -79,7 +140,20 @@ extension String.CharacterView : BidirectionalCollection {
return UnicodeScalarView(_core) return UnicodeScalarView(_core)
} }
/// A character position. /// A position in a string's `CharacterView` instance.
///
/// You can convert between indices of the different string views by using
/// conversion initializers and the `samePosition(in:)` method overloads.
/// The following example finds the index of the first space in the string's
/// character view and then converts that to the same position in the UTF-8
/// view:
///
/// let hearts = "Hearts <3 💘"
/// if let i = hearts.characters.index(of: " ") {
/// let j = i.samePosition(in: hearts.utf8)
/// print(Array(hearts.utf8.prefix(upTo: j)))
/// }
/// // Prints "[72, 101, 97, 114, 116, 115]"
public struct Index : Comparable, CustomPlaygroundQuickLookable { public struct Index : Comparable, CustomPlaygroundQuickLookable {
public // SPI(Foundation) public // SPI(Foundation)
init(_base: String.UnicodeScalarView.Index) { init(_base: String.UnicodeScalarView.Index) {
@@ -198,17 +272,17 @@ extension String.CharacterView : BidirectionalCollection {
public typealias IndexDistance = Int public typealias IndexDistance = Int
/// The position of the first `Character` if `self` is /// The position of the first character in a nonempty character view.
/// non-empty; identical to `endIndex` otherwise. ///
/// In an empty character view, `startIndex` is equal to `endIndex`.
public var startIndex: Index { public var startIndex: Index {
return Index(_base: unicodeScalars.startIndex) return Index(_base: unicodeScalars.startIndex)
} }
/// The "past the end" position. /// A character view's "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
/// ///
/// `endIndex` is not a valid argument to `subscript`, and is always /// In an empty character view, `endIndex` is equal to `startIndex`.
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
public var endIndex: Index { public var endIndex: Index {
return Index(_base: unicodeScalars.endIndex) return Index(_base: unicodeScalars.endIndex)
} }
@@ -235,27 +309,44 @@ extension String.CharacterView : BidirectionalCollection {
i._utf16Index - predecessorLengthUTF16, i._base._core)) i._utf16Index - predecessorLengthUTF16, i._base._core))
} }
/// Access the `Character` at `position`. /// Accesses the character at the given position.
/// ///
/// - Precondition: `position` is a valid position in `self` and /// The following example searches a string's character view for a capital
/// `position != endIndex`. /// letter and then prints the character at the found index:
///
/// let greeting = "Hello, friend!"
/// if let i = greeting.characters.index(where: { "A"..."Z" ~= $0 }) {
/// print("First capital letter: \(greeting.characters[i])")
/// }
/// // Prints "First capital letter: H"
///
/// - Parameter i: A valid index of the character view. `i` must be less
/// than the view's end index.
public subscript(i: Index) -> Character { public subscript(i: Index) -> Character {
return Character(String(unicodeScalars[i._base..<i._endBase])) return Character(String(unicodeScalars[i._base..<i._endBase]))
} }
} }
extension String.CharacterView : RangeReplaceableCollection { extension String.CharacterView : RangeReplaceableCollection {
/// Create an empty instance. /// Creates an empty character view.
public init() { public init() {
self.init("") self.init("")
} }
/// Replace the characters within `bounds` with `newElements`. /// Replaces the characters within the specified bounds with the given
/// characters.
/// ///
/// Invalidates all indices with respect to `self`. /// Invalidates all indices with respect to the string.
/// ///
/// - Complexity: O(`bounds.count`) if `bounds.upperBound /// - Parameters:
/// == self.endIndex` and `newElements.isEmpty`, O(N) otherwise. /// - bounds: The range of characters to replace. The bounds of the range
/// must be valid indices of the character view.
/// - newElements: The new characters to add to the view.
///
/// - Complexity: O(*m*), where *m* is the combined length of the character
/// view and `newElements`. If the call to `replaceSubrange(_:with:)`
/// simply removes characters at the end of the view, the complexity is
/// O(*n*), where *n* is equal to `bounds.count`.
public mutating func replaceSubrange< public mutating func replaceSubrange<
C: Collection where C.Iterator.Element == Character C: Collection where C.Iterator.Element == Character
>( >(
@@ -268,16 +359,25 @@ extension String.CharacterView : RangeReplaceableCollection {
_core.replaceSubrange(rawSubRange, with: lazyUTF16) _core.replaceSubrange(rawSubRange, with: lazyUTF16)
} }
/// Reserve enough space to store `n` ASCII characters. /// Reserves enough space in the character view's underlying storage to store
/// the specified number of ASCII characters.
/// ///
/// - Complexity: O(`n`). /// Because each element of a character view can require more than a single
/// ASCII character's worth of storage, additional allocation may be
/// necessary when adding characters to the character view after a call to
/// `reserveCapacity(_:)`.
///
/// - Parameter n: The minimum number of ASCII characters' worth of storage
/// to allocate.
///
/// - Complexity: O(*n*), where *n* is the capacity being reserved.
public mutating func reserveCapacity(_ n: Int) { public mutating func reserveCapacity(_ n: Int) {
_core.reserveCapacity(n) _core.reserveCapacity(n)
} }
/// Append `c` to `self`. /// Appends the given character to the character view.
/// ///
/// - Complexity: Amortized O(1). /// - Parameter c: The character to append to the character view.
public mutating func append(_ c: Character) { public mutating func append(_ c: Character) {
switch c._representation { switch c._representation {
case .small(let _63bits): case .small(let _63bits):
@@ -288,7 +388,9 @@ extension String.CharacterView : RangeReplaceableCollection {
} }
} }
/// Append the elements of `newElements` to `self`. /// Appends the characters in the given sequence to the character view.
///
/// - Parameter newElements: A sequence of characters.
public mutating func append< public mutating func append<
S : Sequence where S.Iterator.Element == Character S : Sequence where S.Iterator.Element == Character
>(contentsOf newElements: S) { >(contentsOf newElements: S) {
@@ -298,7 +400,10 @@ extension String.CharacterView : RangeReplaceableCollection {
} }
} }
/// Create an instance containing `characters`. /// Creates a new character view containing the characters in the given
/// sequence.
///
/// - Parameter characters: A sequence of characters.
public init< public init<
S : Sequence where S.Iterator.Element == Character S : Sequence where S.Iterator.Element == Character
>(_ characters: S) { >(_ characters: S) {
@@ -309,10 +414,19 @@ extension String.CharacterView : RangeReplaceableCollection {
// Algorithms // Algorithms
extension String.CharacterView { extension String.CharacterView {
/// Access the characters in `bounds`. /// Accesses the characters in the given range.
/// ///
/// - Complexity: O(1) unless bridging from Objective-C requires an /// The example below uses this subscript to access the characters up to, but
/// O(N) conversion. /// not including, the first comma (`","`) in the string.
///
/// let str = "All this happened, more or less."
/// let i = str.characters.index(of: ",")!
/// let substring = str.characters[str.characters.startIndex ..< i]
/// print(String(substring))
/// // Prints "All this happened"
///
/// - Complexity: O(*n*) if the underlying string is bridged from
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
public subscript(bounds: Range<Index>) -> String.CharacterView { public subscript(bounds: Range<Index>) -> String.CharacterView {
let unicodeScalarRange = let unicodeScalarRange =
bounds.lowerBound._base..<bounds.upperBound._base bounds.lowerBound._base..<bounds.upperBound._base

View File

@@ -11,14 +11,42 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
extension String.Index { extension String.Index {
/// Construct the position in `characters` that corresponds exactly to /// Creates an index in the given string that corresponds exactly to the
/// `unicodeScalarIndex`. If no such position exists, the result is `nil`. /// specified `UnicodeScalarView` position.
/// ///
/// - Precondition: `unicodeScalarIndex` is an element of /// The following example converts the position of the Unicode scalar `"e"`
/// `characters.unicodeScalars.indices`. /// into its corresponding position in the string's character view. The
/// character at that position is the composed `"é"` character.
///
/// let cafe = "Cafe\u{0301}"
/// print(cafe)
/// // Prints "Café"
///
/// let scalarsIndex = cafe.unicodeScalars.index(of: "e")!
/// let charactersIndex = String.Index(scalarsIndex, within: cafe)!
///
/// print(String(cafe.characters.prefix(through: charactersIndex)))
/// // Prints "Café"
///
/// If the position passed in `unicodeScalarIndex` doesn't have an exact
/// corresponding position in `other.characters`, the result of the
/// initializer is `nil`. For example, an attempt to convert the position of
/// the combining acute accent (`"\u{0301}"`) fails. Combining Unicode
/// scalars do not have their own position in a character view.
///
/// let nextIndex = String.Index(cafe.unicodeScalars.index(after: scalarsIndex),
/// within: cafe)
/// print(nextIndex)
/// // Prints "nil"
///
/// - Parameters:
/// - unicodeScalarIndex: A position in the `unicodeScalars` view of the
/// `other` parameter.
/// - other: The string referenced by both `unicodeScalarIndex` and the
/// resulting index.
public init?( public init?(
_ unicodeScalarIndex: String.UnicodeScalarIndex, _ unicodeScalarIndex: String.UnicodeScalarIndex,
within characters: String within other: String
) { ) {
if !unicodeScalarIndex._isOnGraphemeClusterBoundary { if !unicodeScalarIndex._isOnGraphemeClusterBoundary {
return nil return nil
@@ -26,18 +54,51 @@ extension String.Index {
self.init(_base: unicodeScalarIndex) self.init(_base: unicodeScalarIndex)
} }
/// Construct the position in `characters` that corresponds exactly to /// Creates an index in the given string that corresponds exactly to the
/// `utf16Index`. If no such position exists, the result is `nil`. /// specified `UTF16View` position.
/// ///
/// - Precondition: `utf16Index` is an element of /// The following example finds the position of a space in a string's `utf16`
/// `characters.utf16.indices`. /// view and then converts that position to an index in the string's
/// `characters` view. The value `32` is the UTF-16 encoded value of a space
/// character.
///
/// let cafe = "Café 🍵"
///
/// let utf16Index = cafe.utf16.index(of: 32)!
/// let charactersIndex = String.Index(utf16Index, within: cafe)!
///
/// print(String(cafe.characters.prefix(upTo: charactersIndex)))
/// // Prints "Café"
///
/// If the position passed in `utf16Index` doesn't have an exact
/// corresponding position in `other.characters`, the result of the
/// initializer is `nil`. For example, an attempt to convert the position of
/// the trailing surrogate of a UTF-16 surrogate pair fails.
///
/// The next example attempts to convert the indices of the two UTF-16 code
/// units that represent the teacup emoji (`"🍵"`). The index of the lead
/// surrogate is successfully converted to a position in `other.characters`,
/// but the index of the trailing surrogate is not.
///
/// let emojiHigh = cafe.utf16.index(after: utf16Index)
/// print(String.Index(emojiHigh, within: cafe))
/// // Prints "Optional(String.Index(...))"
///
/// let emojiLow = cafe.utf16.index(after: emojiHigh)
/// print(String.Index(emojiLow, within: cafe))
/// // Prints "nil"
///
/// - Parameters:
/// - utf16Index: A position in the `utf16` view of the `other` parameter.
/// - other: The string referenced by both `utf16Index` and the resulting
/// index.
public init?( public init?(
_ utf16Index: String.UTF16Index, _ utf16Index: String.UTF16Index,
within characters: String within other: String
) { ) {
if let me = utf16Index.samePosition( if let me = utf16Index.samePosition(
in: characters.unicodeScalars in: other.unicodeScalars
)?.samePosition(in: characters) { )?.samePosition(in: other) {
self = me self = me
} }
else { else {
@@ -45,18 +106,25 @@ extension String.Index {
} }
} }
/// Construct the position in `characters` that corresponds exactly to /// Creates an index in the given string that corresponds exactly to the
/// `utf8Index`. If no such position exists, the result is `nil`. /// specified `UTF8View` position.
/// ///
/// - Precondition: `utf8Index` is an element of /// If the position passed in `utf8Index` doesn't have an exact corresponding
/// `characters.utf8.indices`. /// position in `other.characters`, the result of the initializer is `nil`.
/// For example, an attempt to convert the position of a UTF-8 continuation
/// byte returns `nil`.
///
/// - Parameters:
/// - utf8Index: A position in the `utf8` view of the `other` parameter.
/// - other: The string referenced by both `utf8Index` and the resulting
/// index.
public init?( public init?(
_ utf8Index: String.UTF8Index, _ utf8Index: String.UTF8Index,
within characters: String within other: String
) { ) {
if let me = utf8Index.samePosition( if let me = utf8Index.samePosition(
in: characters.unicodeScalars in: other.unicodeScalars
)?.samePosition(in: characters) { )?.samePosition(in: other) {
self = me self = me
} }
else { else {
@@ -64,30 +132,71 @@ extension String.Index {
} }
} }
/// Returns the position in `utf8` that corresponds exactly /// Returns the position in the given UTF-8 view that corresponds exactly to
/// to `self`. /// this index.
/// ///
/// - Precondition: `self` is an element of `String(utf8).indices`. /// The index must be a valid index of `String(utf8).characters`.
///
/// This example first finds the position of the character `"é"` and then uses
/// this method to find the same position in the string's `utf8` view.
///
/// let cafe = "Café"
/// if let i = cafe.characters.index(of: "é") {
/// let j = i.samePosition(in: cafe.utf8)
/// print(Array(cafe.utf8.suffix(from: j)))
/// }
/// // Prints "[195, 169]"
///
/// - Parameter utf8: The view to use for the index conversion.
/// - Returns: The position in `utf8` that corresponds exactly to this index.
public func samePosition( public func samePosition(
in utf8: String.UTF8View in utf8: String.UTF8View
) -> String.UTF8View.Index { ) -> String.UTF8View.Index {
return String.UTF8View.Index(self, within: utf8) return String.UTF8View.Index(self, within: utf8)
} }
/// Returns the position in `utf16` that corresponds exactly /// Returns the position in the given UTF-16 view that corresponds exactly to
/// to `self`. /// this index.
/// ///
/// - Precondition: `self` is an element of `String(utf16).indices`. /// The index must be a valid index of `String(utf16).characters`.
///
/// This example first finds the position of the character `"é"` and then uses
/// this method to find the same position in the string's `utf16` view.
///
/// let cafe = "Café"
/// if let i = cafe.characters.index(of: "é") {
/// let j = i.samePosition(in: cafe.utf16)
/// print(cafe.utf16[j])
/// }
/// // Prints "233"
///
/// - Parameter utf16: The view to use for the index conversion.
/// - Returns: The position in `utf16` that corresponds exactly to this index.
public func samePosition( public func samePosition(
in utf16: String.UTF16View in utf16: String.UTF16View
) -> String.UTF16View.Index { ) -> String.UTF16View.Index {
return String.UTF16View.Index(self, within: utf16) return String.UTF16View.Index(self, within: utf16)
} }
/// Returns the position in `unicodeScalars` that corresponds exactly /// Returns the position in the given view of Unicode scalars that
/// to `self`. /// corresponds exactly to this index.
/// ///
/// - Precondition: `self` is an element of `String(unicodeScalars).indices`. /// The index must be a valid index of `String(unicodeScalars).characters`.
///
/// This example first finds the position of the character `"é"` and then uses
/// this method to find the same position in the string's `unicodeScalars`
/// view.
///
/// let cafe = "Café"
/// if let i = cafe.characters.index(of: "é") {
/// let j = i.samePosition(in: cafe.unicodeScalars)
/// print(cafe.unicodeScalars[j])
/// }
/// // Prints "é"
///
/// - Parameter unicodeScalars: The view to use for the index conversion.
/// - Returns: The position in `unicodeScalars` that corresponds exactly to
/// this index.
public func samePosition( public func samePosition(
in unicodeScalars: String.UnicodeScalarView in unicodeScalars: String.UnicodeScalarView
) -> String.UnicodeScalarView.Index { ) -> String.UnicodeScalarView.Index {

View File

@@ -35,7 +35,20 @@ for int_ty in all_integer_types(word_bits):
}% }%
extension String : StringInterpolationConvertible { extension String : StringInterpolationConvertible {
/// Create an instance by concatenating the elements of `strings`. /// Creates a new string by concatenating the given interpolations.
///
/// Do not call this initializer directly. It is used by the compiler when
/// you create a string using string interpolation. Instead, use string
/// interpolation to create a new string by including values, literals,
/// variables, or expressions enclosed in parentheses, prefixed by a
/// backslash (`\(`...`)`).
///
/// let price = 2
/// let number = 3
/// let message = "If one cookie costs \(price) dollars, " +
/// "\(number) cookies cost \(price * number) dollars."
/// print(message)
/// // Prints "If one cookie costs 2 dollars, 3 cookies cost 6 dollars."
@effects(readonly) @effects(readonly)
public init(stringInterpolation strings: String...) { public init(stringInterpolation strings: String...) {
self.init() self.init()
@@ -44,18 +57,36 @@ extension String : StringInterpolationConvertible {
} }
} }
/// Create an instance containing `expr`'s `print` representation. /// Creates a string containing the given expression's textual
/// representation.
///
/// Do not call this initializer directly. It is used by the compiler when
/// interpreting string interpolations.
///
/// - SeeAlso: `StringInterpolationConvertible`
public init<T>(stringInterpolationSegment expr: T) { public init<T>(stringInterpolationSegment expr: T) {
self = String(expr) self = String(expr)
} }
% for Type in StreamableTypes: % for Type in StreamableTypes:
/// Creates a string containing the given value's textual representation.
///
/// Do not call this initializer directly. It is used by the compiler when
/// interpreting string interpolations.
///
/// - SeeAlso: `StringInterpolationConvertible`
public init(stringInterpolationSegment expr: ${Type}) { public init(stringInterpolationSegment expr: ${Type}) {
self = _toStringReadOnlyStreamable(expr) self = _toStringReadOnlyStreamable(expr)
} }
% end % end
% for Type in PrintableTypes: % for Type in PrintableTypes:
/// Creates a string containing the given value's textual representation.
///
/// Do not call this initializer directly. It is used by the compiler when
/// interpreting string interpolations.
///
/// - SeeAlso: `StringInterpolationConvertible`
public init(stringInterpolationSegment expr: ${Type}) { public init(stringInterpolationSegment expr: ${Type}) {
self = _toStringReadOnlyPrintable(expr) self = _toStringReadOnlyPrintable(expr)
} }

View File

@@ -13,8 +13,15 @@
import SwiftShims import SwiftShims
extension String { extension String {
/// Construct an instance that is the concatenation of `count` copies /// Creates a string representing the given character repeated the specified
/// of `repeatedValue`. /// number of times.
///
/// For example, use this initializer to create a string with ten `"0"`
/// characters in a row.
///
/// let zeroes = String(repeating: "0" as Character, count: 10)
/// print(zeroes)
/// // Prints "0000000000"
public init(repeating repeatedValue: Character, count: Int) { public init(repeating repeatedValue: Character, count: Int) {
let s = String(repeatedValue) let s = String(repeatedValue)
self = String(_storage: _StringBuffer( self = String(_storage: _StringBuffer(
@@ -26,8 +33,15 @@ extension String {
} }
} }
/// Construct an instance that is the concatenation of `count` copies /// Creates a string representing the given Unicode scalar repeated the
/// of `Character(repeatedValue)`. /// specified number of times.
///
/// For example, use this initializer to create a string with ten `"0"`
/// scalars in a row.
///
/// let zeroes = String(repeating: "0" as UnicodeScalar, count: 10)
/// print(zeroes)
/// // Prints "0000000000"
public init(repeating repeatedValue: UnicodeScalar, count: Int) { public init(repeating repeatedValue: UnicodeScalar, count: Int) {
self = String._fromWellFormedCodeUnitSequence( self = String._fromWellFormedCodeUnitSequence(
UTF32.self, UTF32.self,
@@ -43,7 +57,7 @@ extension String {
return scalarSlices.map { String($0) } return scalarSlices.map { String($0) }
} }
/// `true` iff `self` contains no characters. /// A Boolean value indicating whether a string has no characters.
public var isEmpty : Bool { public var isEmpty : Bool {
return _core.count == 0 return _core.count == 0
} }
@@ -72,7 +86,36 @@ func _stdlib_NSStringHasSuffixNFD(_ theString: AnyObject, _ suffix: AnyObject) -
func _stdlib_NSStringHasSuffixNFDPointer(_ theString: OpaquePointer, _ suffix: OpaquePointer) -> Bool func _stdlib_NSStringHasSuffixNFDPointer(_ theString: OpaquePointer, _ suffix: OpaquePointer) -> Bool
extension String { extension String {
/// Returns `true` iff `self` begins with `prefix`. /// Returns a Boolean value indicating whether the string begins with the
/// specified prefix.
///
/// The comparison is both case sensitive and Unicode safe. The
/// case-sensitive comparison will only match strings whose corresponding
/// characters have the same case.
///
/// let cafe = "Café du Monde"
///
/// // Case sensitive
/// print(cafe.hasPrefix("café"))
/// // Prints "false"
///
/// The Unicode-safe comparison matches Unicode extended grapheme clusters
/// rather than the code points used to compose them. The example below uses
/// two strings with different forms of the `"é"` character---the first uses
/// the composed form and the second uses the decomposed form.
///
/// // Unicode safe
/// let composedCafe = "Café"
/// let decomposedCafe = "Cafe\u{0301}"
///
/// print(cafe.hasPrefix(composedCafe))
/// // Prints "true"
/// print(cafe.hasPrefix(decomposedCafe))
/// // Prints "true"
///
/// - Parameter prefix: A possible prefix to test against this string.
/// Passing an empty string (`""`) as `prefix` always results in `false`.
/// - Returns: `true` if the string begins with `prefix`, otherwise, `false`.
public func hasPrefix(_ prefix: String) -> Bool { public func hasPrefix(_ prefix: String) -> Bool {
let selfCore = self._core let selfCore = self._core
let prefixCore = prefix._core let prefixCore = prefix._core
@@ -96,7 +139,36 @@ extension String {
self._bridgeToObjectiveCImpl(), prefix._bridgeToObjectiveCImpl()) self._bridgeToObjectiveCImpl(), prefix._bridgeToObjectiveCImpl())
} }
/// Returns `true` iff `self` ends with `suffix`. /// Returns a Boolean value indicating whether the string ends with the
/// specified suffix.
///
/// The comparison is both case sensitive and Unicode safe. The
/// case-sensitive comparison will only match strings whose corresponding
/// characters have the same case.
///
/// let plans = "Let's meet at the café"
///
/// // Case sensitive
/// print(plans.hasSuffix("Café"))
/// // Prints "false"
///
/// The Unicode-safe comparison matches Unicode extended grapheme clusters
/// rather than the code points used to compose them. The example below uses
/// two strings with different forms of the `"é"` character---the first uses
/// the composed form and the second uses the decomposed form.
///
/// // Unicode safe
/// let composedCafe = "café"
/// let decomposedCafe = "cafe\u{0301}"
///
/// print(plans.hasSuffix(composedCafe))
/// // Prints "true"
/// print(plans.hasSuffix(decomposedCafe))
/// // Prints "true"
///
/// - Parameter suffix: A possible suffix to test against this string.
/// Passing an empty string (`""`) as `suffix` always results in `false`.
/// - Returns: `true` if the string ends with `suffix`, otherwise, `false`.
public func hasSuffix(_ suffix: String) -> Bool { public func hasSuffix(_ suffix: String) -> Bool {
let selfCore = self._core let selfCore = self._core
let suffixCore = suffix._core let suffixCore = suffix._core
@@ -133,38 +205,86 @@ extension String {
// FIXME: can't just use a default arg for radix below; instead we // FIXME: can't just use a default arg for radix below; instead we
// need these single-arg overloads <rdar://problem/17775455> // need these single-arg overloads <rdar://problem/17775455>
/// Create an instance representing `v` in base 10. /// Creates a string representing the given value in base 10.
///
/// The following example converts the maximal `Int` value to a string and
/// prints its length:
///
/// let max = String(Int.max)
/// print("\(max) has \(max.utf16.count) digits.")
/// // Prints "9223372036854775807 has 19 digits."
public init<T : _SignedInteger>(_ v: T) { public init<T : _SignedInteger>(_ v: T) {
self = _int64ToString(v.toIntMax()) self = _int64ToString(v.toIntMax())
} }
/// Create an instance representing `v` in base 10. /// Creates a string representing the given value in base 10.
///
/// The following example converts the maximal `UInt` value to a string and
/// prints its length:
///
/// let max = String(UInt.max)
/// print("\(max) has \(max.utf16.count) digits.")
/// // Prints "18446744073709551615 has 20 digits."
public init<T : UnsignedInteger>(_ v: T) { public init<T : UnsignedInteger>(_ v: T) {
self = _uint64ToString(v.toUIntMax()) self = _uint64ToString(v.toUIntMax())
} }
/// Create an instance representing `v` in the given `radix` (base). /// Creates a string representing the given value in the specified base.
/// ///
/// Numerals greater than 9 are represented as roman letters, /// Numerals greater than 9 are represented as Roman letters. These letters
/// starting with `a` if `uppercase` is `false` or `A` otherwise. /// start with `"A"` if `uppercase` is `true`; otherwise, with `"a"`.
///
/// let v = 999_999
/// print(String(v, radix: 2))
/// // Prints "11110100001000111111"
///
/// print(String(v, radix: 16))
/// // Prints "f423f"
/// print(String(v, radix: 16, uppercase: true))
/// // Prints "F423F"
///
/// - Parameters:
/// - value: The value to convert to a string.
/// - radix: The base to use for the string representation. `radix` must be
/// at least 2 and at most 36.
/// - uppercase: Pass `true` to use uppercase letters to represent numerals
/// greater than 9, or `false` to use lowercase letters. The default is
/// `false`.
public init<T : _SignedInteger>( public init<T : _SignedInteger>(
_ v: T, radix: Int, uppercase: Bool = false _ value: T, radix: Int, uppercase: Bool = false
) { ) {
_precondition(radix > 1, "Radix must be greater than 1") _precondition(radix > 1, "Radix must be greater than 1")
self = _int64ToString( self = _int64ToString(
v.toIntMax(), radix: Int64(radix), uppercase: uppercase) value.toIntMax(), radix: Int64(radix), uppercase: uppercase)
} }
/// Create an instance representing `v` in the given `radix` (base). /// Creates a string representing the given value in the specified base.
/// ///
/// Numerals greater than 9 are represented as roman letters, /// Numerals greater than 9 are represented as Roman letters. These letters
/// starting with `a` if `uppercase` is `false` or `A` otherwise. /// start with `"A"` if `uppercase` is `true`; otherwise, with `"a"`.
///
/// let v: UInt = 999_999
/// print(String(v, radix: 2))
/// // Prints "11110100001000111111"
///
/// print(String(v, radix: 16))
/// // Prints "f423f"
/// print(String(v, radix: 16, uppercase: true))
/// // Prints "F423F"
///
/// - Parameters:
/// - value: The value to convert to a string.
/// - radix: The base to use for the string representation. `radix` must be
/// at least 2 and at most 36.
/// - uppercase: Pass `true` to use uppercase letters to represent numerals
/// greater than 9, or `false` to use lowercase letters. The default is
/// `false`.
public init<T : UnsignedInteger>( public init<T : UnsignedInteger>(
_ v: T, radix: Int, uppercase: Bool = false _ value: T, radix: Int, uppercase: Bool = false
) { ) {
_precondition(radix > 1, "Radix must be greater than 1") _precondition(radix > 1, "Radix must be greater than 1")
self = _uint64ToString( self = _uint64ToString(
v.toUIntMax(), radix: Int64(radix), uppercase: uppercase) value.toUIntMax(), radix: Int64(radix), uppercase: uppercase)
} }
} }

View File

@@ -14,18 +14,25 @@
// similar API. // similar API.
extension String { extension String {
/// The index type for subscripting a string.
public typealias Index = CharacterView.Index public typealias Index = CharacterView.Index
/// A type used to represent the number of steps between two `String.Index`
/// values, where one value is reachable from the other.
///
/// In Swift, *reachability* refers to the ability to produce one value from
/// the other through zero or more applications of `index(after:)`.
public typealias IndexDistance = CharacterView.IndexDistance public typealias IndexDistance = CharacterView.IndexDistance
/// The position of the first `Character` in `self.characters` if /// The position of the first character in a nonempty string.
/// `self` is non-empty; identical to `endIndex` otherwise. ///
/// In an empty string, `startIndex` is equal to `endIndex`.
public var startIndex: Index { return characters.startIndex } public var startIndex: Index { return characters.startIndex }
/// The "past the end" position in `self.characters`. /// A string's "past the end" position---that is, the position one greater
/// than the last valid subscript argument.
/// ///
/// `endIndex` is not a valid argument to `subscript`, and is always /// In an empty string, `endIndex` is equal to `startIndex`.
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
public var endIndex: Index { return characters.endIndex } public var endIndex: Index { return characters.endIndex }
// TODO: swift-3-indexing-model - add docs // TODO: swift-3-indexing-model - add docs
@@ -55,16 +62,25 @@ extension String {
return characters.distance(from: start, to: end) return characters.distance(from: start, to: end)
} }
/// Access the `Character` at `position`. /// Accesses the character at the given position.
/// ///
/// - Precondition: `position` is a valid position in `self.characters` /// Indices for subscripting a string are shared with the string's
/// and `position != endIndex`. /// `characters` view. For example:
///
/// let greeting = "Hello, friend!"
/// if let i = greeting.characters.index(where: { $0 >= "A" && $0 <= "Z" }) {
/// print("First capital letter: \(greeting[i])")
/// }
/// // Prints "First capital letter: H"
///
/// - Parameter i: A valid index of the string. `i` must be less than the
/// string's end index.
public subscript(i: Index) -> Character { return characters[i] } public subscript(i: Index) -> Character { return characters[i] }
/// Return the characters within the given `bounds`. /// Accesses the text in the given range.
/// ///
/// - Complexity: O(1) unless bridging from Objective-C requires an /// - Complexity: O(*n*) if the underlying string is bridged from
/// O(N) conversion. /// Objective-C, where *n* is the length of the string; otherwise, O(1).
public subscript(bounds: Range<Index>) -> String { public subscript(bounds: Range<Index>) -> String {
return String(characters[bounds]) return String(characters[bounds])
} }
@@ -79,24 +95,63 @@ public func < (lhs: String.Index, rhs: String.Index) -> Bool {
} }
extension String { extension String {
/// Create an instance containing `characters`. /// Creates a new string containing the characters in the given sequence.
///
/// You can use this initializer to create a new string from the result of
/// one or more operations on a string's `characters` view. For example:
///
/// let str = "The rain in Spain stays mainly in the plain."
///
/// let vowels: Set<Character> = ["a", "e", "i", "o", "u"]
/// let disemvowelled = String(str.characters.lazy.filter { !vowels.contains($0) })
///
/// print(disemvowelled)
/// // Prints "Th rn n Spn stys mnly n th pln."
///
/// - Parameter characters: A sequence of characters.
public init< public init<
S : Sequence where S.Iterator.Element == Character S : Sequence where S.Iterator.Element == Character
>(_ characters: S) { >(_ characters: S) {
self._core = CharacterView(characters)._core self._core = CharacterView(characters)._core
} }
/// Reserves enough space in the string's underlying storage to store the
/// specified number of ASCII characters.
///
/// Because each character in a string can require more than a single ASCII
/// character's worth of storage, additional allocation may be necessary
/// when adding characters to a string after a call to
/// `reserveCapacity(_:)`.
///
/// - Parameter n: The minimum number of ASCII characters' worth of storage
/// to allocate.
///
/// - Complexity: O(*n*)
public mutating func reserveCapacity(_ n: Int) { public mutating func reserveCapacity(_ n: Int) {
withMutableCharacters { withMutableCharacters {
(v: inout CharacterView) in v.reserveCapacity(n) (v: inout CharacterView) in v.reserveCapacity(n)
} }
} }
/// Appends the given character to the string.
///
/// The following example adds an emoji globe to the end of a string.
///
/// var globe = "Globe "
/// globe.append("🌍")
/// print(globe)
/// // Prints "Globe 🌍"
///
/// - Parameter c: The character to append to the string.
public mutating func append(_ c: Character) { public mutating func append(_ c: Character) {
withMutableCharacters { withMutableCharacters {
(v: inout CharacterView) in v.append(c) (v: inout CharacterView) in v.append(c)
} }
} }
/// Appends the characters in the given sequence to the string.
///
/// - Parameter newElements: A sequence of characters.
public mutating func append< public mutating func append<
S : Sequence where S.Iterator.Element == Character S : Sequence where S.Iterator.Element == Character
>(contentsOf newElements: S) { >(contentsOf newElements: S) {
@@ -106,13 +161,20 @@ extension String {
} }
% for Range in ['Range', 'ClosedRange']: % for Range in ['Range', 'ClosedRange']:
/// Replace the characters within `bounds` with the elements of /// Replaces the text within the specified bounds with the given characters.
/// `replacement`.
/// ///
/// Invalidates all indices with respect to `self`. /// Calling this method invalidates any existing indices for use with this
/// string.
/// ///
/// - Complexity: O(`bounds.count`) if `bounds.upperBound /// - Parameters:
/// == self.endIndex` and `newElements.isEmpty`, O(N) otherwise. /// - bounds: The range of text to replace. The bounds of the range must be
/// valid indices of the string.
/// - newElements: The new characters to add to the string.
///
/// - Complexity: O(*m*), where *m* is the combined length of the string and
/// `newElements`. If the call to `replaceSubrange(_:with:)` simply
/// removes text at the end of the string, the complexity is O(*n*), where
/// *n* is equal to `bounds.count`.
public mutating func replaceSubrange< public mutating func replaceSubrange<
C : Collection where C.Iterator.Element == Character C : Collection where C.Iterator.Element == Character
>( >(
@@ -125,12 +187,20 @@ extension String {
} }
} }
/// Replace the text in `bounds` with `replacement`. /// Replaces the text within the specified bounds with the given string.
/// ///
/// Invalidates all indices with respect to `self`. /// Calling this method invalidates any existing indices for use with this
/// string.
/// ///
/// - Complexity: O(`bounds.count`) if `bounds.upperBound /// - Parameters:
/// == self.endIndex` and `newElements.isEmpty`, O(N) otherwise. /// - bounds: The range of text to replace. The bounds of the range must be
/// valid indices of the string.
/// - newElements: The new text to add to the string.
///
/// - Complexity: O(*m*), where *m* is the combined length of the string and
/// `newElements`. If the call to `replaceSubrange(_:with:)` simply
/// removes text at the end of the string, the complexity is O(*n*), where
/// *n* is equal to `bounds.count`.
public mutating func replaceSubrange( public mutating func replaceSubrange(
_ bounds: ${Range}<Index>, with newElements: String _ bounds: ${Range}<Index>, with newElements: String
) { ) {
@@ -139,22 +209,37 @@ extension String {
} }
% end % end
/// Insert `newElement` at position `i`. /// Inserts a new character at the specified position.
/// ///
/// Invalidates all indices with respect to `self`. /// Calling this method invalidates any existing indices for use with this
/// string.
/// ///
/// - Complexity: O(`self.count`). /// - Parameters:
/// - newElement: The new character to insert into the string.
/// - i: A valid index of the string. If `i` is equal to the string's end
/// index, this method appends `newElement` to the string.
///
/// - Complexity: O(*n*), where *n* is the length of the string.
public mutating func insert(_ newElement: Character, at i: Index) { public mutating func insert(_ newElement: Character, at i: Index) {
withMutableCharacters { withMutableCharacters {
(v: inout CharacterView) in v.insert(newElement, at: i) (v: inout CharacterView) in v.insert(newElement, at: i)
} }
} }
/// Insert `newElements` at position `i`. /// Inserts a collection of characters at the specified position.
/// ///
/// Invalidates all indices with respect to `self`. /// Calling this method invalidates any existing indices for use with this
/// string.
/// ///
/// - Complexity: O(`self.count + newElements.count`). /// - Parameters:
/// - newElements: A collection of `Character` elements to insert into the
/// string.
/// - i: A valid index of the string. If `i` is equal to the string's end
/// index, this method appends the contents of `newElements` to the
/// string.
///
/// - Complexity: O(*n*), where *n* is the combined length of the string and
/// `newElements`.
public mutating func insert< public mutating func insert<
S : Collection where S.Iterator.Element == Character S : Collection where S.Iterator.Element == Character
>(contentsOf newElements: S, at i: Index) { >(contentsOf newElements: S, at i: Index) {
@@ -163,11 +248,24 @@ extension String {
} }
} }
/// Remove and return the `Character` at position `i`. /// Removes and returns the character at the specified position.
/// ///
/// Invalidates all indices with respect to `self`. /// All the elements following `i` are moved to close the gap. This example
/// removes the hyphen from the middle of a string.
/// ///
/// - Complexity: O(`self.count`). /// var nonempty = "non-empty"
/// if let i = nonempty.characters.index(of: "-") {
/// nonempty.remove(at: i)
/// }
/// print(nonempty)
/// // Prints "nonempty"
///
/// Calling this method invalidates any existing indices for use with this
/// string.
///
/// - Parameter i: The position of the character to remove. `i` must be a
/// valid index of the string that is not equal to the string's end index.
/// - Returns: The character that was removed.
@discardableResult @discardableResult
public mutating func remove(at i: Index) -> Character { public mutating func remove(at i: Index) -> Character {
return withMutableCharacters { return withMutableCharacters {
@@ -176,11 +274,19 @@ extension String {
} }
% for Range in ['Range', 'ClosedRange']: % for Range in ['Range', 'ClosedRange']:
/// Remove the characters in `bounds`. /// Removes the characters in the given range.
/// ///
/// Invalidates all indices with respect to `self`. /// Calling this method invalidates any existing indices for use with this
/// string.
/// ///
/// - Complexity: O(`self.count`). % if Range == 'ClosedRange':
/// - Parameter bounds: The range of the elements to remove. The upper and
/// lower bounds of `bounds` must be valid indices of the string and not
/// equal to the string's end index.
% else:
/// - Parameter bounds: The range of the elements to remove. The upper and
/// lower bounds of `bounds` must be valid indices of the string.
% end
public mutating func removeSubrange(_ bounds: ${Range}<Index>) { public mutating func removeSubrange(_ bounds: ${Range}<Index>) {
// FIXME: swift-3-indexing-model: tests. // FIXME: swift-3-indexing-model: tests.
withMutableCharacters { withMutableCharacters {
@@ -189,13 +295,15 @@ extension String {
} }
% end % end
/// Replace `self` with the empty string. /// Replaces this string with the empty string.
/// ///
/// Invalidates all indices with respect to `self`. /// Calling this method invalidates any existing indices for use with this
/// string.
/// ///
/// - parameter keepCapacity: If `true`, prevents the release of /// - Parameter keepCapacity: Pass `true` to prevent the release of the
/// allocated storage, which can be a useful optimization /// string's allocated storage. Retaining the storage can be a useful
/// when `self` is going to be grown again. /// optimization when you're planning to grow the string again. The
/// default value is `false`.
public mutating func removeAll(keepingCapacity keepCapacity: Bool = false) { public mutating func removeAll(keepingCapacity keepCapacity: Bool = false) {
withMutableCharacters { withMutableCharacters {
(v: inout CharacterView) in v.removeAll(keepingCapacity: keepCapacity) (v: inout CharacterView) in v.removeAll(keepingCapacity: keepCapacity)

View File

@@ -14,13 +14,123 @@
// allow performance optimizations of linear traversals. // allow performance optimizations of linear traversals.
extension String { extension String {
/// A collection of UTF-16 code units that encodes a `String` value. /// A view of a string's contents as a collection of UTF-16 code units.
///
/// You can access a string's view of UTF-16 code units by using its `utf16`
/// property. A string's UTF-16 view encodes the string's Unicode scalar
/// values as 16-bit integers.
///
/// let flowers = "Flowers 💐"
/// for v in flowers.utf16 {
/// print(v)
/// }
/// // 70
/// // 108
/// // 111
/// // 119
/// // 101
/// // 114
/// // 115
/// // 32
/// // 55357
/// // 56464
///
/// Unicode scalar values that make up a string's contents can be up to 21
/// bits long. The longer scalar values may need two `UInt16` values for
/// storage. Those "pairs" of code units are called *surrogate pairs*.
///
/// let flowermoji = "💐"
/// for v in flowermoji.unicodeScalars {
/// print(v, v.value)
/// }
/// // 💐 128144
///
/// for v in flowermoji.utf16 {
/// print(v)
/// }
/// // 55357
/// // 56464
///
/// To convert a `String.UTF16View` instance back into a string, use the
/// `String` type's `init(_:)` initializer.
///
/// let favemoji = "My favorite emoji is 🎉"
/// if let i = favemoji.utf16.index(where: { $0 >= 128 }) {
/// let asciiPrefix = String(favemoji.utf16.prefix(upTo: i))
/// print(asciiPrefix)
/// }
/// // Prints "My favorite emoji is "
///
/// UTF16View Elements Match NSString Characters
/// ============================================
///
/// The UTF-16 code units of a string's `utf16` view match the elements
/// accessed through indexed `NSString` APIs.
///
/// print(flowers.utf16.count)
/// // Prints "10"
///
/// let nsflowers = flowers as NSString
/// print(nsflowers.length)
/// // Prints "10"
///
/// Unlike `NSString`, however, `String.UTF16View` does not use integer
/// indices. If you need to access a specific position in a UTF-16 view, use
/// Swift's index manipulation methods. The following example accesses the
/// fourth code unit in both the `flowers` and `nsflowers` strings:
///
/// print(nsflowers.character(at: 3))
/// // Prints "119"
///
/// let i = flowers.utf16.index(flowers.utf16.startIndex, offsetBy: 3)
/// print(flowers.utf16[i])
/// // Prints "119"
///
/// Although the Swift overlay updates many Objective-C methods to return
/// native Swift indices and index ranges, some still return instances of
/// `NSRange`. To convert an `NSRange` instance to a range of
/// `String.UTF16View.Index`, follow these steps:
///
/// 1. Use the `NSRange` type's `toRange` method to convert the instance to
/// an optional range of `Int` values.
/// 2. Use your string's `utf16` view's index manipulation methods to convert
/// the integer bounds to `String.UTF16View.Index` values.
/// 3. Create a new `Range` instance from the new index values.
///
/// Here's an implementation of those steps, showing how to retrieve a
/// substring described by an `NSRange` instance from the middle of a
/// string.
///
/// let snowy = " Let it snow! "
/// let nsrange = NSRange(location: 3, length: 12)
/// if let r = nsrange.toRange() {
/// let start = snowy.utf16.index(snowy.utf16.startIndex, offsetBy: r.lowerBound)
/// let end = snowy.utf16.index(snowy.utf16.startIndex, offsetBy: r.upperBound)
/// let substringRange = start..<end
/// print(snowy.utf16[substringRange])
/// }
/// // Prints "Let it snow!"
public struct UTF16View public struct UTF16View
: BidirectionalCollection, : BidirectionalCollection,
CustomStringConvertible, CustomStringConvertible,
CustomDebugStringConvertible { CustomDebugStringConvertible {
/// A position in a string's collection of UTF-16 code units. /// A position in a string's collection of UTF-16 code units.
///
/// You can convert between indices of the different string views by using
/// conversion initializers and the `samePosition(in:)` method overloads.
/// For example, the following code sample finds the index of the first
/// space in the string's character view and then converts that to the same
/// position in the UTF-16 view.
///
/// let hearts = "Hearts <3 ♥︎ 💘"
/// if let i = hearts.characters.index(of: " ") {
/// let j = i.samePosition(in: hearts.utf16)
/// print(Array(hearts.utf16.suffix(from: j)))
/// print(hearts.utf16.suffix(from: j))
/// }
/// // Prints "[32, 60, 51, 32, 9829, 65038, 32, 55357, 56472]"
/// // Prints " <3 ♥︎ 💘"
public struct Index : Comparable { public struct Index : Comparable {
// Foundation needs access to these fields so it can expose // Foundation needs access to these fields so it can expose
// random access // random access
@@ -33,16 +143,15 @@ extension String {
public typealias IndexDistance = Int public typealias IndexDistance = Int
/// The position of the first code unit if the `String` is /// The position of the first code unit if the `String` is
/// non-empty; identical to `endIndex` otherwise. /// nonempty; identical to `endIndex` otherwise.
public var startIndex: Index { public var startIndex: Index {
return Index(_offset: 0) return Index(_offset: 0)
} }
/// The "past the end" position. /// The "past the end" position---that is, the position one greater than
/// the last valid subscript argument.
/// ///
/// `endIndex` is not a valid argument to `subscript`, and is always /// In an empty UTF-16 view, `endIndex` is equal to `startIndex`.
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
public var endIndex: Index { public var endIndex: Index {
return Index(_offset: _length) return Index(_offset: _length)
} }
@@ -98,10 +207,18 @@ extension String {
return _core.startIndex + _offset + i return _core.startIndex + _offset + i
} }
/// Access the element at `position`. /// Accesses the code unit at the given position.
/// ///
/// - Precondition: `position` is a valid position in `self` and /// The following example uses the subscript to print the value of a
/// `position != endIndex`. /// string's first UTF-16 code unit.
///
/// let greeting = "Hello, friend!"
/// let i = greeting.utf16.startIndex
/// print("First character's UTF-16 code unit: \(greeting.utf16[i])")
/// // Prints "First character's UTF-16 code unit: 72"
///
/// - Parameter i: A valid index of the view. `i` must be
/// less than the view's end index.
public subscript(i: Index) -> UTF16.CodeUnit { public subscript(i: Index) -> UTF16.CodeUnit {
let position = i._offset let position = i._offset
_precondition(position >= 0 && position < _length, _precondition(position >= 0 && position < _length,
@@ -152,10 +269,11 @@ extension String {
} }
#endif #endif
/// Get the contiguous subrange of elements enclosed by `bounds`. /// Accesses the contiguous subrange of elements enclosed by the specified
/// range.
/// ///
/// - Complexity: O(1) unless bridging from Objective-C requires an /// - Complexity: O(*n*) if the underlying string is bridged from
/// O(N) conversion. /// Objective-C, where *n* is the length of the string; otherwise, O(1).
public subscript(bounds: Range<Index>) -> UTF16View { public subscript(bounds: Range<Index>) -> UTF16View {
return UTF16View( return UTF16View(
_core, _core,
@@ -200,9 +318,24 @@ extension String {
} }
} }
/// Construct the `String` corresponding to the given sequence of /// Creates a string corresponding to the given sequence of UTF-16 code units.
/// UTF-16 code units. If `utf16` contains unpaired surrogates, the ///
/// result is `nil`. /// If `utf16` contains unpaired UTF-16 surrogates, the result is `nil`.
///
/// You can use this initializer to create a new string from a slice of
/// another string's `utf16` view.
///
/// let picnicGuest = "Deserving porcupine"
/// if let i = picnicGuest.utf16.index(of: 32) {
/// let adjective = String(picnicGuest.utf16.prefix(upTo: i))
/// print(adjective)
/// }
/// // Prints "Optional("Deserving")"
///
/// The `adjective` constant is created by calling this initializer with a
/// slice of the `picnicGuest.utf16` view.
///
/// - Parameter utf16: A UTF-16 code sequence.
public init?(_ utf16: UTF16View) { public init?(_ utf16: UTF16View) {
let wholeString = String(utf16._core) let wholeString = String(utf16._core)
@@ -219,7 +352,7 @@ extension String {
return nil return nil
} }
/// The index type for subscripting a `String`'s `utf16` view. /// The index type for subscripting a string's `utf16` view.
public typealias UTF16Index = UTF16View.Index public typealias UTF16Index = UTF16View.Index
} }
@@ -239,11 +372,31 @@ public func < (
// Index conversions // Index conversions
extension String.UTF16View.Index { extension String.UTF16View.Index {
/// Construct the position in `utf16` that corresponds exactly to /// Creates an index in the given UTF-16 view that corresponds exactly to the
/// `utf8Index`. If no such position exists, the result is `nil`. /// specified `UTF8View` position.
/// ///
/// - Precondition: `utf8Index` is an element of /// The following example finds the position of a space in a string's `utf8`
/// `String(utf16)!.utf8.indices`. /// view and then converts that position to an index in the string's
/// `utf16` view.
///
/// let cafe = "Café 🍵"
///
/// let utf8Index = cafe.utf8.index(of: 32)!
/// let utf16Index = String.UTF16View.Index(utf8Index, within: cafe.utf16)!
///
/// print(cafe.utf16.prefix(upTo: utf16Index))
/// // Prints "Café"
///
/// If the position passed as `utf8Index` doesn't have an exact corresponding
/// position in `utf16`, the result of the initializer is `nil`. For
/// example, because UTF-8 and UTF-16 represent high Unicode code points
/// differently, an attempt to convert the position of a UTF-8 continuation
/// byte fails.
///
/// - Parameters:
/// - utf8Index: A position in a `UTF8View` instance. `utf8Index` must be
/// an element in `String(utf16).utf8.indices`.
/// - utf16: The `UTF16View` in which to find the new position.
public init?( public init?(
_ utf8Index: String.UTF8Index, within utf16: String.UTF16View _ utf8Index: String.UTF8Index, within utf16: String.UTF16View
) { ) {
@@ -260,52 +413,129 @@ extension String.UTF16View.Index {
_offset = utf8Index._coreIndex _offset = utf8Index._coreIndex
} }
/// Construct the position in `utf16` that corresponds exactly to /// Creates an index in the given UTF-16 view that corresponds exactly to the
/// `unicodeScalarIndex`. /// specified `UnicodeScalarView` position.
/// ///
/// - Precondition: `unicodeScalarIndex` is an element of /// The following example finds the position of "é" in a string's
/// `String(utf16)!.unicodeScalars.indices`. /// `unicodeScalars` view and then converts that position to an index in the
/// string's `utf16` view.
///
/// let cafe = "Café 🍵"
///
/// let scalarIndex = cafe.unicodeScalars.index(of: "é")!
/// let utf16Index = String.UTF16View.Index(scalarIndex, within: cafe.utf16)
///
/// print(cafe.utf16.prefix(through: utf16Index))
/// // Prints "Café"
///
/// - Parameters:
/// - unicodeScalarIndex: A position in a `UnicodeScalarView` instance.
/// `unicodeScalarIndex` must be an element in
/// `String(utf16).unicodeScalars.indices`.
/// - utf16: The `UTF16View` in which to find the new position.
public init( public init(
_ unicodeScalarIndex: String.UnicodeScalarIndex, _ unicodeScalarIndex: String.UnicodeScalarIndex,
within utf16: String.UTF16View) { within utf16: String.UTF16View) {
_offset = unicodeScalarIndex._position _offset = unicodeScalarIndex._position
} }
/// Construct the position in `utf16` that corresponds exactly to /// Creates an index in the given UTF-16 view that corresponds exactly to the
/// `characterIndex`. /// specified `CharacterView` position.
/// ///
/// - Precondition: `characterIndex` is an element of /// The following example finds the position of "é" in a string's
/// `String(utf16)!.indices`. /// `characters` view and then converts that position to an index in the
/// string's `utf16` view.
///
/// let cafe = "Café 🍵"
///
/// let characterIndex = cafe.characters.index(of: "é")!
/// let utf16Index = String.UTF16View.Index(characterIndex, within: cafe.utf16)
///
/// print(cafe.utf16.prefix(through: utf16Index))
/// // Prints "Café"
///
/// - Parameters:
/// - characterIndex: A position in a `CharacterView` instance.
/// `characterIndex` must be an element in
/// `String(utf16).characters.indices`.
/// - utf16: The `UTF16View` in which to find the new position.
public init(_ characterIndex: String.Index, within utf16: String.UTF16View) { public init(_ characterIndex: String.Index, within utf16: String.UTF16View) {
_offset = characterIndex._utf16Index _offset = characterIndex._utf16Index
} }
/// Returns the position in `utf8` that corresponds exactly /// Returns the position in the given UTF-8 view that corresponds exactly to
/// to `self`, or if no such position exists, `nil`. /// this index.
/// ///
/// - Precondition: `self` is an element of /// The index must be a valid index of `String(utf8).utf16`.
/// `String(utf8)!.utf16.indices`. ///
/// This example first finds the position of a space (UTF-16 code point `32`)
/// in a string's `utf16` view and then uses this method to find the same
/// position in the string's `utf8` view.
///
/// let cafe = "Café 🍵"
/// let i = cafe.utf16.index(of: 32)!
/// let j = i.samePosition(in: cafe.utf8)!
/// print(Array(cafe.utf8.prefix(upTo: j)))
/// // Prints "[67, 97, 102, 195, 169]"
///
/// - Parameter utf8: The view to use for the index conversion.
/// - Returns: The position in `utf8` that corresponds exactly to this index.
/// If this index does not have an exact corresponding position in `utf8`,
/// this method returns `nil`. For example, an attempt to convert the
/// position of a UTF-16 trailing surrogate returns `nil`.
public func samePosition( public func samePosition(
in utf8: String.UTF8View in utf8: String.UTF8View
) -> String.UTF8View.Index? { ) -> String.UTF8View.Index? {
return String.UTF8View.Index(self, within: utf8) return String.UTF8View.Index(self, within: utf8)
} }
/// Returns the position in `unicodeScalars` that corresponds exactly /// Returns the position in the given view of Unicode scalars that
/// to `self`, or if no such position exists, `nil`. /// corresponds exactly to this index.
/// ///
/// - Precondition: `self` is an element of /// This index must be a valid index of `String(unicodeScalars).utf16`.
/// `String(unicodeScalars).utf16.indices`. ///
/// This example first finds the position of a space (UTF-16 code point `32`)
/// in a string's `utf16` view and then uses this method to find the same
/// position in the string's `unicodeScalars` view.
///
/// let cafe = "Café 🍵"
/// let i = cafe.utf16.index(of: 32)!
/// let j = i.samePosition(in: cafe.unicodeScalars)!
/// print(cafe.unicodeScalars.prefix(upTo: j))
/// // Prints "Café"
///
/// - Parameter unicodeScalars: The view to use for the index conversion.
/// - Returns: The position in `unicodeScalars` that corresponds exactly to
/// this index. If this index does not have an exact corresponding
/// position in `unicodeScalars`, this method returns `nil`. For example,
/// an attempt to convert the position of a UTF-16 trailing surrogate
/// returns `nil`.
public func samePosition( public func samePosition(
in unicodeScalars: String.UnicodeScalarView in unicodeScalars: String.UnicodeScalarView
) -> String.UnicodeScalarIndex? { ) -> String.UnicodeScalarIndex? {
return String.UnicodeScalarIndex(self, within: unicodeScalars) return String.UnicodeScalarIndex(self, within: unicodeScalars)
} }
/// Returns the position in `characters` that corresponds exactly /// Returns the position in the given string that corresponds exactly to this
/// to `self`, or if no such position exists, `nil`. /// index.
/// ///
/// - Precondition: `self` is an element of `characters.utf16.indices`. /// This index must be a valid index of `characters.utf16`.
///
/// This example first finds the position of a space (UTF-16 code point `32`)
/// in a string's `utf16` view and then uses this method to find the same position
/// in the string.
///
/// let cafe = "Café 🍵"
/// let i = cafe.utf16.index(of: 32)!
/// let j = i.samePosition(in: cafe)!
/// print(cafe[cafe.startIndex ..< j])
/// // Prints "Café"
///
/// - Parameter characters: The string to use for the index conversion.
/// - Returns: The position in `characters` that corresponds exactly to this
/// index. If this index does not have an exact corresponding position in
/// `characters`, this method returns `nil`. For example, an attempt to
/// convert the position of a UTF-16 trailing surrogate returns `nil`.
public func samePosition( public func samePosition(
in characters: String in characters: String
) -> String.Index? { ) -> String.Index? {
@@ -315,7 +545,7 @@ extension String.UTF16View.Index {
// Reflection // Reflection
extension String.UTF16View : CustomReflectable { extension String.UTF16View : CustomReflectable {
/// Returns a mirror that reflects `self`. /// Returns a mirror that reflects the UTF-16 view of a string.
public var customMirror: Mirror { public var customMirror: Mirror {
return Mirror(self, unlabeledChildren: self) return Mirror(self, unlabeledChildren: self)
} }

View File

@@ -88,7 +88,80 @@ extension _StringCore {
} }
extension String { extension String {
/// A collection of UTF-8 code units that encodes a `String` value. /// A view of a string's contents as a collection of UTF-8 code units.
///
/// You can access a string's view of UTF-8 code units by using its `utf8`
/// property. A string's UTF-8 view encodes the string's Unicode scalar
/// values as 8-bit integers.
///
/// let flowers = "Flowers 💐"
/// for v in flowers.utf8 {
/// print(v)
/// }
/// // 70
/// // 108
/// // 111
/// // 119
/// // 101
/// // 114
/// // 115
/// // 32
/// // 240
/// // 159
/// // 146
/// // 144
///
/// A string's Unicode scalar values can be up to 21 bits in length. To
/// represent those scalar values using 8-bit integers, more than one UTF-8
/// code unit is often required.
///
/// let flowermoji = "💐"
/// for v in flowermoji.unicodeScalars {
/// print(v, v.value)
/// }
/// // 💐 128144
///
/// for v in flowermoji.utf8 {
/// print(v)
/// }
/// // 240
/// // 159
/// // 146
/// // 144
///
/// In the encoded representation of a Unicode scalar value, each UTF-8 code
/// unit after the first is called a *continuation byte*.
///
/// UTF8View Elements Match Encoded C Strings
/// =========================================
///
/// Swift streamlines interoperation with C string APIs by letting you pass a
/// `String` instance to a function as an `Int8` or `UInt8` pointer. When you
/// call a C function using a `String`, Swift automatically creates a buffer
/// of UTF-8 code units and passes a pointer to that buffer. The code units
/// of that buffer match the code units in the string's `utf8` view.
///
/// The following example uses the C `strncmp` function to compare the
/// beginning of two Swift strings. The `strncmp` function takes two
/// `const char*` pointers and an integer specifying the number of characters
/// to compare. Because the strings are identical up to the 14th character,
/// comparing only those characters results in a return value of `0`.
///
/// let s1 = "They call me 'Bell'"
/// let s2 = "They call me 'Stacey'"
///
/// print(strncmp(s1, s2, 14))
/// // Prints "0"
/// print(String(s1.utf8.prefix(14)))
/// // Prints "They call me '"
///
/// Extending the compared character count to 15 includes the differing
/// characters, so a nonzero result is returned.
///
/// print(strncmp(s1, s2, 15))
/// // Prints "-17"
/// print(String(s1.utf8.prefix(15)))
/// // Prints "They call me 'B"
public struct UTF8View public struct UTF8View
: Collection, : Collection,
CustomStringConvertible, CustomStringConvertible,
@@ -114,7 +187,22 @@ extension String {
self._endIndex = e self._endIndex = e
} }
/// A position in a `String.UTF8View`. /// A position in a string's `UTF8View` instance.
///
/// You can convert between indices of the different string views by using
/// conversion initializers and the `samePosition(in:)` method overloads.
/// For example, the following code sample finds the index of the first
/// space in the string's character view and then converts that to the same
/// position in the UTF-8 view.
///
/// let hearts = "Hearts <3 💘"
/// if let i = hearts.characters.index(of: " ") {
/// let j = i.samePosition(in: hearts.utf8)
/// print(Array(hearts.utf8.prefix(upTo: j)))
/// print(hearts.utf8.prefix(upTo: j))
/// }
/// // Prints "[72, 101, 97, 114, 116, 115]"
/// // Prints "Hearts"
public struct Index : Comparable { public struct Index : Comparable {
internal typealias Buffer = _StringCore._UTF8Chunk internal typealias Buffer = _StringCore._UTF8Chunk
@@ -169,17 +257,18 @@ extension String {
public typealias IndexDistance = Int public typealias IndexDistance = Int
/// The position of the first code unit if the `String` is /// The position of the first code unit if the UTF-8 view is
/// non-empty; identical to `endIndex` otherwise. /// nonempty.
///
/// If the UTF-8 view is empty, `startIndex` is equal to `endIndex`.
public var startIndex: Index { public var startIndex: Index {
return self._startIndex return self._startIndex
} }
/// The "past the end" position. /// The "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
/// ///
/// `endIndex` is not a valid argument to `subscript`, and is always /// In an empty UTF-8 view, `endIndex` is equal to `startIndex`.
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
public var endIndex: Index { public var endIndex: Index {
return self._endIndex return self._endIndex
} }
@@ -201,7 +290,7 @@ extension String {
let nextCoreIndex = i._coreIndex &+ increment let nextCoreIndex = i._coreIndex &+ increment
let nextBuffer = Index._nextBuffer(after: i._buffer) let nextBuffer = Index._nextBuffer(after: i._buffer)
// if the nextBuffer is non-empty, we have all we need // if the nextBuffer is nonempty, we have all we need
if _fastPath(nextBuffer != Index._emptyBuffer) { if _fastPath(nextBuffer != Index._emptyBuffer) {
return Index(i._core, nextCoreIndex, nextBuffer) return Index(i._core, nextCoreIndex, nextBuffer)
} }
@@ -219,20 +308,29 @@ extension String {
} }
} }
/// Access the element at `position`. /// Accesses the code unit at the given position.
/// ///
/// - Precondition: `position` is a valid position in `self` and /// The following example uses the subscript to print the value of a
/// `position != endIndex`. /// string's first UTF-8 code unit.
///
/// let greeting = "Hello, friend!"
/// let i = greeting.utf8.startIndex
/// print("First character's UTF-8 code unit: \(greeting.utf8[i])")
/// // Prints "First character's UTF-8 code unit: 72"
///
/// - Parameter position: A valid index of the view. `position`
/// must be less than the view's end index.
public subscript(position: Index) -> UTF8.CodeUnit { public subscript(position: Index) -> UTF8.CodeUnit {
let result = UTF8.CodeUnit(truncatingBitPattern: position._buffer & 0xFF) let result = UTF8.CodeUnit(truncatingBitPattern: position._buffer & 0xFF)
_precondition(result != 0xFF, "cannot subscript using endIndex") _precondition(result != 0xFF, "cannot subscript using endIndex")
return result return result
} }
/// Access the contiguous subrange of elements enclosed by `bounds`. /// Accesses the contiguous subrange of elements enclosed by the specified
/// range.
/// ///
/// - Complexity: O(1) unless bridging from Objective-C requires an /// - Complexity: O(*n*) if the underlying string is bridged from
/// O(N) conversion. /// Objective-C, where *n* is the length of the string; otherwise, O(1).
public subscript(bounds: Range<Index>) -> UTF8View { public subscript(bounds: Range<Index>) -> UTF8View {
return UTF8View(_core, bounds.lowerBound, bounds.upperBound) return UTF8View(_core, bounds.lowerBound, bounds.upperBound)
} }
@@ -260,11 +358,21 @@ extension String {
return _core.elementWidth == 1 ? _core.startASCII : nil return _core.elementWidth == 1 ? _core.startASCII : nil
} }
/// A contiguously-stored nul-terminated UTF-8 representation of /// A contiguously stored null-terminated UTF-8 representation of
/// `self`. /// the string.
/// ///
/// To access the underlying memory, invoke /// To access the underlying memory, invoke
/// `withUnsafeBufferPointer` on the `ContiguousArray`. /// `withUnsafeBufferPointer` on the array.
///
/// let s = "Hello!"
/// let bytes = s.nulTerminatedUTF8
/// print(bytes)
/// // Prints "[72, 101, 108, 108, 111, 33, 0]"
///
/// bytes.withUnsafeBufferPointer { ptr in
/// print(strlen(UnsafePointer(ptr.baseAddress!)))
/// }
/// // Prints "6"
public var nulTerminatedUTF8: ContiguousArray<UTF8.CodeUnit> { public var nulTerminatedUTF8: ContiguousArray<UTF8.CodeUnit> {
var result = ContiguousArray<UTF8.CodeUnit>() var result = ContiguousArray<UTF8.CodeUnit>()
result.reserveCapacity(utf8.count + 1) result.reserveCapacity(utf8.count + 1)
@@ -283,9 +391,24 @@ extension String {
return try nulTerminatedUTF8.withUnsafeBufferPointer(body) return try nulTerminatedUTF8.withUnsafeBufferPointer(body)
} }
/// Construct the `String` corresponding to the given sequence of /// Creates a string corresponding to the given sequence of UTF-8 code units.
/// UTF-8 code units. If `utf8` contains unpaired surrogates, the ///
/// result is `nil`. /// If `utf8` is an ill-formed UTF-8 code sequence, the result is `nil`.
///
/// You can use this initializer to create a new string from a slice of
/// another string's `utf8` view.
///
/// let picnicGuest = "Deserving porcupine"
/// if let i = picnicGuest.utf8.index(of: 32) {
/// let adjective = String(picnicGuest.utf8.prefix(upTo: i))
/// print(adjective)
/// }
/// // Prints "Optional("Deserving")"
///
/// The `adjective` constant is created by calling this initializer with a
/// slice of the `picnicGuest.utf8` view.
///
/// - Parameter utf8: A UTF-8 code sequence.
public init?(_ utf8: UTF8View) { public init?(_ utf8: UTF8View) {
let wholeString = String(utf8._core) let wholeString = String(utf8._core)
@@ -297,7 +420,7 @@ extension String {
return nil return nil
} }
/// The index type for subscripting a `String`'s `.utf8` view. /// The index type for subscripting a string's `utf8` view.
public typealias UTF8Index = UTF8View.Index public typealias UTF8Index = UTF8View.Index
} }
@@ -356,11 +479,44 @@ extension String.UTF8View.Index {
self.init(core, _utf16Offset, buffer) self.init(core, _utf16Offset, buffer)
} }
/// Construct the position in `utf8` that corresponds exactly to /// Creates an index in the given UTF-8 view that corresponds exactly to the
/// `utf16Index`. If no such position exists, the result is `nil`. /// specified `UTF16View` position.
/// ///
/// - Precondition: `utf8Index` is an element of /// The following example finds the position of a space in a string's `utf16`
/// `String(utf16)!.utf8.indices`. /// view and then converts that position to an index in the string's
/// `utf8` view.
///
/// let cafe = "Café 🍵"
///
/// let utf16Index = cafe.utf16.index(of: 32)!
/// let utf8Index = String.UTF8View.Index(utf16Index, within: cafe.utf8)!
///
/// print(Array(cafe.utf8.prefix(upTo: utf8Index)))
/// // Prints "[67, 97, 102, 195, 169]"
///
/// If the position passed in `utf16Index` doesn't have an exact
/// corresponding position in `utf8`, the result of the initializer is
/// `nil`. For example, because UTF-8 and UTF-16 represent high Unicode code
/// points differently, an attempt to convert the position of the trailing
/// surrogate of a UTF-16 surrogate pair fails.
///
/// The next example attempts to convert the indices of the two UTF-16 code
/// units that represent the teacup emoji (`"🍵"`). The index of the lead
/// surrogate is successfully converted to a position in `utf8`, but the
/// index of the trailing surrogate is not.
///
/// let emojiHigh = cafe.utf16.index(after: utf16Index)
/// print(String.UTF8View.Index(emojiHigh, within: cafe.utf8))
/// // Prints "Optional(String.Index(...))"
///
/// let emojiLow = cafe.utf16.index(after: emojiHigh)
/// print(String.UTF8View.Index(emojiLow, within: cafe.utf8))
/// // Prints "nil"
///
/// - Parameters:
/// - utf16Index: A position in a `UTF16View` instance. `utf16Index` must
/// be an element in `String(utf8).utf16.indices`.
/// - utf8: The `UTF8View` in which to find the new position.
public init?(_ utf16Index: String.UTF16Index, within utf8: String.UTF8View) { public init?(_ utf16Index: String.UTF16Index, within utf8: String.UTF8View) {
let utf16 = String.UTF16View(utf8._core) let utf16 = String.UTF16View(utf8._core)
@@ -383,11 +539,24 @@ extension String.UTF8View.Index {
self.init(utf8._core, _utf16Offset: utf16Index._offset) self.init(utf8._core, _utf16Offset: utf16Index._offset)
} }
/// Construct the position in `utf8` that corresponds exactly to /// Creates an index in the given UTF-8 view that corresponds exactly to the
/// `unicodeScalarIndex`. /// specified `UnicodeScalarView` position.
/// ///
/// - Precondition: `unicodeScalarIndex` is an element of /// The following example converts the position of the Unicode scalar `"e"`
/// `String(utf8)!.unicodeScalars.indices`. /// into its corresponding position in the string's `utf8` view.
///
/// let cafe = "Cafe\u{0301}"
/// let scalarsIndex = cafe.unicodeScalars.index(of: "e")!
/// let utf8Index = String.UTF8View.Index(scalarsIndex, within: cafe.utf8)
///
/// print(Array(cafe.utf8.prefix(through: utf8Index)))
/// // Prints "[67, 97, 102, 101]"
///
/// - Parameters:
/// - unicodeScalarIndex: A position in a `UnicodeScalarView` instance.
/// `unicodeScalarIndex` must be an element of
/// `String(utf8).unicodeScalars.indices`.
/// - utf8: The `UTF8View` in which to find the new position.
public init( public init(
_ unicodeScalarIndex: String.UnicodeScalarIndex, _ unicodeScalarIndex: String.UnicodeScalarIndex,
within utf8: String.UTF8View within utf8: String.UTF8View
@@ -395,40 +564,102 @@ extension String.UTF8View.Index {
self.init(utf8._core, _utf16Offset: unicodeScalarIndex._position) self.init(utf8._core, _utf16Offset: unicodeScalarIndex._position)
} }
/// Construct the position in `utf8` that corresponds exactly to /// Creates an index in the given UTF-8 view that corresponds exactly to the
/// `characterIndex`. /// specified string position.
/// ///
/// - Precondition: `characterIndex` is an element of /// The following example converts the position of the teacup emoji (`"🍵"`)
/// `String(utf8)!.indices`. /// into its corresponding position in the string's `utf8` view.
///
/// let cafe = "Café 🍵"
/// let characterIndex = cafe.characters.index(of: "🍵")!
/// let utf8Index = String.UTF8View.Index(characterIndex, within: cafe.utf8)
///
/// print(Array(cafe.utf8.suffix(from: utf8Index)))
/// // Prints "[240, 159, 141, 181]"
///
/// - Parameters:
/// - characterIndex: A position in a `CharacterView` instance.
/// `characterIndex` must be an element of
/// `String(utf8).characters.indices`.
/// - utf8: The `UTF8View` in which to find the new position.
public init(_ characterIndex: String.Index, within utf8: String.UTF8View) { public init(_ characterIndex: String.Index, within utf8: String.UTF8View) {
self.init(utf8._core, _utf16Offset: characterIndex._base._position) self.init(utf8._core, _utf16Offset: characterIndex._base._position)
} }
/// Returns the position in `utf16` that corresponds exactly /// Returns the position in the given UTF-16 view that corresponds exactly to
/// to `self`, or if no such position exists, `nil`. /// this index.
/// ///
/// - Precondition: `self` is an element of `String(utf16)!.utf8.indices`. /// The index must be a valid index of `String(utf16).utf8`.
///
/// This example first finds the position of a space (UTF-8 code unit `32`)
/// in a string's `utf8` view and then uses this method to find the same
/// position in the string's `utf16` view.
///
/// let cafe = "Café 🍵"
/// let i = cafe.utf8.index(of: 32)!
/// let j = i.samePosition(in: cafe.utf16)!
/// print(cafe.utf16.prefix(upTo: j))
/// // Prints "Café"
///
/// - Parameter utf16: The view to use for the index conversion.
/// - Returns: The position in `utf16` that corresponds exactly to this
/// index. If this index does not have an exact corresponding position in
/// `utf16`, this method returns `nil`. For example, an attempt to convert
/// the position of a UTF-8 continuation byte returns `nil`.
public func samePosition( public func samePosition(
in utf16: String.UTF16View in utf16: String.UTF16View
) -> String.UTF16View.Index? { ) -> String.UTF16View.Index? {
return String.UTF16View.Index(self, within: utf16) return String.UTF16View.Index(self, within: utf16)
} }
/// Returns the position in `unicodeScalars` that corresponds exactly /// Returns the position in the given view of Unicode scalars that
/// to `self`, or if no such position exists, `nil`. /// corresponds exactly to this index.
/// ///
/// - Precondition: `self` is an element of /// This index must be a valid index of `String(unicodeScalars).utf8`.
/// `String(unicodeScalars).utf8.indices`. ///
/// This example first finds the position of a space (UTF-8 code unit `32`)
/// in a string's `utf8` view and then uses this method to find the same position
/// in the string's `unicodeScalars` view.
///
/// let cafe = "Café 🍵"
/// let i = cafe.utf8.index(of: 32)!
/// let j = i.samePosition(in: cafe.unicodeScalars)!
/// print(cafe.unicodeScalars.prefix(upTo: j))
/// // Prints "Café"
///
/// - Parameter unicodeScalars: The view to use for the index conversion.
/// - Returns: The position in `unicodeScalars` that corresponds exactly to
/// this index. If this index does not have an exact corresponding
/// position in `unicodeScalars`, this method returns `nil`. For example,
/// an attempt to convert the position of a UTF-8 continuation byte
/// returns `nil`.
public func samePosition( public func samePosition(
in unicodeScalars: String.UnicodeScalarView in unicodeScalars: String.UnicodeScalarView
) -> String.UnicodeScalarIndex? { ) -> String.UnicodeScalarIndex? {
return String.UnicodeScalarIndex(self, within: unicodeScalars) return String.UnicodeScalarIndex(self, within: unicodeScalars)
} }
/// Returns the position in `characters` that corresponds exactly /// Returns the position in the given string that corresponds exactly to this
/// to `self`, or if no such position exists, `nil`. /// index.
/// ///
/// - Precondition: `self` is an element of `characters.utf8.indices`. /// This index must be a valid index of `characters.utf8`.
///
/// This example first finds the position of a space (UTF-8 code unit `32`)
/// in a string's `utf8` view and then uses this method to find the same position
/// in the string.
///
/// let cafe = "Café 🍵"
/// let i = cafe.utf8.index(of: 32)!
/// let j = i.samePosition(in: cafe)!
/// print(cafe[cafe.startIndex ..< j])
/// // Prints "Café"
///
/// - Parameter characters: The string to use for the index conversion.
/// - Returns: The position in `characters` that corresponds exactly to
/// this index. If this index does not have an exact corresponding
/// position in `characters`, this method returns `nil`. For example,
/// an attempt to convert the position of a UTF-8 continuation byte
/// returns `nil`.
public func samePosition( public func samePosition(
in characters: String in characters: String
) -> String.Index? { ) -> String.Index? {
@@ -438,7 +669,7 @@ extension String.UTF8View.Index {
// Reflection // Reflection
extension String.UTF8View : CustomReflectable { extension String.UTF8View : CustomReflectable {
/// Returns a mirror that reflects `self`. /// Returns a mirror that reflects the UTF-8 view of a string.
public var customMirror: Mirror { public var customMirror: Mirror {
return Mirror(self, unlabeledChildren: self) return Mirror(self, unlabeledChildren: self)
} }

View File

@@ -25,8 +25,52 @@ public func < (
} }
extension String { extension String {
/// A collection of [Unicode scalar values](http://www.unicode.org/glossary/#unicode_scalar_value) that /// A view of a string's contents as a collection of Unicode scalar values.
/// encodes a `String` value. ///
/// You can access a string's view of Unicode scalar values by using its
/// `unicodeScalars` property. Unicode scalar values are the 21-bit codes
/// that are the basic unit of Unicode. Each scalar value is represented by
/// a `UnicodeScalar` instance and is equivalent to a UTF-32 code unit.
///
/// let flowers = "Flowers 💐"
/// for v in flowers.unicodeScalars {
/// print(v.value)
/// }
/// // 70
/// // 108
/// // 111
/// // 119
/// // 101
/// // 114
/// // 115
/// // 32
/// // 128144
///
/// Some characters that are visible in a string are made up of more than one
/// Unicode scalar value. In that case, a string's `unicodeScalars` view
/// contains more values than its `characters` view.
///
/// let flag = "🇵🇷"
/// for c in flag.characters {
/// print(c)
/// }
/// // 🇵🇷
///
/// for v in flag.unicodeScalars {
/// print(v.value)
/// }
/// // 127477
/// // 127479
///
/// You can convert a `String.UnicodeScalarView` instance back into a string
/// using the `String` type's `init(_:)` initializer.
///
/// let favemoji = "My favorite emoji is 🎉"
/// if let i = favemoji.unicodeScalars.index(where: { $0.value >= 128 }) {
/// let asciiPrefix = String(favemoji.unicodeScalars.prefix(upTo: i))
/// print(asciiPrefix)
/// }
/// // Prints "My favorite emoji is "
public struct UnicodeScalarView : public struct UnicodeScalarView :
BidirectionalCollection, BidirectionalCollection,
CustomStringConvertible, CustomStringConvertible,
@@ -54,7 +98,22 @@ extension String {
} }
} }
/// A position in a `String.UnicodeScalarView`. /// A position in a string's `unicodeScalars` view.
///
/// You can convert between indices of the different string views by using
/// conversion initializers and the `samePosition(in:)` method overloads.
/// The following example finds the index of the solid heart pictograph in
/// the string's character view and then converts that to the same
/// position in the Unicode scalars view:
///
/// let hearts = "Hearts <3 ♥ 💘"
/// let i = hearts.characters.index(of: "♥")!
///
/// let j = i.samePosition(in: hearts.unicodeScalars)
/// print(hearts.unicodeScalars.suffix(from: j))
/// // Prints "♥ 💘"
/// print(hearts.unicodeScalars[j].value)
/// // Prints "9829"
public struct Index : Comparable { public struct Index : Comparable {
public init(_ _position: Int, _ _core: _StringCore) { public init(_ _position: Int, _ _core: _StringCore) {
self._position = _position self._position = _position
@@ -75,17 +134,18 @@ extension String {
@_versioned internal var _core: _StringCore @_versioned internal var _core: _StringCore
} }
/// The position of the first `UnicodeScalar` if the `String` is /// The position of the first Unicode scalar value if the string is
/// non-empty; identical to `endIndex` otherwise. /// nonempty.
///
/// If the string is empty, `startIndex` is equal to `endIndex`.
public var startIndex: Index { public var startIndex: Index {
return Index(_core.startIndex, _core) return Index(_core.startIndex, _core)
} }
/// The "past the end" position. /// The "past the end" position---that is, the position one greater than
/// the last valid subscript argument.
/// ///
/// `endIndex` is not a valid argument to `subscript`, and is always /// In an empty Unicode scalars view, `endIndex` is equal to `startIndex`.
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
public var endIndex: Index { public var endIndex: Index {
return Index(_core.endIndex, _core) return Index(_core.endIndex, _core)
} }
@@ -114,10 +174,22 @@ extension String {
return Index(i, _core) return Index(i, _core)
} }
/// Access the element at `position`. /// Accesses the Unicode scalar value at the given position.
/// ///
/// - Precondition: `position` is a valid position in `self` and /// The following example searches a string's Unicode scalars view for a
/// `position != endIndex`. /// capital letter and then prints the character and Unicode scalar value
/// at the found index:
///
/// let greeting = "Hello, friend!"
/// if let i = greeting.unicodeScalars.index(where: { "A"..."Z" ~= $0 }) {
/// print("First capital letter: \(greeting.unicodeScalars[i])")
/// print("Unicode scalar value: \(greeting.unicodeScalars[i].value)")
/// }
/// // Prints "First capital letter: H"
/// // Prints "Unicode scalar value: 72"
///
/// - Parameter position: A valid index of the Unicode scalars view. `position`
/// must be less than the view's end index.
public subscript(position: Index) -> UnicodeScalar { public subscript(position: Index) -> UnicodeScalar {
var scratch = _ScratchIterator(_core, position._position) var scratch = _ScratchIterator(_core, position._position)
var decoder = UTF16() var decoder = UTF16()
@@ -131,17 +203,26 @@ extension String {
} }
} }
/// Access the contiguous subrange of elements enclosed by `bounds`. /// Accesses the Unicode scalar values in the given range.
/// ///
/// - Complexity: O(1) unless bridging from Objective-C requires an /// The example below uses this subscript to access the scalar values up
/// O(N) conversion. /// to, but not including, the first comma (`","`) in the string.
///
/// let str = "All this happened, more or less."
/// let i = str.unicodeScalars.index(of: ",")!
/// let substring = str.unicodeScalars[str.unicodeScalars.startIndex ..< i]
/// print(String(substring))
/// // Prints "All this happened"
///
/// - Complexity: O(*n*) if the underlying string is bridged from
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
public subscript(r: Range<Index>) -> UnicodeScalarView { public subscript(r: Range<Index>) -> UnicodeScalarView {
return UnicodeScalarView( return UnicodeScalarView(
_core[r.lowerBound._position..<r.upperBound._position]) _core[r.lowerBound._position..<r.upperBound._position])
} }
/// A type whose instances can produce the elements of this /// An iterator over the Unicode scalars that make up a `UnicodeScalarView`
/// sequence, in order. /// collection.
public struct Iterator : IteratorProtocol { public struct Iterator : IteratorProtocol {
init(_ _base: _StringCore) { init(_ _base: _StringCore) {
if _base.hasContiguousStorage { if _base.hasContiguousStorage {
@@ -164,11 +245,13 @@ extension String {
} }
} }
/// Advance to the next element and return it, or `nil` if no next /// Advances to the next element and returns it.
/// element exists.
/// ///
/// - Precondition: No preceding call to `self.next()` has returned /// Do not call this method if a copy of the iterator has been advanced.
/// `nil`. ///
/// - Returns: The next element in the collection if an element is
/// available; otherwise, `nil`. After returning `nil` once, this
/// method returns `nil` on every subsequent call.
public mutating func next() -> UnicodeScalar? { public mutating func next() -> UnicodeScalar? {
var result: UnicodeDecodingResult var result: UnicodeDecodingResult
if _baseSet { if _baseSet {
@@ -202,10 +285,9 @@ extension String {
internal var _iterator: IndexingIterator<_StringCore>! internal var _iterator: IndexingIterator<_StringCore>!
} }
/// Returns an iterator over the `UnicodeScalar`s that comprise /// Returns an iterator over the Unicode scalars that make up this view.
/// this sequence.
/// ///
/// - Complexity: O(1). /// - Returns: An iterator over this collection's `UnicodeScalar` elements.
public func makeIterator() -> Iterator { public func makeIterator() -> Iterator {
return Iterator(_core) return Iterator(_core)
} }
@@ -221,19 +303,33 @@ extension String {
internal var _core: _StringCore internal var _core: _StringCore
} }
/// Construct the `String` corresponding to the given sequence of /// Creates a string corresponding to the given collection of Unicode
/// Unicode scalars. /// scalars.
///
/// You can use this initializer to create a new string from a slice of
/// another string's `unicodeScalars` view.
///
/// let picnicGuest = "Deserving porcupine"
/// if let i = picnicGuest.unicodeScalars.index(of: " ") {
/// let adjective = String(picnicGuest.unicodeScalars.prefix(upTo: i))
/// print(adjective)
/// }
/// // Prints "Deserving"
///
/// The `adjective` constant is created by calling this initializer with a
/// slice of the `picnicGuest.unicodeScalars` view.
///
/// - Parameter unicodeScalars: A collection of Unicode scalar values.
public init(_ unicodeScalars: UnicodeScalarView) { public init(_ unicodeScalars: UnicodeScalarView) {
self.init(unicodeScalars._core) self.init(unicodeScalars._core)
} }
/// The index type for subscripting a `String`'s `.unicodeScalars` /// The index type for a string's `unicodeScalars` view.
/// view.
public typealias UnicodeScalarIndex = UnicodeScalarView.Index public typealias UnicodeScalarIndex = UnicodeScalarView.Index
} }
extension String { extension String {
/// The value of `self` as a collection of [Unicode scalar values](http://www.unicode.org/glossary/#unicode_scalar_value). /// The string's value represented as a collection of Unicode scalar values.
public var unicodeScalars : UnicodeScalarView { public var unicodeScalars : UnicodeScalarView {
get { get {
return UnicodeScalarView(_core) return UnicodeScalarView(_core)
@@ -245,36 +341,60 @@ extension String {
} }
extension String.UnicodeScalarView : RangeReplaceableCollection { extension String.UnicodeScalarView : RangeReplaceableCollection {
/// Construct an empty instance. /// Creates an empty view instance.
public init() { public init() {
self = String.UnicodeScalarView(_StringCore()) self = String.UnicodeScalarView(_StringCore())
} }
/// Reserve enough space to store `n` ASCII characters.
/// Reserves enough space in the view's underlying storage to store the
/// specified number of ASCII characters.
/// ///
/// - Complexity: O(`n`). /// Because a Unicode scalar value can require more than a single ASCII
/// character's worth of storage, additional allocation may be necessary
/// when adding to a Unicode scalar view after a call to
/// `reserveCapacity(_:)`.
///
/// - Parameter n: The minimum number of ASCII characters' worth of storage
/// to allocate.
///
/// - Complexity: O(*n*), where *n* is the capacity being reserved.
public mutating func reserveCapacity(_ n: Int) { public mutating func reserveCapacity(_ n: Int) {
_core.reserveCapacity(n) _core.reserveCapacity(n)
} }
/// Append `x` to `self`.
/// Appends the given Unicode scalar to the view.
/// ///
/// - Complexity: Amortized O(1). /// - Parameter x: The Unicode scalar to append to the view.
public mutating func append(_ x: UnicodeScalar) { public mutating func append(_ x: UnicodeScalar) {
_core.append(x) _core.append(x)
} }
/// Append the elements of `newElements` to `self`.
/// Appends the Unicode scalar values in the given sequence to the view.
/// ///
/// - Complexity: O(*length of result*). /// - Parameter newElements: A sequence of Unicode scalar values.
///
/// - Complexity: O(*n*), where *n* is the length of the resulting view.
public mutating func append< public mutating func append<
S : Sequence where S.Iterator.Element == UnicodeScalar S : Sequence where S.Iterator.Element == UnicodeScalar
>(contentsOf newElements: S) { >(contentsOf newElements: S) {
_core.append(contentsOf: newElements.lazy.flatMap { $0.utf16 }) _core.append(contentsOf: newElements.lazy.flatMap { $0.utf16 })
} }
/// Replace the elements within `bounds` with `newElements`.
/// Replaces the elements within the specified bounds with the given Unicode
/// scalar values.
/// ///
/// Invalidates all indices with respect to `self`. /// Calling this method invalidates any existing indices for use with this
/// string.
/// ///
/// - Complexity: O(`bounds.count`) if `bounds.upperBound /// - Parameters:
/// == self.endIndex` and `newElements.isEmpty`, O(N) otherwise. /// - bounds: The range of elements to replace. The bounds of the range
/// must be valid indices of the view.
/// - newElements: The new Unicode scalar values to add to the string.
///
/// - Complexity: O(*m*), where *m* is the combined length of the view and
/// `newElements`. If the call to `replaceSubrange(_:with:)` simply
/// removes elements at the end of the string, the complexity is O(*n*),
/// where *n* is equal to `bounds.count`.
public mutating func replaceSubrange< public mutating func replaceSubrange<
C: Collection where C.Iterator.Element == UnicodeScalar C: Collection where C.Iterator.Element == UnicodeScalar
>( >(
@@ -290,11 +410,31 @@ extension String.UnicodeScalarView : RangeReplaceableCollection {
// Index conversions // Index conversions
extension String.UnicodeScalarIndex { extension String.UnicodeScalarIndex {
/// Construct the position in `unicodeScalars` that corresponds exactly to /// Creates an index in the given Unicode scalars view that corresponds
/// `utf16Index`. If no such position exists, the result is `nil`. /// exactly to the specified `UTF16View` position.
/// ///
/// - Precondition: `utf16Index` is an element of /// The following example finds the position of a space in a string's `utf16`
/// `String(unicodeScalars).utf16.indices`. /// view and then converts that position to an index in the string's
/// `unicodeScalars` view:
///
/// let cafe = "Café 🍵"
///
/// let utf16Index = cafe.utf16.index(of: 32)!
/// let scalarIndex = String.UnicodeScalarView.Index(utf16Index, within: cafe.unicodeScalars)!
///
/// print(String(cafe.unicodeScalars.prefix(upTo: scalarIndex)))
/// // Prints "Café"
///
/// If the position passed in `utf16Index` doesn't have an exact
/// corresponding position in `unicodeScalars`, the result of the
/// initializer is `nil`. For example, an attempt to convert the position of
/// the trailing surrogate of a UTF-16 surrogate pair fails.
///
/// - Parameters:
/// - utf16Index: A position in the `utf16` view of a string. `utf16Index`
/// must be an element of `String(unicodeScalars).utf16.indices`.
/// - unicodeScalars: The `UnicodeScalarView` instance referenced by both
/// `utf16Index` and the resulting index.
public init?( public init?(
_ utf16Index: String.UTF16Index, _ utf16Index: String.UTF16Index,
within unicodeScalars: String.UnicodeScalarView within unicodeScalars: String.UnicodeScalarView
@@ -320,11 +460,19 @@ extension String.UnicodeScalarIndex {
self.init(utf16Index._offset, unicodeScalars._core) self.init(utf16Index._offset, unicodeScalars._core)
} }
/// Construct the position in `unicodeScalars` that corresponds exactly to /// Creates an index in the given Unicode scalars view that corresponds
/// `utf8Index`. If no such position exists, the result is `nil`. /// exactly to the specified `UTF8View` position.
/// ///
/// - Precondition: `utf8Index` is an element of /// If the position passed as `utf8Index` doesn't have an exact corresponding
/// `String(unicodeScalars).utf8.indices`. /// position in `unicodeScalars`, the result of the initializer is `nil`.
/// For example, an attempt to convert the position of a UTF-8 continuation
/// byte returns `nil`.
///
/// - Parameters:
/// - utf8Index: A position in the `utf8` view of a string. `utf8Index`
/// must be an element of `String(unicodeScalars).utf8.indices`.
/// - unicodeScalars: The `UnicodeScalarView` instance referenced by both
/// `utf8Index` and the resulting index.
public init?( public init?(
_ utf8Index: String.UTF8Index, _ utf8Index: String.UTF8Index,
within unicodeScalars: String.UnicodeScalarView within unicodeScalars: String.UnicodeScalarView
@@ -342,11 +490,24 @@ extension String.UnicodeScalarIndex {
self.init(utf8Index._coreIndex, core) self.init(utf8Index._coreIndex, core)
} }
/// Construct the position in `unicodeScalars` that corresponds /// Creates an index in the given Unicode scalars view that corresponds
/// exactly to `characterIndex`. /// exactly to the specified string position.
/// ///
/// - Precondition: `characterIndex` is an element of /// The following example converts the position of the teacup emoji (`"🍵"`)
/// `String(unicodeScalars).indices`. /// into its corresponding position in the string's `unicodeScalars` view.
///
/// let cafe = "Café 🍵"
/// let characterIndex = cafe.characters.index(of: "🍵")!
/// let scalarIndex = String.UnicodeScalarView.Index(characterIndex, within: cafe.unicodeScalars)
///
/// print(cafe.unicodeScalars.suffix(from: scalarIndex))
/// // Prints "🍵"
///
/// - Parameters:
/// - characterIndex: A position in a `CharacterView` instance.
/// `characterIndex` must be an element of
/// `String(unicodeScalars).characters.indices`.
/// - unicodeScalars: The `UnicodeScalarView` in which to find the new
/// position.
public init( public init(
_ characterIndex: String.Index, _ characterIndex: String.Index,
within unicodeScalars: String.UnicodeScalarView within unicodeScalars: String.UnicodeScalarView
@@ -354,29 +515,71 @@ extension String.UnicodeScalarIndex {
self.init(characterIndex._base._position, unicodeScalars._core) self.init(characterIndex._base._position, unicodeScalars._core)
} }
/// Returns the position in `utf8` that corresponds exactly /// Returns the position in the given UTF-8 view that corresponds exactly to
/// to `self`. /// this index.
/// ///
/// - Precondition: `self` is an element of `String(utf8)!.indices`. /// The index must be a valid index of `String(utf8).unicodeScalars`.
///
/// This example first finds the position of the character `"é"` and then uses
/// this method to find the same position in the string's `utf8` view.
///
/// let cafe = "Café"
/// if let i = cafe.unicodeScalars.index(of: "é") {
/// let j = i.samePosition(in: cafe.utf8)
/// print(Array(cafe.utf8.suffix(from: j)))
/// }
/// // Prints "[195, 169]"
///
/// - Parameter utf8: The view to use for the index conversion.
/// - Returns: The position in `utf8` that corresponds exactly to this index.
public func samePosition(in utf8: String.UTF8View) -> String.UTF8View.Index { public func samePosition(in utf8: String.UTF8View) -> String.UTF8View.Index {
return String.UTF8View.Index(self, within: utf8) return String.UTF8View.Index(self, within: utf8)
} }
/// Returns the position in `utf16` that corresponds exactly /// Returns the position in the given UTF-16 view that corresponds exactly to
/// to `self`. /// this index.
/// ///
/// - Precondition: `self` is an element of `String(utf16)!.indices`. /// The index must be a valid index of `String(utf16).unicodeScalars`.
///
/// This example first finds the position of the character `"é"` and then uses
/// this method to find the same position in the string's `utf16` view.
///
/// let cafe = "Café"
/// if let i = cafe.unicodeScalars.index(of: "é") {
/// let j = i.samePosition(in: cafe.utf16)
/// print(cafe.utf16[j])
/// }
/// // Prints "233"
///
/// - Parameter utf16: The view to use for the index conversion.
/// - Returns: The position in `utf16` that corresponds exactly to this index.
public func samePosition( public func samePosition(
in utf16: String.UTF16View in utf16: String.UTF16View
) -> String.UTF16View.Index { ) -> String.UTF16View.Index {
return String.UTF16View.Index(self, within: utf16) return String.UTF16View.Index(self, within: utf16)
} }
/// Returns the position in `characters` that corresponds exactly /// Returns the position in the given string that corresponds exactly to this
/// to `self`, or if no such position exists, `nil`. /// index.
/// ///
/// - Precondition: `self` is an element of /// This index must be a valid index of `characters.unicodeScalars`.
/// `characters.unicodeScalars.indices`. ///
/// This example first finds the position of the teacup emoji (`"🍵"`) in a
/// string's `unicodeScalars` view and then uses this method to find the same
/// position in the string.
///
/// let cafe = "Café 🍵"
/// let i = cafe.unicodeScalars.index(of: "🍵")!
/// let j = i.samePosition(in: cafe)!
/// print(cafe.suffix(from: j))
/// // Prints "🍵"
///
/// - Parameter characters: The string to use for the index conversion.
/// - Returns: The position in `characters` that corresponds exactly to
/// this index. If this index does not have an exact corresponding
/// position in `characters`, this method returns `nil`. For example,
/// an attempt to convert the position of a UTF-8 continuation byte
/// returns `nil`.
public func samePosition(in characters: String) -> String.Index? { public func samePosition(in characters: String) -> String.Index? {
return String.Index(self, within: characters) return String.Index(self, within: characters)
} }
@@ -408,7 +611,7 @@ extension String.UnicodeScalarIndex {
// Reflection // Reflection
extension String.UnicodeScalarView : CustomReflectable { extension String.UnicodeScalarView : CustomReflectable {
/// Returns a mirror that reflects `self`. /// Returns a mirror that reflects the Unicode scalars view of a string.
public var customMirror: Mirror { public var customMirror: Mirror {
return Mirror(self, unlabeledChildren: self) return Mirror(self, unlabeledChildren: self)
} }

View File

@@ -16,11 +16,19 @@
/// The result of one Unicode decoding step. /// The result of one Unicode decoding step.
/// ///
/// A unicode scalar value, an indication that no more unicode scalars /// Each `UnicodeDecodingResult` instance can represent a Unicode scalar value,
/// are available, or an indication of a decoding error. /// an indication that no more Unicode scalars are available, or an indication
/// of a decoding error.
///
/// - SeeAlso: `UnicodeCodec.decode(next:)`
public enum UnicodeDecodingResult : Equatable { public enum UnicodeDecodingResult : Equatable {
/// A decoded Unicode scalar value.
case scalarValue(UnicodeScalar) case scalarValue(UnicodeScalar)
/// An indication that no more Unicode scalars are available in the input.
case emptyInput case emptyInput
/// An indication of a decoding error.
case error case error
} }
@@ -40,56 +48,102 @@ public func == (
} }
} }
/// A Unicode [encoding scheme](http://www.unicode.org/glossary/#character_encoding_scheme). /// A Unicode encoding form that translates between Unicode scalar values and
/// form-specific code units.
/// ///
/// Consists of an underlying [code unit](http://www.unicode.org/glossary/#code_unit) /// The `UnicodeCodec` protocol declares methods that decode code unit
/// and functions to translate between sequences of these code units and /// sequences into Unicode scalar values and encode Unicode scalar values
/// [unicode scalar values](http://www.unicode.org/glossary/#unicode_scalar_value). /// into code unit sequences. The standard library implements codecs for the
/// UTF-8, UTF-16, and UTF-32 encoding schemes as the `UTF8`, `UTF16`, and
/// `UTF32` types, respectively. Use the `UnicodeScalar` type to work with
/// decoded Unicode scalar values.
///
/// - SeeAlso: `UTF8`, `UTF16`, `UTF32`, `UnicodeScalar`
public protocol UnicodeCodec { public protocol UnicodeCodec {
/// A type that can hold [code unit](http://www.unicode.org/glossary/#code_unit) /// A type that can hold code unit values for this encoding.
/// values for this encoding.
associatedtype CodeUnit associatedtype CodeUnit
/// Creates an instance of the codec.
init() init()
/// Start or continue decoding a UTF sequence. /// Starts or continues decoding a code unit sequence into Unicode scalar
/// values.
/// ///
/// In order to decode a code unit sequence completely, this function should /// To decode a code unit sequence completely, call this method repeatedly
/// be called repeatedly until it returns `UnicodeDecodingResult.emptyInput`. /// until it returns `UnicodeDecodingResult.emptyInput`. Checking that the
/// Checking that the iterator was exhausted is not sufficient. The decoder /// iterator was exhausted is not sufficient, because the decoder can store
/// can have an internal buffer that is pre-filled with data from the input /// buffered data from the input iterator.
/// iterator.
/// ///
/// Because of buffering, it is impossible to find the corresponding position /// Because of buffering, it is impossible to find the corresponding position
/// in the iterator for a given returned `UnicodeScalar` or an error. /// in the iterator for a given returned `UnicodeScalar` or an error.
/// ///
/// - Parameter next: An iterator of code units to be decoded. Repeated /// The following example decodes the UTF-8 encoded bytes of a string into an
/// calls to this method on the same instance should always pass the same /// array of `UnicodeScalar` instances:
/// iterator and the iterator or copies thereof should not be used for ///
/// anything else between calls. Failing to do so will yield unspecified /// let str = "✨Unicode✨"
/// results. /// print(Array(str.utf8))
/// // Prints "[226, 156, 168, 85, 110, 105, 99, 111, 100, 101, 226, 156, 168]"
///
/// var bytesIterator = str.utf8.makeIterator()
/// var scalars: [UnicodeScalar] = []
/// var utf8Decoder = UTF8()
/// Decode: while true {
/// switch utf8Decoder.decode(&bytesIterator) {
/// case .scalarValue(let v): scalars.append(v)
/// case .emptyInput: break Decode
/// case .error:
/// print("Decoding error")
/// break Decode
/// }
/// }
/// print(scalars)
/// // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]"
///
/// - Parameter next: An iterator of code units to be decoded. `next` must be
/// the same iterator instance in repeated calls to this method. Do not
/// advance the iterator or any copies of the iterator outside this
/// method.
/// - Returns: A `UnicodeDecodingResult` instance, representing the next
/// Unicode scalar, an indication of an error, or an indication that the
/// UTF sequence has been fully decoded.
mutating func decode< mutating func decode<
I : IteratorProtocol where I.Element == CodeUnit I : IteratorProtocol where I.Element == CodeUnit
>(_ next: inout I) -> UnicodeDecodingResult >(_ next: inout I) -> UnicodeDecodingResult
/// Encode a `UnicodeScalar` as a series of `CodeUnit`s by /// Encodes a Unicode scalar as a series of code units by calling the given
/// calling `processCodeUnit` on each `CodeUnit`. /// closure on each code unit.
///
/// For example, the musical fermata symbol ("𝄐") is a single Unicode scalar
/// value (`\u{1D110}`) but requires four code units for its UTF-8
/// representation. The following code uses the `UTF8` codec to encode a
/// fermata in UTF-8:
///
/// var bytes: [UTF8.CodeUnit] = []
/// UTF8.encode("𝄐", sendingOutputTo: { bytes.append($0) })
/// print(bytes)
/// // Prints "[240, 157, 132, 144]"
///
/// - Parameters:
/// - input: The Unicode scalar value to encode.
/// - processCodeUnit: A closure that processes one code unit argument at a
/// time.
static func encode( static func encode(
_ input: UnicodeScalar, _ input: UnicodeScalar,
sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void
) )
} }
/// A codec for [UTF-8](http://www.unicode.org/glossary/#UTF_8). /// A codec for translating between Unicode scalar values and UTF-8 code
/// units.
public struct UTF8 : UnicodeCodec { public struct UTF8 : UnicodeCodec {
// See Unicode 8.0.0, Ch 3.9, UTF-8. // See Unicode 8.0.0, Ch 3.9, UTF-8.
// http://www.unicode.org/versions/Unicode8.0.0/ch03.pdf // http://www.unicode.org/versions/Unicode8.0.0/ch03.pdf
/// A type that can hold [code unit](http://www.unicode.org/glossary/#code_unit) /// A type that can hold code unit values for this encoding.
/// values for this encoding.
public typealias CodeUnit = UInt8 public typealias CodeUnit = UInt8
/// Creates an instance of the UTF-8 codec.
public init() {} public init() {}
/// Lookahead buffer used for UTF-8 decoding. New bytes are inserted at MSB, /// Lookahead buffer used for UTF-8 decoding. New bytes are inserted at MSB,
@@ -105,22 +159,47 @@ public struct UTF8 : UnicodeCodec {
/// we are done decoding, as there might still be bytes left in the buffer. /// we are done decoding, as there might still be bytes left in the buffer.
internal var _didExhaustIterator: Bool = false internal var _didExhaustIterator: Bool = false
/// Start or continue decoding a UTF-8 sequence. /// Starts or continues decoding a UTF-8 sequence.
/// ///
/// In order to decode a code unit sequence completely, this function should /// To decode a code unit sequence completely, call this method repeatedly
/// be called repeatedly until it returns `UnicodeDecodingResult.emptyInput`. /// until it returns `UnicodeDecodingResult.emptyInput`. Checking that the
/// Checking that the iterator was exhausted is not sufficient. The decoder /// iterator was exhausted is not sufficient, because the decoder can store
/// can have an internal buffer that is pre-filled with data from the input /// buffered data from the input iterator.
/// iterator.
/// ///
/// Because of buffering, it is impossible to find the corresponding position /// Because of buffering, it is impossible to find the corresponding position
/// in the iterator for a given returned `UnicodeScalar` or an error. /// in the iterator for a given returned `UnicodeScalar` or an error.
/// ///
/// - Parameter next: An iterator of code units to be decoded. Repeated /// The following example decodes the UTF-8 encoded bytes of a string into an
/// calls to this method on the same instance should always pass the same /// array of `UnicodeScalar` instances. This is a demonstration only---if
/// iterator and the iterator or copies thereof should not be used for /// you need the Unicode scalar representation of a string, use its
/// anything else between calls. Failing to do so will yield unspecified /// `unicodeScalars` view.
/// results. ///
/// let str = "✨Unicode✨"
/// print(Array(str.utf8))
/// // Prints "[226, 156, 168, 85, 110, 105, 99, 111, 100, 101, 226, 156, 168]"
///
/// var bytesIterator = str.utf8.makeIterator()
/// var scalars: [UnicodeScalar] = []
/// var utf8Decoder = UTF8()
/// Decode: while true {
/// switch utf8Decoder.decode(&bytesIterator) {
/// case .scalarValue(let v): scalars.append(v)
/// case .emptyInput: break Decode
/// case .error:
/// print("Decoding error")
/// break Decode
/// }
/// }
/// print(scalars)
/// // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]"
///
/// - Parameter next: An iterator of code units to be decoded. `next` must be
/// the same iterator instance in repeated calls to this method. Do not
/// advance the iterator or any copies of the iterator outside this
/// method.
/// - Returns: A `UnicodeDecodingResult` instance, representing the next
/// Unicode scalar, an indication of an error, or an indication that the
/// UTF sequence has been fully decoded.
public mutating func decode< public mutating func decode<
I : IteratorProtocol where I.Element == CodeUnit I : IteratorProtocol where I.Element == CodeUnit
>(_ next: inout I) -> UnicodeDecodingResult { >(_ next: inout I) -> UnicodeDecodingResult {
@@ -280,8 +359,22 @@ public struct UTF8 : UnicodeCodec {
} }
} }
/// Encode a `UnicodeScalar` as a series of `CodeUnit`s by /// Encodes a Unicode scalar as a series of code units by calling the given
/// calling `processCodeUnit` on each `CodeUnit`. /// closure on each code unit.
///
/// For example, the musical fermata symbol ("𝄐") is a single Unicode scalar
/// value (`\u{1D110}`) but requires four code units for its UTF-8
/// representation. The following code encodes a fermata in UTF-8:
///
/// var bytes: [UTF8.CodeUnit] = []
/// UTF8.encode("𝄐", sendingOutputTo: { bytes.append($0) })
/// print(bytes)
/// // Prints "[240, 157, 132, 144]"
///
/// - Parameters:
/// - input: The Unicode scalar value to encode.
/// - processCodeUnit: A closure that processes one code unit argument at a
/// time.
public static func encode( public static func encode(
_ input: UnicodeScalar, _ input: UnicodeScalar,
sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void
@@ -315,19 +408,35 @@ public struct UTF8 : UnicodeCodec {
processCodeUnit(buf3) processCodeUnit(buf3)
} }
/// Returns `true` if `byte` is a continuation byte of the form /// Returns a Boolean value indicating whether the specified code unit is a
/// `0b10xxxxxx`. /// UTF-8 continuation byte.
///
/// Continuation bytes take the form `0b10xxxxxx`. For example, a lowercase
/// "e" with an acute accent above it (`"é"`) uses 2 bytes for its UTF-8
/// representation: `0b11000011` (195) and `0b10101001` (169). The second
/// byte is a continuation byte.
///
/// let eAcute = "é"
/// for codePoint in eAcute.utf8 {
/// print(codePoint, UTF8.isContinuation(codePoint))
/// }
/// // Prints "195 false"
/// // Prints "169 true"
///
/// - Parameter byte: A UTF-8 code unit.
/// - Returns: `true` if `byte` is a continuation byte; otherwise, `false`.
public static func isContinuation(_ byte: CodeUnit) -> Bool { public static func isContinuation(_ byte: CodeUnit) -> Bool {
return byte & 0b11_00__0000 == 0b10_00__0000 return byte & 0b11_00__0000 == 0b10_00__0000
} }
} }
/// A codec for [UTF-16](http://www.unicode.org/glossary/#UTF_16). /// A codec for translating between Unicode scalar values and UTF-16 code
/// units.
public struct UTF16 : UnicodeCodec { public struct UTF16 : UnicodeCodec {
/// A type that can hold [code unit](http://www.unicode.org/glossary/#code_unit) /// A type that can hold code unit values for this encoding.
/// values for this encoding.
public typealias CodeUnit = UInt16 public typealias CodeUnit = UInt16
/// Creates an instance of the UTF-16 codec.
public init() {} public init() {}
/// A lookahead buffer for one UTF-16 code unit. /// A lookahead buffer for one UTF-16 code unit.
@@ -340,22 +449,47 @@ public struct UTF16 : UnicodeCodec {
/// `x` is set when `_decodeLookahead` contains a code unit. /// `x` is set when `_decodeLookahead` contains a code unit.
internal var _lookaheadFlags: UInt8 = 0 internal var _lookaheadFlags: UInt8 = 0
/// Start or continue decoding a UTF sequence. /// Starts or continues decoding a UTF-16 sequence.
/// ///
/// In order to decode a code unit sequence completely, this function should /// To decode a code unit sequence completely, call this method repeatedly
/// be called repeatedly until it returns `UnicodeDecodingResult.emptyInput`. /// until it returns `UnicodeDecodingResult.emptyInput`. Checking that the
/// Checking that the iterator was exhausted is not sufficient. The decoder /// iterator was exhausted is not sufficient, because the decoder can store
/// can have an internal buffer that is pre-filled with data from the input /// buffered data from the input iterator.
/// iterator.
/// ///
/// Because of buffering, it is impossible to find the corresponding position /// Because of buffering, it is impossible to find the corresponding position
/// in the iterator for a given returned `UnicodeScalar` or an error. /// in the iterator for a given returned `UnicodeScalar` or an error.
/// ///
/// - Parameter next: An iterator of code units to be decoded. Repeated /// The following example decodes the UTF-16 code units of a string into an
/// calls to this method on the same instance should always pass the same /// array of `UnicodeScalar` instances. This is a demonstration only---if
/// iterator and the iterator or copies thereof should not be used for /// you need the Unicode scalar representation of a string, use its
/// anything else between calls. Failing to do so will yield unspecified /// `unicodeScalars` view.
/// results. ///
/// let str = "✨Unicode✨"
/// print(Array(str.utf16))
/// // Prints "[10024, 85, 110, 105, 99, 111, 100, 101, 10024]"
///
/// var codeUnitIterator = str.utf16.makeIterator()
/// var scalars: [UnicodeScalar] = []
/// var utf16Decoder = UTF16()
/// Decode: while true {
/// switch utf16Decoder.decode(&codeUnitIterator) {
/// case .scalarValue(let v): scalars.append(v)
/// case .emptyInput: break Decode
/// case .error:
/// print("Decoding error")
/// break Decode
/// }
/// }
/// print(scalars)
/// // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]"
///
/// - Parameter next: An iterator of code units to be decoded. `next` must be
/// the same iterator instance in repeated calls to this method. Do not
/// advance the iterator or any copies of the iterator outside this
/// method.
/// - Returns: A `UnicodeDecodingResult` instance, representing the next
/// Unicode scalar, an indication of an error, or an indication that the
/// UTF sequence has been fully decoded.
public mutating func decode< public mutating func decode<
I : IteratorProtocol where I.Element == CodeUnit I : IteratorProtocol where I.Element == CodeUnit
>(_ input: inout I) -> UnicodeDecodingResult { >(_ input: inout I) -> UnicodeDecodingResult {
@@ -451,8 +585,22 @@ public struct UTF16 : UnicodeCodec {
} }
} }
/// Encode a `UnicodeScalar` as a series of `CodeUnit`s by /// Encodes a Unicode scalar as a series of code units by calling the given
/// calling `processCodeUnit` on each `CodeUnit`. /// closure on each code unit.
///
/// For example, the musical fermata symbol ("𝄐") is a single Unicode scalar
/// value (`\u{1D110}`) but requires two code units for its UTF-16
/// representation. The following code encodes a fermata in UTF-16:
///
/// var codeUnits: [UTF16.CodeUnit] = []
/// UTF16.encode("𝄐", sendingOutputTo: { codeUnits.append($0) })
/// print(codeUnits)
/// // Prints "[55348, 56592]"
///
/// - Parameters:
/// - input: The Unicode scalar value to encode.
/// - processCodeUnit: A closure that processes one code unit argument at a
/// time.
public static func encode( public static func encode(
_ input: UnicodeScalar, _ input: UnicodeScalar,
sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void
@@ -470,30 +618,56 @@ public struct UTF16 : UnicodeCodec {
} }
} }
/// A codec for [UTF-32](http://www.unicode.org/glossary/#UTF_32). /// A codec for translating between Unicode scalar values and UTF-32 code
/// units.
public struct UTF32 : UnicodeCodec { public struct UTF32 : UnicodeCodec {
/// A type that can hold [code unit](http://www.unicode.org/glossary/#code_unit) /// A type that can hold code unit values for this encoding.
/// values for this encoding.
public typealias CodeUnit = UInt32 public typealias CodeUnit = UInt32
/// Creates an instance of the UTF-32 codec.
public init() {} public init() {}
/// Start or continue decoding a UTF sequence. /// Starts or continues decoding a UTF-32 sequence.
/// ///
/// In order to decode a code unit sequence completely, this function should /// To decode a code unit sequence completely, call this method repeatedly
/// be called repeatedly until it returns `UnicodeDecodingResult.emptyInput`. /// until it returns `UnicodeDecodingResult.emptyInput`. Checking that the
/// Checking that the iterator was exhausted is not sufficient. The decoder /// iterator was exhausted is not sufficient, because the decoder can store
/// can have an internal buffer that is pre-filled with data from the input /// buffered data from the input iterator.
/// iterator.
/// ///
/// Because of buffering, it is impossible to find the corresponding position /// Because of buffering, it is impossible to find the corresponding position
/// in the iterator for a given returned `UnicodeScalar` or an error. /// in the iterator for a given returned `UnicodeScalar` or an error.
/// ///
/// - Parameter next: An iterator of code units to be decoded. Repeated /// The following example decodes the UTF-32 code units of a string
/// calls to this method on the same instance should always pass the same /// into an array of `UnicodeScalar` instances. This is a demonstration
/// iterator and the iterator or copies thereof should not be used for /// only---if you need the Unicode scalar representation of a string, use
/// anything else between calls. Failing to do so will yield unspecified /// its `unicodeScalars` view.
/// results. ///
/// // UTF-32 representation of "✨Unicode✨"
/// let codeUnits: [UTF32.CodeUnit] =
/// [10024, 85, 110, 105, 99, 111, 100, 101, 10024]
///
/// var codeUnitIterator = codeUnits.makeIterator()
/// var scalars: [UnicodeScalar] = []
/// var utf32Decoder = UTF32()
/// Decode: while true {
/// switch utf32Decoder.decode(&codeUnitIterator) {
/// case .scalarValue(let v): scalars.append(v)
/// case .emptyInput: break Decode
/// case .error:
/// print("Decoding error")
/// break Decode
/// }
/// }
/// print(scalars)
/// // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]"
///
/// - Parameter next: An iterator of code units to be decoded. `next` must be
/// the same iterator instance in repeated calls to this method. Do not
/// advance the iterator or any copies of the iterator outside this
/// method.
/// - Returns: A `UnicodeDecodingResult` instance, representing the next
/// Unicode scalar, an indication of an error, or an indication that the
/// UTF sequence has been fully decoded.
public mutating func decode< public mutating func decode<
I : IteratorProtocol where I.Element == CodeUnit I : IteratorProtocol where I.Element == CodeUnit
>(_ input: inout I) -> UnicodeDecodingResult { >(_ input: inout I) -> UnicodeDecodingResult {
@@ -511,8 +685,22 @@ public struct UTF32 : UnicodeCodec {
} }
} }
/// Encode a `UnicodeScalar` as a series of `CodeUnit`s by /// Encodes a Unicode scalar as a UTF-32 code unit by calling the given
/// calling `processCodeUnit` on each `CodeUnit`. /// closure.
///
/// For example, like every Unicode scalar, the musical fermata symbol ("𝄐")
/// can be represented in UTF-32 as a single code unit. The following code
/// encodes a fermata in UTF-32:
///
/// var codeUnit: UTF32.CodeUnit = 0
/// UTF32.encode("𝄐", sendingOutputTo: { codeUnit = $0 })
/// print(codeUnit)
/// // Prints "119056"
///
/// - Parameters:
/// - input: The Unicode scalar value to encode.
/// - processCodeUnit: A closure that processes one code unit argument at a
/// time.
public static func encode( public static func encode(
_ input: UnicodeScalar, _ input: UnicodeScalar,
sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void
@@ -521,12 +709,41 @@ public struct UTF32 : UnicodeCodec {
} }
} }
/// Translate `input`, in the given `InputEncoding`, into `processCodeUnit`, in /// Translates the given input from one Unicode encoding to another by calling
/// the given `OutputEncoding`. /// the given closure.
/// ///
/// - Parameter stopOnError: Causes encoding to stop when an encoding /// The following example transcodes the UTF-8 representation of the string
/// error is detected in `input`, if `true`. Otherwise, U+FFFD /// `"Fermata 𝄐"` into UTF-32.
/// replacement characters are inserted for each detected error. ///
/// let fermata = "Fermata 𝄐"
/// let bytes = fermata.utf8
/// print(Array(bytes))
/// // Prints "[70, 101, 114, 109, 97, 116, 97, 32, 240, 157, 132, 144]"
///
/// var codeUnits: [UTF32.CodeUnit] = []
/// let sink = { codeUnits.append($0) }
/// transcode(bytes.makeIterator(), from: UTF8.self, to: UTF32.self,
/// stoppingOnError: false, sendingOutputTo: sink)
/// print(codeUnits)
/// // Prints "[70, 101, 114, 109, 97, 116, 97, 32, 119056]"
///
/// The `sink` closure is called with each resulting UTF-32 code unit as the
/// function iterates over its input.
///
/// - Parameters:
/// - input: An iterator of code units to be translated, encoded as
/// `inputEncoding`. If `stopOnError` is `false`, the entire iterator will
/// be exhausted. Otherwise, iteration will stop if an encoding error is
/// detected.
/// - inputEncoding: The Unicode encoding of `input`.
/// - outputEncoding: The destination Unicode encoding.
/// - stopOnError: Pass `true` to stop translation when an encoding error is
/// detected in `input`. Otherwise, a Unicode replacement character
/// (`"\u{FFFD}"`) is inserted for each detected error.
/// - processCodeUnit: A closure that processes one `outputEncoding` code
/// unit at a time.
/// - Returns: `true` if the translation detected encoding errors in `input`;
/// otherwise, `false`.
public func transcode< public func transcode<
Input : IteratorProtocol, Input : IteratorProtocol,
InputEncoding : UnicodeCodec, InputEncoding : UnicodeCodec,
@@ -697,24 +914,76 @@ extension UTF8.CodeUnit : _StringElement {
} }
extension UTF16 { extension UTF16 {
/// Returns the number of code units required to encode `x`. /// Returns the number of code units required to encode the given Unicode
/// scalar.
///
/// Because a Unicode scalar value can require up to 21 bits to store its
/// value, some Unicode scalars are represented in UTF-16 by a pair of
/// 16-bit code units. The first and second code units of the pair,
/// designated *leading* and *trailing* surrogates, make up a *surrogate
/// pair*.
///
/// let anA: UnicodeScalar = "A"
/// print(anA.value)
/// // Prints "65"
/// print(UTF16.width(anA))
/// // Prints "1"
///
/// let anApple: UnicodeScalar = "🍎"
/// print(anApple.value)
/// // Prints "127822"
/// print(UTF16.width(anApple))
/// // Prints "2"
///
/// - Parameter x: A Unicode scalar value.
/// - Returns: The width of `x` when encoded in UTF-16, either `1` or `2`.
public static func width(_ x: UnicodeScalar) -> Int { public static func width(_ x: UnicodeScalar) -> Int {
return x.value <= 0xFFFF ? 1 : 2 return x.value <= 0xFFFF ? 1 : 2
} }
/// Returns the high surrogate code unit of a [surrogate pair](http://www.unicode.org/glossary/#surrogate_pair) representing /// Returns the high-surrogate code unit of the surrogate pair representing
/// `x`. /// the specified Unicode scalar.
/// ///
/// - Precondition: `width(x) == 2`. /// Because a Unicode scalar value can require up to 21 bits to store its
/// value, some Unicode scalars are represented in UTF-16 by a pair of
/// 16-bit code units. The first and second code units of the pair,
/// designated *leading* and *trailing* surrogates, make up a *surrogate
/// pair*.
///
/// let apple: UnicodeScalar = "🍎"
/// print(UTF16.leadSurrogate(apple))
/// // Prints "55356"
///
/// - Parameter x: A Unicode scalar value. `x` must be represented by a
/// surrogate pair when encoded in UTF-16. To check whether `x` is
/// represented by a surrogate pair, use `UTF16.width(x) == 2`.
/// - Returns: The leading surrogate code unit of `x` when encoded in UTF-16.
///
/// - SeeAlso: `UTF16.width(_:)`, `UTF16.trailSurrogate(_:)`
public static func leadSurrogate(_ x: UnicodeScalar) -> UTF16.CodeUnit { public static func leadSurrogate(_ x: UnicodeScalar) -> UTF16.CodeUnit {
_precondition(width(x) == 2) _precondition(width(x) == 2)
return UTF16.CodeUnit((x.value - 0x1_0000) >> (10 as UInt32)) + 0xD800 return UTF16.CodeUnit((x.value - 0x1_0000) >> (10 as UInt32)) + 0xD800
} }
/// Returns the low surrogate code unit of a [surrogate pair](http://www.unicode.org/glossary/#surrogate_pair) representing /// Returns the low-surrogate code unit of the surrogate pair representing
/// `x`. /// the specified Unicode scalar.
/// ///
/// - Precondition: `width(x) == 2`. /// Because a Unicode scalar value can require up to 21 bits to store its
/// value, some Unicode scalars are represented in UTF-16 by a pair of
/// 16-bit code units. The first and second code units of the pair,
/// designated *leading* and *trailing* surrogates, make up a *surrogate
/// pair*.
///
/// let apple: UnicodeScalar = "🍎"
/// print(UTF16.trailSurrogate(apple))
/// // Prints "57166"
///
/// - Parameter x: A Unicode scalar value. `x` must be represented by a
/// surrogate pair when encoded in UTF-16. To check whether `x` is
/// represented by a surrogate pair, use `UTF16.width(x) == 2`.
/// - Returns: The trailing surrogate code unit of `x` when encoded in UTF-16.
///
/// - SeeAlso: `UTF16.width(_:)`, `UTF16.leadSurrogate(_:)`
public static func trailSurrogate(_ x: UnicodeScalar) -> UTF16.CodeUnit { public static func trailSurrogate(_ x: UnicodeScalar) -> UTF16.CodeUnit {
_precondition(width(x) == 2) _precondition(width(x) == 2)
return UTF16.CodeUnit( return UTF16.CodeUnit(
@@ -722,10 +991,57 @@ extension UTF16 {
) + 0xDC00 ) + 0xDC00
} }
/// Returns a Boolean value indicating whether the specified code unit is a
/// high-surrogate code unit.
///
/// Here's an example of checking whether each code unit in a string's
/// `utf16` view is a lead surrogate. The `apple` string contains a single
/// emoji character made up of a surrogate pair when encoded in UTF-16.
///
/// let apple = "🍎"
/// for unit in apple.utf16 {
/// print(UTF16.isLeadSurrogate(unit))
/// }
/// // Prints "true"
/// // Prints "false"
///
/// This method does not validate the encoding of a UTF-16 sequence beyond
/// the specified code unit. Specifically, it does not validate that a
/// low-surrogate code unit follows `x`.
///
/// - Parameter x: A UTF-16 code unit.
/// - Returns: `true` if `x` is a high-surrogate code unit; otherwise,
/// `false`.
///
/// - SeeAlso: `UTF16.width(_:)`, `UTF16.leadSurrogate(_:)`
public static func isLeadSurrogate(_ x: CodeUnit) -> Bool { public static func isLeadSurrogate(_ x: CodeUnit) -> Bool {
return 0xD800...0xDBFF ~= x return 0xD800...0xDBFF ~= x
} }
/// Returns a Boolean value indicating whether the specified code unit is a
/// low-surrogate code unit.
///
/// Here's an example of checking whether each code unit in a string's
/// `utf16` view is a trailing surrogate. The `apple` string contains a
/// single emoji character made up of a surrogate pair when encoded in
/// UTF-16.
///
/// let apple = "🍎"
/// for unit in apple.utf16 {
/// print(UTF16.isTrailSurrogate(unit))
/// }
/// // Prints "false"
/// // Prints "true"
///
/// This method does not validate the encoding of a UTF-16 sequence beyond
/// the specified code unit. Specifically, it does not validate that a
/// high-surrogate code unit precedes `x`.
///
/// - Parameter x: A UTF-16 code unit.
/// - Returns: `true` if `x` is a low-surrogate code unit; otherwise,
/// `false`.
///
/// - SeeAlso: `UTF16.width(_:)`, `UTF16.trailSurrogate(_:)`
public static func isTrailSurrogate(_ x: CodeUnit) -> Bool { public static func isTrailSurrogate(_ x: CodeUnit) -> Bool {
return 0xDC00...0xDFFF ~= x return 0xDC00...0xDFFF ~= x
} }
@@ -751,12 +1067,39 @@ extension UTF16 {
} }
/// Returns the number of UTF-16 code units required for the given code unit /// Returns the number of UTF-16 code units required for the given code unit
/// sequence when transcoded to UTF-16, and a bit describing if the sequence /// sequence when transcoded to UTF-16, and a Boolean value indicating
/// was found to contain only ASCII characters. /// whether the sequence was found to contain only ASCII characters.
/// ///
/// If `repairIllFormedSequences` is `true`, the function always succeeds. /// The following example finds the length of the UTF-16 encoding of the
/// If it is `false`, `nil` is returned if an ill-formed code unit sequence is /// string `"Fermata 𝄐"`, starting with its UTF-8 representation.
/// found in `input`. ///
/// let fermata = "Fermata 𝄐"
/// let bytes = fermata.utf8
/// print(Array(bytes))
/// // Prints "[70, 101, 114, 109, 97, 116, 97, 32, 240, 157, 132, 144]"
///
/// let result = transcodedLength(of: bytes.makeIterator(),
/// decodedAs: UTF8.self,
/// repairingIllFormedSequences: false)
/// print(result)
/// // Prints "Optional((10, false))"
///
/// - Parameters:
/// - input: An iterator of code units to be translated, encoded as
/// `sourceEncoding`. If `repairingIllFormedSequences` is `true`, the
/// entire iterator will be exhausted. Otherwise, iteration will stop if
/// an ill-formed sequence is detected.
/// - sourceEncoding: The Unicode encoding of `input`.
/// - repairingIllFormedSequences: Pass `true` to measure the length of
/// `input` even when `input` contains ill-formed sequences. Each
/// ill-formed sequence is replaced with a Unicode replacement character
/// (`"\u{FFFD}"`) and is measured as such. Pass `false` to immediately
/// stop measuring `input` when an ill-formed sequence is encountered.
/// - Returns: A tuple containing the number of UTF-16 code units required to
/// encode `input` and a Boolean value that indicates whether the `input`
/// contained only ASCII characters. If `repairingIllFormedSequences` is
/// `false` and an ill-formed sequence is detected, this method returns
/// `nil`.
public static func transcodedLength< public static func transcodedLength<
Encoding : UnicodeCodec, Input : IteratorProtocol Encoding : UnicodeCodec, Input : IteratorProtocol
where Encoding.CodeUnit == Input.Element where Encoding.CodeUnit == Input.Element
@@ -792,7 +1135,7 @@ extension UTF16 {
} }
} }
// Unchecked init to avoid precondition branches in hot code paths were we // Unchecked init to avoid precondition branches in hot code paths where we
// already know the value is a valid unicode scalar. // already know the value is a valid unicode scalar.
extension UnicodeScalar { extension UnicodeScalar {
/// Create an instance with numeric value `value`, bypassing the regular /// Create an instance with numeric value `value`, bypassing the regular

View File

@@ -12,7 +12,25 @@
// UnicodeScalar Type // UnicodeScalar Type
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
/// A [Unicode scalar value](http://www.unicode.org/glossary/#unicode_scalar_value). /// A Unicode scalar value.
///
/// The `UnicodeScalar` type, representing a single Unicode scalar value, is
/// the element type of a string's `unicodeScalars` collection.
///
/// You can create a `UnicodeScalar` instance by using a string literal that
/// contains a single character representing exactly one Unicode scalar value.
///
/// let letterK: UnicodeScalar = "K"
/// let kim: UnicodeScalar = "김"
/// print(letterK, kim)
/// // Prints "K 김"
///
/// You can also create Unicode scalar values directly from their numeric
/// representation.
///
/// let airplane = UnicodeScalar(9992)
/// print(airplane)
/// // Prints "✈"
@_fixed_layout @_fixed_layout
public struct UnicodeScalar : public struct UnicodeScalar :
_BuiltinUnicodeScalarLiteralConvertible, _BuiltinUnicodeScalarLiteralConvertible,
@@ -20,7 +38,7 @@ public struct UnicodeScalar :
var _value: UInt32 var _value: UInt32
/// A numeric representation of `self`. /// A numeric representation of the Unicode scalar.
public var value: UInt32 { return _value } public var value: UInt32 { return _value }
@_transparent @_transparent
@@ -28,15 +46,35 @@ public struct UnicodeScalar :
self._value = UInt32(value) self._value = UInt32(value)
} }
/// Create an instance initialized to `value`. /// Creates a Unicode scalar with the specified value.
///
/// Do not call this initializer directly. It may be used by the compiler
/// when you use a string literal to initialize a `UnicodeScalar` instance.
///
/// let letterK: UnicodeScalar = "K"
/// print(letterK)
/// // Prints "K"
///
/// In this example, the assignment to the `letterK` constant is handled by
/// this initializer behind the scenes.
@_transparent @_transparent
public init(unicodeScalarLiteral value: UnicodeScalar) { public init(unicodeScalarLiteral value: UnicodeScalar) {
self = value self = value
} }
/// Create an instance with numeric value `v`. /// Creates a Unicode scalar with the specified numeric value.
/// ///
/// - Precondition: `v` is a valid Unicode scalar value. /// For example, the following code sample creates a `UnicodeScalar` instance
/// with a value of an emoji character:
///
/// let codepoint: UInt32 = 127881
/// let emoji = UnicodeScalar(codepoint)
/// print(emoji)
/// // Prints "🎉"
///
/// - Parameter v: The Unicode code point to use for the scalar. `v` must be
/// a valid Unicode scalar value, in the range `0...0xD7FF` or
/// `0xE000...0x10FFFF`.
public init(_ v: UInt32) { public init(_ v: UInt32) {
// Unicode 6.3.0: // Unicode 6.3.0:
// //
@@ -55,29 +93,70 @@ public struct UnicodeScalar :
self._value = v self._value = v
} }
/// Create an instance with numeric value `v`. /// Creates a Unicode scalar with the specified numeric value.
/// ///
/// - Precondition: `v` is a valid Unicode scalar value. /// For example, the following code sample creates a `UnicodeScalar` instance
/// with a value of `밥`, the Korean word for rice:
///
/// let codepoint: UInt16 = 48165
/// let bap = UnicodeScalar(codepoint)
/// print(bap)
/// // Prints "밥"
///
/// - Parameter v: The Unicode code point to use for the scalar. `v` must be
/// a valid Unicode scalar value, in the range `0...0xD7FF` or
/// `0xE000...0xFFFF`.
public init(_ v: UInt16) { public init(_ v: UInt16) {
self = UnicodeScalar(UInt32(v)) self = UnicodeScalar(UInt32(v))
} }
/// Create an instance with numeric value `v`. /// Creates a Unicode scalar with the specified numeric value.
///
/// For example, the following code sample creates a `UnicodeScalar` instance
/// with a value of `7`:
///
/// let codepoint: UInt8 = 55
/// let seven = UnicodeScalar(codepoint)
/// print(seven)
/// // Prints "7"
///
/// - Parameter v: The code point to use for the scalar.
public init(_ v: UInt8) { public init(_ v: UInt8) {
self = UnicodeScalar(UInt32(v)) self = UnicodeScalar(UInt32(v))
} }
/// Create a duplicate of `v`. /// Creates a duplicate of the given Unicode scalar.
public init(_ v: UnicodeScalar) { public init(_ v: UnicodeScalar) {
// This constructor allows one to provide necessary type context to // This constructor allows one to provide necessary type context to
// disambiguate between function overloads on 'String' and 'UnicodeScalar'. // disambiguate between function overloads on 'String' and 'UnicodeScalar'.
self = v self = v
} }
/// Returns a String representation of `self` . /// Returns a string representation of the Unicode scalar.
/// ///
/// - parameter forceASCII: If `true`, forces most values into a numeric /// Scalar values representing characters that are normally unprintable or
/// representation. /// that otherwise require escaping are escaped with a backslash.
///
/// let tab = UnicodeScalar(9)
/// print(tab)
/// // Prints " "
/// print(tab.escaped(asASCII: false))
/// // Prints "\t"
///
/// When the `forceASCII` parameter is `true`, a `UnicodeScalar` instance
/// with a value greater than 127 is represented using an escaped numeric
/// value; otherwise, non-ASCII characters are represented using their
/// typical string value.
///
/// let bap = UnicodeScalar(48165)
/// print(bap.escaped(asASCII: false))
/// // Prints "밥"
/// print(bap.escaped(asASCII: true))
/// // Prints "\u{BC25}"
///
/// - Parameter forceASCII: Pass `true` if you need the result to use only
/// ASCII characters; otherwise, pass `false`.
/// - Returns: A string representation of the scalar.
public func escaped(asASCII forceASCII: Bool) -> String { public func escaped(asASCII forceASCII: Bool) -> String {
func lowNibbleAsHex(_ v: UInt32) -> String { func lowNibbleAsHex(_ v: UInt32) -> String {
let nibble = v & 15 let nibble = v & 15
@@ -137,8 +216,21 @@ public struct UnicodeScalar :
} }
} }
/// Returns `true` if this is an ASCII character (code point 0 to 127 /// A Boolean value indicating whether the Unicode scalar is an ASCII
/// inclusive). /// character.
///
/// ASCII characters have a scalar value between 0 and 127, inclusive. For
/// example:
///
/// let canyon = "Cañón"
/// for scalar in canyon.unicodeScalars {
/// print(scalar, scalar.isASCII, scalar.value)
/// }
/// // Prints "C true 67"
/// // Prints "a true 97"
/// // Prints "ñ false 241"
/// // Prints "ó false 243"
/// // Prints "n true 110"
public var isASCII: Bool { public var isASCII: Bool {
return value <= 127 return value <= 127
} }
@@ -155,33 +247,41 @@ public struct UnicodeScalar :
} }
extension UnicodeScalar : CustomStringConvertible, CustomDebugStringConvertible { extension UnicodeScalar : CustomStringConvertible, CustomDebugStringConvertible {
/// A textual representation of `self`. /// An escaped textual representation of the Unicode scalar.
public var description: String { public var description: String {
return "\"\(escaped(asASCII: false))\"" return "\"\(escaped(asASCII: false))\""
} }
/// A textual representation of `self`, suitable for debugging. /// An escaped textual representation of the Unicode scalar, suitable for
/// debugging.
public var debugDescription: String { public var debugDescription: String {
return "\"\(escaped(asASCII: true))\"" return "\"\(escaped(asASCII: true))\""
} }
} }
extension UnicodeScalar : Hashable { extension UnicodeScalar : Hashable {
/// The hash value. /// The Unicode scalar's hash value.
/// ///
/// **Axiom:** `x == y` implies `x.hashValue == y.hashValue`. /// Hash values are not guaranteed to be equal across different executions of
/// /// your program. Do not save hash values to use during a future execution.
/// - Note: The hash value is not guaranteed to be stable across
/// different invocations of the same program. Do not persist the
/// hash value across program runs.
public var hashValue: Int { public var hashValue: Int {
return Int(self.value) return Int(self.value)
} }
} }
extension UnicodeScalar { extension UnicodeScalar {
/// Construct with value `v`. /// Creates a Unicode scalar with the specified numeric value.
/// ///
/// - Precondition: `v` is a valid unicode scalar value. /// For example, the following code sample creates a `UnicodeScalar` instance
/// with a value of an emoji character:
///
/// let codepoint = 127881
/// let emoji = UnicodeScalar(codepoint)
/// print(emoji)
/// // Prints "🎉"
///
/// - Parameter v: The Unicode code point to use for the scalar. `v` must be
/// a valid Unicode scalar value, in the range `0...0xD7FF` or
/// `0xE000...0x10FFFF`.
public init(_ v: Int) { public init(_ v: Int) {
self = UnicodeScalar(UInt32(v)) self = UnicodeScalar(UInt32(v))
} }
@@ -244,11 +344,10 @@ extension UnicodeScalar.UTF16View : RandomAccessCollection {
return 0 return 0
} }
/// The "past the end" position. /// The "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
/// ///
/// `endIndex` is not a valid argument to `subscript`, and is always /// If the collection is empty, `endIndex` is equal to `startIndex`.
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
var endIndex: Int { var endIndex: Int {
return 0 + UTF16.width(value) return 0 + UTF16.width(value)
} }
@@ -273,7 +372,7 @@ func _ascii16(_ c: UnicodeScalar) -> UTF16.CodeUnit {
extension UnicodeScalar { extension UnicodeScalar {
/// Creates an instance of the NUL scalar value. /// Creates an instance of the NUL scalar value.
@available(*, unavailable, message: "use the 'UnicodeScalar(\"\\0\")'") @available(*, unavailable, message: "use 'UnicodeScalar(0)'")
public init() { public init() {
Builtin.unreachable() Builtin.unreachable()
} }

View File

@@ -48,11 +48,11 @@ public struct Unsafe${Mutable}BufferPointer<Element>
return 0 return 0
} }
/// The "past the end" position; always identical to `count`. /// The "past the end" position---that is, the position one greater than the
/// last valid subscript argument.
/// ///
/// `endIndex` is not a valid argument to `subscript`, and is always /// The `endIndex` property of an `Unsafe${Mutable}BufferPointer` instance is
/// reachable from `startIndex` by zero or more applications of /// always identical to `count`.
/// `index(after:)`.
public var endIndex: Int { public var endIndex: Int {
return count return count
} }