[stdlib] Revise documentation for string-related types

This documentation revision covers a large number of types & protocols:
String, its views and their indices, the Unicode codec types and protocol,
as well as Character, UnicodeScalar, and StaticString, among others.

This also includes a few small changes across the standard library for
consistency.
This commit is contained in:
Nate Cook
2016-04-06 13:03:46 -05:00
parent 7f31d4e889
commit 44b2d56a7f
35 changed files with 2998 additions and 670 deletions

View File

@@ -482,8 +482,8 @@ public struct ${Self}<Element>
%end
}
/// The array's "past the end" position, or one greater than the last valid
/// subscript argument.
/// The array's "past the end" position---that is, the position one greater
/// than the last valid subscript argument.
///
/// When you need a range that includes the last element of an array, use the
/// half-open range operator (`..<`) with `endIndex`. The `..<` operator
@@ -934,7 +934,7 @@ extension ${Self} : ArrayLiteralConvertible {
// Optimized implementation for Array
/// Creates an array from the given array literal.
///
/// Don't directly call this initializer, which is used by the compiler
/// Do not call this initializer directly. It is used by the compiler
/// when you use an array literal. Instead, create a new array by using an
/// array literal as its value. To do this, enclose a comma-separated list of
/// values in square brackets.
@@ -951,7 +951,7 @@ extension ${Self} : ArrayLiteralConvertible {
%else:
/// Creates an array from the given array literal.
///
/// Don't directly call this initializer, which is used by the compiler when
/// Do not call this initializer directly. It is used by the compiler when
/// you use an array literal. Instead, create a new array by using an array
/// literal as its value. To do this, enclose a comma-separated list of
/// values in square brackets.

View File

@@ -79,7 +79,7 @@ extension Bool : _BuiltinBooleanLiteralConvertible, BooleanLiteralConvertible {
/// Creates an instance initialized to the specified Boolean literal.
///
/// Don't directly call this initializer, which is used by the compiler when
/// Do not call this initializer directly. It is used by the compiler when
/// you use a Boolean literal. Instead, create a new `Bool` instance by
/// using one of the Boolean literals `true` and `false`.
///

View File

@@ -16,25 +16,64 @@ import SwiftShims
extension String {
/// Create a new `String` by copying the nul-terminated UTF-8 data
/// referenced by a `cString`.
/// Creates a new string by copying the null-terminated UTF-8 data referenced
/// by the given pointer.
///
/// If `cString` contains ill-formed UTF-8 code unit sequences, replaces them
/// with replacement characters (U+FFFD).
/// If `cString` contains ill-formed UTF-8 code unit sequences, this
/// initializer replaces them with the Unicode replacement character
/// (`"\u{FFFD}"`).
///
/// - Precondition: `cString != nil`
/// The following example calls this initializer with pointers to the
/// contents of two different `CChar` arrays---the first with well-formed
/// UTF-8 code unit sequences and the second with an ill-formed sequence at
/// the end.
///
/// let validUTF8: [CChar] = [67, 97, 102, -61, -87, 0]
/// validUTF8.withUnsafeBufferPointer { ptr in
/// let s = String(cString: ptr.baseAddress!)
/// print(s)
/// }
/// // Prints "Café"
///
/// let invalidUTF8: [CChar] = [67, 97, 102, -61, 0]
/// invalidUTF8.withUnsafeBufferPointer { ptr in
/// let s = String(cString: ptr.baseAddress!)
/// print(s)
/// }
/// // Prints "Caf<EFBFBD>"
///
/// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
public init(cString: UnsafePointer<CChar>) {
self = String.decodeCString(UnsafePointer(cString), as: UTF8.self,
repairingInvalidCodeUnits: true)!.result
}
/// Create a new `String` by copying the nul-terminated UTF-8 data
/// referenced by a `cString`.
/// Creates a new string by copying and validating the null-terminated UTF-8
/// data referenced by the given pointer.
///
/// Does not try to repair ill-formed UTF-8 code unit sequences, fails if any
/// such sequences are found.
/// This initializer does not try to repair ill-formed UTF-8 code unit
/// sequences. If any are found, the result of the initializer is `nil`.
///
/// - Precondition: `cString != nil`
/// The following example calls this initializer with pointers to the
/// contents of two different `CChar` arrays---the first with well-formed
/// UTF-8 code unit sequences and the second with an ill-formed sequence at
/// the end.
///
/// let validUTF8: [CChar] = [67, 97, 102, -61, -87, 0]
/// validUTF8.withUnsafeBufferPointer { ptr in
/// let s = String(validatingUTF8: ptr.baseAddress!)
/// print(s)
/// }
/// // Prints "Optional(Café)"
///
/// let invalidUTF8: [CChar] = [67, 97, 102, -61, 0]
/// invalidUTF8.withUnsafeBufferPointer { ptr in
/// let s = String(validatingUTF8: ptr.baseAddress!)
/// print(s)
/// }
/// // Prints "nil"
///
/// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
public init?(validatingUTF8 cString: UnsafePointer<CChar>) {
guard let (result, _) = String.decodeCString(
UnsafePointer(cString),
@@ -45,12 +84,50 @@ extension String {
self = result
}
/// Create a new `String` by copying the nul-terminated data
/// referenced by a `cString` using `encoding`.
/// Creates a new string by copying the null-terminated data referenced by
/// the given pointer using the specified encoding.
///
/// Returns `nil` if the `cString` is `nil` or if it contains ill-formed code
/// units and no repairing has been requested. Otherwise replaces
/// ill-formed code units with replacement characters (U+FFFD).
/// When you pass `true` as `isRepairing`, this method replaces ill-formed
/// sequences with the Unicode replacement character (`"\u{FFFD}"`);
/// otherwise, an ill-formed sequence causes this method to stop decoding
/// and return `nil`.
///
/// The following example calls this method with pointers to the contents of
/// two different `UInt8` arrays---the first with well-formed UTF-8 code
/// unit sequences and the second with an ill-formed sequence at the end.
///
/// let validUTF8: [UInt8] = [67, 97, 102, 195, 169, 0]
/// validUTF8.withUnsafeBufferPointer { ptr in
/// let s = String.decodeCString(ptr.baseAddress,
/// as: UTF8.self,
/// repairingInvalidCodeUnits: true)
/// print(s)
/// }
/// // Prints "Optional((Café, false))"
///
/// let invalidUTF8: [UInt8] = [67, 97, 102, 195, 0]
/// invalidUTF8.withUnsafeBufferPointer { ptr in
/// let s = String.decodeCString(ptr.baseAddress,
/// as: UTF8.self,
/// repairingInvalidCodeUnits: true)
/// print(s)
/// }
/// // Prints "Optional((Caf<EFBFBD>, true))"
///
/// - Parameters:
/// - cString: A pointer to a null-terminated code sequence encoded in
/// `encoding`.
/// - encoding: The Unicode encoding of the data referenced by `cString`.
/// - isRepairing: Pass `true` to create a new string, even when the data
/// referenced by `cString` contains ill-formed sequences. Ill-formed
/// sequences are replaced with the Unicode replacement character
/// (`"\u{FFFD}"`). Pass `false` to interrupt the creation of the new
/// string if an ill-formed sequence is detected.
/// - Returns: A tuple with the new string and a Boolean value that indicates
/// whether any repairs were made. If `isRepairing` is `false` and an
/// ill-formed sequence is detected, this method returns `nil`.
///
/// - SeeAlso: `UnicodeCodec`
public static func decodeCString<Encoding : UnicodeCodec>(
_ cString: UnsafePointer<Encoding.CodeUnit>?,
as encoding: Encoding.Type,

View File

@@ -10,9 +10,56 @@
//
//===----------------------------------------------------------------------===//
/// `Character` represents some Unicode grapheme cluster as
/// defined by a canonical, localized, or otherwise tailored
/// segmentation algorithm.
/// A single extended grapheme cluster, which approximates a user-perceived
/// character.
///
/// The `Character` type represents a character made up of one or more Unicode
/// scalar values, grouped by a Unicode boundary algorithm. Generally, a
/// `Character` instance matches what the reader of a string will perceive as
/// a single character. The number of visible characters is generally the most
/// natural way to count the length of a string.
///
/// let greeting = "Hello! 🐥"
/// print("Character count: \(greeting.characters.count)")
/// // Prints "Character count: 8"
///
/// Because each character in a string can be made up of one or more Unicode
/// code points, the number of characters in a string may not match the length
/// of the Unicode code point representation or the length of the string in a
/// particular binary representation.
///
/// print("Unicode code point count: \(greeting.unicodeScalars.count)")
/// // Prints "Unicode code point count: 15"
///
/// print("UTF-8 representation count: \(greeting.utf8.count)")
/// // Prints "UTF-8 representation count: 18"
///
/// Every `Character` instance is composed of one or more Unicode code points
/// that are grouped together as an *extended grapheme cluster*. The way these
/// code points are grouped is defined by a canonical, localized, or otherwise
/// tailored Unicode segmentation algorithm.
///
/// For example, a country's Unicode flag character is made up of two regional
/// indicator code points that correspond to that country's ISO 3166-1 alpha-2
/// code. The alpha-2 code for the United States is "US", so its flag
/// character is made up of the Unicode code points `"\u{1F1FA}"` (REGIONAL
/// INDICATOR SYMBOL LETTER U) and `"\u{1F1F8}"` (REGIONAL INDICATOR SYMBOL
/// LETTER S). When placed next to each other in a Swift string literal, these
/// two code points are combined into a single grapheme cluster, represented
/// by a `Character` instance in Swift.
///
/// let usFlag: Character = "\u{1F1FA}\u{1F1F8}"
/// print(usFlag)
/// // Prints "🇺🇸"
///
/// For more information about the Unicode terms used in this discussion, see
/// the [Unicode.org glossary][glossary]. In particular, this discussion
/// mentions [extended grapheme clusters][clusters] and [Unicode scalar
/// values][scalars].
///
/// [glossary]: http://www.unicode.org/glossary/
/// [clusters]: http://www.unicode.org/glossary/#extended_grapheme_cluster
/// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value
public struct Character :
_BuiltinExtendedGraphemeClusterLiteralConvertible,
ExtendedGraphemeClusterLiteralConvertible, Equatable, Hashable, Comparable {
@@ -33,7 +80,9 @@ public struct Character :
case small(Builtin.Int63)
}
/// Construct a `Character` containing just the given `scalar`.
/// Creates a character containing the given Unicode scalar value.
///
/// - Parameter scalar: The Unicode scalar value to convert into a character.
public init(_ scalar: UnicodeScalar) {
var asInt: UInt64 = 0
var shift: UInt64 = 0
@@ -55,7 +104,17 @@ public struct Character :
UTF32.self, input: CollectionOfOne(UInt32(value))))
}
/// Create an instance initialized to `value`.
/// Creates a character with the specified value.
///
/// Do not call this initializer directly. It is used by the compiler when
/// you use a string literal to initialize a `Character` instance. For
/// example:
///
/// let snowflake: Character = "❄︎"
/// print(snowflake)
/// // Prints "❄︎"
///
/// The assignment to the `snowflake` constant calls this initializer behind
/// the scenes.
public init(unicodeScalarLiteral value: Character) {
self = value
}
@@ -73,14 +132,31 @@ public struct Character :
isASCII: isASCII))
}
/// Create an instance initialized to `value`.
/// Creates a character with the specified value.
///
/// Do not call this initializer directly. It is used by the compiler when
/// you use a string literal to initialize a `Character` instance. For
/// example:
///
/// let oBreve: Character = "o\u{306}"
/// print(oBreve)
/// // Prints "ŏ"
///
/// The assignment to the `oBreve` constant calls this initializer behind the
/// scenes.
public init(extendedGraphemeClusterLiteral value: Character) {
self = value
}
/// Create an instance from a single-character `String`.
/// Creates a character from a single-character string.
///
/// - Precondition: `s` contains exactly one extended grapheme cluster.
/// The following example creates a new character from the uppercase version
/// of a string that only holds one character.
///
/// let a = "a"
/// let capitalA = Character(a.uppercased())
///
/// - Parameter s: The single-character string to convert to a `Character`
/// instance. `s` must contain exactly one extended grapheme cluster.
public init(_ s: String) {
// The small representation can accept up to 8 code units as long
// as the last one is a continuation. Since the high bit of the
@@ -258,13 +334,10 @@ public struct Character :
var data: UInt64
}
/// The hash value.
/// The character's hash value.
///
/// **Axiom:** `x == y` implies `x.hashValue == y.hashValue`.
///
/// - Note: The hash value is not guaranteed to be stable across
/// different invocations of the same program. Do not persist the
/// hash value across program runs.
/// Hash values are not guaranteed to be equal across different executions of
/// your program. Do not save hash values to use during a future execution.
public var hashValue: Int {
// FIXME(performance): constructing a temporary string is extremely
// wasteful and inefficient.
@@ -281,14 +354,16 @@ public struct Character :
}
extension Character : CustomDebugStringConvertible {
/// A textual representation of `self`, suitable for debugging.
/// A textual representation of the character, suitable for debugging.
public var debugDescription: String {
return String(self).debugDescription
}
}
extension String {
/// Construct an instance containing just the given `Character`.
/// Creates a string containing the given character.
///
/// - Parameter c: The character to convert to a string.
public init(_ c: Character) {
switch c._representation {
case let .small(_63bits):

View File

@@ -212,8 +212,8 @@ public struct CountableClosedRange<
return ClosedRangeIndex(lowerBound)
}
/// The range's "past the end" position, or one greater than the last valid
/// subscript argument.
/// The range's "past the end" position---that is, the position one greater
/// than the last valid subscript argument.
public var endIndex: ClosedRangeIndex<Bound> {
return ClosedRangeIndex()
}

View File

@@ -38,8 +38,8 @@ public protocol IndexableBase {
/// If the collection is empty, `startIndex` is equal to `endIndex`.
var startIndex: Index { get }
/// The collection's "past the end" position, or one greater than the last
/// valid subscript argument.
/// The collection's "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
///
/// When you need a range that includes the last element of a collection, use
/// the half-open range operator (`..<`) with `endIndex`. The `..<` operator
@@ -157,8 +157,11 @@ public protocol IndexableBase {
/// In most cases, it's best to ignore this protocol and use the `Collection`
/// protocol instead, because it has a more complete interface.
public protocol Indexable : IndexableBase {
/// A type that can represent the number of steps between a pair of
/// indices.
/// A type used to represent the number of steps between two indices, where
/// one value is reachable from the other.
///
/// In Swift, *reachability* refers to the ability to produce one value from
/// the other through zero or more applications of `index(after:)`.
associatedtype IndexDistance : SignedInteger = Int
/// Returns an index that is the specified distance from the given index.

View File

@@ -50,10 +50,11 @@ public struct CollectionOfOne<Element>
return 0
}
/// The "past the end" position; always identical to
/// `index(after: startIndex)`.
/// The "past the end" position---that is, the position one greater than the
/// last valid subscript argument.
///
/// - Note: `endIndex` is not a valid argument to `subscript`.
/// In a `CollectionOfOne` instance, `endIndex` is always identical to
/// `index(after: startIndex)`.
public var endIndex: Int {
return 1
}

View File

@@ -297,11 +297,31 @@ public protocol _BuiltinUnicodeScalarLiteralConvertible {
init(_builtinUnicodeScalarLiteral value: Builtin.Int32)
}
/// Conforming types can be initialized with string literals
/// containing a single [Unicode scalar value](http://www.unicode.org/glossary/#unicode_scalar_value).
/// A type that can be initialized with a string literal containing a single
/// Unicode scalar value.
///
/// The `String`, `StaticString`, `Character`, and `UnicodeScalar` types all
/// conform to the `UnicodeScalarLiteralConvertible` protocol. You can
/// initialize a variable of any of these types using a string literal that
/// holds a single Unicode scalar.
///
/// let ñ: UnicodeScalar = "ñ"
/// print(ñ)
/// // Prints "ñ"
///
/// Conforming to UnicodeScalarLiteralConvertible
/// =============================================
///
/// To add `UnicodeScalarLiteralConvertible` conformance to your custom type,
/// implement the required initializer.
public protocol UnicodeScalarLiteralConvertible {
/// A type that can represent a Unicode scalar literal.
///
/// Valid types for `UnicodeScalarLiteralType` are `UnicodeScalar`,
/// `String`, and `StaticString`.
associatedtype UnicodeScalarLiteralType : _BuiltinUnicodeScalarLiteralConvertible
/// Create an instance initialized to `value`.
/// Creates an instance initialized to the given value.
init(unicodeScalarLiteral value: UnicodeScalarLiteralType)
}
@@ -314,14 +334,40 @@ public protocol _BuiltinExtendedGraphemeClusterLiteralConvertible
isASCII: Builtin.Int1)
}
/// Conforming types can be initialized with string literals
/// containing a single [Unicode extended grapheme cluster](http://www.unicode.org/glossary/#extended_grapheme_cluster).
/// A type that can be initialized with a string literal containing a single
/// extended grapheme cluster.
///
/// An *extended grapheme cluster* is a group of one or more Unicode code
/// points that approximates a single user-perceived character. Many
/// individual characters, such as "é", "김", and "🇮🇳", can be made up of
/// multiple Unicode code points. These code points are combined by Unicode's
/// boundary algorithms into extended grapheme clusters.
///
/// The `String`, `StaticString`, and `Character` types conform to the
/// `ExtendedGraphemeClusterLiteralConvertible` protocol. You can initialize a
/// variable or constant of any of these types using a string literal that
/// holds a single character.
///
/// let snowflake: Character = "❄︎"
/// print(snowflake)
/// // Prints "❄︎"
///
/// Conforming to ExtendedGraphemeClusterLiteralConvertible
/// =======================================================
///
/// To add `ExtendedGraphemeClusterLiteralConvertible` conformance to your
/// custom type, implement the required initializer.
public protocol ExtendedGraphemeClusterLiteralConvertible
: UnicodeScalarLiteralConvertible {
/// A type that can represent an extended grapheme cluster literal.
///
/// Valid types for `ExtendedGraphemeClusterLiteralType` are `Character`,
/// `String`, and `StaticString`.
associatedtype ExtendedGraphemeClusterLiteralType
: _BuiltinExtendedGraphemeClusterLiteralConvertible
/// Create an instance initialized to `value`.
/// Creates an instance initialized to the given value.
init(extendedGraphemeClusterLiteral value: ExtendedGraphemeClusterLiteralType)
}
@@ -342,14 +388,30 @@ public protocol _BuiltinUTF16StringLiteralConvertible
utf16CodeUnitCount: Builtin.Word)
}
/// Conforming types can be initialized with arbitrary string literals.
/// A type that can be initialized with a string literal.
///
/// The `String` and `StaticString` types conform to the
/// `StringLiteralConvertible` protocol. You can initialize a variable or
/// constant of either of these types using a string literal of any length.
///
/// let picnicGuest = "Deserving porcupine"
///
/// Conforming to StringLiteralConvertible
/// ======================================
///
/// To add `StringLiteralConvertible` conformance to your custom type,
/// implement the required initializer.
public protocol StringLiteralConvertible
: ExtendedGraphemeClusterLiteralConvertible {
// FIXME: when we have default function implementations in protocols, provide
// an implementation of init(extendedGraphemeClusterLiteral:).
/// A type that can represent a string literal.
///
/// Valid types for `StringLiteralType` are `String` and `StaticString`.
associatedtype StringLiteralType : _BuiltinStringLiteralConvertible
/// Create an instance initialized to `value`.
/// Creates an instance initialized to the given string value.
init(stringLiteral value: StringLiteralType)
}
@@ -537,12 +599,80 @@ public protocol DictionaryLiteralConvertible {
init(dictionaryLiteral elements: (Key, Value)...)
}
/// Conforming types can be initialized with string interpolations
/// containing `\(`...`)` clauses.
/// A type that can be initialized by string interpolation with a string
/// literal that includes expressions.
///
/// Use string interpolation to include one or more expressions in a string
/// literal, wrapped in a set of parentheses and prefixed by a backslash. For
/// example:
///
/// let price = 2
/// let number = 3
/// let message = "One cookie: $\(price), \(number) cookies: $\(price * number)."
/// print(message)
/// // Prints "One cookie: $2, 3 cookies: $6."
///
/// Conforming to the StringInterpolationConvertible Protocol
/// =========================================================
///
/// To use string interpolation to initialize instances of your custom type,
/// implement the required initializers for `StringInterpolationConvertible`
/// conformance. String interpolation is a multiple-step initialization
/// process. When you use string interpolation, the following steps occur:
///
/// 1. The string literal is broken into pieces. Each segment of the string
/// literal before, between, and after any included expressions, along with
/// each of the individual expressions themselves, is passed to the
/// `init(stringInterpolationSegment:)` initializer.
/// 2. The results of those calls are passed to the
/// `init(stringInterpolation:)` initializer in the order in which they
/// appear in the string literal.
///
/// In other words, initializing the `message` constant in the example above
/// using string interpolation is equivalent to the following code:
///
/// let message = String(stringInterpolation:
/// String(stringInterpolationSegment: "One cookie: $"),
/// String(stringInterpolationSegment: price),
/// String(stringInterpolationSegment: ", "),
/// String(stringInterpolationSegment: number),
/// String(stringInterpolationSegment: " cookies: $"),
/// String(stringInterpolationSegment: price * number),
/// String(stringInterpolationSegment: "."))
public protocol StringInterpolationConvertible {
/// Create an instance by concatenating the elements of `strings`.
/// Creates an instance by concatenating the given values.
///
/// Do not call this initializer directly. It is used by the compiler when
/// you use string interpolation. For example:
///
/// let s = "\(5) x \(2) = \(5 * 2)"
/// print(s)
/// // Prints "5 x 2 = 10"
///
/// After calling `init(stringInterpolationSegment:)` with each segment of
/// the string literal, this initializer is called with their string
/// representations.
///
/// - Parameter strings: An array of instances of the conforming type.
init(stringInterpolation strings: Self...)
/// Create an instance containing `expr`'s `print` representation.
/// Creates an instance containing the appropriate representation for the
/// given value.
///
/// Do not call this initializer directly. It is used by the compiler for
/// each string interpolation segment when you use string interpolation. For
/// example:
///
/// let s = "\(5) x \(2) = \(5 * 2)"
/// print(s)
/// // Prints "5 x 2 = 10"
///
/// This initializer is called five times when processing the string literal
/// in the example above; once each for the following: the integer `5`, the
/// string `" x "`, the integer `2`, the string `" = "`, and the result of
/// the expression `5 * 2`.
///
/// - Parameter expr: The expression to represent.
init<T>(stringInterpolationSegment expr: T)
}

View File

@@ -879,11 +879,11 @@ public struct ${Self}<Element>
return AnyIndex(_box: _box._startIndex)
}
/// The collection's "past the end" position.
/// The collection's "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
///
/// `endIndex` is not a valid argument to `subscript`, and is always
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
/// `endIndex` is always reachable from `startIndex` by zero or more
/// applications of `index(after:)`.
public var endIndex: AnyIndex {
return AnyIndex(_box: _box._endIndex)
}

View File

@@ -209,13 +209,11 @@ public struct ${Self}<
return LazyFilterIndex(base: index)
}
/// The collection's "past the end" position.
/// The collection's "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
///
/// `endIndex` is not a valid argument to `subscript`, and is always
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
///
/// - Complexity: O(1).
/// `endIndex` is always reachable from `startIndex` by zero or more
/// applications of `index(after:)`.
public var endIndex: Index {
return LazyFilterIndex(base: _base.endIndex)
}

View File

@@ -497,10 +497,10 @@ public struct Set<Element : Hashable> :
return _variantStorage.startIndex
}
/// The "past the end" position for iterating members of the set.
/// The "past the end" position for the set---that is, the position one
/// greater than the last valid subscript argument.
///
/// The `endIndex` property is never a valid subscript argument. If the set
/// is empty, `endIndex` is equal to `startIndex`.
/// If the set is empty, `endIndex` is equal to `startIndex`.
public var endIndex: Index {
return _variantStorage.endIndex
}
@@ -681,7 +681,7 @@ public struct Set<Element : Hashable> :
//
/// Creates a set containing the elements of the given array literal.
///
/// Don't directly call this initializer, which is used by the compiler when
/// Do not call this initializer directly. It is used by the compiler when
/// you use an array literal. Instead, create a new set using an array
/// literal as its value by enclosing a comma-separated list of values in
/// square brackets. You can use an array literal anywhere a set is expected
@@ -1087,9 +1087,8 @@ public struct Set<Element : Hashable> :
///
/// Two sets that are equal will always have equal hash values.
///
/// - Note: The hash value is not guaranteed to be stable across
/// different invocations of the same program. Do not persist the hash value
/// across program runs.
/// Hash values are not guaranteed to be equal across different executions of
/// your program. Do not save hash values to use during a future execution.
public var hashValue: Int {
// FIXME: <rdar://problem/18915294> Cache Set<T> hashValue
var result: Int = _mixInt(0)
@@ -1683,7 +1682,8 @@ public struct Dictionary<Key : Hashable, Value> :
return _variantStorage.startIndex
}
/// The dictionary's "past the end" position.
/// The dictionary's "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
///
/// If the collection is empty, `endIndex` is equal to `startIndex`.
///
@@ -1957,7 +1957,7 @@ public struct Dictionary<Key : Hashable, Value> :
/// Creates a dictionary initialized with a dictionary literal.
///
/// Don't directly call this initializer, which is called by the compiler to
/// Do not call this initializer directly. It is called by the compiler to
/// handle dictionary literals. To use a dictionary literal as the initial
/// value of a dictionary, enclose a comma-separated list of key-value pairs
/// in square brackets.

View File

@@ -30,8 +30,8 @@ public enum ImplicitlyUnwrappedOptional<Wrapped> : NilLiteralConvertible {
/// Creates an instance initialized with `nil`.
///
/// Don't use this initializer directly; it is used by the compiler when you
/// initialize an `Optional` instance with a `nil` literal. For example:
/// Do not call this initializer directly. It is used by the compiler when
/// you initialize an `ImplicitlyUnwrappedOptional` instance with a `nil`
/// literal. For example:
///
/// let i: Index! = nil
@_transparent

View File

@@ -116,11 +116,11 @@ extension ${Self} : ${TraversalCollection} {
return _base.startIndex
}
/// The collection's "past the end" position.
/// The collection's "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
///
/// `endIndex` is not a valid argument to `subscript`, and is always
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
/// `endIndex` is always reachable from `startIndex` by zero or more
/// applications of `index(after:)`.
public var endIndex: Base.Index {
return _base.endIndex
}

View File

@@ -30,9 +30,16 @@ public func withExtendedLifetime<T, Result>(
extension String {
/// Invoke `f` on the contents of this string, represented as
/// a nul-terminated array of char, ensuring that the array's
/// lifetime extends through the execution of `f`.
/// Invokes the given closure on the contents of the string, represented as a
/// pointer to a null-terminated sequence of UTF-8 code units.
///
/// The `withCString(_:)` method ensures that the sequence's lifetime extends
/// through the execution of `f`.
///
/// - Parameter f: A closure that takes a pointer to the string's UTF-8 code
/// unit sequence as its sole argument. If the closure has a return value,
/// it is used as the return value of the `withCString(_:)` method.
/// - Returns: The return value of the `f` closure, if any.
public func withCString<Result>(
_ f: @noescape (UnsafePointer<Int8>) throws -> Result
) rethrows -> Result {

View File

@@ -815,7 +815,7 @@ extension DictionaryLiteral : RandomAccessCollection {
/// `endIndex`.
public var startIndex: Int { return 0 }
/// The collection's "past the end" position, or one
/// The collection's "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
///
/// If the `DictionaryLiteral` instance is empty, `endIndex` is equal to
@@ -839,16 +839,44 @@ extension DictionaryLiteral : RandomAccessCollection {
}
extension String {
/// Initialize `self` with the textual representation of `instance`.
/// Creates a string representing the given value.
///
/// * If `Subject` conforms to `Streamable`, the result is obtained by
/// calling `instance.write(to: s)` on an empty string `s`.
/// * Otherwise, if `Subject` conforms to `CustomStringConvertible`, the
/// result is `instance`'s `description`
/// * Otherwise, if `Subject` conforms to `CustomDebugStringConvertible`,
/// the result is `instance`'s `debugDescription`
/// * Otherwise, an unspecified result is supplied automatically by
/// the Swift standard library.
/// Use this initializer to convert an instance of any type to its preferred
/// representation as a `String` instance. The initializer creates the
/// string representation of `instance` in one of the following ways,
/// depending on its protocol conformance:
///
/// - If `instance` conforms to the `Streamable` protocol, the result is
/// obtained by calling `instance.write(to: s)` on an empty string `s`.
/// - If `instance` conforms to the `CustomStringConvertible` protocol, the
/// result is `instance.description`.
/// - If `instance` conforms to the `CustomDebugStringConvertible` protocol,
/// the result is `instance.debugDescription`.
/// - An unspecified result is supplied automatically by the Swift standard
/// library.
///
/// For example, this custom `Point` struct uses the default representation
/// supplied by the standard library.
///
/// struct Point {
/// let x: Int, y: Int
/// }
///
/// let p = Point(x: 21, y: 30)
/// print(String(p))
/// // Prints "Point(x: 21, y: 30)"
///
/// After adding `CustomStringConvertible` conformance by implementing the
/// `description` property, `Point` provides its own custom representation.
///
/// extension Point: CustomStringConvertible {
/// var description: String {
/// return "(\(x), \(y))"
/// }
/// }
///
/// print(String(p))
/// // Prints "(21, 30)"
///
/// - SeeAlso: `String.init<Subject>(reflecting: Subject)`
public init<Subject>(_ instance: Subject) {
@@ -856,20 +884,49 @@ extension String {
_print_unlocked(instance, &self)
}
/// Initialize `self` with a detailed textual representation of
/// `subject`, suitable for debugging.
/// Creates a string with a detailed representation of the given value,
/// suitable for debugging.
///
/// * If `Subject` conforms to `CustomDebugStringConvertible`, the result
/// is `subject`'s `debugDescription`.
/// Use this initializer to convert an instance of any type to its custom
/// debugging representation. The initializer creates the string
/// representation of `subject` in one of the following ways, depending on
/// its protocol conformance:
///
/// * Otherwise, if `Subject` conforms to `CustomStringConvertible`,
/// the result is `subject`'s `description`.
///
/// * Otherwise, if `Subject` conforms to `Streamable`, the result is
/// - If `subject` conforms to the `CustomDebugStringConvertible` protocol,
/// the result is `subject.debugDescription`.
/// - If `subject` conforms to the `CustomStringConvertible` protocol, the
/// result is `subject.description`.
/// - If `subject` conforms to the `Streamable` protocol, the result is
/// obtained by calling `subject.write(to: s)` on an empty string `s`.
/// - Otherwise, an unspecified result is supplied automatically by the Swift
/// standard library.
///
/// * Otherwise, an unspecified result is supplied automatically by
/// the Swift standard library.
/// For example, this custom `Point` struct uses the default representation
/// supplied by the standard library.
///
/// struct Point {
/// let x: Int, y: Int
/// }
///
/// let p = Point(x: 21, y: 30)
/// print(String(reflecting: p))
/// // Prints "p: Point = {
/// // x = 21
/// // y = 30
/// // }"
///
/// After adding `CustomDebugStringConvertible` conformance by implementing
/// the `debugDescription` property, `Point` provides its own custom
/// debugging representation.
///
/// extension Point: CustomDebugStringConvertible {
/// var debugDescription: String {
/// return "Point(x: \(x), y: \(y))"
/// }
/// }
///
/// print(String(reflecting: p))
/// // Prints "Point(x: 21, y: 30)"
///
/// - SeeAlso: `String.init<Subject>(Subject)`
public init<Subject>(reflecting subject: Subject) {

View File

@@ -36,8 +36,8 @@ public protocol MutableIndexable : Indexable {
/// If the collection is empty, `startIndex` is equal to `endIndex`.
var startIndex: Index { get }
/// The collection's "past the end" position, or one greater than the last
/// valid subscript argument.
/// The collection's "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
///
/// When you need a range that includes the last element of a collection, use
/// the half-open range operator (`..<`) with `endIndex`. The `..<` operator

View File

@@ -194,7 +194,7 @@ public enum Optional<Wrapped> : NilLiteralConvertible {
/// Creates an instance initialized with `nil`.
///
/// Don't use this initializer directly; it is used by the compiler when you
/// Do not call this initializer directly. It is used by the compiler when you
/// initialize an `Optional` instance with a `nil` literal. For example:
///
/// var i: Index? = nil

View File

@@ -16,12 +16,60 @@ import SwiftShims
// Input/Output interfaces
//===----------------------------------------------------------------------===//
/// A target of text streaming operations.
/// A type that can be the target of text-streaming operations.
///
/// You can send the output of the standard library's `print(_:to:)` and
/// `dump(_:to:)` functions to an instance of a type that conforms to the
/// `OutputStream` protocol instead of to standard output. Swift's `String`
/// type conforms to `OutputStream` already, so you can capture the output
/// from `print(_:to:)` and `dump(_:to:)` in a string instead of logging it to
/// standard output.
///
/// var s = ""
/// for n in 1 ... 5 {
/// print(n, terminator: "", to: &s)
/// }
/// // s == "12345"
///
/// Conforming to the OutputStream Protocol
/// =======================================
///
/// To make your custom type conform to the `OutputStream` protocol, implement
/// the required `write(_:)` method. Functions that use an `OutputStream`
/// target may call `write(_:)` multiple times per writing operation.
///
/// As an example, here's an implementation of an output stream that converts
/// any input to its plain ASCII representation before sending it to standard
/// output.
///
/// struct ASCIILogger: OutputStream {
/// mutating func write(_ string: String) {
/// let ascii = string.unicodeScalars.lazy.map { scalar in
/// scalar == "\n"
/// ? "\n"
/// : scalar.escaped(asASCII: true)
/// }
/// print(ascii.joined(separator: ""), terminator: "")
/// }
/// }
///
/// The `ASCIILogger` type's `write(_:)` method processes its string input by
/// escaping each Unicode scalar, with the exception of `"\n"` line returns.
/// By sending the output of the `print(_:to:)` function to an instance of
/// `ASCIILogger`, you invoke its `write(_:)` method.
///
/// let s = "Hearts ♡ and Diamonds ♢"
/// print(s)
/// // Prints "Hearts ♡ and Diamonds ♢"
///
/// var asciiLogger = ASCIILogger()
/// print(s, to: &asciiLogger)
/// // Prints "Hearts \u{2661} and Diamonds \u{2662}"
public protocol OutputStream {
mutating func _lock()
mutating func _unlock()
/// Append the given `string` to this stream.
/// Appends the given string to the stream.
mutating func write(_ string: String)
}
@@ -30,51 +78,143 @@ extension OutputStream {
public mutating func _unlock() {}
}
/// A source of text streaming operations. `Streamable` instances can
/// be written to any *output stream*.
/// A source of text-streaming operations.
///
/// For example: `String`, `Character`, `UnicodeScalar`.
/// Instances of types that conform to the `Streamable` protocol can write
/// their value to instances of any type that conforms to the `OutputStream`
/// protocol. The Swift standard library's text-related types, `String`,
/// `Character`, and `UnicodeScalar`, all conform to `Streamable`.
///
/// Conforming to the Streamable Protocol
/// =====================================
///
/// To add `Streamable` conformance to a custom type, implement the required
/// `write(to:)` method. Call the given output stream's `write(_:)` method in
/// your implementation.
public protocol Streamable {
/// Write a textual representation of `self` into `target`.
/// Writes a textual representation of this instance into the given output
/// stream.
func write<Target : OutputStream>(to target: inout Target)
}
/// A type with a customized textual representation.
///
/// This textual representation is used when values are written to an
/// *output stream*, for example, by `print`.
/// Types that conform to the `CustomStringConvertible` protocol can provide
/// their own representation to be used when converting an instance to a
/// string. The `String(_:)` initializer is the preferred way to convert an
/// instance of *any* type to a string. If the passed instance conforms to
/// `CustomStringConvertible`, the `String(_:)` initializer and the
/// `print(_:)` function use the instance's custom `description` property.
///
/// - Note: `String(instance)` will work for an `instance` of *any*
/// type, returning its `description` if the `instance` happens to be
/// `CustomStringConvertible`. Using `CustomStringConvertible` as a
/// generic constraint, or accessing a conforming type's `description`
/// directly, is therefore discouraged.
/// Accessing a type's `description` property directly or using
/// `CustomStringConvertible` as a generic constraint is discouraged.
///
/// Conforming to the CustomStringConvertible Protocol
/// ==================================================
///
/// Add `CustomStringConvertible` conformance to your custom types by defining
/// a `description` property.
///
/// For example, this custom `Point` struct uses the default representation
/// supplied by the standard library:
///
/// struct Point {
/// let x: Int, y: Int
/// }
///
/// let p = Point(x: 21, y: 30)
/// print(p)
/// // Prints "Point(x: 21, y: 30)"
///
/// After implementing the `description` property and declaring
/// `CustomStringConvertible` conformance, the `Point` type provides its own
/// custom representation.
///
/// extension Point: CustomStringConvertible {
/// var description: String {
/// return "(\(x), \(y))"
/// }
/// }
///
/// print(p)
/// // Prints "(21, 30)"
///
/// - SeeAlso: `String.init<T>(T)`, `CustomDebugStringConvertible`
public protocol CustomStringConvertible {
/// A textual representation of the instance.
/// A textual representation of this instance.
///
/// Instead of accessing this property directly, convert an instance of any
/// type to a string by using the `String(_:)` initializer. For example:
///
/// struct Point: CustomStringConvertible {
/// let x: Int, y: Int
///
/// var description: String {
/// return "(\(x), \(y))"
/// }
/// }
///
/// let p = Point(x: 21, y: 30)
/// let s = String(p)
/// print(s)
/// // Prints "(21, 30)"
///
/// The conversion of `p` to a string in the assignment to `s` uses the
/// `Point` type's `description` property.
var description: String { get }
}
/// A type with a customized textual representation suitable for
/// debugging purposes.
/// A type with a customized textual representation suitable for debugging
/// purposes.
///
/// This textual representation is used when values are written to an
/// *output stream* by `debugPrint`, and is
/// typically more verbose than the text provided by a
/// `CustomStringConvertible`'s `description` property.
/// Swift provides a default debugging textual representation for any type.
/// That default representation is used by the `String(reflecting:)`
/// initializer and the `debugPrint(_:)` function for types that don't provide
/// their own. To customize that representation, make your type conform to the
/// `CustomDebugStringConvertible` protocol.
///
/// - Note: `String(reflecting: instance)` will work for an `instance`
/// of *any* type, returning its `debugDescription` if the `instance`
/// happens to be `CustomDebugStringConvertible`. Using
/// `CustomDebugStringConvertible` as a generic constraint, or
/// accessing a conforming type's `debugDescription` directly, is
/// therefore discouraged.
/// Because the `String(reflecting:)` initializer works for instances of *any*
/// type, returning an instance's `debugDescription` if the value passed
/// conforms to `CustomDebugStringConvertible`, accessing a type's
/// `debugDescription` property directly or using
/// `CustomDebugStringConvertible` as a generic constraint is discouraged.
///
/// - SeeAlso: `String.init<T>(reflecting: T)`,
/// `CustomStringConvertible`
/// Conforming to the CustomDebugStringConvertible Protocol
/// =======================================================
///
/// Add `CustomDebugStringConvertible` conformance to your custom types by
/// defining a `debugDescription` property.
///
/// For example, this custom `Point` struct uses the default representation
/// supplied by the standard library:
///
/// struct Point {
/// let x: Int, y: Int
/// }
///
/// let p = Point(x: 21, y: 30)
/// print(String(reflecting: p))
/// // Prints "p: Point = {
/// // x = 21
/// // y = 30
/// // }"
///
/// After adding `CustomDebugStringConvertible` conformance by implementing the
/// `debugDescription` property, `Point` provides its own custom debugging
/// representation.
///
/// extension Point: CustomDebugStringConvertible {
/// var debugDescription: String {
/// return "Point(x: \(x), y: \(y))"
/// }
/// }
///
/// print(String(reflecting: p))
/// // Prints "Point(x: 21, y: 30)"
///
/// - SeeAlso: `String.init<T>(reflecting: T)`, `CustomStringConvertible`
public protocol CustomDebugStringConvertible {
/// A textual representation of the instance, suitable for debugging.
/// A textual representation of this instance, suitable for debugging.
var debugDescription: String { get }
}
@@ -350,7 +490,9 @@ internal struct _Stdout : OutputStream {
}
extension String : OutputStream {
/// Append `other` to this stream.
/// Appends the given string to this string.
///
/// - Parameter other: A string to append.
public mutating func write(_ other: String) {
self += other
}
@@ -361,21 +503,28 @@ extension String : OutputStream {
//===----------------------------------------------------------------------===//
extension String : Streamable {
/// Write a textual representation of `self` into `target`.
/// Writes the string into the given output stream.
///
/// - Parameter target: An output stream.
public func write<Target : OutputStream>(to target: inout Target) {
target.write(self)
}
}
extension Character : Streamable {
/// Write a textual representation of `self` into `target`.
/// Writes the character into the given output stream.
///
/// - Parameter target: An output stream.
public func write<Target : OutputStream>(to target: inout Target) {
target.write(String(self))
}
}
extension UnicodeScalar : Streamable {
/// Write a textual representation of `self` into `target`.
/// Writes the textual representation of the Unicode scalar into the given
/// output stream.
///
/// - Parameter target: An output stream.
public func write<Target : OutputStream>(to target: inout Target) {
target.write(String(Character(self)))
}

View File

@@ -1060,9 +1060,8 @@ public func ^= <T : BitwiseOperations>(lhs: inout T, rhs: T) {
public protocol Hashable : Equatable {
/// The hash value.
///
/// - Important: Hash values are not guaranteed to be equal across different
/// executions of your program. Do not save hash values to use during a
/// future execution.
/// Hash values are not guaranteed to be equal across different executions of
/// your program. Do not save hash values to use during a future execution.
var hashValue: Int { get }
}

View File

@@ -392,7 +392,7 @@ extension SetAlgebra {
/// Creates a set containing the elements of the given array literal.
///
/// Don't directly call this initializer, which is used by the compiler when
/// Do not call this initializer directly. It is used by the compiler when
/// you use an array literal. Instead, create a new set using an array
/// literal as its value by enclosing a comma-separated list of values in
/// square brackets. You can use an array literal anywhere a set is expected

View File

@@ -287,11 +287,11 @@ struct _SliceBuffer<Element> : _ArrayBufferProtocol, RandomAccessCollection {
/// In an empty collection, `startIndex == endIndex`.
public var startIndex: Int
/// The collection's "past the end" position.
/// The collection's "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
///
/// `endIndex` is not a valid argument to `subscript`, and is always
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
/// `endIndex` is always reachable from `startIndex` by zero or more
/// applications of `index(after:)`.
public var endIndex: Int {
get {
return Int(endIndexAndFlags >> 1)

View File

@@ -17,16 +17,13 @@
// are involved in its construction. This feature is crucial for
// preventing infinite recursion even in non-asserting cases.
/// A simple string designed to represent text that is "knowable at
/// compile-time".
/// A string type designed to represent text that is known at compile time.
///
/// Logically speaking, each instance looks something like this:
///
/// enum StaticString {
/// case ascii(start: UnsafePointer<UInt8>, count: Int)
/// case utf8(start: UnsafePointer<UInt8>, count: Int)
/// case scalar(UnicodeScalar)
/// }
/// Instances of the `StaticString` type are immutable. `StaticString` provides
/// limited, pointer-based access to its contents, unlike Swift's more
/// commonly used `String` type. A static string can store its value as a
/// pointer to an ASCII code unit sequence, as a pointer to a UTF-8 code unit
/// sequence, or as a single Unicode scalar value.
@_fixed_layout
public struct StaticString
: _BuiltinUnicodeScalarLiteralConvertible,
@@ -56,10 +53,11 @@ public struct StaticString
/// ASCII.
internal var _flags: Builtin.Int8
/// A pointer to the beginning of UTF-8 code units.
/// A pointer to the beginning of the string's UTF-8 encoded representation.
///
/// - Precondition: `self` stores a pointer to either ASCII or UTF-8 code
/// units.
/// The static string must store a pointer to either ASCII or UTF-8 code
/// units. Accessing this property when `hasPointerRepresentation` is
/// `false` triggers a runtime error.
@_transparent
public var utf8Start: UnsafePointer<UInt8> {
_precondition(
@@ -70,7 +68,9 @@ public struct StaticString
/// The stored Unicode scalar value.
///
/// - Precondition: `self` stores a single Unicode scalar value.
/// The static string must store a single Unicode scalar value. Accessing
/// this property when `hasPointerRepresentation` is `true` triggers a
/// runtime error.
@_transparent
public var unicodeScalar: UnicodeScalar {
_precondition(
@@ -79,11 +79,10 @@ public struct StaticString
return UnicodeScalar(UInt32(UInt(_startPtrOrData)))
}
/// If `self` stores a pointer to ASCII or UTF-8 code units, the
/// length in bytes of that data.
/// The length in bytes of the static string's ASCII or UTF-8 representation.
///
/// If `self` stores a single Unicode scalar value, the value of
/// `utf8CodeUnitCount` is unspecified.
/// - Warning: If the static string stores a single Unicode scalar value, the
/// value of `utf8CodeUnitCount` is unspecified.
@_transparent
public var utf8CodeUnitCount: Int {
_precondition(
@@ -92,25 +91,38 @@ public struct StaticString
return Int(_utf8CodeUnitCount)
}
/// `true` iff `self` stores a pointer to ASCII or UTF-8 code units.
/// A Boolean value indicating whether the static string stores a pointer to
/// ASCII or UTF-8 code units.
@_transparent
public var hasPointerRepresentation: Bool {
return (UInt8(_flags) & 0x1) == 0
}
/// `true` if `self` stores a pointer to ASCII code units.
/// A Boolean value that is `true` if the static string stores a pointer to
/// ASCII code units.
///
/// If `self` stores a single Unicode scalar value, the value of
/// `isASCII` is unspecified.
/// Use this property in conjunction with `hasPointerRepresentation` to
/// determine whether a static string with pointer representation stores an
/// ASCII or UTF-8 code unit sequence.
///
/// - Warning: If the static string stores a single Unicode scalar value, the
/// value of `isASCII` is unspecified.
@_transparent
public var isASCII: Bool {
return (UInt8(_flags) & 0x2) != 0
}
/// Invoke `body` with a buffer containing the UTF-8 code units of
/// `self`.
/// Invokes the given closure with a buffer containing the static string's
/// UTF-8 code unit sequence.
///
/// This method works regardless of what `self` stores.
/// This method works regardless of whether the static string stores a
/// pointer or a single Unicode scalar value.
///
/// - Parameter body: A closure that takes a buffer pointer to the static
/// string's UTF-8 code unit sequence as its sole argument. If the closure
/// has a return value, it is used as the return value of the
/// `withUTF8Buffer(invoke:)` method.
/// - Returns: The return value of the `body` closure, if any.
public func withUTF8Buffer<R>(
invoke body: @noescape (UnsafeBufferPointer<UInt8>) -> R) -> R {
if hasPointerRepresentation {
@@ -130,7 +142,7 @@ public struct StaticString
}
}
/// Create an empty instance.
/// Creates an empty static string.
@_transparent
public init() {
self = ""
@@ -169,7 +181,10 @@ public struct StaticString
self = StaticString(unicodeScalar: value)
}
/// Create an instance initialized to `value`.
/// Creates an instance initialized to a single Unicode scalar.
///
/// Do not call this initializer directly. It may be used by the compiler
/// when you initialize a static string with a Unicode scalar.
@effects(readonly)
@_transparent
public init(unicodeScalarLiteral value: StaticString) {
@@ -190,7 +205,11 @@ public struct StaticString
)
}
/// Create an instance initialized to `value`.
/// Creates an instance initialized to a single character that is made up of
/// one or more Unicode code points.
///
/// Do not call this initializer directly. It may be used by the compiler
/// when you initialize a static string using an extended grapheme cluster.
@effects(readonly)
@_transparent
public init(extendedGraphemeClusterLiteral value: StaticString) {
@@ -210,14 +229,17 @@ public struct StaticString
isASCII: isASCII)
}
/// Create an instance initialized to `value`.
/// Creates an instance initialized to the value of a string literal.
///
/// Do not call this initializer directly. It may be used by the compiler
/// when you initialize a static string using a string literal.
@effects(readonly)
@_transparent
public init(stringLiteral value: StaticString) {
self = value
}
/// A textual representation of `self`.
/// A string representation of the static string.
public var description: String {
return withUTF8Buffer {
(buffer) in
@@ -225,7 +247,7 @@ public struct StaticString
}
}
/// A textual representation of `self`, suitable for debugging.
/// A textual representation of the static string, suitable for debugging.
public var debugDescription: String {
return self.description.debugDescription
}

View File

@@ -15,101 +15,276 @@ import SwiftShims
// FIXME: complexity documentation for most of methods on String is ought to be
// qualified with "amortized" at least, as Characters are variable-length.
/// An arbitrary Unicode string value.
/// A Unicode string value.
///
/// Unicode-Correct
/// ===============
/// A string is a series of characters, such as `"Swift"`. Strings in Swift are
/// Unicode correct, locale insensitive, and designed to be efficient. The
/// `String` type bridges with the Objective-C class `NSString` and offers
/// interoperability with C functions that work with strings.
///
/// Swift strings are designed to be Unicode-correct. In particular,
/// the APIs make it easy to write code that works correctly, and does
/// not surprise end-users, regardless of where you venture in the
/// Unicode character space. For example, the `==` operator checks
/// for [Unicode canonical
/// equivalence](http://www.unicode.org/glossary/#deterministic_comparison),
/// so two different representations of the same string will always
/// compare equal.
/// You can create new strings using string literals or string interpolations.
/// A string literal is a series of characters enclosed in quotes.
///
/// Locale-Insensitive
/// ==================
/// let greeting = "Welcome!"
///
/// The fundamental operations on Swift strings are not sensitive to
/// locale settings. That's because, for example, the validity of a
/// `Dictionary<String, T>` in a running program depends on a given
/// string comparison having a single, stable result. Therefore,
/// Swift always uses the default,
/// un-[tailored](http://www.unicode.org/glossary/#tailorable) Unicode
/// algorithms for basic string operations.
/// String interpolations are string literals that evaluate any included
/// expressions and convert the results to string form. String interpolations
/// are an easy way to build a string from multiple pieces. Wrap each
/// expression in a string interpolation in parentheses, prefixed by a
/// backslash.
///
/// Importing `Foundation` endows swift strings with the full power of
/// the `NSString` API, which allows you to choose more complex
/// locale-sensitive operations explicitly.
/// let name = "Rosa"
/// let personalizedGreeting = "Welcome, \(name)!"
///
/// Value Semantics
/// ===============
/// let price = 2
/// let number = 3
/// let cookiePrice = "\(number) cookies: $\(price * number)."
///
/// Each string variable, `let` binding, or stored property has an
/// independent value, so mutations to the string are not observable
/// through its copies:
/// Combine strings using the concatenation operator (`+`).
///
/// var a = "foo"
/// var b = a
/// b.append("bar")
/// print("a=\(a), b=\(b)") // a=foo, b=foobar
/// let longerGreeting = greeting + " We're glad you're here!"
/// print(longerGreeting)
/// // Prints "Welcome! We're glad you're here!"
///
/// Strings use Copy-on-Write so that their data is only copied
/// lazily, upon mutation, when more than one string instance is using
/// the same buffer. Therefore, the first in any sequence of mutating
/// operations may cost `O(N)` time and space, where `N` is the length
/// of the string's (unspecified) underlying representation.
/// Modifying and Comparing Strings
/// ===============================
///
/// Views
/// =====
/// Strings always have value semantics. Modifying a copy of a string leaves
/// the original unaffected.
///
/// `String` is not itself a collection of anything. Instead, it has
/// properties that present the string's contents as meaningful
/// collections:
/// var otherGreeting = greeting
/// otherGreeting += " Have a nice time!"
/// print(otherGreeting)
/// // Prints "Welcome! Have a nice time!"
///
/// - `characters`: a collection of `Character` ([extended grapheme
/// cluster](http://www.unicode.org/glossary/#extended_grapheme_cluster))
/// elements, a unit of text that is meaningful to most humans.
/// print(greeting)
/// // Prints "Welcome!"
///
/// - `unicodeScalars`: a collection of `UnicodeScalar` ([Unicode
/// scalar
/// values](http://www.unicode.org/glossary/#unicode_scalar_value))
/// the 21-bit codes that are the basic unit of Unicode. These
/// values are equivalent to UTF-32 code units.
/// Comparing strings for equality using the is-equal-to operator (`==`) or a
/// relational operator (like `<` and `>=`) is always performed using the
/// Unicode canonical representation. This means that different
/// representations of a string compare as being equal.
///
/// - `utf16`: a collection of `UTF16.CodeUnit`, the 16-bit
/// elements of the string's UTF-16 encoding.
/// let cafe1 = "Cafe\u{301}"
/// let cafe2 = "Café"
/// print(cafe1 == cafe2)
/// // Prints "true"
///
/// - `utf8`: a collection of `UTF8.CodeUnit`, the 8-bit
/// elements of the string's UTF-8 encoding.
/// The Unicode code point `"\u{301}"` modifies the preceding character to
/// include an accent, so `"e\u{301}"` has the same canonical representation
/// as the single Unicode code point `"é"`.
///
/// Growth and Capacity
/// ===================
/// Basic string operations are not sensitive to locale settings. This ensures
/// that string comparisons and other operations always have a single, stable
/// result, allowing strings to be used as keys in `Dictionary` instances and
/// for other purposes.
///
/// When a string's contiguous storage fills up, new storage must be
/// allocated and characters must be moved to the new storage.
/// `String` uses an exponential growth strategy that makes `append` a
/// constant time operation *when amortized over many invocations*.
/// Representing Strings: Views
/// ===========================
///
/// Objective-C Bridge
/// ==================
/// A string is not itself a collection. Instead, it has properties that
/// present its contents as meaningful collections. Each of these collections
/// is a particular type of *view* of the string's visible and data
/// representation.
///
/// `String` is bridged to Objective-C as `NSString`, and a `String`
/// that originated in Objective-C may store its characters in an
/// `NSString`. Since any arbitrary subclass of `NSString` can
/// become a `String`, there are no guarantees about representation or
/// efficiency in this case. Since `NSString` is immutable, it is
/// just as though the storage was shared by some copy: the first in
/// any sequence of mutating operations causes elements to be copied
/// into unique, contiguous storage which may cost `O(N)` time and
/// space, where `N` is the length of the string representation (or
/// more, if the underlying `NSString` has unusual performance
/// characteristics).
/// To demonstrate the different views available for every string, the
/// following examples use this `String` instance:
///
/// let cafe = "Cafe\u{301} du 🌍"
/// print(cafe)
/// // Prints "Café du 🌍"
///
/// Character View
/// --------------
///
/// A string's `characters` property is a collection of *extended grapheme
/// clusters*, which approximate human-readable characters. Many individual
/// characters, such as "é", "김", and "🇮🇳", can be made up of multiple Unicode
/// code points. These code points are combined by Unicode's boundary
/// algorithms into extended grapheme clusters, represented by Swift's
/// `Character` type. Each element of the `characters` view is represented by
/// a `Character` instance.
///
/// print(cafe.characters.count)
/// // Prints "9"
/// print(Array(cafe.characters))
/// // Prints "["C", "a", "f", "é", " ", "d", "u", " ", "🌍"]"
///
/// Each visible character in the `cafe` string is a separate element of the
/// `characters` view.
///
/// Unicode Scalar View
/// -------------------
///
/// A string's `unicodeScalars` property is a collection of Unicode scalar
/// values, the 21-bit codes that are the basic unit of Unicode. Each scalar
/// value is represented by a `UnicodeScalar` instance and is equivalent to a
/// UTF-32 code unit.
///
/// print(cafe.unicodeScalars.count)
/// // Prints "10"
/// print(Array(cafe.unicodeScalars))
/// // Prints "["C", "a", "f", "e", "\u{0301}", " ", "d", "u", " ", "\u{0001F30D}"]"
/// print(cafe.unicodeScalars.map { $0.value })
/// // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 127757]"
///
/// The `unicodeScalars` view's elements comprise each Unicode scalar value in
/// the `cafe` string. In particular, because `cafe` was declared using the
/// decomposed form of the `"é"` character, `unicodeScalars` contains the code
/// points for both the letter `"e"` (101) and the accent character `"´"`
/// (769).
///
/// UTF-16 View
/// -----------
///
/// A string's `utf16` property is a collection of UTF-16 code units, the
/// 16-bit encoding form of the string's Unicode scalar values. Each code unit
/// is stored as a `UInt16` instance.
///
/// print(cafe.utf16.count)
/// // Prints "11"
/// print(Array(cafe.utf16))
/// // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 55356, 57101]"
///
/// The elements of the `utf16` view are the code units for the string when
/// encoded in UTF-16.
///
/// The elements of this collection match those accessed through indexed
/// `NSString` APIs.
///
/// let nscafe = cafe as NSString
/// print(nscafe.length)
/// // Prints "11"
/// print(nscafe.character(at: 3))
/// // Prints "101"
///
/// UTF-8 View
/// ----------
///
/// A string's `utf8` property is a collection of UTF-8 code units, the 8-bit
/// encoding form of the string's Unicode scalar values. Each code unit is
/// stored as a `UInt8` instance.
///
/// print(cafe.utf8.count)
/// // Prints "14"
/// print(Array(cafe.utf8))
/// // Prints "[67, 97, 102, 101, 204, 129, 32, 100, 117, 32, 240, 159, 140, 141]"
///
/// The elements of the `utf8` view are the code units for the string when
/// encoded in UTF-8. This representation matches the one used when `String`
/// instances are passed to C APIs.
///
/// let cLength = strlen(cafe)
/// print(cLength)
/// // Prints "14"
///
/// Counting the Length of a String
/// ===============================
///
/// When you need to know the length of a string, you must first consider what
/// you'll use the length for. Are you measuring the number of characters that
/// will be displayed on the screen, or are you measuring the amount of
/// storage needed for the string in a particular encoding? A single string
/// can have greatly differing lengths when measured by its different views.
///
/// For example, an ASCII character like the capital letter *A* is represented
/// by a single element in each of its four views. The Unicode scalar value of
/// *A* is `65`, which is small enough to fit in a single code unit in both
/// UTF-16 and UTF-8.
///
/// let capitalA = "A"
/// print(capitalA.characters.count)
/// // Prints "1"
/// print(capitalA.unicodeScalars.count)
/// // Prints "1"
/// print(capitalA.utf16.count)
/// // Prints "1"
/// print(capitalA.utf8.count)
/// // Prints "1"
///
///
/// On the other hand, an emoji flag character is constructed from a pair of
/// Unicode scalar values, like `"\u{1F1F5}"` and `"\u{1F1F7}"`. Each of
/// these scalar values, in turn, is too large to fit into a single UTF-16 or
/// UTF-8 code unit. As a result, each view of the string `"🇵🇷"` reports a
/// different length.
///
/// let flag = "🇵🇷"
/// print(flag.characters.count)
/// // Prints "1"
/// print(flag.unicodeScalars.count)
/// // Prints "2"
/// print(flag.utf16.count)
/// // Prints "4"
/// print(flag.utf8.count)
/// // Prints "8"
///
/// Accessing String View Elements
/// ==============================
///
/// To find individual elements of a string, use the appropriate view for your
/// task. For example, to retrieve the first word of a longer string, you can
/// search the `characters` view for a space and then create a new string from
/// a prefix of the `characters` view up to that point.
///
/// let name = "Marie Curie"
/// let firstSpace = name.characters.index(of: " ")!
/// let firstName = String(name.characters.prefix(upTo: firstSpace))
/// print(firstName)
/// // Prints "Marie"
///
/// You can convert an index into one of a string's views to an index into
/// another view.
///
/// let firstSpaceUTF8 = firstSpace.samePosition(in: name.utf8)
/// print(Array(name.utf8.prefix(upTo: firstSpaceUTF8)))
/// // Prints "[77, 97, 114, 105, 101]"
///
/// Performance Optimizations
/// =========================
///
/// Although strings in Swift have value semantics, strings use a copy-on-write
/// strategy to store their data in a buffer. This buffer can then be shared
/// by different copies of a string. A string's data is only copied lazily,
/// upon mutation, when more than one string instance is using the same
/// buffer. Therefore, the first in any sequence of mutating operations may
/// cost O(*n*) time and space.
///
/// When a string's contiguous storage fills up, a new buffer must be allocated
/// and data must be moved to the new storage. String buffers use an
/// exponential growth strategy that makes appending to a string a constant
/// time operation when averaged over many append operations.
///
/// Bridging between String and NSString
/// ====================================
///
/// Any `String` instance can be bridged to `NSString` using the type-cast
/// operator (`as`), and any `String` instance that originates in Objective-C
/// may use an `NSString` instance as its storage. Because any arbitrary
/// subclass of `NSString` can become a `String` instance, there are no
/// guarantees about representation or efficiency when a `String` instance is
/// backed by `NSString` storage. Because `NSString` is immutable, it is just
/// as though the storage was shared by a copy: The first in any sequence of
/// mutating operations causes elements to be copied into unique, contiguous
/// storage which may cost O(*n*) time and space, where *n* is the length of
/// the string's encoded representation (or more, if the underlying `NSString`
/// has unusual performance characteristics).
///
/// For more information about the Unicode terms used in this discussion, see
/// the [Unicode.org glossary][glossary]. In particular, this discussion
/// mentions [extended grapheme clusters][clusters],
/// [Unicode scalar values][scalars], and [canonical equivalence][equivalence].
///
/// [glossary]: http://www.unicode.org/glossary/
/// [clusters]: http://www.unicode.org/glossary/#extended_grapheme_cluster
/// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value
/// [equivalence]: http://www.unicode.org/glossary/#canonical_equivalent
///
/// - SeeAlso: `String.CharacterView`, `String.UnicodeScalarView`,
/// `String.UTF16View`, `String.UTF8View`
@_fixed_layout
public struct String {
/// An empty `String`.
/// Creates an empty string.
public init() {
_core = _StringCore()
}
@@ -175,7 +350,11 @@ extension String : _BuiltinUnicodeScalarLiteralConvertible {
}
extension String : UnicodeScalarLiteralConvertible {
/// Create an instance initialized to `value`.
/// Creates an instance initialized to the given Unicode scalar value.
///
/// Don't call this initializer directly. It may be used by the compiler when
/// you initialize a string using a string literal that contains a single
/// Unicode scalar value.
public init(unicodeScalarLiteral value: String) {
self = value
}
@@ -197,7 +376,12 @@ extension String : _BuiltinExtendedGraphemeClusterLiteralConvertible {
}
extension String : ExtendedGraphemeClusterLiteralConvertible {
/// Create an instance initialized to `value`.
/// Creates an instance initialized to the given extended grapheme cluster
/// literal.
///
/// Don't call this initializer directly. It may be used by the compiler when
/// you initialize a string using a string literal containing a single
/// extended grapheme cluster.
public init(extendedGraphemeClusterLiteral value: String) {
self = value
}
@@ -247,14 +431,22 @@ extension String : _BuiltinStringLiteralConvertible {
}
extension String : StringLiteralConvertible {
/// Create an instance initialized to `value`.
/// Creates an instance initialized to the given string value.
///
/// Don't call this initializer directly. It is used by the compiler when you
/// initialize a string using a string literal. For example:
///
/// let nextStop = "Clark & Lake"
///
/// This assignment to the `nextStop` constant calls this string literal
/// initializer behind the scenes.
public init(stringLiteral value: String) {
self = value
}
}
extension String : CustomDebugStringConvertible {
/// A textual representation of `self`, suitable for debugging.
/// A representation of the string that is suitable for debugging.
public var debugDescription: String {
var result = "\""
for us in self.unicodeScalars {
@@ -431,14 +623,31 @@ public func <(lhs: String, rhs: String) -> Bool {
// Support for copy-on-write
extension String {
/// Append the elements of `other` to `self`.
/// Appends the given string to this string.
///
/// The following example builds a customized greeting by using the
/// `append(_:)` method:
///
/// var greeting = "Hello, "
/// if let name = getUserName() {
/// greeting.append(name)
/// } else {
/// greeting.append("friend")
/// }
/// print(greeting)
/// // Prints "Hello, friend"
///
/// - Parameter other: Another string.
public mutating func append(_ other: String) {
_core.append(other._core)
}
/// Append `x` to `self`.
/// Appends the given Unicode scalar to the string.
///
/// - Complexity: Amortized O(1).
/// - Parameter x: A Unicode scalar value.
///
/// - Complexity: Appending a Unicode scalar to a string averages to O(1)
/// over many additions.
public mutating func append(_ x: UnicodeScalar) {
_core.append(x)
}
@@ -458,13 +667,10 @@ func _stdlib_NSStringHashValuePointer(_ str: OpaquePointer, _ isASCII: Bool) ->
#endif
extension String : Hashable {
/// The hash value.
/// The string's hash value.
///
/// **Axiom:** `x == y` implies `x.hashValue == y.hashValue`.
///
/// - Note: The hash value is not guaranteed to be stable across
/// different invocations of the same program. Do not persist the
/// hash value across program runs.
/// Hash values are not guaranteed to be equal across different executions of
/// your program. Do not save hash values to use during a future execution.
public var hashValue: Int {
#if _runtime(_ObjC)
// Mix random bits into NSString's hash so that clients don't rely on
@@ -677,7 +883,15 @@ extension String {
}
}
/// Return `self` converted to lower case.
/// Returns a lowercase version of the string.
///
/// Here's an example of transforming a string to all lowercase letters.
///
/// let cafe = "Café 🍵"
/// print(cafe.lowercased())
/// // Prints "café 🍵"
///
/// - Returns: A lowercase copy of the string.
///
/// - Complexity: O(n)
public func lowercased() -> String {
@@ -718,7 +932,15 @@ extension String {
#endif
}
/// Return `self` converted to upper case.
/// Returns an uppercase version of the string.
///
/// The following example transforms a string to uppercase letters:
///
/// let cafe = "Café 🍵"
/// print(cafe.uppercased())
/// // Prints "CAFÉ 🍵"
///
/// - Returns: An uppercase copy of the string.
///
/// - Complexity: O(n)
public func uppercased() -> String {

View File

@@ -71,7 +71,7 @@ internal func _cocoaStringToContiguous(
source: _CocoaString, range: Range<Int>, minimumCapacity: Int
) -> _StringBuffer {
_sanityCheck(_swift_stdlib_CFStringGetCharactersPtr(source) == nil,
"Known contiguously-stored strings should already be converted to Swift")
"Known contiguously stored strings should already be converted to Swift")
let startIndex = range.lowerBound
let count = range.upperBound - startIndex
@@ -107,7 +107,7 @@ internal func _cocoaStringSlice(
_sanityCheck(
_swift_stdlib_CFStringGetCharactersPtr(cfSelf) == nil,
"Known contiguously-stored strings should already be converted to Swift")
"Known contiguously stored strings should already be converted to Swift")
let cfResult: AnyObject = _swift_stdlib_CFStringCreateWithSubstring(
nil, cfSelf, _swift_shims_CFRange(
@@ -124,7 +124,7 @@ internal func _cocoaStringSubscript(
let cfSelf: _swift_shims_CFStringRef = target.cocoaBuffer.unsafelyUnwrapped
_sanityCheck(_swift_stdlib_CFStringGetCharactersPtr(cfSelf) == nil,
"Known contiguously-stored strings should already be converted to Swift")
"Known contiguously stored strings should already be converted to Swift")
return _swift_stdlib_CFStringGetCharacterAtIndex(cfSelf, position)
}

View File

@@ -19,13 +19,42 @@
// allow performance optimizations of linear traversals.
extension String {
/// A `String`'s collection of `Character`s ([extended grapheme
/// clusters](http://www.unicode.org/glossary/#extended_grapheme_cluster))
/// elements.
/// A view of a string's contents as a collection of characters.
///
/// In Swift, every string provides a view of its contents as characters. In
/// this view, many individual characters---for example, "é", "김", and
/// "🇮🇳"---can be made up of multiple Unicode code points. These code points
/// are combined by Unicode's boundary algorithms into *extended grapheme
/// clusters*, represented by the `Character` type. Each element of a
/// `CharacterView` collection is a `Character` instance.
///
/// let flowers = "Flowers 💐"
/// for c in flowers.characters {
/// print(c)
/// }
/// // F
/// // l
/// // o
/// // w
/// // e
/// // r
/// // s
/// //
/// // 💐
///
/// You can convert a `String.CharacterView` instance back into a string
/// using the `String` type's `init(_:)` initializer.
///
/// let name = "Marie Curie"
/// if let firstSpace = name.characters.index(of: " ") {
/// let firstName = String(name.characters.prefix(upTo: firstSpace))
/// print(firstName)
/// }
/// // Prints "Marie"
public struct CharacterView {
internal var _core: _StringCore
/// Create a view of the `Character`s in `text`.
/// Creates a view of the given string.
public init(_ text: String) {
self._core = text._core
}
@@ -36,9 +65,7 @@ extension String {
}
}
/// A collection of `Characters` representing the `String`'s
/// [extended grapheme
/// clusters](http://www.unicode.org/glossary/#extended_grapheme_cluster).
/// A view of the string's contents as a collection of characters.
public var characters: CharacterView {
get {
return CharacterView(self)
@@ -48,12 +75,34 @@ extension String {
}
}
/// Efficiently mutate `self` by applying `body` to its `characters`.
/// Applies the given closure to a mutable view of the string's characters.
///
/// - Warning: Do not rely on anything about `self` (the `String`
/// that is the target of this method) during the execution of
/// `body`: it may not appear to have its correct value. Instead,
/// use only the `String.CharacterView` argument to `body`.
/// Do not use the string that is the target of this method inside the
/// closure passed to `body`, as it may not have its correct value.
/// Instead, use the closure's `String.CharacterView` argument.
///
/// The example below uses the `withMutableCharacters(_:)` method to truncate
/// the string `str` at the first space and to return the remainder of the
/// string.
///
/// var str = "All this happened, more or less."
/// let afterSpace = str.withMutableCharacters { chars -> String.CharacterView in
/// if let i = chars.index(of: " ") {
/// let result = chars.suffix(from: chars.index(after: i))
/// chars.removeSubrange(i..<chars.endIndex)
/// return result
/// }
/// return String.CharacterView()
/// }
///
/// print(str)
/// // Prints "All"
/// print(String(afterSpace))
/// // Prints "this happened, more or less."
///
/// - Parameter body: A closure that takes a character view as its argument.
/// - Returns: The return value of the `body` closure, if any, is the return
/// value of this method.
public mutating func withMutableCharacters<R>(_ body: (inout CharacterView) -> R) -> R {
// Naively mutating self.characters forces multiple references to
// exist at the point of mutation. Instead, temporarily move the
@@ -65,8 +114,20 @@ extension String {
return r
}
/// Construct the `String` corresponding to the given sequence of
/// Unicode scalars.
/// Creates a string from the given character view.
///
/// Use this initializer to recover a string after performing a collection
/// slicing operation on a character view.
///
/// let poem = "'Twas brillig, and the slithy toves / " +
/// "Did gyre and gimbal in the wabe: / " +
/// "All mimsy were the borogoves / " +
/// "And the mome raths outgrabe."
/// let excerpt = String(poem.characters.prefix(22)) + "..."
/// print(excerpt)
/// // Prints "'Twas brillig, and the..."
///
/// - Parameter characters: A character view to convert to a string.
public init(_ characters: CharacterView) {
self.init(characters._core)
}
@@ -79,7 +140,20 @@ extension String.CharacterView : BidirectionalCollection {
return UnicodeScalarView(_core)
}
/// A character position.
/// A position in a string's `CharacterView` instance.
///
/// You can convert between indices of the different string views by using
/// conversion initializers and the `samePosition(in:)` method overloads.
/// The following example finds the index of the first space in the string's
/// character view and then converts that to the same position in the UTF-8
/// view:
///
/// let hearts = "Hearts <3 💘"
/// if let i = hearts.characters.index(of: " ") {
/// let j = i.samePosition(in: hearts.utf8)
/// print(Array(hearts.utf8.prefix(upTo: j)))
/// }
/// // Prints "[72, 101, 97, 114, 116, 115]"
public struct Index : Comparable, CustomPlaygroundQuickLookable {
public // SPI(Foundation)
init(_base: String.UnicodeScalarView.Index) {
@@ -198,17 +272,17 @@ extension String.CharacterView : BidirectionalCollection {
public typealias IndexDistance = Int
/// The position of the first `Character` if `self` is
/// non-empty; identical to `endIndex` otherwise.
/// The position of the first character in a nonempty character view.
///
/// In an empty character view, `startIndex` is equal to `endIndex`.
public var startIndex: Index {
return Index(_base: unicodeScalars.startIndex)
}
/// The "past the end" position.
/// A character view's "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
///
/// `endIndex` is not a valid argument to `subscript`, and is always
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
/// In an empty character view, `endIndex` is equal to `startIndex`.
public var endIndex: Index {
return Index(_base: unicodeScalars.endIndex)
}
@@ -235,27 +309,44 @@ extension String.CharacterView : BidirectionalCollection {
i._utf16Index - predecessorLengthUTF16, i._base._core))
}
/// Access the `Character` at `position`.
/// Accesses the character at the given position.
///
/// - Precondition: `position` is a valid position in `self` and
/// `position != endIndex`.
/// The following example searches a string's character view for a capital
/// letter and then prints the character at the found index:
///
/// let greeting = "Hello, friend!"
/// if let i = greeting.characters.index(where: { "A"..."Z" ~= $0 }) {
/// print("First capital letter: \(greeting.characters[i])")
/// }
/// // Prints "First capital letter: H"
///
/// - Parameter i: A valid index of the character view. `i` must be less
/// than the view's end index.
public subscript(i: Index) -> Character {
return Character(String(unicodeScalars[i._base..<i._endBase]))
}
}
extension String.CharacterView : RangeReplaceableCollection {
/// Create an empty instance.
/// Creates an empty character view.
public init() {
self.init("")
}
/// Replace the characters within `bounds` with `newElements`.
/// Replaces the characters within the specified bounds with the given
/// characters.
///
/// Invalidates all indices with respect to `self`.
/// Invalidates all indices with respect to the string.
///
/// - Complexity: O(`bounds.count`) if `bounds.upperBound
/// == self.endIndex` and `newElements.isEmpty`, O(N) otherwise.
/// - Parameters:
/// - bounds: The range of characters to replace. The bounds of the range
/// must be valid indices of the character view.
/// - newElements: The new characters to add to the view.
///
/// - Complexity: O(*m*), where *m* is the combined length of the character
/// view and `newElements`. If the call to `replaceSubrange(_:with:)`
/// simply removes characters at the end of the view, the complexity is
/// O(*n*), where *n* is equal to `bounds.count`.
public mutating func replaceSubrange<
C: Collection where C.Iterator.Element == Character
>(
@@ -268,16 +359,25 @@ extension String.CharacterView : RangeReplaceableCollection {
_core.replaceSubrange(rawSubRange, with: lazyUTF16)
}
/// Reserve enough space to store `n` ASCII characters.
/// Reserves enough space in the character view's underlying storage to store
/// the specified number of ASCII characters.
///
/// - Complexity: O(`n`).
/// Because each element of a character view can require more than a single
/// ASCII character's worth of storage, additional allocation may be
/// necessary when adding characters to the character view after a call to
/// `reserveCapacity(_:)`.
///
/// - Parameter n: The minimum number of ASCII characters' worth of storage
/// to allocate.
///
/// - Complexity: O(*n*), where *n* is the capacity being reserved.
public mutating func reserveCapacity(_ n: Int) {
_core.reserveCapacity(n)
}
/// Append `c` to `self`.
/// Appends the given character to the character view.
///
/// - Complexity: Amortized O(1).
/// - Parameter c: The character to append to the character view.
public mutating func append(_ c: Character) {
switch c._representation {
case .small(let _63bits):
@@ -288,7 +388,9 @@ extension String.CharacterView : RangeReplaceableCollection {
}
}
/// Append the elements of `newElements` to `self`.
/// Appends the characters in the given sequence to the character view.
///
/// - Parameter newElements: A sequence of characters.
public mutating func append<
S : Sequence where S.Iterator.Element == Character
>(contentsOf newElements: S) {
@@ -298,7 +400,10 @@ extension String.CharacterView : RangeReplaceableCollection {
}
}
/// Create an instance containing `characters`.
/// Creates a new character view containing the characters in the given
/// sequence.
///
/// - Parameter characters: A sequence of characters.
public init<
S : Sequence where S.Iterator.Element == Character
>(_ characters: S) {
@@ -309,10 +414,19 @@ extension String.CharacterView : RangeReplaceableCollection {
// Algorithms
extension String.CharacterView {
/// Access the characters in `bounds`.
/// Accesses the characters in the given range.
///
/// - Complexity: O(1) unless bridging from Objective-C requires an
/// O(N) conversion.
/// The example below uses this subscript to access the characters up to, but
/// not including, the first comma (`","`) in the string.
///
/// let str = "All this happened, more or less."
/// let i = str.characters.index(of: ",")!
/// let substring = str.characters[str.characters.startIndex ..< i]
/// print(String(substring))
/// // Prints "All this happened"
///
/// - Complexity: O(*n*) if the underlying string is bridged from
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
public subscript(bounds: Range<Index>) -> String.CharacterView {
let unicodeScalarRange =
bounds.lowerBound._base..<bounds.upperBound._base

View File

@@ -11,14 +11,42 @@
//===----------------------------------------------------------------------===//
extension String.Index {
/// Construct the position in `characters` that corresponds exactly to
/// `unicodeScalarIndex`. If no such position exists, the result is `nil`.
/// Creates an index in the given string that corresponds exactly to the
/// specified `UnicodeScalarView` position.
///
/// - Precondition: `unicodeScalarIndex` is an element of
/// `characters.unicodeScalars.indices`.
/// The following example converts the position of the Unicode scalar `"e"`
/// into its corresponding position in the string's character view. The
/// character at that position is the composed `"é"` character.
///
/// let cafe = "Cafe\u{0301}"
/// print(cafe)
/// // Prints "Café"
///
/// let scalarsIndex = cafe.unicodeScalars.index(of: "e")!
/// let charactersIndex = String.Index(scalarsIndex, within: cafe)!
///
/// print(String(cafe.characters.prefix(through: charactersIndex)))
/// // Prints "Café"
///
/// If the position passed in `unicodeScalarIndex` doesn't have an exact
/// corresponding position in `other.characters`, the result of the
/// initializer is `nil`. For example, an attempt to convert the position of
/// the combining acute accent (`"\u{0301}"`) fails. Combining Unicode
/// scalars do not have their own position in a character view.
///
/// let nextIndex = String.Index(cafe.unicodeScalars.index(after: scalarsIndex),
/// within: cafe)
/// print(nextIndex)
/// // Prints "nil"
///
/// - Parameters:
/// - unicodeScalarIndex: A position in the `unicodeScalars` view of the
/// `other` parameter.
/// - other: The string referenced by both `unicodeScalarIndex` and the
/// resulting index.
public init?(
_ unicodeScalarIndex: String.UnicodeScalarIndex,
within characters: String
within other: String
) {
if !unicodeScalarIndex._isOnGraphemeClusterBoundary {
return nil
@@ -26,18 +54,51 @@ extension String.Index {
self.init(_base: unicodeScalarIndex)
}
/// Construct the position in `characters` that corresponds exactly to
/// `utf16Index`. If no such position exists, the result is `nil`.
/// Creates an index in the given string that corresponds exactly to the
/// specified `UTF16View` position.
///
/// - Precondition: `utf16Index` is an element of
/// `characters.utf16.indices`.
/// The following example finds the position of a space in a string's `utf16`
/// view and then converts that position to an index in the string's
/// `characters` view. The value `32` is the UTF-16 encoded value of a space
/// character.
///
/// let cafe = "Café 🍵"
///
/// let utf16Index = cafe.utf16.index(of: 32)!
/// let charactersIndex = String.Index(utf16Index, within: cafe)!
///
/// print(String(cafe.characters.prefix(upTo: charactersIndex)))
/// // Prints "Café"
///
/// If the position passed in `utf16Index` doesn't have an exact
/// corresponding position in `other.characters`, the result of the
/// initializer is `nil`. For example, an attempt to convert the position of
/// the trailing surrogate of a UTF-16 surrogate pair fails.
///
/// The next example attempts to convert the indices of the two UTF-16 code
/// units that represent the teacup emoji (`"🍵"`). The index of the lead
/// surrogate is successfully converted to a position in `other.characters`,
/// but the index of the trailing surrogate is not.
///
/// let emojiHigh = cafe.utf16.index(after: utf16Index)
/// print(String.Index(emojiHigh, within: cafe))
/// // Prints "Optional(String.Index(...))"
///
/// let emojiLow = cafe.utf16.index(after: emojiHigh)
/// print(String.Index(emojiLow, within: cafe))
/// // Prints "nil"
///
/// - Parameters:
/// - utf16Index: A position in the `utf16` view of the `other` parameter.
/// - other: The string referenced by both `utf16Index` and the resulting
/// index.
public init?(
_ utf16Index: String.UTF16Index,
within characters: String
within other: String
) {
if let me = utf16Index.samePosition(
in: characters.unicodeScalars
)?.samePosition(in: characters) {
in: other.unicodeScalars
)?.samePosition(in: other) {
self = me
}
else {
@@ -45,18 +106,25 @@ extension String.Index {
}
}
/// Construct the position in `characters` that corresponds exactly to
/// `utf8Index`. If no such position exists, the result is `nil`.
/// Creates an index in the given string that corresponds exactly to the
/// specified `UTF8View` position.
///
/// - Precondition: `utf8Index` is an element of
/// `characters.utf8.indices`.
/// If the position passed in `utf8Index` doesn't have an exact corresponding
/// position in `other.characters`, the result of the initializer is `nil`.
/// For example, an attempt to convert the position of a UTF-8 continuation
/// byte returns `nil`.
///
/// - Parameters:
/// - utf8Index: A position in the `utf8` view of the `other` parameter.
/// - other: The string referenced by both `utf8Index` and the resulting
/// index.
public init?(
_ utf8Index: String.UTF8Index,
within characters: String
within other: String
) {
if let me = utf8Index.samePosition(
in: characters.unicodeScalars
)?.samePosition(in: characters) {
in: other.unicodeScalars
)?.samePosition(in: other) {
self = me
}
else {
@@ -64,30 +132,71 @@ extension String.Index {
}
}
/// Returns the position in `utf8` that corresponds exactly
/// to `self`.
/// Returns the position in the given UTF-8 view that corresponds exactly to
/// this index.
///
/// - Precondition: `self` is an element of `String(utf8).indices`.
/// The index must be a valid index of `String(utf8).characters`.
///
/// This example first finds the position of the character `"é"` and then uses
/// this method to find the same position in the string's `utf8` view.
///
/// let cafe = "Café"
/// if let i = cafe.characters.index(of: "é") {
/// let j = i.samePosition(in: cafe.utf8)
/// print(Array(cafe.utf8.suffix(from: j)))
/// }
/// // Prints "[195, 169]"
///
/// - Parameter utf8: The view to use for the index conversion.
/// - Returns: The position in `utf8` that corresponds exactly to this index.
public func samePosition(
in utf8: String.UTF8View
) -> String.UTF8View.Index {
return String.UTF8View.Index(self, within: utf8)
}
/// Returns the position in `utf16` that corresponds exactly
/// to `self`.
/// Returns the position in the given UTF-16 view that corresponds exactly to
/// this index.
///
/// - Precondition: `self` is an element of `String(utf16).indices`.
/// The index must be a valid index of `String(utf16).characters`.
///
/// This example first finds the position of the character `"é"` and then uses
/// this method to find the same position in the string's `utf16` view.
///
/// let cafe = "Café"
/// if let i = cafe.characters.index(of: "é") {
/// let j = i.samePosition(in: cafe.utf16)
/// print(cafe.utf16[j])
/// }
/// // Prints "233"
///
/// - Parameter utf16: The view to use for the index conversion.
/// - Returns: The position in `utf16` that corresponds exactly to this index.
public func samePosition(
in utf16: String.UTF16View
) -> String.UTF16View.Index {
return String.UTF16View.Index(self, within: utf16)
}
/// Returns the position in `unicodeScalars` that corresponds exactly
/// to `self`.
/// Returns the position in the given view of Unicode scalars that
/// corresponds exactly to this index.
///
/// - Precondition: `self` is an element of `String(unicodeScalars).indices`.
/// The index must be a valid index of `String(unicodeScalars).characters`.
///
/// This example first finds the position of the character `"é"` and then uses
/// this method to find the same position in the string's `unicodeScalars`
/// view.
///
/// let cafe = "Café"
/// if let i = cafe.characters.index(of: "é") {
/// let j = i.samePosition(in: cafe.unicodeScalars)
/// print(cafe.unicodeScalars[j])
/// }
/// // Prints "é"
///
/// - Parameter unicodeScalars: The view to use for the index conversion.
/// - Returns: The position in `unicodeScalars` that corresponds exactly to
/// this index.
public func samePosition(
in unicodeScalars: String.UnicodeScalarView
) -> String.UnicodeScalarView.Index {

View File

@@ -35,7 +35,20 @@ for int_ty in all_integer_types(word_bits):
}%
extension String : StringInterpolationConvertible {
/// Create an instance by concatenating the elements of `strings`.
/// Creates a new string by concatenating the given interpolations.
///
/// Do not call this initializer directly. It is used by the compiler when
/// you create a string using string interpolation. Instead, use string
/// interpolation to create a new string by including values, literals,
/// variables, or expressions enclosed in parentheses, prefixed by a
/// backslash (`\(`...`)`).
///
/// let price = 2
/// let number = 3
/// let message = "If one cookie costs \(price) dollars, " +
/// "\(number) cookies cost \(price * number) dollars."
/// print(message)
/// // Prints "If one cookie costs 2 dollars, 3 cookies cost 6 dollars."
@effects(readonly)
public init(stringInterpolation strings: String...) {
self.init()
@@ -44,18 +57,36 @@ extension String : StringInterpolationConvertible {
}
}
/// Create an instance containing `expr`'s `print` representation.
/// Creates a string containing the given expression's textual
/// representation.
///
/// Do not call this initializer directly. It is used by the compiler when
/// interpreting string interpolations.
///
/// - SeeAlso: `StringInterpolationConvertible`
public init<T>(stringInterpolationSegment expr: T) {
self = String(expr)
}
% for Type in StreamableTypes:
/// Creates a string containing the given value's textual representation.
///
/// Do not call this initializer directly. It is used by the compiler when
/// interpreting string interpolations.
///
/// - SeeAlso: `StringInterpolationConvertible`
public init(stringInterpolationSegment expr: ${Type}) {
self = _toStringReadOnlyStreamable(expr)
}
% end
% for Type in PrintableTypes:
/// Creates a string containing the given value's textual representation.
///
/// Do not call this initializer directly. It is used by the compiler when
/// interpreting string interpolations.
///
/// - SeeAlso: `StringInterpolationConvertible`
public init(stringInterpolationSegment expr: ${Type}) {
self = _toStringReadOnlyPrintable(expr)
}

View File

@@ -13,8 +13,15 @@
import SwiftShims
extension String {
/// Construct an instance that is the concatenation of `count` copies
/// of `repeatedValue`.
/// Creates a string representing the given character repeated the specified
/// number of times.
///
/// For example, use this initializer to create a string with ten `"0"`
/// characters in a row.
///
/// let zeroes = String(repeating: "0" as Character, count: 10)
/// print(zeroes)
/// // Prints "0000000000"
public init(repeating repeatedValue: Character, count: Int) {
let s = String(repeatedValue)
self = String(_storage: _StringBuffer(
@@ -26,8 +33,15 @@ extension String {
}
}
/// Construct an instance that is the concatenation of `count` copies
/// of `Character(repeatedValue)`.
/// Creates a string representing the given Unicode scalar repeated the
/// specified number of times.
///
/// For example, use this initializer to create a string with ten `"0"`
/// scalars in a row.
///
/// let zeroes = String(repeating: "0" as UnicodeScalar, count: 10)
/// print(zeroes)
/// // Prints "0000000000"
public init(repeating repeatedValue: UnicodeScalar, count: Int) {
self = String._fromWellFormedCodeUnitSequence(
UTF32.self,
@@ -43,7 +57,7 @@ extension String {
return scalarSlices.map { String($0) }
}
/// `true` iff `self` contains no characters.
/// A Boolean value indicating whether a string has no characters.
public var isEmpty : Bool {
return _core.count == 0
}
@@ -72,7 +86,36 @@ func _stdlib_NSStringHasSuffixNFD(_ theString: AnyObject, _ suffix: AnyObject) -
func _stdlib_NSStringHasSuffixNFDPointer(_ theString: OpaquePointer, _ suffix: OpaquePointer) -> Bool
extension String {
/// Returns `true` iff `self` begins with `prefix`.
/// Returns a Boolean value indicating whether the string begins with the
/// specified prefix.
///
/// The comparison is both case sensitive and Unicode safe. The
/// case-sensitive comparison will only match strings whose corresponding
/// characters have the same case.
///
/// let cafe = "Café du Monde"
///
/// // Case sensitive
/// print(cafe.hasPrefix("café"))
/// // Prints "false"
///
/// The Unicode-safe comparison matches Unicode scalar values rather than the
/// code points used to compose them. The example below uses two strings
/// with different forms of the `"é"` character---the first uses the composed
/// form and the second uses the decomposed form.
///
/// // Unicode safe
/// let composedCafe = "Café"
/// let decomposedCafe = "Cafe\u{0301}"
///
/// print(cafe.hasPrefix(composedCafe))
/// // Prints "true"
/// print(cafe.hasPrefix(decomposedCafe))
/// // Prints "true"
///
/// - Parameter prefix: A possible prefix to test against this string.
/// Passing an empty string (`""`) as `prefix` always results in `false`.
/// - Returns: `true` if the string begins with `prefix`, otherwise, `false`.
public func hasPrefix(_ prefix: String) -> Bool {
let selfCore = self._core
let prefixCore = prefix._core
@@ -96,7 +139,36 @@ extension String {
self._bridgeToObjectiveCImpl(), prefix._bridgeToObjectiveCImpl())
}
/// Returns `true` iff `self` ends with `suffix`.
/// Returns a Boolean value indicating whether the string ends with the
/// specified suffix.
///
/// The comparison is both case sensitive and Unicode safe. The
/// case-sensitive comparison will only match strings whose corresponding
/// characters have the same case.
///
/// let plans = "Let's meet at the café"
///
/// // Case sensitive
/// print(plans.hasSuffix("Café"))
/// // Prints "false"
///
/// The Unicode-safe comparison matches Unicode scalar values rather than the
/// code points used to compose them. The example below uses two strings
/// with different forms of the `"é"` character---the first uses the composed
/// form and the second uses the decomposed form.
///
/// // Unicode safe
/// let composedCafe = "café"
/// let decomposedCafe = "cafe\u{0301}"
///
/// print(plans.hasSuffix(composedCafe))
/// // Prints "true"
/// print(plans.hasSuffix(decomposedCafe))
/// // Prints "true"
///
/// - Parameter suffix: A possible suffix to test against this string.
/// Passing an empty string (`""`) as `suffix` always results in `false`.
/// - Returns: `true` if the string ends with `suffix`, otherwise, `false`.
public func hasSuffix(_ suffix: String) -> Bool {
let selfCore = self._core
let suffixCore = suffix._core
@@ -133,38 +205,86 @@ extension String {
// FIXME: can't just use a default arg for radix below; instead we
// need these single-arg overloads <rdar://problem/17775455>
/// Create an instance representing `v` in base 10.
/// Creates a string representing the given value in base 10.
///
/// The following example converts the maximal `Int` value to a string and
/// prints its length:
///
/// let max = String(Int.max)
/// print("\(max) has \(max.utf16.count) digits.")
/// // Prints "9223372036854775807 has 19 digits."
public init<T : _SignedInteger>(_ v: T) {
self = _int64ToString(v.toIntMax())
}
/// Create an instance representing `v` in base 10.
/// Creates a string representing the given value in base 10.
///
/// The following example converts the maximal `UInt` value to a string and
/// prints its length:
///
/// let max = String(UInt.max)
/// print("\(max) has \(max.utf16.count) digits.")
/// // Prints "18446744073709551615 has 20 digits."
public init<T : UnsignedInteger>(_ v: T) {
self = _uint64ToString(v.toUIntMax())
}
/// Create an instance representing `v` in the given `radix` (base).
/// Creates a string representing the given value in the specified base.
///
/// Numerals greater than 9 are represented as roman letters,
/// starting with `a` if `uppercase` is `false` or `A` otherwise.
/// Numerals greater than 9 are represented as Roman letters. These letters
/// start with `"A"` if `uppercase` is `true`; otherwise, with `"a"`.
///
/// let v = 999_999
/// print(String(v, radix: 2))
/// // Prints "11110100001000111111"
///
/// print(String(v, radix: 16))
/// // Prints "f423f"
/// print(String(v, radix: 16, uppercase: true))
/// // Prints "F423F"
///
/// - Parameters:
/// - value: The value to convert to a string.
/// - radix: The base to use for the string representation. `radix` must be
/// at least 2 and at most 36.
/// - uppercase: Pass `true` to use uppercase letters to represent numerals
/// greater than 9, or `false` to use lowercase letters. The default is
/// `false`.
public init<T : _SignedInteger>(
_ v: T, radix: Int, uppercase: Bool = false
_ value: T, radix: Int, uppercase: Bool = false
) {
_precondition(radix > 1, "Radix must be greater than 1")
self = _int64ToString(
v.toIntMax(), radix: Int64(radix), uppercase: uppercase)
value.toIntMax(), radix: Int64(radix), uppercase: uppercase)
}
/// Create an instance representing `v` in the given `radix` (base).
/// Creates a string representing the given value in the specified base.
///
/// Numerals greater than 9 are represented as roman letters,
/// starting with `a` if `uppercase` is `false` or `A` otherwise.
/// Numerals greater than 9 are represented as Roman letters. These letters
/// start with `"A"` if `uppercase` is `true`; otherwise, with `"a"`.
///
/// let v: UInt = 999_999
/// print(String(v, radix: 2))
/// // Prints "11110100001000111111"
///
/// print(String(v, radix: 16))
/// // Prints "f423f"
/// print(String(v, radix: 16, uppercase: true))
/// // Prints "F423F"
///
/// - Parameters:
/// - value: The value to convert to a string.
/// - radix: The base to use for the string representation. `radix` must be
/// at least 2 and at most 36.
/// - uppercase: Pass `true` to use uppercase letters to represent numerals
/// greater than 9, or `false` to use lowercase letters. The default is
/// `false`.
public init<T : UnsignedInteger>(
_ v: T, radix: Int, uppercase: Bool = false
_ value: T, radix: Int, uppercase: Bool = false
) {
_precondition(radix > 1, "Radix must be greater than 1")
self = _uint64ToString(
v.toUIntMax(), radix: Int64(radix), uppercase: uppercase)
value.toUIntMax(), radix: Int64(radix), uppercase: uppercase)
}
}

View File

@@ -14,18 +14,25 @@
// similar API.
extension String {
/// The index type for subscripting a string.
public typealias Index = CharacterView.Index
/// A type used to represent the number of steps between two `String.Index`
/// values, where one value is reachable from the other.
///
/// In Swift, *reachability* refers to the ability to produce one value from
/// the other through zero or more applications of `index(after:)`.
public typealias IndexDistance = CharacterView.IndexDistance
/// The position of the first `Character` in `self.characters` if
/// `self` is non-empty; identical to `endIndex` otherwise.
/// The position of the first character in a nonempty string.
///
/// In an empty string, `startIndex` is equal to `endIndex`.
public var startIndex: Index { return characters.startIndex }
/// The "past the end" position in `self.characters`.
/// A string's "past the end" position---that is, the position one greater
/// than the last valid subscript argument.
///
/// `endIndex` is not a valid argument to `subscript`, and is always
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
/// In an empty string, `endIndex` is equal to `startIndex`.
public var endIndex: Index { return characters.endIndex }
// TODO: swift-3-indexing-model - add docs
@@ -55,16 +62,25 @@ extension String {
return characters.distance(from: start, to: end)
}
/// Access the `Character` at `position`.
/// Accesses the character at the given position.
///
/// - Precondition: `position` is a valid position in `self.characters`
/// and `position != endIndex`.
/// Indices for subscripting a string are shared with the string's
/// `characters` view. For example:
///
/// let greeting = "Hello, friend!"
/// if let i = greeting.characters.index(where: { $0 >= "A" && $0 <= "Z" }) {
/// print("First capital letter: \(greeting[i])")
/// }
/// // Prints "First capital letter: H"
///
/// - Parameter i: A valid index of the string. `i` must be less than the
/// string's end index.
public subscript(i: Index) -> Character { return characters[i] }
/// Return the characters within the given `bounds`.
/// Accesses the text in the given range.
///
/// - Complexity: O(1) unless bridging from Objective-C requires an
/// O(N) conversion.
/// - Complexity: O(*n*) if the underlying string is bridged from
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
public subscript(bounds: Range<Index>) -> String {
return String(characters[bounds])
}
@@ -79,24 +95,63 @@ public func < (lhs: String.Index, rhs: String.Index) -> Bool {
}
extension String {
/// Create an instance containing `characters`.
/// Creates a new string containing the characters in the given sequence.
///
/// You can use this initializer to create a new string from the result of
/// one or more operations on a string's `characters` view. For example:
///
/// let str = "The rain in Spain stays mainly in the plain."
///
/// let vowels: Set<Character> = ["a", "e", "i", "o", "u"]
/// let disemvowelled = String(str.characters.lazy.filter { !vowels.contains($0) })
///
/// print(disemvowelled)
/// // Prints "Th rn n Spn stys mnly n th pln."
///
/// - Parameter characters: A sequence of characters.
public init<
S : Sequence where S.Iterator.Element == Character
>(_ characters: S) {
self._core = CharacterView(characters)._core
}
/// Reserves enough space in the string's underlying storage to store the
/// specified number of ASCII characters.
///
/// Because each character in a string can require more than a single ASCII
/// character's worth of storage, additional allocation may be necessary
/// when adding characters to a string after a call to
/// `reserveCapacity(_:)`.
///
/// - Parameter n: The minimum number of ASCII characters' worth of storage
/// to allocate.
///
/// - Complexity: O(*n*)
public mutating func reserveCapacity(_ n: Int) {
withMutableCharacters {
(v: inout CharacterView) in v.reserveCapacity(n)
}
}
/// Appends the given character to the string.
///
/// The following example adds an emoji globe to the end of a string.
///
/// var globe = "Globe "
/// globe.append("🌍")
/// print(globe)
/// // Prints "Globe 🌍"
///
/// - Parameter c: The character to append to the string.
public mutating func append(_ c: Character) {
withMutableCharacters {
(v: inout CharacterView) in v.append(c)
}
}
/// Appends the characters in the given sequence to the string.
///
/// - Parameter newElements: A sequence of characters.
public mutating func append<
S : Sequence where S.Iterator.Element == Character
>(contentsOf newElements: S) {
@@ -106,13 +161,20 @@ extension String {
}
% for Range in ['Range', 'ClosedRange']:
/// Replace the characters within `bounds` with the elements of
/// `replacement`.
/// Replaces the text within the specified bounds with the given characters.
///
/// Invalidates all indices with respect to `self`.
/// Calling this method invalidates any existing indices for use with this
/// string.
///
/// - Complexity: O(`bounds.count`) if `bounds.upperBound
/// == self.endIndex` and `newElements.isEmpty`, O(N) otherwise.
/// - Parameters:
/// - bounds: The range of text to replace. The bounds of the range must be
/// valid indices of the string.
/// - newElements: The new characters to add to the string.
///
/// - Complexity: O(*m*), where *m* is the combined length of the string and
/// `newElements`. If the call to `replaceSubrange(_:with:)` simply
/// removes text at the end of the string, the complexity is O(*n*), where
/// *n* is equal to `bounds.count`.
public mutating func replaceSubrange<
C : Collection where C.Iterator.Element == Character
>(
@@ -125,12 +187,20 @@ extension String {
}
}
/// Replace the text in `bounds` with `replacement`.
/// Replaces the text within the specified bounds with the given string.
///
/// Invalidates all indices with respect to `self`.
/// Calling this method invalidates any existing indices for use with this
/// string.
///
/// - Complexity: O(`bounds.count`) if `bounds.upperBound
/// == self.endIndex` and `newElements.isEmpty`, O(N) otherwise.
/// - Parameters:
/// - bounds: The range of text to replace. The bounds of the range must be
/// valid indices of the string.
/// - newElements: The new text to add to the string.
///
/// - Complexity: O(*m*), where *m* is the combined length of the string and
/// `newElements`. If the call to `replaceSubrange(_:with:)` simply
/// removes text at the end of the string, the complexity is O(*n*), where
/// *n* is equal to `bounds.count`.
public mutating func replaceSubrange(
_ bounds: ${Range}<Index>, with newElements: String
) {
@@ -139,22 +209,37 @@ extension String {
}
% end
/// Insert `newElement` at position `i`.
/// Inserts a new character at the specified position.
///
/// Invalidates all indices with respect to `self`.
/// Calling this method invalidates any existing indices for use with this
/// string.
///
/// - Complexity: O(`self.count`).
/// - Parameters:
/// - newElement: The new character to insert into the string.
/// - i: A valid index of the string. If `i` is equal to the string's end
/// index, this method appends `newElement` to the string.
///
/// - Complexity: O(*n*), where *n* is the length of the string.
public mutating func insert(_ newElement: Character, at i: Index) {
withMutableCharacters {
(v: inout CharacterView) in v.insert(newElement, at: i)
}
}
/// Insert `newElements` at position `i`.
/// Inserts a collection of characters at the specified position.
///
/// Invalidates all indices with respect to `self`.
/// Calling this method invalidates any existing indices for use with this
/// string.
///
/// - Complexity: O(`self.count + newElements.count`).
/// - Parameters:
/// - newElements: A collection of `Character` elements to insert into the
/// string.
/// - i: A valid index of the string. If `i` is equal to the string's end
/// index, this method appends the contents of `newElements` to the
/// string.
///
/// - Complexity: O(*n*), where *n* is the combined length of the string and
/// `newElements`.
public mutating func insert<
S : Collection where S.Iterator.Element == Character
>(contentsOf newElements: S, at i: Index) {
@@ -163,11 +248,24 @@ extension String {
}
}
/// Remove and return the `Character` at position `i`.
/// Removes and returns the character at the specified position.
///
/// Invalidates all indices with respect to `self`.
/// All the elements following `i` are moved to close the gap. This example
/// removes the hyphen from the middle of a string.
///
/// - Complexity: O(`self.count`).
/// var nonempty = "non-empty"
/// if let i = nonempty.characters.index(of: "-") {
/// nonempty.remove(at: i)
/// }
/// print(nonempty)
/// // Prints "nonempty"
///
/// Calling this method invalidates any existing indices for use with this
/// string.
///
/// - Parameter i: The position of the character to remove. `i` must be a
/// valid index of the string that is not equal to the string's end index.
/// - Returns: The character that was removed.
@discardableResult
public mutating func remove(at i: Index) -> Character {
return withMutableCharacters {
@@ -176,11 +274,19 @@ extension String {
}
% for Range in ['Range', 'ClosedRange']:
/// Remove the characters in `bounds`.
/// Removes the characters in the given range.
///
/// Invalidates all indices with respect to `self`.
/// Calling this method invalidates any existing indices for use with this
/// string.
///
/// - Complexity: O(`self.count`).
% if Range == 'ClosedRange':
/// - Parameter bounds: The range of the elements to remove. The upper and
/// lower bounds of `bounds` must be valid indices of the string and not
/// equal to the string's end index.
% else:
/// - Parameter bounds: The range of the elements to remove. The upper and
/// lower bounds of `bounds` must be valid indices of the string.
% end
public mutating func removeSubrange(_ bounds: ${Range}<Index>) {
// FIXME: swift-3-indexing-model: tests.
withMutableCharacters {
@@ -189,13 +295,15 @@ extension String {
}
% end
/// Replace `self` with the empty string.
/// Replaces this string with the empty string.
///
/// Invalidates all indices with respect to `self`.
/// Calling this method invalidates any existing indices for use with this
/// string.
///
/// - parameter keepCapacity: If `true`, prevents the release of
/// allocated storage, which can be a useful optimization
/// when `self` is going to be grown again.
/// - Parameter keepCapacity: Pass `true` to prevent the release of the
/// string's allocated storage. Retaining the storage can be a useful
/// optimization when you're planning to grow the string again. The
/// default value is `false`.
public mutating func removeAll(keepingCapacity keepCapacity: Bool = false) {
withMutableCharacters {
(v: inout CharacterView) in v.removeAll(keepingCapacity: keepCapacity)

View File

@@ -14,13 +14,123 @@
// allow performance optimizations of linear traversals.
extension String {
/// A collection of UTF-16 code units that encodes a `String` value.
/// A view of a string's contents as a collection of UTF-16 code units.
///
/// You can access a string's view of UTF-16 code units by using its `utf16`
/// property. A string's UTF-16 view encodes the string's Unicode scalar
/// values as 16-bit integers.
///
/// let flowers = "Flowers 💐"
/// for v in flowers.utf16 {
/// print(v)
/// }
/// // 70
/// // 108
/// // 111
/// // 119
/// // 101
/// // 114
/// // 115
/// // 32
/// // 55357
/// // 56464
///
/// Unicode scalar values that make up a string's contents can be up to 21
/// bits long. The longer scalar values may need two `UInt16` values for
/// storage. Those "pairs" of code units are called *surrogate pairs*.
///
/// let flowermoji = "💐"
/// for v in flowermoji.unicodeScalars {
/// print(v, v.value)
/// }
/// // 💐 128144
///
/// for v in flowermoji.utf16 {
/// print(v)
/// }
/// // 55357
/// // 56464
///
/// To convert a `String.UTF16View` instance back into a string, use the
/// `String` type's `init(_:)` initializer.
///
/// let favemoji = "My favorite emoji is 🎉"
/// if let i = favemoji.utf16.index(where: { $0 >= 128 }),
/// let asciiPrefix = String(favemoji.utf16.prefix(upTo: i)) {
/// print(asciiPrefix)
/// }
/// // Prints "My favorite emoji is "
///
/// UTF16View Elements Match NSString Characters
/// ============================================
///
/// The UTF-16 code units of a string's `utf16` view match the elements
/// accessed through indexed `NSString` APIs.
///
/// print(flowers.utf16.count)
/// // Prints "10"
///
/// let nsflowers = flowers as NSString
/// print(nsflowers.length)
/// // Prints "10"
///
/// Unlike `NSString`, however, `String.UTF16View` does not use integer
/// indices. If you need to access a specific position in a UTF-16 view, use
/// Swift's index manipulation methods. The following example accesses the
/// fourth code unit in both the `flowers` and `nsflowers` strings:
///
/// print(nsflowers.character(at: 3))
/// // Prints "119"
///
/// let i = flowers.utf16.index(flowers.utf16.startIndex, offsetBy: 3)
/// print(flowers.utf16[i])
/// // Prints "119"
///
/// Although the Swift overlay updates many Objective-C methods to return
/// native Swift indices and index ranges, some still return instances of
/// `NSRange`. To convert an `NSRange` instance to a range of
/// `String.UTF16View.Index`, follow these steps:
///
/// 1. Use the `NSRange` type's `toRange` method to convert the instance to
/// an optional range of `Int` values.
/// 2. Use your string's `utf16` view's index manipulation methods to convert
/// the integer bounds to `String.UTF16View.Index` values.
/// 3. Create a new `Range` instance from the new index values.
///
/// Here's an implementation of those steps, showing how to retrieve a
/// substring described by an `NSRange` instance from the middle of a
/// string.
///
/// let snowy = "❄️ Let it snow! ☃️"
/// let nsrange = NSRange(location: 3, length: 12)
/// if let r = nsrange.toRange() {
/// let start = snowy.utf16.index(snowy.utf16.startIndex, offsetBy: r.lowerBound)
/// let end = snowy.utf16.index(snowy.utf16.startIndex, offsetBy: r.upperBound)
/// let substringRange = start..<end
/// print(snowy.utf16[substringRange])
/// }
/// // Prints "Let it snow!"
public struct UTF16View
: BidirectionalCollection,
CustomStringConvertible,
CustomDebugStringConvertible {
/// A position in a string's collection of UTF-16 code units.
///
/// You can convert between indices of the different string views by using
/// conversion initializers and the `samePosition(in:)` method overloads.
/// For example, the following code sample finds the index of the first
/// space in the string's character view and then converts that to the same
/// position in the UTF-16 view.
///
/// let hearts = "Hearts <3 ♥︎ 💘"
/// if let i = hearts.characters.index(of: " ") {
/// let j = i.samePosition(in: hearts.utf16)
/// print(Array(hearts.utf16.suffix(from: j)))
/// print(hearts.utf16.suffix(from: j))
/// }
/// // Prints "[32, 60, 51, 32, 9829, 65038, 32, 55357, 56472]"
/// // Prints " <3 ♥︎ 💘"
public struct Index : Comparable {
// Foundation needs access to these fields so it can expose
// random access
@@ -33,16 +143,15 @@ extension String {
public typealias IndexDistance = Int
/// The position of the first code unit if the `String` is
/// non-empty; identical to `endIndex` otherwise.
/// nonempty; identical to `endIndex` otherwise.
public var startIndex: Index {
return Index(_offset: 0)
}
/// The "past the end" position.
/// The "past the end" position---that is, the position one greater than
/// the last valid subscript argument.
///
/// `endIndex` is not a valid argument to `subscript`, and is always
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
/// In an empty UTF-16 view, `endIndex` is equal to `startIndex`.
public var endIndex: Index {
return Index(_offset: _length)
}
@@ -98,10 +207,18 @@ extension String {
return _core.startIndex + _offset + i
}
/// Access the element at `position`.
/// Accesses the code unit at the given position.
///
/// - Precondition: `position` is a valid position in `self` and
/// `position != endIndex`.
/// The following example uses the subscript to print the value of a
/// string's first UTF-16 code unit.
///
/// let greeting = "Hello, friend!"
/// let i = greeting.utf16.startIndex
/// print("First character's UTF-16 code unit: \(greeting.utf16[i])")
/// // Prints "First character's UTF-16 code unit: 72"
///
/// - Parameter i: A valid index of the view. `i` must be less than the
/// view's end index.
public subscript(i: Index) -> UTF16.CodeUnit {
let position = i._offset
_precondition(position >= 0 && position < _length,
@@ -152,10 +269,11 @@ extension String {
}
#endif
/// Get the contiguous subrange of elements enclosed by `bounds`.
/// Accesses the contiguous subrange of elements enclosed by the specified
/// range.
///
/// - Complexity: O(1) unless bridging from Objective-C requires an
/// O(N) conversion.
/// - Complexity: O(*n*) if the underlying string is bridged from
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
public subscript(bounds: Range<Index>) -> UTF16View {
return UTF16View(
_core,
@@ -200,9 +318,24 @@ extension String {
}
}
/// Construct the `String` corresponding to the given sequence of
/// UTF-16 code units. If `utf16` contains unpaired surrogates, the
/// result is `nil`.
/// Creates a string corresponding to the given sequence of UTF-16 code units.
///
/// If `utf16` contains unpaired UTF-16 surrogates, the result is `nil`.
///
/// You can use this initializer to create a new string from a slice of
/// another string's `utf16` view.
///
/// let picnicGuest = "Deserving porcupine"
/// if let i = picnicGuest.utf16.index(of: 32) {
/// let adjective = String(picnicGuest.utf16.prefix(upTo: i))
/// print(adjective)
/// }
/// // Prints "Optional("Deserving")"
///
/// The `adjective` constant is created by calling this initializer with a
/// slice of the `picnicGuest.utf16` view.
///
/// - Parameter utf16: A UTF-16 code sequence.
public init?(_ utf16: UTF16View) {
let wholeString = String(utf16._core)
@@ -219,7 +352,7 @@ extension String {
return nil
}
/// The index type for subscripting a `String`'s `utf16` view.
/// The index type for subscripting a string's `utf16` view.
public typealias UTF16Index = UTF16View.Index
}
@@ -239,11 +372,31 @@ public func < (
// Index conversions
extension String.UTF16View.Index {
/// Construct the position in `utf16` that corresponds exactly to
/// `utf8Index`. If no such position exists, the result is `nil`.
/// Creates an index in the given UTF-16 view that corresponds exactly to the
/// specified `UTF8View` position.
///
/// - Precondition: `utf8Index` is an element of
/// `String(utf16)!.utf8.indices`.
/// The following example finds the position of a space in a string's `utf8`
/// view and then converts that position to an index in the string's
/// `utf16` view.
///
/// let cafe = "Café 🍵"
///
/// let utf8Index = cafe.utf8.index(of: 32)!
/// let utf16Index = String.UTF16View.Index(utf8Index, within: cafe.utf16)!
///
/// print(cafe.utf16.prefix(upTo: utf16Index))
/// // Prints "Café"
///
/// If the position passed as `utf8Index` doesn't have an exact corresponding
/// position in `utf16`, the result of the initializer is `nil`. For
/// example, because UTF-8 and UTF-16 represent high Unicode code points
/// differently, an attempt to convert the position of a UTF-8 continuation
/// byte fails.
///
/// - Parameters:
/// - utf8Index: A position in a `UTF8View` instance. `utf8Index` must be
/// an element in `String(utf16).utf8.indices`.
/// - utf16: The `UTF16View` in which to find the new position.
public init?(
_ utf8Index: String.UTF8Index, within utf16: String.UTF16View
) {
@@ -260,52 +413,129 @@ extension String.UTF16View.Index {
_offset = utf8Index._coreIndex
}
/// Construct the position in `utf16` that corresponds exactly to
/// `unicodeScalarIndex`.
/// Creates an index in the given UTF-16 view that corresponds exactly to the
/// specified `UnicodeScalarView` position.
///
/// - Precondition: `unicodeScalarIndex` is an element of
/// `String(utf16)!.unicodeScalars.indices`.
/// The following example finds the position of the scalar `"é"` in a string's
/// `unicodeScalars` view and then converts that position to an index in the
/// string's `utf16` view.
///
/// let cafe = "Café 🍵"
///
/// let scalarIndex = cafe.unicodeScalars.index(of: "é")!
/// let utf16Index = String.UTF16View.Index(scalarIndex, within: cafe.utf16)
///
/// print(cafe.utf16.prefix(through: utf16Index))
/// // Prints "Café"
///
/// - Parameters:
/// - unicodeScalarIndex: A position in a `UnicodeScalarView` instance.
/// `unicodeScalarIndex` must be an element in
/// `String(utf16).unicodeScalars.indices`.
/// - utf16: The `UTF16View` in which to find the new position.
public init(
_ unicodeScalarIndex: String.UnicodeScalarIndex,
within utf16: String.UTF16View) {
_offset = unicodeScalarIndex._position
}
/// Construct the position in `utf16` that corresponds exactly to
/// `characterIndex`.
/// Creates an index in the given UTF-16 view that corresponds exactly to the
/// specified `CharacterView` position.
///
/// - Precondition: `characterIndex` is an element of
/// `String(utf16)!.indices`.
/// The following example finds the position of the character `"é"` in a
/// string's `characters` view and then converts that position to an index in
/// the string's `utf16` view.
///
/// let cafe = "Café 🍵"
///
/// let characterIndex = cafe.characters.index(of: "é")!
/// let utf16Index = String.UTF16View.Index(characterIndex, within: cafe.utf16)
///
/// print(cafe.utf16.prefix(through: utf16Index))
/// // Prints "Café"
///
/// - Parameters:
/// - characterIndex: A position in a `CharacterView` instance.
/// `characterIndex` must be an element in
/// `String(utf16).characters.indices`.
/// - utf16: The `UTF16View` in which to find the new position.
public init(_ characterIndex: String.Index, within utf16: String.UTF16View) {
_offset = characterIndex._utf16Index
}
/// Returns the position in `utf8` that corresponds exactly
/// to `self`, or if no such position exists, `nil`.
/// Returns the position in the given UTF-8 view that corresponds exactly to
/// this index.
///
/// - Precondition: `self` is an element of
/// `String(utf8)!.utf16.indices`.
/// The index must be a valid index of `String(utf8).utf16`.
///
/// This example first finds the position of a space (UTF-16 code point `32`)
/// in a string's `utf16` view and then uses this method to find the same
/// position in the string's `utf8` view.
///
/// let cafe = "Café 🍵"
/// let i = cafe.utf16.index(of: 32)!
/// let j = i.samePosition(in: cafe.utf8)!
/// print(Array(cafe.utf8.prefix(upTo: j)))
/// // Prints "[67, 97, 102, 195, 169]"
///
/// - Parameter utf8: The view to use for the index conversion.
/// - Returns: The position in `utf8` that corresponds exactly to this index.
/// If this index does not have an exact corresponding position in `utf8`,
/// this method returns `nil`. For example, an attempt to convert the
/// position of a UTF-16 trailing surrogate returns `nil`.
public func samePosition(
in utf8: String.UTF8View
) -> String.UTF8View.Index? {
return String.UTF8View.Index(self, within: utf8)
}
/// Returns the position in `unicodeScalars` that corresponds exactly
/// to `self`, or if no such position exists, `nil`.
/// Returns the position in the given view of Unicode scalars that
/// corresponds exactly to this index.
///
/// - Precondition: `self` is an element of
/// `String(unicodeScalars).utf16.indices`.
/// This index must be a valid index of `String(unicodeScalars).utf16`.
///
/// This example first finds the position of a space (UTF-16 code point `32`)
/// in a string's `utf16` view and then uses this method to find the same
/// position in the string's `unicodeScalars` view.
///
/// let cafe = "Café 🍵"
/// let i = cafe.utf16.index(of: 32)!
/// let j = i.samePosition(in: cafe.unicodeScalars)!
/// print(cafe.unicodeScalars.prefix(upTo: j))
/// // Prints "Café"
///
/// - Parameter unicodeScalars: The view to use for the index conversion.
/// - Returns: The position in `unicodeScalars` that corresponds exactly to
/// this index. If this index does not have an exact corresponding
/// position in `unicodeScalars`, this method returns `nil`. For example,
/// an attempt to convert the position of a UTF-16 trailing surrogate
/// returns `nil`.
public func samePosition(
in unicodeScalars: String.UnicodeScalarView
) -> String.UnicodeScalarIndex? {
return String.UnicodeScalarIndex(self, within: unicodeScalars)
}
/// Returns the position in `characters` that corresponds exactly
/// to `self`, or if no such position exists, `nil`.
/// Returns the position in the given string that corresponds exactly to this
/// index.
///
/// - Precondition: `self` is an element of `characters.utf16.indices`.
/// This index must be a valid index of `characters.utf16`.
///
/// This example first finds the position of a space (UTF-16 code point `32`)
/// in a string's `utf16` view and then uses this method to find the same position
/// in the string.
///
/// let cafe = "Café 🍵"
/// let i = cafe.utf16.index(of: 32)!
/// let j = i.samePosition(in: cafe)!
/// print(cafe[cafe.startIndex ..< j])
/// // Prints "Café"
///
/// - Parameter characters: The string to use for the index conversion.
/// - Returns: The position in `characters` that corresponds exactly to this
/// index. If this index does not have an exact corresponding position in
/// `characters`, this method returns `nil`. For example, an attempt to
/// convert the position of a UTF-16 trailing surrogate returns `nil`.
public func samePosition(
in characters: String
) -> String.Index? {
@@ -315,7 +545,7 @@ extension String.UTF16View.Index {
// Reflection
extension String.UTF16View : CustomReflectable {
/// Returns a mirror that reflects `self`.
/// Returns a mirror that reflects the UTF-16 view of a string.
public var customMirror: Mirror {
return Mirror(self, unlabeledChildren: self)
}

View File

@@ -88,7 +88,80 @@ extension _StringCore {
}
extension String {
/// A collection of UTF-8 code units that encodes a `String` value.
/// A view of a string's contents as a collection of UTF-8 code units.
///
/// You can access a string's view of UTF-8 code units by using its `utf8`
/// property. A string's UTF-8 view encodes the string's Unicode scalar
/// values as 8-bit integers.
///
/// let flowers = "Flowers 💐"
/// for v in flowers.utf8 {
/// print(v)
/// }
/// // 70
/// // 108
/// // 111
/// // 119
/// // 101
/// // 114
/// // 115
/// // 32
/// // 240
/// // 159
/// // 146
/// // 144
///
/// A string's Unicode scalar values can be up to 21 bits in length. To
/// represent those scalar values using 8-bit integers, more than one UTF-8
/// code unit is often required.
///
/// let flowermoji = "💐"
/// for v in flowermoji.unicodeScalars {
/// print(v, v.value)
/// }
/// // 💐 128144
///
/// for v in flowermoji.utf8 {
/// print(v)
/// }
/// // 240
/// // 159
/// // 146
/// // 144
///
/// In the encoded representation of a Unicode scalar value, each UTF-8 code
/// unit after the first is called a *continuation byte*.
///
/// UTF8View Elements Match Encoded C Strings
/// =========================================
///
/// Swift streamlines interoperation with C string APIs by letting you pass a
/// `String` instance to a function as an `Int8` or `UInt8` pointer. When you
/// call a C function using a `String`, Swift automatically creates a buffer
/// of UTF-8 code units and passes a pointer to that buffer. The code units
/// of that buffer match the code units in the string's `utf8` view.
///
/// The following example uses the C `strncmp` function to compare the
/// beginning of two Swift strings. The `strncmp` function takes two
/// `const char*` pointers and an integer specifying the number of characters
/// to compare. Because the strings are identical up to the 14th character,
/// comparing only those characters results in a return value of `0`.
///
/// let s1 = "They call me 'Bell'"
/// let s2 = "They call me 'Stacey'"
///
/// print(strncmp(s1, s2, 14))
/// // Prints "0"
/// print(String(s1.utf8.prefix(14)))
/// // Prints "They call me '"
///
/// Extending the compared character count to 15 includes the differing
/// characters, so a nonzero result is returned.
///
/// print(strncmp(s1, s2, 15))
/// // Prints "-17"
/// print(String(s1.utf8.prefix(15)))
/// // Prints "They call me 'B"
public struct UTF8View
: Collection,
CustomStringConvertible,
@@ -114,7 +187,22 @@ extension String {
self._endIndex = e
}
/// A position in a `String.UTF8View`.
/// A position in a string's `UTF8View` instance.
///
/// You can convert between indices of the different string views by using
/// conversion initializers and the `samePosition(in:)` method overloads.
/// For example, the following code sample finds the index of the first
/// space in the string's character view and then converts that to the same
/// position in the UTF-8 view.
///
/// let hearts = "Hearts <3 💘"
/// if let i = hearts.characters.index(of: " ") {
/// let j = i.samePosition(in: hearts.utf8)
/// print(Array(hearts.utf8.prefix(upTo: j)))
/// print(hearts.utf8.prefix(upTo: j))
/// }
/// // Prints "[72, 101, 97, 114, 116, 115]"
/// // Prints "Hearts"
public struct Index : Comparable {
internal typealias Buffer = _StringCore._UTF8Chunk
@@ -169,17 +257,18 @@ extension String {
public typealias IndexDistance = Int
/// The position of the first code unit if the `String` is
/// non-empty; identical to `endIndex` otherwise.
/// The position of the first code unit if the UTF-8 view is
/// nonempty.
///
/// If the UTF-8 view is empty, `startIndex` is equal to `endIndex`.
public var startIndex: Index {
return self._startIndex
}
/// The "past the end" position.
/// The "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
///
/// `endIndex` is not a valid argument to `subscript`, and is always
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
/// In an empty UTF-8 view, `endIndex` is equal to `startIndex`.
public var endIndex: Index {
return self._endIndex
}
@@ -201,7 +290,7 @@ extension String {
let nextCoreIndex = i._coreIndex &+ increment
let nextBuffer = Index._nextBuffer(after: i._buffer)
// if the nextBuffer is non-empty, we have all we need
// if the nextBuffer is nonempty, we have all we need
if _fastPath(nextBuffer != Index._emptyBuffer) {
return Index(i._core, nextCoreIndex, nextBuffer)
}
@@ -219,20 +308,29 @@ extension String {
}
}
/// Access the element at `position`.
/// Accesses the code unit at the given position.
///
/// - Precondition: `position` is a valid position in `self` and
/// `position != endIndex`.
/// The following example uses the subscript to print the value of a
/// string's first UTF-8 code unit.
///
/// let greeting = "Hello, friend!"
/// let i = greeting.utf8.startIndex
/// print("First character's UTF-8 code unit: \(greeting.utf8[i])")
/// // Prints "First character's UTF-8 code unit: 72"
///
/// - Parameter position: A valid index of the view. `position`
/// must be less than the view's end index.
public subscript(position: Index) -> UTF8.CodeUnit {
// The code unit at this position is read from the low 8 bits of the index's
// buffer.
let result = UTF8.CodeUnit(truncatingBitPattern: position._buffer & 0xFF)
// A low byte of 0xFF is treated as the end-index sentinel (see the
// precondition message); 0xFF never occurs in well-formed UTF-8.
_precondition(result != 0xFF, "cannot subscript using endIndex")
return result
}
/// Access the contiguous subrange of elements enclosed by `bounds`.
/// Accesses the contiguous subrange of elements enclosed by the specified
/// range.
///
/// - Complexity: O(1) unless bridging from Objective-C requires an
/// O(N) conversion.
/// - Complexity: O(*n*) if the underlying string is bridged from
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
public subscript(bounds: Range<Index>) -> UTF8View {
return UTF8View(_core, bounds.lowerBound, bounds.upperBound)
}
@@ -260,11 +358,21 @@ extension String {
return _core.elementWidth == 1 ? _core.startASCII : nil
}
/// A contiguously-stored nul-terminated UTF-8 representation of
/// `self`.
/// A contiguously stored null-terminated UTF-8 representation of
/// the string.
///
/// To access the underlying memory, invoke
/// `withUnsafeBufferPointer` on the `ContiguousArray`.
/// `withUnsafeBufferPointer` on the array.
///
/// let s = "Hello!"
/// let bytes = s.nulTerminatedUTF8
/// print(bytes)
/// // Prints "[72, 101, 108, 108, 111, 33, 0]"
///
/// bytes.withUnsafeBufferPointer { ptr in
/// print(strlen(UnsafePointer(ptr.baseAddress!)))
/// }
/// // Prints "6"
public var nulTerminatedUTF8: ContiguousArray<UTF8.CodeUnit> {
var result = ContiguousArray<UTF8.CodeUnit>()
result.reserveCapacity(utf8.count + 1)
@@ -283,9 +391,24 @@ extension String {
return try nulTerminatedUTF8.withUnsafeBufferPointer(body)
}
/// Construct the `String` corresponding to the given sequence of
/// UTF-8 code units. If `utf8` contains unpaired surrogates, the
/// result is `nil`.
/// Creates a string corresponding to the given sequence of UTF-8 code units.
///
/// If `utf8` is an ill-formed UTF-8 code sequence, the result is `nil`.
///
/// You can use this initializer to create a new string from a slice of
/// another string's `utf8` view.
///
/// let picnicGuest = "Deserving porcupine"
/// if let i = picnicGuest.utf8.index(of: 32) {
/// let adjective = String(picnicGuest.utf8.prefix(upTo: i))
/// print(adjective)
/// }
/// // Prints "Optional("Deserving")"
///
/// The `adjective` constant is created by calling this initializer with a
/// slice of the `picnicGuest.utf8` view.
///
/// - Parameter utf8: A UTF-8 code sequence.
public init?(_ utf8: UTF8View) {
let wholeString = String(utf8._core)
@@ -297,7 +420,7 @@ extension String {
return nil
}
/// The index type for subscripting a `String`'s `.utf8` view.
/// The index type for subscripting a string's `utf8` view.
public typealias UTF8Index = UTF8View.Index
}
@@ -356,11 +479,44 @@ extension String.UTF8View.Index {
self.init(core, _utf16Offset, buffer)
}
/// Construct the position in `utf8` that corresponds exactly to
/// `utf16Index`. If no such position exists, the result is `nil`.
/// Creates an index in the given UTF-8 view that corresponds exactly to the
/// specified `UTF16View` position.
///
/// - Precondition: `utf8Index` is an element of
/// `String(utf16)!.utf8.indices`.
/// The following example finds the position of a space in a string's `utf16`
/// view and then converts that position to an index in the string's
/// `utf8` view.
///
/// let cafe = "Café 🍵"
///
/// let utf16Index = cafe.utf16.index(of: 32)!
/// let utf8Index = String.UTF8View.Index(utf16Index, within: cafe.utf8)!
///
/// print(Array(cafe.utf8.prefix(upTo: utf8Index)))
/// // Prints "[67, 97, 102, 195, 169]"
///
/// If the position passed in `utf16Index` doesn't have an exact
/// corresponding position in `utf8`, the result of the initializer is
/// `nil`. For example, because UTF-8 and UTF-16 represent high Unicode code
/// points differently, an attempt to convert the position of the trailing
/// surrogate of a UTF-16 surrogate pair fails.
///
/// The next example attempts to convert the indices of the two UTF-16 code
/// points that represent the teacup emoji (`"🍵"`). The index of the lead
/// surrogate is successfully converted to a position in `utf8`, but the
/// index of the trailing surrogate is not.
///
/// let emojiHigh = cafe.utf16.index(after: utf16Index)
/// print(String.UTF8View.Index(emojiHigh, within: cafe.utf8))
/// // Prints "Optional(String.Index(...))"
///
/// let emojiLow = cafe.utf16.index(after: emojiHigh)
/// print(String.UTF8View.Index(emojiLow, within: cafe.utf8))
/// // Prints "nil"
///
/// - Parameters:
/// - utf16Index: A position in a `UTF16View` instance. `utf16Index` must
/// be an element in `String(utf8).utf16.indices`.
/// - utf8: The `UTF8View` in which to find the new position.
public init?(_ utf16Index: String.UTF16Index, within utf8: String.UTF8View) {
let utf16 = String.UTF16View(utf8._core)
@@ -383,11 +539,24 @@ extension String.UTF8View.Index {
self.init(utf8._core, _utf16Offset: utf16Index._offset)
}
/// Construct the position in `utf8` that corresponds exactly to
/// `unicodeScalarIndex`.
/// Creates an index in the given UTF-8 view that corresponds exactly to the
/// specified `UnicodeScalarView` position.
///
/// - Precondition: `unicodeScalarIndex` is an element of
/// `String(utf8)!.unicodeScalars.indices`.
/// The following example converts the position of the Unicode scalar `"e"`
/// into its corresponding position in the string's `utf8` view.
///
/// let cafe = "Cafe\u{0301}"
/// let scalarsIndex = cafe.unicodeScalars.index(of: "e")!
/// let utf8Index = String.UTF8View.Index(scalarsIndex, within: cafe.utf8)
///
/// print(Array(cafe.utf8.prefix(through: utf8Index)))
/// // Prints "[67, 97, 102, 101]"
///
/// - Parameters:
/// - unicodeScalarIndex: A position in a `UnicodeScalarView` instance.
/// `unicodeScalarIndex` must be an element of
/// `String(utf8).unicodeScalars.indices`.
/// - utf8: The `UTF8View` in which to find the new position.
public init(
_ unicodeScalarIndex: String.UnicodeScalarIndex,
within utf8: String.UTF8View
@@ -395,40 +564,102 @@ extension String.UTF8View.Index {
self.init(utf8._core, _utf16Offset: unicodeScalarIndex._position)
}
/// Construct the position in `utf8` that corresponds exactly to
/// `characterIndex`.
/// Creates an index in the given UTF-8 view that corresponds exactly to the
/// specified string position.
///
/// - Precondition: `characterIndex` is an element of
/// `String(utf8)!.indices`.
/// The following example converts the position of the teacup emoji (`"🍵"`)
/// into its corresponding position in the string's `utf8` view.
///
/// let cafe = "Café 🍵"
/// let characterIndex = cafe.characters.index(of: "🍵")!
/// let utf8Index = String.UTF8View.Index(characterIndex, within: cafe.utf8)
///
/// print(Array(cafe.utf8.suffix(from: utf8Index)))
/// // Prints "[240, 159, 141, 181]"
///
/// - Parameters:
/// - characterIndex: A position in a `CharacterView` instance.
/// `characterIndex` must be an element of
/// `String(utf8).characters.indices`.
/// - utf8: The `UTF8View` in which to find the new position.
public init(_ characterIndex: String.Index, within utf8: String.UTF8View) {
self.init(utf8._core, _utf16Offset: characterIndex._base._position)
}
/// Returns the position in `utf16` that corresponds exactly
/// to `self`, or if no such position exists, `nil`.
/// Returns the position in the given UTF-16 view that corresponds exactly to
/// this index.
///
/// - Precondition: `self` is an element of `String(utf16)!.utf8.indices`.
/// The index must be a valid index of `String(utf16).utf8`.
///
/// This example first finds the position of a space (UTF-8 code point `32`)
/// in a string's `utf8` view and then uses this method to find the same
/// position in the string's `utf16` view.
///
/// let cafe = "Café 🍵"
/// let i = cafe.utf8.index(of: 32)!
/// let j = i.samePosition(in: cafe.utf16)!
/// print(cafe.utf16.prefix(upTo: j))
/// // Prints "Café"
///
/// - Parameter utf16: The view to use for the index conversion.
/// - Returns: The position in `utf16` that corresponds exactly to this
/// index. If this index does not have an exact corresponding position in
/// `utf16`, this method returns `nil`. For example, an attempt to convert
/// the position of a UTF-8 continuation byte returns `nil`.
public func samePosition(
in utf16: String.UTF16View
) -> String.UTF16View.Index? {
return String.UTF16View.Index(self, within: utf16)
}
/// Returns the position in `unicodeScalars` that corresponds exactly
/// to `self`, or if no such position exists, `nil`.
/// Returns the position in the given view of Unicode scalars that
/// corresponds exactly to this index.
///
/// - Precondition: `self` is an element of
/// `String(unicodeScalars).utf8.indices`.
/// This index must be a valid index of `String(unicodeScalars).utf8`.
///
/// This example first finds the position of a space (UTF-8 code point `32`)
/// in a string's `utf8` view and then uses this method to find the same position
/// in the string's `unicodeScalars` view.
///
/// let cafe = "Café 🍵"
/// let i = cafe.utf8.index(of: 32)!
/// let j = i.samePosition(in: cafe.unicodeScalars)!
/// print(cafe.unicodeScalars.prefix(upTo: j))
/// // Prints "Café"
///
/// - Parameter unicodeScalars: The view to use for the index conversion.
/// - Returns: The position in `unicodeScalars` that corresponds exactly to
/// this index. If this index does not have an exact corresponding
/// position in `unicodeScalars`, this method returns `nil`. For example,
/// an attempt to convert the position of a UTF-8 continuation byte
/// returns `nil`.
public func samePosition(
in unicodeScalars: String.UnicodeScalarView
) -> String.UnicodeScalarIndex? {
return String.UnicodeScalarIndex(self, within: unicodeScalars)
}
/// Returns the position in `characters` that corresponds exactly
/// to `self`, or if no such position exists, `nil`.
/// Returns the position in the given string that corresponds exactly to this
/// index.
///
/// - Precondition: `self` is an element of `characters.utf8.indices`.
/// This index must be a valid index of `characters.utf8`.
///
/// This example first finds the position of a space (UTF-8 code point `32`)
/// in a string's `utf8` view and then uses this method to find the same position
/// in the string.
///
/// let cafe = "Café 🍵"
/// let i = cafe.utf8.index(of: 32)!
/// let j = i.samePosition(in: cafe)!
/// print(cafe[cafe.startIndex ..< j])
/// // Prints "Café"
///
/// - Parameter characters: The string to use for the index conversion.
/// - Returns: The position in `characters` that corresponds exactly to
/// this index. If this index does not have an exact corresponding
/// position in `characters`, this method returns `nil`. For example,
/// an attempt to convert the position of a UTF-8 continuation byte
/// returns `nil`.
public func samePosition(
in characters: String
) -> String.Index? {
@@ -438,7 +669,7 @@ extension String.UTF8View.Index {
// Reflection
extension String.UTF8View : CustomReflectable {
/// Returns a mirror that reflects `self`.
/// Returns a mirror that reflects the UTF-8 view of a string.
public var customMirror: Mirror {
return Mirror(self, unlabeledChildren: self)
}

View File

@@ -25,8 +25,52 @@ public func < (
}
extension String {
/// A collection of [Unicode scalar values](http://www.unicode.org/glossary/#unicode_scalar_value) that
/// encodes a `String` value.
/// A view of a string's contents as a collection of Unicode scalar values.
///
/// You can access a string's view of Unicode scalar values by using its
/// `unicodeScalars` property. Unicode scalar values are the 21-bit codes
/// that are the basic unit of Unicode. Each scalar value is represented by
/// a `UnicodeScalar` instance and is equivalent to a UTF-32 code unit.
///
/// let flowers = "Flowers 💐"
/// for v in flowers.unicodeScalars {
/// print(v.value)
/// }
/// // 70
/// // 108
/// // 111
/// // 119
/// // 101
/// // 114
/// // 115
/// // 32
/// // 128144
///
/// Some characters that are visible in a string are made up of more than one
/// Unicode scalar value. In that case, a string's `unicodeScalars` view
/// contains more values than its `characters` view.
///
/// let flag = "🇵🇷"
/// for c in flag.characters {
/// print(c)
/// }
/// // 🇵🇷
///
/// for v in flag.unicodeScalars {
/// print(v.value)
/// }
/// // 127477
/// // 127479
///
/// You can convert a `String.UnicodeScalarView` instance back into a string
/// using the `String` type's `init(_:)` initializer.
///
/// let favemoji = "My favorite emoji is 🎉"
/// if let i = favemoji.unicodeScalars.index(where: { $0.value >= 128 }) {
/// let asciiPrefix = String(favemoji.unicodeScalars.prefix(upTo: i))
/// print(asciiPrefix)
/// }
/// // Prints "My favorite emoji is "
public struct UnicodeScalarView :
BidirectionalCollection,
CustomStringConvertible,
@@ -54,7 +98,22 @@ extension String {
}
}
/// A position in a `String.UnicodeScalarView`.
/// A position in a string's `unicodeScalars` view.
///
/// You can convert between indices of the different string views by using
/// conversion initializers and the `samePosition(in:)` method overloads.
/// The following example finds the index of the solid heart pictograph in
/// the string's character view and then converts that to the same
/// position in the Unicode scalars view:
///
/// let hearts = "Hearts <3 ♥︎ 💘"
/// let i = hearts.characters.index(of: "♥︎")!
///
/// let j = i.samePosition(in: hearts.unicodeScalars)
/// print(hearts.unicodeScalars.suffix(from: j))
/// // Prints "♥︎ 💘"
/// print(hearts.unicodeScalars[j].value)
/// // Prints "9829"
public struct Index : Comparable {
public init(_ _position: Int, _ _core: _StringCore) {
self._position = _position
@@ -75,17 +134,18 @@ extension String {
@_versioned internal var _core: _StringCore
}
/// The position of the first `UnicodeScalar` if the `String` is
/// non-empty; identical to `endIndex` otherwise.
/// The position of the first Unicode scalar value if the string is
/// nonempty.
///
/// If the string is empty, `startIndex` is equal to `endIndex`.
public var startIndex: Index {
return Index(_core.startIndex, _core)
}
/// The "past the end" position.
/// The "past the end" position---that is, the position one greater than
/// the last valid subscript argument.
///
/// `endIndex` is not a valid argument to `subscript`, and is always
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
/// In an empty Unicode scalars view, `endIndex` is equal to `startIndex`.
public var endIndex: Index {
return Index(_core.endIndex, _core)
}
@@ -114,10 +174,22 @@ extension String {
return Index(i, _core)
}
/// Access the element at `position`.
/// Accesses the Unicode scalar value at the given position.
///
/// - Precondition: `position` is a valid position in `self` and
/// `position != endIndex`.
/// The following example searches a string's Unicode scalars view for a
/// capital letter and then prints the character and Unicode scalar value
/// at the found index:
///
/// let greeting = "Hello, friend!"
/// if let i = greeting.unicodeScalars.index(where: { "A"..."Z" ~= $0 }) {
/// print("First capital letter: \(greeting.unicodeScalars[i])")
/// print("Unicode scalar value: \(greeting.unicodeScalars[i].value)")
/// }
/// // Prints "First capital letter: H"
/// // Prints "Unicode scalar value: 72"
///
/// - Parameter position: A valid index of the Unicode scalars view.
/// `position` must be less than the view's end index.
public subscript(position: Index) -> UnicodeScalar {
var scratch = _ScratchIterator(_core, position._position)
var decoder = UTF16()
@@ -131,17 +203,26 @@ extension String {
}
}
/// Access the contiguous subrange of elements enclosed by `bounds`.
/// Accesses the Unicode scalar values in the given range.
///
/// - Complexity: O(1) unless bridging from Objective-C requires an
/// O(N) conversion.
/// The example below uses this subscript to access the scalar values up
/// to, but not including, the first comma (`","`) in the string.
///
/// let str = "All this happened, more or less."
/// let i = str.unicodeScalars.index(of: ",")!
/// let substring = str.unicodeScalars[str.unicodeScalars.startIndex ..< i]
/// print(String(substring))
/// // Prints "All this happened"
///
/// - Complexity: O(*n*) if the underlying string is bridged from
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
public subscript(r: Range<Index>) -> UnicodeScalarView {
return UnicodeScalarView(
_core[r.lowerBound._position..<r.upperBound._position])
}
/// A type whose instances can produce the elements of this
/// sequence, in order.
/// An iterator over the Unicode scalars that make up a `UnicodeScalarView`
/// collection.
public struct Iterator : IteratorProtocol {
init(_ _base: _StringCore) {
if _base.hasContiguousStorage {
@@ -164,11 +245,13 @@ extension String {
}
}
/// Advance to the next element and return it, or `nil` if no next
/// element exists.
/// Advances to the next element and returns it.
///
/// - Precondition: No preceding call to `self.next()` has returned
/// `nil`.
/// Do not call this method if a copy of the iterator has been advanced.
///
/// - Returns: The next element in the collection if an element is
/// available; otherwise, `nil`. After returning `nil` once, this
/// method returns `nil` on every subsequent call.
public mutating func next() -> UnicodeScalar? {
var result: UnicodeDecodingResult
if _baseSet {
@@ -202,10 +285,9 @@ extension String {
internal var _iterator: IndexingIterator<_StringCore>!
}
/// Returns an iterator over the `UnicodeScalar`s that comprise
/// this sequence.
/// Returns an iterator over the Unicode scalars that make up this view.
///
/// - Complexity: O(1).
/// - Returns: An iterator over this collection's `UnicodeScalar` elements.
public func makeIterator() -> Iterator {
return Iterator(_core)
}
@@ -221,19 +303,33 @@ extension String {
internal var _core: _StringCore
}
/// Construct the `String` corresponding to the given sequence of
/// Unicode scalars.
/// Creates a string corresponding to the given collection of Unicode
/// scalars.
///
/// You can use this initializer to create a new string from a slice of
/// another string's `unicodeScalars` view.
///
/// let picnicGuest = "Deserving porcupine"
/// if let i = picnicGuest.unicodeScalars.index(of: " ") {
/// let adjective = String(picnicGuest.unicodeScalars.prefix(upTo: i))
/// print(adjective)
/// }
/// // Prints "Deserving"
///
/// The `adjective` constant is created by calling this initializer with a
/// slice of the `picnicGuest.unicodeScalars` view.
///
/// - Parameter unicodeScalars: A collection of Unicode scalar values.
public init(_ unicodeScalars: UnicodeScalarView) {
self.init(unicodeScalars._core)
}
/// The index type for subscripting a `String`'s `.unicodeScalars`
/// view.
/// The index type for a string's `unicodeScalars` view.
public typealias UnicodeScalarIndex = UnicodeScalarView.Index
}
extension String {
/// The value of `self` as a collection of [Unicode scalar values](http://www.unicode.org/glossary/#unicode_scalar_value).
/// The string's value represented as a collection of Unicode scalar values.
public var unicodeScalars : UnicodeScalarView {
get {
return UnicodeScalarView(_core)
@@ -245,36 +341,60 @@ extension String {
}
extension String.UnicodeScalarView : RangeReplaceableCollection {
/// Construct an empty instance.
/// Creates an empty view instance.
public init() {
self = String.UnicodeScalarView(_StringCore())
}
/// Reserve enough space to store `n` ASCII characters.
/// Reserves enough space in the view's underlying storage to store the
/// specified number of ASCII characters.
///
/// - Complexity: O(`n`).
/// Because a Unicode scalar value can require more than a single ASCII
/// character's worth of storage, additional allocation may be necessary
/// when adding to a Unicode scalar view after a call to
/// `reserveCapacity(_:)`.
///
/// - Parameter n: The minimum number of ASCII characters' worth of storage
/// to allocate.
///
/// - Complexity: O(*n*), where *n* is the capacity being reserved.
public mutating func reserveCapacity(_ n: Int) {
_core.reserveCapacity(n)
}
/// Append `x` to `self`.
/// Appends the given Unicode scalar to the view.
///
/// - Complexity: Amortized O(1).
/// - Parameter x: The Unicode scalar to append to the view.
public mutating func append(_ x: UnicodeScalar) {
_core.append(x)
}
/// Append the elements of `newElements` to `self`.
/// Appends the Unicode scalar values in the given sequence to the view.
///
/// - Complexity: O(*length of result*).
/// - Parameter newElements: A sequence of Unicode scalar values.
///
/// - Complexity: O(*n*), where *n* is the length of the resulting view.
public mutating func append<
S : Sequence where S.Iterator.Element == UnicodeScalar
>(contentsOf newElements: S) {
// Lazily expand every scalar through its `utf16` property and hand the
// flattened sequence to the core's own append(contentsOf:), so no
// intermediate array of code units is built here.
_core.append(contentsOf: newElements.lazy.flatMap { $0.utf16 })
}
/// Replace the elements within `bounds` with `newElements`.
/// Replaces the elements within the specified bounds with the given Unicode
/// scalar values.
///
/// Invalidates all indices with respect to `self`.
/// Calling this method invalidates any existing indices for use with this
/// string.
///
/// - Complexity: O(`bounds.count`) if `bounds.upperBound
/// == self.endIndex` and `newElements.isEmpty`, O(N) otherwise.
/// - Parameters:
/// - bounds: The range of elements to replace. The bounds of the range
/// must be valid indices of the view.
/// - newElements: The new Unicode scalar values to add to the string.
///
/// - Complexity: O(*m*), where *m* is the combined length of the view and
/// `newElements`. If the call to `replaceSubrange(_:with:)` simply
/// removes elements at the end of the string, the complexity is O(*n*),
/// where *n* is equal to `bounds.count`.
public mutating func replaceSubrange<
C: Collection where C.Iterator.Element == UnicodeScalar
>(
@@ -290,11 +410,31 @@ extension String.UnicodeScalarView : RangeReplaceableCollection {
// Index conversions
extension String.UnicodeScalarIndex {
/// Construct the position in `unicodeScalars` that corresponds exactly to
/// `utf16Index`. If no such position exists, the result is `nil`.
/// Creates an index in the given Unicode scalars view that corresponds
/// exactly to the specified `UTF16View` position.
///
/// - Precondition: `utf16Index` is an element of
/// `String(unicodeScalars).utf16.indices`.
/// The following example finds the position of a space in a string's `utf16`
/// view and then converts that position to an index in the string's
/// `unicodeScalars` view:
///
/// let cafe = "Café 🍵"
///
/// let utf16Index = cafe.utf16.index(of: 32)!
/// let scalarIndex = String.UnicodeScalarView.Index(utf16Index, within: cafe.unicodeScalars)!
///
/// print(String(cafe.unicodeScalars.prefix(upTo: scalarIndex)))
/// // Prints "Café"
///
/// If the position passed in `utf16Index` doesn't have an exact
/// corresponding position in `unicodeScalars`, the result of the
/// initializer is `nil`. For example, an attempt to convert the position of
/// the trailing surrogate of a UTF-16 surrogate pair fails.
///
/// - Parameters:
/// - utf16Index: A position in the `utf16` view of the `unicodeScalars`
/// parameter.
/// - unicodeScalars: The `UnicodeScalarView` instance referenced by both
/// `utf16Index` and the resulting index.
public init?(
_ utf16Index: String.UTF16Index,
within unicodeScalars: String.UnicodeScalarView
@@ -320,11 +460,19 @@ extension String.UnicodeScalarIndex {
self.init(utf16Index._offset, unicodeScalars._core)
}
/// Construct the position in `unicodeScalars` that corresponds exactly to
/// `utf8Index`. If no such position exists, the result is `nil`.
/// Creates an index in the given Unicode scalars view that corresponds
/// exactly to the specified `UTF8View` position.
///
/// - Precondition: `utf8Index` is an element of
/// `String(unicodeScalars).utf8.indices`.
/// If the position passed as `utf8Index` doesn't have an exact corresponding
/// position in `unicodeScalars`, the result of the initializer is `nil`.
/// For example, an attempt to convert the position of a UTF-8 continuation
/// byte returns `nil`.
///
/// - Parameters:
/// - utf8Index: A position in the `utf8` view of the `unicodeScalars`
/// parameter.
/// - unicodeScalars: The `UnicodeScalarView` instance referenced by both
/// `utf8Index` and the resulting index.
public init?(
_ utf8Index: String.UTF8Index,
within unicodeScalars: String.UnicodeScalarView
@@ -342,11 +490,24 @@ extension String.UnicodeScalarIndex {
self.init(utf8Index._coreIndex, core)
}
/// Construct the position in `unicodeScalars` that corresponds
/// exactly to `characterIndex`.
/// Creates an index in the given Unicode scalars view that corresponds
/// exactly to the specified string position.
///
/// - Precondition: `characterIndex` is an element of
/// `String(unicodeScalars).indices`.
/// The following example converts the position of the teacup emoji (`"🍵"`)
/// into its corresponding position in the string's `unicodeScalars` view.
///
/// let cafe = "Café 🍵"
/// let characterIndex = cafe.characters.index(of: "🍵")!
/// let scalarIndex = String.UnicodeScalarView.Index(characterIndex, within: cafe.unicodeScalars)
///
/// print(cafe.unicodeScalars.suffix(from: scalarIndex))
/// // Prints "🍵"
///
/// - Parameters:
/// - characterIndex: A position in a `CharacterView` instance.
/// `characterIndex` must be an element of
/// `String(unicodeScalars).characters.indices`.
/// - unicodeScalars: The `UnicodeScalarView` in which to find the new
/// position.
public init(
_ characterIndex: String.Index,
within unicodeScalars: String.UnicodeScalarView
@@ -354,29 +515,71 @@ extension String.UnicodeScalarIndex {
self.init(characterIndex._base._position, unicodeScalars._core)
}
/// Returns the position in `utf8` that corresponds exactly
/// to `self`.
/// Returns the position in the given UTF-8 view that corresponds exactly to
/// this index.
///
/// - Precondition: `self` is an element of `String(utf8)!.indices`.
/// The index must be a valid index of `String(utf8).unicodeScalars`.
///
/// This example first finds the position of the character `"é"` and then uses
/// this method to find the same position in the string's `utf8` view.
///
/// let cafe = "Café"
/// if let i = cafe.unicodeScalars.index(of: "é") {
/// let j = i.samePosition(in: cafe.utf8)
/// print(Array(cafe.utf8.suffix(from: j)))
/// }
/// // Prints "[195, 169]"
///
/// - Parameter utf8: The view to use for the index conversion.
/// - Returns: The position in `utf8` that corresponds exactly to this index.
public func samePosition(in utf8: String.UTF8View) -> String.UTF8View.Index {
return String.UTF8View.Index(self, within: utf8)
}
/// Returns the position in `utf16` that corresponds exactly
/// to `self`.
/// Returns the position in the given UTF-16 view that corresponds exactly to
/// this index.
///
/// - Precondition: `self` is an element of `String(utf16)!.indices`.
/// The index must be a valid index of `String(utf16).unicodeScalars`.
///
/// This example first finds the position of the character `"é"` and then uses
/// this method to find the same position in the string's `utf16` view.
///
/// let cafe = "Café"
/// if let i = cafe.unicodeScalars.index(of: "é") {
/// let j = i.samePosition(in: cafe.utf16)
/// print(cafe.utf16[j])
/// }
/// // Prints "233"
///
/// - Parameter utf16: The view to use for the index conversion.
/// - Returns: The position in `utf16` that corresponds exactly to this index.
public func samePosition(
in utf16: String.UTF16View
) -> String.UTF16View.Index {
return String.UTF16View.Index(self, within: utf16)
}
/// Returns the position in `characters` that corresponds exactly
/// to `self`, or if no such position exists, `nil`.
/// Returns the position in the given string that corresponds exactly to this
/// index.
///
/// - Precondition: `self` is an element of
/// `characters.unicodeScalars.indices`.
/// This index must be a valid index of `characters.unicodeScalars`.
///
/// This example first finds the position of the teacup emoji (`"🍵"`) in a
/// string's `unicodeScalars` view and then uses this method to find the same
/// position in the string.
///
/// let cafe = "Café 🍵"
/// let i = cafe.unicodeScalars.index(of: "🍵")!
/// let j = i.samePosition(in: cafe)!
/// print(cafe.suffix(from: j))
/// // Prints "🍵"
///
/// - Parameter characters: The string to use for the index conversion.
/// - Returns: The position in `characters` that corresponds exactly to
/// this index. If this index does not have an exact corresponding
/// position in `characters`, this method returns `nil`. For example,
/// an attempt to convert the position of a Unicode scalar in the middle
/// of an extended grapheme cluster returns `nil`.
public func samePosition(in characters: String) -> String.Index? {
return String.Index(self, within: characters)
}
@@ -408,7 +611,7 @@ extension String.UnicodeScalarIndex {
// Reflection
extension String.UnicodeScalarView : CustomReflectable {
/// Returns a mirror that reflects `self`.
/// Returns a mirror that reflects the Unicode scalars view of a string.
public var customMirror: Mirror {
return Mirror(self, unlabeledChildren: self)
}

View File

@@ -16,11 +16,19 @@
/// The result of one Unicode decoding step.
///
/// A unicode scalar value, an indication that no more unicode scalars
/// are available, or an indication of a decoding error.
/// Each `UnicodeDecodingResult` instance can represent a Unicode scalar value,
/// an indication that no more Unicode scalars are available, or an indication
/// of a decoding error.
///
/// - SeeAlso: `UnicodeCodec.decode(next:)`
public enum UnicodeDecodingResult : Equatable {
/// A decoded Unicode scalar value.
case scalarValue(UnicodeScalar)
/// An indication that no more Unicode scalars are available in the input.
case emptyInput
/// An indication of a decoding error.
case error
}
@@ -40,56 +48,102 @@ public func == (
}
}
/// A Unicode [encoding scheme](http://www.unicode.org/glossary/#character_encoding_scheme).
/// A Unicode encoding form that translates between Unicode scalar values and
/// form-specific code units.
///
/// Consists of an underlying [code unit](http://www.unicode.org/glossary/#code_unit)
/// and functions to translate between sequences of these code units and
/// [unicode scalar values](http://www.unicode.org/glossary/#unicode_scalar_value).
/// The `UnicodeCodec` protocol declares methods that decode code unit
/// sequences into Unicode scalar values and encode Unicode scalar values
/// into code unit sequences. The standard library implements codecs for the
/// UTF-8, UTF-16, and UTF-32 encoding schemes as the `UTF8`, `UTF16`, and
/// `UTF32` types, respectively. Use the `UnicodeScalar` type to work with
/// decoded Unicode scalar values.
///
/// - SeeAlso: `UTF8`, `UTF16`, `UTF32`, `UnicodeScalar`
public protocol UnicodeCodec {
/// A type that can hold [code unit](http://www.unicode.org/glossary/#code_unit)
/// values for this encoding.
/// A type that can hold code unit values for this encoding.
associatedtype CodeUnit
/// Creates an instance of the codec.
init()
/// Start or continue decoding a UTF sequence.
/// Starts or continues decoding a code unit sequence into Unicode scalar
/// values.
///
/// In order to decode a code unit sequence completely, this function should
/// be called repeatedly until it returns `UnicodeDecodingResult.emptyInput`.
/// Checking that the iterator was exhausted is not sufficient. The decoder
/// can have an internal buffer that is pre-filled with data from the input
/// iterator.
/// To decode a code unit sequence completely, call this method repeatedly
/// until it returns `UnicodeDecodingResult.emptyInput`. Checking that the
/// iterator was exhausted is not sufficient, because the decoder can store
/// buffered data from the input iterator.
///
/// Because of buffering, it is impossible to find the corresponding position
/// in the iterator for a given returned `UnicodeScalar` or an error.
///
/// - Parameter next: An iterator of code units to be decoded. Repeated
/// calls to this method on the same instance should always pass the same
/// iterator and the iterator or copies thereof should not be used for
/// anything else between calls. Failing to do so will yield unspecified
/// results.
/// The following example decodes the UTF-8 encoded bytes of a string into an
/// array of `UnicodeScalar` instances:
///
/// let str = "✨Unicode✨"
/// print(Array(str.utf8))
/// // Prints "[226, 156, 168, 85, 110, 105, 99, 111, 100, 101, 226, 156, 168]"
///
/// var bytesIterator = str.utf8.makeIterator()
/// var scalars: [UnicodeScalar] = []
/// var utf8Decoder = UTF8()
/// Decode: while true {
/// switch utf8Decoder.decode(&bytesIterator) {
/// case .scalarValue(let v): scalars.append(v)
/// case .emptyInput: break Decode
/// case .error:
/// print("Decoding error")
/// break Decode
/// }
/// }
/// print(scalars)
/// // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]"
///
/// - Parameter next: An iterator of code units to be decoded. `next` must be
/// the same iterator instance in repeated calls to this method. Do not
/// advance the iterator or any copies of the iterator outside this
/// method.
/// - Returns: A `UnicodeDecodingResult` instance, representing the next
/// Unicode scalar, an indication of an error, or an indication that the
/// UTF sequence has been fully decoded.
mutating func decode<
I : IteratorProtocol where I.Element == CodeUnit
>(_ next: inout I) -> UnicodeDecodingResult
/// Encode a `UnicodeScalar` as a series of `CodeUnit`s by
/// calling `processCodeUnit` on each `CodeUnit`.
/// Encodes a Unicode scalar as a series of code units by calling the given
/// closure on each code unit.
///
/// For example, the musical fermata symbol ("𝄐") is a single Unicode scalar
/// value (`\u{1D110}`) but requires four code units for its UTF-8
/// representation. The following code uses the `UTF8` codec to encode a
/// fermata in UTF-8:
///
/// var bytes: [UTF8.CodeUnit] = []
/// UTF8.encode("𝄐", sendingOutputTo: { bytes.append($0) })
/// print(bytes)
/// // Prints "[240, 157, 132, 144]"
///
/// - Parameters:
/// - input: The Unicode scalar value to encode.
/// - processCodeUnit: A closure that processes one code unit argument at a
/// time.
static func encode(
_ input: UnicodeScalar,
sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void
)
}
/// A codec for [UTF-8](http://www.unicode.org/glossary/#UTF_8).
/// A codec for translating between Unicode scalar values and UTF-8 code
/// units.
public struct UTF8 : UnicodeCodec {
// See Unicode 8.0.0, Ch 3.9, UTF-8.
// http://www.unicode.org/versions/Unicode8.0.0/ch03.pdf
/// A type that can hold [code unit](http://www.unicode.org/glossary/#code_unit)
/// values for this encoding.
/// A type that can hold code unit values for this encoding.
public typealias CodeUnit = UInt8
/// Creates an instance of the UTF-8 codec.
public init() {}
/// Lookahead buffer used for UTF-8 decoding. New bytes are inserted at MSB,
@@ -105,22 +159,47 @@ public struct UTF8 : UnicodeCodec {
/// we are done decoding, as there might still be bytes left in the buffer.
internal var _didExhaustIterator: Bool = false
/// Start or continue decoding a UTF-8 sequence.
/// Starts or continues decoding a UTF-8 sequence.
///
/// In order to decode a code unit sequence completely, this function should
/// be called repeatedly until it returns `UnicodeDecodingResult.emptyInput`.
/// Checking that the iterator was exhausted is not sufficient. The decoder
/// can have an internal buffer that is pre-filled with data from the input
/// iterator.
/// To decode a code unit sequence completely, call this method repeatedly
/// until it returns `UnicodeDecodingResult.emptyInput`. Checking that the
/// iterator was exhausted is not sufficient, because the decoder can store
/// buffered data from the input iterator.
///
/// Because of buffering, it is impossible to find the corresponding position
/// in the iterator for a given returned `UnicodeScalar` or an error.
///
/// - Parameter next: An iterator of code units to be decoded. Repeated
/// calls to this method on the same instance should always pass the same
/// iterator and the iterator or copies thereof should not be used for
/// anything else between calls. Failing to do so will yield unspecified
/// results.
/// The following example decodes the UTF-8 encoded bytes of a string into an
/// array of `UnicodeScalar` instances. This is a demonstration only---if
/// you need the Unicode scalar representation of a string, use its
/// `unicodeScalars` view.
///
/// let str = "✨Unicode✨"
/// print(Array(str.utf8))
/// // Prints "[226, 156, 168, 85, 110, 105, 99, 111, 100, 101, 226, 156, 168]"
///
/// var bytesIterator = str.utf8.makeIterator()
/// var scalars: [UnicodeScalar] = []
/// var utf8Decoder = UTF8()
/// Decode: while true {
/// switch utf8Decoder.decode(&bytesIterator) {
/// case .scalarValue(let v): scalars.append(v)
/// case .emptyInput: break Decode
/// case .error:
/// print("Decoding error")
/// break Decode
/// }
/// }
/// print(scalars)
/// // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]"
///
/// - Parameter next: An iterator of code units to be decoded. `next` must be
/// the same iterator instance in repeated calls to this method. Do not
/// advance the iterator or any copies of the iterator outside this
/// method.
/// - Returns: A `UnicodeDecodingResult` instance, representing the next
/// Unicode scalar, an indication of an error, or an indication that the
/// UTF sequence has been fully decoded.
public mutating func decode<
I : IteratorProtocol where I.Element == CodeUnit
>(_ next: inout I) -> UnicodeDecodingResult {
@@ -280,8 +359,22 @@ public struct UTF8 : UnicodeCodec {
}
}
/// Encode a `UnicodeScalar` as a series of `CodeUnit`s by
/// calling `processCodeUnit` on each `CodeUnit`.
/// Encodes a Unicode scalar as a series of code units by calling the given
/// closure on each code unit.
///
/// For example, the musical fermata symbol ("𝄐") is a single Unicode scalar
/// value (`\u{1D110}`) but requires four code units for its UTF-8
/// representation. The following code encodes a fermata in UTF-8:
///
/// var bytes: [UTF8.CodeUnit] = []
/// UTF8.encode("𝄐", sendingOutputTo: { bytes.append($0) })
/// print(bytes)
/// // Prints "[240, 157, 132, 144]"
///
/// - Parameters:
/// - input: The Unicode scalar value to encode.
/// - processCodeUnit: A closure that processes one code unit argument at a
/// time.
public static func encode(
_ input: UnicodeScalar,
sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void
@@ -315,19 +408,35 @@ public struct UTF8 : UnicodeCodec {
processCodeUnit(buf3)
}
/// Returns `true` if `byte` is a continuation byte of the form
/// `0b10xxxxxx`.
/// Returns a Boolean value indicating whether the specified code unit is a
/// UTF-8 continuation byte.
///
/// Continuation bytes take the form `0b10xxxxxx`. For example, a lowercase
/// "e" with an acute accent above it (`"é"`) uses 2 bytes for its UTF-8
/// representation: `0b11000011` (195) and `0b10101001` (169). The second
/// byte is a continuation byte.
///
/// let eAcute = "é"
/// for codeUnit in eAcute.utf8 {
/// print(codeUnit, UTF8.isContinuation(codeUnit))
/// }
/// // Prints "195 false"
/// // Prints "169 true"
///
/// - Parameter byte: A UTF-8 code unit.
/// - Returns: `true` if `byte` is a continuation byte; otherwise, `false`.
public static func isContinuation(_ byte: CodeUnit) -> Bool {
return byte & 0b11_00__0000 == 0b10_00__0000
}
}
/// A codec for [UTF-16](http://www.unicode.org/glossary/#UTF_16).
/// A codec for translating between Unicode scalar values and UTF-16 code
/// units.
public struct UTF16 : UnicodeCodec {
/// A type that can hold [code unit](http://www.unicode.org/glossary/#code_unit)
/// values for this encoding.
/// A type that can hold code unit values for this encoding.
public typealias CodeUnit = UInt16
/// Creates an instance of the UTF-16 codec.
public init() {}
/// A lookahead buffer for one UTF-16 code unit.
@@ -340,22 +449,47 @@ public struct UTF16 : UnicodeCodec {
/// `x` is set when `_decodeLookahead` contains a code unit.
internal var _lookaheadFlags: UInt8 = 0
/// Start or continue decoding a UTF sequence.
/// Starts or continues decoding a UTF-16 sequence.
///
/// In order to decode a code unit sequence completely, this function should
/// be called repeatedly until it returns `UnicodeDecodingResult.emptyInput`.
/// Checking that the iterator was exhausted is not sufficient. The decoder
/// can have an internal buffer that is pre-filled with data from the input
/// iterator.
/// To decode a code unit sequence completely, call this method repeatedly
/// until it returns `UnicodeDecodingResult.emptyInput`. Checking that the
/// iterator was exhausted is not sufficient, because the decoder can store
/// buffered data from the input iterator.
///
/// Because of buffering, it is impossible to find the corresponding position
/// in the iterator for a given returned `UnicodeScalar` or an error.
///
/// - Parameter next: An iterator of code units to be decoded. Repeated
/// calls to this method on the same instance should always pass the same
/// iterator and the iterator or copies thereof should not be used for
/// anything else between calls. Failing to do so will yield unspecified
/// results.
/// The following example decodes the UTF-16 code units of a string into an
/// array of `UnicodeScalar` instances. This is a demonstration only---if
/// you need the Unicode scalar representation of a string, use its
/// `unicodeScalars` view.
///
/// let str = "✨Unicode✨"
/// print(Array(str.utf16))
/// // Prints "[10024, 85, 110, 105, 99, 111, 100, 101, 10024]"
///
/// var codeUnitIterator = str.utf16.makeIterator()
/// var scalars: [UnicodeScalar] = []
/// var utf16Decoder = UTF16()
/// Decode: while true {
/// switch utf16Decoder.decode(&codeUnitIterator) {
/// case .scalarValue(let v): scalars.append(v)
/// case .emptyInput: break Decode
/// case .error:
/// print("Decoding error")
/// break Decode
/// }
/// }
/// print(scalars)
/// // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]"
///
/// - Parameter next: An iterator of code units to be decoded. `next` must be
/// the same iterator instance in repeated calls to this method. Do not
/// advance the iterator or any copies of the iterator outside this
/// method.
/// - Returns: A `UnicodeDecodingResult` instance, representing the next
/// Unicode scalar, an indication of an error, or an indication that the
/// UTF sequence has been fully decoded.
public mutating func decode<
I : IteratorProtocol where I.Element == CodeUnit
>(_ input: inout I) -> UnicodeDecodingResult {
@@ -451,8 +585,22 @@ public struct UTF16 : UnicodeCodec {
}
}
/// Encode a `UnicodeScalar` as a series of `CodeUnit`s by
/// calling `processCodeUnit` on each `CodeUnit`.
/// Encodes a Unicode scalar as a series of code units by calling the given
/// closure on each code unit.
///
/// For example, the musical fermata symbol ("𝄐") is a single Unicode scalar
/// value (`\u{1D110}`) but requires two code units for its UTF-16
/// representation. The following code encodes a fermata in UTF-16:
///
/// var codeUnits: [UTF16.CodeUnit] = []
/// UTF16.encode("𝄐", sendingOutputTo: { codeUnits.append($0) })
/// print(codeUnits)
/// // Prints "[55348, 56592]"
///
/// - Parameters:
/// - input: The Unicode scalar value to encode.
/// - processCodeUnit: A closure that processes one code unit argument at a
/// time.
public static func encode(
_ input: UnicodeScalar,
sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void
@@ -470,30 +618,56 @@ public struct UTF16 : UnicodeCodec {
}
}
/// A codec for [UTF-32](http://www.unicode.org/glossary/#UTF_32).
/// A codec for translating between Unicode scalar values and UTF-32 code
/// units.
public struct UTF32 : UnicodeCodec {
/// A type that can hold [code unit](http://www.unicode.org/glossary/#code_unit)
/// values for this encoding.
/// A type that can hold code unit values for this encoding.
public typealias CodeUnit = UInt32
/// Creates an instance of the UTF-32 codec.
public init() {}
/// Start or continue decoding a UTF sequence.
/// Starts or continues decoding a UTF-32 sequence.
///
/// In order to decode a code unit sequence completely, this function should
/// be called repeatedly until it returns `UnicodeDecodingResult.emptyInput`.
/// Checking that the iterator was exhausted is not sufficient. The decoder
/// can have an internal buffer that is pre-filled with data from the input
/// iterator.
/// To decode a code unit sequence completely, call this method repeatedly
/// until it returns `UnicodeDecodingResult.emptyInput`. Checking that the
/// iterator was exhausted is not sufficient, because the decoder can store
/// buffered data from the input iterator.
///
/// Because of buffering, it is impossible to find the corresponding position
/// in the iterator for a given returned `UnicodeScalar` or an error.
///
/// - Parameter next: An iterator of code units to be decoded. Repeated
/// calls to this method on the same instance should always pass the same
/// iterator and the iterator or copies thereof should not be used for
/// anything else between calls. Failing to do so will yield unspecified
/// results.
/// The following example decodes the UTF-32 code units of a string
/// into an array of `UnicodeScalar` instances. This is a demonstration
/// only---if you need the Unicode scalar representation of a string, use
/// its `unicodeScalars` view.
///
/// // UTF-32 representation of "✨Unicode✨"
/// let codeUnits: [UTF32.CodeUnit] =
/// [10024, 85, 110, 105, 99, 111, 100, 101, 10024]
///
/// var codeUnitIterator = codeUnits.makeIterator()
/// var scalars: [UnicodeScalar] = []
/// var utf32Decoder = UTF32()
/// Decode: while true {
/// switch utf32Decoder.decode(&codeUnitIterator) {
/// case .scalarValue(let v): scalars.append(v)
/// case .emptyInput: break Decode
/// case .error:
/// print("Decoding error")
/// break Decode
/// }
/// }
/// print(scalars)
/// // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]"
///
/// - Parameter next: An iterator of code units to be decoded. `next` must be
/// the same iterator instance in repeated calls to this method. Do not
/// advance the iterator or any copies of the iterator outside this
/// method.
/// - Returns: A `UnicodeDecodingResult` instance, representing the next
/// Unicode scalar, an indication of an error, or an indication that the
/// UTF sequence has been fully decoded.
public mutating func decode<
I : IteratorProtocol where I.Element == CodeUnit
>(_ input: inout I) -> UnicodeDecodingResult {
@@ -511,8 +685,22 @@ public struct UTF32 : UnicodeCodec {
}
}
/// Encode a `UnicodeScalar` as a series of `CodeUnit`s by
/// calling `processCodeUnit` on each `CodeUnit`.
/// Encodes a Unicode scalar as a UTF-32 code unit by calling the given
/// closure.
///
/// For example, like every Unicode scalar, the musical fermata symbol ("𝄐")
/// can be represented in UTF-32 as a single code unit. The following code
/// encodes a fermata in UTF-32:
///
/// var codeUnit: UTF32.CodeUnit = 0
/// UTF32.encode("𝄐", sendingOutputTo: { codeUnit = $0 })
/// print(codeUnit)
/// // Prints "119056"
///
/// - Parameters:
/// - input: The Unicode scalar value to encode.
/// - processCodeUnit: A closure that processes one code unit argument at a
/// time.
public static func encode(
_ input: UnicodeScalar,
sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void
@@ -521,12 +709,41 @@ public struct UTF32 : UnicodeCodec {
}
}
/// Translate `input`, in the given `InputEncoding`, into `processCodeUnit`, in
/// the given `OutputEncoding`.
/// Translates the given input from one Unicode encoding to another by calling
/// the given closure.
///
/// - Parameter stopOnError: Causes encoding to stop when an encoding
/// error is detected in `input`, if `true`. Otherwise, U+FFFD
/// replacement characters are inserted for each detected error.
/// The following example transcodes the UTF-8 representation of the string
/// `"Fermata 𝄐"` into UTF-32.
///
/// let fermata = "Fermata 𝄐"
/// let bytes = fermata.utf8
/// print(Array(bytes))
/// // Prints "[70, 101, 114, 109, 97, 116, 97, 32, 240, 157, 132, 144]"
///
/// var codeUnits: [UTF32.CodeUnit] = []
/// let sink = { codeUnits.append($0) }
/// transcode(bytes.makeIterator(), from: UTF8.self, to: UTF32.self,
/// stoppingOnError: false, sendingOutputTo: sink)
/// print(codeUnits)
/// // Prints "[70, 101, 114, 109, 97, 116, 97, 32, 119056]"
///
/// The `sink` closure is called with each resulting UTF-32 code unit as the
/// function iterates over its input.
///
/// - Parameters:
/// - input: An iterator of code units to be translated, encoded as
/// `inputEncoding`. If `stopOnError` is `false`, the entire iterator will
/// be exhausted. Otherwise, iteration will stop if an encoding error is
/// detected.
/// - inputEncoding: The Unicode encoding of `input`.
/// - outputEncoding: The destination Unicode encoding.
/// - stopOnError: Pass `true` to stop translation when an encoding error is
/// detected in `input`. Otherwise, a Unicode replacement character
/// (`"\u{FFFD}"`) is inserted for each detected error.
/// - processCodeUnit: A closure that processes one `outputEncoding` code
/// unit at a time.
/// - Returns: `true` if the translation detected encoding errors in `input`;
/// otherwise, `false`.
public func transcode<
Input : IteratorProtocol,
InputEncoding : UnicodeCodec,
@@ -697,24 +914,76 @@ extension UTF8.CodeUnit : _StringElement {
}
extension UTF16 {
/// Returns the number of code units required to encode `x`.
/// Returns the number of code units required to encode the given Unicode
/// scalar.
///
/// Because a Unicode scalar value can require up to 21 bits to store its
/// value, some Unicode scalars are represented in UTF-16 by a pair of
/// 16-bit code units. The first and second code units of the pair,
/// designated *leading* and *trailing* surrogates, make up a *surrogate
/// pair*.
///
/// let anA: UnicodeScalar = "A"
/// print(anA.value)
/// // Prints "65"
/// print(UTF16.width(anA))
/// // Prints "1"
///
/// let anApple: UnicodeScalar = "🍎"
/// print(anApple.value)
/// // Prints "127822"
/// print(UTF16.width(anApple))
/// // Prints "2"
///
/// - Parameter x: A Unicode scalar value.
/// - Returns: The width of `x` when encoded in UTF-16, either `1` or `2`.
public static func width(_ x: UnicodeScalar) -> Int {
return x.value <= 0xFFFF ? 1 : 2
}
/// Returns the high surrogate code unit of a [surrogate pair](http://www.unicode.org/glossary/#surrogate_pair) representing
/// `x`.
/// Returns the high-surrogate code unit of the surrogate pair representing
/// the specified Unicode scalar.
///
/// - Precondition: `width(x) == 2`.
/// Because a Unicode scalar value can require up to 21 bits to store its
/// value, some Unicode scalars are represented in UTF-16 by a pair of
/// 16-bit code units. The first and second code units of the pair,
/// designated *leading* and *trailing* surrogates, make up a *surrogate
/// pair*.
///
/// let apple: UnicodeScalar = "🍎"
/// print(UTF16.leadSurrogate(apple))
/// // Prints "55356"
///
/// - Parameter x: A Unicode scalar value. `x` must be represented by a
/// surrogate pair when encoded in UTF-16. To check whether `x` is
/// represented by a surrogate pair, use `UTF16.width(x) == 2`.
/// - Returns: The leading surrogate code unit of `x` when encoded in UTF-16.
///
/// - SeeAlso: `UTF16.width(_:)`, `UTF16.trailSurrogate(_:)`
public static func leadSurrogate(_ x: UnicodeScalar) -> UTF16.CodeUnit {
_precondition(width(x) == 2)
return UTF16.CodeUnit((x.value - 0x1_0000) >> (10 as UInt32)) + 0xD800
}
/// Returns the low surrogate code unit of a [surrogate pair](http://www.unicode.org/glossary/#surrogate_pair) representing
/// `x`.
/// Returns the low-surrogate code unit of the surrogate pair representing
/// the specified Unicode scalar.
///
/// - Precondition: `width(x) == 2`.
/// Because a Unicode scalar value can require up to 21 bits to store its
/// value, some Unicode scalars are represented in UTF-16 by a pair of
/// 16-bit code units. The first and second code units of the pair,
/// designated *leading* and *trailing* surrogates, make up a *surrogate
/// pair*.
///
/// let apple: UnicodeScalar = "🍎"
/// print(UTF16.trailSurrogate(apple))
/// // Prints "57166"
///
/// - Parameter x: A Unicode scalar value. `x` must be represented by a
/// surrogate pair when encoded in UTF-16. To check whether `x` is
/// represented by a surrogate pair, use `UTF16.width(x) == 2`.
/// - Returns: The trailing surrogate code unit of `x` when encoded in UTF-16.
///
/// - SeeAlso: `UTF16.width(_:)`, `UTF16.leadSurrogate(_:)`
public static func trailSurrogate(_ x: UnicodeScalar) -> UTF16.CodeUnit {
_precondition(width(x) == 2)
return UTF16.CodeUnit(
@@ -722,10 +991,57 @@ extension UTF16 {
) + 0xDC00
}
/// Returns a Boolean value indicating whether the specified code unit is a
/// high-surrogate code unit.
///
/// Here's an example of checking whether each code unit in a string's
/// `utf16` view is a lead surrogate. The `apple` string contains a single
/// emoji character made up of a surrogate pair when encoded in UTF-16.
///
/// let apple = "🍎"
/// for unit in apple.utf16 {
/// print(UTF16.isLeadSurrogate(unit))
/// }
/// // Prints "true"
/// // Prints "false"
///
/// This method does not validate the encoding of a UTF-16 sequence beyond
/// the specified code unit. Specifically, it does not validate that a
/// low-surrogate code unit follows `x`.
///
/// - Parameter x: A UTF-16 code unit.
/// - Returns: `true` if `x` is a high-surrogate code unit; otherwise,
/// `false`.
///
/// - SeeAlso: `UTF16.width(_:)`, `UTF16.leadSurrogate(_:)`
public static func isLeadSurrogate(_ x: CodeUnit) -> Bool {
return 0xD800...0xDBFF ~= x
}
/// Returns a Boolean value indicating whether the specified code unit is a
/// low-surrogate code unit.
///
/// Here's an example of checking whether each code unit in a string's
/// `utf16` view is a trailing surrogate. The `apple` string contains a
/// single emoji character made up of a surrogate pair when encoded in
/// UTF-16.
///
/// let apple = "🍎"
/// for unit in apple.utf16 {
/// print(UTF16.isTrailSurrogate(unit))
/// }
/// // Prints "false"
/// // Prints "true"
///
/// This method does not validate the encoding of a UTF-16 sequence beyond
/// the specified code unit. Specifically, it does not validate that a
/// high-surrogate code unit precedes `x`.
///
/// - Parameter x: A UTF-16 code unit.
/// - Returns: `true` if `x` is a low-surrogate code unit; otherwise,
/// `false`.
///
/// - SeeAlso: `UTF16.width(_:)`, `UTF16.trailSurrogate(_:)`
public static func isTrailSurrogate(_ x: CodeUnit) -> Bool {
return 0xDC00...0xDFFF ~= x
}
@@ -751,12 +1067,39 @@ extension UTF16 {
}
/// Returns the number of UTF-16 code units required for the given code unit
/// sequence when transcoded to UTF-16, and a bit describing if the sequence
/// was found to contain only ASCII characters.
/// sequence when transcoded to UTF-16, and a Boolean value indicating
/// whether the sequence was found to contain only ASCII characters.
///
/// If `repairIllFormedSequences` is `true`, the function always succeeds.
/// If it is `false`, `nil` is returned if an ill-formed code unit sequence is
/// found in `input`.
/// The following example finds the length of the UTF-16 encoding of the
/// string `"Fermata 𝄐"`, starting with its UTF-8 representation.
///
/// let fermata = "Fermata 𝄐"
/// let bytes = fermata.utf8
/// print(Array(bytes))
/// // Prints "[70, 101, 114, 109, 97, 116, 97, 32, 240, 157, 132, 144]"
///
/// let result = UTF16.transcodedLength(of: bytes.makeIterator(),
/// decodedAs: UTF8.self,
/// repairingIllFormedSequences: false)
/// print(result)
/// // Prints "Optional((10, false))"
///
/// - Parameters:
/// - input: An iterator of code units to be translated, encoded as
/// `sourceEncoding`. If `repairingIllFormedSequences` is `true`, the
/// entire iterator will be exhausted. Otherwise, iteration will stop if
/// an ill-formed sequence is detected.
/// - sourceEncoding: The Unicode encoding of `input`.
/// - repairingIllFormedSequences: Pass `true` to measure the length of
/// `input` even when `input` contains ill-formed sequences. Each
/// ill-formed sequence is replaced with a Unicode replacement character
/// (`"\u{FFFD}"`) and is measured as such. Pass `false` to immediately
/// stop measuring `input` when an ill-formed sequence is encountered.
/// - Returns: A tuple containing the number of UTF-16 code units required to
/// encode `input` and a Boolean value that indicates whether the `input`
/// contained only ASCII characters. If `repairingIllFormedSequences` is
/// `false` and an ill-formed sequence is detected, this method returns
/// `nil`.
public static func transcodedLength<
Encoding : UnicodeCodec, Input : IteratorProtocol
where Encoding.CodeUnit == Input.Element
@@ -792,7 +1135,7 @@ extension UTF16 {
}
}
// Unchecked init to avoid precondition branches in hot code paths were we
// Unchecked init to avoid precondition branches in hot code paths where we
// already know the value is a valid unicode scalar.
extension UnicodeScalar {
/// Create an instance with numeric value `value`, bypassing the regular

View File

@@ -12,7 +12,25 @@
// UnicodeScalar Type
//===----------------------------------------------------------------------===//
/// A [Unicode scalar value](http://www.unicode.org/glossary/#unicode_scalar_value).
/// A Unicode scalar value.
///
/// The `UnicodeScalar` type, representing a single Unicode scalar value, is
/// the element type of a string's `unicodeScalars` collection.
///
/// You can create a `UnicodeScalar` instance by using a string literal that
/// contains a single character representing exactly one Unicode scalar value.
///
/// let letterK: UnicodeScalar = "K"
/// let kim: UnicodeScalar = "김"
/// print(letterK, kim)
/// // Prints "K 김"
///
/// You can also create Unicode scalar values directly from their numeric
/// representation.
///
/// let airplane = UnicodeScalar(9992)
/// print(airplane)
/// // Prints "✈"
@_fixed_layout
public struct UnicodeScalar :
_BuiltinUnicodeScalarLiteralConvertible,
@@ -20,7 +38,7 @@ public struct UnicodeScalar :
var _value: UInt32
/// A numeric representation of `self`.
/// A numeric representation of the Unicode scalar.
public var value: UInt32 { return _value }
@_transparent
@@ -28,15 +46,35 @@ public struct UnicodeScalar :
self._value = UInt32(value)
}
/// Create an instance initialized to `value`.
/// Creates a Unicode scalar with the specified value.
///
/// Do not call this initializer directly. It may be used by the compiler
/// when you use a string literal to initialize a `UnicodeScalar` instance.
///
/// let letterK: UnicodeScalar = "K"
/// print(letterK)
/// // Prints "K"
///
/// In this example, the assignment to the `letterK` constant is handled by
/// this initializer behind the scenes.
@_transparent
public init(unicodeScalarLiteral value: UnicodeScalar) {
self = value
}
/// Create an instance with numeric value `v`.
/// Creates a Unicode scalar with the specified numeric value.
///
/// - Precondition: `v` is a valid Unicode scalar value.
/// For example, the following code sample creates a `UnicodeScalar` instance
/// with a value of an emoji character:
///
/// let codepoint: UInt32 = 127881
/// let emoji = UnicodeScalar(codepoint)
/// print(emoji)
/// // Prints "🎉"
///
/// - Parameter v: The Unicode code point to use for the scalar. `v` must be
/// a valid Unicode scalar value, in the range `0...0xD7FF` or
/// `0xE000...0x10FFFF`.
public init(_ v: UInt32) {
// Unicode 6.3.0:
//
@@ -55,29 +93,70 @@ public struct UnicodeScalar :
self._value = v
}
/// Create an instance with numeric value `v`.
/// Creates a Unicode scalar with the specified numeric value.
///
/// - Precondition: `v` is a valid Unicode scalar value.
/// For example, the following code sample creates a `UnicodeScalar` instance
/// with a value of `밥`, the Korean word for rice:
///
/// let codepoint: UInt16 = 48165
/// let bap = UnicodeScalar(codepoint)
/// print(bap)
/// // Prints "밥"
///
/// - Parameter v: The Unicode code point to use for the scalar. `v` must be
/// a valid Unicode scalar value, in the range `0...0xD7FF` or
/// `0xE000...0xFFFF`.
public init(_ v: UInt16) {
self = UnicodeScalar(UInt32(v))
}
/// Create an instance with numeric value `v`.
/// Creates a Unicode scalar with the specified numeric value.
///
/// For example, the following code sample creates a `UnicodeScalar` instance
/// with a value of `7`:
///
/// let codepoint: UInt8 = 55
/// let seven = UnicodeScalar(codepoint)
/// print(seven)
/// // Prints "7"
///
/// - Parameter v: The code point to use for the scalar.
public init(_ v: UInt8) {
self = UnicodeScalar(UInt32(v))
}
/// Create a duplicate of `v`.
/// Creates a duplicate of the given Unicode scalar.
public init(_ v: UnicodeScalar) {
// This constructor allows one to provide necessary type context to
// disambiguate between function overloads on 'String' and 'UnicodeScalar'.
self = v
}
/// Returns a String representation of `self` .
/// Returns a string representation of the Unicode scalar.
///
/// - parameter forceASCII: If `true`, forces most values into a numeric
/// representation.
/// Scalar values representing characters that are normally unprintable or
/// that otherwise require escaping are escaped with a backslash.
///
/// let tab = UnicodeScalar(9)
/// print(tab)
/// // Prints " "
/// print(tab.escaped(asASCII: false))
/// // Prints "\t"
///
/// When the `forceASCII` parameter is `true`, a `UnicodeScalar` instance
/// with a value greater than 127 is represented using an escaped numeric
/// value; otherwise, non-ASCII characters are represented using their
/// typical string value.
///
/// let bap = UnicodeScalar(48165)
/// print(bap.escaped(asASCII: false))
/// // Prints "밥"
/// print(bap.escaped(asASCII: true))
/// // Prints "\u{BC25}"
///
/// - Parameter forceASCII: Pass `true` if you need the result to use only
/// ASCII characters; otherwise, pass `false`.
/// - Returns: A string representation of the scalar.
public func escaped(asASCII forceASCII: Bool) -> String {
func lowNibbleAsHex(_ v: UInt32) -> String {
let nibble = v & 15
@@ -137,8 +216,21 @@ public struct UnicodeScalar :
}
}
/// Returns `true` if this is an ASCII character (code point 0 to 127
/// inclusive).
/// A Boolean value indicating whether the Unicode scalar is an ASCII
/// character.
///
/// ASCII characters have a scalar value between 0 and 127, inclusive. For
/// example:
///
/// let canyon = "Cañón"
/// for scalar in canyon.unicodeScalars {
/// print(scalar, scalar.isASCII, scalar.value)
/// }
/// // Prints "C true 67"
/// // Prints "a true 97"
/// // Prints "ñ false 241"
/// // Prints "ó false 243"
/// // Prints "n true 110"
public var isASCII: Bool {
return value <= 127
}
@@ -155,33 +247,41 @@ public struct UnicodeScalar :
}
extension UnicodeScalar : CustomStringConvertible, CustomDebugStringConvertible {
/// A textual representation of `self`.
/// An escaped textual representation of the Unicode scalar.
public var description: String {
return "\"\(escaped(asASCII: false))\""
}
/// A textual representation of `self`, suitable for debugging.
/// An escaped textual representation of the Unicode scalar, suitable for
/// debugging.
public var debugDescription: String {
return "\"\(escaped(asASCII: true))\""
}
}
extension UnicodeScalar : Hashable {
/// The hash value.
/// The Unicode scalar's hash value.
///
/// **Axiom:** `x == y` implies `x.hashValue == y.hashValue`.
///
/// - Note: The hash value is not guaranteed to be stable across
/// different invocations of the same program. Do not persist the
/// hash value across program runs.
/// Hash values are not guaranteed to be equal across different executions of
/// your program. Do not save hash values to use during a future execution.
public var hashValue: Int {
return Int(self.value)
}
}
extension UnicodeScalar {
/// Construct with value `v`.
/// Creates a Unicode scalar with the specified numeric value.
///
/// - Precondition: `v` is a valid unicode scalar value.
/// For example, the following code sample creates a `UnicodeScalar` instance
/// with a value of an emoji character:
///
/// let codepoint = 127881
/// let emoji = UnicodeScalar(codepoint)
/// print(emoji)
/// // Prints "🎉"
///
/// - Parameter v: The Unicode code point to use for the scalar. `v` must be
/// a valid Unicode scalar value, in the range `0...0xD7FF` or
/// `0xE000...0x10FFFF`.
public init(_ v: Int) {
self = UnicodeScalar(UInt32(v))
}
@@ -244,11 +344,10 @@ extension UnicodeScalar.UTF16View : RandomAccessCollection {
return 0
}
/// The "past the end" position.
/// The "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
///
/// `endIndex` is not a valid argument to `subscript`, and is always
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
/// If the collection is empty, `endIndex` is equal to `startIndex`.
var endIndex: Int {
return 0 + UTF16.width(value)
}
@@ -273,7 +372,7 @@ func _ascii16(_ c: UnicodeScalar) -> UTF16.CodeUnit {
extension UnicodeScalar {
/// Creates an instance of the NUL scalar value.
@available(*, unavailable, message: "use the 'UnicodeScalar(\"\\0\")'")
@available(*, unavailable, message: "use 'UnicodeScalar(0)'")
public init() {
Builtin.unreachable()
}

View File

@@ -48,11 +48,11 @@ public struct Unsafe${Mutable}BufferPointer<Element>
return 0
}
/// The "past the end" position; always identical to `count`.
/// The "past the end" position---that is, the position one greater than the
/// last valid subscript argument.
///
/// `endIndex` is not a valid argument to `subscript`, and is always
/// reachable from `startIndex` by zero or more applications of
/// `index(after:)`.
/// The `endIndex` property of an `Unsafe${Mutable}BufferPointer` instance is
/// always identical to `count`.
public var endIndex: Int {
return count
}