//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

extension String {
  /// A view of a string's contents as a collection of Unicode scalar values.
  ///
  /// You can access a string's view of Unicode scalar values by using its
  /// `unicodeScalars` property. Unicode scalar values are the 21-bit codes
  /// that are the basic unit of Unicode. Each scalar value is represented by
  /// a `Unicode.Scalar` instance and is equivalent to a UTF-32 code unit.
  ///
  ///     let flowers = "Flowers 💐"
  ///     for v in flowers.unicodeScalars {
  ///         print(v.value)
  ///     }
  ///     // 70
  ///     // 108
  ///     // 111
  ///     // 119
  ///     // 101
  ///     // 114
  ///     // 115
  ///     // 32
  ///     // 128144
  ///
  /// Some characters that are visible in a string are made up of more than one
  /// Unicode scalar value. In that case, a string's `unicodeScalars` view
  /// contains more values than its `characters` view.
  ///
  ///     let flag = "🇵🇷"
  ///     for c in flag.characters {
  ///         print(c)
  ///     }
  ///     // 🇵🇷
  ///
  ///     for v in flag.unicodeScalars {
  ///         print(v.value)
  ///     }
  ///     // 127477
  ///     // 127479
  ///
  /// You can convert a `String.UnicodeScalarView` instance back into a string
  /// using the `String` type's `init(_:)` initializer.
  ///
  ///     let favemoji = "My favorite emoji is 🎉"
  ///     if let i = favemoji.unicodeScalars.index(where: { $0.value >= 128 }) {
  ///         let asciiPrefix = String(favemoji.unicodeScalars.prefix(upTo: i))
  ///         print(asciiPrefix)
  ///     }
  ///     // Prints "My favorite emoji is "
  public struct UnicodeScalarView :
    BidirectionalCollection,
    CustomStringConvertible,
    CustomDebugStringConvertible
  {
    internal init(_ _core: _StringCore, coreOffset: Int = 0) {
      self._core = _core
      self._coreOffset = coreOffset
    }

    /// An iterator over the UTF-16 code units of a `_StringCore`, starting at
    /// a given core position. It is the input fed to the UTF-16 decoder by
    /// `index(after:)` and `subscript(position:)`.
    internal struct _ScratchIterator : IteratorProtocol {
      var core: _StringCore
      var idx: Int
      @_versioned
      init(_ core: _StringCore, _ pos: Int) {
        self.idx = pos
        self.core = core
      }
      @inline(__always)
      mutating func next() -> UTF16.CodeUnit? {
        if idx == core.endIndex {
          return nil
        }
        defer { idx += 1 }
        return self.core[idx]
      }
    }

    /// A position in a string's `UnicodeScalars` view.
    ///
    /// You can convert between indices of the different string views by using
    /// conversion initializers and the `samePosition(in:)` method overloads.
    /// The following example finds the index of the solid heart pictograph in
    /// the string's character view and then converts that to the same
    /// position in the Unicode scalars view:
    ///
    ///     let hearts = "Hearts <3 ♥︎ 💘"
    ///     let i = hearts.characters.index(of: "♥︎")!
    ///
    ///     let j = i.samePosition(in: hearts.unicodeScalars)
    ///     print(hearts.unicodeScalars.suffix(from: j))
    ///     // Prints "♥︎ 💘"
    ///     print(hearts.unicodeScalars[j].value)
    ///     // Prints "9829"
    public struct Index {
      public // SPI(Foundation)
      init(_position: Int) {
        self._position = _position
      }

      @_versioned internal var _position: Int
    }

    /// Translates a `_core` index into a `UnicodeScalarIndex` using this view's
    /// `_coreOffset`.
    internal func _fromCoreIndex(_ i: Int) -> Index {
      return Index(_position: i + _coreOffset)
    }

    /// Translates a `UnicodeScalarIndex` into a `_core` index using this view's
    /// `_coreOffset`.
    internal func _toCoreIndex(_ i: Index) -> Int {
      return i._position - _coreOffset
    }

    /// The position of the first Unicode scalar value if the string is
    /// nonempty.
    ///
    /// If the string is empty, `startIndex` is equal to `endIndex`.
    public var startIndex: Index {
      return _fromCoreIndex(_core.startIndex)
    }

    /// The "past the end" position---that is, the position one greater than
    /// the last valid subscript argument.
    ///
    /// In an empty Unicode scalars view, `endIndex` is equal to `startIndex`.
    public var endIndex: Index {
      return _fromCoreIndex(_core.endIndex)
    }

    /// Returns the next consecutive location after `i`.
    ///
    /// - Precondition: The next location exists.
    public func index(after i: Index) -> Index {
      let i = _toCoreIndex(i)
      var scratch = _ScratchIterator(_core, i)
      var decoder = UTF16()
      // Only the number of code units consumed by the decoder is needed here;
      // the decoded value itself is discarded.
      let (_, length) = decoder._decodeOne(&scratch)
      return _fromCoreIndex(i + length)
    }

    /// Returns the previous consecutive location before `i`.
    ///
    /// - Precondition: The previous location exists.
    public func index(before i: Index) -> Index {
      var i = _toCoreIndex(i) - 1
      let codeUnit = _core[i]
      // A code unit whose top six bits are 0b1101_11 is a trail surrogate
      // (0xDC00...0xDFFF). If it is preceded by a lead surrogate (top six
      // bits 0b1101_10, 0xD800...0xDBFF), step back over the whole pair.
      if _slowPath((codeUnit >> 10) == 0b1101_11) {
        if i != 0 && (_core[i - 1] >> 10) == 0b1101_10 {
          i -= 1
        }
      }
      return _fromCoreIndex(i)
    }

    /// Accesses the Unicode scalar value at the given position.
    ///
    /// The following example searches a string's Unicode scalars view for a
    /// capital letter and then prints the character and Unicode scalar value
    /// at the found index:
    ///
    ///     let greeting = "Hello, friend!"
    ///     if let i = greeting.unicodeScalars.index(where: { "A"..."Z" ~= $0 }) {
    ///         print("First capital letter: \(greeting.unicodeScalars[i])")
    ///         print("Unicode scalar value: \(greeting.unicodeScalars[i].value)")
    ///     }
    ///     // Prints "First capital letter: H"
    ///     // Prints "Unicode scalar value: 72"
    ///
    /// - Parameter position: A valid index of the Unicode scalars view.
    ///   `position` must be less than the view's end index.
    public subscript(position: Index) -> Unicode.Scalar {
      var scratch = _ScratchIterator(_core, _toCoreIndex(position))
      var decoder = UTF16()
      switch decoder.decode(&scratch) {
      case .scalarValue(let us):
        return us
      case .emptyInput:
        _sanityCheckFailure("cannot subscript using an endIndex")
      case .error:
        // Ill-formed input is reported as U+FFFD REPLACEMENT CHARACTER.
        return Unicode.Scalar(0xfffd)!
      }
    }

    /// Accesses the Unicode scalar values in the given range.
    ///
    /// The example below uses this subscript to access the scalar values up
    /// to, but not including, the first comma (`","`) in the string.
    ///
    ///     let str = "All this happened, more or less."
    ///     let i = str.unicodeScalars.index(of: ",")!
    ///     let substring = str.unicodeScalars[str.unicodeScalars.startIndex ..< i]
    ///     print(String(substring))
    ///     // Prints "All this happened"
    ///
    /// - Complexity: O(*n*) if the underlying string is bridged from
    ///   Objective-C, where *n* is the length of the string; otherwise, O(1).
    public subscript(r: Range<Index>) -> UnicodeScalarView {
      let rawSubRange = _toCoreIndex(r.lowerBound)..<_toCoreIndex(r.upperBound)
      // The sliced core is zero-indexed, so the slice records where it started
      // to keep its indices interchangeable with those of this view.
      return UnicodeScalarView(_core[rawSubRange],
        coreOffset: r.lowerBound._position)
    }

    /// An iterator over the Unicode scalars that make up a `UnicodeScalarView`
    /// collection.
    public struct Iterator : IteratorProtocol {
      init(_ _base: _StringCore) {
        if _base.hasContiguousStorage {
          self._baseSet = true
          if _base.isASCII {
            self._ascii = true
            self._asciiBase = UnsafeBufferPointer(
              start: _base._baseAddress?.assumingMemoryBound(
                to: UTF8.CodeUnit.self),
              count: _base.count).makeIterator()
          } else {
            self._ascii = false
            self._base = UnsafeBufferPointer(
              start: _base._baseAddress?.assumingMemoryBound(
                to: UTF16.CodeUnit.self),
              count: _base.count).makeIterator()
          }
        } else {
          self._ascii = false
          self._baseSet = false
          self._iterator = _base.makeIterator()
        }
      }

      /// Advances to the next element and returns it, or `nil` if no next
      /// element exists.
      ///
      /// Once `nil` has been returned, all subsequent calls return `nil`.
      ///
      /// - Precondition: `next()` has not been applied to a copy of `self`
      ///   since the copy was made.
      public mutating func next() -> Unicode.Scalar? {
        var result: UnicodeDecodingResult
        if _baseSet {
          if _ascii {
            switch self._asciiBase.next() {
            case let x?:
              result = .scalarValue(Unicode.Scalar(x))
            case nil:
              result = .emptyInput
            }
          } else {
            result = _decoder.decode(&(self._base!))
          }
        } else {
          result = _decoder.decode(&(self._iterator!))
        }
        switch result {
        case .scalarValue(let us):
          return us
        case .emptyInput:
          return nil
        case .error:
          // Ill-formed input is reported as U+FFFD REPLACEMENT CHARACTER.
          return Unicode.Scalar(0xfffd)
        }
      }

      internal var _decoder: UTF16 = UTF16()
      /// `true` when iterating directly over contiguous storage, in which
      /// case `_asciiBase` or `_base` is used instead of `_iterator`.
      internal let _baseSet: Bool
      /// `true` when the contiguous storage is ASCII and `_asciiBase` is used.
      internal let _ascii: Bool
      internal var _asciiBase: UnsafeBufferPointerIterator<UTF8.CodeUnit>!
      internal var _base: UnsafeBufferPointerIterator<UTF16.CodeUnit>!
      internal var _iterator: IndexingIterator<_StringCore>!
    }

    /// Returns an iterator over the Unicode scalars that make up this view.
    ///
    /// - Returns: An iterator over this collection's `Unicode.Scalar` elements.
    public func makeIterator() -> Iterator {
      return Iterator(_core)
    }

    public var description: String {
      return String(_core)
    }

    public var debugDescription: String {
      return "StringUnicodeScalarView(\(self.description.debugDescription))"
    }

    internal var _core: _StringCore

    /// The offset of this view's `_core` from an original core. This works
    /// around the fact that `_StringCore` is always zero-indexed.
    /// `_coreOffset` should be subtracted from `UnicodeScalarIndex._position`
    /// before that value is used as a `_core` index.
    internal var _coreOffset: Int
  }

  /// Creates a string corresponding to the given collection of Unicode
  /// scalars.
  ///
  /// You can use this initializer to create a new string from a slice of
  /// another string's `unicodeScalars` view.
  ///
  ///     let picnicGuest = "Deserving porcupine"
  ///     if let i = picnicGuest.unicodeScalars.index(of: " ") {
  ///         let adjective = String(picnicGuest.unicodeScalars.prefix(upTo: i))
  ///         print(adjective)
  ///     }
  ///     // Prints "Deserving"
  ///
  /// The `adjective` constant is created by calling this initializer with a
  /// slice of the `picnicGuest.unicodeScalars` view.
  ///
  /// - Parameter unicodeScalars: A collection of Unicode scalar values.
  public init(_ unicodeScalars: UnicodeScalarView) {
    self.init(unicodeScalars._core)
  }

  /// The index type for a string's `unicodeScalars` view.
  public typealias UnicodeScalarIndex = UnicodeScalarView.Index
}

extension String {
  /// The string's value represented as a collection of Unicode scalar values.
  public var unicodeScalars: UnicodeScalarView {
    get {
      return UnicodeScalarView(_core)
    }
    set {
      _core = newValue._core
    }
  }
}

extension String.UnicodeScalarView.Index : Comparable {
  public static func == (
    lhs: String.UnicodeScalarView.Index,
    rhs: String.UnicodeScalarView.Index
  ) -> Bool {
    return lhs._position == rhs._position
  }

  public static func < (
    lhs: String.UnicodeScalarView.Index,
    rhs: String.UnicodeScalarView.Index
  ) -> Bool {
    return lhs._position < rhs._position
  }
}

extension String.UnicodeScalarView : RangeReplaceableCollection {
  /// Creates an empty view instance.
  public init() {
    self = String.UnicodeScalarView(_StringCore())
  }

  /// Reserves enough space in the view's underlying storage to store the
  /// specified number of ASCII characters.
  ///
  /// Because a Unicode scalar value can require more than a single ASCII
  /// character's worth of storage, additional allocation may be necessary
  /// when adding to a Unicode scalar view after a call to
  /// `reserveCapacity(_:)`.
  ///
  /// - Parameter n: The minimum number of ASCII characters' worth of storage
  ///   to allocate.
  ///
  /// - Complexity: O(*n*), where *n* is the capacity being reserved.
  public mutating func reserveCapacity(_ n: Int) {
    _core.reserveCapacity(n)
  }

  /// Appends the given Unicode scalar to the view.
  ///
  /// - Parameter x: The Unicode scalar to append to the view.
  public mutating func append(_ x: Unicode.Scalar) {
    _core.append(x)
  }

  /// Appends the Unicode scalar values in the given sequence to the view.
  ///
  /// - Parameter newElements: A sequence of Unicode scalar values.
  ///
  /// - Complexity: O(*n*), where *n* is the length of the resulting view.
  public mutating func append<S : Sequence>(contentsOf newElements: S)
    where S.Iterator.Element == Unicode.Scalar {
    // The core stores UTF-16 code units, so each scalar is expanded lazily
    // into its UTF-16 encoding as it is appended.
    _core.append(contentsOf: newElements.lazy.flatMap { $0.utf16 })
  }

  /// Replaces the elements within the specified bounds with the given Unicode
  /// scalar values.
  ///
  /// Calling this method invalidates any existing indices for use with this
  /// string.
  ///
  /// - Parameters:
  ///   - bounds: The range of elements to replace. The bounds of the range
  ///     must be valid indices of the view.
  ///   - newElements: The new Unicode scalar values to add to the string.
  ///
  /// - Complexity: O(*m*), where *m* is the combined length of the view and
  ///   `newElements`. If the call to `replaceSubrange(_:with:)` simply
  ///   removes elements at the end of the string, the complexity is O(*n*),
  ///   where *n* is equal to `bounds.count`.
  public mutating func replaceSubrange<C>(
    _ bounds: Range<Index>,
    with newElements: C
  ) where C : Collection, C.Iterator.Element == Unicode.Scalar {
    let rawSubRange: Range<Int> = _toCoreIndex(bounds.lowerBound) ..<
      _toCoreIndex(bounds.upperBound)
    let lazyUTF16 = newElements.lazy.flatMap { $0.utf16 }
    _core.replaceSubrange(rawSubRange, with: lazyUTF16)
  }
}

// Index conversions
extension String.UnicodeScalarIndex {
  /// Creates an index in the given Unicode scalars view that corresponds
  /// exactly to the specified `UTF16View` position.
  ///
  /// The following example finds the position of a space in a string's `utf16`
  /// view and then converts that position to an index in the string's
  /// `unicodeScalars` view:
  ///
  ///     let cafe = "Café 🍵"
  ///
  ///     let utf16Index = cafe.utf16.index(of: 32)!
  ///     let scalarIndex = String.UnicodeScalarView.Index(utf16Index, within: cafe.unicodeScalars)!
  ///
  ///     print(String(cafe.unicodeScalars.prefix(upTo: scalarIndex)))
  ///     // Prints "Café"
  ///
  /// If the position passed in `utf16Index` doesn't have an exact
  /// corresponding position in `unicodeScalars`, the result of the
  /// initializer is `nil`. For example, an attempt to convert the position of
  /// the trailing surrogate of a UTF-16 surrogate pair fails.
  ///
  /// - Parameters:
  ///   - utf16Index: A position in the `utf16` view of the string that
  ///     `unicodeScalars` is a view of.
  ///   - unicodeScalars: The `UnicodeScalarView` instance referenced by both
  ///     `utf16Index` and the resulting index.
  public init?(
    _ utf16Index: String.UTF16Index,
    within unicodeScalars: String.UnicodeScalarView
  ) {
    let utf16 = String.UTF16View(unicodeScalars._core)

    if utf16Index != utf16.startIndex
    && utf16Index != utf16.endIndex {
      _precondition(
        utf16Index >= utf16.startIndex
        && utf16Index <= utf16.endIndex,
        "Invalid String.UTF16Index for this Unicode.Scalar view")

      // Detect positions that have no corresponding index.  Note that
      // we have to check before and after, because an unpaired
      // surrogate will be decoded as a single replacement character,
      // thus making the corresponding position valid.
      if UTF16.isTrailSurrogate(utf16[utf16Index])
        && UTF16.isLeadSurrogate(utf16[utf16.index(before: utf16Index)]) {
        return nil
      }
    }
    self.init(_position: utf16Index._offset)
  }

  /// Creates an index in the given Unicode scalars view that corresponds
  /// exactly to the specified `UTF8View` position.
  ///
  /// If the position passed as `utf8Index` doesn't have an exact corresponding
  /// position in `unicodeScalars`, the result of the initializer is `nil`.
  /// For example, an attempt to convert the position of a UTF-8 continuation
  /// byte returns `nil`.
  ///
  /// - Parameters:
  ///   - utf8Index: A position in the `utf8` view of the string that
  ///     `unicodeScalars` is a view of.
  ///   - unicodeScalars: The `UnicodeScalarView` instance referenced by both
  ///     `utf8Index` and the resulting index.
  public init?(
    _ utf8Index: String.UTF8Index,
    within unicodeScalars: String.UnicodeScalarView
  ) {
    let core = unicodeScalars._core

    _precondition(
      utf8Index._coreIndex >= 0 && utf8Index._coreIndex <= core.endIndex,
      "Invalid String.UTF8Index for this Unicode.Scalar view")

    // Detect positions that have no corresponding index.
    if !utf8Index._isOnUnicodeScalarBoundary(in: core) {
      return nil
    }
    self.init(_position: utf8Index._coreIndex)
  }

  /// Creates an index in the given Unicode scalars view that corresponds
  /// exactly to the specified string position.
  ///
  /// The following example converts the position of the teacup emoji (`"🍵"`)
  /// into its corresponding position in the string's `unicodeScalars` view.
  ///
  ///     let cafe = "Café 🍵"
  ///     let characterIndex = cafe.characters.index(of: "🍵")!
  ///     let scalarIndex = String.UnicodeScalarView.Index(characterIndex, within: cafe.unicodeScalars)
  ///
  ///     print(cafe.unicodeScalars.suffix(from: scalarIndex))
  ///     // Prints "🍵"
  ///
  /// - Parameters:
  ///   - characterIndex: A position in a `CharacterView` instance.
  ///     `characterIndex` must be an element of
  ///     `String(unicodeScalars).characters.indices`.
  ///   - unicodeScalars: The `UnicodeScalarView` in which to find the new
  ///     position.
  public init(
    _ characterIndex: String.Index,
    within unicodeScalars: String.UnicodeScalarView
  ) {
    self.init(_position: characterIndex._base._position)
  }

  /// Returns the position in the given UTF-8 view that corresponds exactly to
  /// this index.
  ///
  /// The index must be a valid index of `String(utf8).unicodeScalars`.
  ///
  /// This example first finds the position of the character `"é"` and then uses
  /// this method to find the same position in the string's `utf8` view.
  ///
  ///     let cafe = "Café"
  ///     if let i = cafe.unicodeScalars.index(of: "é") {
  ///         let j = i.samePosition(in: cafe.utf8)
  ///         print(Array(cafe.utf8.suffix(from: j)))
  ///     }
  ///     // Prints "[195, 169]"
  ///
  /// - Parameter utf8: The view to use for the index conversion.
  /// - Returns: The position in `utf8` that corresponds exactly to this index.
  public func samePosition(in utf8: String.UTF8View) -> String.UTF8View.Index {
    return String.UTF8View.Index(self, within: utf8)
  }

  /// Returns the position in the given UTF-16 view that corresponds exactly to
  /// this index.
  ///
  /// The index must be a valid index of `String(utf16).unicodeScalars`.
  ///
  /// This example first finds the position of the character `"é"` and then uses
  /// this method to find the same position in the string's `utf16` view.
  ///
  ///     let cafe = "Café"
  ///     if let i = cafe.unicodeScalars.index(of: "é") {
  ///         let j = i.samePosition(in: cafe.utf16)
  ///         print(cafe.utf16[j])
  ///     }
  ///     // Prints "233"
  ///
  /// - Parameter utf16: The view to use for the index conversion.
  /// - Returns: The position in `utf16` that corresponds exactly to this index.
  public func samePosition(
    in utf16: String.UTF16View
  ) -> String.UTF16View.Index {
    return String.UTF16View.Index(self, within: utf16)
  }

  /// Returns the position in the given string that corresponds exactly to this
  /// index.
  ///
  /// This index must be a valid index of `characters.unicodeScalars`.
  ///
  /// This example first finds the position of the teacup emoji (`"🍵"`) in a
  /// string's `unicodeScalars` view and then uses this method to find the same
  /// position in the string.
  ///
  ///     let cafe = "Café 🍵"
  ///     let i = cafe.unicodeScalars.index(of: "🍵")!
  ///     let j = i.samePosition(in: cafe)!
  ///     print(cafe.suffix(from: j))
  ///     // Prints "🍵"
  ///
  /// - Parameter characters: The string to use for the index conversion.
  /// - Returns: The position in `characters` that corresponds exactly to
  ///   this index. If this index does not have an exact corresponding
  ///   position in `characters`, this method returns `nil`. For example,
  ///   an attempt to convert the position of a UTF-8 continuation byte
  ///   returns `nil`.
  public func samePosition(in characters: String) -> String.Index? {
    return String.Index(self, within: characters)
  }
}

extension String.UnicodeScalarView {
  // NOTE: Don't make this function inlineable.  Grapheme cluster
  // segmentation uses a completely different algorithm in Unicode 9.0.
  internal func _isOnGraphemeClusterBoundary(_ i: Index) -> Bool {
    if i == startIndex || i == endIndex {
      return true
    }
    let precedingScalar = self[index(before: i)]

    let graphemeClusterBreakProperty =
      _UnicodeGraphemeClusterBreakPropertyTrie()
    let segmenter = _UnicodeExtendedGraphemeClusterSegmenter()

    let gcb0 = graphemeClusterBreakProperty.getPropertyRawValue(
      precedingScalar.value)

    // If the preceding scalar alone decides the break, the scalar at `i`
    // does not need to be decoded.
    if segmenter.isBoundaryAfter(gcb0) {
      return true
    }

    let gcb1 = graphemeClusterBreakProperty.getPropertyRawValue(self[i].value)

    return segmenter.isBoundary(gcb0, gcb1)
  }
}

// Reflection
extension String.UnicodeScalarView : CustomReflectable {
  /// Returns a mirror that reflects the Unicode scalars view of a string.
  public var customMirror: Mirror {
    return Mirror(self, unlabeledChildren: self)
  }
}

extension String.UnicodeScalarView : CustomPlaygroundQuickLookable {
  public var customPlaygroundQuickLook: PlaygroundQuickLook {
    return .text(description)
  }
}