//===----------------------------------------------------------------------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// extension String { /// A view of a string's contents as a collection of Unicode scalar values. /// /// You can access a string's view of Unicode scalar values by using its /// `unicodeScalars` property. Unicode scalar values are the 21-bit codes /// that are the basic unit of Unicode. Each scalar value is represented by /// a `Unicode.Scalar` instance and is equivalent to a UTF-32 code unit. /// /// let flowers = "Flowers 💐" /// for v in flowers.unicodeScalars { /// print(v.value) /// } /// // 70 /// // 108 /// // 111 /// // 119 /// // 101 /// // 114 /// // 115 /// // 32 /// // 128144 /// /// Some characters that are visible in a string are made up of more than one /// Unicode scalar value. In that case, a string's `unicodeScalars` view /// contains more elements than the string itself. /// /// let flag = "🇵🇷" /// for c in flag { /// print(c) /// } /// // 🇵🇷 /// /// for v in flag.unicodeScalars { /// print(v.value) /// } /// // 127477 /// // 127479 /// /// You can convert a `String.UnicodeScalarView` instance back into a string /// using the `String` type's `init(_:)` initializer. /// /// let favemoji = "My favorite emoji is 🎉" /// if let i = favemoji.unicodeScalars.firstIndex(where: { $0.value >= 128 }) { /// let asciiPrefix = String(favemoji.unicodeScalars[.. Index { return Index(encodedOffset: i + _coreOffset) } /// Translates a `UnicodeScalarIndex` into a `_guts` index using this /// view's `_coreOffset`. 
@inlinable // FIXME(sil-serialize-all)
internal func _toCoreIndex(_ i: Index) -> Int {
  // View indices are shifted by `_coreOffset`; undo that shift before the
  // offset is handed to `_guts`.
  return i.encodedOffset - _coreOffset
}

/// The position of the first Unicode scalar value if the string is
/// nonempty.
///
/// If the string is empty, `startIndex` is equal to `endIndex`.
@inlinable // FIXME(sil-serialize-all)
public var startIndex: Index {
  return _fromCoreIndex(_guts.startIndex)
}

/// The "past the end" position---that is, the position one greater than
/// the last valid subscript argument.
///
/// In an empty Unicode scalars view, `endIndex` is equal to `startIndex`.
@inlinable // FIXME(sil-serialize-all)
public var endIndex: Index {
  return _fromCoreIndex(_guts.endIndex)
}

/// Returns the next consecutive location after `i`.
///
/// - Precondition: The next location exists.
@inlinable // FIXME(sil-serialize-all)
public func index(after i: Index) -> Index {
  let offset = _toCoreIndex(i)
  // Width, in code units, of the scalar that starts at `offset`. ASCII
  // storage always yields 1; the other representations are asked directly.
  let length: Int = _visitGuts(_guts, args: offset,
    ascii: { _ -> Int in return 1 },
    utf16: { utf16, offset in
      return utf16.unicodeScalarWidth(startingAt: offset) },
    opaque: { opaque, offset in
      return opaque.unicodeScalarWidth(startingAt: offset) }
  )
  return _fromCoreIndex(offset + length)
}

/// Returns the previous consecutive location before `i`.
///
/// - Precondition: The previous location exists.
@inlinable // FIXME(sil-serialize-all)
public func index(before i: Index) -> Index {
  let offset = _toCoreIndex(i)
  // Width, in code units, of the scalar that ends at `offset`.
  let length: Int = _visitGuts(_guts, args: offset,
    ascii: { _ -> Int in return 1 },
    utf16: { utf16, offset in
      return utf16.unicodeScalarWidth(endingAt: offset) },
    opaque: { opaque, offset in
      return opaque.unicodeScalarWidth(endingAt: offset) }
  )
  return _fromCoreIndex(offset - length)
}

/// Accesses the Unicode scalar value at the given position.
///
/// The following example searches a string's Unicode scalars view for a
/// capital letter and then prints the character and Unicode scalar value
/// at the found index:
///
///     let greeting = "Hello, friend!"
///     if let i = greeting.unicodeScalars.firstIndex(where: { "A"..."Z" ~= $0 }) {
///         print("First capital letter: \(greeting.unicodeScalars[i])")
///         print("Unicode scalar value: \(greeting.unicodeScalars[i].value)")
///     }
///     // Prints "First capital letter: H"
///     // Prints "Unicode scalar value: 72"
///
/// - Parameter position: A valid index of the Unicode scalars view.
///   `position` must be less than the view's end index.
@inlinable // FIXME(sil-serialize-all)
public subscript(position: Index) -> Unicode.Scalar {
  // Translate through `_toCoreIndex`, exactly as `index(after:)` and
  // `index(before:)` do, so that a view with a nonzero `_coreOffset` reads
  // from the correct place in `_guts`. With a zero offset this is the same
  // as using `position.encodedOffset` directly.
  let offset = _toCoreIndex(position)
  return _guts.unicodeScalar(startingAt: offset)
}

/// An iterator over the Unicode scalars that make up a `UnicodeScalarView`
/// collection.
@_fixed_layout // FIXME(sil-serialize-all)
public struct Iterator : IteratorProtocol {
  @usableFromInline // FIXME(sil-serialize-all)
  internal var _guts: _StringGuts

  // FIXME(TODO: JIRA): the below is absurdly wasteful.
  // UnicodeScalarView.Iterator should be able to be passed in-registers.
  //
  // Each initializer below assigns exactly one of these four iterators,
  // chosen by the representation of `_guts`; the others stay `nil`.
  @usableFromInline // FIXME(sil-serialize-all)
  internal var _asciiIterator: _UnmanagedASCIIString.UnicodeScalarIterator?
  @usableFromInline // FIXME(sil-serialize-all)
  internal var _utf16Iterator: _UnmanagedUTF16String.UnicodeScalarIterator?
  @usableFromInline // FIXME(sil-serialize-all)
  internal var _opaqueIterator: _UnmanagedOpaqueString.UnicodeScalarIterator?
  @usableFromInline
  internal var _smallIterator: _SmallUTF8String.UnicodeScalarIterator?

  /// Creates an iterator over `guts`, dispatching to `init(_opaque:)` when
  /// `guts._isOpaque` is true and to `init(_concrete:)` otherwise.
  @inlinable // FIXME(sil-serialize-all)
  internal init(_ guts: _StringGuts) {
    if _slowPath(guts._isOpaque) {
      self.init(_opaque: guts)
      return
    }
    self.init(_concrete: guts)
  }

  /// Creates an iterator over non-opaque `guts`, using the ASCII iterator
  /// when `guts.isASCII` is true and the UTF-16 iterator otherwise.
  @inlinable // FIXME(sil-serialize-all)
  @inline(__always)
  internal init(_concrete guts: _StringGuts) {
    _sanityCheck(!guts._isOpaque)
    self._guts = guts
    defer { _fixLifetime(self) }
    if _guts.isASCII {
      self._asciiIterator =
        _guts._unmanagedASCIIView.makeUnicodeScalarIterator()
    } else {
      self._utf16Iterator =
        _guts._unmanagedUTF16View.makeUnicodeScalarIterator()
    }
  }

  /// Creates an iterator over opaque `_guts`, using the small-string
  /// iterator when `_guts._isSmall` is true and the opaque iterator
  /// otherwise.
  @usableFromInline // @opaque
  init(_opaque _guts: _StringGuts) {
    _sanityCheck(_guts._isOpaque)
    defer { _fixLifetime(self) }
    self._guts = _guts
    // TODO: Replace the whole iterator scheme with a sensible solution.
    if self._guts._isSmall {
      self._smallIterator =
        _guts._smallUTF8String.makeUnicodeScalarIterator()
    } else {
      self._opaqueIterator = _guts._asOpaque().makeUnicodeScalarIterator()
    }
  }

  /// Advances to the next element and returns it, or `nil` if no next
  /// element exists.
  ///
  /// Once `nil` has been returned, all subsequent calls return `nil`.
  ///
  /// - Precondition: `next()` has not been applied to a copy of `self`
  ///   since the copy was made.
  @inlinable // FIXME(sil-serialize-all)
  public mutating func next() -> Unicode.Scalar? {
    // Forward to whichever iterator the initializer set up. The checks run
    // in this order: opaque, ASCII, small, and finally UTF-16.
    if _slowPath(_opaqueIterator != nil) {
      return _opaqueIterator!.next()
    }
    if _asciiIterator != nil {
      return _asciiIterator!.next()
    }
    if _guts._isSmall {
      return _smallIterator!.next()
    }
    return _utf16Iterator!.next()
  }
}

/// Returns an iterator over the Unicode scalars that make up this view.
///
/// - Returns: An iterator over this collection's `Unicode.Scalar` elements.
@inlinable // FIXME(sil-serialize-all)
public func makeIterator() -> Iterator {
  return Iterator(_guts)
}

/// A string built from this view's `_guts`.
@inlinable // FIXME(sil-serialize-all)
public var description: String {
  return String(_guts)
}

/// The debug description of `description`, wrapped in
/// `StringUnicodeScalarView(...)`.
public var debugDescription: String {
  return "StringUnicodeScalarView(\(self.description.debugDescription))"
}
} // closes the enclosing declaration, whose header is not visible in this copy

/// Creates a string corresponding to the given collection of Unicode
/// scalars.
///
/// You can use this initializer to create a new string from a slice of
/// another string's `unicodeScalars` view.
///
///     let picnicGuest = "Deserving porcupine"
///     if let i = picnicGuest.unicodeScalars.firstIndex(of: " ") {
///         let adjective = String(picnicGuest.unicodeScalars[..
// NOTE(review): the source text between the `[..` above and the
// `Unicode.Scalar {` below is missing from this copy: the rest of the doc
// comment, the declaration it documents, and the head of the function whose
// body follows. Judging by that body and its sibling, the function is
// presumably `unicodeScalar(startingAt offset: Int)`. Restore the span from
// the upstream file; the surviving fragments are kept verbatim.
Unicode.Scalar {
  return _visitGuts(self, args: offset,
    ascii: { ascii, offset in
      let u = ascii.codeUnit(atCheckedOffset: offset)
      return Unicode.Scalar(_unchecked: UInt32(u))
    },
    utf16: { utf16, offset in
      return utf16.unicodeScalar(startingAt: offset)
    },
    opaque: { opaque, offset in
      return opaque.unicodeScalar(startingAt: offset)
    })
}

/// Returns the Unicode scalar that ends at the given code unit offset.
///
/// For ASCII storage this is the code unit at `offset - 1`; the UTF-16 and
/// opaque representations are asked for the scalar ending at `offset`.
@inlinable
internal func unicodeScalar(endingAt offset: Int) -> Unicode.Scalar {
  return _visitGuts(self, args: offset,
    ascii: { ascii, offset in
      // `&-` does not trap on overflow; presumably
      // `codeUnit(atCheckedOffset:)` rejects an out-of-range result.
      // TODO confirm.
      let u = ascii.codeUnit(atCheckedOffset: offset &- 1)
      return Unicode.Scalar(_unchecked: UInt32(u))
    },
    utf16: { utf16, offset in
      return utf16.unicodeScalar(endingAt: offset)
    },
    opaque: { opaque, offset in
      return opaque.unicodeScalar(endingAt: offset)
    })
}
}

extension String.UnicodeScalarView : _SwiftStringView {
  /// A string built from this view's `_guts`.
  @inlinable // FIXME(sil-serialize-all)
  internal var _persistentContent : String { return String(_guts) }

  /// A string built from this view's `_guts`.
  @inlinable // FIXME(sil-serialize-all)
  var _wholeString : String { return String(_guts) }

  /// The range `0..<_guts.count`.
  // NOTE(review): the generic argument of `Range` appears to have been
  // stripped from this copy (presumably `Range<Int>`); confirm against the
  // upstream file.
  @inlinable // FIXME(sil-serialize-all)
  var _encodedOffsetRange : Range { return 0..<_guts.count }
}

extension String {
  /// The string's value represented as a collection of Unicode scalar values.
@inlinable // FIXME(sil-serialize-all)
public var unicodeScalars: UnicodeScalarView {
  get {
    return UnicodeScalarView(_guts)
  }
  set {
    _guts = newValue._guts
  }
}
}

extension String.UnicodeScalarView : RangeReplaceableCollection {
  /// Creates an empty view instance.
  @inlinable // FIXME(sil-serialize-all)
  public init() {
    self = String.UnicodeScalarView(_StringGuts())
  }

  /// Reserves enough space in the view's underlying storage to store the
  /// specified number of ASCII characters.
  ///
  /// Because a Unicode scalar value can require more than a single ASCII
  /// character's worth of storage, additional allocation may be necessary
  /// when adding to a Unicode scalar view after a call to
  /// `reserveCapacity(_:)`.
  ///
  /// - Parameter n: The minimum number of ASCII character's worth of storage
  ///   to allocate.
  ///
  /// - Complexity: O(*n*), where *n* is the capacity being reserved.
  public mutating func reserveCapacity(_ n: Int) {
    _guts.reserveCapacity(n)
  }

  /// Appends the given Unicode scalar to the view.
  ///
  /// - Parameter c: The Unicode scalar to append to the view.
  public mutating func append(_ c: Unicode.Scalar) {
    if _fastPath(_guts.isASCII && c.value <= 0x7f) {
      // ASCII scalar onto ASCII storage: write a single byte.
      _guts.withMutableASCIIStorage(unusedCapacity: 1) { storage in
        unowned(unsafe) let s = storage._value
        s.end.pointee = UInt8(c.value)
        s.count += 1
      }
    } else {
      // Otherwise append as UTF-16: one code unit, or a surrogate pair.
      let width = UTF16.width(c)
      _guts.withMutableUTF16Storage(unusedCapacity: width) { storage in
        unowned(unsafe) let s = storage._value
        _sanityCheck(s.count + width <= s.capacity)
        if _fastPath(width == 1) {
          s.end.pointee = UTF16.CodeUnit(c.value)
        } else {
          _sanityCheck(width == 2)
          s.end[0] = UTF16.leadSurrogate(c)
          s.end[1] = UTF16.trailSurrogate(c)
        }
        s.count += width
      }
    }
  }

  /// Appends the Unicode scalar values in the given sequence to the view.
  ///
  /// - Parameter newElements: A sequence of Unicode scalar values.
  ///
  /// - Complexity: O(*n*), where *n* is the length of the resulting view.
  public mutating func append<S : Sequence>(contentsOf newElements: S)
  where S.Element == Unicode.Scalar {
    // FIXME: Keep ASCII storage if possible
    _guts.reserveUnusedCapacity(newElements.underestimatedCount)
    var it = newElements.makeIterator()
    var next = it.next()
    // Each pass of the outer loop requests room for at least the pending
    // scalar, then the inner loop writes as many scalars as fit below
    // `limit`. The outer loop repeats while a scalar is still pending.
    while let n = next {
      _guts.withMutableUTF16Storage(unusedCapacity: UTF16.width(n)) { storage in
        var p = storage._value.end
        let limit = storage._value.capacityEnd
        while let n = next {
          let w = UTF16.width(n)
          guard p + w <= limit else { break }
          if w == 1 {
            p.pointee = UTF16.CodeUnit(n.value)
          } else {
            _sanityCheck(w == 2)
            p[0] = UTF16.leadSurrogate(n)
            p[1] = UTF16.trailSurrogate(n)
          }
          p += w
          next = it.next()
        }
        // Publish the new length: the distance from the start of the storage
        // to the write cursor.
        storage._value.count = p - storage._value.start
      }
    }
  }

  /// Replaces the elements within the specified bounds with the given Unicode
  /// scalar values.
  ///
  /// Calling this method invalidates any existing indices for use with this
  /// string.
  ///
  /// - Parameters:
  ///   - bounds: The range of elements to replace. The bounds of the range
  ///     must be valid indices of the view.
  ///   - newElements: The new Unicode scalar values to add to the string.
  ///
  /// - Complexity: O(*m*), where *m* is the combined length of the view and
  ///   `newElements`. If the call to `replaceSubrange(_:with:)` simply
  ///   removes elements at the end of the string, the complexity is O(*n*),
  ///   where *n* is equal to `bounds.count`.
  public mutating func replaceSubrange<C>(
    _ bounds: Range<Index>,
    with newElements: C
  ) where C : Collection, C.Element == Unicode.Scalar {
    // Convert the view indices into `_guts` offsets before replacing.
    let rawSubRange: Range<Int> =
      _toCoreIndex(bounds.lowerBound) ..< _toCoreIndex(bounds.upperBound)
    // The replacement is handed to `_guts` as UTF-16 code units.
    let lazyUTF16 = newElements.lazy.flatMap { $0.utf16 }
    _guts.replaceSubrange(rawSubRange, with: lazyUTF16)
  }
}

// Index conversions
extension String.UnicodeScalarIndex {
  /// Creates an index in the given Unicode scalars view that corresponds
  /// exactly to the specified `UTF16View` position.
///
  /// The following example finds the position of a space in a string's `utf16`
  /// view and then converts that position to an index in the string's
  /// `unicodeScalars` view:
  ///
  ///     let cafe = "Café 🍵"
  ///
  ///     let utf16Index = cafe.utf16.firstIndex(of: 32)!
  ///     let scalarIndex = String.Index(utf16Index, within: cafe.unicodeScalars)!
  ///
  ///     print(String(cafe.unicodeScalars[..
  // NOTE(review): the source text between the `[..` above and the
  // `String.Index? {` below is missing from this copy: the rest of the doc
  // comment, the declaration it documents, and the head of the method whose
  // body follows. That method evidently takes a `characters` parameter and
  // returns `String.Index?`. Restore the span from the upstream file; the
  // surviving fragments are kept verbatim.
  String.Index? {
    return String.Index(self, within: characters)
  }
}

extension String.UnicodeScalarView {
  /// Returns whether `i` falls on a Unicode scalar boundary of this view.
  @inlinable // FIXME(sil-serialize-all)
  internal func _isOnUnicodeScalarBoundary(_ i: Index) -> Bool {
    // ASCII storage is treated as having every position on a boundary.
    if _fastPath(_guts.isASCII) { return true }
    // Both ends of the view are always boundaries.
    if i == startIndex || i == endIndex {
      return true
    }
    // An index with a nonzero transcoded offset is never a boundary.
    if i._transcodedOffset != 0 { return false }
    let i2 = _toCoreIndex(i)
    // A code unit that is not a trail surrogate starts a scalar.
    if _fastPath(!UTF16.isTrailSurrogate(_guts[i2])) { return true }
    // A trail surrogate is only a boundary when no lead surrogate precedes
    // it, i.e. when it is unpaired.
    return i2 == 0 || !UTF16.isLeadSurrogate(_guts[i2 &- 1])
  }

  // NOTE: Don't make this function inlineable.  Grapheme cluster
  // segmentation uses a completely different algorithm in Unicode 9.0.
  //
  // NOTE(review): the function below is nevertheless marked `@inlinable`,
  // which looks at odds with the note above. Confirm which is intended.
  /// Returns whether `i` falls on a grapheme cluster boundary of the string
  /// built from this view's `_guts`.
  @inlinable // FIXME(sil-serialize-all)
  internal func _isOnGraphemeClusterBoundary(_ i: Index) -> Bool {
    if i == startIndex || i == endIndex {
      return true
    }
    if !_isOnUnicodeScalarBoundary(i) { return false }
    // Step forward one `String` index and back again; `i` counts as a
    // boundary only if the round trip lands on `i` itself.
    let str = String(_guts)
    return i == str.index(before: str.index(after: i))
  }
}

// Reflection
extension String.UnicodeScalarView : CustomReflectable {
  /// Returns a mirror that reflects the Unicode scalars view of a string.
  @inlinable // FIXME(sil-serialize-all)
  public var customMirror: Mirror {
    return Mirror(self, unlabeledChildren: self)
  }
}

extension String.UnicodeScalarView : CustomPlaygroundQuickLookable {
  /// A text quick look containing this view's `description`.
  @inlinable // FIXME(sil-serialize-all)
  @available(*, deprecated, message: "UnicodeScalarView.customPlaygroundQuickLook will be removed in a future Swift version")
  public var customPlaygroundQuickLook: PlaygroundQuickLook {
    return .text(description)
  }
}

// backward compatibility for index interchange.
extension String.UnicodeScalarView {
  /// Returns the location after the given optional index, force-unwrapping
  /// it. Obsoleted in Swift 4.
  @inlinable // FIXME(sil-serialize-all)
  @available(
    swift, obsoleted: 4.0,
    message: "Any String view index conversion can fail in Swift 4; please unwrap the optional index")
  public func index(after i: Index?) -> Index {
    return index(after: i!)
  }

  /// Returns the index offset by `n` from the given optional index,
  /// force-unwrapping it. Obsoleted in Swift 4.
  @inlinable // FIXME(sil-serialize-all)
  @available(
    swift, obsoleted: 4.0,
    message: "Any String view index conversion can fail in Swift 4; please unwrap the optional index")
  public func index(_ i: Index?, offsetBy n: Int) -> Index {
    return index(i!, offsetBy: n)
  }

  /// Returns the distance between two optional indices, force-unwrapping
  /// both. Obsoleted in Swift 4.
  @inlinable // FIXME(sil-serialize-all)
  @available(
    swift, obsoleted: 4.0,
    message: "Any String view index conversion can fail in Swift 4; please unwrap the optional indices")
  public func distance(from i: Index?, to j: Index?) -> Int {
    return distance(from: i!, to: j!)
  }

  /// Accesses the Unicode scalar at the given optional index,
  /// force-unwrapping it. Obsoleted in Swift 4.
  @inlinable // FIXME(sil-serialize-all)
  @available(
    swift, obsoleted: 4.0,
    message: "Any String view index conversion can fail in Swift 4; please unwrap the optional index")
  public subscript(i: Index?) -> Unicode.Scalar {
    return self[i!]
  }
}

//===--- Slicing Support --------------------------------------------------===//
/// In Swift 3.2, in the absence of type context,
///
///   someString.unicodeScalars[
///     someString.unicodeScalars.startIndex
///     ..< someString.unicodeScalars.endIndex]
///
/// was deduced to be of type `String.UnicodeScalarView`.  Provide a
/// more-specific Swift-3-only `subscript` overload that continues to produce
/// `String.UnicodeScalarView`.
extension String.UnicodeScalarView {
  public typealias SubSequence = Substring.UnicodeScalarView

  /// Accesses the Unicode scalar values in the given range as a
  /// `Substring.UnicodeScalarView` over this view.
  @inlinable // FIXME(sil-serialize-all)
  @available(swift, introduced: 4)
  public subscript(r: Range<Index>) -> String.UnicodeScalarView.SubSequence {
    return String.UnicodeScalarView.SubSequence(self, _bounds: r)
  }

  /// Accesses the Unicode scalar values in the given range.
  ///
  /// The example below uses this subscript to access the scalar values up
  /// to, but not including, the first comma (`","`) in the string.
  ///
  ///     let str = "All this happened, more or less."
  ///     let i = str.unicodeScalars.firstIndex(of: ",")!
  ///     let substring = str.unicodeScalars[str.unicodeScalars.startIndex ..< i]
  ///     print(String(substring))
  ///     // Prints "All this happened"
  ///
  /// - Complexity: O(*n*) if the underlying string is bridged from
  ///   Objective-C, where *n* is the length of the string; otherwise, O(1).
  @available(swift, obsoleted: 4)
  public subscript(r: Range<Index>) -> String.UnicodeScalarView {
    // Convert the view indices into `_guts` offsets for the slice.
    let rawSubRange: Range<Int> =
      _toCoreIndex(r.lowerBound)..<_toCoreIndex(r.upperBound)
    // The new view records the encoded offset of the slice's lower bound as
    // its `coreOffset`.
    return String.UnicodeScalarView(
      _guts._extractSlice(rawSubRange),
      coreOffset: r.lowerBound.encodedOffset)
  }

  /// Accesses the Unicode scalar values in the given closed range by
  /// converting it to a half-open range relative to this view.
  @inlinable // FIXME(sil-serialize-all)
  @available(swift, obsoleted: 4)
  public subscript(bounds: ClosedRange<Index>) -> String.UnicodeScalarView {
    return self[bounds.relative(to: self)]
  }
}