//===----------------------------------------------------------------------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// // FIXME(ABI)#71 : The UTF-16 string view should have a custom iterator type to // allow performance optimizations of linear traversals. extension String { /// A view of a string's contents as a collection of Unicode scalar values. /// /// You can access a string's view of Unicode scalar values by using its /// `unicodeScalars` property. Unicode scalar values are the 21-bit codes /// that are the basic unit of Unicode. Each scalar value is represented by /// a `Unicode.Scalar` instance and is equivalent to a UTF-32 code unit. /// /// let flowers = "Flowers 💐" /// for v in flowers.unicodeScalars { /// print(v.value) /// } /// // 70 /// // 108 /// // 111 /// // 119 /// // 101 /// // 114 /// // 115 /// // 32 /// // 128144 /// /// Some characters that are visible in a string are made up of more than one /// Unicode scalar value. In that case, a string's `unicodeScalars` view /// contains more elements than the string itself. /// /// let flag = "🇵🇷" /// for c in flag { /// print(c) /// } /// // 🇵🇷 /// /// for v in flag.unicodeScalars { /// print(v.value) /// } /// // 127477 /// // 127479 /// /// You can convert a `String.UnicodeScalarView` instance back into a string /// using the `String` type's `init(_:)` initializer. /// /// let favemoji = "My favorite emoji is 🎉" /// if let i = favemoji.unicodeScalars.firstIndex(where: { $0.value >= 128 }) { /// let asciiPrefix = String(favemoji.unicodeScalars[.. Index { let i = _guts.validateScalarIndex(i) return _uncheckedIndex(after: i) } @_alwaysEmitIntoClient @inline(__always) internal func _uncheckedIndex(after i: Index) -> Index { // TODO(String performance): isASCII fast-path if _fastPath(_guts.isFastUTF8) { let len = _guts.fastUTF8ScalarLength(startingAt: i._encodedOffset) return i.encoded(offsetBy: len)._scalarAligned._knownUTF8 } return _foreignIndex(after: i) } /// Returns the previous consecutive location before `i`. /// /// - Precondition: The previous location exists. @inlinable @inline(__always) public func index(before i: Index) -> Index { let i = _guts.validateInclusiveScalarIndex(i) // Note: Aligning an index may move it closer towards the `startIndex`, so // the `i > startIndex` check needs to come after rounding. _precondition(i > startIndex, "String index is out of bounds") return _uncheckedIndex(before: i) } @_alwaysEmitIntoClient @inline(__always) internal func _uncheckedIndex(before i: Index) -> Index { // TODO(String performance): isASCII fast-path if _fastPath(_guts.isFastUTF8) { let len = _guts.withFastUTF8 { utf8 in _utf8ScalarLength(utf8, endingAt: i._encodedOffset) } _internalInvariant(len <= 4, "invalid UTF8") return i.encoded(offsetBy: 0 &- len)._scalarAligned._knownUTF8 } return _foreignIndex(before: i) } /// Accesses the Unicode scalar value at the given position. /// /// The following example searches a string's Unicode scalars view for a /// capital letter and then prints the character and Unicode scalar value /// at the found index: /// /// let greeting = "Hello, friend!" /// if let i = greeting.unicodeScalars.firstIndex(where: { "A"..."Z" ~= $0 }) { /// print("First capital letter: \(greeting.unicodeScalars[i])") /// print("Unicode scalar value: \(greeting.unicodeScalars[i].value)") /// } /// // Prints "First capital letter: H" /// // Prints "Unicode scalar value: 72" /// /// - Parameter position: A valid index of the character view. `position` /// must be less than the view's end index. @inlinable @inline(__always) public subscript(position: Index) -> Unicode.Scalar { let i = _guts.validateScalarIndex(position) return _guts.errorCorrectedScalar(startingAt: i._encodedOffset).0 } @_alwaysEmitIntoClient // Swift 5.1 bug fix public func distance(from start: Index, to end: Index) -> Int { let start = _guts.validateInclusiveScalarIndex(start) let end = _guts.validateInclusiveScalarIndex(end) var i = start var count = 0 if i < end { while i < end { count += 1 i = _uncheckedIndex(after: i) } } else if i > end { while i > end { count -= 1 i = _uncheckedIndex(before: i) } } return count } @_alwaysEmitIntoClient public func index(_ i: Index, offsetBy distance: Int) -> Index { var i = _guts.validateInclusiveScalarIndex(i) if distance >= 0 { for _ in stride(from: 0, to: distance, by: 1) { _precondition(i._encodedOffset < _guts.count, "String index is out of bounds") i = _uncheckedIndex(after: i) } } else { for _ in stride(from: 0, to: distance, by: -1) { _precondition(i._encodedOffset > 0, "String index is out of bounds") i = _uncheckedIndex(before: i) } } return i } @_alwaysEmitIntoClient public func index( _ i: Index, offsetBy distance: Int, limitedBy limit: Index ) -> Index? { // Note: `limit` is intentionally not scalar aligned to ensure our behavior // exactly matches the documentation above. We do need to ensure it has a // matching encoding, though. The same goes for `start`, which is used to // determine whether the limit applies at all. let limit = _guts.ensureMatchingEncoding(limit) let start = _guts.ensureMatchingEncoding(i) var i = _guts.validateInclusiveScalarIndex(i) if distance >= 0 { for _ in stride(from: 0, to: distance, by: 1) { guard limit < start || i < limit else { return nil } _precondition(i._encodedOffset < _guts.count, "String index is out of bounds") i = _uncheckedIndex(after: i) } guard limit < start || i <= limit else { return nil } } else { for _ in stride(from: 0, to: distance, by: -1) { guard limit > start || i > limit else { return nil } _precondition(i._encodedOffset > 0, "String index is out of bounds") i = _uncheckedIndex(before: i) } guard limit > start || i >= limit else { return nil } } return i } } extension String.UnicodeScalarView { @frozen public struct Iterator: IteratorProtocol, Sendable { @usableFromInline internal var _guts: _StringGuts @usableFromInline internal var _position: Int = 0 @usableFromInline internal var _end: Int @inlinable internal init(_ guts: _StringGuts) { self._end = guts.count self._guts = guts } @inlinable @inline(__always) public mutating func next() -> Unicode.Scalar? { guard _fastPath(_position < _end) else { return nil } let (result, len) = _guts.errorCorrectedScalar(startingAt: _position) _position &+= len return result } } @inlinable public __consuming func makeIterator() -> Iterator { return Iterator(_guts) } } extension String.UnicodeScalarView: CustomStringConvertible { @inlinable @inline(__always) public var description: String { return String(_guts) } } extension String.UnicodeScalarView: CustomDebugStringConvertible { public var debugDescription: String { return "StringUnicodeScalarView(\(self.description.debugDescription))" } } extension String { /// Creates a string corresponding to the given collection of Unicode /// scalars. /// /// You can use this initializer to create a new string from a slice of /// another string's `unicodeScalars` view. /// /// let picnicGuest = "Deserving porcupine" /// if let i = picnicGuest.unicodeScalars.firstIndex(of: " ") { /// let adjective = String(picnicGuest.unicodeScalars[..(contentsOf newElements: S) where S.Element == Unicode.Scalar { // TODO(String performance): Skip extra String allocation let scalars = String(decoding: newElements.map { $0.value }, as: UTF32.self) self = (String(self._guts) + scalars).unicodeScalars } /// Replaces the elements within the specified bounds with the given Unicode /// scalar values. /// /// Calling this method invalidates any existing indices for use with this /// string. /// /// - Parameters: /// - subrange: The range of elements to replace. The bounds of the range /// must be valid indices of the view. /// - newElements: The new Unicode scalar values to add to the string. /// /// - Complexity: O(*m*), where *m* is the combined length of the view and /// `newElements`. If the call to `replaceSubrange(_:with:)` simply /// removes elements at the end of the string, the complexity is O(*n*), /// where *n* is equal to `bounds.count`. public mutating func replaceSubrange( _ subrange: Range, with newElements: C ) where C: Collection, C.Element == Unicode.Scalar { let subrange = _guts.validateScalarRange(subrange) _guts.replaceSubrange(subrange, with: newElements) _invariantCheck() } } // Index conversions extension String.UnicodeScalarIndex { /// Creates an index in the given Unicode scalars view that corresponds /// exactly to the specified `UTF16View` position. /// /// The following example finds the position of a space in a string's `utf16` /// view and then converts that position to an index in the string's /// `unicodeScalars` view: /// /// let cafe = "Café 🍵" /// /// let utf16Index = cafe.utf16.firstIndex(of: 32)! /// let scalarIndex = String.Index(utf16Index, within: cafe.unicodeScalars)! /// /// print(String(cafe.unicodeScalars[.. String.Index? { return String.Index(self, within: characters) } } #if SWIFT_ENABLE_REFLECTION // Reflection extension String.UnicodeScalarView: CustomReflectable { /// Returns a mirror that reflects the Unicode scalars view of a string. public var customMirror: Mirror { return Mirror(self, unlabeledChildren: self) } } #endif //===--- Slicing Support --------------------------------------------------===// /// In Swift 3.2, in the absence of type context, /// /// someString.unicodeScalars[ /// someString.unicodeScalars.startIndex /// ..< someString.unicodeScalars.endIndex] /// /// was deduced to be of type `String.UnicodeScalarView`. Provide a /// more-specific Swift-3-only `subscript` overload that continues to produce /// `String.UnicodeScalarView`. extension String.UnicodeScalarView { public typealias SubSequence = Substring.UnicodeScalarView @available(swift, introduced: 4) public subscript(r: Range) -> String.UnicodeScalarView.SubSequence { let r = _guts.validateScalarRange(r) return SubSequence(_unchecked: self, bounds: r) } } // Foreign string Support extension String.UnicodeScalarView { @usableFromInline @inline(never) @_effects(releasenone) internal func _foreignIndex(after i: Index) -> Index { _internalInvariant(_guts.isForeign) let cu = _guts.foreignErrorCorrectedUTF16CodeUnit(at: i) let len = UTF16.isLeadSurrogate(cu) ? 2 : 1 let r = i.encoded(offsetBy: len)._scalarAligned return r._knownUTF16 } @usableFromInline @inline(never) @_effects(releasenone) internal func _foreignIndex(before i: Index) -> Index { _internalInvariant(_guts.isForeign) let priorIdx = i.priorEncoded let cu = _guts.foreignErrorCorrectedUTF16CodeUnit(at: priorIdx) let len = UTF16.isTrailSurrogate(cu) ? 2 : 1 let r = i.encoded(offsetBy: -len)._scalarAligned return r._knownUTF16 } }