//===--- StringCharacterView.swift - String's Collection of Characters ----===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
//  String is a BidirectionalCollection of Characters; this file implements
//  that conformance along with String's character iterator.
//
//===----------------------------------------------------------------------===//

// FIXME(ABI)#70 : The character string view should have a custom iterator type
// to allow performance optimizations of linear traversals.

import SwiftShims

extension String: BidirectionalCollection {
  public typealias SubSequence = Substring
  public typealias Element = Character

  /// The position of the first character in a nonempty string.
  ///
  /// In an empty string, `startIndex` is equal to `endIndex`.
  @inlinable @inline(__always)
  public var startIndex: Index { return _guts.startIndex }

  /// A string's "past the end" position---that is, the position one greater
  /// than the last valid subscript argument.
  ///
  /// In an empty string, `endIndex` is equal to `startIndex`.
  @inlinable @inline(__always)
  public var endIndex: Index { return _guts.endIndex }

  /// The number of characters in a string.
  ///
  /// - Complexity: O(*n*), where *n* is the number of characters; the value
  ///   is computed by measuring the distance from `startIndex` to `endIndex`.
  @inline(__always)
  public var count: Int {
    return distance(from: startIndex, to: endIndex)
  }

  /// Returns the position immediately after the given index.
  ///
  /// - Parameter i: A valid index of the collection. `i` must be less than
  ///   `endIndex`.
  /// - Returns: The index value immediately after `i`.
  public func index(after i: Index) -> Index {
    let i = _guts.ensureMatchingEncoding(i)
    _precondition(i < endIndex, "String index is out of bounds")
    let r = _uncheckedIndex(after: _guts.scalarAlign(i))
    return _guts.markEncoding(r)
  }

  /// A version of `index(after:)` that assumes that the given index:
  ///
  /// - has the right encoding,
  /// - is within bounds, and
  /// - is scalar aligned.
  ///
  /// It does not mark the encoding of the returned index.
  internal func _uncheckedIndex(after i: Index) -> Index {
    // FIXME: Unlike `index(before:)`, this function may return incorrect
    // results if `i` isn't on a grapheme cluster boundary. (The grapheme
    // breaking algorithm assumes we start on a break when we go forward.)
    _internalInvariant(_guts.hasMatchingEncoding(i))
    _internalInvariant(i < endIndex)
    _internalInvariant(i._isScalarAligned)

    // TODO: known-ASCII fast path, single-scalar-grapheme fast path, etc.
    let stride = _characterStride(startingAt: i)
    let nextOffset = i._encodedOffset &+ stride
    // Measure the following character as well, so that the stride can be
    // stored in the returned index.
    let nextIndex = Index(_encodedOffset: nextOffset)._scalarAligned
    let nextStride = _characterStride(startingAt: nextIndex)
    let r = Index(encodedOffset: nextOffset, characterStride: nextStride)
    return r._scalarAligned
  }

  /// Returns the position immediately before the given index.
  ///
  /// - Parameter i: A valid index of the collection. `i` must be greater than
  ///   `startIndex`.
  /// - Returns: The index value immediately before `i`.
  public func index(before i: Index) -> Index {
    // `var` + reassignment (rather than a second `let i`) keeps this free of
    // same-scope redeclarations and mirrors `index(_:offsetBy:)`.
    var i = _guts.ensureMatchingEncoding(i)

    // Note: bounds checking in `index(before:)` is tricky as scalar aligning an
    // index may need to access storage, but it may also move it closer towards
    // the `startIndex`. Therefore, we must check against the `endIndex` before
    // aligning, but we need to delay the `i > startIndex` check until after.
    _precondition(i <= endIndex, "String index is out of bounds")
    i = _guts.scalarAlign(i)
    _precondition(i > startIndex, "String index is out of bounds")

    // `i` is already scalar aligned at this point; no need to align it again.
    let r = _uncheckedIndex(before: i)
    return _guts.markEncoding(r)
  }

  /// A version of `index(before:)` that assumes that the given index:
  ///
  /// - has the right encoding,
  /// - is within bounds, and
  /// - is scalar aligned.
  ///
  /// It does not mark the encoding of the returned index.
  internal func _uncheckedIndex(before i: Index) -> Index {
    _internalInvariant(_guts.hasMatchingEncoding(i))
    _internalInvariant(i > startIndex && i <= endIndex)
    _internalInvariant(i._isScalarAligned)

    // TODO: known-ASCII fast path, single-scalar-grapheme fast path, etc.
    let stride = _characterStride(endingAt: i)
    let priorOffset = i._encodedOffset &- stride

    // The stride just measured is the stride of the character that starts at
    // `priorOffset`, so it is stored in the resulting index.
    let r = Index(encodedOffset: priorOffset, characterStride: stride)
    return r._scalarAligned
  }

  /// Returns an index that is the specified distance from the given index.
  ///
  /// The following example obtains an index advanced four positions from a
  /// string's starting index and then prints the character at that position.
  ///
  ///     let s = "Swift"
  ///     let i = s.index(s.startIndex, offsetBy: 4)
  ///     print(s[i])
  ///     // Prints "t"
  ///
  /// The value passed as `distance` must not offset `i` beyond the bounds of
  /// the collection.
  ///
  /// - Parameters:
  ///   - i: A valid index of the collection.
  ///   - distance: The distance to offset `i`.
  /// - Returns: An index offset by `distance` from the index `i`. If
  ///   `distance` is positive, this is the same value as the result of
  ///   `distance` calls to `index(after:)`. If `distance` is negative, this
  ///   is the same value as the result of `abs(distance)` calls to
  ///   `index(before:)`.
  /// - Complexity: O(*n*), where *n* is the absolute value of `distance`.
  public func index(_ i: Index, offsetBy distance: Int) -> Index {
    // Note: in Swift 5.6 and below, this method used to be inlinable,
    // forwarding to `_index(_:offsetBy:)`.

    // TODO: known-ASCII and single-scalar-grapheme fast path, etc.

    var i = _guts.ensureMatchingEncoding(i)
    _precondition(i >= startIndex && i <= endIndex,
      "String index is out of bounds")
    i = _guts.scalarAlign(i)

    if distance >= 0 {
      for _ in stride(from: 0, to: distance, by: 1) {
        _precondition(i < endIndex, "String index is out of bounds")
        i = _uncheckedIndex(after: i)
      }
    } else {
      for _ in stride(from: 0, to: distance, by: -1) {
        _precondition(i > startIndex, "String index is out of bounds")
        i = _uncheckedIndex(before: i)
      }
    }
    return _guts.markEncoding(i)
  }

  /// Returns an index that is the specified distance from the given index,
  /// unless that distance is beyond a given limiting index.
  ///
  /// The following example obtains an index advanced four positions from a
  /// string's starting index and then prints the character at that position.
  /// The operation doesn't require going beyond the limiting `s.endIndex`
  /// value, so it succeeds.
  ///
  ///     let s = "Swift"
  ///     if let i = s.index(s.startIndex, offsetBy: 4, limitedBy: s.endIndex) {
  ///         print(s[i])
  ///     }
  ///     // Prints "t"
  ///
  /// The next example attempts to retrieve an index six positions from
  /// `s.startIndex` but fails, because that distance is beyond the index
  /// passed as `limit`.
  ///
  ///     let j = s.index(s.startIndex, offsetBy: 6, limitedBy: s.endIndex)
  ///     print(j)
  ///     // Prints "nil"
  ///
  /// The value passed as `distance` must not offset `i` beyond the bounds of
  /// the collection, unless the index passed as `limit` prevents offsetting
  /// beyond those bounds.
  ///
  /// - Parameters:
  ///   - i: A valid index of the collection.
  ///   - distance: The distance to offset `i`.
  ///   - limit: A valid index of the collection to use as a limit. If
  ///     `distance > 0`, a limit that is less than `i` has no effect.
  ///     Likewise, if `distance < 0`, a limit that is greater than `i` has no
  ///     effect.
  /// - Returns: An index offset by `distance` from the index `i`, unless that
  ///   index would be beyond `limit` in the direction of movement. In that
  ///   case, the method returns `nil`.
  ///
  /// - Complexity: O(*n*), where *n* is the absolute value of `distance`.
  public func index(
    _ i: Index, offsetBy distance: Int, limitedBy limit: Index
  ) -> Index? {
    // Note: In Swift 5.6 and below, this function used to be inlinable,
    // forwarding to `BidirectionalCollection._index(_:offsetBy:limitedBy:)`.
    // Unfortunately, that approach isn't compatible with SE-0180, as it doesn't
    // support cases where `i` or `limit` aren't character aligned.

    // TODO: known-ASCII and single-scalar-grapheme fast path, etc.

    // Per SE-0180, `i` and `limit` are allowed to fall in between grapheme
    // breaks, in which case this function must still terminate without trapping
    // and return a result that makes sense.

    // Note: `limit` is intentionally not scalar aligned to ensure our behavior
    // exactly matches the documentation above.
    let limit = _guts.ensureMatchingEncoding(limit)

    var i = _guts.ensureMatchingEncoding(i)
    _precondition(i >= startIndex && i <= endIndex,
      "String index is out of bounds")
    i = _guts.scalarAlign(i)

    // Remember where we started: a limit on the "wrong" side of the starting
    // position has no effect.
    let start = i
    if distance >= 0 {
      for _ in stride(from: 0, to: distance, by: 1) {
        guard limit < start || i < limit else { return nil }
        _precondition(i < endIndex, "String index is out of bounds")
        i = _uncheckedIndex(after: i)
      }
      // The last step may have jumped over a limit that isn't character
      // aligned, so check once more.
      guard limit < start || i <= limit else { return nil }
    } else {
      for _ in stride(from: 0, to: distance, by: -1) {
        guard limit > start || i > limit else { return nil }
        _precondition(i > startIndex, "String index is out of bounds")
        i = _uncheckedIndex(before: i)
      }
      // Same as above, for the backward direction.
      guard limit > start || i >= limit else { return nil }
    }
    return _guts.markEncoding(i)
  }

  /// Returns the distance between two indices.
  ///
  /// - Parameters:
  ///   - start: A valid index of the collection.
  ///   - end: Another valid index of the collection. If `end` is equal to
  ///     `start`, the result is zero.
  /// - Returns: The distance between `start` and `end`.
  ///
  /// - Complexity: O(*n*), where *n* is the resulting distance.
  public func distance(from start: Index, to end: Index) -> Int {
    // Note: In Swift 5.6 and below, this function used to be inlinable,
    // forwarding to `BidirectionalCollection._distance(from:to:)`.

    // FIXME: Due to the `index(after:)` problem above, this function doesn't
    // always return consistent results when the given indices fall between
    // grapheme breaks -- swapping `start` and `end` may change the magnitude of
    // the result.

    var start = _guts.ensureMatchingEncoding(start)
    var end = _guts.ensureMatchingEncoding(end)

    _precondition(
      start >= startIndex && start <= endIndex &&
      end >= startIndex && end <= endIndex,
      "String index is out of bounds")

    start = _guts.scalarAlign(start)
    end = _guts.scalarAlign(end)

    // TODO: known-ASCII and single-scalar-grapheme fast path, etc.

    // Per SE-0180, `start` and `end` are allowed to fall in between grapheme
    // breaks, in which case this function must still terminate without trapping
    // and return a result that makes sense.

    var i = start
    var count = 0
    if i < end {
      while i < end { // Note `<` instead of `==`
        count += 1
        i = _uncheckedIndex(after: i)
      }
    } else if i > end {
      while i > end { // Note `>` instead of `==`
        count -= 1
        i = _uncheckedIndex(before: i)
      }
    }
    return count
  }

  /// Accesses the character at the given position.
  ///
  /// You can use the same indices for subscripting a string and its substring.
  /// For example, this code finds the first letter after the first space:
  ///
  ///     let str = "Greetings, friend! How are you?"
  ///     let firstSpace = str.firstIndex(of: " ") ?? str.endIndex
  ///     let substr = str[firstSpace...]
  ///     if let nextCapital = substr.firstIndex(where: { $0 >= "A" && $0 <= "Z" }) {
  ///         print("Capital after a space: \(str[nextCapital])")
  ///     }
  ///     // Prints "Capital after a space: H"
  ///
  /// - Parameter i: A valid index of the string. `i` must be less than the
  ///   string's end index.
  @inlinable @inline(__always) // FIXME(lorentey): Consider removing these. If
                               // `index(after:)` isn't inlinable, does it
                               // really matter if this one is? (Potential
                               // _guts-related optimizations notwithstanding.)
                               // `subscript` being inlinable forces a bunch of
                               // new additions to be _aEIC, even though they
                               // ought to be internal.
  public subscript(i: Index) -> Character {
    var i = _guts.ensureMatchingEncoding(i)
    _boundsCheck(i)
    i = _guts.scalarAlign(i)
    let distance = _characterStride(startingAt: i)

    return _guts.errorCorrectedCharacter(
      startingAt: i._encodedOffset, endingAt: i._encodedOffset &+ distance)
  }

  /// Returns the stride of the character that starts at `i`, in the same
  /// units as encoded offsets.
  ///
  /// Uses the stride stored in the index when there is one; returns zero for
  /// `endIndex`; otherwise asks `_guts` to measure the character.
  ///
  /// - Parameter i: A scalar-aligned index.
  @inlinable @inline(__always)
  internal func _characterStride(startingAt i: Index) -> Int {
    _internalInvariant_5_1(i._isScalarAligned)

    // Fast check if it's already been measured, otherwise check resiliently
    if let d = i.characterStride { return d }

    if i == endIndex { return 0 }

    return _guts._opaqueCharacterStride(startingAt: i._encodedOffset)
  }

  /// Returns the stride of the character that ends at `i`, in the same units
  /// as encoded offsets.
  ///
  /// Returns zero for `startIndex`; otherwise asks `_guts` to measure the
  /// character.
  ///
  /// - Parameter i: A scalar-aligned index.
  @inlinable @inline(__always)
  internal func _characterStride(endingAt i: Index) -> Int {
    _internalInvariant_5_1(i._isScalarAligned)

    if i == startIndex { return 0 }

    return _guts._opaqueCharacterStride(endingAt: i._encodedOffset)
  }
}

extension String {
  /// An iterator over the characters of a string.
  @frozen
  public struct Iterator: IteratorProtocol, Sendable {
    @usableFromInline
    internal var _guts: _StringGuts

    /// The encoded offset of the next character to return.
    @usableFromInline
    internal var _position: Int = 0

    /// The offset at which iteration stops; set to `guts.count` on creation.
    @usableFromInline
    internal var _end: Int

    @inlinable
    internal init(_ guts: _StringGuts) {
      self._end = guts.count
      self._guts = guts
    }

    /// Advances to the next character and returns it, or returns `nil` once
    /// the end position has been reached.
    @inlinable
    public mutating func next() -> Character? {
      guard _fastPath(_position < _end) else { return nil }

      let len = _guts._opaqueCharacterStride(startingAt: _position)
      let nextPosition = _position &+ len
      let result = _guts.errorCorrectedCharacter(
        startingAt: _position, endingAt: nextPosition)
      _position = nextPosition
      return result
    }
  }

  /// Returns an iterator over the characters of this string.
  @inlinable
  public __consuming func makeIterator() -> Iterator {
    return Iterator(_guts)
  }
}