[stdlib] Factor scalar-aligned String index validation out into a set of common routines

There are three flavors, corresponding to i < endIndex, i <= endIndex, and range containment checks.
Additionally, we have separate variants for index validation in substrings.
This commit is contained in:
Karoy Lorentey
2022-03-01 23:27:40 -08:00
parent 15c7721caf
commit a44997eeea
6 changed files with 337 additions and 127 deletions

View File

@@ -49,9 +49,8 @@ extension String: BidirectionalCollection {
/// `endIndex`. /// `endIndex`.
/// - Returns: The index value immediately after `i`. /// - Returns: The index value immediately after `i`.
public func index(after i: Index) -> Index { public func index(after i: Index) -> Index {
let i = _guts.ensureMatchingEncoding(i) let i = _guts.validateScalarIndex(i)
_precondition(i < endIndex, "String index is out of bounds") let r = _uncheckedIndex(after: i)
let r = _uncheckedIndex(after: _guts.scalarAlign(i))
return _guts.internalMarkEncoding(r) return _guts.internalMarkEncoding(r)
} }
@@ -85,14 +84,10 @@ extension String: BidirectionalCollection {
/// `startIndex`. /// `startIndex`.
/// - Returns: The index value immediately before `i`. /// - Returns: The index value immediately before `i`.
public func index(before i: Index) -> Index { public func index(before i: Index) -> Index {
let i = _guts.ensureMatchingEncoding(i) let i = _guts.validateInclusiveScalarIndex(i)
// Note: Scalar aligning an index may move it closer towards the
// Note: bounds checking in `index(before:)` is tricky as scalar aligning an // `startIndex`, so the `i > startIndex` check needs to come after the
// index may need to access storage, but it may also move it closer towards // `validateInclusiveScalarIndex` call.
// the `startIndex`. Therefore, we must check against the `endIndex` before
// aligning, but we need to delay the `i > startIndex` check until after.
_precondition(i <= endIndex, "String index is out of bounds")
let i = _guts.scalarAlign(i)
_precondition(i > startIndex, "String index is out of bounds") _precondition(i > startIndex, "String index is out of bounds")
let r = _uncheckedIndex(before: _guts.scalarAlign(i)) let r = _uncheckedIndex(before: _guts.scalarAlign(i))
@@ -147,10 +142,7 @@ extension String: BidirectionalCollection {
// TODO: known-ASCII and single-scalar-grapheme fast path, etc. // TODO: known-ASCII and single-scalar-grapheme fast path, etc.
var i = _guts.ensureMatchingEncoding(i) var i = _guts.validateInclusiveScalarIndex(i)
_precondition(i >= startIndex && i <= endIndex,
"String index is out of bounds")
i = _guts.scalarAlign(i)
if distance >= 0 { if distance >= 0 {
for _ in stride(from: 0, to: distance, by: 1) { for _ in stride(from: 0, to: distance, by: 1) {
@@ -222,10 +214,7 @@ extension String: BidirectionalCollection {
// exactly matches the documentation above. // exactly matches the documentation above.
let limit = _guts.ensureMatchingEncoding(limit) let limit = _guts.ensureMatchingEncoding(limit)
var i = _guts.ensureMatchingEncoding(i) var i = _guts.validateInclusiveScalarIndex(i)
_precondition(i >= startIndex && i <= endIndex,
"String index is out of bounds")
i = _guts.scalarAlign(i)
let start = i let start = i
if distance >= 0 { if distance >= 0 {
@@ -264,22 +253,14 @@ extension String: BidirectionalCollection {
// grapheme breaks -- swapping `start` and `end` may change the magnitude of // grapheme breaks -- swapping `start` and `end` may change the magnitude of
// the result. // the result.
var start = _guts.ensureMatchingEncoding(start) let start = _guts.validateInclusiveScalarIndex(start)
var end = _guts.ensureMatchingEncoding(end) let end = _guts.validateInclusiveScalarIndex(end)
_precondition(
start >= startIndex && start <= endIndex &&
end >= startIndex && end <= endIndex,
"String index is out of bounds")
start = _guts.scalarAlign(start)
end = _guts.scalarAlign(end)
// TODO: known-ASCII and single-scalar-grapheme fast path, etc. // TODO: known-ASCII and single-scalar-grapheme fast path, etc.
// Per SE-0180, `start` and `end` are allowed to fall in between grapheme // Per SE-0180, `start` and `end` are allowed to fall in between Character
// breaks, in which case this function must still terminate without trapping // boundaries, in which case this function must still terminate without
// and return a result that makes sense. // trapping and return a result that makes sense.
var i = start var i = start
var count = 0 var count = 0
@@ -313,7 +294,7 @@ extension String: BidirectionalCollection {
/// ///
/// - Parameter i: A valid index of the string. `i` must be less than the /// - Parameter i: A valid index of the string. `i` must be less than the
/// string's end index. /// string's end index.
@inlinable @inline(__always) // FIXME(lorentey): Consider removing these. If @inlinable @inline(__always) // TODO(lorentey): Consider removing these. If
// `index(after:)` isn't inlinable, does it // `index(after:)` isn't inlinable, does it
// really matter if this one is? (Potential // really matter if this one is? (Potential
// _guts-related optimizations notwithstanding.) // _guts-related optimizations notwithstanding.)
@@ -321,11 +302,8 @@ extension String: BidirectionalCollection {
// new additions to be _aEIC, even though they // new additions to be _aEIC, even though they
// ought to be internal. // ought to be internal.
public subscript(i: Index) -> Character { public subscript(i: Index) -> Character {
var i = _guts.ensureMatchingEncoding(i) let i = _guts.validateScalarIndex(i)
_boundsCheck(i)
i = _guts.scalarAlign(i)
let distance = _characterStride(startingAt: i) let distance = _characterStride(startingAt: i)
return _guts.errorCorrectedCharacter( return _guts.errorCorrectedCharacter(
startingAt: i._encodedOffset, endingAt: i._encodedOffset &+ distance) startingAt: i._encodedOffset, endingAt: i._encodedOffset &+ distance)
} }

View File

@@ -309,6 +309,17 @@ func _isSwiftStdlib_5_7() -> Bool {
// Encoding // Encoding
extension _StringGuts { extension _StringGuts {
/// Whether this string is known to store its contents as UTF-16 code units.
///
/// On stdlib versions >=5.7, the result always reflects the string's actual
/// encoding.
///
/// Standard Library versions <=5.6 never set the underlying flag, so on those
/// versions this property always returns false.
@_alwaysEmitIntoClient
@inline(__always)
internal var isKnownUTF16: Bool {
  // Forward to the flag stored on the underlying string object.
  return _object.isKnownUTF16
}
@_alwaysEmitIntoClient // Swift 5.7 @_alwaysEmitIntoClient // Swift 5.7
internal func markEncoding(_ i: String.Index) -> String.Index { internal func markEncoding(_ i: String.Index) -> String.Index {
// In this inlinable function, we cannot assume that all foreign strings are // In this inlinable function, we cannot assume that all foreign strings are
@@ -316,7 +327,7 @@ extension _StringGuts {
// introduced other foreign forms. // introduced other foreign forms.
if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { // SwiftStdlib 5.7 if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { // SwiftStdlib 5.7
// With a >=5.7 stdlib, we can rely on `isKnownUTF16` to contain the truth. // With a >=5.7 stdlib, we can rely on `isKnownUTF16` to contain the truth.
return _object.isKnownUTF16 ? i._knownUTF16 : i._knownUTF8 return isKnownUTF16 ? i._knownUTF16 : i._knownUTF8
} }
// We know that in stdlibs 5.0..<5.7, all foreign strings were UTF-16, // We know that in stdlibs 5.0..<5.7, all foreign strings were UTF-16,
// so we can use `isForeign` to determine the encoding. // so we can use `isForeign` to determine the encoding.
@@ -354,14 +365,14 @@ extension _StringGuts {
/// not set the flags that this method relies on. However, false positives /// not set the flags that this method relies on. However, false positives
/// cannot happen: if this method detects a mismatch, then it is guaranteed to /// cannot happen: if this method detects a mismatch, then it is guaranteed to
/// be a real one. /// be a real one.
@_alwaysEmitIntoClient // FIXME(lorentey): Should this remain internal? @_alwaysEmitIntoClient // TODO(lorentey): Should this remain internal?
@inline(__always) @inline(__always)
internal func ensureMatchingEncoding(_ i: String.Index) -> String.Index { internal func ensureMatchingEncoding(_ i: String.Index) -> String.Index {
if _fastPath(!isForeign && i._canBeUTF8) { return i } if _fastPath(!isForeign && i._canBeUTF8) { return i }
return _slowEnsureMatchingEncoding(i) return _slowEnsureMatchingEncoding(i)
} }
@_alwaysEmitIntoClient // FIXME(lorentey): Should this remain internal? @_alwaysEmitIntoClient // TODO(lorentey): Should this remain internal?
internal func _slowEnsureMatchingEncoding(_ i: String.Index) -> String.Index { internal func _slowEnsureMatchingEncoding(_ i: String.Index) -> String.Index {
_internalInvariant(isForeign || !i._canBeUTF8) _internalInvariant(isForeign || !i._canBeUTF8)
if isForeign { if isForeign {
@@ -383,7 +394,7 @@ extension _StringGuts {
// This trap can never trigger on OSes that have stdlibs <= 5.6, because // This trap can never trigger on OSes that have stdlibs <= 5.6, because
// those versions never set the `isKnownUTF16` flag in `_StringObject`. // those versions never set the `isKnownUTF16` flag in `_StringObject`.
// //
_precondition(!_object.isKnownUTF16 || i._canBeUTF16, _precondition(!isKnownUTF16 || i._canBeUTF16,
"Invalid string index") "Invalid string index")
return i return i
} }
@@ -407,6 +418,135 @@ extension _StringGuts {
} }
} }
// Index validation
extension _StringGuts {
/// Validate `i` and adjust its position toward the start, returning the
/// resulting index or trapping as appropriate. If this function returns, then
/// the returned value
///
/// - has an encoding that matches this string,
/// - is within the bounds of this string (excluding the `endIndex`), and
/// - is aligned on a scalar boundary.
///
/// The steps run in a fixed order: the encoding is checked first, then the
/// bounds, and only then is the index scalar aligned.
@_alwaysEmitIntoClient
internal func validateScalarIndex(_ i: String.Index) -> String.Index {
let i = ensureMatchingEncoding(i)
_precondition(i._encodedOffset < count, "String index is out of bounds")
return scalarAlign(i)
}
/// Validate `i` and adjust its position toward the start, returning the
/// resulting index or trapping as appropriate. If this function returns, then
/// the returned value
///
/// - has an encoding that matches this string,
/// - is within `start ..< end`, and
/// - is aligned on a scalar boundary.
///
/// `start` and `end` are expected to satisfy `start <= end` and
/// `end <= endIndex`; this is only verified as an internal invariant.
///
/// NOTE(review): the bounds check happens before scalar alignment, so the
/// result staying at or after `start` presumably relies on `start` itself
/// being scalar aligned -- confirm against callers.
@_alwaysEmitIntoClient
internal func validateScalarIndex(
_ i: String.Index,
from start: String.Index,
to end: String.Index
) -> String.Index {
_internalInvariant(start <= end && end <= endIndex)
let i = ensureMatchingEncoding(i)
_precondition(i >= start && i < end, "Substring index is out of bounds")
return scalarAlign(i)
}
/// Validate `i` and adjust its position toward the start, returning the
/// resulting index or trapping as appropriate. If this function returns, then
/// the returned value
///
/// - has an encoding that matches this string,
/// - is within the bounds of this string (including the `endIndex`), and
/// - is aligned on a scalar boundary.
///
/// The steps run in a fixed order: the encoding is checked first, then the
/// bounds, and only then is the index scalar aligned.
@_alwaysEmitIntoClient
internal func validateInclusiveScalarIndex(
_ i: String.Index
) -> String.Index {
let i = ensureMatchingEncoding(i)
_precondition(i._encodedOffset <= count, "String index is out of bounds")
return scalarAlign(i)
}
/// Validate `i` and adjust its position toward the start, returning the
/// resulting index or trapping as appropriate. If this function returns, then
/// the returned value
///
/// - has an encoding that matches this string,
/// - is within `start ... end` (that is, `end` itself is accepted), and
/// - is aligned on a scalar boundary.
///
/// `start` and `end` are expected to satisfy `start <= end` and
/// `end <= endIndex`; this is only verified as an internal invariant.
///
/// NOTE(review): the bounds check happens before scalar alignment, so the
/// result staying at or after `start` presumably relies on `start` itself
/// being scalar aligned -- confirm against callers.
@_alwaysEmitIntoClient
internal func validateInclusiveScalarIndex(
_ i: String.Index,
from start: String.Index,
to end: String.Index
) -> String.Index {
_internalInvariant(start <= end && end <= endIndex)
let i = ensureMatchingEncoding(i)
_precondition(i >= start && i <= end, "Substring index is out of bounds")
return scalarAlign(i)
}
/// Validate `range` and adjust the position of its bounds, returning the
/// resulting range or trapping as appropriate. If this function returns, then
/// the bounds of the returned value
///
/// - have an encoding that matches this string,
/// - are within the bounds of this string (including the `endIndex`), and
/// - are aligned on a scalar boundary.
@_alwaysEmitIntoClient
internal func validateScalarRange(
_ range: Range<String.Index>
) -> Range<String.Index> {
var upper = ensureMatchingEncoding(range.upperBound)
var lower = ensureMatchingEncoding(range.lowerBound)
// Note: if only `lower` was miscoded, then the range invariant `lower <=
// upper` may no longer hold after the above conversions, so we need to
// re-check it here.
_precondition(upper._encodedOffset <= count && lower <= upper,
"String index range is out of bounds")
upper = scalarAlign(upper)
lower = scalarAlign(lower)
return Range(_uncheckedBounds: (lower, upper))
}
/// Validate `range` and adjust the position of its bounds, returning the
/// resulting range or trapping as appropriate. If this function returns, then
/// the bounds of the returned value
///
/// - have an encoding that matches this string,
/// - are within `start ... end` (the upper bound may equal `end`), and
/// - are aligned on a scalar boundary.
///
/// `start` and `end` are expected to satisfy `start <= end` and
/// `end <= endIndex`; this is only verified as an internal invariant.
///
/// NOTE(review): the bounds check happens before scalar alignment, so the
/// lower bound staying at or after `start` presumably relies on `start`
/// itself being scalar aligned -- confirm against callers.
@_alwaysEmitIntoClient
internal func validateScalarRange(
_ range: Range<String.Index>,
from start: String.Index,
to end: String.Index
) -> Range<String.Index> {
_internalInvariant(start <= end && end <= endIndex)
var upper = ensureMatchingEncoding(range.upperBound)
var lower = ensureMatchingEncoding(range.lowerBound)
// Note: if only `lower` was miscoded, then the range invariant `lower <=
// upper` may no longer hold after the above conversions, so we need to
// re-check it here.
_precondition(upper <= end && lower >= start && lower <= upper,
"Substring index range is out of bounds")
upper = scalarAlign(upper)
lower = scalarAlign(lower)
return Range(_uncheckedBounds: (lower, upper))
}
}
// Old SPI(corelibs-foundation) // Old SPI(corelibs-foundation)
extension _StringGuts { extension _StringGuts {
@available(*, deprecated) @available(*, deprecated)

View File

@@ -839,6 +839,14 @@ extension _StringObject.CountAndFlags {
internal var isTailAllocated: Bool { internal var isTailAllocated: Bool {
return 0 != _storage & _StringObject.CountAndFlags.isTailAllocatedMask return 0 != _storage & _StringObject.CountAndFlags.isTailAllocatedMask
} }
/// Returns whether this string is known to use UTF-16 code units.
///
/// This always returns a value corresponding to the string's actual encoding
/// on stdlib versions >=5.7.
///
/// Standard Library versions <=5.6 did not set the corresponding flag, so
/// this property always returns false.
@_alwaysEmitIntoClient @_alwaysEmitIntoClient
@inline(__always) // Swift 5.7 @inline(__always) // Swift 5.7
internal var isKnownUTF16: Bool { internal var isKnownUTF16: Bool {

View File

@@ -195,10 +195,11 @@ extension String: RangeReplaceableCollection {
@_specialize(where C == Substring) @_specialize(where C == Substring)
@_specialize(where C == Array<Character>) @_specialize(where C == Array<Character>)
public mutating func replaceSubrange<C>( public mutating func replaceSubrange<C>(
_ bounds: Range<Index>, _ subrange: Range<Index>,
with newElements: C with newElements: C
) where C: Collection, C.Iterator.Element == Character { ) where C: Collection, C.Iterator.Element == Character {
_guts.replaceSubrange(bounds, with: newElements) let subrange = _guts.validateScalarRange(subrange)
_guts.replaceSubrange(subrange, with: newElements)
} }
/// Inserts a new character at the specified position. /// Inserts a new character at the specified position.
@@ -213,7 +214,9 @@ extension String: RangeReplaceableCollection {
/// ///
/// - Complexity: O(*n*), where *n* is the length of the string. /// - Complexity: O(*n*), where *n* is the length of the string.
public mutating func insert(_ newElement: Character, at i: Index) { public mutating func insert(_ newElement: Character, at i: Index) {
self.replaceSubrange(i..<i, with: newElement._str) let i = _guts.validateInclusiveScalarIndex(i)
let range = Range(_uncheckedBounds: (i, i))
_guts.replaceSubrange(range, with: newElement._str)
} }
/// Inserts a collection of characters at the specified position. /// Inserts a collection of characters at the specified position.
@@ -236,7 +239,9 @@ extension String: RangeReplaceableCollection {
public mutating func insert<S: Collection>( public mutating func insert<S: Collection>(
contentsOf newElements: S, at i: Index contentsOf newElements: S, at i: Index
) where S.Element == Character { ) where S.Element == Character {
self.replaceSubrange(i..<i, with: newElements) let i = _guts.validateInclusiveScalarIndex(i)
let range = Range(_uncheckedBounds: (i, i))
_guts.replaceSubrange(range, with: newElements)
} }
/// Removes and returns the character at the specified position. /// Removes and returns the character at the specified position.
@@ -259,8 +264,13 @@ extension String: RangeReplaceableCollection {
/// - Returns: The character that was removed. /// - Returns: The character that was removed.
@discardableResult @discardableResult
public mutating func remove(at i: Index) -> Character { public mutating func remove(at i: Index) -> Character {
let result = self[i] let i = _guts.validateScalarIndex(i)
_guts.remove(from: i, to: self.index(after: i)) let stride = _characterStride(startingAt: i)
let j = Index(_encodedOffset: i._encodedOffset &+ stride)._scalarAligned
let result = _guts.errorCorrectedCharacter(
startingAt: i._encodedOffset, endingAt: j._encodedOffset)
_guts.remove(from: i, to: j)
return result return result
} }
@@ -275,6 +285,7 @@ extension String: RangeReplaceableCollection {
/// - Parameter bounds: The range of the elements to remove. The upper and /// - Parameter bounds: The range of the elements to remove. The upper and
/// lower bounds of `bounds` must be valid indices of the string. /// lower bounds of `bounds` must be valid indices of the string.
public mutating func removeSubrange(_ bounds: Range<Index>) { public mutating func removeSubrange(_ bounds: Range<Index>) {
let bounds = _guts.validateScalarRange(bounds)
_guts.remove(from: bounds.lowerBound, to: bounds.upperBound) _guts.remove(from: bounds.lowerBound, to: bounds.upperBound)
} }

View File

@@ -108,6 +108,7 @@ extension String.UnicodeScalarView: BidirectionalCollection {
public func index(after i: Index) -> Index { public func index(after i: Index) -> Index {
// TODO(String performance): isASCII fast-path // TODO(String performance): isASCII fast-path
// TODO(lorentey): Review index validation
_precondition(i < endIndex, "String index is out of bounds") _precondition(i < endIndex, "String index is out of bounds")
let i = _guts.scalarAlign(i) let i = _guts.scalarAlign(i)
@@ -121,6 +122,7 @@ extension String.UnicodeScalarView: BidirectionalCollection {
@_alwaysEmitIntoClient // Swift 5.1 bug fix @_alwaysEmitIntoClient // Swift 5.1 bug fix
public func distance(from start: Index, to end: Index) -> Int { public func distance(from start: Index, to end: Index) -> Int {
// TODO(lorentey): Review index validation
return _distance(from: _guts.scalarAlign(start), to: _guts.scalarAlign(end)) return _distance(from: _guts.scalarAlign(start), to: _guts.scalarAlign(end))
} }
@@ -129,6 +131,7 @@ extension String.UnicodeScalarView: BidirectionalCollection {
/// - Precondition: The previous location exists. /// - Precondition: The previous location exists.
@inlinable @inline(__always) @inlinable @inline(__always)
public func index(before i: Index) -> Index { public func index(before i: Index) -> Index {
// TODO(lorentey): Review index validation
// TODO(String performance): isASCII fast-path // TODO(String performance): isASCII fast-path
// Note: bounds checking in `index(before:)` is tricky as scalar aligning an // Note: bounds checking in `index(before:)` is tricky as scalar aligning an
@@ -168,6 +171,7 @@ extension String.UnicodeScalarView: BidirectionalCollection {
/// must be less than the view's end index. /// must be less than the view's end index.
@inlinable @inline(__always) @inlinable @inline(__always)
public subscript(position: Index) -> Unicode.Scalar { public subscript(position: Index) -> Unicode.Scalar {
// TODO(lorentey): Review index validation
String(_guts)._boundsCheck(position) String(_guts)._boundsCheck(position)
let i = _guts.scalarAlign(position) let i = _guts.scalarAlign(position)
return _guts.errorCorrectedScalar(startingAt: i._encodedOffset).0 return _guts.errorCorrectedScalar(startingAt: i._encodedOffset).0
@@ -314,6 +318,7 @@ extension String.UnicodeScalarView: RangeReplaceableCollection {
_ bounds: Range<Index>, _ bounds: Range<Index>,
with newElements: C with newElements: C
) where C: Collection, C.Element == Unicode.Scalar { ) where C: Collection, C.Element == Unicode.Scalar {
// TODO(lorentey): Review index validation
// TODO(String performance): Skip extra String and Array allocation // TODO(String performance): Skip extra String and Array allocation
let utf8Replacement = newElements.flatMap { String($0).utf8 } let utf8Replacement = newElements.flatMap { String($0).utf8 }
@@ -358,6 +363,7 @@ extension String.UnicodeScalarIndex {
_ sourcePosition: String.Index, _ sourcePosition: String.Index,
within unicodeScalars: String.UnicodeScalarView within unicodeScalars: String.UnicodeScalarView
) { ) {
// TODO(lorentey): Review index validation
guard unicodeScalars._guts.isOnUnicodeScalarBoundary(sourcePosition) else { guard unicodeScalars._guts.isOnUnicodeScalarBoundary(sourcePosition) else {
return nil return nil
} }
@@ -385,6 +391,7 @@ extension String.UnicodeScalarIndex {
/// an attempt to convert the position of a UTF-8 continuation byte /// an attempt to convert the position of a UTF-8 continuation byte
/// returns `nil`. /// returns `nil`.
public func samePosition(in characters: String) -> String.Index? { public func samePosition(in characters: String) -> String.Index? {
// TODO(lorentey): Review index validation
return String.Index(self, within: characters) return String.Index(self, within: characters)
} }
} }
@@ -414,6 +421,7 @@ extension String.UnicodeScalarView {
@available(swift, introduced: 4) @available(swift, introduced: 4)
public subscript(r: Range<Index>) -> String.UnicodeScalarView.SubSequence { public subscript(r: Range<Index>) -> String.UnicodeScalarView.SubSequence {
// TODO(lorentey): Review index validation
_failEarlyRangeCheck(r, bounds: startIndex..<endIndex) _failEarlyRangeCheck(r, bounds: startIndex..<endIndex)
return String.UnicodeScalarView.SubSequence(self, _bounds: r) return String.UnicodeScalarView.SubSequence(self, _bounds: r)
} }

View File

@@ -97,21 +97,24 @@ public struct Substring: Sendable {
@usableFromInline @usableFromInline
internal var _slice: Slice<String> internal var _slice: Slice<String>
/// Creates a substring that stores `slice` as is, without validating or
/// adjusting its bounds.
///
/// The internal invariants below spell out what the caller is expected to
/// guarantee about `slice`:
///
/// - its `endIndex` does not exceed the end of the base string's storage,
/// - both of its bounds have an encoding that matches the base string, and
/// - both of its bounds are marked as scalar aligned.
@inline(__always)
internal init(_unchecked slice: Slice<String>) {
_internalInvariant(slice.endIndex <= slice._base._guts.endIndex)
_internalInvariant(
slice._base._guts.hasMatchingEncoding(slice.startIndex) &&
slice._base._guts.hasMatchingEncoding(slice.endIndex))
_internalInvariant(
slice.startIndex._isScalarAligned && slice.endIndex._isScalarAligned)
self._slice = slice
_invariantCheck()
}
@usableFromInline @usableFromInline
@available(*, deprecated) // Use `init(_unchecked:)` in new code.
internal init(_ slice: Slice<String>) { internal init(_ slice: Slice<String>) {
let _guts = slice._base._guts let r = slice.base._guts.validateScalarRange(
_internalInvariant( slice.startIndex ..< slice.endIndex)
_guts.hasMatchingEncoding(slice.startIndex) && self._slice = Slice(base: slice.base, bounds: r)
_guts.hasMatchingEncoding(slice.endIndex))
_internalInvariant(
slice.startIndex >= _guts.startIndex && slice.endIndex <= _guts.endIndex)
let start = slice.base._guts.scalarAlign(slice.startIndex)
let end = slice.base._guts.scalarAlign(slice.endIndex)
self._slice = Slice(
base: slice.base,
bounds: Range(_uncheckedBounds: (start, end)))
_invariantCheck() _invariantCheck()
} }
@@ -123,7 +126,7 @@ public struct Substring: Sendable {
/// Creates an empty substring. /// Creates an empty substring.
@inlinable @inline(__always) @inlinable @inline(__always)
public init() { public init() {
self.init(Slice()) self._slice = Slice()
} }
} }
@@ -146,6 +149,10 @@ extension Substring {
#else #else
@usableFromInline @inline(never) @_effects(releasenone) @usableFromInline @inline(never) @_effects(releasenone)
internal func _invariantCheck() { internal func _invariantCheck() {
_internalInvariant(_slice.endIndex <= base._guts.endIndex)
_internalInvariant(
base._guts.hasMatchingEncoding(_slice.startIndex) &&
base._guts.hasMatchingEncoding(_slice.endIndex))
// Indices are always scalar aligned // Indices are always scalar aligned
_internalInvariant( _internalInvariant(
_slice.startIndex == base._guts.scalarAlign(_slice.startIndex) && _slice.startIndex == base._guts.scalarAlign(_slice.startIndex) &&
@@ -156,6 +163,29 @@ extension Substring {
#endif // INTERNAL_CHECKS_ENABLED #endif // INTERNAL_CHECKS_ENABLED
} }
extension Substring {
  /// Validates `i` against this substring, excluding its `endIndex`.
  ///
  /// Forwards to the base string's `validateScalarIndex(_:from:to:)`, passing
  /// this substring's `startIndex` and `endIndex` as the limits.
  @inline(__always)
  internal func _validateScalarIndex(_ i: String.Index) -> String.Index {
    let lowerLimit = startIndex
    let upperLimit = endIndex
    return _slice._base._guts.validateScalarIndex(
      i, from: lowerLimit, to: upperLimit)
  }

  /// Validates `i` against this substring, accepting its `endIndex`.
  ///
  /// Forwards to the base string's
  /// `validateInclusiveScalarIndex(_:from:to:)`, passing this substring's
  /// `startIndex` and `endIndex` as the limits.
  @inline(__always)
  internal func _validateInclusiveScalarIndex(
    _ i: String.Index
  ) -> String.Index {
    let lowerLimit = startIndex
    let upperLimit = endIndex
    return _slice._base._guts.validateInclusiveScalarIndex(
      i, from: lowerLimit, to: upperLimit)
  }

  /// Validates both bounds of `range` against this substring.
  ///
  /// Forwards to the base string's `validateScalarRange(_:from:to:)`, passing
  /// this substring's `startIndex` and `endIndex` as the limits.
  @inline(__always)
  internal func _validateScalarRange(
    _ range: Range<String.Index>
  ) -> Range<String.Index> {
    let lowerLimit = startIndex
    let upperLimit = endIndex
    return _slice._base._guts.validateScalarRange(
      range, from: lowerLimit, to: upperLimit)
  }
}
extension Substring: StringProtocol { extension Substring: StringProtocol {
public typealias Index = String.Index public typealias Index = String.Index
public typealias SubSequence = Substring public typealias SubSequence = Substring
@@ -174,10 +204,8 @@ extension Substring: StringProtocol {
// leads to Collection conformance issues when the `Substring`'s bounds do // leads to Collection conformance issues when the `Substring`'s bounds do
// not fall on grapheme boundaries in `base`. // not fall on grapheme boundaries in `base`.
let i = _slice.base._guts.ensureMatchingEncoding(i) let i = _validateScalarIndex(i)
_precondition(i < endIndex && i >= startIndex, let r = _uncheckedIndex(after: i)
"Substring index is out of bounds")
let r = _uncheckedIndex(after: _slice.base._guts.scalarAlign(i))
return _slice.base._guts.internalMarkEncoding(r) return _slice.base._guts.internalMarkEncoding(r)
} }
@@ -222,10 +250,13 @@ extension Substring: StringProtocol {
// leads to Collection conformance issues when the `Substring`'s bounds do // leads to Collection conformance issues when the `Substring`'s bounds do
// not fall on grapheme boundaries in `base`. // not fall on grapheme boundaries in `base`.
let i = _slice.base._guts.ensureMatchingEncoding(i) let i = _validateInclusiveScalarIndex(i)
_precondition(i <= endIndex && i > startIndex, // Note: Scalar aligning an index may move it closer towards the
"Substring index is out of bounds") // `startIndex`, so the `i > startIndex` check needs to come after the
let r = _uncheckedIndex(before: _slice.base._guts.scalarAlign(i)) // `_validateInclusiveScalarIndex` call.
_precondition(i > startIndex, "Substring index is out of bounds")
let r = _uncheckedIndex(before: i)
return _slice.base._guts.internalMarkEncoding(r) return _slice.base._guts.internalMarkEncoding(r)
} }
@@ -242,7 +273,6 @@ extension Substring: StringProtocol {
_internalInvariant(i._isScalarAligned) _internalInvariant(i._isScalarAligned)
// TODO: known-ASCII fast path, single-scalar-grapheme fast path, etc. // TODO: known-ASCII fast path, single-scalar-grapheme fast path, etc.
let i = _slice.base._guts.scalarAlign(i)
let stride = _characterStride(endingAt: i) let stride = _characterStride(endingAt: i)
let priorOffset = i._encodedOffset &- stride let priorOffset = i._encodedOffset &- stride
_internalInvariant(priorOffset >= startIndex._encodedOffset) _internalInvariant(priorOffset >= startIndex._encodedOffset)
@@ -266,11 +296,8 @@ extension Substring: StringProtocol {
// substring. This leads to Collection conformance issues when the // substring. This leads to Collection conformance issues when the
// `Substring`'s bounds do not fall on grapheme boundaries in `base`. // `Substring`'s bounds do not fall on grapheme boundaries in `base`.
var i = _slice.base._guts.ensureMatchingEncoding(i)
_precondition(i >= startIndex && i <= endIndex,
"String index is out of bounds")
i = _slice.base._guts.scalarAlign(i)
// TODO: known-ASCII and single-scalar-grapheme fast path, etc. // TODO: known-ASCII and single-scalar-grapheme fast path, etc.
var i = _validateInclusiveScalarIndex(i)
if distance >= 0 { if distance >= 0 {
for _ in stride(from: 0, to: distance, by: 1) { for _ in stride(from: 0, to: distance, by: 1) {
_precondition(i < endIndex, "String index is out of bounds") _precondition(i < endIndex, "String index is out of bounds")
@@ -303,11 +330,7 @@ extension Substring: StringProtocol {
// exactly matches the documentation. // exactly matches the documentation.
let limit = _slice.base._guts.ensureMatchingEncoding(limit) let limit = _slice.base._guts.ensureMatchingEncoding(limit)
var i = _slice.base._guts.ensureMatchingEncoding(i) var i = _validateInclusiveScalarIndex(i)
_precondition(i >= startIndex && i <= endIndex,
"String index is out of bounds")
i = _slice.base._guts.scalarAlign(i)
let start = i let start = i
if distance >= 0 { if distance >= 0 {
for _ in stride(from: 0, to: distance, by: 1) { for _ in stride(from: 0, to: distance, by: 1) {
@@ -340,22 +363,14 @@ extension Substring: StringProtocol {
// grapheme breaks -- swapping `start` and `end` may change the magnitude of // grapheme breaks -- swapping `start` and `end` may change the magnitude of
// the result. // the result.
var start = _slice.base._guts.ensureMatchingEncoding(start) let start = _validateInclusiveScalarIndex(start)
var end = _slice.base._guts.ensureMatchingEncoding(end) let end = _validateInclusiveScalarIndex(end)
_precondition(
start >= startIndex && start <= endIndex &&
end >= startIndex && end <= endIndex,
"String index is out of bounds")
start = _slice.base._guts.scalarAlign(start)
end = _slice.base._guts.scalarAlign(end)
// TODO: known-ASCII and single-scalar-grapheme fast path, etc. // TODO: known-ASCII and single-scalar-grapheme fast path, etc.
// Per SE-0180, `start` and `end` are allowed to fall in between grapheme // Per SE-0180, `start` and `end` are allowed to fall in between Character
// breaks, in which case this function must still terminate without trapping // boundaries, in which case this function must still terminate without
// and return a result that makes sense. // trapping and return a result that makes sense.
var i = start var i = start
var count = 0 var count = 0
@@ -375,33 +390,34 @@ extension Substring: StringProtocol {
} }
public subscript(i: Index) -> Character { public subscript(i: Index) -> Character {
var i = _slice.base._guts.ensureMatchingEncoding(i) let i = _validateScalarIndex(i)
_precondition(i >= startIndex && i < endIndex,
"Substring index is out of bounds")
i = _slice.base._guts.scalarAlign(i)
let distance = _characterStride(startingAt: i) let distance = _characterStride(startingAt: i)
return _slice.base._guts.errorCorrectedCharacter( return _slice.base._guts.errorCorrectedCharacter(
startingAt: i._encodedOffset, endingAt: i._encodedOffset &+ distance) startingAt: i._encodedOffset, endingAt: i._encodedOffset &+ distance)
} }
public mutating func replaceSubrange<C>( public mutating func replaceSubrange<C>(
_ bounds: Range<Index>, _ subrange: Range<Index>,
with newElements: C with newElements: C
) where C: Collection, C.Iterator.Element == Iterator.Element { ) where C: Collection, C.Iterator.Element == Iterator.Element {
_replaceSubrange(bounds, with: newElements) _replaceSubrange(subrange, with: newElements)
} }
public mutating func replaceSubrange( public mutating func replaceSubrange(
_ bounds: Range<Index>, with newElements: Substring _ subrange: Range<Index>, with newElements: Substring
) { ) {
_replaceSubrange(bounds, with: newElements) _replaceSubrange(subrange, with: newElements)
} }
@inline(__always)
internal mutating func _replaceSubrange<C: Collection>( internal mutating func _replaceSubrange<C: Collection>(
_ bounds: Range<Index>, with newElements: C _ subrange: Range<Index>, with newElements: C
) where C.Element == Element { ) where C.Element == Element {
_slice.replaceSubrange(bounds, with: newElements) let subrange = _validateScalarRange(subrange)
// TODO(lorentey): We can't delegate to Slice here; it doesn't handle
// subscalar indices or the case where `newElements` changes character
// breaks in the surrounding context. The substring's
// `startIndex`/`endIndex` may get broken.
_slice.replaceSubrange(subrange, with: newElements)
} }
/// Creates a string from the given Unicode code units in the specified /// Creates a string from the given Unicode code units in the specified
@@ -495,17 +511,20 @@ extension Substring: StringProtocol {
} }
extension Substring { extension Substring {
// TODO(lorentey): Rename to proper terminology
internal var _knownToStartOnGraphemeBreak: Bool { internal var _knownToStartOnGraphemeBreak: Bool {
startIndex._encodedOffset == 0 || startIndex.characterStride != nil startIndex._encodedOffset == 0 || startIndex.characterStride != nil
} }
// TODO(lorentey): Rename to proper terminology
internal var _knownToEndOnGraphemeBreak: Bool { internal var _knownToEndOnGraphemeBreak: Bool {
endIndex == _slice.base.endIndex || endIndex.characterStride != nil endIndex == _slice.base.endIndex || endIndex.characterStride != nil
} }
internal var _encodedOffsetRange: Range<Int> { internal var _encodedOffsetRange: Range<Int> {
Range(_uncheckedBounds: ( let lower = _slice._startIndex._encodedOffset
_slice._startIndex._encodedOffset, _slice._endIndex._encodedOffset)) let upper = _slice._endIndex._encodedOffset
return Range(_uncheckedBounds: (lower, upper))
} }
internal func _characterStride(startingAt i: Index) -> Int { internal func _characterStride(startingAt i: Index) -> Int {
@@ -556,7 +575,7 @@ extension Substring: CustomDebugStringConvertible {
extension Substring: LosslessStringConvertible { extension Substring: LosslessStringConvertible {
public init(_ content: String) { public init(_ content: String) {
let range = Range(_uncheckedBounds: (content.startIndex, content.endIndex)) let range = Range(_uncheckedBounds: (content.startIndex, content.endIndex))
self.init(Slice(base: content, bounds: range)) self.init(_unchecked: Slice(base: content, bounds: range))
} }
} }
@@ -602,11 +621,13 @@ extension Substring.UTF8View: BidirectionalCollection {
@inlinable @inlinable
public func formIndex(after i: inout Index) { public func formIndex(after i: inout Index) {
// TODO(lorentey): Review index validation
_slice.formIndex(after: &i) _slice.formIndex(after: &i)
} }
@inlinable @inlinable
public func index(_ i: Index, offsetBy n: Int) -> Index { public func index(_ i: Index, offsetBy n: Int) -> Index {
// TODO(lorentey): Review index validation
return _slice.index(i, offsetBy: n) return _slice.index(i, offsetBy: n)
} }
@@ -614,11 +635,13 @@ extension Substring.UTF8View: BidirectionalCollection {
public func index( public func index(
_ i: Index, offsetBy n: Int, limitedBy limit: Index _ i: Index, offsetBy n: Int, limitedBy limit: Index
) -> Index? { ) -> Index? {
// TODO(lorentey): Review index validation
return _slice.index(i, offsetBy: n, limitedBy: limit) return _slice.index(i, offsetBy: n, limitedBy: limit)
} }
@inlinable @inlinable
public func distance(from start: Index, to end: Index) -> Int { public func distance(from start: Index, to end: Index) -> Int {
// TODO(lorentey): Review index validation
return _slice.distance(from: start, to: end) return _slice.distance(from: start, to: end)
} }
@@ -632,6 +655,7 @@ extension Substring.UTF8View: BidirectionalCollection {
@inlinable @inlinable
public func _failEarlyRangeCheck(_ index: Index, bounds: Range<Index>) { public func _failEarlyRangeCheck(_ index: Index, bounds: Range<Index>) {
// TODO(lorentey): Review index validation
_slice._failEarlyRangeCheck(index, bounds: bounds) _slice._failEarlyRangeCheck(index, bounds: bounds)
} }
@@ -639,19 +663,25 @@ extension Substring.UTF8View: BidirectionalCollection {
public func _failEarlyRangeCheck( public func _failEarlyRangeCheck(
_ range: Range<Index>, bounds: Range<Index> _ range: Range<Index>, bounds: Range<Index>
) { ) {
// TODO(lorentey): Review index validation
_slice._failEarlyRangeCheck(range, bounds: bounds) _slice._failEarlyRangeCheck(range, bounds: bounds)
} }
@inlinable @inlinable
public func index(before i: Index) -> Index { return _slice.index(before: i) } public func index(before i: Index) -> Index {
// TODO(lorentey): Review index validation
return _slice.index(before: i)
}
@inlinable @inlinable
public func formIndex(before i: inout Index) { public func formIndex(before i: inout Index) {
// TODO(lorentey): Review index validation
_slice.formIndex(before: &i) _slice.formIndex(before: &i)
} }
@inlinable @inlinable
public subscript(r: Range<Index>) -> Substring.UTF8View { public subscript(r: Range<Index>) -> Substring.UTF8View {
// TODO(lorentey): Review index validation
// FIXME(strings): tests. // FIXME(strings): tests.
_precondition(r.lowerBound >= startIndex && r.upperBound <= endIndex, _precondition(r.lowerBound >= startIndex && r.upperBound <= endIndex,
"UTF8View index range out of bounds") "UTF8View index range out of bounds")
@@ -697,6 +727,7 @@ extension String {
self = String(Substring(codeUnits)) self = String(Substring(codeUnits))
} }
} }
extension Substring { extension Substring {
@frozen @frozen
public struct UTF16View: Sendable { public struct UTF16View: Sendable {
@@ -706,6 +737,7 @@ extension Substring {
/// Creates an instance that slices `base` at `_bounds`. /// Creates an instance that slices `base` at `_bounds`.
@inlinable @inlinable
internal init(_ base: String.UTF16View, _bounds: Range<Index>) { internal init(_ base: String.UTF16View, _bounds: Range<Index>) {
// TODO(lorentey): Review index validation
_slice = Slice( _slice = Slice(
base: String(base._guts).utf16, base: String(base._guts).utf16,
bounds: _bounds) bounds: _bounds)
@@ -729,21 +761,29 @@ extension Substring.UTF16View: BidirectionalCollection {
public var endIndex: Index { return _slice.endIndex } public var endIndex: Index { return _slice.endIndex }
@inlinable @inlinable
public subscript(index: Index) -> Element { return _slice[index] } public subscript(index: Index) -> Element {
// TODO(lorentey): Review index validation
return _slice[index]
}
@inlinable @inlinable
public var indices: Indices { return _slice.indices } public var indices: Indices { return _slice.indices }
@inlinable @inlinable
public func index(after i: Index) -> Index { return _slice.index(after: i) } public func index(after i: Index) -> Index {
// TODO(lorentey): Review index validation
return _slice.index(after: i)
}
@inlinable @inlinable
public func formIndex(after i: inout Index) { public func formIndex(after i: inout Index) {
// TODO(lorentey): Review index validation
_slice.formIndex(after: &i) _slice.formIndex(after: &i)
} }
@inlinable @inlinable
public func index(_ i: Index, offsetBy n: Int) -> Index { public func index(_ i: Index, offsetBy n: Int) -> Index {
// TODO(lorentey): Review index validation
return _slice.index(i, offsetBy: n) return _slice.index(i, offsetBy: n)
} }
@@ -751,16 +791,19 @@ extension Substring.UTF16View: BidirectionalCollection {
public func index( public func index(
_ i: Index, offsetBy n: Int, limitedBy limit: Index _ i: Index, offsetBy n: Int, limitedBy limit: Index
) -> Index? { ) -> Index? {
// TODO(lorentey): Review index validation
return _slice.index(i, offsetBy: n, limitedBy: limit) return _slice.index(i, offsetBy: n, limitedBy: limit)
} }
@inlinable @inlinable
public func distance(from start: Index, to end: Index) -> Int { public func distance(from start: Index, to end: Index) -> Int {
// TODO(lorentey): Review index validation
return _slice.distance(from: start, to: end) return _slice.distance(from: start, to: end)
} }
@inlinable @inlinable
public func _failEarlyRangeCheck(_ index: Index, bounds: Range<Index>) { public func _failEarlyRangeCheck(_ index: Index, bounds: Range<Index>) {
// TODO(lorentey): Review index validation
_slice._failEarlyRangeCheck(index, bounds: bounds) _slice._failEarlyRangeCheck(index, bounds: bounds)
} }
@@ -768,19 +811,25 @@ extension Substring.UTF16View: BidirectionalCollection {
public func _failEarlyRangeCheck( public func _failEarlyRangeCheck(
_ range: Range<Index>, bounds: Range<Index> _ range: Range<Index>, bounds: Range<Index>
) { ) {
// TODO(lorentey): Review index validation
_slice._failEarlyRangeCheck(range, bounds: bounds) _slice._failEarlyRangeCheck(range, bounds: bounds)
} }
@inlinable @inlinable
public func index(before i: Index) -> Index { return _slice.index(before: i) } public func index(before i: Index) -> Index {
// TODO(lorentey): Review index validation
return _slice.index(before: i)
}
@inlinable @inlinable
public func formIndex(before i: inout Index) { public func formIndex(before i: inout Index) {
// TODO(lorentey): Review index validation
_slice.formIndex(before: &i) _slice.formIndex(before: &i)
} }
@inlinable @inlinable
public subscript(r: Range<Index>) -> Substring.UTF16View { public subscript(r: Range<Index>) -> Substring.UTF16View {
// TODO(lorentey): Review index validation
return Substring.UTF16View(_slice.base, _bounds: r) return Substring.UTF16View(_slice.base, _bounds: r)
} }
} }
@@ -857,21 +906,32 @@ extension Substring.UnicodeScalarView: BidirectionalCollection {
public var endIndex: Index { return _slice.endIndex } public var endIndex: Index { return _slice.endIndex }
@inlinable @inlinable
public subscript(index: Index) -> Element { return _slice[index] } public subscript(index: Index) -> Element {
// TODO(lorentey): Review index validation
return _slice[index]
}
@inlinable @inlinable
public var indices: Indices { return _slice.indices } public var indices: Indices {
// TODO(lorentey): Review index validation
return _slice.indices
}
@inlinable @inlinable
public func index(after i: Index) -> Index { return _slice.index(after: i) } public func index(after i: Index) -> Index {
// TODO(lorentey): Review index validation
return _slice.index(after: i)
}
@inlinable @inlinable
public func formIndex(after i: inout Index) { public func formIndex(after i: inout Index) {
// TODO(lorentey): Review index validation
_slice.formIndex(after: &i) _slice.formIndex(after: &i)
} }
@inlinable @inlinable
public func index(_ i: Index, offsetBy n: Int) -> Index { public func index(_ i: Index, offsetBy n: Int) -> Index {
// TODO(lorentey): Review index validation
return _slice.index(i, offsetBy: n) return _slice.index(i, offsetBy: n)
} }
@@ -879,16 +939,19 @@ extension Substring.UnicodeScalarView: BidirectionalCollection {
public func index( public func index(
_ i: Index, offsetBy n: Int, limitedBy limit: Index _ i: Index, offsetBy n: Int, limitedBy limit: Index
) -> Index? { ) -> Index? {
// TODO(lorentey): Review index validation
return _slice.index(i, offsetBy: n, limitedBy: limit) return _slice.index(i, offsetBy: n, limitedBy: limit)
} }
@inlinable @inlinable
public func distance(from start: Index, to end: Index) -> Int { public func distance(from start: Index, to end: Index) -> Int {
// TODO(lorentey): Review index validation
return _slice.distance(from: start, to: end) return _slice.distance(from: start, to: end)
} }
@inlinable @inlinable
public func _failEarlyRangeCheck(_ index: Index, bounds: Range<Index>) { public func _failEarlyRangeCheck(_ index: Index, bounds: Range<Index>) {
// TODO(lorentey): Review index validation
_slice._failEarlyRangeCheck(index, bounds: bounds) _slice._failEarlyRangeCheck(index, bounds: bounds)
} }
@@ -896,19 +959,25 @@ extension Substring.UnicodeScalarView: BidirectionalCollection {
public func _failEarlyRangeCheck( public func _failEarlyRangeCheck(
_ range: Range<Index>, bounds: Range<Index> _ range: Range<Index>, bounds: Range<Index>
) { ) {
// TODO(lorentey): Review index validation
_slice._failEarlyRangeCheck(range, bounds: bounds) _slice._failEarlyRangeCheck(range, bounds: bounds)
} }
@inlinable @inlinable
public func index(before i: Index) -> Index { return _slice.index(before: i) } public func index(before i: Index) -> Index {
// TODO(lorentey): Review index validation
return _slice.index(before: i)
}
@inlinable @inlinable
public func formIndex(before i: inout Index) { public func formIndex(before i: inout Index) {
// TODO(lorentey): Review index validation
_slice.formIndex(before: &i) _slice.formIndex(before: &i)
} }
@inlinable @inlinable
public subscript(r: Range<Index>) -> Substring.UnicodeScalarView { public subscript(r: Range<Index>) -> Substring.UnicodeScalarView {
// TODO(lorentey): Review index validation
_failEarlyRangeCheck(r, bounds: startIndex..<endIndex) _failEarlyRangeCheck(r, bounds: startIndex..<endIndex)
return Substring.UnicodeScalarView(_slice.base, _bounds: r) return Substring.UnicodeScalarView(_slice.base, _bounds: r)
} }
@@ -951,9 +1020,12 @@ extension Substring.UnicodeScalarView: RangeReplaceableCollection {
public init() { _slice = Slice.init() } public init() { _slice = Slice.init() }
public mutating func replaceSubrange<C: Collection>( public mutating func replaceSubrange<C: Collection>(
_ target: Range<Index>, with replacement: C _ subrange: Range<Index>, with replacement: C
) where C.Element == Element { ) where C.Element == Element {
_slice.replaceSubrange(target, with: replacement) // TODO(lorentey): Review index validation
let subrange = _slice.base._guts.validateScalarRange(
subrange, from: startIndex, to: endIndex)
_slice.replaceSubrange(subrange, with: replacement)
} }
} }
@@ -1037,22 +1109,15 @@ extension Substring: ExpressibleByStringLiteral {
extension String { extension String {
@available(swift, introduced: 4) @available(swift, introduced: 4)
public subscript(r: Range<Index>) -> Substring { public subscript(r: Range<Index>) -> Substring {
let r = Range(_uncheckedBounds: ( let r = _guts.validateScalarRange(r)
_guts.ensureMatchingEncoding(r.lowerBound), return Substring(_unchecked: Slice(base: self, bounds: r))
_guts.ensureMatchingEncoding(r.upperBound)))
_boundsCheck(r)
return Substring(Slice(base: self, bounds: r))
} }
} }
extension Substring { extension Substring {
@available(swift, introduced: 4) @available(swift, introduced: 4)
public subscript(r: Range<Index>) -> Substring { public subscript(r: Range<Index>) -> Substring {
let r = Range(_uncheckedBounds: ( let r = _validateScalarRange(r)
_slice.base._guts.ensureMatchingEncoding(r.lowerBound), return Substring(_unchecked: Slice(base: base, bounds: r))
_slice.base._guts.ensureMatchingEncoding(r.upperBound)))
_precondition(r.lowerBound >= startIndex && r.upperBound <= endIndex,
"Substring index range is out of bounds")
return Substring(Slice(base: _slice.base, bounds: r))
} }
} }