[String] Scalar-alignment bug fixes.

Fixes a general category (pun intended) of scalar-alignment bugs
that arise when exchanging non-scalar-aligned indices between views
and when slicing.

SE-0180 unifies the Index type of String and all its views and allows
non-scalar-aligned indices to be used across views. In order to
guarantee behavior, we often have to check and perform scalar
alignment. To speed up these checks, we allocate a bit denoting
known-to-be-aligned, so that the alignment check can skip the
load. The table below shows which views need to check for alignment
before they can operate, and whether the indices they produce are aligned.

┌───────────────╥────────────────────┬──────────────────────────┐
│ View          ║ Requires Alignment │ Produces Aligned Indices │
╞═══════════════╬════════════════════╪══════════════════════════╡
│ Native UTF8   ║ no                 │ no                       │
├───────────────╫────────────────────┼──────────────────────────┤
│ Native UTF16  ║ yes                │ no                       │
╞═══════════════╬════════════════════╪══════════════════════════╡
│ Foreign UTF8  ║ yes                │ no                       │
├───────────────╫────────────────────┼──────────────────────────┤
│ Foreign UTF16 ║ no                 │ no                       │
╞═══════════════╬════════════════════╪══════════════════════════╡
│ UnicodeScalar ║ yes                │ yes                      │
├───────────────╫────────────────────┼──────────────────────────┤
│ Character     ║ yes                │ yes                      │
└───────────────╨────────────────────┴──────────────────────────┘

The "Requires Alignment" column applies to any operation taking a
String.Index that is not defined entirely in terms of other operations
taking a String.Index. These include:

* index(after:)
* index(before:)
* subscript
* distance(from:to:) (since `to` is compared against directly)
* UTF16View._nativeGetOffset(for:)
This commit is contained in:
Michael Ilseman
2019-06-24 16:51:54 -07:00
parent 93d65fc9c3
commit 4cd1e812b7
15 changed files with 545 additions and 164 deletions

View File

@@ -106,22 +106,29 @@ extension String.UnicodeScalarView: BidirectionalCollection {
/// - Precondition: The next location exists.
@inlinable @inline(__always)
public func index(after i: Index) -> Index {
// Shadow the caller's index with the result of `_guts.scalarAlign`, so every
// step below operates on the aligned index rather than the raw argument.
let i = _guts.scalarAlign(i)
// Checked on the aligned index, not on the index the caller passed in.
_internalInvariant(i < endIndex)
// TODO(String performance): isASCII fast-path
if _fastPath(_guts.isFastUTF8) {
// Advance by the value `fastUTF8ScalarLength` reports for the aligned offset
// (presumably the code-unit length of the scalar starting there).
let len = _guts.fastUTF8ScalarLength(startingAt: i._encodedOffset)
// NOTE(review): this listing appears to be a diff with its +/- markers
// stripped; the next line looks like the removed pre-fix return and the line
// after it its replacement -- confirm against the actual file.
return i.encoded(offsetBy: len)
// `.aligned` presumably marks the result as known-to-be-scalar-aligned so a
// later alignment check can be skipped -- TODO confirm.
return i.encoded(offsetBy: len).aligned
}
// Not fast UTF-8: delegate to the foreign-string helper.
return _foreignIndex(after: i)
}
/// Returns the distance from `start` to `end`, after scalar-aligning both
/// indices.
///
/// Each index is passed through `_guts.scalarAlign` before being forwarded to
/// `_distance(from:to:)`, so the computation never sees an index that was
/// handed in unaligned (for example, one obtained from another view).
///
/// - Parameters:
///   - start: The index to measure from; aligned before use.
///   - end: The index to measure to; aligned before use.
/// - Returns: The result of `_distance(from:to:)` on the two aligned indices.
@_alwaysEmitIntoClient // Swift 5.1 bug fix
public func distance(from start: Index, to end: Index) -> Int {
return _distance(from: _guts.scalarAlign(start), to: _guts.scalarAlign(end))
}
/// Returns the previous consecutive location before `i`.
///
/// - Precondition: The previous location exists.
@inlinable @inline(__always)
public func index(before i: Index) -> Index {
let i = _guts.scalarAlign(i)
precondition(i._encodedOffset > 0)
// TODO(String performance): isASCII fast-path
@@ -130,7 +137,7 @@ extension String.UnicodeScalarView: BidirectionalCollection {
return _utf8ScalarLength(utf8, endingAt: i._encodedOffset)
}
_internalInvariant(len <= 4, "invalid UTF8")
return i.encoded(offsetBy: -len)
return i.encoded(offsetBy: -len).aligned
}
return _foreignIndex(before: i)
@@ -412,7 +419,7 @@ extension String.UnicodeScalarView {
let cu = _guts.foreignErrorCorrectedUTF16CodeUnit(at: i)
let len = UTF16.isLeadSurrogate(cu) ? 2 : 1
return i.encoded(offsetBy: len)
return i.encoded(offsetBy: len).aligned
}
@usableFromInline @inline(never)
@@ -423,6 +430,6 @@ extension String.UnicodeScalarView {
let cu = _guts.foreignErrorCorrectedUTF16CodeUnit(at: priorIdx)
let len = UTF16.isTrailSurrogate(cu) ? 2 : 1
return i.encoded(offsetBy: -len)
return i.encoded(offsetBy: -len).aligned
}
}