[stdlib] Fix string index sharing (#4896)

* [stdlib] Fix String.UTF16View index sharing

* [stdlib] Fix String.UnicodeScalarView index sharing

* [stdlib] Fix String.CharacterView index sharing

* [stdlib] Test advancing string indices past their ends

* [stdlib] Simplify CharacterView ranged subscript
This commit is contained in:
Nate Cook
2016-10-13 12:19:38 -05:00
committed by Maxim Moiseev
parent 91bba4d425
commit c2bc72d9d6
4 changed files with 93 additions and 55 deletions

View File

@@ -54,14 +54,22 @@ extension String {
public struct CharacterView {
internal var _core: _StringCore
/// The offset of this view's `_core` from an original core. This works
/// around the fact that `_StringCore` is always zero-indexed.
/// `_coreOffset` should be subtracted from `UnicodeScalarIndex._position`
/// before that value is used as a `_core` index.
internal var _coreOffset: Int
/// Creates a view of the given string.
public init(_ text: String) {
self._core = text._core
self._coreOffset = 0
}
public // @testable
init(_ _core: _StringCore) {
init(_ _core: _StringCore, coreOffset: Int = 0) {
self._core = _core
self._coreOffset = coreOffset
}
}
@@ -139,7 +147,7 @@ extension String {
extension String.CharacterView : BidirectionalCollection {
internal typealias UnicodeScalarView = String.UnicodeScalarView
internal var unicodeScalars: UnicodeScalarView {
return UnicodeScalarView(_core)
return UnicodeScalarView(_core, coreOffset: _coreOffset)
}
/// A position in a string's `CharacterView` instance.
@@ -246,7 +254,7 @@ extension String.CharacterView : BidirectionalCollection {
from start: UnicodeScalarView.Index
) -> Int {
var start = start
let end = UnicodeScalarView.Index(_position: _core.count)
let end = unicodeScalars.endIndex
if start == end {
return 0
}
@@ -288,7 +296,7 @@ extension String.CharacterView : BidirectionalCollection {
internal func _measureExtendedGraphemeClusterBackward(
from end: UnicodeScalarView.Index
) -> Int {
let start = UnicodeScalarView.Index(_position: 0)
let start = unicodeScalars.startIndex
if start == end {
return 0
}
@@ -363,8 +371,8 @@ extension String.CharacterView : RangeReplaceableCollection {
with newElements: C
) where C : Collection, C.Iterator.Element == Character {
let rawSubRange: Range<Int> =
bounds.lowerBound._base._position
..< bounds.upperBound._base._position
bounds.lowerBound._base._position - _coreOffset
..< bounds.upperBound._base._position - _coreOffset
let lazyUTF16 = newElements.lazy.flatMap { $0.utf16 }
_core.replaceSubrange(rawSubRange, with: lazyUTF16)
}
@@ -436,10 +444,9 @@ extension String.CharacterView {
/// - Complexity: O(*n*) if the underlying string is bridged from
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
public subscript(bounds: Range<Index>) -> String.CharacterView {
let unicodeScalarRange =
bounds.lowerBound._base..<bounds.upperBound._base
return String.CharacterView(
String(_core).unicodeScalars[unicodeScalarRange]._core)
let unicodeScalarRange = bounds.lowerBound._base..<bounds.upperBound._base
return String.CharacterView(unicodeScalars[unicodeScalarRange]._core,
coreOffset: unicodeScalarRange.lowerBound._position)
}
}

View File

@@ -145,7 +145,7 @@ extension String {
/// The position of the first code unit if the `String` is
/// nonempty; identical to `endIndex` otherwise.
public var startIndex: Index {
return Index(_offset: 0)
return Index(_offset: _offset)
}
/// The "past the end" position---that is, the position one greater than
@@ -153,7 +153,7 @@ extension String {
///
/// In an empty UTF-16 view, `endIndex` is equal to `startIndex`.
public var endIndex: Index {
return Index(_offset: _length)
return Index(_offset: _offset + _length)
}
public struct Indices {
@@ -204,7 +204,7 @@ extension String {
}
func _internalIndex(at i: Int) -> Int {
return _core.startIndex + _offset + i
return _core.startIndex + i
}
/// Accesses the code unit at the given position.
@@ -220,11 +220,10 @@ extension String {
/// - Parameter position: A valid index of the view. `position` must be
/// less than the view's end index.
public subscript(i: Index) -> UTF16.CodeUnit {
let position = i._offset
_precondition(position >= 0 && position < _length,
_precondition(i >= startIndex && i < endIndex,
"out-of-range access on a UTF16View")
let index = _internalIndex(at: position)
let index = _internalIndex(at: i._offset)
let u = _core[index]
if _fastPath((u >> 11) != 0b1101_1) {
// Neither high-surrogate, nor low-surrogate -- well-formed sequence
@@ -292,8 +291,8 @@ extension String {
}
public var description: String {
let start = _internalIndex(at: 0)
let end = _internalIndex(at: _length)
let start = _internalIndex(at: _offset)
let end = _internalIndex(at: _offset + _length)
return String(_core[start..<end])
}
@@ -337,18 +336,17 @@ extension String {
public init?(_ utf16: UTF16View) {
let wholeString = String(utf16._core)
if let start = UTF16Index(
_offset: utf16._offset
).samePosition(in: wholeString) {
if let end = UTF16Index(
_offset: utf16._offset + utf16._length
).samePosition(in: wholeString) {
self = wholeString[start..<end]
return
}
}
guard
let start = UTF16Index(_offset: utf16._offset)
.samePosition(in: wholeString),
let end = UTF16Index(_offset: utf16._offset + utf16._length)
.samePosition(in: wholeString)
else
{
return nil
}
self = wholeString[start..<end]
}
/// The index type for subscripting a string's `utf16` view.
public typealias UTF16Index = UTF16View.Index

View File

@@ -62,8 +62,9 @@ extension String {
CustomStringConvertible,
CustomDebugStringConvertible
{
internal init(_ _core: _StringCore) {
internal init(_ _core: _StringCore, coreOffset: Int = 0) {
self._core = _core
self._coreOffset = coreOffset
}
internal struct _ScratchIterator : IteratorProtocol {
@@ -109,12 +110,24 @@ extension String {
@_versioned internal var _position: Int
}
/// Translates a `_core` index into a `UnicodeScalarIndex` using this view's
/// `_coreOffset`.
internal func _fromCoreIndex(_ i: Int) -> Index {
return Index(_position: i + _coreOffset)
}
/// Translates a `UnicodeScalarIndex` into a `_core` index using this view's
/// `_coreOffset`.
internal func _toCoreIndex(_ i: Index) -> Int {
return i._position - _coreOffset
}
/// The position of the first Unicode scalar value if the string is
/// nonempty.
///
/// If the string is empty, `startIndex` is equal to `endIndex`.
public var startIndex: Index {
return Index(_position: _core.startIndex)
return _fromCoreIndex(_core.startIndex)
}
/// The "past the end" position---that is, the position one greater than
@@ -122,31 +135,32 @@ extension String {
///
/// In an empty Unicode scalars view, `endIndex` is equal to `startIndex`.
public var endIndex: Index {
return Index(_position: _core.endIndex)
return _fromCoreIndex(_core.endIndex)
}
/// Returns the next consecutive location after `i`.
///
/// - Precondition: The next location exists.
public func index(after i: Index) -> Index {
var scratch = _ScratchIterator(_core, i._position)
let i = _toCoreIndex(i)
var scratch = _ScratchIterator(_core, i)
var decoder = UTF16()
let (_, length) = decoder._decodeOne(&scratch)
return Index(_position: i._position + length)
return _fromCoreIndex(i + length)
}
/// Returns the previous consecutive location before `i`.
///
/// - Precondition: The previous location exists.
public func index(before i: Index) -> Index {
var i = i._position - 1
var i = _toCoreIndex(i) - 1
let codeUnit = _core[i]
if _slowPath((codeUnit >> 10) == 0b1101_11) {
if i != 0 && (_core[i - 1] >> 10) == 0b1101_10 {
i -= 1
}
}
return Index(_position: i)
return _fromCoreIndex(i)
}
/// Accesses the Unicode scalar value at the given position.
@@ -166,7 +180,7 @@ extension String {
/// - Parameter position: A valid index of the Unicode scalars view.
/// `position` must be less than the view's end index.
public subscript(position: Index) -> UnicodeScalar {
var scratch = _ScratchIterator(_core, position._position)
var scratch = _ScratchIterator(_core, _toCoreIndex(position))
var decoder = UTF16()
switch decoder.decode(&scratch) {
case .scalarValue(let us):
@@ -192,8 +206,9 @@ extension String {
/// - Complexity: O(*n*) if the underlying string is bridged from
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
public subscript(r: Range<Index>) -> UnicodeScalarView {
return UnicodeScalarView(
_core[r.lowerBound._position..<r.upperBound._position])
let rawSubRange = _toCoreIndex(r.lowerBound)..<_toCoreIndex(r.upperBound)
return UnicodeScalarView(_core[rawSubRange],
coreOffset: r.lowerBound._position)
}
/// An iterator over the Unicode scalars that make up a `UnicodeScalarView`
@@ -270,7 +285,7 @@ extension String {
}
public var description: String {
return String(_core[startIndex._position..<endIndex._position])
return String(_core)
}
public var debugDescription: String {
@@ -278,6 +293,12 @@ extension String {
}
internal var _core: _StringCore
/// The offset of this view's `_core` from an original core. This works
/// around the fact that `_StringCore` is always zero-indexed.
/// `_coreOffset` should be subtracted from `UnicodeScalarIndex._position`
/// before that value is used as a `_core` index.
internal var _coreOffset: Int
}
/// Creates a string corresponding to the given collection of Unicode
@@ -391,9 +412,8 @@ extension String.UnicodeScalarView : RangeReplaceableCollection {
_ bounds: Range<Index>,
with newElements: C
) where C : Collection, C.Iterator.Element == UnicodeScalar {
let rawSubRange: Range<Int> =
bounds.lowerBound._position
..< bounds.upperBound._position
let rawSubRange: Range<Int> = _toCoreIndex(bounds.lowerBound) ..<
_toCoreIndex(bounds.upperBound)
let lazyUTF16 = newElements.lazy.flatMap { $0.utf16 }
_core.replaceSubrange(rawSubRange, with: lazyUTF16)
}

View File

@@ -5,11 +5,10 @@ import StdlibUnittest
var SubstringTests = TestSuite("SubstringTests")
func checkMatch<S: Collection, T: Collection
func checkMatch<S: Collection, T: Collection>(_ x: S, _ y: T, _ i: S.Index)
where S.Index == T.Index, S.Iterator.Element == T.Iterator.Element,
S.Iterator.Element: Equatable>(
_ x: S, _ y: T, _ i: S.Index) {
S.Iterator.Element: Equatable
{
expectEqual(x[i], y[i])
}
@@ -25,9 +24,7 @@ SubstringTests.test("String") {
expectEqual(s3, "cd")
}
SubstringTests.test("CharacterView")
.xfail(.always("CharacterView slices don't share indices"))
.code {
SubstringTests.test("CharacterView") {
let s = "abcdefg"
var t = s.characters.dropFirst(2)
var u = t.dropFirst(2)
@@ -41,6 +38,11 @@ SubstringTests.test("CharacterView")
checkMatch(t, u, u.index(after: u.startIndex))
checkMatch(t, u, u.index(before: u.endIndex))
expectEqual("", String(t.dropFirst(10)))
expectEqual("", String(t.dropLast(10)))
expectEqual("", String(u.dropFirst(10)))
expectEqual("", String(u.dropLast(10)))
t.replaceSubrange(t.startIndex...t.startIndex, with: ["C"])
u.replaceSubrange(u.startIndex...u.startIndex, with: ["E"])
expectEqual(String(u), "Efg")
@@ -48,9 +50,7 @@ SubstringTests.test("CharacterView")
expectEqual(s, "abcdefg")
}
SubstringTests.test("UnicodeScalars")
.xfail(.always("UnicodeScalarsView slices don't share indices"))
.code {
SubstringTests.test("UnicodeScalars") {
let s = "abcdefg"
var t = s.unicodeScalars.dropFirst(2)
var u = t.dropFirst(2)
@@ -64,6 +64,11 @@ SubstringTests.test("UnicodeScalars")
checkMatch(t, u, u.index(after: u.startIndex))
checkMatch(t, u, u.index(before: u.endIndex))
expectEqual("", String(t.dropFirst(10)))
expectEqual("", String(t.dropLast(10)))
expectEqual("", String(u.dropFirst(10)))
expectEqual("", String(u.dropLast(10)))
t.replaceSubrange(t.startIndex...t.startIndex, with: ["C"])
u.replaceSubrange(u.startIndex...u.startIndex, with: ["E"])
expectEqual(String(u), "Efg")
@@ -71,9 +76,7 @@ SubstringTests.test("UnicodeScalars")
expectEqual(s, "abcdefg")
}
SubstringTests.test("UTF16View")
.xfail(.always("UTF16View slices don't share indices"))
.code {
SubstringTests.test("UTF16View") {
let s = "abcdefg"
let t = s.utf16.dropFirst(2)
let u = t.dropFirst(2)
@@ -86,6 +89,11 @@ SubstringTests.test("UTF16View")
checkMatch(t, u, u.startIndex)
checkMatch(t, u, u.index(after: u.startIndex))
checkMatch(t, u, u.index(before: u.endIndex))
expectEqual("", String(t.dropFirst(10))!)
expectEqual("", String(t.dropLast(10))!)
expectEqual("", String(u.dropFirst(10))!)
expectEqual("", String(u.dropLast(10))!)
}
SubstringTests.test("UTF8View") {
@@ -99,6 +107,11 @@ SubstringTests.test("UTF8View") {
checkMatch(s.utf8, t, u.startIndex)
checkMatch(t, u, u.startIndex)
checkMatch(t, u, u.index(after: u.startIndex))
expectEqual("", String(t.dropFirst(10))!)
expectEqual("", String(t.dropLast(10))!)
expectEqual("", String(u.dropFirst(10))!)
expectEqual("", String(u.dropLast(10))!)
}
runAllTests()