mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
[stdlib] Fix string index sharing (#4896)
* [stdlib] Fix String.UTF16View index sharing
* [stdlib] Fix String.UnicodeScalarView index sharing
* [stdlib] Fix String.CharacterView index sharing
* [stdlib] Test advancing string indices past their ends
* [stdlib] Simplify CharacterView ranged subscript
This commit is contained in:
@@ -54,14 +54,22 @@ extension String {
|
||||
public struct CharacterView {
|
||||
internal var _core: _StringCore
|
||||
|
||||
/// The offset of this view's `_core` from an original core. This works
|
||||
/// around the fact that `_StringCore` is always zero-indexed.
|
||||
/// `_coreOffset` should be subtracted from `UnicodeScalarIndex._position`
|
||||
/// before that value is used as a `_core` index.
|
||||
internal var _coreOffset: Int
|
||||
|
||||
/// Creates a view of the given string.
|
||||
public init(_ text: String) {
|
||||
self._core = text._core
|
||||
self._coreOffset = 0
|
||||
}
|
||||
|
||||
public // @testable
|
||||
init(_ _core: _StringCore) {
|
||||
init(_ _core: _StringCore, coreOffset: Int = 0) {
|
||||
self._core = _core
|
||||
self._coreOffset = coreOffset
|
||||
}
|
||||
}
|
||||
|
||||
@@ -139,7 +147,7 @@ extension String {
|
||||
extension String.CharacterView : BidirectionalCollection {
|
||||
internal typealias UnicodeScalarView = String.UnicodeScalarView
|
||||
internal var unicodeScalars: UnicodeScalarView {
|
||||
return UnicodeScalarView(_core)
|
||||
return UnicodeScalarView(_core, coreOffset: _coreOffset)
|
||||
}
|
||||
|
||||
/// A position in a string's `CharacterView` instance.
|
||||
@@ -246,7 +254,7 @@ extension String.CharacterView : BidirectionalCollection {
|
||||
from start: UnicodeScalarView.Index
|
||||
) -> Int {
|
||||
var start = start
|
||||
let end = UnicodeScalarView.Index(_position: _core.count)
|
||||
let end = unicodeScalars.endIndex
|
||||
if start == end {
|
||||
return 0
|
||||
}
|
||||
@@ -288,7 +296,7 @@ extension String.CharacterView : BidirectionalCollection {
|
||||
internal func _measureExtendedGraphemeClusterBackward(
|
||||
from end: UnicodeScalarView.Index
|
||||
) -> Int {
|
||||
let start = UnicodeScalarView.Index(_position: 0)
|
||||
let start = unicodeScalars.startIndex
|
||||
if start == end {
|
||||
return 0
|
||||
}
|
||||
@@ -363,8 +371,8 @@ extension String.CharacterView : RangeReplaceableCollection {
|
||||
with newElements: C
|
||||
) where C : Collection, C.Iterator.Element == Character {
|
||||
let rawSubRange: Range<Int> =
|
||||
bounds.lowerBound._base._position
|
||||
..< bounds.upperBound._base._position
|
||||
bounds.lowerBound._base._position - _coreOffset
|
||||
..< bounds.upperBound._base._position - _coreOffset
|
||||
let lazyUTF16 = newElements.lazy.flatMap { $0.utf16 }
|
||||
_core.replaceSubrange(rawSubRange, with: lazyUTF16)
|
||||
}
|
||||
@@ -436,10 +444,9 @@ extension String.CharacterView {
|
||||
/// - Complexity: O(*n*) if the underlying string is bridged from
|
||||
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
|
||||
public subscript(bounds: Range<Index>) -> String.CharacterView {
|
||||
let unicodeScalarRange =
|
||||
bounds.lowerBound._base..<bounds.upperBound._base
|
||||
return String.CharacterView(
|
||||
String(_core).unicodeScalars[unicodeScalarRange]._core)
|
||||
let unicodeScalarRange = bounds.lowerBound._base..<bounds.upperBound._base
|
||||
return String.CharacterView(unicodeScalars[unicodeScalarRange]._core,
|
||||
coreOffset: unicodeScalarRange.lowerBound._position)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -145,7 +145,7 @@ extension String {
|
||||
/// The position of the first code unit if the `String` is
|
||||
/// nonempty; identical to `endIndex` otherwise.
|
||||
public var startIndex: Index {
|
||||
return Index(_offset: 0)
|
||||
return Index(_offset: _offset)
|
||||
}
|
||||
|
||||
/// The "past the end" position---that is, the position one greater than
|
||||
@@ -153,7 +153,7 @@ extension String {
|
||||
///
|
||||
/// In an empty UTF-16 view, `endIndex` is equal to `startIndex`.
|
||||
public var endIndex: Index {
|
||||
return Index(_offset: _length)
|
||||
return Index(_offset: _offset + _length)
|
||||
}
|
||||
|
||||
public struct Indices {
|
||||
@@ -204,7 +204,7 @@ extension String {
|
||||
}
|
||||
|
||||
func _internalIndex(at i: Int) -> Int {
|
||||
return _core.startIndex + _offset + i
|
||||
return _core.startIndex + i
|
||||
}
|
||||
|
||||
/// Accesses the code unit at the given position.
|
||||
@@ -220,11 +220,10 @@ extension String {
|
||||
/// - Parameter i: A valid index of the view. `i` must be
|
||||
/// less than the view's end index.
|
||||
public subscript(i: Index) -> UTF16.CodeUnit {
|
||||
let position = i._offset
|
||||
_precondition(position >= 0 && position < _length,
|
||||
_precondition(i >= startIndex && i < endIndex,
|
||||
"out-of-range access on a UTF16View")
|
||||
|
||||
let index = _internalIndex(at: position)
|
||||
let index = _internalIndex(at: i._offset)
|
||||
let u = _core[index]
|
||||
if _fastPath((u >> 11) != 0b1101_1) {
|
||||
// Neither high-surrogate, nor low-surrogate -- well-formed sequence
|
||||
@@ -292,8 +291,8 @@ extension String {
|
||||
}
|
||||
|
||||
public var description: String {
|
||||
let start = _internalIndex(at: 0)
|
||||
let end = _internalIndex(at: _length)
|
||||
let start = _internalIndex(at: _offset)
|
||||
let end = _internalIndex(at: _offset + _length)
|
||||
return String(_core[start..<end])
|
||||
}
|
||||
|
||||
@@ -337,18 +336,17 @@ extension String {
|
||||
public init?(_ utf16: UTF16View) {
|
||||
let wholeString = String(utf16._core)
|
||||
|
||||
if let start = UTF16Index(
|
||||
_offset: utf16._offset
|
||||
).samePosition(in: wholeString) {
|
||||
if let end = UTF16Index(
|
||||
_offset: utf16._offset + utf16._length
|
||||
).samePosition(in: wholeString) {
|
||||
self = wholeString[start..<end]
|
||||
return
|
||||
}
|
||||
}
|
||||
guard
|
||||
let start = UTF16Index(_offset: utf16._offset)
|
||||
.samePosition(in: wholeString),
|
||||
let end = UTF16Index(_offset: utf16._offset + utf16._length)
|
||||
.samePosition(in: wholeString)
|
||||
else
|
||||
{
|
||||
return nil
|
||||
}
|
||||
self = wholeString[start..<end]
|
||||
}
|
||||
|
||||
/// The index type for subscripting a string's `utf16` view.
|
||||
public typealias UTF16Index = UTF16View.Index
|
||||
|
||||
@@ -62,8 +62,9 @@ extension String {
|
||||
CustomStringConvertible,
|
||||
CustomDebugStringConvertible
|
||||
{
|
||||
internal init(_ _core: _StringCore) {
|
||||
internal init(_ _core: _StringCore, coreOffset: Int = 0) {
|
||||
self._core = _core
|
||||
self._coreOffset = coreOffset
|
||||
}
|
||||
|
||||
internal struct _ScratchIterator : IteratorProtocol {
|
||||
@@ -109,12 +110,24 @@ extension String {
|
||||
@_versioned internal var _position: Int
|
||||
}
|
||||
|
||||
/// Translates a `_core` index into a `UnicodeScalarIndex` using this view's
|
||||
/// `_coreOffset`.
|
||||
internal func _fromCoreIndex(_ i: Int) -> Index {
|
||||
return Index(_position: i + _coreOffset)
|
||||
}
|
||||
|
||||
/// Translates a `UnicodeScalarIndex` into a `_core` index using this view's
|
||||
/// `_coreOffset`.
|
||||
internal func _toCoreIndex(_ i: Index) -> Int {
|
||||
return i._position - _coreOffset
|
||||
}
|
||||
|
||||
/// The position of the first Unicode scalar value if the string is
|
||||
/// nonempty.
|
||||
///
|
||||
/// If the string is empty, `startIndex` is equal to `endIndex`.
|
||||
public var startIndex: Index {
|
||||
return Index(_position: _core.startIndex)
|
||||
return _fromCoreIndex(_core.startIndex)
|
||||
}
|
||||
|
||||
/// The "past the end" position---that is, the position one greater than
|
||||
@@ -122,31 +135,32 @@ extension String {
|
||||
///
|
||||
/// In an empty Unicode scalars view, `endIndex` is equal to `startIndex`.
|
||||
public var endIndex: Index {
|
||||
return Index(_position: _core.endIndex)
|
||||
return _fromCoreIndex(_core.endIndex)
|
||||
}
|
||||
|
||||
/// Returns the next consecutive location after `i`.
|
||||
///
|
||||
/// - Precondition: The next location exists.
|
||||
public func index(after i: Index) -> Index {
|
||||
var scratch = _ScratchIterator(_core, i._position)
|
||||
let i = _toCoreIndex(i)
|
||||
var scratch = _ScratchIterator(_core, i)
|
||||
var decoder = UTF16()
|
||||
let (_, length) = decoder._decodeOne(&scratch)
|
||||
return Index(_position: i._position + length)
|
||||
return _fromCoreIndex(i + length)
|
||||
}
|
||||
|
||||
/// Returns the previous consecutive location before `i`.
|
||||
///
|
||||
/// - Precondition: The previous location exists.
|
||||
public func index(before i: Index) -> Index {
|
||||
var i = i._position - 1
|
||||
var i = _toCoreIndex(i) - 1
|
||||
let codeUnit = _core[i]
|
||||
if _slowPath((codeUnit >> 10) == 0b1101_11) {
|
||||
if i != 0 && (_core[i - 1] >> 10) == 0b1101_10 {
|
||||
i -= 1
|
||||
}
|
||||
}
|
||||
return Index(_position: i)
|
||||
return _fromCoreIndex(i)
|
||||
}
|
||||
|
||||
/// Accesses the Unicode scalar value at the given position.
|
||||
@@ -166,7 +180,7 @@ extension String {
|
||||
/// - Parameter position: A valid index of the Unicode scalars view. `position`
|
||||
/// must be less than the view's end index.
|
||||
public subscript(position: Index) -> UnicodeScalar {
|
||||
var scratch = _ScratchIterator(_core, position._position)
|
||||
var scratch = _ScratchIterator(_core, _toCoreIndex(position))
|
||||
var decoder = UTF16()
|
||||
switch decoder.decode(&scratch) {
|
||||
case .scalarValue(let us):
|
||||
@@ -192,8 +206,9 @@ extension String {
|
||||
/// - Complexity: O(*n*) if the underlying string is bridged from
|
||||
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
|
||||
public subscript(r: Range<Index>) -> UnicodeScalarView {
|
||||
return UnicodeScalarView(
|
||||
_core[r.lowerBound._position..<r.upperBound._position])
|
||||
let rawSubRange = _toCoreIndex(r.lowerBound)..<_toCoreIndex(r.upperBound)
|
||||
return UnicodeScalarView(_core[rawSubRange],
|
||||
coreOffset: r.lowerBound._position)
|
||||
}
|
||||
|
||||
/// An iterator over the Unicode scalars that make up a `UnicodeScalarView`
|
||||
@@ -270,7 +285,7 @@ extension String {
|
||||
}
|
||||
|
||||
public var description: String {
|
||||
return String(_core[startIndex._position..<endIndex._position])
|
||||
return String(_core)
|
||||
}
|
||||
|
||||
public var debugDescription: String {
|
||||
@@ -278,6 +293,12 @@ extension String {
|
||||
}
|
||||
|
||||
internal var _core: _StringCore
|
||||
|
||||
/// The offset of this view's `_core` from an original core. This works
|
||||
/// around the fact that `_StringCore` is always zero-indexed.
|
||||
/// `_coreOffset` should be subtracted from `UnicodeScalarIndex._position`
|
||||
/// before that value is used as a `_core` index.
|
||||
internal var _coreOffset: Int
|
||||
}
|
||||
|
||||
/// Creates a string corresponding to the given collection of Unicode
|
||||
@@ -391,9 +412,8 @@ extension String.UnicodeScalarView : RangeReplaceableCollection {
|
||||
_ bounds: Range<Index>,
|
||||
with newElements: C
|
||||
) where C : Collection, C.Iterator.Element == UnicodeScalar {
|
||||
let rawSubRange: Range<Int> =
|
||||
bounds.lowerBound._position
|
||||
..< bounds.upperBound._position
|
||||
let rawSubRange: Range<Int> = _toCoreIndex(bounds.lowerBound) ..<
|
||||
_toCoreIndex(bounds.upperBound)
|
||||
let lazyUTF16 = newElements.lazy.flatMap { $0.utf16 }
|
||||
_core.replaceSubrange(rawSubRange, with: lazyUTF16)
|
||||
}
|
||||
|
||||
@@ -5,11 +5,10 @@ import StdlibUnittest
|
||||
|
||||
var SubstringTests = TestSuite("SubstringTests")
|
||||
|
||||
func checkMatch<S: Collection, T: Collection
|
||||
func checkMatch<S: Collection, T: Collection>(_ x: S, _ y: T, _ i: S.Index)
|
||||
where S.Index == T.Index, S.Iterator.Element == T.Iterator.Element,
|
||||
S.Iterator.Element: Equatable>(
|
||||
_ x: S, _ y: T, _ i: S.Index) {
|
||||
|
||||
S.Iterator.Element: Equatable
|
||||
{
|
||||
expectEqual(x[i], y[i])
|
||||
}
|
||||
|
||||
@@ -25,9 +24,7 @@ SubstringTests.test("String") {
|
||||
expectEqual(s3, "cd")
|
||||
}
|
||||
|
||||
SubstringTests.test("CharacterView")
|
||||
.xfail(.always("CharacterView slices don't share indices"))
|
||||
.code {
|
||||
SubstringTests.test("CharacterView") {
|
||||
let s = "abcdefg"
|
||||
var t = s.characters.dropFirst(2)
|
||||
var u = t.dropFirst(2)
|
||||
@@ -41,6 +38,11 @@ SubstringTests.test("CharacterView")
|
||||
checkMatch(t, u, u.index(after: u.startIndex))
|
||||
checkMatch(t, u, u.index(before: u.endIndex))
|
||||
|
||||
expectEqual("", String(t.dropFirst(10)))
|
||||
expectEqual("", String(t.dropLast(10)))
|
||||
expectEqual("", String(u.dropFirst(10)))
|
||||
expectEqual("", String(u.dropLast(10)))
|
||||
|
||||
t.replaceSubrange(t.startIndex...t.startIndex, with: ["C"])
|
||||
u.replaceSubrange(u.startIndex...u.startIndex, with: ["E"])
|
||||
expectEqual(String(u), "Efg")
|
||||
@@ -48,9 +50,7 @@ SubstringTests.test("CharacterView")
|
||||
expectEqual(s, "abcdefg")
|
||||
}
|
||||
|
||||
SubstringTests.test("UnicodeScalars")
|
||||
.xfail(.always("UnicodeScalarsView slices don't share indices"))
|
||||
.code {
|
||||
SubstringTests.test("UnicodeScalars") {
|
||||
let s = "abcdefg"
|
||||
var t = s.unicodeScalars.dropFirst(2)
|
||||
var u = t.dropFirst(2)
|
||||
@@ -64,6 +64,11 @@ SubstringTests.test("UnicodeScalars")
|
||||
checkMatch(t, u, u.index(after: u.startIndex))
|
||||
checkMatch(t, u, u.index(before: u.endIndex))
|
||||
|
||||
expectEqual("", String(t.dropFirst(10)))
|
||||
expectEqual("", String(t.dropLast(10)))
|
||||
expectEqual("", String(u.dropFirst(10)))
|
||||
expectEqual("", String(u.dropLast(10)))
|
||||
|
||||
t.replaceSubrange(t.startIndex...t.startIndex, with: ["C"])
|
||||
u.replaceSubrange(u.startIndex...u.startIndex, with: ["E"])
|
||||
expectEqual(String(u), "Efg")
|
||||
@@ -71,9 +76,7 @@ SubstringTests.test("UnicodeScalars")
|
||||
expectEqual(s, "abcdefg")
|
||||
}
|
||||
|
||||
SubstringTests.test("UTF16View")
|
||||
.xfail(.always("UTF16View slices don't share indices"))
|
||||
.code {
|
||||
SubstringTests.test("UTF16View") {
|
||||
let s = "abcdefg"
|
||||
let t = s.utf16.dropFirst(2)
|
||||
let u = t.dropFirst(2)
|
||||
@@ -86,6 +89,11 @@ SubstringTests.test("UTF16View")
|
||||
checkMatch(t, u, u.startIndex)
|
||||
checkMatch(t, u, u.index(after: u.startIndex))
|
||||
checkMatch(t, u, u.index(before: u.endIndex))
|
||||
|
||||
expectEqual("", String(t.dropFirst(10))!)
|
||||
expectEqual("", String(t.dropLast(10))!)
|
||||
expectEqual("", String(u.dropFirst(10))!)
|
||||
expectEqual("", String(u.dropLast(10))!)
|
||||
}
|
||||
|
||||
SubstringTests.test("UTF8View") {
|
||||
@@ -99,6 +107,11 @@ SubstringTests.test("UTF8View") {
|
||||
checkMatch(s.utf8, t, u.startIndex)
|
||||
checkMatch(t, u, u.startIndex)
|
||||
checkMatch(t, u, u.index(after: u.startIndex))
|
||||
|
||||
expectEqual("", String(t.dropFirst(10))!)
|
||||
expectEqual("", String(t.dropLast(10))!)
|
||||
expectEqual("", String(u.dropFirst(10))!)
|
||||
expectEqual("", String(u.dropLast(10))!)
|
||||
}
|
||||
|
||||
runAllTests()
|
||||
|
||||
Reference in New Issue
Block a user