mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
[stdlib] Make String.UTF8View bidirectional
This is a step along the way toward handling backward compatibility of UTF8View slicing and preventing inadvertent creation of String instances that keep inaccessible memory alive.
This commit is contained in:
@@ -29,15 +29,19 @@ extension String {
|
|||||||
|
|
||||||
/// Convenience accessors
|
/// Convenience accessors
|
||||||
extension String.Index._Cache {
|
extension String.Index._Cache {
|
||||||
|
@_versioned
|
||||||
var utf16: Void? {
|
var utf16: Void? {
|
||||||
if case .utf16 = self { return () } else { return nil }
|
if case .utf16 = self { return () } else { return nil }
|
||||||
}
|
}
|
||||||
|
@_versioned
|
||||||
var utf8: String.Index._UTF8Buffer? {
|
var utf8: String.Index._UTF8Buffer? {
|
||||||
if case .utf8(let r) = self { return r } else { return nil }
|
if case .utf8(let r) = self { return r } else { return nil }
|
||||||
}
|
}
|
||||||
|
@_versioned
|
||||||
var character: UInt16? {
|
var character: UInt16? {
|
||||||
if case .character(let r) = self { return r } else { return nil }
|
if case .character(let r) = self { return r } else { return nil }
|
||||||
}
|
}
|
||||||
|
@_versioned
|
||||||
var unicodeScalar: UnicodeScalar? {
|
var unicodeScalar: UnicodeScalar? {
|
||||||
if case .unicodeScalar(let r) = self { return r } else { return nil }
|
if case .unicodeScalar(let r) = self { return r } else { return nil }
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -98,9 +98,10 @@ extension String {
|
|||||||
/// print(String(s1.utf8.prefix(15)))
|
/// print(String(s1.utf8.prefix(15)))
|
||||||
/// // Prints "They call me 'B"
|
/// // Prints "They call me 'B"
|
||||||
public struct UTF8View
|
public struct UTF8View
|
||||||
: Collection,
|
: BidirectionalCollection,
|
||||||
CustomStringConvertible,
|
CustomStringConvertible,
|
||||||
CustomDebugStringConvertible {
|
CustomDebugStringConvertible {
|
||||||
|
|
||||||
@_versioned
|
@_versioned
|
||||||
internal let _core: _StringCore
|
internal let _core: _StringCore
|
||||||
|
|
||||||
@@ -165,38 +166,81 @@ extension String {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var j = i
|
var j = i
|
||||||
while true {
|
|
||||||
if case .utf8(let buffer) = j._cache {
|
|
||||||
_onFastPath()
|
|
||||||
var scalarLength16 = 1
|
|
||||||
let b0 = buffer.first._unsafelyUnwrappedUnchecked
|
|
||||||
var nextBuffer = buffer
|
|
||||||
|
|
||||||
let leading1s = (~b0).leadingZeroBitCount
|
// Ensure j's cache is utf8
|
||||||
if leading1s == 0 {
|
if _slowPath(j._cache.utf8 == nil) {
|
||||||
nextBuffer.removeFirst()
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
let n8 = j._transcodedOffset + 1
|
|
||||||
// If we haven't reached a scalar boundary...
|
|
||||||
if _fastPath(n8 < leading1s) {
|
|
||||||
return Index(
|
|
||||||
encodedOffset: j.encodedOffset,
|
|
||||||
transcodedOffset: n8, .utf8(buffer: nextBuffer))
|
|
||||||
}
|
|
||||||
scalarLength16 = n8 >> 2 + 1
|
|
||||||
nextBuffer.removeFirst(n8)
|
|
||||||
}
|
|
||||||
if _fastPath(!nextBuffer.isEmpty) {
|
|
||||||
return Index(
|
|
||||||
encodedOffset: j.encodedOffset + scalarLength16,
|
|
||||||
.utf8(buffer: nextBuffer))
|
|
||||||
}
|
|
||||||
return _index(atEncodedOffset: j.encodedOffset + scalarLength16)
|
|
||||||
}
|
|
||||||
j = _index(atEncodedOffset: j.encodedOffset)
|
j = _index(atEncodedOffset: j.encodedOffset)
|
||||||
precondition(j != endIndex, "index out of bounds")
|
precondition(j != endIndex, "index out of bounds")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let buffer = j._cache.utf8._unsafelyUnwrappedUnchecked
|
||||||
|
|
||||||
|
var scalarLength16 = 1
|
||||||
|
let b0 = buffer.first._unsafelyUnwrappedUnchecked
|
||||||
|
var nextBuffer = buffer
|
||||||
|
|
||||||
|
let leading1s = (~b0).leadingZeroBitCount
|
||||||
|
if _fastPath(leading1s == 0) { // ASCII in buffer; just consume it
|
||||||
|
nextBuffer.removeFirst()
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Number of bytes consumed in this scalar
|
||||||
|
let n8 = j._transcodedOffset + 1
|
||||||
|
// If we haven't reached a scalar boundary...
|
||||||
|
if _fastPath(n8 < leading1s) {
|
||||||
|
// Advance to the next position in this scalar
|
||||||
|
return Index(
|
||||||
|
encodedOffset: j.encodedOffset,
|
||||||
|
transcodedOffset: n8, .utf8(buffer: buffer))
|
||||||
|
}
|
||||||
|
// We reached a scalar boundary; compute the underlying utf16's width
|
||||||
|
// based on the number of utf8 code units
|
||||||
|
scalarLength16 = n8 >> 2 + 1
|
||||||
|
nextBuffer.removeFirst(n8)
|
||||||
|
}
|
||||||
|
|
||||||
|
if _fastPath(!nextBuffer.isEmpty) {
|
||||||
|
return Index(
|
||||||
|
encodedOffset: j.encodedOffset + scalarLength16,
|
||||||
|
.utf8(buffer: nextBuffer))
|
||||||
|
}
|
||||||
|
// If nothing left in the buffer, refill it.
|
||||||
|
return _index(atEncodedOffset: j.encodedOffset + scalarLength16)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the position immediately before `i` in this UTF-8 view.
///
/// Traps when there is no earlier position: on the ASCII path when
/// `i.encodedOffset` is not greater than zero, and otherwise when no
/// UTF-16 input precedes `i` ("index out of bounds").
///
/// - Parameter i: An index into this view.
/// - Returns: The index of the preceding UTF-8 code unit.
public func index(before i: Index) -> Index {
|
||||||
|
// ASCII storage: every code unit is a single UTF-8 byte, so stepping back
// is just a decrement of the encoded offset.
if _fastPath(_core.isASCII) {
|
||||||
|
precondition(i.encodedOffset > 0)
|
||||||
|
return Index(encodedOffset: i.encodedOffset - 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// A nonzero transcoded offset means `i` is past the first UTF-8 code unit
// of its scalar, so the previous position lies inside the same scalar.
if i._transcodedOffset != 0 {
|
||||||
|
// Such an index is expected to carry a UTF-8 cache.
_sanityCheck(i._cache.utf8 != nil)
|
||||||
|
var r = i
|
||||||
|
// NOTE(review): presumably `_compoundOffset` packs the transcoded offset in
// its low bits, so subtracting 1 moves back one UTF-8 code unit while
// keeping the same encoded offset and cache — confirm against String.Index.
r._compoundOffset = r._compoundOffset &- 1
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle the scalar boundary the same way as the not-a-utf8-index case.
|
||||||
|
|
||||||
|
// Parse a single scalar
// The parser walks backward over the UTF-16 code units that precede `i`.
|
||||||
|
var p = Unicode.UTF16.ReverseParser()
|
||||||
|
var s = _core[..<i.encodedOffset].reversed().makeIterator()
|
||||||
|
let u8: Unicode.UTF8.EncodedScalar
|
||||||
|
switch p.parseScalar(from: &s) {
|
||||||
|
case .valid(let u16):
|
||||||
|
u8 = Unicode.UTF8.transcode(
|
||||||
|
u16, from: Unicode.UTF16.self)._unsafelyUnwrappedUnchecked
|
||||||
|
// NOTE(review): `stride` is bound but never used in this case body;
// `case .error:` would avoid the unused-binding warning.
case .error(let stride):
|
||||||
|
// Invalid UTF-16 is represented by the replacement character's UTF-8 form.
u8 = Unicode.UTF8.encodedReplacementCharacter
|
||||||
|
case .emptyInput:
|
||||||
|
_preconditionFailure("index out of bounds")
|
||||||
|
}
|
||||||
|
return Index(
|
||||||
|
// A scalar that needs 4 UTF-8 code units occupies two UTF-16 code units
// (a surrogate pair); shorter encodings occupy one.
encodedOffset: i.encodedOffset &- (u8.count < 4 ? 1 : 2),
|
||||||
|
// Land on the last UTF-8 code unit of the scalar just parsed.
transcodedOffset: u8.count &- 1,
|
||||||
|
.utf8(buffer: String.Index._UTF8Buffer(u8))
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
public func distance(from i: Index, to j: Index) -> IndexDistance {
|
public func distance(from i: Index, to j: Index) -> IndexDistance {
|
||||||
@@ -586,3 +630,34 @@ extension String.UTF8View {
|
|||||||
return self[i!]
|
return self[i!]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
//===--- Slicing Support --------------------------------------------------===//
|
||||||
|
/// In Swift 3.2, in the absence of type context,
|
||||||
|
///
|
||||||
|
/// someString.utf8[someString.startIndex..<someString.endIndex]
|
||||||
|
///
|
||||||
|
/// was deduced to be of type `String.UTF8View`. Provide a more-specific
|
||||||
|
/// Swift-3-only `subscript` overload that continues to produce
|
||||||
|
/// `String.UTF8View`.
|
||||||
|
extension String.UTF8View {
|
||||||
|
@available(swift, introduced: 4)
|
||||||
|
public subscript(r: Range<Index>) -> String.UTF8View.SubSequence {
|
||||||
|
return String.UTF8View.SubSequence(base: self, bounds: r)
|
||||||
|
}
|
||||||
|
|
||||||
|
@available(swift, obsoleted: 4)
|
||||||
|
public subscript(bounds: Range<Index>) -> String.UTF8View {
|
||||||
|
var r = self
|
||||||
|
r._startIndex = bounds.lowerBound
|
||||||
|
r._endIndex = bounds.upperBound
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
@available(swift, obsoleted: 4)
|
||||||
|
public subscript(bounds: ClosedRange<Index>) -> String.UTF8View {
|
||||||
|
return self[bounds.relative(to: self)]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*/
|
||||||
|
|||||||
@@ -71,7 +71,7 @@ func acceptsRandomAccessCollection<C: RandomAccessCollection>(_: C) {}
|
|||||||
|
|
||||||
func testStringCollectionTypes(s: String) {
|
func testStringCollectionTypes(s: String) {
|
||||||
acceptsCollection(s.utf8)
|
acceptsCollection(s.utf8)
|
||||||
acceptsBidirectionalCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'BidirectionalCollection'}}
|
acceptsBidirectionalCollection(s.utf8)
|
||||||
acceptsRandomAccessCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'RandomAccessCollection'}}
|
acceptsRandomAccessCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'RandomAccessCollection'}}
|
||||||
|
|
||||||
acceptsCollection(s.utf16)
|
acceptsCollection(s.utf16)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ func acceptsRandomAccessCollection<I: RandomAccessCollection>(_: I) {}
|
|||||||
|
|
||||||
func testStringCollectionTypes(s: String) {
|
func testStringCollectionTypes(s: String) {
|
||||||
acceptsCollection(s.utf8)
|
acceptsCollection(s.utf8)
|
||||||
acceptsBidirectionalCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'BidirectionalCollection'}}
|
acceptsBidirectionalCollection(s.utf8)
|
||||||
acceptsRandomAccessCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'RandomAccessCollection'}}
|
acceptsRandomAccessCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'RandomAccessCollection'}}
|
||||||
|
|
||||||
// UTF16View is random-access with Foundation, bidirectional without
|
// UTF16View is random-access with Foundation, bidirectional without
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ func test_UTF16ViewSubscriptByInt(x: String.UTF16View, i: Int, r: Range<Int>) {
|
|||||||
|
|
||||||
func test_UTF8View(s: String.UTF8View, i: String.UTF8View.Index, d: Int) {
|
func test_UTF8View(s: String.UTF8View, i: String.UTF8View.Index, d: Int) {
|
||||||
_ = s.index(after: i) // OK
|
_ = s.index(after: i) // OK
|
||||||
_ = s.index(before: i) // expected-error {{before:}} expected-note {{overloads}}
|
_ = s.index(before: i) // OK
|
||||||
_ = s.index(i, offsetBy: d) // OK
|
_ = s.index(i, offsetBy: d) // OK
|
||||||
_ = s.index(i, offsetBy: d, limitedBy: i) // OK
|
_ = s.index(i, offsetBy: d, limitedBy: i) // OK
|
||||||
_ = s.distance(from: i, to: i) // OK
|
_ = s.distance(from: i, to: i) // OK
|
||||||
|
|||||||
@@ -59,10 +59,10 @@ StringTests.test("AssociatedTypes-UTF8View") {
|
|||||||
expectCollectionAssociatedTypes(
|
expectCollectionAssociatedTypes(
|
||||||
collectionType: View.self,
|
collectionType: View.self,
|
||||||
iteratorType: View.Iterator.self,
|
iteratorType: View.Iterator.self,
|
||||||
subSequenceType: Slice<View>.self,
|
subSequenceType: BidirectionalSlice<View>.self,
|
||||||
indexType: View.Index.self,
|
indexType: View.Index.self,
|
||||||
indexDistanceType: Int.self,
|
indexDistanceType: Int.self,
|
||||||
indicesType: DefaultIndices<View>.self)
|
indicesType: DefaultBidirectionalIndices<View>.self)
|
||||||
}
|
}
|
||||||
|
|
||||||
StringTests.test("AssociatedTypes-UTF16View") {
|
StringTests.test("AssociatedTypes-UTF16View") {
|
||||||
|
|||||||
@@ -728,8 +728,7 @@ tests.test("String.UTF8View/Collection")
|
|||||||
.forEach(in: utfTests) {
|
.forEach(in: utfTests) {
|
||||||
test in
|
test in
|
||||||
|
|
||||||
// FIXME(ABI)#72 : should be `checkBidirectionalCollection`.
|
checkBidirectionalCollection(test.utf8, test.string.utf8) { $0 == $1 }
|
||||||
checkForwardCollection(test.utf8, test.string.utf8) { $0 == $1 }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
tests.test("String.UTF16View/BidirectionalCollection")
|
tests.test("String.UTF16View/BidirectionalCollection")
|
||||||
|
|||||||
Reference in New Issue
Block a user