mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
[stdlib] Make String.UTF8View bidirectional
This is a step along the way toward handling backward compatibility of UTF8View slicing and preventing inadvertent creation of String instances that keep inaccessible memory alive.
This commit is contained in:
@@ -29,15 +29,19 @@ extension String {
|
||||
|
||||
/// Convenience accessors
|
||||
extension String.Index._Cache {
|
||||
/// `()` when this cache is in the `.utf16` case; `nil` for every other case.
@_versioned
var utf16: Void? {
  guard case .utf16 = self else { return nil }
  return ()
}
|
||||
/// The payload of the `.utf8` case, or `nil` when this cache is in any
/// other case.
@_versioned
var utf8: String.Index._UTF8Buffer? {
  guard case .utf8(let buffer) = self else { return nil }
  return buffer
}
|
||||
/// The payload of the `.character` case, or `nil` when this cache is in
/// any other case.
@_versioned
var character: UInt16? {
  guard case .character(let payload) = self else { return nil }
  return payload
}
|
||||
/// The payload of the `.unicodeScalar` case, or `nil` when this cache is
/// in any other case.
@_versioned
var unicodeScalar: UnicodeScalar? {
  guard case .unicodeScalar(let scalar) = self else { return nil }
  return scalar
}
|
||||
|
||||
@@ -98,9 +98,10 @@ extension String {
|
||||
/// print(String(s1.utf8.prefix(15)))
|
||||
/// // Prints "They call me 'B"
|
||||
public struct UTF8View
|
||||
: Collection,
|
||||
: BidirectionalCollection,
|
||||
CustomStringConvertible,
|
||||
CustomDebugStringConvertible {
|
||||
|
||||
@_versioned
|
||||
internal let _core: _StringCore
|
||||
|
||||
@@ -165,38 +166,81 @@ extension String {
|
||||
}
|
||||
|
||||
var j = i
|
||||
while true {
|
||||
if case .utf8(let buffer) = j._cache {
|
||||
_onFastPath()
|
||||
|
||||
// Ensure j's cache is utf8
|
||||
if _slowPath(j._cache.utf8 == nil) {
|
||||
j = _index(atEncodedOffset: j.encodedOffset)
|
||||
precondition(j != endIndex, "index out of bounds")
|
||||
}
|
||||
|
||||
let buffer = j._cache.utf8._unsafelyUnwrappedUnchecked
|
||||
|
||||
var scalarLength16 = 1
|
||||
let b0 = buffer.first._unsafelyUnwrappedUnchecked
|
||||
var nextBuffer = buffer
|
||||
|
||||
let leading1s = (~b0).leadingZeroBitCount
|
||||
if leading1s == 0 {
|
||||
if _fastPath(leading1s == 0) { // ASCII in buffer; just consume it
|
||||
nextBuffer.removeFirst()
|
||||
}
|
||||
else {
|
||||
// Number of bytes consumed in this scalar
|
||||
let n8 = j._transcodedOffset + 1
|
||||
// If we haven't reached a scalar boundary...
|
||||
if _fastPath(n8 < leading1s) {
|
||||
// Advance to the next position in this scalar
|
||||
return Index(
|
||||
encodedOffset: j.encodedOffset,
|
||||
transcodedOffset: n8, .utf8(buffer: nextBuffer))
|
||||
transcodedOffset: n8, .utf8(buffer: buffer))
|
||||
}
|
||||
// We reached a scalar boundary; compute the underlying utf16's width
|
||||
// based on the number of utf8 code units
|
||||
scalarLength16 = n8 >> 2 + 1
|
||||
nextBuffer.removeFirst(n8)
|
||||
}
|
||||
|
||||
if _fastPath(!nextBuffer.isEmpty) {
|
||||
return Index(
|
||||
encodedOffset: j.encodedOffset + scalarLength16,
|
||||
.utf8(buffer: nextBuffer))
|
||||
}
|
||||
// If nothing left in the buffer, refill it.
|
||||
return _index(atEncodedOffset: j.encodedOffset + scalarLength16)
|
||||
}
|
||||
j = _index(atEncodedOffset: j.encodedOffset)
|
||||
precondition(j != endIndex, "index out of bounds")
|
||||
|
||||
/// Returns the position immediately before `i` in this view.
///
/// Three paths are taken, in order:
///
/// 1. When the underlying core is ASCII, the result is simply one encoded
///    offset earlier.
/// 2. When `i` has a nonzero transcoded offset (it is not on a scalar
///    boundary), the result is `i` with its compound offset decremented by
///    one; the cached UTF-8 buffer is carried over unchanged.
/// 3. Otherwise the scalar that ends at `i.encodedOffset` is parsed
///    backwards from the underlying UTF-16 storage, transcoded to UTF-8, and
///    the result points at the last code unit of that scalar.
///
/// - Parameter i: An index into this view. Traps when nothing precedes it
///   (`encodedOffset` is 0 on the ASCII path, or the reverse parser sees
///   empty input on the general path).
/// - Returns: The index of the UTF-8 code unit preceding `i`.
public func index(before i: Index) -> Index {
  // ASCII storage: step back one encoded offset.
  if _fastPath(_core.isASCII) {
    precondition(i.encodedOffset > 0)
    return Index(encodedOffset: i.encodedOffset - 1)
  }

  // Not on a scalar boundary: step back by decrementing the compound offset.
  // NOTE(review): presumably the low bits of `_compoundOffset` hold the
  // transcoded offset, so `&- 1` moves one code unit back — confirm against
  // the `String.Index` layout.
  if i._transcodedOffset != 0 {
    _sanityCheck(i._cache.utf8 != nil)
    var r = i
    r._compoundOffset = r._compoundOffset &- 1
    return r
  }

  // Handle the scalar boundary the same way as the not-a-utf8-index case.

  // Parse a single scalar, reading backwards from `i.encodedOffset`.
  var p = Unicode.UTF16.ReverseParser()
  var s = _core[..<i.encodedOffset].reversed().makeIterator()
  let u8: Unicode.UTF8.EncodedScalar
  switch p.parseScalar(from: &s) {
  case .valid(let u16):
    u8 = Unicode.UTF8.transcode(
      u16, from: Unicode.UTF16.self)._unsafelyUnwrappedUnchecked
  case .error:
    // Ill-formed UTF-16 is represented by the replacement character; the
    // error's payload is not needed here, so it is not bound.
    u8 = Unicode.UTF8.encodedReplacementCharacter
  case .emptyInput:
    _preconditionFailure("index out of bounds")
  }
  // A scalar of 4 UTF-8 code units moves the encoded offset back by 2,
  // anything shorter by 1. The transcoded offset selects the last code unit
  // of the scalar just parsed.
  return Index(
    encodedOffset: i.encodedOffset &- (u8.count < 4 ? 1 : 2),
    transcodedOffset: u8.count &- 1,
    .utf8(buffer: String.Index._UTF8Buffer(u8))
  )
}
|
||||
|
||||
public func distance(from i: Index, to j: Index) -> IndexDistance {
|
||||
@@ -586,3 +630,34 @@ extension String.UTF8View {
|
||||
return self[i!]
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
//===--- Slicing Support --------------------------------------------------===//
|
||||
/// In Swift 3.2, in the absence of type context,
|
||||
///
|
||||
/// someString.utf8[someString.startIndex..<someString.endIndex]
|
||||
///
|
||||
/// was deduced to be of type `String.UTF8View`. Provide a more-specific
|
||||
/// Swift-3-only `subscript` overload that continues to produce
|
||||
/// `String.UTF8View`.
|
||||
extension String.UTF8View {
|
||||
@available(swift, introduced: 4)
|
||||
public subscript(r: Range<Index>) -> String.UTF8View.SubSequence {
|
||||
return String.UTF8View.SubSequence(base: self, bounds: r)
|
||||
}
|
||||
|
||||
@available(swift, obsoleted: 4)
|
||||
public subscript(bounds: Range<Index>) -> String.UTF8View {
|
||||
var r = self
|
||||
r._startIndex = bounds.lowerBound
|
||||
r._endIndex = bounds.upperBound
|
||||
return r
|
||||
}
|
||||
|
||||
@available(swift, obsoleted: 4)
|
||||
public subscript(bounds: ClosedRange<Index>) -> String.UTF8View {
|
||||
return self[bounds.relative(to: self)]
|
||||
}
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
@@ -71,7 +71,7 @@ func acceptsRandomAccessCollection<C: RandomAccessCollection>(_: C) {}
|
||||
|
||||
func testStringCollectionTypes(s: String) {
|
||||
acceptsCollection(s.utf8)
|
||||
acceptsBidirectionalCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'BidirectionalCollection'}}
|
||||
acceptsBidirectionalCollection(s.utf8)
|
||||
acceptsRandomAccessCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'RandomAccessCollection'}}
|
||||
|
||||
acceptsCollection(s.utf16)
|
||||
|
||||
@@ -7,7 +7,7 @@ func acceptsRandomAccessCollection<I: RandomAccessCollection>(_: I) {}
|
||||
|
||||
func testStringCollectionTypes(s: String) {
|
||||
acceptsCollection(s.utf8)
|
||||
acceptsBidirectionalCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'BidirectionalCollection'}}
|
||||
acceptsBidirectionalCollection(s.utf8)
|
||||
acceptsRandomAccessCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'RandomAccessCollection'}}
|
||||
|
||||
// UTF16View is random-access with Foundation, bidirectional without
|
||||
|
||||
@@ -26,7 +26,7 @@ func test_UTF16ViewSubscriptByInt(x: String.UTF16View, i: Int, r: Range<Int>) {
|
||||
|
||||
func test_UTF8View(s: String.UTF8View, i: String.UTF8View.Index, d: Int) {
|
||||
_ = s.index(after: i) // OK
|
||||
_ = s.index(before: i) // expected-error {{before:}} expected-note {{overloads}}
|
||||
_ = s.index(before: i) // OK
|
||||
_ = s.index(i, offsetBy: d) // OK
|
||||
_ = s.index(i, offsetBy: d, limitedBy: i) // OK
|
||||
_ = s.distance(from: i, to: i) // OK
|
||||
|
||||
@@ -59,10 +59,10 @@ StringTests.test("AssociatedTypes-UTF8View") {
|
||||
expectCollectionAssociatedTypes(
|
||||
collectionType: View.self,
|
||||
iteratorType: View.Iterator.self,
|
||||
subSequenceType: Slice<View>.self,
|
||||
subSequenceType: BidirectionalSlice<View>.self,
|
||||
indexType: View.Index.self,
|
||||
indexDistanceType: Int.self,
|
||||
indicesType: DefaultIndices<View>.self)
|
||||
indicesType: DefaultBidirectionalIndices<View>.self)
|
||||
}
|
||||
|
||||
StringTests.test("AssociatedTypes-UTF16View") {
|
||||
|
||||
@@ -728,8 +728,7 @@ tests.test("String.UTF8View/Collection")
|
||||
.forEach(in: utfTests) {
|
||||
test in
|
||||
|
||||
// FIXME(ABI)#72 : should be `checkBidirectionalCollection`.
|
||||
checkForwardCollection(test.utf8, test.string.utf8) { $0 == $1 }
|
||||
checkBidirectionalCollection(test.utf8, test.string.utf8) { $0 == $1 }
|
||||
}
|
||||
|
||||
tests.test("String.UTF16View/BidirectionalCollection")
|
||||
|
||||
Reference in New Issue
Block a user