[stdlib] Make String.UTF8View bidirectional

This is a step along the way toward handling backward-compatibility of UTF8View
slicing and preventing inadvertent creation of String instances that keep
inaccessible memory alive.
This commit is contained in:
Dave Abrahams
2017-07-17 11:26:35 -07:00
parent 56705736b4
commit e6519fbd2b
7 changed files with 116 additions and 38 deletions

View File

@@ -29,15 +29,19 @@ extension String {
/// Convenience accessors
extension String.Index._Cache {
/// `()` when this cache is the `.utf16` case; `nil` for every other case.
@_versioned
var utf16: Void? {
  guard case .utf16 = self else { return nil }
  return ()
}
/// The buffer carried by the `.utf8` case, or `nil` when this cache is any
/// other case.
@_versioned
var utf8: String.Index._UTF8Buffer? {
  guard case .utf8(let buffer) = self else { return nil }
  return buffer
}
/// The `UInt16` payload of the `.character` case, or `nil` when this cache
/// is any other case.
@_versioned
var character: UInt16? {
  guard case .character(let payload) = self else { return nil }
  return payload
}
/// The scalar carried by the `.unicodeScalar` case, or `nil` when this cache
/// is any other case.
@_versioned
var unicodeScalar: UnicodeScalar? {
  guard case .unicodeScalar(let scalar) = self else { return nil }
  return scalar
}

View File

@@ -98,9 +98,10 @@ extension String {
/// print(String(s1.utf8.prefix(15)))
/// // Prints "They call me 'B"
public struct UTF8View
: Collection,
: BidirectionalCollection,
CustomStringConvertible,
CustomDebugStringConvertible {
@_versioned
internal let _core: _StringCore
@@ -165,38 +166,81 @@ extension String {
}
var j = i
while true {
if case .utf8(let buffer) = j._cache {
_onFastPath()
// Ensure j's cache is utf8
if _slowPath(j._cache.utf8 == nil) {
j = _index(atEncodedOffset: j.encodedOffset)
precondition(j != endIndex, "index out of bounds")
}
let buffer = j._cache.utf8._unsafelyUnwrappedUnchecked
var scalarLength16 = 1
let b0 = buffer.first._unsafelyUnwrappedUnchecked
var nextBuffer = buffer
let leading1s = (~b0).leadingZeroBitCount
if leading1s == 0 {
if _fastPath(leading1s == 0) { // ASCII in buffer; just consume it
nextBuffer.removeFirst()
}
else {
// Number of bytes consumed in this scalar
let n8 = j._transcodedOffset + 1
// If we haven't reached a scalar boundary...
if _fastPath(n8 < leading1s) {
// Advance to the next position in this scalar
return Index(
encodedOffset: j.encodedOffset,
transcodedOffset: n8, .utf8(buffer: nextBuffer))
transcodedOffset: n8, .utf8(buffer: buffer))
}
// We reached a scalar boundary; compute the underlying utf16's width
// based on the number of utf8 code units
scalarLength16 = n8 >> 2 + 1
nextBuffer.removeFirst(n8)
}
if _fastPath(!nextBuffer.isEmpty) {
return Index(
encodedOffset: j.encodedOffset + scalarLength16,
.utf8(buffer: nextBuffer))
}
// If nothing left in the buffer, refill it.
return _index(atEncodedOffset: j.encodedOffset + scalarLength16)
}
j = _index(atEncodedOffset: j.encodedOffset)
precondition(j != endIndex, "index out of bounds")
/// Returns the position immediately before `i`.
///
/// - Parameter i: A valid index of the view; there must be at least one
///   code unit before it.
/// - Returns: The index of the UTF-8 code unit preceding `i`.
/// - Precondition: `i` is not the first position. The ASCII path checks
///   `i.encodedOffset > 0`; the general path traps with
///   "index out of bounds" when nothing precedes `i`.
public func index(before i: Index) -> Index {
  // ASCII storage: stepping back one code unit is just one encoded offset.
  if _fastPath(_core.isASCII) {
    precondition(i.encodedOffset > 0)
    return Index(encodedOffset: i.encodedOffset - 1)
  }

  // `i` is in the middle of a scalar's UTF-8 code units: stay inside the
  // same scalar and reuse the cached buffer, moving back by one.
  if i._transcodedOffset != 0 {
    _sanityCheck(i._cache.utf8 != nil)
    var r = i
    r._compoundOffset = r._compoundOffset &- 1
    return r
  }

  // Handle the scalar boundary the same way as the not-a-utf8-index case.
  // Parse a single scalar, walking backward over the UTF-16 code units that
  // precede `i`.
  var p = Unicode.UTF16.ReverseParser()
  var s = _core[..<i.encodedOffset].reversed().makeIterator()
  let u8: Unicode.UTF8.EncodedScalar
  switch p.parseScalar(from: &s) {
  case .valid(let u16):
    u8 = Unicode.UTF8.transcode(
      u16, from: Unicode.UTF16.self)._unsafelyUnwrappedUnchecked
  case .error:
    // Ill-formed UTF-16 is presented as the replacement character; the
    // error's associated value is not needed here, so it is not bound.
    u8 = Unicode.UTF8.encodedReplacementCharacter
  case .emptyInput:
    _preconditionFailure("index out of bounds")
  }

  // The result addresses the *last* UTF-8 code unit of the parsed scalar.
  // A 4-byte UTF-8 scalar is a surrogate pair in UTF-16 (two code units);
  // shorter encodings are one UTF-16 code unit.
  return Index(
    encodedOffset: i.encodedOffset &- (u8.count < 4 ? 1 : 2),
    transcodedOffset: u8.count &- 1,
    .utf8(buffer: String.Index._UTF8Buffer(u8))
  )
}
public func distance(from i: Index, to j: Index) -> IndexDistance {
@@ -586,3 +630,34 @@ extension String.UTF8View {
return self[i!]
}
}
/*
//===--- Slicing Support --------------------------------------------------===//
/// In Swift 3.2, in the absence of type context,
///
/// someString.utf8[someString.startIndex..<someString.endIndex]
///
/// was deduced to be of type `String.UTF8View`. Provide a more-specific
/// Swift-3-only `subscript` overload that continues to produce
/// `String.UTF8View`.
extension String.UTF8View {
@available(swift, introduced: 4)
public subscript(r: Range<Index>) -> String.UTF8View.SubSequence {
return String.UTF8View.SubSequence(base: self, bounds: r)
}
@available(swift, obsoleted: 4)
public subscript(bounds: Range<Index>) -> String.UTF8View {
var r = self
r._startIndex = bounds.lowerBound
r._endIndex = bounds.upperBound
return r
}
@available(swift, obsoleted: 4)
public subscript(bounds: ClosedRange<Index>) -> String.UTF8View {
return self[bounds.relative(to: self)]
}
}
*/

View File

@@ -71,7 +71,7 @@ func acceptsRandomAccessCollection<C: RandomAccessCollection>(_: C) {}
func testStringCollectionTypes(s: String) {
acceptsCollection(s.utf8)
acceptsBidirectionalCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'BidirectionalCollection'}}
acceptsBidirectionalCollection(s.utf8)
acceptsRandomAccessCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'RandomAccessCollection'}}
acceptsCollection(s.utf16)

View File

@@ -7,7 +7,7 @@ func acceptsRandomAccessCollection<I: RandomAccessCollection>(_: I) {}
func testStringCollectionTypes(s: String) {
acceptsCollection(s.utf8)
acceptsBidirectionalCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'BidirectionalCollection'}}
acceptsBidirectionalCollection(s.utf8)
acceptsRandomAccessCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'RandomAccessCollection'}}
// UTF16View is random-access with Foundation, bidirectional without

View File

@@ -26,7 +26,7 @@ func test_UTF16ViewSubscriptByInt(x: String.UTF16View, i: Int, r: Range<Int>) {
func test_UTF8View(s: String.UTF8View, i: String.UTF8View.Index, d: Int) {
_ = s.index(after: i) // OK
_ = s.index(before: i) // expected-error {{before:}} expected-note {{overloads}}
_ = s.index(before: i) // OK
_ = s.index(i, offsetBy: d) // OK
_ = s.index(i, offsetBy: d, limitedBy: i) // OK
_ = s.distance(from: i, to: i) // OK

View File

@@ -59,10 +59,10 @@ StringTests.test("AssociatedTypes-UTF8View") {
expectCollectionAssociatedTypes(
collectionType: View.self,
iteratorType: View.Iterator.self,
subSequenceType: Slice<View>.self,
subSequenceType: BidirectionalSlice<View>.self,
indexType: View.Index.self,
indexDistanceType: Int.self,
indicesType: DefaultIndices<View>.self)
indicesType: DefaultBidirectionalIndices<View>.self)
}
StringTests.test("AssociatedTypes-UTF16View") {

View File

@@ -728,8 +728,7 @@ tests.test("String.UTF8View/Collection")
.forEach(in: utfTests) {
test in
// FIXME(ABI)#72 : should be `checkBidirectionalCollection`.
checkForwardCollection(test.utf8, test.string.utf8) { $0 == $1 }
checkBidirectionalCollection(test.utf8, test.string.utf8) { $0 == $1 }
}
tests.test("String.UTF16View/BidirectionalCollection")