[stdlib] Access a Character's .unicodeScalars without allocation

This commit is contained in:
Dave Abrahams
2017-06-02 11:50:45 -07:00
parent 3a7a30a822
commit f212fac717

View File

@@ -11,17 +11,17 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
extension Character { extension Character {
public struct UnicodeScalarView { public struct UnicodeScalarView {
internal let _base: String.UnicodeScalarView internal let _base: Character
} }
public var unicodeScalars : UnicodeScalarView { public var unicodeScalars : UnicodeScalarView {
return UnicodeScalarView(_base: String(self).unicodeScalars) return UnicodeScalarView(_base: self)
} }
} }
extension Character.UnicodeScalarView { extension Character.UnicodeScalarView {
public struct Iterator { public struct Iterator {
internal var _base: String.UnicodeScalarView.Iterator internal var _base: IndexingIterator<Character.UnicodeScalarView>
} }
} }
@@ -33,13 +33,15 @@ extension Character.UnicodeScalarView.Iterator : IteratorProtocol {
extension Character.UnicodeScalarView : Sequence { extension Character.UnicodeScalarView : Sequence {
public func makeIterator() -> Iterator { public func makeIterator() -> Iterator {
return Iterator(_base: _base.makeIterator()) return Iterator(_base: IndexingIterator(_elements: self))
} }
} }
extension Character.UnicodeScalarView { extension Character.UnicodeScalarView {
public struct Index { public struct Index {
internal let _base: String.UnicodeScalarView.Index internal let _encodedOffset: Int
internal let _scalar: Unicode.UTF16.EncodedScalar
internal let _stride: UInt8
} }
} }
@@ -48,7 +50,7 @@ extension Character.UnicodeScalarView.Index : Equatable {
lhs: Character.UnicodeScalarView.Index, lhs: Character.UnicodeScalarView.Index,
rhs: Character.UnicodeScalarView.Index rhs: Character.UnicodeScalarView.Index
) -> Bool { ) -> Bool {
return lhs._base == rhs._base return lhs._encodedOffset == rhs._encodedOffset
} }
} }
@@ -57,27 +59,94 @@ extension Character.UnicodeScalarView.Index : Comparable {
lhs: Character.UnicodeScalarView.Index, lhs: Character.UnicodeScalarView.Index,
rhs: Character.UnicodeScalarView.Index rhs: Character.UnicodeScalarView.Index
) -> Bool { ) -> Bool {
return lhs._base < rhs._base return lhs._encodedOffset < rhs._encodedOffset
} }
} }
extension Character.UnicodeScalarView : Collection { extension Character.UnicodeScalarView : Collection {
public var startIndex: Index { public var startIndex: Index {
return Index(_base: _base.startIndex) return index(
after: Index(
_encodedOffset: 0,
_scalar: Unicode.UTF16.EncodedScalar(),
_stride: 0
))
} }
public var endIndex: Index { public var endIndex: Index {
return Index(_base: _base.endIndex) return Index(
_encodedOffset: _base._smallUTF16?.count ?? _base._largeUTF16!.count,
_scalar: Unicode.UTF16.EncodedScalar(),
_stride: 0
)
} }
public func index(after i: Index) -> Index { public func index(after i: Index) -> Index {
return Index(_base: _base.index(after: i._base)) var parser = Unicode.UTF16.ForwardParser()
let startOfNextScalar = i._encodedOffset + numericCast(i._stride)
let r: Unicode.ParseResult<Unicode.UTF16.EncodedScalar>
let small_ = _base._smallUTF16
if _fastPath(small_ != nil), let u16 = small_ {
var i = u16[u16.index(u16.startIndex, offsetBy: startOfNextScalar)...]
.makeIterator()
r = parser.parseScalar(from: &i)
} }
else {
var i = _base._largeUTF16![startOfNextScalar...].makeIterator()
r = parser.parseScalar(from: &i)
}
switch r {
case .valid(let s):
return Index(
_encodedOffset: startOfNextScalar, _scalar: s,
_stride: UInt8(extendingOrTruncating: s.count))
case .error:
return Index(
_encodedOffset: startOfNextScalar,
_scalar: Unicode.UTF16.encodedReplacementCharacter,
_stride: 1)
case .emptyInput:
if i._stride != 0 { return endIndex }
fatalError("no position after end of Character's last Unicode.Scalar")
}
}
public subscript(_ i: Index) -> UnicodeScalar { public subscript(_ i: Index) -> UnicodeScalar {
return _base[i._base] return Unicode.UTF16.decode(i._scalar)
} }
} }
extension Character.UnicodeScalarView : BidirectionalCollection { extension Character.UnicodeScalarView : BidirectionalCollection {
public func index(before i: Index) -> Index { public func index(before i: Index) -> Index {
return Index(_base: _base.index(before: i._base)) var parser = Unicode.UTF16.ReverseParser()
let r: Unicode.ParseResult<Unicode.UTF16.EncodedScalar>
let small_ = _base._smallUTF16
if _fastPath(small_ != nil), let u16 = small_ {
var i = u16[..<u16.index(u16.startIndex, offsetBy: i._encodedOffset)]
.reversed().makeIterator()
r = parser.parseScalar(from: &i)
}
else {
var i = _base._largeUTF16![..<i._encodedOffset].reversed().makeIterator()
r = parser.parseScalar(from: &i)
}
switch r {
case .valid(let s):
return Index(
_encodedOffset: i._encodedOffset - s.count, _scalar: s,
_stride: UInt8(extendingOrTruncating: s.count))
case .error:
return Index(
_encodedOffset: i._encodedOffset - 1,
_scalar: Unicode.UTF16.encodedReplacementCharacter,
_stride: 1)
case .emptyInput:
fatalError("no position before Character's last Unicode.Scalar")
}
} }
} }