mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
Stop inlining _asOpaque into user code. Inlining it bloats user code as there's a bit-test-and-branch to a block containing the _asOpaque call, followed up some operations to e.g. manipulate the range or re-align the calling convention, etc., followed by a final branch to opaque stdlib code. Instead, branch directly into opaque stdlib code. In theory, this means that supporting all opaque patterns can be done with minimal bloat. On ARM, this is a single tbnz instruction.
749 lines
25 KiB
Swift
749 lines
25 KiB
Swift
//===--- StringUTF8.swift - A UTF8 view of String -------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
extension String {
|
|
/// A view of a string's contents as a collection of UTF-8 code units.
|
|
///
|
|
/// You can access a string's view of UTF-8 code units by using its `utf8`
|
|
/// property. A string's UTF-8 view encodes the string's Unicode scalar
|
|
/// values as 8-bit integers.
|
|
///
|
|
/// let flowers = "Flowers 💐"
|
|
/// for v in flowers.utf8 {
|
|
/// print(v)
|
|
/// }
|
|
/// // 70
|
|
/// // 108
|
|
/// // 111
|
|
/// // 119
|
|
/// // 101
|
|
/// // 114
|
|
/// // 115
|
|
/// // 32
|
|
/// // 240
|
|
/// // 159
|
|
/// // 146
|
|
/// // 144
|
|
///
|
|
/// A string's Unicode scalar values can be up to 21 bits in length. To
|
|
/// represent those scalar values using 8-bit integers, more than one UTF-8
|
|
/// code unit is often required.
|
|
///
|
|
/// let flowermoji = "💐"
|
|
/// for v in flowermoji.unicodeScalars {
|
|
/// print(v, v.value)
|
|
/// }
|
|
/// // 💐 128144
|
|
///
|
|
/// for v in flowermoji.utf8 {
|
|
/// print(v)
|
|
/// }
|
|
/// // 240
|
|
/// // 159
|
|
/// // 146
|
|
/// // 144
|
|
///
|
|
/// In the encoded representation of a Unicode scalar value, each UTF-8 code
|
|
/// unit after the first is called a *continuation byte*.
|
|
///
|
|
/// UTF8View Elements Match Encoded C Strings
|
|
/// =========================================
|
|
///
|
|
/// Swift streamlines interoperation with C string APIs by letting you pass a
|
|
/// `String` instance to a function as an `Int8` or `UInt8` pointer. When you
|
|
/// call a C function using a `String`, Swift automatically creates a buffer
|
|
/// of UTF-8 code units and passes a pointer to that buffer. The code units
|
|
/// of that buffer match the code units in the string's `utf8` view.
|
|
///
|
|
/// The following example uses the C `strncmp` function to compare the
|
|
/// beginning of two Swift strings. The `strncmp` function takes two
|
|
/// `const char*` pointers and an integer specifying the number of characters
|
|
/// to compare. Because the strings are identical up to the 14th character,
|
|
/// comparing only those characters results in a return value of `0`.
|
|
///
|
|
/// let s1 = "They call me 'Bell'"
|
|
/// let s2 = "They call me 'Stacey'"
|
|
///
|
|
/// print(strncmp(s1, s2, 14))
|
|
/// // Prints "0"
|
|
/// print(String(s1.utf8.prefix(14)))
|
|
/// // Prints "They call me '"
|
|
///
|
|
/// Extending the compared character count to 15 includes the differing
|
|
/// characters, so a nonzero result is returned.
|
|
///
|
|
/// print(strncmp(s1, s2, 15))
|
|
/// // Prints "-17"
|
|
/// print(String(s1.utf8.prefix(15)))
|
|
/// // Prints "They call me 'B"
|
|
@_fixed_layout // FIXME(sil-serialize-all)
|
|
public struct UTF8View
|
|
: BidirectionalCollection,
|
|
CustomStringConvertible,
|
|
CustomDebugStringConvertible {
|
|
|
|
/// Underlying UTF-16-compatible representation
|
|
@_versioned
|
|
internal var _guts: _StringGuts
|
|
|
|
/// Distances to `(startIndex, endIndex)` from the endpoints of _guts,
|
|
/// measured in UTF-8 code units.
|
|
///
|
|
/// Note: this is *only* here to support legacy Swift3-style slicing where
|
|
/// `s.utf8[i..<j]` produces a `String.UTF8View`, and should be removed when
|
|
/// those semantics are no longer supported.
|
|
@_versioned
|
|
internal let _legacyOffsets: (start: Int8, end: Int8)
|
|
|
|
/// Flags indicating whether the limits of this view did not originally fall
|
|
/// on grapheme cluster boundaries in the original string. This is used to
|
|
/// emulate (undocumented) Swift 3 behavior where String.init?(_:) returned
|
|
/// nil in such cases.
|
|
///
|
|
/// Note: this is *only* here to support legacy Swift3-style slicing where
|
|
/// `s.utf8[i..<j]` produces a `String.UTF8View`, and should be removed when
|
|
/// those semantics are no longer supported.
|
|
@_versioned
|
|
internal let _legacyPartialCharacters: (start: Bool, end: Bool)
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@_versioned // FIXME(sil-serialize-all)
|
|
internal init(
|
|
_ _guts: _StringGuts,
|
|
legacyOffsets: (Int, Int) = (0, 0),
|
|
legacyPartialCharacters: (Bool, Bool) = (false, false)
|
|
) {
|
|
self._guts = _guts
|
|
self._legacyOffsets = (Int8(legacyOffsets.0), Int8(legacyOffsets.1))
|
|
self._legacyPartialCharacters = legacyPartialCharacters
|
|
}
|
|
|
|
public typealias Index = String.Index
|
|
|
|
/// The position of the first code unit if the UTF-8 view is
|
|
/// nonempty.
|
|
///
|
|
/// If the UTF-8 view is empty, `startIndex` is equal to `endIndex`.
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
public var startIndex: Index {
|
|
let r = _index(atEncodedOffset: _guts.startIndex)
|
|
if _legacyOffsets.start == 0 { return r }
|
|
return index(r, offsetBy: numericCast(_legacyOffsets.start))
|
|
}
|
|
|
|
/// The "past the end" position---that is, the position one
|
|
/// greater than the last valid subscript argument.
|
|
///
|
|
/// In an empty UTF-8 view, `endIndex` is equal to `startIndex`.
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
public var endIndex: Index {
|
|
_sanityCheck(_legacyOffsets.end >= -3 && _legacyOffsets.end <= 0,
|
|
"out of bounds legacy end")
|
|
|
|
var r = Index(encodedOffset: _guts.endIndex)
|
|
if _fastPath(_legacyOffsets.end == 0) {
|
|
return r
|
|
}
|
|
switch _legacyOffsets.end {
|
|
case -3: r = index(before: r); fallthrough
|
|
case -2: r = index(before: r); fallthrough
|
|
case -1: return index(before: r)
|
|
default: Builtin.unreachable()
|
|
}
|
|
}
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@_versioned
|
|
internal func _index(atEncodedOffset n: Int) -> Index {
|
|
if _fastPath(_guts.isASCII) { return Index(encodedOffset: n) }
|
|
let count = _guts.count
|
|
if n == count { return endIndex }
|
|
|
|
var p = UTF16.ForwardParser()
|
|
var i = _guts.makeIterator(in: n..<count)
|
|
var buffer = Index._UTF8Buffer()
|
|
Loop:
|
|
while true {
|
|
switch p.parseScalar(from: &i) {
|
|
case .valid(let u16):
|
|
let u8 = Unicode.UTF8.transcode(u16, from: Unicode.UTF16.self)
|
|
._unsafelyUnwrappedUnchecked
|
|
if buffer.count + u8.count > buffer.capacity { break Loop }
|
|
buffer.append(contentsOf: u8)
|
|
case .error:
|
|
let u8 = Unicode.UTF8.encodedReplacementCharacter
|
|
if buffer.count + u8.count > buffer.capacity { break Loop }
|
|
buffer.append(contentsOf: u8)
|
|
case .emptyInput:
|
|
break Loop
|
|
}
|
|
}
|
|
return Index(encodedOffset: n, .utf8(buffer: buffer))
|
|
}
|
|
|
|
/// Returns the next consecutive position after `i`.
|
|
///
|
|
/// - Precondition: The next position is representable.
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@inline(__always)
|
|
public func index(after i: Index) -> Index {
|
|
if _fastPath(_guts.isASCII) {
|
|
precondition(i.encodedOffset < _guts.count)
|
|
return Index(encodedOffset: i.encodedOffset + 1)
|
|
}
|
|
|
|
var j = i
|
|
|
|
// Ensure j's cache is utf8
|
|
if _slowPath(j._cache.utf8 == nil) {
|
|
j = _index(atEncodedOffset: j.encodedOffset)
|
|
precondition(j != endIndex, "Index out of bounds")
|
|
}
|
|
|
|
let buffer = j._cache.utf8._unsafelyUnwrappedUnchecked
|
|
|
|
var scalarLength16 = 1
|
|
let b0 = buffer.first._unsafelyUnwrappedUnchecked
|
|
var nextBuffer = buffer
|
|
|
|
let leading1s = (~b0).leadingZeroBitCount
|
|
if _fastPath(leading1s == 0) { // ASCII in buffer; just consume it
|
|
nextBuffer.removeFirst()
|
|
}
|
|
else {
|
|
// Number of bytes consumed in this scalar
|
|
let n8 = j._transcodedOffset + 1
|
|
// If we haven't reached a scalar boundary...
|
|
if _fastPath(n8 < leading1s) {
|
|
// Advance to the next position in this scalar
|
|
return Index(
|
|
encodedOffset: j.encodedOffset,
|
|
transcodedOffset: n8, .utf8(buffer: buffer))
|
|
}
|
|
// We reached a scalar boundary; compute the underlying utf16's width
|
|
// based on the number of utf8 code units
|
|
scalarLength16 = n8 >> 2 + 1
|
|
nextBuffer.removeFirst(n8)
|
|
}
|
|
|
|
if _fastPath(!nextBuffer.isEmpty) {
|
|
return Index(
|
|
encodedOffset: j.encodedOffset + scalarLength16,
|
|
.utf8(buffer: nextBuffer))
|
|
}
|
|
// If nothing left in the buffer, refill it.
|
|
return _index(atEncodedOffset: j.encodedOffset + scalarLength16)
|
|
}
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
public func index(before i: Index) -> Index {
|
|
if _fastPath(_guts.isASCII) {
|
|
precondition(i.encodedOffset > 0)
|
|
return Index(encodedOffset: i.encodedOffset - 1)
|
|
}
|
|
|
|
if i._transcodedOffset != 0 {
|
|
_sanityCheck(i._cache.utf8 != nil)
|
|
var r = i
|
|
r._compoundOffset = r._compoundOffset &- 1
|
|
return r
|
|
}
|
|
|
|
// Handle the scalar boundary the same way as the not-a-utf8-index case.
|
|
_precondition(i.encodedOffset > 0, "Can't move before startIndex")
|
|
|
|
// Parse a single scalar
|
|
let u = _guts.unicodeScalar(endingAt: i.encodedOffset)
|
|
let u8 = Unicode.UTF8.encode(u)._unsafelyUnwrappedUnchecked
|
|
return Index(
|
|
encodedOffset: i.encodedOffset &- (u8.count < 4 ? 1 : 2),
|
|
transcodedOffset: u8.count &- 1,
|
|
.utf8(buffer: String.Index._UTF8Buffer(u8))
|
|
)
|
|
}
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
public func distance(from i: Index, to j: Index) -> Int {
|
|
if _fastPath(_guts.isASCII) {
|
|
return j.encodedOffset - i.encodedOffset
|
|
}
|
|
return j >= i
|
|
? _forwardDistance(from: i, to: j) : -_forwardDistance(from: j, to: i)
|
|
}
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@_versioned
|
|
@inline(__always)
|
|
internal func _forwardDistance(from i: Index, to j: Index) -> Int {
|
|
return j._transcodedOffset - i._transcodedOffset +
|
|
_count(fromUTF16: IteratorSequence(_guts.makeIterator(
|
|
in: i.encodedOffset..<j.encodedOffset)))
|
|
}
|
|
|
|
/// Accesses the code unit at the given position.
|
|
///
|
|
/// The following example uses the subscript to print the value of a
|
|
/// string's first UTF-8 code unit.
|
|
///
|
|
/// let greeting = "Hello, friend!"
|
|
/// let i = greeting.utf8.startIndex
|
|
/// print("First character's UTF-8 code unit: \(greeting.utf8[i])")
|
|
/// // Prints "First character's UTF-8 code unit: 72"
|
|
///
|
|
/// - Parameter position: A valid index of the view. `position`
|
|
/// must be less than the view's end index.
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
public subscript(position: Index) -> UTF8.CodeUnit {
|
|
@inline(__always)
|
|
get {
|
|
if _fastPath(_guts.isASCII) {
|
|
let ascii = _guts._unmanagedASCIIView
|
|
let offset = position.encodedOffset
|
|
_precondition(offset < ascii.count, "Index out of bounds")
|
|
return ascii.buffer[position.encodedOffset]
|
|
}
|
|
var j = position
|
|
while true {
|
|
if case .utf8(let buffer) = j._cache {
|
|
_onFastPath()
|
|
return buffer[
|
|
buffer.index(buffer.startIndex, offsetBy: j._transcodedOffset)]
|
|
}
|
|
j = _index(atEncodedOffset: j.encodedOffset)
|
|
precondition(j < endIndex, "Index out of bounds")
|
|
}
|
|
}
|
|
}
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
public var description: String {
|
|
return String(_guts)
|
|
}
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
public var debugDescription: String {
|
|
return "UTF8View(\(self.description.debugDescription))"
|
|
}
|
|
}
|
|
|
|
/// A UTF-8 encoding of `self`.
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
public var utf8: UTF8View {
|
|
get {
|
|
return UTF8View(self._guts)
|
|
}
|
|
set {
|
|
self = String(describing: newValue)
|
|
}
|
|
}
|
|
|
|
/// A contiguously stored null-terminated UTF-8 representation of the string.
|
|
///
|
|
/// To access the underlying memory, invoke `withUnsafeBufferPointer` on the
|
|
/// array.
|
|
///
|
|
/// let s = "Hello!"
|
|
/// let bytes = s.utf8CString
|
|
/// print(bytes)
|
|
/// // Prints "[72, 101, 108, 108, 111, 33, 0]"
|
|
///
|
|
/// bytes.withUnsafeBufferPointer { ptr in
|
|
/// print(strlen(ptr.baseAddress!))
|
|
/// }
|
|
/// // Prints "6"
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
public var utf8CString: ContiguousArray<CChar> {
|
|
var result = ContiguousArray<CChar>()
|
|
result.reserveCapacity(utf8.count + 1)
|
|
for c in utf8 {
|
|
result.append(CChar(bitPattern: c))
|
|
}
|
|
result.append(0)
|
|
return result
|
|
}
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@_versioned // FIXME(sil-serialize-all)
|
|
internal func _withUnsafeBufferPointerToUTF8<R>(
|
|
_ body: (UnsafeBufferPointer<UTF8.CodeUnit>) throws -> R
|
|
) rethrows -> R {
|
|
if _guts.isASCII {
|
|
return try body(_guts._unmanagedASCIIView.buffer)
|
|
}
|
|
var nullTerminatedUTF8 = ContiguousArray<UTF8.CodeUnit>()
|
|
nullTerminatedUTF8.reserveCapacity(utf8.count + 1)
|
|
nullTerminatedUTF8 += utf8
|
|
nullTerminatedUTF8.append(0)
|
|
return try nullTerminatedUTF8.withUnsafeBufferPointer(body)
|
|
}
|
|
|
|
/// Creates a string corresponding to the given sequence of UTF-8 code units.
|
|
///
|
|
/// If `utf8` is an ill-formed UTF-8 code sequence, the result is `nil`.
|
|
///
|
|
/// You can use this initializer to create a new string from a slice of
|
|
/// another string's `utf8` view.
|
|
///
|
|
/// let picnicGuest = "Deserving porcupine"
|
|
/// if let i = picnicGuest.utf8.index(of: 32) {
|
|
/// let adjective = String(picnicGuest.utf8[..<i])
|
|
/// print(adjective)
|
|
/// }
|
|
/// // Prints "Optional(Deserving)"
|
|
///
|
|
/// The `adjective` constant is created by calling this initializer with a
|
|
/// slice of the `picnicGuest.utf8` view.
|
|
///
|
|
/// - Parameter utf8: A UTF-8 code sequence.
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@available(swift, deprecated: 3.2,
|
|
message: "Failable initializer was removed in Swift 4. When upgrading to Swift 4, please use non-failable String.init(_:UTF8View)")
|
|
@available(swift, obsoleted: 4.0,
|
|
message: "Please use non-failable String.init(_:UTF8View) instead")
|
|
public init?(_ utf8: UTF8View) {
|
|
if utf8.startIndex._transcodedOffset != 0
|
|
|| utf8.endIndex._transcodedOffset != 0
|
|
|| utf8._legacyPartialCharacters.start
|
|
|| utf8._legacyPartialCharacters.end {
|
|
return nil
|
|
}
|
|
self = String(utf8._guts)
|
|
}
|
|
|
|
/// Creates a string corresponding to the given sequence of UTF-8 code units.
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@available(swift, introduced: 4.0, message:
|
|
"Please use failable String.init?(_:UTF8View) when in Swift 3.2 mode")
|
|
public init(_ utf8: UTF8View) {
|
|
self = String(utf8._guts)
|
|
}
|
|
|
|
/// The index type for subscripting a string.
|
|
public typealias UTF8Index = UTF8View.Index
|
|
}
|
|
|
|
extension String.UTF8View : _SwiftStringView {
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@_versioned // FIXME(sil-serialize-all)
|
|
internal var _persistentContent : String {
|
|
return String(self._guts)
|
|
}
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@_versioned // FIXME(sil-serialize-all)
|
|
var _wholeString : String {
|
|
return String(_guts)
|
|
}
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@_versioned // FIXME(sil-serialize-all)
|
|
var _encodedOffsetRange : Range<Int> {
|
|
return 0..<_guts.count
|
|
}
|
|
}
|
|
|
|
extension String.UTF8View {
|
|
@_fixed_layout // FIXME(sil-serialize-all)
|
|
public struct Iterator {
|
|
internal typealias _OutputBuffer = _ValidUTF8Buffer<UInt64>
|
|
@_versioned
|
|
internal let _guts: _StringGuts
|
|
@_versioned
|
|
internal let _endOffset: Int
|
|
@_versioned // FIXME(sil-serialize-all)
|
|
internal var _nextOffset: Int
|
|
@_versioned // FIXME(sil-serialize-all)
|
|
internal var _buffer: _OutputBuffer
|
|
}
|
|
|
|
public func makeIterator() -> Iterator {
|
|
return Iterator(self)
|
|
}
|
|
}
|
|
|
|
extension String.UTF8View.Iterator : IteratorProtocol {
|
|
public typealias Element = String.UTF8View.Element
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@_versioned // FIXME(sil-serialize-all)
|
|
internal init(_ utf8: String.UTF8View) {
|
|
self._guts = utf8._guts
|
|
self._nextOffset = 0
|
|
self._buffer = _OutputBuffer()
|
|
self._endOffset = utf8._guts.count
|
|
}
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
public mutating func next() -> Unicode.UTF8.CodeUnit? {
|
|
if _slowPath(_nextOffset == _endOffset) {
|
|
if _slowPath(_buffer.isEmpty) {
|
|
return nil
|
|
}
|
|
}
|
|
if _guts.isASCII {
|
|
defer { _nextOffset += 1 }
|
|
return _guts._unmanagedASCIIView.buffer[_nextOffset]
|
|
}
|
|
|
|
if _fastPath(!_buffer.isEmpty) {
|
|
return _buffer.removeFirst()
|
|
}
|
|
return _fillBuffer()
|
|
}
|
|
|
|
@_versioned
|
|
@inline(never)
|
|
internal mutating func _fillBuffer() -> Unicode.UTF8.CodeUnit {
|
|
_sanityCheck(!_guts.isASCII, "next() already checks for known ASCII")
|
|
if _slowPath(_guts._isOpaque) {
|
|
return _opaqueFillBuffer()
|
|
}
|
|
|
|
defer { _fixLifetime(_guts) }
|
|
return _fillBuffer(from: _guts._unmanagedUTF16View)
|
|
}
|
|
|
|
@_versioned // @opaque
|
|
internal mutating func _opaqueFillBuffer() -> Unicode.UTF8.CodeUnit {
|
|
_sanityCheck(_guts._isOpaque)
|
|
defer { _fixLifetime(_guts) }
|
|
return _fillBuffer(from: _guts._asOpaque())
|
|
}
|
|
|
|
// NOT @_versioned
|
|
internal mutating func _fillBuffer<V: _StringVariant>(
|
|
from variant: V
|
|
) -> Unicode.UTF8.CodeUnit {
|
|
// Eat as many ASCII characters as possible
|
|
let asciiEnd = Swift.min(_nextOffset + _buffer.capacity, _endOffset)
|
|
for cu in variant[_nextOffset..<asciiEnd] {
|
|
if !UTF16._isASCII(cu) { break }
|
|
_buffer.append(UInt8(truncatingIfNeeded: cu))
|
|
_nextOffset += 1
|
|
}
|
|
if _nextOffset == asciiEnd {
|
|
return _buffer.removeFirst()
|
|
}
|
|
// Decode UTF-16, encode UTF-8
|
|
for scalar in IteratorSequence(
|
|
variant[_nextOffset..<_endOffset].makeUnicodeScalarIterator()) {
|
|
let u8 = UTF8.encode(scalar)._unsafelyUnwrappedUnchecked
|
|
let c8 = u8.count
|
|
guard _buffer.count + c8 <= _buffer.capacity else { break }
|
|
_buffer.append(contentsOf: u8)
|
|
_nextOffset += 1 &+ (c8 &>> 2)
|
|
}
|
|
return _buffer.removeFirst()
|
|
}
|
|
}
|
|
|
|
extension String.UTF8View {
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
public var count: Int {
|
|
if _fastPath(_guts.isASCII) { return _guts.count }
|
|
|
|
if _slowPath(_guts._isOpaque) {
|
|
return _opaqueCount
|
|
}
|
|
|
|
defer { _fixLifetime(_guts) }
|
|
return _count(fromUTF16: _guts._unmanagedUTF16View)
|
|
}
|
|
|
|
@_versioned // @opaque
|
|
internal var _opaqueCount: Int {
|
|
_sanityCheck(_guts._isOpaque)
|
|
defer { _fixLifetime(_guts) }
|
|
return _count(fromUTF16: _guts._asOpaque())
|
|
}
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@_versioned // FIXME(sil-serialize-all)
|
|
internal func _count<Source: Sequence>(fromUTF16 source: Source) -> Int
|
|
where Source.Element == Unicode.UTF16.CodeUnit
|
|
{
|
|
var result = 0
|
|
var prev: Unicode.UTF16.CodeUnit = 0
|
|
for u in source {
|
|
switch u {
|
|
case 0..<0x80: result += 1
|
|
case 0x80..<0x800: result += 2
|
|
case 0x800..<0xDC00: result += 3
|
|
case 0xDC00..<0xE000: result += UTF16.isLeadSurrogate(prev) ? 1 : 3
|
|
default: result += 3
|
|
}
|
|
prev = u
|
|
}
|
|
return result
|
|
}
|
|
}
|
|
|
|
// Index conversions
|
|
extension String.UTF8View.Index {
|
|
/// Creates an index in the given UTF-8 view that corresponds exactly to the
|
|
/// specified `UTF16View` position.
|
|
///
|
|
/// The following example finds the position of a space in a string's `utf16`
|
|
/// view and then converts that position to an index in the string's
|
|
/// `utf8` view.
|
|
///
|
|
/// let cafe = "Café 🍵"
|
|
///
|
|
/// let utf16Index = cafe.utf16.index(of: 32)!
|
|
/// let utf8Index = String.UTF8View.Index(utf16Index, within: cafe.utf8)!
|
|
///
|
|
/// print(Array(cafe.utf8[..<utf8Index]))
|
|
/// // Prints "[67, 97, 102, 195, 169]"
|
|
///
|
|
/// If the position passed in `utf16Index` doesn't have an exact
|
|
/// corresponding position in `utf8`, the result of the initializer is
|
|
/// `nil`. For example, because UTF-8 and UTF-16 represent high Unicode code
|
|
/// points differently, an attempt to convert the position of the trailing
|
|
/// surrogate of a UTF-16 surrogate pair fails.
|
|
///
|
|
/// The next example attempts to convert the indices of the two UTF-16 code
|
|
/// points that represent the teacup emoji (`"🍵"`). The index of the lead
|
|
/// surrogate is successfully converted to a position in `utf8`, but the
|
|
/// index of the trailing surrogate is not.
|
|
///
|
|
/// let emojiHigh = cafe.utf16.index(after: utf16Index)
|
|
/// print(String.UTF8View.Index(emojiHigh, within: cafe.utf8))
|
|
/// // Prints "Optional(String.Index(...))"
|
|
///
|
|
/// let emojiLow = cafe.utf16.index(after: emojiHigh)
|
|
/// print(String.UTF8View.Index(emojiLow, within: cafe.utf8))
|
|
/// // Prints "nil"
|
|
///
|
|
/// - Parameters:
|
|
/// - sourcePosition: A position in a `String` or one of its views.
|
|
/// - target: The `UTF8View` in which to find the new position.
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
public init?(_ sourcePosition: String.Index, within target: String.UTF8View) {
|
|
switch sourcePosition._cache {
|
|
case .utf8:
|
|
self.init(encodedOffset: sourcePosition.encodedOffset,
|
|
transcodedOffset:sourcePosition._transcodedOffset, sourcePosition._cache)
|
|
|
|
default:
|
|
guard String.UnicodeScalarView(target._guts)._isOnUnicodeScalarBoundary(
|
|
sourcePosition) else { return nil }
|
|
self.init(encodedOffset: sourcePosition.encodedOffset)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Reflection
|
|
extension String.UTF8View : CustomReflectable {
|
|
/// Returns a mirror that reflects the UTF-8 view of a string.
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
public var customMirror: Mirror {
|
|
return Mirror(self, unlabeledChildren: self)
|
|
}
|
|
}
|
|
|
|
extension String.UTF8View : CustomPlaygroundQuickLookable {
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@available(*, deprecated, message: "UTF8View.customPlaygroundQuickLook will be removed in a future Swift version")
|
|
public var customPlaygroundQuickLook: PlaygroundQuickLook {
|
|
return .text(description)
|
|
}
|
|
}
|
|
|
|
// backward compatibility for index interchange.
|
|
extension String.UTF8View {
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@available(
|
|
swift, obsoleted: 4.0,
|
|
message: "Any String view index conversion can fail in Swift 4; please unwrap the optional index")
|
|
public func index(after i: Index?) -> Index {
|
|
return index(after: i!)
|
|
}
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@available(
|
|
swift, obsoleted: 4.0,
|
|
message: "Any String view index conversion can fail in Swift 4; please unwrap the optional index")
|
|
public func index(_ i: Index?, offsetBy n: Int) -> Index {
|
|
return index(i!, offsetBy: n)
|
|
}
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@available(
|
|
swift, obsoleted: 4.0,
|
|
message: "Any String view index conversion can fail in Swift 4; please unwrap the optional indices")
|
|
public func distance(
|
|
from i: Index?, to j: Index?) -> Int {
|
|
return distance(from: i!, to: j!)
|
|
}
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@available(
|
|
swift, obsoleted: 4.0,
|
|
message: "Any String view index conversion can fail in Swift 4; please unwrap the optional index")
|
|
public subscript(i: Index?) -> Unicode.UTF8.CodeUnit {
|
|
return self[i!]
|
|
}
|
|
}
|
|
|
|
//===--- Slicing Support --------------------------------------------------===//
|
|
/// In Swift 3.2, in the absence of type context,
|
|
///
|
|
/// someString.utf8[someString.utf8.startIndex..<someString.utf8.endIndex]
|
|
///
|
|
/// was deduced to be of type `String.UTF8View`. Provide a more-specific
|
|
/// Swift-3-only `subscript` overload that continues to produce
|
|
/// `String.UTF8View`.
|
|
extension String.UTF8View {
|
|
public typealias SubSequence = Substring.UTF8View
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@available(swift, introduced: 4)
|
|
public subscript(r: Range<Index>) -> String.UTF8View.SubSequence {
|
|
return String.UTF8View.SubSequence(self, _bounds: r)
|
|
}
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@available(swift, obsoleted: 4)
|
|
public subscript(r: Range<Index>) -> String.UTF8View {
|
|
let wholeString = String(_guts)
|
|
let legacyPartialCharacters = (
|
|
(self._legacyPartialCharacters.start &&
|
|
r.lowerBound.encodedOffset == 0) ||
|
|
r.lowerBound.samePosition(in: wholeString) == nil,
|
|
(self._legacyPartialCharacters.end &&
|
|
r.upperBound.encodedOffset == _guts.count) ||
|
|
r.upperBound.samePosition(in: wholeString) == nil)
|
|
|
|
if r.upperBound._transcodedOffset == 0 {
|
|
return String.UTF8View(
|
|
_guts._extractSlice(
|
|
r.lowerBound.encodedOffset..<r.upperBound.encodedOffset),
|
|
legacyOffsets: (r.lowerBound._transcodedOffset, 0),
|
|
legacyPartialCharacters: legacyPartialCharacters)
|
|
}
|
|
|
|
let b0 = r.upperBound._cache.utf8!.first!
|
|
let scalarLength8 = (~b0).leadingZeroBitCount
|
|
let scalarLength16 = scalarLength8 == 4 ? 2 : 1
|
|
let coreEnd = r.upperBound.encodedOffset + scalarLength16
|
|
return String.UTF8View(
|
|
_guts._extractSlice(r.lowerBound.encodedOffset..<coreEnd),
|
|
legacyOffsets: (
|
|
r.lowerBound._transcodedOffset,
|
|
r.upperBound._transcodedOffset - scalarLength8),
|
|
legacyPartialCharacters: legacyPartialCharacters)
|
|
}
|
|
|
|
@_inlineable // FIXME(sil-serialize-all)
|
|
@available(swift, obsoleted: 4)
|
|
public subscript(bounds: ClosedRange<Index>) -> String.UTF8View {
|
|
return self[bounds.relative(to: self)]
|
|
}
|
|
}
|