mirror of
https://github.com/apple/swift.git
synced 2026-06-20 15:42:51 +02:00
[String] Drop in initial UTF-8 String prototype
This is a giant squashing of a lot of individual changes prototyping a switch of String in Swift 5 to be natively encoded as UTF-8. It includes what's necessary for a functional prototype, dropping some history, but still leaves plenty of history available for future commits. My apologies to anyone trying to do code archeology between this commit and the one prior. This was the lesser of evils.
This commit is contained in:
@@ -28,7 +28,7 @@ import Foundation
|
||||
//
|
||||
|
||||
func findSubstring(_ haystack: Substring, _ needle: String) -> String.Index? {
|
||||
return findSubstring(String(haystack._ephemeralContent), needle)
|
||||
return findSubstring(haystack._ephemeralString, needle)
|
||||
}
|
||||
|
||||
func findSubstring(_ string: String, _ substring: String) -> String.Index? {
|
||||
|
||||
@@ -66,6 +66,21 @@ extension Optional {
|
||||
}
|
||||
#endif
|
||||
|
||||
/// From a non-`nil` `UnsafePointer` to a null-terminated string
|
||||
/// with possibly-transient lifetime, create a null-terminated array of 'C' char.
|
||||
/// Returns `nil` if passed a null pointer.
|
||||
internal func _persistCString(_ p: UnsafePointer<CChar>?) -> [CChar]? {
|
||||
// A null pointer has nothing to copy; signal that with `nil`.
guard let cString = p else {
|
||||
return nil
|
||||
}
|
||||
// Judging by its name, this is the offset of the terminating zero code unit,
// i.e. the number of code units before the terminator.
let len = UTF8._nullCodeUnitOffset(in: cString)
|
||||
// One extra zero-filled slot keeps the copy null-terminated.
var result = [CChar](repeating: 0, count: len + 1)
|
||||
// Copy the first `len` code units; the last element keeps its initial 0.
for i in 0..<len {
|
||||
result[i] = cString[i]
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
extension String {
|
||||
//===--- Class Methods --------------------------------------------------===//
|
||||
//===--------------------------------------------------------------------===//
|
||||
|
||||
@@ -72,7 +72,7 @@ extension Substring : _ObjectiveCBridgeable {
|
||||
result: inout Substring?
|
||||
) {
|
||||
let s = String(x)
|
||||
result = Substring(_base: s, s.startIndex ..< s.endIndex)
|
||||
result = s[...]
|
||||
}
|
||||
|
||||
public static func _conditionallyBridgeFromObjectiveC(
|
||||
@@ -91,7 +91,7 @@ extension Substring : _ObjectiveCBridgeable {
|
||||
// string; map it to an empty substring.
|
||||
if _slowPath(source == nil) { return Substring() }
|
||||
let s = String(source!)
|
||||
return Substring(_base: s, s.startIndex ..< s.endIndex)
|
||||
return s[...]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -39,7 +39,6 @@ set(SWIFTLIB_ESSENTIAL
|
||||
Builtin.swift
|
||||
BuiltinMath.swift.gyb
|
||||
Character.swift
|
||||
CharacterUnicodeScalars.swift
|
||||
CocoaArray.swift
|
||||
Codable.swift.gyb
|
||||
Collection.swift
|
||||
@@ -47,6 +46,7 @@ set(SWIFTLIB_ESSENTIAL
|
||||
Comparable.swift
|
||||
CompilerProtocols.swift
|
||||
ContiguousArray.swift
|
||||
ContiguouslyStored.swift
|
||||
ClosedRange.swift
|
||||
ContiguousArrayBuffer.swift
|
||||
CString.swift
|
||||
|
||||
@@ -44,7 +44,9 @@ extension String {
|
||||
///
|
||||
/// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
|
||||
public init(cString: UnsafePointer<CChar>) {
|
||||
self = _decodeValidCString(cString, repair: true)
|
||||
let len = UTF8._nullCodeUnitOffset(in: cString)
|
||||
self = String._fromUTF8Repairing(
|
||||
UnsafeBufferPointer(start: cString._asUInt8, count: len)).0
|
||||
}
|
||||
|
||||
/// Creates a new string by copying the null-terminated UTF-8 data referenced
|
||||
@@ -53,7 +55,9 @@ extension String {
|
||||
/// This is identical to `init(cString: UnsafePointer<CChar>)` but operates on an
|
||||
/// unsigned sequence of bytes.
|
||||
public init(cString: UnsafePointer<UInt8>) {
|
||||
self = _decodeValidCString(cString, repair: true)
|
||||
let len = UTF8._nullCodeUnitOffset(in: cString)
|
||||
self = String._fromUTF8Repairing(
|
||||
UnsafeBufferPointer(start: cString, count: len)).0
|
||||
}
|
||||
|
||||
/// Creates a new string by copying and validating the null-terminated UTF-8
|
||||
@@ -83,9 +87,11 @@ extension String {
|
||||
///
|
||||
/// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
|
||||
public init?(validatingUTF8 cString: UnsafePointer<CChar>) {
|
||||
guard let str = _decodeCString(cString, repair: false) else {
|
||||
return nil
|
||||
}
|
||||
let len = UTF8._nullCodeUnitOffset(in: cString)
|
||||
guard let str = String._tryFromUTF8(
|
||||
UnsafeBufferPointer(start: cString._asUInt8, count: len))
|
||||
else { return nil }
|
||||
|
||||
self = str
|
||||
}
|
||||
|
||||
@@ -133,92 +139,50 @@ extension String {
|
||||
/// ill-formed sequence is detected, this method returns `nil`.
|
||||
@_specialize(where Encoding == Unicode.UTF8)
|
||||
@_specialize(where Encoding == Unicode.UTF16)
|
||||
@inlinable // Fold away specializations
|
||||
public static func decodeCString<Encoding : _UnicodeEncoding>(
|
||||
_ cString: UnsafePointer<Encoding.CodeUnit>?,
|
||||
as encoding: Encoding.Type,
|
||||
repairingInvalidCodeUnits isRepairing: Bool = true)
|
||||
-> (result: String, repairsMade: Bool)? {
|
||||
repairingInvalidCodeUnits isRepairing: Bool = true
|
||||
) -> (result: String, repairsMade: Bool)? {
|
||||
guard let cPtr = cString else { return nil }
|
||||
|
||||
guard let cString = cString else {
|
||||
return nil
|
||||
if _fastPath(encoding == Unicode.UTF8.self) {
|
||||
let ptr = UnsafeRawPointer(cPtr).assumingMemoryBound(to: UInt8.self)
|
||||
let len = UTF8._nullCodeUnitOffset(in: ptr)
|
||||
let codeUnits = UnsafeBufferPointer(start: ptr, count: len)
|
||||
if isRepairing {
|
||||
return String._fromUTF8Repairing(codeUnits)
|
||||
} else {
|
||||
guard let str = String._tryFromUTF8(codeUnits) else { return nil }
|
||||
return (str, false)
|
||||
}
|
||||
}
|
||||
var end = cString
|
||||
|
||||
var end = cPtr
|
||||
while end.pointee != 0 { end += 1 }
|
||||
let len = end - cString
|
||||
return _decodeCString(
|
||||
cString, as: encoding, length: len,
|
||||
repairingInvalidCodeUnits: isRepairing)
|
||||
let len = end - cPtr
|
||||
let codeUnits = UnsafeBufferPointer(start: cPtr, count: len)
|
||||
return String._fromCodeUnits(
|
||||
codeUnits, encoding: encoding, repair: isRepairing)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// From a non-`nil` `UnsafePointer` to a null-terminated string
|
||||
/// with possibly-transient lifetime, create a null-terminated array of 'C' char.
|
||||
/// Returns `nil` if passed a null pointer.
|
||||
public func _persistCString(_ p: UnsafePointer<CChar>?) -> [CChar]? {
|
||||
guard let s = p else {
|
||||
return nil
|
||||
}
|
||||
let count = Int(_swift_stdlib_strlen(s))
|
||||
var result = [CChar](repeating: 0, count: count + 1)
|
||||
for i in 0..<count {
|
||||
result[i] = s[i]
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
internal func _decodeValidCString(
|
||||
_ cString: UnsafePointer<Int8>, repair: Bool
|
||||
) -> String {
|
||||
let len = UTF8._nullCodeUnitOffset(in: cString)
|
||||
return cString.withMemoryRebound(to: UInt8.self, capacity: len) {
|
||||
(ptr: UnsafePointer<UInt8>) -> String in
|
||||
let bufPtr = UnsafeBufferPointer(start: ptr, count: len)
|
||||
return String._fromWellFormedUTF8(bufPtr, repair: repair)
|
||||
/// Creates a string from the null-terminated sequence of code units at the
/// given
|
||||
/// pointer.
|
||||
///
|
||||
/// - Parameters:
|
||||
/// - ptr: A pointer to a sequence of contiguous code
|
||||
/// units in the encoding specified in `sourceEncoding`, ending just
|
||||
/// before the first zero code unit.
|
||||
/// - sourceEncoding: The encoding in which the code units should be
|
||||
/// interpreted.
|
||||
@_specialize(where Encoding == Unicode.UTF8)
|
||||
@_specialize(where Encoding == Unicode.UTF16)
|
||||
@inlinable // Fold away specializations
|
||||
public init<Encoding: Unicode.Encoding>(
|
||||
decodingCString ptr: UnsafePointer<Encoding.CodeUnit>,
|
||||
as sourceEncoding: Encoding.Type
|
||||
) {
|
||||
// `decodeCString` is called with its default `repairingInvalidCodeUnits:
// true` and a non-optional pointer, so its nil-pointer early exit cannot
// trigger. NOTE(review): the force-unwrap additionally assumes the repairing
// path never yields `nil` for non-UTF-8 encodings -- confirm.
// `.0` selects the decoded string and drops the `repairsMade` flag.
self = String.decodeCString(ptr, as: sourceEncoding)!.0
|
||||
}
|
||||
}
|
||||
|
||||
internal func _decodeValidCString(
|
||||
_ cString: UnsafePointer<UInt8>, repair: Bool
|
||||
) -> String {
|
||||
let len = UTF8._nullCodeUnitOffset(in: cString)
|
||||
let bufPtr = UnsafeBufferPointer(start: cString, count: len)
|
||||
return String._fromWellFormedUTF8(bufPtr, repair: repair)
|
||||
}
|
||||
|
||||
internal func _decodeCString(
|
||||
_ cString: UnsafePointer<Int8>, repair: Bool
|
||||
) -> String? {
|
||||
let len = UTF8._nullCodeUnitOffset(in: cString)
|
||||
return cString.withMemoryRebound(to: UInt8.self, capacity: len) {
|
||||
(ptr: UnsafePointer<UInt8>) -> String? in
|
||||
let bufPtr = UnsafeBufferPointer(start: ptr, count: len)
|
||||
return String._fromUTF8(bufPtr, repair: repair)
|
||||
}
|
||||
}
|
||||
|
||||
internal func _decodeCString(
|
||||
_ cString: UnsafePointer<UInt8>, repair: Bool
|
||||
) -> String? {
|
||||
let len = UTF8._nullCodeUnitOffset(in: cString)
|
||||
let bufPtr = UnsafeBufferPointer(start: cString, count: len)
|
||||
return String._fromUTF8(bufPtr, repair: repair)
|
||||
}
|
||||
|
||||
/// Creates a new string by copying the null-terminated data referenced by
|
||||
/// the given pointer using the specified encoding.
|
||||
///
|
||||
/// This internal helper takes the string length as an argument.
|
||||
internal func _decodeCString<Encoding : _UnicodeEncoding>(
|
||||
_ cString: UnsafePointer<Encoding.CodeUnit>,
|
||||
as encoding: Encoding.Type, length: Int,
|
||||
repairingInvalidCodeUnits isRepairing: Bool = true)
|
||||
-> (result: String, repairsMade: Bool)? {
|
||||
|
||||
let buffer = UnsafeBufferPointer<Encoding.CodeUnit>(
|
||||
start: cString, count: length)
|
||||
|
||||
let (guts, hadError) = _StringGuts.fromCodeUnits(
|
||||
buffer, encoding: encoding, repairIllFormedSequences: isRepairing)
|
||||
return guts.map { (result: String($0), repairsMade: hadError) }
|
||||
}
|
||||
|
||||
@@ -63,74 +63,61 @@
|
||||
/// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value
|
||||
@_fixed_layout
|
||||
public struct Character {
|
||||
// Fundamentally, it is just a String, but it is optimized for the common case
|
||||
// where the UTF-16 representation fits in 63 bits. The remaining bit is used
|
||||
// to discriminate between small and large representations. Since a grapheme
|
||||
// cluster cannot have U+0000 anywhere but in its first scalar, we can store
|
||||
// zero in empty code units above the first one.
|
||||
@_frozen // FIXME(sil-serialize-all)
|
||||
@usableFromInline
|
||||
internal enum Representation {
|
||||
case smallUTF16(Builtin.Int63)
|
||||
case large(_UTF16StringStorage)
|
||||
}
|
||||
internal var _str: String
|
||||
|
||||
@inlinable @inline(__always)
|
||||
internal init(unchecked str: String) {
|
||||
self._str = str
|
||||
_invariantCheck()
|
||||
}
|
||||
}
|
||||
|
||||
extension Character {
|
||||
/// Checks the invariants of the backing string `_str`.
///
/// The checks are only compiled in when `INTERNAL_CHECKS_ENABLED` is
/// defined; otherwise the body is empty.
@inlinable @inline(__always)
|
||||
internal func _invariantCheck() {
|
||||
#if INTERNAL_CHECKS_ENABLED
|
||||
// The backing string must contain exactly one element.
_sanityCheck(_str.count == 1)
|
||||
// The backing string's guts must report `isFastUTF8`.
_sanityCheck(_str._guts.isFastUTF8)
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
extension Character {
|
||||
@usableFromInline
|
||||
internal var _representation: Representation
|
||||
typealias UTF8View = String.UTF8View
|
||||
|
||||
// FIXME(sil-serialize-all): Should be @inlinable
|
||||
// <rdar://problem/34557187>
|
||||
internal static func _smallValue(_ value: Builtin.Int63) -> UInt64 {
|
||||
return UInt64(Builtin.zext_Int63_Int64(value))
|
||||
@inlinable
|
||||
internal var utf8: UTF8View {
|
||||
return _str.utf8
|
||||
}
|
||||
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
@usableFromInline
|
||||
typealias UTF16View = String.UTF16View
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
|
||||
@inlinable
|
||||
internal var utf16: UTF16View {
|
||||
return String(self).utf16
|
||||
return _str.utf16
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(_smallRepresentation b: _SmallUTF16) {
|
||||
_sanityCheck(Int64(b._storage) >= 0)
|
||||
_representation = .smallUTF16(
|
||||
Builtin.trunc_Int64_Int63(b._storage._value))
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(_largeRepresentation storage: _UTF16StringStorage) {
|
||||
_representation = .large(storage)
|
||||
}
|
||||
|
||||
/// Creates a Character from a String that is already known to require the
|
||||
/// large representation.
|
||||
///
|
||||
/// - Note: `s` should contain only a single grapheme, but we can't require
|
||||
/// that formally because of grapheme cluster literals and the shifting
|
||||
/// sands of Unicode. https://bugs.swift.org/browse/SR-4955
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(_largeRepresentationString s: String) {
|
||||
let storage = s._guts._extractNativeStorage(of: UTF16.CodeUnit.self)
|
||||
self.init(_largeRepresentation: storage)
|
||||
public typealias UnicodeScalarView = String.UnicodeScalarView
|
||||
@inlinable
|
||||
public var unicodeScalars: UnicodeScalarView {
|
||||
return _str.unicodeScalars
|
||||
}
|
||||
}
|
||||
|
||||
extension Character
|
||||
: _ExpressibleByBuiltinUTF16ExtendedGraphemeClusterLiteral,
|
||||
ExpressibleByExtendedGraphemeClusterLiteral
|
||||
: _ExpressibleByBuiltinUTF16ExtendedGraphemeClusterLiteral,
|
||||
ExpressibleByExtendedGraphemeClusterLiteral
|
||||
{
|
||||
/// Creates a character containing the given Unicode scalar value.
|
||||
///
|
||||
/// - Parameter content: The Unicode scalar value to convert into a character.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable @inline(__always)
|
||||
public init(_ content: Unicode.Scalar) {
|
||||
let content16 = UTF16.encode(content)._unsafelyUnwrappedUnchecked
|
||||
_representation = .smallUTF16(
|
||||
Builtin.zext_Int32_Int63(content16._storage._value))
|
||||
self.init(unchecked: String(content))
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable @inline(__always)
|
||||
@_effects(readonly)
|
||||
public init(_builtinUnicodeScalarLiteral value: Builtin.Int32) {
|
||||
self.init(Unicode.Scalar(_builtinUnicodeScalarLiteral: value))
|
||||
@@ -138,94 +125,30 @@ extension Character
|
||||
|
||||
// Inlining ensures that the whole constructor can be folded away to a single
|
||||
// integer constant in case of small character literals.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inline(__always)
|
||||
@inlinable @inline(__always)
|
||||
@_effects(readonly)
|
||||
public init(
|
||||
_builtinExtendedGraphemeClusterLiteral start: Builtin.RawPointer,
|
||||
utf8CodeUnitCount: Builtin.Word,
|
||||
isASCII: Builtin.Int1
|
||||
) {
|
||||
let utf8 = UnsafeBufferPointer(
|
||||
start: UnsafePointer<Unicode.UTF8.CodeUnit>(start),
|
||||
count: Int(utf8CodeUnitCount))
|
||||
|
||||
if utf8.count == 1 {
|
||||
_representation = .smallUTF16(
|
||||
Builtin.zext_Int8_Int63(utf8.first._unsafelyUnwrappedUnchecked._value))
|
||||
return
|
||||
}
|
||||
|
||||
FastPath:
|
||||
repeat {
|
||||
var shift = 0
|
||||
let maxShift = 64 - 16
|
||||
var bits: UInt64 = 0
|
||||
|
||||
for s8 in Unicode._ParsingIterator(
|
||||
codeUnits: utf8.makeIterator(), parser: UTF8.ForwardParser()) {
|
||||
|
||||
let s16
|
||||
= UTF16.transcode(s8, from: UTF8.self)._unsafelyUnwrappedUnchecked
|
||||
|
||||
for u16 in s16 {
|
||||
guard _fastPath(shift <= maxShift) else { break FastPath }
|
||||
bits |= UInt64(u16) &<< shift
|
||||
shift += 16
|
||||
}
|
||||
}
|
||||
guard _fastPath(Int64(truncatingIfNeeded: bits) >= 0) else {
|
||||
break FastPath
|
||||
}
|
||||
_representation = .smallUTF16(Builtin.trunc_Int64_Int63(bits._value))
|
||||
return
|
||||
}
|
||||
while false
|
||||
|
||||
// For anything that doesn't fit in 63 bits, build the large
|
||||
// representation.
|
||||
self = Character(_largeRepresentationString:
|
||||
String(
|
||||
_builtinExtendedGraphemeClusterLiteral: start,
|
||||
utf8CodeUnitCount: utf8CodeUnitCount,
|
||||
isASCII: isASCII))
|
||||
self.init(unchecked: String(
|
||||
_builtinExtendedGraphemeClusterLiteral: start,
|
||||
utf8CodeUnitCount: utf8CodeUnitCount,
|
||||
isASCII: isASCII))
|
||||
}
|
||||
|
||||
// Inlining ensures that the whole constructor can be folded away to a single
|
||||
// integer constant in case of small character literals.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inline(__always)
|
||||
@inlinable @inline(__always)
|
||||
@_effects(readonly)
|
||||
public init(
|
||||
_builtinExtendedGraphemeClusterLiteral start: Builtin.RawPointer,
|
||||
utf16CodeUnitCount: Builtin.Word
|
||||
) {
|
||||
let utf16 = _UnmanagedString<UTF16.CodeUnit>(
|
||||
start: UnsafePointer(start),
|
||||
count: Int(utf16CodeUnitCount))
|
||||
|
||||
switch utf16.count {
|
||||
case 1:
|
||||
_representation = .smallUTF16(Builtin.zext_Int16_Int63(utf16[0]._value))
|
||||
case 2:
|
||||
let bits = UInt32(utf16[0]) | UInt32(utf16[1]) &<< 16
|
||||
_representation = .smallUTF16(Builtin.zext_Int32_Int63(bits._value))
|
||||
case 3:
|
||||
let bits = UInt64(utf16[0])
|
||||
| UInt64(utf16[1]) &<< 16
|
||||
| UInt64(utf16[2]) &<< 32
|
||||
_representation = .smallUTF16(Builtin.trunc_Int64_Int63(bits._value))
|
||||
case 4 where utf16[3] < 0x8000:
|
||||
let bits = UInt64(utf16[0])
|
||||
| UInt64(utf16[1]) &<< 16
|
||||
| UInt64(utf16[2]) &<< 32
|
||||
| UInt64(utf16[3]) &<< 48
|
||||
_representation = .smallUTF16(Builtin.trunc_Int64_Int63(bits._value))
|
||||
default:
|
||||
// TODO(SSO): small check
|
||||
_representation = .large(
|
||||
_StringGuts(_large: utf16)._extractNativeStorage())
|
||||
}
|
||||
self.init(unchecked: String(
|
||||
_builtinUTF16StringLiteral: start,
|
||||
utf16CodeUnitCount: utf16CodeUnitCount))
|
||||
}
|
||||
|
||||
/// Creates a character with the specified value.
|
||||
@@ -240,9 +163,9 @@ extension Character
|
||||
///
|
||||
/// The assignment to the `oBreve` constant calls this initializer behind the
|
||||
/// scenes.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable @inline(__always)
|
||||
public init(extendedGraphemeClusterLiteral value: Character) {
|
||||
self = value
|
||||
self.init(unchecked: value._str)
|
||||
}
|
||||
|
||||
/// Creates a character from a single-character string.
|
||||
@@ -255,229 +178,53 @@ extension Character
|
||||
///
|
||||
/// - Parameter s: The single-character string to convert to a `Character`
|
||||
/// instance. `s` must contain exactly one extended grapheme cluster.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable @inline(__always)
|
||||
public init(_ s: String) {
|
||||
let count = s._guts.count
|
||||
_precondition(count != 0,
|
||||
_precondition(!s.isEmpty,
|
||||
"Can't form a Character from an empty String")
|
||||
_debugPrecondition(s.index(after: s.startIndex) == s.endIndex,
|
||||
"Can't form a Character from a String containing more than one extended grapheme cluster")
|
||||
|
||||
self.init(_unverified: s._guts)
|
||||
}
|
||||
|
||||
/// Construct a Character from a _StringGuts, assuming it consists of exactly
|
||||
/// one extended grapheme cluster.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(_unverified guts: _StringGuts) {
|
||||
self = _visitGuts(guts,
|
||||
ascii: { ascii in
|
||||
if _fastPath(ascii.count == 1) {
|
||||
return Character(_singleCodeUnit: ascii[0])
|
||||
}
|
||||
// The only multi-scalar ASCII grapheme cluster is CR/LF.
|
||||
_sanityCheck(ascii.count == 2)
|
||||
_sanityCheck(ascii.start[0] == _CR)
|
||||
_sanityCheck(ascii.start[1] == _LF)
|
||||
return Character(_codeUnitPair: UInt16(_CR), UInt16(_LF))
|
||||
},
|
||||
utf16: { utf16 in return Character(_unverified: utf16) },
|
||||
opaque: { opaque in return Character(_unverified: opaque) })
|
||||
}
|
||||
|
||||
/// Construct a Character from a slice of a _StringGuts, assuming
|
||||
/// the specified range covers exactly one extended grapheme cluster.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(_unverified guts: _StringGuts, range: Range<Int>) {
|
||||
self = _visitGuts(
|
||||
guts, range: (range, performBoundsCheck: true),
|
||||
ascii: { ascii in
|
||||
if _fastPath(ascii.count == 1) {
|
||||
return Character(_singleCodeUnit: ascii[0])
|
||||
}
|
||||
// The only multi-scalar ASCII grapheme cluster is CR/LF.
|
||||
_sanityCheck(ascii.count == 2)
|
||||
_sanityCheck(ascii.start[0] == _CR)
|
||||
_sanityCheck(ascii.start[1] == _LF)
|
||||
return Character(_codeUnitPair: UInt16(_CR), UInt16(_LF))
|
||||
},
|
||||
utf16: { utf16 in return Character(_unverified: utf16) },
|
||||
opaque: { opaque in return Character(_unverified: opaque) })
|
||||
}
|
||||
|
||||
@inlinable
|
||||
internal
|
||||
init(_singleCodeUnit cu: UInt16) {
|
||||
_sanityCheck(UTF16._isScalar(cu))
|
||||
_representation = .smallUTF16(
|
||||
Builtin.zext_Int16_Int63(Builtin.reinterpretCast(cu)))
|
||||
}
|
||||
|
||||
@inlinable
|
||||
internal
|
||||
init(_codeUnitPair first: UInt16, _ second: UInt16) {
|
||||
_sanityCheck(
|
||||
(UTF16._isScalar(first) && UTF16._isScalar(second)) ||
|
||||
(UTF16.isLeadSurrogate(first) && UTF16.isTrailSurrogate(second)))
|
||||
_representation = .smallUTF16(
|
||||
Builtin.zext_Int32_Int63(
|
||||
Builtin.reinterpretCast(
|
||||
UInt32(first) | UInt32(second) &<< 16)))
|
||||
}
|
||||
|
||||
@inlinable
|
||||
internal
|
||||
init(_unverified storage: _SwiftStringStorage<Unicode.UTF16.CodeUnit>) {
|
||||
if _fastPath(storage.count <= 4) {
|
||||
_sanityCheck(storage.count > 0)
|
||||
let b = _SmallUTF16(storage.unmanagedView)
|
||||
if _fastPath(Int64(bitPattern: b._storage) >= 0) {
|
||||
self.init(_smallRepresentation: b)
|
||||
_fixLifetime(storage)
|
||||
return
|
||||
}
|
||||
}
|
||||
// FIXME: We may want to make a copy if storage.unusedCapacity > 0
|
||||
self.init(_largeRepresentation: storage)
|
||||
}
|
||||
|
||||
@inlinable
|
||||
internal
|
||||
init<V: _StringVariant>(_unverified variant: V) {
|
||||
if _fastPath(variant.count <= 4) {
|
||||
_sanityCheck(variant.count > 0)
|
||||
let b = _SmallUTF16(variant)
|
||||
if _fastPath(Int64(bitPattern: b._storage) >= 0) {
|
||||
self.init(_smallRepresentation: b)
|
||||
return
|
||||
}
|
||||
}
|
||||
self.init(_largeRepresentation: variant._copyToNativeStorage())
|
||||
self.init(unchecked: s)
|
||||
}
|
||||
}
|
||||
|
||||
extension Character : CustomStringConvertible {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var description: String {
|
||||
return String(self)
|
||||
}
|
||||
@inlinable
|
||||
public var description: String {
|
||||
return _str
|
||||
}
|
||||
}
|
||||
|
||||
extension Character : LosslessStringConvertible { }
|
||||
|
||||
extension Character : CustomDebugStringConvertible {
|
||||
/// A textual representation of the character, suitable for debugging.
|
||||
public var debugDescription: String {
|
||||
return String(self).debugDescription
|
||||
}
|
||||
}
|
||||
|
||||
extension Character {
|
||||
@usableFromInline
|
||||
internal typealias _SmallUTF16 = _UIntBuffer<UInt64, Unicode.UTF16.CodeUnit>
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var _smallUTF16 : _SmallUTF16? {
|
||||
guard case .smallUTF16(let _63bits) = _representation else { return nil }
|
||||
_onFastPath()
|
||||
let bits = UInt64(Builtin.zext_Int63_Int64(_63bits))
|
||||
let minBitWidth = type(of: bits).bitWidth - bits.leadingZeroBitCount
|
||||
return _SmallUTF16(
|
||||
_storage: bits,
|
||||
_bitCount: UInt8(
|
||||
truncatingIfNeeded: 16 * Swift.max(1, (minBitWidth + 15) / 16))
|
||||
)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var _largeUTF16 : _UTF16StringStorage? {
|
||||
guard case .large(let storage) = _representation else { return nil }
|
||||
return storage
|
||||
}
|
||||
|
||||
@usableFromInline // @testable
|
||||
internal var _isSmall: Bool {
|
||||
guard case .smallUTF16(_) = _representation else { return false }
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
extension Character {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var _count : Int {
|
||||
if let small = _smallUTF16 { return small.count }
|
||||
return _largeUTF16._unsafelyUnwrappedUnchecked.count
|
||||
}
|
||||
/// A textual representation of the character, suitable for debugging.
|
||||
public var debugDescription: String {
|
||||
return _str.debugDescription
|
||||
}
|
||||
}
|
||||
|
||||
extension String {
|
||||
/// Creates a string containing the given character.
|
||||
///
|
||||
/// - Parameter c: The character to convert to a string.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable @inline(__always)
|
||||
public init(_ c: Character) {
|
||||
if let utf16 = c._smallUTF16 {
|
||||
if let small = _SmallUTF8String(utf16) {
|
||||
self = String(_StringGuts(small))
|
||||
} else {
|
||||
// FIXME: Remove when we support UTF-8 in small string
|
||||
self = String(decoding: utf16, as: Unicode.UTF16.self)
|
||||
}
|
||||
}
|
||||
else {
|
||||
// TODO(SSO): small check. For now, since we only do ASCII, this won't hit
|
||||
self = String(_StringGuts(_large: c._largeUTF16!))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `.small` characters are stored in an Int63 with their UTF-8 representation,
|
||||
/// with any unused bytes set to 0xFF. ASCII characters will have all bytes set
|
||||
/// to 0xFF except for the lowest byte, which will store the ASCII value. Since
|
||||
/// 0x7FFFFFFFFFFFFF80 or greater is an invalid UTF-8 sequence, we know if a
|
||||
/// value is ASCII by checking if it is greater than or equal to
|
||||
/// 0x7FFFFFFFFFFFFF00.
|
||||
// FIXME(sil-serialize-all): Should be @inlinable
|
||||
// <rdar://problem/34557187>
|
||||
internal var _minASCIICharReprBuiltin: Builtin.Int63 {
|
||||
@inline(__always) get {
|
||||
let x: Int64 = 0x7FFFFFFFFFFFFF00
|
||||
return Builtin.truncOrBitCast_Int64_Int63(x._value)
|
||||
self.init(c._str._guts)
|
||||
}
|
||||
}
|
||||
|
||||
extension Character : Equatable {
|
||||
@inlinable
|
||||
@inlinable @inline(__always)
|
||||
public static func == (lhs: Character, rhs: Character) -> Bool {
|
||||
let l0 = lhs._smallUTF16
|
||||
if _fastPath(l0 != nil), let l = l0?._storage {
|
||||
let r0 = rhs._smallUTF16
|
||||
if _fastPath(r0 != nil), let r = r0?._storage {
|
||||
if (l | r) < 0x300 { return l == r }
|
||||
if l == r { return true }
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME(performance): constructing two temporary strings is extremely
|
||||
// wasteful and inefficient.
|
||||
return String(lhs) == String(rhs)
|
||||
return lhs._str == rhs._str
|
||||
}
|
||||
}
|
||||
|
||||
extension Character : Comparable {
|
||||
@inlinable
|
||||
@inlinable @inline(__always)
|
||||
public static func < (lhs: Character, rhs: Character) -> Bool {
|
||||
let l0 = lhs._smallUTF16
|
||||
if _fastPath(l0 != nil), let l = l0?._storage {
|
||||
let r0 = rhs._smallUTF16
|
||||
if _fastPath(r0 != nil), let r = r0?._storage {
|
||||
if (l | r) < 0x80 { return l < r }
|
||||
if l == r { return false }
|
||||
}
|
||||
}
|
||||
// FIXME(performance): constructing two temporary strings is extremely
|
||||
// wasteful and inefficient.
|
||||
return String(lhs) < String(rhs)
|
||||
return lhs._str < rhs._str
|
||||
}
|
||||
}
|
||||
|
||||
@@ -490,8 +237,13 @@ extension Character: Hashable {
|
||||
/// of this instance.
|
||||
@_effects(releasenone)
|
||||
public func hash(into hasher: inout Hasher) {
|
||||
// FIXME(performance): constructing a temporary string is extremely
|
||||
// wasteful and inefficient.
|
||||
hasher.combine(String(self))
|
||||
_str.hash(into: &hasher)
|
||||
}
|
||||
}
|
||||
|
||||
extension Character {
|
||||
/// Whether the object inside the backing string's guts reports `isSmall`.
///
/// Forwarded unchanged from `_str._guts._object`; kept visible for tests per
/// the `@testable` note on the attribute.
@usableFromInline // @testable
|
||||
internal var _isSmall: Bool {
|
||||
return _str._guts._object.isSmall
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,194 +0,0 @@
|
||||
//===--- CharacterUnicodeScalars.swift ------------------------------------===//
|
||||
//
|
||||
// This source file is part of the Swift.org open source project
|
||||
//
|
||||
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
|
||||
// Licensed under Apache License v2.0 with Runtime Library Exception
|
||||
//
|
||||
// See https://swift.org/LICENSE.txt for license information
|
||||
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Declares the scalar view type of `Character` and the property that vends it.
extension Character {
|
||||
/// A view that wraps a single `Character`.
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
public struct UnicodeScalarView {
|
||||
/// The character this view was created from.
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal let _base: Character
|
||||
|
||||
/// Stores `_base` as-is; no conversion happens here.
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(_base: Character) {
|
||||
self._base = _base
|
||||
}
|
||||
}
|
||||
|
||||
/// A `UnicodeScalarView` wrapping this character.
@inlinable // FIXME(sil-serialize-all)
|
||||
public var unicodeScalars : UnicodeScalarView {
|
||||
return UnicodeScalarView(_base: self)
|
||||
}
|
||||
}
|
||||
|
||||
extension Character.UnicodeScalarView {
|
||||
/// The iterator type of `Character.UnicodeScalarView`; a thin wrapper around
/// an `IndexingIterator` over the view.
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
public struct Iterator {
|
||||
/// The wrapped indexing iterator that holds the iteration state.
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal var _base: IndexingIterator<Character.UnicodeScalarView>
|
||||
|
||||
/// Stores the given indexing iterator unchanged.
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(_base: IndexingIterator<Character.UnicodeScalarView>) {
|
||||
self._base = _base
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extension Character.UnicodeScalarView.Iterator : IteratorProtocol {
|
||||
/// Returns whatever the wrapped indexing iterator returns from `next()`.
@inlinable // FIXME(sil-serialize-all)
|
||||
public mutating func next() -> UnicodeScalar? {
|
||||
return _base.next()
|
||||
}
|
||||
}
|
||||
|
||||
extension Character.UnicodeScalarView : Sequence {
|
||||
/// Creates an `Iterator` backed by an `IndexingIterator` over this view.
@inlinable // FIXME(sil-serialize-all)
|
||||
public __consuming func makeIterator() -> Iterator {
|
||||
return Iterator(_base: IndexingIterator(_elements: self))
|
||||
}
|
||||
}
|
||||
|
||||
extension Character.UnicodeScalarView {
|
||||
/// A position in a `Character.UnicodeScalarView`.
///
/// Besides the offset, the index stores the encoded scalar and a stride.
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
public struct Index {
|
||||
/// Offset of this position; the only field used by `==` and `<` on `Index`.
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal let _encodedOffset: Int
|
||||
/// The UTF-16 encoded scalar stored with this position.
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal let _scalar: Unicode.UTF16.EncodedScalar
|
||||
/// Amount added to `_encodedOffset` to reach the start of the next scalar.
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal let _stride: UInt8
|
||||
|
||||
/// Memberwise initializer; stores all three values unchanged.
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(
|
||||
_encodedOffset: Int,
|
||||
_scalar: Unicode.UTF16.EncodedScalar, _stride: UInt8
|
||||
) {
|
||||
self._encodedOffset = _encodedOffset
|
||||
self._scalar = _scalar
|
||||
self._stride = _stride
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extension Character.UnicodeScalarView.Index : Equatable {
|
||||
/// Two indices are equal when their `_encodedOffset` values are equal; the
/// stored scalar and stride do not take part in the comparison.
@inlinable // FIXME(sil-serialize-all)
|
||||
public static func == (
|
||||
lhs: Character.UnicodeScalarView.Index,
|
||||
rhs: Character.UnicodeScalarView.Index
|
||||
) -> Bool {
|
||||
return lhs._encodedOffset == rhs._encodedOffset
|
||||
}
|
||||
}
|
||||
|
||||
extension Character.UnicodeScalarView.Index : Comparable {
|
||||
/// Orders indices by `_encodedOffset` alone; the stored scalar and stride do
/// not take part in the comparison.
@inlinable // FIXME(sil-serialize-all)
|
||||
public static func < (
|
||||
lhs: Character.UnicodeScalarView.Index,
|
||||
rhs: Character.UnicodeScalarView.Index
|
||||
) -> Bool {
|
||||
return lhs._encodedOffset < rhs._encodedOffset
|
||||
}
|
||||
}
|
||||
|
||||
// Collection conformance: indices carry the already-parsed scalar, so
// subscripting only decodes it, while `index(after:)` does the parsing.
extension Character.UnicodeScalarView : Collection {
|
||||
/// The position of the first scalar.
///
/// Obtained by advancing from a placeholder index with offset 0 and stride 0,
/// which makes `index(after:)` parse the scalar that starts at offset 0.
@inlinable // FIXME(sil-serialize-all)
|
||||
public var startIndex: Index {
|
||||
return index(
|
||||
after: Index(
|
||||
_encodedOffset: 0,
|
||||
_scalar: Unicode.UTF16.EncodedScalar(),
|
||||
_stride: 0
|
||||
))
|
||||
}
|
||||
|
||||
/// The past-the-end position: offset `_base._count`, an empty encoded scalar,
/// and stride 0.
@inlinable // FIXME(sil-serialize-all)
|
||||
public var endIndex: Index {
|
||||
return Index(
|
||||
_encodedOffset: _base._count,
|
||||
_scalar: Unicode.UTF16.EncodedScalar(),
|
||||
_stride: 0
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns the index of the scalar that starts right after the one at `i`.
///
/// Parses one scalar from the character's UTF-16 code units beginning at
/// `i._encodedOffset + i._stride`. Returns `endIndex` when no input is left
/// and `i` has a non-zero stride; traps when no input is left and `i` has a
/// zero stride.
@inlinable // FIXME(sil-serialize-all)
|
||||
public func index(after i: Index) -> Index {
|
||||
var parser = Unicode.UTF16.ForwardParser()
|
||||
let startOfNextScalar = i._encodedOffset + numericCast(i._stride)
|
||||
let r: Unicode.ParseResult<Unicode.UTF16.EncodedScalar>
|
||||
|
||||
// Parse from the small representation when the character has one...
let small_ = _base._smallUTF16
|
||||
if _fastPath(small_ != nil), let u16 = small_ {
|
||||
var i = u16[u16.index(u16.startIndex, offsetBy: startOfNextScalar)...]
|
||||
.makeIterator()
|
||||
r = parser.parseScalar(from: &i)
|
||||
}
|
||||
// ...otherwise from the large storage, which is force-unwrapped here.
else {
|
||||
let c = _base._largeUTF16!.unmanagedView
|
||||
var i = c[c.index(c.startIndex, offsetBy: startOfNextScalar)...]
|
||||
.makeIterator()
|
||||
r = parser.parseScalar(from: &i)
|
||||
}
|
||||
|
||||
switch r {
|
||||
case .valid(let s):
|
||||
// Record the parsed scalar and its code-unit count as the stride.
return Index(
|
||||
_encodedOffset: startOfNextScalar, _scalar: s,
|
||||
_stride: UInt8(truncatingIfNeeded: s.count))
|
||||
case .error:
|
||||
// On a parse error, store the replacement character with a stride of 1.
return Index(
|
||||
_encodedOffset: startOfNextScalar,
|
||||
_scalar: Unicode.UTF16.encodedReplacementCharacter,
|
||||
_stride: 1)
|
||||
case .emptyInput:
|
||||
// A non-zero stride means `i` referred to a scalar, so the next position
// is the end; a zero stride means there was nothing to advance from.
if i._stride != 0 { return endIndex }
|
||||
fatalError("no position after end of Character's last Unicode.Scalar")
|
||||
}
|
||||
}
|
||||
|
||||
/// Decodes and returns the scalar stored in the index `i`.
@inlinable // FIXME(sil-serialize-all)
|
||||
public subscript(_ i: Index) -> UnicodeScalar {
|
||||
return Unicode.UTF16.decode(i._scalar)
|
||||
}
|
||||
}
|
||||
|
||||
extension Character.UnicodeScalarView : BidirectionalCollection {
  /// Returns the position preceding `i`.
  ///
  /// The UTF-16 code units in front of `i._encodedOffset` are fed, in reverse
  /// order, to a reverse parser. A parse error yields an index one code unit
  /// back that holds the encoded replacement character. If there are no code
  /// units in front of `i` (that is, `i` is the first position), this traps.
  @inlinable // FIXME(sil-serialize-all)
  public func index(before i: Index) -> Index {
    var parser = Unicode.UTF16.ReverseParser()
    let end = i._encodedOffset
    let r: Unicode.ParseResult<Unicode.UTF16.EncodedScalar>

    // The iterators get their own name so they no longer shadow parameter `i`.
    let small_ = _base._smallUTF16
    if _fastPath(small_ != nil), let u16 = small_ {
      var units = u16[..<u16.index(u16.startIndex, offsetBy: end)]
        .reversed().makeIterator()
      r = parser.parseScalar(from: &units)
    }
    else {
      let c = _base._largeUTF16!.unmanagedView
      var units = c[..<c.index(c.startIndex, offsetBy: end)]
        .reversed().makeIterator()
      r = parser.parseScalar(from: &units)
    }

    switch r {
    case .valid(let s):
      return Index(
        _encodedOffset: end - s.count, _scalar: s,
        _stride: UInt8(truncatingIfNeeded: s.count))
    case .error:
      return Index(
        _encodedOffset: end - 1,
        _scalar: Unicode.UTF16.encodedReplacementCharacter,
        _stride: 1)
    case .emptyInput:
      // Nothing precedes `i`, so it was the first scalar (message previously
      // said "last", which described the wrong end of the collection).
      fatalError("no position before Character's first Unicode.Scalar")
    }
  }
}
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This source file is part of the Swift.org open source project
|
||||
//
|
||||
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
|
||||
// Licensed under Apache License v2.0 with Runtime Library Exception
|
||||
//
|
||||
// See https://swift.org/LICENSE.txt for license information
|
||||
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// A type that can expose its contents to a closure as a raw byte buffer.
@usableFromInline
internal protocol _HasContiguousBytes {
  /// Calls `body` with a raw buffer over the receiver's bytes and returns
  /// whatever `body` returns, rethrowing any error it throws.
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R

  /// Whether `withUnsafeBytes` can hand out the bytes without first copying
  /// them into temporary storage.
  var _providesContiguousBytesNoCopy: Bool { get }
}
|
||||
extension _HasContiguousBytes {
  /// Default answer for conforming types: bytes are available without a copy.
  /// Conformers that may need to copy override this.
  @inlinable
  var _providesContiguousBytesNoCopy: Bool {
    @inline(__always) get {
      return true
    }
  }
}
|
||||
extension Array: _HasContiguousBytes {
  /// Currently always reports `true`; see the TODO below for the intended
  /// refinement.
  @inlinable
  var _providesContiguousBytesNoCopy: Bool {
    // TODO(UTF8 merge): Query `_buffer._isNative`, which is internal
    @inline(__always) get {
      return true
    }
  }
}
|
||||
// ContiguousArray adds no members of its own here, so it picks up the
// protocol extension's default `_providesContiguousBytesNoCopy` (`true`).
extension ContiguousArray: _HasContiguousBytes {}
|
||||
extension UnsafeBufferPointer: _HasContiguousBytes {
  /// Calls `body` with a raw view of the memory this buffer covers.
  ///
  /// The raw buffer starts at `baseAddress` and spans
  /// `count * MemoryLayout<Element>.stride` bytes. A buffer with a `nil`
  /// base address is passed on as a raw buffer with a `nil` start rather
  /// than being unwrapped without a check.
  @inlinable @inline(__always)
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R {
    // Optional-preserving conversion: `nil` stays `nil`.
    let ptr = UnsafeRawPointer(self.baseAddress)
    let len = self.count &* MemoryLayout<Element>.stride
    return try body(UnsafeRawBufferPointer(start: ptr, count: len))
  }
}
|
||||
extension UnsafeMutableBufferPointer: _HasContiguousBytes {
  /// Calls `body` with a read-only raw view of the memory this buffer covers.
  ///
  /// The raw buffer starts at `baseAddress` and spans
  /// `count * MemoryLayout<Element>.stride` bytes. A buffer with a `nil`
  /// base address is passed on as a raw buffer with a `nil` start rather
  /// than being unwrapped without a check.
  @inlinable @inline(__always)
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R {
    // Optional-preserving conversion: `nil` stays `nil`.
    let ptr = UnsafeRawPointer(self.baseAddress)
    let len = self.count &* MemoryLayout<Element>.stride
    return try body(UnsafeRawBufferPointer(start: ptr, count: len))
  }
}
|
||||
extension String: _HasContiguousBytes {
  /// `true` exactly when the string's guts report fast UTF-8 storage, which
  /// is the case where `withUnsafeBytes` does not copy.
  @inlinable
  var _providesContiguousBytesNoCopy: Bool {
    @inline(__always) get { return self._guts.isFastUTF8 }
  }

  /// Calls `body` with the string's UTF-8 bytes.
  ///
  /// Fast-UTF-8 strings expose their storage directly; any other string is
  /// first copied into a temporary `ContiguousArray` of its UTF-8 view.
  @inlinable @inline(__always)
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R {
    guard _fastPath(self._guts.isFastUTF8) else {
      // Slow path: materialize the UTF-8 code units, then lend those out.
      let copiedUTF8 = ContiguousArray(self.utf8)
      return try copiedUTF8.withUnsafeBytes { rawBytes in
        try body(rawBytes)
      }
    }

    return try self._guts.withFastUTF8 { utf8 in
      try body(UnsafeRawBufferPointer(utf8))
    }
  }
}
|
||||
extension Substring: _HasContiguousBytes {
  /// `true` exactly when the guts of the whole underlying string report fast
  /// UTF-8 storage.
  @inlinable
  var _providesContiguousBytesNoCopy: Bool {
    @inline(__always) get { return self.wholeGuts.isFastUTF8 }
  }

  /// Calls `body` with the UTF-8 bytes of this substring.
  ///
  /// With fast UTF-8 storage the whole string's buffer is sliced to the
  /// substring's encoded offsets and rebased; otherwise the substring's UTF-8
  /// view is first copied into a temporary `ContiguousArray`.
  @inlinable @inline(__always)
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R {
    // TODO(UTF8): less error prone to have Substring and/or slice provide a
    // sliced fastUTF8
    guard _fastPath(self.wholeGuts.isFastUTF8) else {
      let copiedUTF8 = ContiguousArray(self.utf8)
      return try copiedUTF8.withUnsafeBytes { rawBytes in
        try body(rawBytes)
      }
    }

    return try self.wholeGuts.withFastUTF8() { wholeUTF8 -> R in
      let lower = self.startIndex.encodedOffset
      let upper = self.endIndex.encodedOffset
      let slice = UnsafeBufferPointer(rebasing: wholeUTF8[lower..<upper])
      return try body(UnsafeRawBufferPointer(slice))
    }
  }
}
|
||||
@@ -7,7 +7,6 @@
|
||||
"ASCII.swift",
|
||||
"CString.swift",
|
||||
"Character.swift",
|
||||
"CharacterUnicodeScalars.swift",
|
||||
"ICU.swift",
|
||||
"NormalizedCodeUnitIterator.swift",
|
||||
"SmallString.swift",
|
||||
@@ -21,7 +20,7 @@
|
||||
"StringObject.swift",
|
||||
"StringGuts.swift",
|
||||
"StringGutsVisitor.swift",
|
||||
"StringGraphemeBreaking.swift",
|
||||
"StringGraphemeBreaking.swift",
|
||||
"StringHashable.swift",
|
||||
"StringIndex.swift",
|
||||
"StringIndexConversions.swift",
|
||||
@@ -36,7 +35,7 @@
|
||||
"StringUTF16View.swift",
|
||||
"StringUTF8View.swift",
|
||||
"StringUnicodeScalarView.swift",
|
||||
"StringVariant.swift",
|
||||
"StringVariant.swift",
|
||||
"Substring.swift",
|
||||
"Unicode.swift",
|
||||
"UnicodeEncoding.swift",
|
||||
@@ -108,6 +107,7 @@
|
||||
"CocoaArray.swift",
|
||||
"ContiguousArray.swift",
|
||||
"ContiguousArrayBuffer.swift",
|
||||
"ContiguouslyStored.swift",
|
||||
"FixedArray.swift",
|
||||
"SliceBuffer.swift",
|
||||
"SwiftNativeNSArray.swift"],
|
||||
|
||||
@@ -64,9 +64,8 @@ public func readLine(strippingNewline: Bool = true) -> String? {
|
||||
}
|
||||
}
|
||||
}
|
||||
let result = String._fromUTF8(
|
||||
UnsafeBufferPointer(start: linePtr, count: readBytes),
|
||||
repair: true)!
|
||||
let result = String._fromUTF8Repairing(
|
||||
UnsafeBufferPointer(start: linePtr, count: readBytes)).0
|
||||
_swift_stdlib_free(linePtr)
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -139,29 +139,31 @@ extension FixedWidthInteger {
|
||||
/// `radix`.
|
||||
/// - radix: The radix, or base, to use for converting `text` to an integer
|
||||
/// value. `radix` must be in the range `2...36`. The default is 10.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable // @specializable
|
||||
@_semantics("optimize.sil.specialize.generic.partial.never")
|
||||
public init?<S : StringProtocol>(_ text: S, radix: Int = 10) {
|
||||
_precondition(2...36 ~= radix, "Radix not in range 2...36")
|
||||
let r = Self(radix)
|
||||
let range = text._encodedOffsetRange
|
||||
let guts = text._wholeString._guts
|
||||
let result: Self?
|
||||
result = _visitGuts(guts,
|
||||
range: (range, false), args: r,
|
||||
ascii: { view, radix in
|
||||
var i = view.makeIterator()
|
||||
return _parseASCII(codeUnits: &i, radix: radix) },
|
||||
utf16: { view, radix in
|
||||
var i = view.makeIterator()
|
||||
return Self._parseASCIISlowPath(codeUnits: &i, radix: radix) },
|
||||
opaque: { view, radix in
|
||||
var i = view.makeIterator()
|
||||
return Self._parseASCIISlowPath(codeUnits: &i, radix: radix) }
|
||||
)
|
||||
|
||||
guard _fastPath(result != nil) else { return nil }
|
||||
self = result._unsafelyUnwrappedUnchecked
|
||||
if let str = text as? String, str._guts.isFastUTF8 {
|
||||
guard let ret = str._guts.withFastUTF8 ({ utf8 -> Self? in
|
||||
var iter = utf8.makeIterator()
|
||||
return _parseASCII(codeUnits: &iter, radix: Self(radix))
|
||||
}) else {
|
||||
return nil
|
||||
}
|
||||
self = ret
|
||||
return
|
||||
}
|
||||
|
||||
// TODO(String performance): We can provide fast paths for common radices,
|
||||
// native UTF-8 storage, etc.
|
||||
|
||||
var iter = text.utf8.makeIterator()
|
||||
guard let ret = Self._parseASCIISlowPath(
|
||||
codeUnits: &iter, radix: Self(radix)
|
||||
) else { return nil }
|
||||
|
||||
self = ret
|
||||
}
|
||||
|
||||
/// Creates a new integer value from the given string.
|
||||
@@ -182,7 +184,7 @@ extension FixedWidthInteger {
|
||||
/// Int("10000000000000000000000000") // Out of range
|
||||
///
|
||||
/// - Parameter description: The ASCII representation of a number.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
@_semantics("optimize.sil.specialize.generic.partial.never")
|
||||
@inline(__always)
|
||||
public init?(_ description: String) {
|
||||
|
||||
@@ -44,31 +44,6 @@ public func withExtendedLifetime<T, Result>(
|
||||
return try body(x)
|
||||
}
|
||||
|
||||
extension String {

  /// Invokes `body` with a pointer to this string's contents encoded as a
  /// null-terminated sequence of UTF-8 code units.
  ///
  /// The pointer is only valid while `withCString(_:)` is executing; it must
  /// not be stored or returned for use afterwards.
  ///
  /// - Parameter body: A closure that receives a pointer to the
  ///   null-terminated UTF-8 code units. Whatever `body` returns becomes the
  ///   return value of `withCString(_:)`. The pointer argument is valid only
  ///   for the duration of the call.
  /// - Returns: The value returned by `body`, if any.
  @inlinable
  public func withCString<Result>(
    _ body: (UnsafePointer<Int8>) throws -> Result
  ) rethrows -> Result {
    let nulTerminated = self.utf8CString
    return try nulTerminated.withUnsafeBufferPointer { cBuffer in
      try body(cBuffer.baseAddress!)
    }
  }
}
|
||||
|
||||
// Fix the lifetime of the given instruction so that the ARC optimizer does not
|
||||
// shorten the lifetime of x to be before this point.
|
||||
@_transparent
|
||||
|
||||
@@ -582,19 +582,14 @@ extension String.UTF8View {
|
||||
extension String {
|
||||
@available(swift, obsoleted: 4)
|
||||
public subscript(bounds: Range<Index>) -> String {
|
||||
// TODO: Make unreachable when the Foundation overlay is off of Swift 3
|
||||
_boundsCheck(bounds)
|
||||
return String(Substring(_slice: Slice(base: self, bounds: bounds)))
|
||||
return String(self[bounds])
|
||||
}
|
||||
|
||||
@available(swift, obsoleted: 4)
|
||||
public subscript(bounds: ClosedRange<Index>) -> String {
|
||||
// TODO: Make unreachable when the Foundation overlay is off of Swift 3
|
||||
let r = bounds.relative(to: self)
|
||||
_boundsCheck(r)
|
||||
return String(Substring(_slice: Slice(
|
||||
base: self,
|
||||
bounds: r)))
|
||||
_boundsCheck(bounds)
|
||||
return String(self[bounds])
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -10,229 +10,4 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// An iterator that yields the UTF-16 code units of its source after
/// normalization, one normalization segment at a time.
///
/// Segments that fit are normalized through the fixed-size `segmentBuffer`.
/// Once a segment does not fit, the heap-allocated `overflowBuffer` /
/// `normalizationBuffer` pair is created and used from then on.
internal
struct _NormalizedCodeUnitIterator: IteratorProtocol {
  var segmentBuffer = _FixedArray16<CodeUnit>(allZeros:())
  var overflowBuffer: [CodeUnit]? = nil
  var normalizationBuffer: [CodeUnit]? = nil
  var source: _SegmentSource
  // Read position and number of valid code units in `segmentBuffer`.
  var segmentBufferIndex = 0
  var segmentBufferCount = 0
  // Read position and number of valid code units in `overflowBuffer`.
  var overflowBufferIndex = 0
  var overflowBufferCount = 0

  typealias CodeUnit = UInt16

  /// Creates an iterator over the code units of `collection`.
  init<Source: BidirectionalCollection>
  (_ collection: Source)
  where Source.Element == UInt16, Source.SubSequence == Source
  {
    source = _CollectionSource(collection)
  }

  /// Creates an iterator over the code units of `guts` within `range`,
  /// beginning `startIndex` code units after `range.lowerBound`.
  init(_ guts: _StringGuts, _ range: Range<Int>, startIndex: Int = 0) {
    source = _StringGutsSource(guts, range, start: startIndex)
  }

  /// Compares the remaining normalized code units of this iterator with those
  /// of `other`, code unit by code unit. A sequence that ends first while all
  /// earlier code units matched orders before the longer one.
  mutating func compare(with other: _NormalizedCodeUnitIterator) -> _Ordering {
    var mutableOther = other
    for cu in IteratorSequence(self) {
      if let otherCU = mutableOther.next() {
        let result = _lexicographicalCompare(cu, otherCU)
        if result == .equal {
          continue
        } else {
          return result
        }
      } else {
        //other returned nil, we are greater
        return .greater
      }
    }

    //we ran out of code units, either we are equal, or only we ran out and
    //other is greater
    if let _ = mutableOther.next() {
      return .less
    } else {
      return .equal
    }
  }

  /// A segment source backed by a collection of UTF-16 code units.
  struct _CollectionSource<Source: BidirectionalCollection>: _SegmentSource
  where Source.Element == UInt16, Source.SubSequence == Source
  {
    /// Number of code units from the current position to the end.
    var remaining: Int {
      @_specialize(where Source == _UnmanagedString<UInt16>)
      @_specialize(where Source == _UnmanagedOpaqueString)
      get {
        return collection.distance(from: index, to: collection.endIndex)
      }
    }
    var collection: Source
    var index: Source.Index

    init(_ collection: Source) {
      self.collection = collection
      index = collection.startIndex
    }

    /// Copies code units into `buffer` up to the next normalization boundary
    /// (or the end of the collection) and returns how many were written.
    /// Returns `nil`, with the position restored, if `buffer` is too small.
    @_specialize(where Source == _UnmanagedString<UInt16>)
    @_specialize(where Source == _UnmanagedOpaqueString)
    mutating func tryFill(buffer: UnsafeMutableBufferPointer<UInt16>) -> Int? {
      var bufferIndex = 0
      let originalIndex = index
      repeat {
        guard index != collection.endIndex else {
          break
        }

        guard bufferIndex < buffer.count else {
          //The buffer isn't big enough for the current segment
          index = originalIndex
          return nil
        }

        let cu = collection[index]
        buffer[bufferIndex] = cu
        index = collection.index(after: index)
        bufferIndex += 1
      } while !collection.hasNormalizationBoundary(after: collection.index(before: index))

      return bufferIndex
    }
  }

  /// A segment source backed by a range of code-unit offsets in `_StringGuts`.
  ///
  /// `index` is an absolute offset into `guts` (it starts at
  /// `range.lowerBound + start`), so every bound check is made against
  /// `range.upperBound`, not against the length of the range.
  struct _StringGutsSource: _SegmentSource {
    /// Number of code units from the current position to the end of `range`.
    var remaining: Int {
      // `index` is absolute; measuring against `range.count` under-reported
      // whenever `range.lowerBound` was non-zero.
      return range.upperBound - index
    }
    var guts: _StringGuts
    var index: Int
    var range: Range<Int>

    init(_ guts: _StringGuts, _ range: Range<Int>, start: Int = 0) {
      self.guts = guts
      self.range = range
      index = range.lowerBound + start
    }

    /// Copies code units into `buffer` up to the next normalization boundary
    /// (or the end of `range`) and returns how many were written.
    /// Returns `nil`, with the position restored, if `buffer` is too small.
    mutating func tryFill(buffer: UnsafeMutableBufferPointer<UInt16>) -> Int? {
      var bufferIndex = 0
      let originalIndex = index
      repeat {
        // Stop at the end of the range (absolute offset comparison).
        guard index < range.upperBound else {
          break
        }

        guard bufferIndex < buffer.count else {
          //The buffer isn't big enough for the current segment
          index = originalIndex
          return nil
        }

        let cu = guts.codeUnit(atCheckedOffset: index)
        buffer[bufferIndex] = cu
        index += 1
        bufferIndex += 1
      } while !guts.hasNormalizationBoundary(after: index - 1)

      return bufferIndex
    }
  }

  /// Returns the next normalized code unit, or `nil` once the source and both
  /// output buffers are exhausted.
  mutating func next() -> CodeUnit? {
    // Reset a buffer once everything in it has been handed out.
    if segmentBufferCount == segmentBufferIndex {
      segmentBuffer = _FixedArray16<CodeUnit>(allZeros:())
      segmentBufferCount = 0
      segmentBufferIndex = 0
    }

    if overflowBufferCount == overflowBufferIndex {
      overflowBufferCount = 0
      overflowBufferIndex = 0
    }

    if source.remaining <= 0
    && segmentBufferCount == 0
    && overflowBufferCount == 0 {
      // Our source of code units to normalize is empty and our buffers from
      // previous normalizations are also empty.
      return nil
    }

    if segmentBufferCount == 0 && overflowBufferCount == 0 {
      //time to fill a buffer if possible. Otherwise we are done, return nil
      // Normalize segment, and then compare first code unit
      var intermediateBuffer = _FixedArray16<CodeUnit>(allZeros:())
      if overflowBuffer == nil,
         let filled = source.tryFill(buffer: &intermediateBuffer)
      {
        guard let count = _tryNormalize(
          _castOutputBuffer(&intermediateBuffer,
          endingAt: filled),
          into: &segmentBuffer
        )
        else {
          fatalError("Output buffer was not big enough, this should not happen")
        }
        segmentBufferCount = count
      } else {
        // The segment did not fit the fixed-size buffer (or the overflow
        // buffers already exist): use heap buffers sized from what remains.
        let size = source.remaining * _Normalization._maxNFCExpansionFactor
        if overflowBuffer == nil {
          overflowBuffer = Array(repeating: 0, count: size)
          normalizationBuffer = Array(repeating:0, count: size)
        }

        guard let count = normalizationBuffer!.withUnsafeMutableBufferPointer({
          (normalizationBufferPtr) -> Int? in
          guard let filled = source.tryFill(buffer: normalizationBufferPtr)
          else {
            fatalError("Invariant broken, buffer should have space")
          }
          return overflowBuffer!.withUnsafeMutableBufferPointer {
            (overflowBufferPtr) -> Int? in
            return _tryNormalize(
              UnsafeBufferPointer( rebasing: normalizationBufferPtr[..<filled]),
              into: overflowBufferPtr
            )
          }
        }) else {
          fatalError("Invariant broken, overflow buffer should have space")
        }

        overflowBufferCount = count
      }
    }

    //exactly one of the buffers should have code units for us to return
    _sanityCheck((segmentBufferCount == 0)
      != ((overflowBuffer?.count ?? 0) == 0))

    if segmentBufferIndex < segmentBufferCount {
      let index = segmentBufferIndex
      segmentBufferIndex += 1
      return segmentBuffer[index]
    } else if overflowBufferIndex < overflowBufferCount {
      _sanityCheck(overflowBufferIndex < overflowBuffer!.count)
      let index = overflowBufferIndex
      overflowBufferIndex += 1
      return overflowBuffer![index]
    } else {
      return nil
    }
  }
}
|
||||
|
||||
/// A source of UTF-16 code units that can be drained into a caller-supplied
/// buffer.
protocol _SegmentSource {
  /// How many code units the source can still produce.
  var remaining: Int { get }

  /// Writes code units into `buffer` and returns how many were written, or
  /// `nil` when `buffer` is too small for what the source needs to write.
  mutating func tryFill(buffer: UnsafeMutableBufferPointer<UInt16>) -> Int?
}
|
||||
|
||||
extension _SegmentSource {
  /// Convenience overload that fills a fixed-size segment output buffer by
  /// casting it to a code-unit buffer pointer and forwarding to
  /// `tryFill(buffer:)`.
  mutating func tryFill(
    buffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
  ) -> Int? {
    let codeUnitBuffer = _castOutputBuffer(buffer)
    return tryFill(buffer: codeUnitBuffer)
  }
}
|
||||
|
||||
@@ -80,7 +80,6 @@ extension TextOutputStream {
|
||||
public mutating func _lock() {}
|
||||
public mutating func _unlock() {}
|
||||
|
||||
@inlinable
|
||||
public mutating func _writeASCII(_ buffer: UnsafeBufferPointer<UInt8>) {
|
||||
write(String._fromASCII(buffer))
|
||||
}
|
||||
@@ -522,15 +521,8 @@ internal struct _Stdout : TextOutputStream {
|
||||
internal mutating func write(_ string: String) {
|
||||
if string.isEmpty { return }
|
||||
|
||||
if _fastPath(string._guts.isASCII) {
|
||||
defer { _fixLifetime(string) }
|
||||
let ascii = string._guts._unmanagedASCIIView
|
||||
_swift_stdlib_fwrite_stdout(ascii.start, ascii.count, 1)
|
||||
return
|
||||
}
|
||||
|
||||
for c in string.utf8 {
|
||||
_swift_stdlib_putchar_unlocked(Int32(c))
|
||||
_ = string._withUTF8 { utf8 in
|
||||
_swift_stdlib_fwrite_stdout(utf8.baseAddress!, 1, utf8.count)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -544,7 +536,7 @@ extension String : TextOutputStream {
|
||||
}
|
||||
|
||||
public mutating func _writeASCII(_ buffer: UnsafeBufferPointer<UInt8>) {
|
||||
self._guts.append(_UnmanagedString(buffer))
|
||||
self._guts.append(_StringGuts(buffer, isKnownASCII: true))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -253,13 +253,7 @@ public struct StaticString
|
||||
|
||||
/// A string representation of the static string.
|
||||
public var description: String {
|
||||
return withUTF8Buffer { (buffer) in
|
||||
if isASCII {
|
||||
return String._fromASCII(buffer)
|
||||
} else {
|
||||
return String._fromWellFormedUTF8(buffer)
|
||||
}
|
||||
}
|
||||
return withUTF8Buffer { String._uncheckedFromUTF8($0) }
|
||||
}
|
||||
|
||||
/// A textual representation of the static string, suitable for debugging.
|
||||
|
||||
+422
-604
File diff suppressed because it is too large
Load Diff
@@ -20,52 +20,35 @@ import SwiftShims
|
||||
/// Effectively an untyped NSString that doesn't require foundation.
|
||||
public typealias _CocoaString = AnyObject
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public // @testable
|
||||
func _stdlib_binary_CFStringCreateCopy(
|
||||
@usableFromInline // @testable
|
||||
@_effects(releasenone)
|
||||
internal func _stdlib_binary_CFStringCreateCopy(
|
||||
_ source: _CocoaString
|
||||
) -> _CocoaString {
|
||||
let result = _swift_stdlib_CFStringCreateCopy(nil, source) as AnyObject
|
||||
return result
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@usableFromInline // @testable
|
||||
@_effects(readonly)
|
||||
public // @testable
|
||||
func _stdlib_binary_CFStringGetLength(
|
||||
internal func _stdlib_binary_CFStringGetLength(
|
||||
_ source: _CocoaString
|
||||
) -> Int {
|
||||
return _swift_stdlib_CFStringGetLength(source)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public // @testable
|
||||
func _stdlib_binary_CFStringGetCharactersPtr(
|
||||
@usableFromInline // @testable
|
||||
@_effects(readonly)
|
||||
internal func _stdlib_binary_CFStringGetCharactersPtr(
|
||||
_ source: _CocoaString
|
||||
) -> UnsafeMutablePointer<UTF16.CodeUnit>? {
|
||||
return UnsafeMutablePointer(
|
||||
mutating: _swift_stdlib_CFStringGetCharactersPtr(source))
|
||||
}
|
||||
|
||||
/// Loading Foundation initializes these function variables
|
||||
/// with useful values
|
||||
|
||||
/// Copies every UTF-16 code unit of `source` into `destination`.
///
/// The copied range runs from location 0 for the length reported by
/// `_swift_stdlib_CFStringGetLength`; `destination` must be contiguous
/// storage of sufficient capacity.
@usableFromInline // FIXME(sil-serialize-all)
@inline(never) // Hide the CF dependency
internal func _cocoaStringReadAll(
  _ source: _CocoaString, _ destination: UnsafeMutablePointer<UTF16.CodeUnit>
) {
  let wholeString = _swift_shims_CFRange(
    location: 0, length: _swift_stdlib_CFStringGetLength(source))
  _swift_stdlib_CFStringGetCharacters(source, wholeString, destination)
}
|
||||
|
||||
/// Copies a slice of a _CocoaString into contiguous storage of
|
||||
/// sufficient capacity.
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
@inline(never) // Hide the CF dependency
|
||||
@_effects(releasenone)
|
||||
internal func _cocoaStringCopyCharacters(
|
||||
from source: _CocoaString,
|
||||
range: Range<Int>,
|
||||
@@ -77,8 +60,8 @@ internal func _cocoaStringCopyCharacters(
|
||||
destination)
|
||||
}
|
||||
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
@inline(never) // Hide the CF dependency
|
||||
|
||||
@_effects(releasenone)
|
||||
internal func _cocoaStringSlice(
|
||||
_ target: _CocoaString, _ bounds: Range<Int>
|
||||
) -> _CocoaString {
|
||||
@@ -95,16 +78,12 @@ internal func _cocoaStringSlice(
|
||||
return cfResult
|
||||
}
|
||||
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
@inline(never) // Hide the CF dependency
|
||||
|
||||
@_effects(readonly)
|
||||
internal func _cocoaStringSubscript(
|
||||
_ target: _CocoaString, _ position: Int
|
||||
) -> UTF16.CodeUnit {
|
||||
let cfSelf: _swift_shims_CFStringRef = target
|
||||
|
||||
_sanityCheck(_swift_stdlib_CFStringGetCharactersPtr(cfSelf) == nil,
|
||||
"Known contiguously stored strings should already be converted to Swift")
|
||||
|
||||
return _swift_stdlib_CFStringGetCharacterAtIndex(cfSelf, position)
|
||||
}
|
||||
|
||||
@@ -112,23 +91,23 @@ internal func _cocoaStringSubscript(
|
||||
// Conversion from NSString to Swift's native representation
|
||||
//
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var kCFStringEncodingASCII : _swift_shims_CFStringEncoding {
|
||||
@inline(__always) get { return 0x0600 }
|
||||
}
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var kCFStringEncodingUTF8 : _swift_shims_CFStringEncoding {
|
||||
@inline(__always) get { return 0x8000100 }
|
||||
}
|
||||
|
||||
@usableFromInline // @opaque
|
||||
internal func _bridgeASCIICocoaString(
|
||||
// Resiliently write a tagged cocoa string's contents into a buffer
|
||||
@_effects(readonly) // @opaque
|
||||
internal func _bridgeTagged(
|
||||
_ cocoa: _CocoaString,
|
||||
intoUTF8 bufPtr: UnsafeMutableRawBufferPointer
|
||||
intoUTF8 bufPtr: UnsafeMutableBufferPointer<UInt8>
|
||||
) -> Int? {
|
||||
let ptr = bufPtr.baseAddress._unsafelyUnwrappedUnchecked.assumingMemoryBound(
|
||||
to: UInt8.self)
|
||||
_sanityCheck(_isObjCTaggedPointer(cocoa))
|
||||
let ptr = bufPtr.baseAddress._unsafelyUnwrappedUnchecked
|
||||
let length = _stdlib_binary_CFStringGetLength(cocoa)
|
||||
_sanityCheck(length <= _SmallUTF8String.capacity)
|
||||
var count = 0
|
||||
let numCharWritten = _swift_stdlib_CFStringGetBytes(
|
||||
cocoa, _swift_shims_CFRange(location: 0, length: length),
|
||||
@@ -136,26 +115,26 @@ internal func _bridgeASCIICocoaString(
|
||||
return length == numCharWritten ? count : nil
|
||||
}
|
||||
|
||||
@usableFromInline
|
||||
@_effects(releasenone)
|
||||
internal func _bridgeToCocoa(_ small: _SmallUTF8String) -> _CocoaString {
|
||||
return small.withUTF8CodeUnits { bufPtr in
|
||||
return _swift_stdlib_CFStringCreateWithBytes(
|
||||
nil, bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
|
||||
bufPtr.count,
|
||||
small.isASCII ? kCFStringEncodingASCII : kCFStringEncodingUTF8, 0)
|
||||
as AnyObject
|
||||
}
|
||||
unimplemented_utf8()
|
||||
}
|
||||
|
||||
/// Asks CoreFoundation for a direct C-string pointer to `str` in the UTF-8
/// encoding, reinterpreted as `UInt8`. Returns `nil` when no such pointer is
/// available.
internal func _cocoaUTF8Pointer(_ str: _CocoaString) -> UnsafePointer<UInt8>? {
  // TODO(UTF8): Is there a better interface here? This requires nul
  // termination and may assume ASCII.
  let cString = _swift_stdlib_CFStringGetCStringPtr(str, kCFStringEncodingUTF8)
  return cString?._asUInt8
}
|
||||
|
||||
@_effects(readonly)
|
||||
internal func _getCocoaStringPointer(
|
||||
_ cfImmutableValue: _CocoaString
|
||||
) -> (UnsafeRawPointer?, isUTF16: Bool) {
|
||||
// Look first for null-terminated ASCII
|
||||
// Note: the code in clownfish appears to guarantee
|
||||
// nul-termination, but I'm waiting for an answer from Chris Kane
|
||||
// about whether we can count on it for all time or not.
|
||||
let nulTerminatedASCII = _swift_stdlib_CFStringGetCStringPtr(
|
||||
cfImmutableValue, kCFStringEncodingASCII)
|
||||
let nulTerminatedASCII = _cocoaUTF8Pointer(cfImmutableValue)
|
||||
|
||||
// start will hold the base pointer of contiguous storage, if it
|
||||
// is found.
|
||||
@@ -171,65 +150,83 @@ internal func _getCocoaStringPointer(
|
||||
}
|
||||
|
||||
@usableFromInline
|
||||
@inline(never) // Hide the CF dependency
|
||||
internal
|
||||
func _makeCocoaStringGuts(_ cocoaString: _CocoaString) -> _StringGuts {
|
||||
if let ascii = cocoaString as? _ASCIIStringStorage {
|
||||
return _StringGuts(_large: ascii)
|
||||
} else if let utf16 = cocoaString as? _UTF16StringStorage {
|
||||
return _StringGuts(_large: utf16)
|
||||
} else if let wrapped = cocoaString as? __NSContiguousString {
|
||||
return wrapped._guts
|
||||
@_effects(releasenone) // @opaque
|
||||
internal func _bridgeCocoaString(_ cocoaString: _CocoaString) -> _StringGuts {
|
||||
if let abstract = cocoaString as? _AbstractStringStorage {
|
||||
return abstract.asString._guts
|
||||
} else if _isObjCTaggedPointer(cocoaString) {
|
||||
guard let small = _SmallUTF8String(_cocoaString: cocoaString) else {
|
||||
fatalError("Internal invariant violated: large tagged NSStrings")
|
||||
}
|
||||
return _StringGuts(small)
|
||||
return _StringGuts(_SmallUTF8String(taggedCocoa: cocoaString))
|
||||
}
|
||||
|
||||
// "copy" it into a value to be sure nobody will modify behind
|
||||
// our backs. In practice, when value is already immutable, this
|
||||
// just does a retain.
|
||||
//
|
||||
// TODO: Only in certain circumstances should we emit this call:
|
||||
// 1) If it's immutable, just retain it.
|
||||
// 2) If it's mutable with no associated information, then a copy must
|
||||
// happen; might as well eagerly bridge it in.
|
||||
// 3) If it's mutable with associated information, must make the call
|
||||
//
|
||||
let immutableCopy
|
||||
= _stdlib_binary_CFStringCreateCopy(cocoaString) as AnyObject
|
||||
|
||||
if _isObjCTaggedPointer(immutableCopy) {
|
||||
guard let small = _SmallUTF8String(_cocoaString: cocoaString) else {
|
||||
fatalError("Internal invariant violated: large tagged NSStrings")
|
||||
}
|
||||
return _StringGuts(small)
|
||||
return _StringGuts(_SmallUTF8String(taggedCocoa: immutableCopy))
|
||||
}
|
||||
|
||||
let (start, isUTF16) = _getCocoaStringPointer(immutableCopy)
|
||||
let length = _stdlib_binary_CFStringGetLength(immutableCopy)
|
||||
|
||||
let length = _StringGuts.getCocoaLength(
|
||||
_unsafeBitPattern: Builtin.reinterpretCast(immutableCopy))
|
||||
// Detect fast-UTF8 Cocoa
|
||||
let fastUTF8 = !isUTF16 && start != nil
|
||||
return _StringGuts(
|
||||
_largeNonTaggedCocoaObject: immutableCopy,
|
||||
count: length,
|
||||
isSingleByte: !isUTF16,
|
||||
start: start)
|
||||
cocoa: immutableCopy, providesFastUTF8: fastUTF8, length: length)
|
||||
}
|
||||
|
||||
extension String {
|
||||
public // SPI(Foundation)
|
||||
init(_cocoaString: AnyObject) {
|
||||
self._guts = _makeCocoaStringGuts(_cocoaString)
|
||||
self._guts = _bridgeCocoaString(_cocoaString)
|
||||
}
|
||||
}
|
||||
|
||||
// At runtime, this class is derived from `__SwiftNativeNSStringBase`,
|
||||
extension String {
  /// Produces the Objective-C object used to bridge this string.
  ///
  /// - Small strings: a new CFString is created from the UTF-8 bytes.
  /// - Immortal strings: the fast UTF-8 buffer is wrapped in a
  ///   `_SharedStringStorage`.
  /// - Otherwise: the guts' bridgeable object is returned as is.
  @_effects(releasenone)
  public // SPI(Foundation)
  func _bridgeToObjectiveCImpl() -> AnyObject {
    // TODO(UTF8): create and use a visit pattern on _StringGuts to handle each
    // form, rather than querying object directly. Presumably there will be
    // other such visitors.
    if _guts._object.isSmall {
      return _guts._object.asSmallString.withUTF8 { utf8 -> AnyObject in
        // TODO(UTF8 perf): worth isKnownASCII check for different encoding?
        let firstByte = utf8.baseAddress._unsafelyUnwrappedUnchecked
        let created = _swift_stdlib_CFStringCreateWithBytes(
          nil, firstByte, utf8.count, kCFStringEncodingUTF8, 0)
        return created as AnyObject
      }
    }

    if _guts._object.isImmortal {
      let immortalUTF8 = _guts._object.fastUTF8
      return _SharedStringStorage(immortal: immortalUTF8)
    }

    _sanityCheck(_guts._object.hasObjCBridgeableObject,
      "Unknown non-bridgeable object case")
    return _guts._object.objCBridgeableObject
  }
}
|
||||
|
||||
// At runtime, this class is derived from `_SwiftNativeNSStringBase`,
|
||||
// which is derived from `NSString`.
|
||||
//
|
||||
// The @_swift_native_objc_runtime_base attribute
|
||||
// This allows us to subclass an Objective-C class and use the fast Swift
|
||||
// memory allocator.
|
||||
//
|
||||
// NOTE: older runtimes called this _SwiftNativeNSString. The two must
|
||||
// coexist, so it was renamed. The old name must not be used in the new
|
||||
// runtime.
|
||||
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
@objc @_swift_native_objc_runtime_base(__SwiftNativeNSStringBase)
|
||||
public class __SwiftNativeNSString {
|
||||
@objc @_swift_native_objc_runtime_base(_SwiftNativeNSStringBase)
|
||||
public class _SwiftNativeNSString {
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
@objc
|
||||
internal init() {}
|
||||
@@ -252,166 +249,18 @@ public protocol _NSStringCore : _NSCopying /* _NSFastEnumeration */ {
|
||||
@objc(characterAtIndex:)
|
||||
func character(at index: Int) -> UInt16
|
||||
|
||||
// We also override the following methods for efficiency.
|
||||
// We also override the following methods for efficiency.
|
||||
|
||||
@objc(getCharacters:range:)
|
||||
func getCharacters(
|
||||
_ buffer: UnsafeMutablePointer<UInt16>,
|
||||
range aRange: _SwiftNSRange)
|
||||
_ buffer: UnsafeMutablePointer<UInt16>,
|
||||
range aRange: _SwiftNSRange)
|
||||
|
||||
@objc(_fastCharacterContents)
|
||||
func _fastCharacterContents() -> UnsafePointer<UInt16>?
|
||||
}
|
||||
|
||||
/// An `NSString` built around a slice of contiguous Swift `String` storage.
|
||||
///
|
||||
/// NOTE: older runtimes called this _NSContiguousString. The two must
|
||||
/// coexist, so it was renamed. The old name must not be used in the new
|
||||
/// runtime.
|
||||
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
public final class __NSContiguousString : __SwiftNativeNSString, _NSStringCore {
|
||||
public let _guts: _StringGuts
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public init(_ _guts: _StringGuts) {
|
||||
_sanityCheck(!_guts._isOpaque,
|
||||
"__NSContiguousString requires contiguous storage")
|
||||
self._guts = _guts
|
||||
super.init()
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public init(_unmanaged guts: _StringGuts) {
|
||||
_sanityCheck(!guts._isOpaque,
|
||||
"__NSContiguousString requires contiguous storage")
|
||||
if guts.isASCII {
|
||||
self._guts = _StringGuts(_large: guts._unmanagedASCIIView)
|
||||
} else {
|
||||
self._guts = _StringGuts(_large: guts._unmanagedUTF16View)
|
||||
}
|
||||
super.init()
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public init(_unmanaged guts: _StringGuts, range: Range<Int>) {
|
||||
_sanityCheck(!guts._isOpaque,
|
||||
"__NSContiguousString requires contiguous storage")
|
||||
if guts.isASCII {
|
||||
self._guts = _StringGuts(_large: guts._unmanagedASCIIView[range])
|
||||
} else {
|
||||
self._guts = _StringGuts(_large: guts._unmanagedUTF16View[range])
|
||||
}
|
||||
super.init()
|
||||
}
|
||||
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
@objc
|
||||
init(coder aDecoder: AnyObject) {
|
||||
_sanityCheckFailure("init(coder:) not implemented for __NSContiguousString")
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
deinit {}
|
||||
|
||||
@inlinable
|
||||
@objc(length)
|
||||
public var length: Int {
|
||||
return _guts.count
|
||||
}
|
||||
|
||||
@inlinable
|
||||
@objc(characterAtIndex:)
|
||||
public func character(at index: Int) -> UInt16 {
|
||||
defer { _fixLifetime(self) }
|
||||
return _guts.codeUnit(atCheckedOffset: index)
|
||||
}
|
||||
|
||||
@inlinable
|
||||
@objc(getCharacters:range:)
|
||||
public func getCharacters(
|
||||
_ buffer: UnsafeMutablePointer<UInt16>,
|
||||
range aRange: _SwiftNSRange) {
|
||||
_precondition(aRange.location >= 0 && aRange.length >= 0)
|
||||
let range: Range<Int> = aRange.location ..< aRange.location + aRange.length
|
||||
_precondition(range.upperBound <= Int(_guts.count))
|
||||
|
||||
if _guts.isASCII {
|
||||
_guts._unmanagedASCIIView[range]._copy(
|
||||
into: UnsafeMutableBufferPointer(start: buffer, count: range.count))
|
||||
} else {
|
||||
_guts._unmanagedUTF16View[range]._copy(
|
||||
into: UnsafeMutableBufferPointer(start: buffer, count: range.count))
|
||||
}
|
||||
_fixLifetime(self)
|
||||
}
|
||||
|
||||
@inlinable
|
||||
@objc(_fastCharacterContents)
|
||||
public func _fastCharacterContents() -> UnsafePointer<UInt16>? {
|
||||
guard !_guts.isASCII else { return nil }
|
||||
return _guts._unmanagedUTF16View.start
|
||||
}
|
||||
|
||||
@objc(copyWithZone:)
|
||||
public func copy(with zone: _SwiftNSZone?) -> AnyObject {
|
||||
// Since this string is immutable we can just return ourselves.
|
||||
return self
|
||||
}
|
||||
|
||||
/// The caller of this function guarantees that the closure 'body' does not
|
||||
/// escape the object referenced by the opaque pointer passed to it or
|
||||
/// anything transitively reachable from this object. Doing so
|
||||
/// will result in undefined behavior.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@_semantics("self_no_escaping_closure")
|
||||
func _unsafeWithNotEscapedSelfPointer<Result>(
|
||||
_ body: (OpaquePointer) throws -> Result
|
||||
) rethrows -> Result {
|
||||
let selfAsPointer = unsafeBitCast(self, to: OpaquePointer.self)
|
||||
defer {
|
||||
_fixLifetime(self)
|
||||
}
|
||||
return try body(selfAsPointer)
|
||||
}
|
||||
|
||||
/// The caller of this function guarantees that the closure 'body' does not
|
||||
/// escape either object referenced by the opaque pointer pair passed to it or
|
||||
/// transitively reachable objects. Doing so will result in undefined
|
||||
/// behavior.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@_semantics("pair_no_escaping_closure")
|
||||
func _unsafeWithNotEscapedSelfPointerPair<Result>(
|
||||
_ rhs: __NSContiguousString,
|
||||
_ body: (OpaquePointer, OpaquePointer) throws -> Result
|
||||
) rethrows -> Result {
|
||||
let selfAsPointer = unsafeBitCast(self, to: OpaquePointer.self)
|
||||
let rhsAsPointer = unsafeBitCast(rhs, to: OpaquePointer.self)
|
||||
defer {
|
||||
_fixLifetime(self)
|
||||
_fixLifetime(rhs)
|
||||
}
|
||||
return try body(selfAsPointer, rhsAsPointer)
|
||||
}
|
||||
}
|
||||
|
||||
extension String {
|
||||
/// Same as `_bridgeToObjectiveC()`, but located inside the core standard
|
||||
/// library.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func _stdlib_binary_bridgeToObjectiveCImpl() -> AnyObject {
|
||||
if _guts._isSmall {
|
||||
return _bridgeToCocoa(_guts._smallUTF8String)
|
||||
}
|
||||
if let cocoa = _guts._underlyingCocoaString {
|
||||
return cocoa
|
||||
}
|
||||
return __NSContiguousString(_guts)
|
||||
}
|
||||
|
||||
@inline(never) // Hide the CF dependency
|
||||
public func _bridgeToObjectiveCImpl() -> AnyObject {
|
||||
return _stdlib_binary_bridgeToObjectiveCImpl()
|
||||
}
|
||||
@objc(_fastCStringContents)
|
||||
func _fastCStringContents() -> UnsafePointer<CChar>?
|
||||
}
|
||||
|
||||
// Called by the SwiftObject implementation to get the description of a value
|
||||
@@ -421,11 +270,10 @@ public func _getDescription<T>(_ x: T) -> AnyObject {
|
||||
return String(reflecting: x)._bridgeToObjectiveCImpl()
|
||||
}
|
||||
|
||||
|
||||
#else // !_runtime(_ObjC)
|
||||
|
||||
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
public class __SwiftNativeNSString {
|
||||
public class _SwiftNativeNSString {
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal init() {}
|
||||
deinit {}
|
||||
@@ -434,3 +282,46 @@ public class __SwiftNativeNSString {
|
||||
public protocol _NSStringCore: class {}
|
||||
|
||||
#endif
|
||||
|
||||
extension String {
  // Resiliently provide a (barely) amortized random access UTF-16 interface.
  //
  // Converts a UTF-16 code unit offset into a `String.Index` by advancing
  // `offset` positions from the start of the `utf16` view.
  //
  // @opaque
  internal func _utf16OffsetToIndex(_ offset: Int) -> Index {
    // TODO(UTF8): Track known ASCII

    // TODO(UTF8): Leave breadcrumbs, and more efficient impl

    let utf16View = self.utf16
    return utf16View.index(utf16View.startIndex, offsetBy: offset)
  }

  // Resiliently provide a (barely) amortized random access UTF-16 interface.
  //
  // Converts a range of UTF-16 code unit offsets into a range of indices by
  // converting each bound independently.
  //
  // @opaque
  internal func _utf16OffsetToIndex(_ range: Range<Int>) -> Range<Index> {
    // TODO(UTF8): Can be more efficient for a range
    let lowerIndex = self._utf16OffsetToIndex(range.lowerBound)
    let upperIndex = self._utf16OffsetToIndex(range.upperBound)
    return lowerIndex ..< upperIndex
  }

  // Resiliently provide a (barely) amortized random access UTF-16 interface.
  //
  // Returns the number of code units in the `utf16` view.
  //
  // @opaque
  internal func _utf16Length() -> Int {
    // TODO(UTF8): Track known ASCII

    // TODO(UTF8): Leave breadcrumbs, and more efficient impl. Perhaps even
    // store it.

    let utf16View = self.utf16
    return utf16View.count
  }

  // Resiliently provide a (barely) amortized `characterAtIndex`.
  //
  // Returns the UTF-16 code unit found at the given UTF-16 offset.
  //
  // @opaque
  internal func _utf16CodeUnitAtOffset(_ offset: Int) -> UInt16 {
    let position = self._utf16OffsetToIndex(offset)
    return self.utf16[position]
  }

}
|
||||
|
||||
|
||||
@@ -1,16 +1,25 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//===--- StringCharacterView.swift - String's Collection of Characters ----===//
|
||||
//
|
||||
// This source file is part of the Swift.org open source project
|
||||
//
|
||||
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
|
||||
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
|
||||
// Licensed under Apache License v2.0 with Runtime Library Exception
|
||||
//
|
||||
// See https://swift.org/LICENSE.txt for license information
|
||||
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// String is-not-a Sequence or Collection, but it exposes a
|
||||
// collection of characters.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// FIXME(ABI)#70 : The character string view should have a custom iterator type
|
||||
// to allow performance optimizations of linear traversals.
|
||||
|
||||
import SwiftShims
|
||||
|
||||
// String is a bidirectional collection of `Character`s, aka graphemes
|
||||
extension String: BidirectionalCollection {
|
||||
/// A type that represents the number of steps between two `String.Index`
|
||||
/// values, where one value is reachable from the other.
|
||||
@@ -21,22 +30,28 @@ extension String: BidirectionalCollection {
|
||||
|
||||
public typealias SubSequence = Substring
|
||||
|
||||
public typealias Element = Character
|
||||
|
||||
/// The position of the first character in a nonempty string.
|
||||
///
|
||||
/// In an empty string, `startIndex` is equal to `endIndex`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var startIndex: Index { return Index(encodedOffset: 0) }
|
||||
@inlinable
|
||||
public var startIndex: Index {
|
||||
@inline(__always) get { return Index(encodedOffset: 0) }
|
||||
}
|
||||
|
||||
/// A string's "past the end" position---that is, the position one greater
|
||||
/// than the last valid subscript argument.
|
||||
///
|
||||
/// In an empty string, `endIndex` is equal to `startIndex`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var endIndex: Index { return Index(encodedOffset: _guts.count) }
|
||||
@inlinable
|
||||
public var endIndex: Index {
|
||||
@inline(__always) get { return Index(encodedOffset: _guts.count) }
|
||||
}
|
||||
|
||||
/// The number of characters in a string.
|
||||
public var count: Int {
|
||||
return distance(from: startIndex, to: endIndex)
|
||||
@inline(__always) get { return distance(from: startIndex, to: endIndex) }
|
||||
}
|
||||
|
||||
/// Returns the position immediately after the given index.
|
||||
@@ -45,10 +60,10 @@ extension String: BidirectionalCollection {
|
||||
/// `endIndex`.
|
||||
/// - Returns: The index value immediately after `i`.
|
||||
public func index(after i: Index) -> Index {
|
||||
return _visitGuts(_guts, args: i,
|
||||
ascii: { ascii, i in ascii.characterIndex(after: i) },
|
||||
utf16: { utf16, i in utf16.characterIndex(after: i) },
|
||||
opaque: { opaque, i in opaque.characterIndex(after: i) })
|
||||
// TODO(UTF8): populate the stride cache in the resultant iterator
|
||||
|
||||
let stride = _characterStride(startingAt: i)
|
||||
return Index(encodedOffset: i.encodedOffset &+ stride)
|
||||
}
|
||||
|
||||
/// Returns the position immediately before the given index.
|
||||
@@ -57,12 +72,13 @@ extension String: BidirectionalCollection {
|
||||
/// `startIndex`.
|
||||
/// - Returns: The index value immediately before `i`.
|
||||
public func index(before i: Index) -> Index {
|
||||
return _visitGuts(_guts, args: i,
|
||||
ascii: { ascii, i in ascii.characterIndex(before: i) },
|
||||
utf16: { utf16, i in utf16.characterIndex(before: i) },
|
||||
opaque: { opaque, i in opaque.characterIndex(before: i) })
|
||||
}
|
||||
// TODO: known-ASCII fast path, single-scalar-grapheme fast path, etc.
|
||||
|
||||
// TODO(UTF8): populate the stride cache in the resultant iterator
|
||||
|
||||
let stride = _characterStride(endingAt: i)
|
||||
return Index(encodedOffset: i.encodedOffset &- stride)
|
||||
}
|
||||
/// Returns an index that is the specified distance from the given index.
|
||||
///
|
||||
/// The following example obtains an index advanced four positions from a
|
||||
@@ -73,27 +89,22 @@ extension String: BidirectionalCollection {
|
||||
/// print(s[i])
|
||||
/// // Prints "t"
|
||||
///
|
||||
/// The value passed as `distance` must not offset `i` beyond the bounds of
|
||||
/// the collection.
|
||||
/// The value passed as `n` must not offset `i` beyond the bounds of the
|
||||
/// collection.
|
||||
///
|
||||
/// - Parameters:
|
||||
/// - i: A valid index of the collection.
|
||||
/// - distance: The distance to offset `i`.
|
||||
/// - Returns: An index offset by `distance` from the index `i`. If
|
||||
/// `distance` is positive, this is the same value as the result of
|
||||
/// `distance` calls to `index(after:)`. If `distance` is negative, this
|
||||
/// is the same value as the result of `abs(distance)` calls to
|
||||
/// `index(before:)`.
|
||||
/// - n: The distance to offset `i`.
|
||||
/// - Returns: An index offset by `n` from the index `i`. If `n` is positive,
|
||||
/// this is the same value as the result of `n` calls to `index(after:)`.
|
||||
/// If `n` is negative, this is the same value as the result of `-n` calls
|
||||
/// to `index(before:)`.
|
||||
///
|
||||
/// - Complexity: O(*k*), where *k* is the absolute value of `distance`.
|
||||
public func index(_ i: Index, offsetBy distance: IndexDistance) -> Index {
|
||||
return _visitGuts(_guts, args: (i, distance),
|
||||
ascii: { ascii, args in let (i, n) = args
|
||||
return ascii.characterIndex(i, offsetBy: n) },
|
||||
utf16: { utf16, args in let (i, n) = args
|
||||
return utf16.characterIndex(i, offsetBy: n) },
|
||||
opaque: { opaque, args in let (i, n) = args
|
||||
return opaque.characterIndex(i, offsetBy: n) })
|
||||
/// - Complexity: O(*n*), where *n* is the absolute value of `n`.
|
||||
@inlinable @inline(__always)
|
||||
public func index(_ i: Index, offsetBy n: IndexDistance) -> Index {
|
||||
// TODO: known-ASCII and single-scalar-grapheme fast path, etc.
|
||||
return __index(i, offsetBy: n)
|
||||
}
|
||||
|
||||
/// Returns an index that is the specified distance from the given index,
|
||||
@@ -118,31 +129,27 @@ extension String: BidirectionalCollection {
|
||||
/// print(j)
|
||||
/// // Prints "nil"
|
||||
///
|
||||
/// The value passed as `distance` must not offset `i` beyond the bounds of the
|
||||
/// The value passed as `n` must not offset `i` beyond the bounds of the
|
||||
/// collection, unless the index passed as `limit` prevents offsetting
|
||||
/// beyond those bounds.
|
||||
///
|
||||
/// - Parameters:
|
||||
/// - i: A valid index of the collection.
|
||||
/// - distance: The distance to offset `i`.
|
||||
/// - limit: A valid index of the collection to use as a limit. If `distance > 0`,
|
||||
/// a limit that is less than `i` has no effect. Likewise, if `distance < 0`, a
|
||||
/// - n: The distance to offset `i`.
|
||||
/// - limit: A valid index of the collection to use as a limit. If `n > 0`,
|
||||
/// a limit that is less than `i` has no effect. Likewise, if `n < 0`, a
|
||||
/// limit that is greater than `i` has no effect.
|
||||
/// - Returns: An index offset by `distance` from the index `i`, unless that index
|
||||
/// - Returns: An index offset by `n` from the index `i`, unless that index
|
||||
/// would be beyond `limit` in the direction of movement. In that case,
|
||||
/// the method returns `nil`.
|
||||
///
|
||||
/// - Complexity: O(*k*), where *k* is the absolute value of `distance`.
|
||||
/// - Complexity: O(*n*), where *n* is the absolute value of `n`.
|
||||
@inlinable @inline(__always)
|
||||
public func index(
|
||||
_ i: Index, offsetBy distance: IndexDistance, limitedBy limit: Index
|
||||
_ i: Index, offsetBy n: IndexDistance, limitedBy limit: Index
|
||||
) -> Index? {
|
||||
return _visitGuts(_guts, args: (i, distance, limit),
|
||||
ascii: { ascii, args in let (i, n, limit) = args
|
||||
return ascii.characterIndex(i, offsetBy: n, limitedBy: limit) },
|
||||
utf16: { utf16, args in let (i, n, limit) = args
|
||||
return utf16.characterIndex(i, offsetBy: n, limitedBy: limit) },
|
||||
opaque: { opaque, args in let (i, n, limit) = args
|
||||
return opaque.characterIndex(i, offsetBy: n, limitedBy: limit) })
|
||||
// TODO: known-ASCII and single-scalar-grapheme fast path, etc.
|
||||
return __index(i, offsetBy: n, limitedBy: limit)
|
||||
}
|
||||
|
||||
/// Returns the distance between two indices.
|
||||
@@ -153,15 +160,11 @@ extension String: BidirectionalCollection {
|
||||
/// `start`, the result is zero.
|
||||
/// - Returns: The distance between `start` and `end`.
|
||||
///
|
||||
/// - Complexity: O(*k*), where *k* is the resulting distance.
|
||||
/// - Complexity: O(*n*), where *n* is the resulting distance.
|
||||
@inlinable @inline(__always)
|
||||
public func distance(from start: Index, to end: Index) -> IndexDistance {
|
||||
return _visitGuts(_guts, args: (start, end),
|
||||
ascii: { ascii, args in let (start, end) = args
|
||||
return ascii.characterDistance(from: start, to: end) },
|
||||
utf16: { utf16, args in let (start, end) = args
|
||||
return utf16.characterDistance(from: start, to: end) },
|
||||
opaque: { opaque, args in let (start, end) = args
|
||||
return opaque.characterDistance(from: start, to: end) })
|
||||
// TODO: known-ASCII and single-scalar-grapheme fast path, etc.
|
||||
return __distance(from: start, to: end)
|
||||
}
|
||||
|
||||
/// Accesses the character at the given position.
|
||||
@@ -179,10 +182,68 @@ extension String: BidirectionalCollection {
|
||||
///
|
||||
/// - Parameter i: A valid index of the string. `i` must be less than the
|
||||
/// string's end index.
|
||||
@inlinable
|
||||
public subscript(i: Index) -> Character {
|
||||
return _visitGuts(_guts, args: i,
|
||||
ascii: { ascii, i in return ascii.character(at: i) },
|
||||
utf16: { utf16, i in return utf16.character(at: i) },
|
||||
opaque: { opaque, i in return opaque.character(at: i) })
|
||||
@inline(__always) get {
|
||||
// FIXME(UTF8): bounds checking
|
||||
|
||||
// TODO: known-ASCII and single-scalar-grapheme fast path, etc.
|
||||
|
||||
let distance = _characterStride(startingAt: i)
|
||||
|
||||
// TODO(UTF8): Probably worth making into `extractRange` on StringGuts.
|
||||
if _fastPath(_guts.isFastUTF8) {
|
||||
return _guts.withFastUTF8 { utf8 in
|
||||
let start = i.encodedOffset
|
||||
let end = start + distance
|
||||
let cus = UnsafeBufferPointer(rebasing: utf8[start..<end])
|
||||
return Character(unchecked: String._uncheckedFromUTF8(cus))
|
||||
}
|
||||
}
|
||||
|
||||
return _foreignSubscript(position: i, distance: distance)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the stride, in encoded offsets, of the character that starts at
/// `i`.
///
/// If the index already carries a measured `characterStride`, that value is
/// returned; otherwise the stride is measured through
/// `_guts._opaqueCharacterStride(startingAt:)`.
@inlinable @inline(__always)
internal func _characterStride(startingAt i: Index) -> Int {
  // TODO: Known-single-scalar-grapheme fast path
  guard let measuredStride = i.characterStride else {
    // Not measured yet: check resiliently.
    return _guts._opaqueCharacterStride(startingAt: i.encodedOffset)
  }

  // Fast path: the index already knows its stride.
  return measuredStride
}
|
||||
|
||||
/// Returns the stride, in encoded offsets, of the character that ends at
/// `i`, as measured by `_guts._opaqueCharacterStride(endingAt:)`.
@inlinable @inline(__always)
internal func _characterStride(endingAt i: Index) -> Int {
  // TODO: Known-single-scalar-grapheme fast path
  let endOffset = i.encodedOffset
  return _guts._opaqueCharacterStride(endingAt: endOffset)
}
|
||||
}
|
||||
|
||||
// Foreign string support
|
||||
extension String {
  /// Extracts the character at `position` from a foreign (non fast-UTF-8)
  /// string.
  ///
  /// - Parameters:
  ///   - position: Index whose `encodedOffset` marks the first code unit of
  ///     the character.
  ///   - distance: Number of code units the character spans.
  /// - Returns: The `Character` built from the `distance` UTF-16 code units
  ///   copied out of the underlying Cocoa object.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignSubscript(position: Index, distance: Int) -> Character {
    _sanityCheck(_guts.isForeign)

    let lowerOffset = position.encodedOffset
    let upperOffset = lowerOffset + distance

    // TODO(UTF8 perf): Stack buffer if small enough...

    // Scratch storage for exactly the code units of this character.
    var scratch = [UInt16](repeating: 0, count: upperOffset - lowerOffset)
    scratch.withUnsafeMutableBufferPointer { destination in
      _cocoaStringCopyCharacters(
        from: _guts._object.cocoaObject,
        range: lowerOffset..<upperOffset,
        into: destination.baseAddress._unsafelyUnwrappedUnchecked)
    }

    return scratch.withUnsafeBufferPointer { codeUnits in
      return Character(String._uncheckedFromUTF16(codeUnits))
    }
  }
}
|
||||
|
||||
|
||||
@@ -12,162 +12,63 @@
|
||||
|
||||
import SwiftShims
|
||||
|
||||
extension _StringGuts {
|
||||
@inline(__always)
|
||||
@inlinable
|
||||
public func _bitwiseEqualTo(_ other: _StringGuts) -> Bool {
|
||||
return self.rawBits == other.rawBits
|
||||
}
|
||||
|
||||
@inlinable
|
||||
internal static func isEqual(
|
||||
_ left: _StringGuts, to right: _StringGuts
|
||||
) -> Bool {
|
||||
// Bitwise equality implies string equality
|
||||
if left._bitwiseEqualTo(right) {
|
||||
return true
|
||||
}
|
||||
if left._isSmall && right._isSmall {
|
||||
// TODO: Ensure normality when adding UTF-8 support
|
||||
_sanityCheck(left._isASCIIOrSmallASCII && right._isASCIIOrSmallASCII,
|
||||
"Need to ensure normality")
|
||||
|
||||
// Equal small strings should be bitwise equal if ASCII
|
||||
return false
|
||||
}
|
||||
return compare(left, to: right) == 0
|
||||
}
|
||||
|
||||
@inlinable
|
||||
internal static func isEqual(
|
||||
_ left: _StringGuts, _ leftRange: Range<Int>,
|
||||
to right: _StringGuts, _ rightRange: Range<Int>
|
||||
) -> Bool {
|
||||
// Bitwise equality implies string equality
|
||||
if left._bitwiseEqualTo(right) && leftRange == rightRange {
|
||||
return true
|
||||
}
|
||||
return compare(left, leftRange, to: right, rightRange) == 0
|
||||
}
|
||||
|
||||
@inlinable
|
||||
internal static func isLess(
|
||||
_ left: _StringGuts, than right: _StringGuts
|
||||
) -> Bool {
|
||||
// Bitwise equality implies string equality
|
||||
if left._bitwiseEqualTo(right) {
|
||||
return false
|
||||
}
|
||||
if left._isSmall && right._isSmall {
|
||||
// Small strings compare lexicographically if ASCII
|
||||
return left._smallUTF8String._compare(right._smallUTF8String) == .less
|
||||
}
|
||||
return compare(left, to: right) == -1
|
||||
}
|
||||
|
||||
@inlinable
|
||||
internal static func isLess(
|
||||
_ left: _StringGuts, _ leftRange: Range<Int>,
|
||||
than right: _StringGuts, _ rightRange: Range<Int>
|
||||
) -> Bool {
|
||||
// Bitwise equality implies string equality
|
||||
if left._bitwiseEqualTo(right) && leftRange == rightRange {
|
||||
return false
|
||||
}
|
||||
return compare(left, leftRange, to: right, rightRange) == -1
|
||||
}
|
||||
|
||||
@inlinable
|
||||
internal static func compare(
|
||||
_ left: _StringGuts, _ leftRange: Range<Int>,
|
||||
to right: _StringGuts, _ rightRange: Range<Int>
|
||||
) -> Int {
|
||||
defer { _fixLifetime(left) }
|
||||
defer { _fixLifetime(right) }
|
||||
|
||||
if left.isASCII && right.isASCII {
|
||||
let leftASCII = left._unmanagedASCIIView[leftRange]
|
||||
let rightASCII = right._unmanagedASCIIView[rightRange]
|
||||
let result = leftASCII.compareASCII(to: rightASCII)
|
||||
return result
|
||||
}
|
||||
|
||||
let leftBits = left.rawBits
|
||||
let rightBits = right.rawBits
|
||||
|
||||
return _compareUnicode(leftBits, leftRange, rightBits, rightRange)
|
||||
}
|
||||
|
||||
@inlinable
|
||||
internal static func compare(
|
||||
_ left: _StringGuts, to right: _StringGuts
|
||||
) -> Int {
|
||||
defer { _fixLifetime(left) }
|
||||
defer { _fixLifetime(right) }
|
||||
|
||||
if left.isASCII && right.isASCII {
|
||||
let leftASCII = left._unmanagedASCIIView
|
||||
let rightASCII = right._unmanagedASCIIView
|
||||
let result = leftASCII.compareASCII(to: rightASCII)
|
||||
return result
|
||||
}
|
||||
|
||||
let leftBits = left.rawBits
|
||||
let rightBits = right.rawBits
|
||||
|
||||
return _compareUnicode(leftBits, rightBits)
|
||||
}
|
||||
}
|
||||
|
||||
extension StringProtocol {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
@inline(__always) // de-virtualize
|
||||
public static func ==<S: StringProtocol>(lhs: Self, rhs: S) -> Bool {
|
||||
return _StringGuts.isEqual(
|
||||
lhs._wholeString._guts, lhs._encodedOffsetRange,
|
||||
to: rhs._wholeString._guts, rhs._encodedOffsetRange)
|
||||
// TODO(UTF8 perf): This is a horribly slow means...
|
||||
return String(lhs) == String(rhs)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
@inline(__always) // de-virtualize
|
||||
public static func !=<S: StringProtocol>(lhs: Self, rhs: S) -> Bool {
|
||||
return !(lhs == rhs)
|
||||
// TODO(UTF8 perf): This is a horribly slow means...
|
||||
return String(lhs) != String(rhs)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
@inline(__always) // de-virtualize
|
||||
public static func < <R: StringProtocol>(lhs: Self, rhs: R) -> Bool {
|
||||
return _StringGuts.isLess(
|
||||
lhs._wholeString._guts, lhs._encodedOffsetRange,
|
||||
than: rhs._wholeString._guts, rhs._encodedOffsetRange)
|
||||
// TODO(UTF8 perf): This is a horribly slow means...
|
||||
return String(lhs) < String(rhs)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
@inline(__always) // de-virtualize
|
||||
public static func > <R: StringProtocol>(lhs: Self, rhs: R) -> Bool {
|
||||
return rhs < lhs
|
||||
// TODO(UTF8 perf): This is a horribly slow means...
|
||||
return String(lhs) > String(rhs)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
@inline(__always) // de-virtualize
|
||||
public static func <= <R: StringProtocol>(lhs: Self, rhs: R) -> Bool {
|
||||
return !(rhs < lhs)
|
||||
// TODO(UTF8 perf): This is a horribly slow means...
|
||||
return String(lhs) <= String(rhs)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
@inline(__always) // de-virtualize
|
||||
public static func >= <R: StringProtocol>(lhs: Self, rhs: R) -> Bool {
|
||||
return !(lhs < rhs)
|
||||
// TODO(UTF8 perf): This is a horribly slow means...
|
||||
return String(lhs) >= String(rhs)
|
||||
}
|
||||
}
|
||||
|
||||
extension String : Equatable {
|
||||
// FIXME: Why do I need this? If I drop it, I get "ambiguous use of operator"
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable @inline(__always) // For the bitwise comparison
|
||||
public static func ==(lhs: String, rhs: String) -> Bool {
|
||||
return _StringGuts.isEqual(lhs._guts, to: rhs._guts)
|
||||
if lhs._guts.rawBits == rhs._guts.rawBits { return true }
|
||||
return _compareStringsEqual(lhs, rhs)
|
||||
}
|
||||
}
|
||||
|
||||
extension String : Comparable {
|
||||
// FIXME: Why do I need this? If I drop it, I get "ambiguous use of operator"
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable @inline(__always) // For the bitwise comparison
|
||||
public static func < (lhs: String, rhs: String) -> Bool {
|
||||
return _StringGuts.isLess(lhs._guts, than: rhs._guts)
|
||||
if lhs._guts.rawBits == rhs._guts.rawBits { return false }
|
||||
return _compareStringsLess(lhs, rhs)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -9,176 +9,136 @@
|
||||
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// String Creation Helpers
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
extension String {
|
||||
/// Constructs a `String` in `resultStorage` containing the given UTF-8.
|
||||
///
|
||||
/// Low-level construction interface used by introspection
|
||||
/// implementation in the runtime library.
|
||||
@inlinable
|
||||
@_silgen_name("swift_stringFromUTF8InRawMemory")
|
||||
public // COMPILER_INTRINSIC
|
||||
static func _fromUTF8InRawMemory(
|
||||
_ resultStorage: UnsafeMutablePointer<String>,
|
||||
start: UnsafeMutablePointer<UTF8.CodeUnit>,
|
||||
utf8CodeUnitCount: Int
|
||||
) {
|
||||
resultStorage.initialize(to:
|
||||
String._fromWellFormedUTF8(
|
||||
UnsafeBufferPointer(start: start, count: utf8CodeUnitCount)))
|
||||
}
|
||||
|
||||
@usableFromInline
|
||||
static func _fromUTF8(
|
||||
_ input: UnsafeBufferPointer<UInt8>, repair: Bool
|
||||
) -> String? {
|
||||
if _isAllASCII(input) {
|
||||
return _fromASCII(input)
|
||||
}
|
||||
return _fromNonASCIIUTF8(input, repair: repair)
|
||||
}
|
||||
|
||||
@usableFromInline
|
||||
static func _fromASCII(_ input: UnsafeBufferPointer<UInt8>) -> String {
|
||||
if let smol = _SmallUTF8String(input) {
|
||||
return String(_StringGuts(smol))
|
||||
}
|
||||
let storage = _SwiftStringStorage<UInt8>.create(
|
||||
capacity: input.count, count: input.count)
|
||||
_sanityCheck(storage.count == input.count)
|
||||
storage.start.initialize(
|
||||
from: input.baseAddress._unsafelyUnwrappedUnchecked, count: input.count)
|
||||
return String(_StringGuts(_large: storage))
|
||||
}
|
||||
|
||||
@usableFromInline
|
||||
static func _fromWellFormedUTF8(
|
||||
_ input: UnsafeBufferPointer<UInt8>, repair: Bool = false
|
||||
internal static func _fromASCII(
|
||||
_ input: UnsafeBufferPointer<UInt8>
|
||||
) -> String {
|
||||
return String._fromUTF8(input, repair: repair)!
|
||||
}
|
||||
|
||||
@inlinable
|
||||
static func _fromWellFormedUTF16CodeUnits<C : RandomAccessCollection>(
|
||||
_ input: C, repair: Bool = false
|
||||
) -> String where C.Element == UTF16.CodeUnit {
|
||||
if let smol = _SmallUTF8String(input) {
|
||||
if let smol = _SmallString(input) {
|
||||
return String(_StringGuts(smol))
|
||||
}
|
||||
return String._fromCodeUnits(
|
||||
input, encoding: UTF16.self, repairIllFormedSequences: repair)!
|
||||
|
||||
// TODO(UTF8): Do we want to remember ASCII-ness?
|
||||
let storage = _StringStorage.create(initializingFrom: input)
|
||||
return storage.asString
|
||||
}
|
||||
|
||||
@inlinable
|
||||
internal static func _fromCodeUnits<
|
||||
Input: Collection, Encoding: Unicode.Encoding
|
||||
>(
|
||||
_ input: Input, encoding: Encoding.Type, repairIllFormedSequences: Bool
|
||||
) -> String?
|
||||
where Input.Element == Encoding.CodeUnit {
|
||||
/// Creates a string from `input` only if it passes UTF-8 validation.
///
/// The bytes are run through `transcode` (UTF-8 to UTF-8) with
/// `stoppingOnError: true` into a temporary array.
///
/// - Returns: `nil` when `transcode` reports an encoding error; otherwise a
///   string built from the transcoded bytes via `_uncheckedFromUTF8`.
@usableFromInline
internal static func _tryFromUTF8(
  _ input: UnsafeBufferPointer<UInt8>
) -> String? {
  // TODO(UTF8 perf): More efficient validation

  // TODO(UTF8 perf): Skip intermediary array
  var validated = [UInt8]()
  validated.reserveCapacity(input.count)

  let hadError = transcode(
    input.makeIterator(),
    from: UTF8.self,
    to: UTF8.self,
    stoppingOnError: true,
    into: { codeUnit in validated.append(codeUnit) })
  if hadError {
    return nil
  }

  return validated.withUnsafeBufferPointer { bytes in
    String._uncheckedFromUTF8(bytes)
  }
}
|
||||
|
||||
/// Builds a `String` from `input`, passing the bytes through `transcode`
/// (UTF-8 to UTF-8) with `stoppingOnError: false`.
///
/// - Returns: The string created from the transcoded bytes, paired with the
///   flag that `transcode` returned.
@usableFromInline
internal static func _fromUTF8Repairing(
  _ input: UnsafeBufferPointer<UInt8>
) -> (String, Bool) {
  // TODO(UTF8 perf): More efficient validation
  // TODO(UTF8 perf): Skip intermediary array
  var scrubbed = [UInt8]()
  scrubbed.reserveCapacity(input.count)

  let madeRepairs = transcode(
    input.makeIterator(),
    from: UTF8.self,
    to: UTF8.self,
    stoppingOnError: false
  ) { codeUnit in scrubbed.append(codeUnit) }

  let result = scrubbed.withUnsafeBufferPointer { bytes in
    String._uncheckedFromUTF8(bytes)
  }
  return (result, madeRepairs)
}
|
||||
|
||||
/// Creates a `String` from `input` without validating it.
///
/// A `_SmallString` is used when one can be formed from `input`; otherwise
/// a `_StringStorage` is created and initialized from the buffer.
@usableFromInline
internal static func _uncheckedFromUTF8(
  _ input: UnsafeBufferPointer<UInt8>
) -> String {
  guard let small = _SmallString(input) else {
    // TODO(UTF8): Do we want to do an ascii scan?
    return _StringStorage.create(initializingFrom: input).asString
  }
  return String(_StringGuts(small))
}
|
||||
|
||||
/// Creates a `String` from UTF-16 code units by transcoding them to UTF-8
/// into a temporary array and handing that array to `_uncheckedFromUTF8`.
///
/// A `_sanityCheck` asserts that `transcode` reported no error.
@usableFromInline
internal static func _uncheckedFromUTF16(
  _ input: UnsafeBufferPointer<UInt16>
) -> String {
  // TODO(UTF8): smol strings
  // TODO(UTF8): Faster transcoding...
  // TODO(UTF8): Skip intermediary array
  var utf8Bytes = [UInt8]()
  utf8Bytes.reserveCapacity(input.count)

  let hadError = transcode(
    input.makeIterator(),
    from: UTF16.self,
    to: UTF8.self,
    stoppingOnError: false
  ) { codeUnit in utf8Bytes.append(codeUnit) }
  _sanityCheck(!hadError, "Error present")

  return utf8Bytes.withUnsafeBufferPointer { bytes in
    String._uncheckedFromUTF8(bytes)
  }
}
|
||||
|
||||
/// Calls `body` with a buffer pointer over this string's UTF-8 code units.
///
/// - An empty string yields a zero-length buffer whose base address points
///   at a local placeholder byte.
/// - When `_guts.isFastUTF8` holds, the call is forwarded to
///   `_guts.withFastUTF8`.
/// - Any other representation reaches `unimplemented_utf8()`.
///
/// - Parameter body: Closure receiving the buffer; its result (or thrown
///   error) is propagated to the caller.
internal func _withUnsafeBufferPointerToUTF8<R>(
  _ body: (UnsafeBufferPointer<UTF8.CodeUnit>) throws -> R
) rethrows -> R {
  if isEmpty {
    var nothing: UInt8 = 0
    // Keep the placeholder's address valid for the whole call to `body`.
    // An inout-to-pointer argument (`start: &nothing`) is only guaranteed
    // to be valid for the duration of the `UnsafeBufferPointer` initializer.
    return try withUnsafePointer(to: &nothing) { placeholder in
      try body(UnsafeBufferPointer(start: placeholder, count: 0))
    }
  }
  if _fastPath(_guts.isFastUTF8) {
    return try _guts.withFastUTF8(body)
  }

  unimplemented_utf8()
}
|
||||
|
||||
@usableFromInline @inline(never) // slow-path
|
||||
internal static func _fromCodeUnits<
|
||||
Input: Collection,
|
||||
Encoding: Unicode.Encoding
|
||||
>(
|
||||
_ input: Input,
|
||||
encoding: Encoding.Type,
|
||||
repair: Bool
|
||||
) -> (String, repairsMade: Bool)?
|
||||
where Input.Element == Encoding.CodeUnit {
|
||||
// TODO(SSO): small check
|
||||
|
||||
// Determine how many UTF-16 code units we'll need
|
||||
let inputStream = input.makeIterator()
|
||||
guard let (utf16Count, isASCII) = UTF16.transcodedLength(
|
||||
of: inputStream,
|
||||
decodedAs: encoding,
|
||||
repairingIllFormedSequences: repairIllFormedSequences) else {
|
||||
return nil
|
||||
}
|
||||
|
||||
let capacity = utf16Count
|
||||
if isASCII {
|
||||
if let small = _SmallUTF8String(
|
||||
_fromCodeUnits: input,
|
||||
utf16Length: utf16Count,
|
||||
isASCII: true,
|
||||
Encoding.self
|
||||
) {
|
||||
return String(_StringGuts(small))
|
||||
}
|
||||
|
||||
let storage = _SwiftStringStorage<UInt8>.create(
|
||||
capacity: capacity,
|
||||
count: utf16Count)
|
||||
var p = storage.start
|
||||
let sink: (UTF32.CodeUnit) -> Void = {
|
||||
p.pointee = UTF8.CodeUnit($0)
|
||||
p += 1
|
||||
}
|
||||
let hadError = transcode(
|
||||
input.makeIterator(),
|
||||
from: encoding, to: UTF32.self,
|
||||
stoppingOnError: true,
|
||||
into: sink)
|
||||
_sanityCheck(!hadError,
|
||||
"string cannot be ASCII if there were decoding errors")
|
||||
return String(_largeStorage: storage)
|
||||
} else {
|
||||
// TODO(SSO): Small transcoded string
|
||||
|
||||
let storage = _SwiftStringStorage<UTF16.CodeUnit>.create(
|
||||
capacity: capacity,
|
||||
count: utf16Count)
|
||||
var p = storage.start
|
||||
let sink: (UTF16.CodeUnit) -> Void = {
|
||||
p.pointee = $0
|
||||
p += 1
|
||||
}
|
||||
_ = transcode(
|
||||
input.makeIterator(),
|
||||
from: encoding, to: UTF16.self,
|
||||
stoppingOnError: !repairIllFormedSequences,
|
||||
into: sink)
|
||||
return String(_largeStorage: storage)
|
||||
}
|
||||
}
|
||||
|
||||
internal static func _fromNonASCIIUTF8(
|
||||
_ input: UnsafeBufferPointer<UInt8>, repair: Bool
|
||||
) -> String? {
|
||||
if let smol = _SmallUTF8String(input) {
|
||||
return String(_StringGuts(smol))
|
||||
}
|
||||
|
||||
// Determine how many UTF-16 code units we'll need
|
||||
let inputStream = input.makeIterator()
|
||||
|
||||
// TODO: Replace with much, much faster length check
|
||||
guard let (utf16Count, isASCII) = UTF16.transcodedLength(
|
||||
of: inputStream,
|
||||
decodedAs: UTF8.self,
|
||||
repairingIllFormedSequences: repair) else {
|
||||
return nil
|
||||
}
|
||||
|
||||
let capacity = utf16Count
|
||||
_sanityCheck(!isASCII, "was given ASCII UTF-8")
|
||||
let storage = _SwiftStringStorage<UTF16.CodeUnit>.create(
|
||||
capacity: capacity,
|
||||
count: utf16Count)
|
||||
var p = storage.start
|
||||
let sink: (UTF16.CodeUnit) -> Void = {
|
||||
p.pointee = $0
|
||||
p += 1
|
||||
}
|
||||
// TODO: Replace with much, much faster transcoding
|
||||
_ = transcode(
|
||||
// TODO(UTF8): Skip intermediary array
|
||||
var contents: [UInt8] = []
|
||||
contents.reserveCapacity(input.underestimatedCount)
|
||||
let repaired = transcode(
|
||||
input.makeIterator(),
|
||||
from: UTF8.self, to: UTF16.self,
|
||||
stoppingOnError: !repair,
|
||||
into: sink)
|
||||
return String(_largeStorage: storage)
|
||||
}
|
||||
from: Encoding.self,
|
||||
to: UTF8.self,
|
||||
stoppingOnError: false,
|
||||
into: { contents.append($0) })
|
||||
guard repair || !repaired else { return nil }
|
||||
|
||||
// For testing purposes only, allow ourselves to have invalid contents
|
||||
@usableFromInline // @testable
|
||||
static internal
|
||||
func _fromInvalidUTF16(_ cus: UnsafeBufferPointer<UInt16>) -> String {
|
||||
let storage = _SwiftStringStorage<UTF16.CodeUnit>.create(
|
||||
capacity: cus.count, count: cus.count)
|
||||
_ = storage._initialize(fromCodeUnits: cus, encoding: UTF16.self)
|
||||
return String(_StringGuts(_large: storage))
|
||||
let str = contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) }
|
||||
return (str, repaired)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,411 +12,161 @@
|
||||
|
||||
import SwiftShims
|
||||
|
||||
/// Carriage return (0x0D) and line feed (0x0A); CR and LF are common special
/// cases in grapheme breaking logic.
@inlinable // FIXME(sil-serialize-all)
internal var _CR: UInt8 {
  return UInt8(ascii: "\r")
}

@inlinable // FIXME(sil-serialize-all)
internal var _LF: UInt8 {
  return UInt8(ascii: "\n")
}
|
||||
|
||||
extension _StringVariant {
|
||||
@inlinable
|
||||
internal func _stride(at i: String.Index) -> Int {
|
||||
if let stride = i.characterStride { return stride }
|
||||
return characterStride(atOffset: i.encodedOffset)
|
||||
/// Measures the distance, in UTF-8 code units, from offset `i` to the
/// boundary that `__swift_stdlib_ubrk_following` reports after `i`.
///
/// If the break iterator reports -1 (UBRK_DONE), the remainder of the
/// buffer (`utf8.count - i`) is returned.
@_effects(releasenone)
internal func _measureCharacterStride(
  of utf8: UnsafeBufferPointer<UInt8>, startingAt i: Int
) -> Int {
  let breakIterator = _ThreadLocalStorage.getUBreakIterator(utf8)
  let boundary = __swift_stdlib_ubrk_following(
    breakIterator, Int32(truncatingIfNeeded: i))

  // ubrk_following returns -1 (UBRK_DONE) when it hits the end of the buffer.
  guard _fastPath(boundary != -1) else {
    return utf8.count &- i
  }

  // The boundary is an offset into our buffer, so the stride is its
  // distance from `i`.
  _sanityCheck(boundary > i, "zero-sized grapheme?")
  return Int(truncatingIfNeeded: boundary) &- i
}
|
||||
|
||||
@inlinable
|
||||
internal func characterStride(atOffset offset: Int) -> Int {
|
||||
let slice = self.checkedSlice(from: offset)
|
||||
return slice.measureFirstExtendedGraphemeCluster()
|
||||
/// Measures the distance, in UTF-16 code units, from offset `i` to the
/// boundary that `__swift_stdlib_ubrk_following` reports after `i`.
///
/// If the break iterator reports -1 (UBRK_DONE), the remainder of the
/// buffer (`utf16.count - i`) is returned.
@_effects(releasenone)
internal func _measureCharacterStride(
  of utf16: UnsafeBufferPointer<UInt16>, startingAt i: Int
) -> Int {
  let breakIterator = _ThreadLocalStorage.getUBreakIterator(utf16)
  let boundary = __swift_stdlib_ubrk_following(
    breakIterator, Int32(truncatingIfNeeded: i))

  // ubrk_following returns -1 (UBRK_DONE) when it hits the end of the buffer.
  guard _fastPath(boundary != -1) else {
    return utf16.count &- i
  }

  // The boundary is an offset into our buffer, so the stride is its
  // distance from `i`.
  _sanityCheck(boundary > i, "zero-sized grapheme?")
  return Int(truncatingIfNeeded: boundary) &- i
}
|
||||
|
||||
@inlinable
|
||||
internal func characterIndex(atOffset offset: Int) -> String.Index {
|
||||
let stride = self.characterStride(atOffset: offset)
|
||||
return String.Index(encodedOffset: offset, characterStride: stride)
|
||||
/// Measures the distance, in UTF-8 code units, from the boundary that
/// `__swift_stdlib_ubrk_preceding` reports before offset `i` up to `i`.
///
/// If the break iterator reports -1 (UBRK_DONE) there is no boundary before
/// `i`, so the stride reaches back to the start of the buffer and `i` is
/// returned.
@_effects(releasenone)
internal func _measureCharacterStride(
  of utf8: UnsafeBufferPointer<UInt8>, endingAt i: Int
) -> Int {
  let iterator = _ThreadLocalStorage.getUBreakIterator(utf8)
  let offset = __swift_stdlib_ubrk_preceding(
    iterator, Int32(truncatingIfNeeded: i))
  // ubrk_preceding returns -1 (UBRK_DONE) when there is no boundary before
  // the given offset.
  if _fastPath(offset != -1) {
    // The distance from the reported boundary up to `i` is the stride.
    _sanityCheck(offset < i, "zero-sized grapheme?")
    return i &- Int(truncatingIfNeeded: offset)
  }
  // Everything from the start of the buffer up to `i` is one stride. The
  // previous `i &- utf8.count` could never be positive for an in-bounds `i`.
  return i
}
|
||||
|
||||
@inlinable
|
||||
internal func characterIndex(after i: String.Index) -> String.Index {
|
||||
/// Measures the distance, in UTF-16 code units, from the boundary that
/// `__swift_stdlib_ubrk_preceding` reports before offset `i` up to `i`.
///
/// If the break iterator reports -1 (UBRK_DONE) there is no boundary before
/// `i`, so the stride reaches back to the start of the buffer and `i` is
/// returned.
@_effects(releasenone)
internal func _measureCharacterStride(
  of utf16: UnsafeBufferPointer<UInt16>, endingAt i: Int
) -> Int {
  let iterator = _ThreadLocalStorage.getUBreakIterator(utf16)
  let offset = __swift_stdlib_ubrk_preceding(
    iterator, Int32(truncatingIfNeeded: i))
  // ubrk_preceding returns -1 (UBRK_DONE) when there is no boundary before
  // the given offset.
  if _fastPath(offset != -1) {
    // The distance from the reported boundary up to `i` is the stride.
    _sanityCheck(offset < i, "zero-sized grapheme?")
    return i &- Int(truncatingIfNeeded: offset)
  }
  // Everything from the start of the buffer up to `i` is one stride. The
  // previous `i &- utf16.count` could never be positive for an in-bounds `i`.
  return i
}
|
||||
|
||||
extension _StringGuts {
|
||||
@usableFromInline @inline(never)
|
||||
@_effects(releasenone)
|
||||
internal func isOnGraphemeClusterBoundary(_ i: String.Index) -> Bool {
|
||||
let offset = i.encodedOffset
|
||||
_precondition(offset >= 0, "String index is out of bounds")
|
||||
_precondition(offset < count, "Can't advance past endIndex")
|
||||
// Find the current grapheme distance
|
||||
let slice = self[offset..<count]
|
||||
let stride1 = _stride(at: i)
|
||||
// Calculate and cache the next grapheme distance
|
||||
let stride2 = slice.dropFirst(stride1).measureFirstExtendedGraphemeCluster()
|
||||
return String.Index(
|
||||
encodedOffset: offset &+ stride1,
|
||||
characterStride: stride2)
|
||||
if offset == 0 || offset == self.count { return true }
|
||||
|
||||
guard isOnUnicodeScalarBoundary(i) else { return false }
|
||||
|
||||
let str = String(self)
|
||||
return i == str.index(before: str.index(after: i))
|
||||
}
|
||||
|
||||
@inlinable
|
||||
internal func characterIndex(before i: String.Index) -> String.Index {
|
||||
let offset = i.encodedOffset
|
||||
_precondition(offset > 0, "Can't move before startIndex")
|
||||
_precondition(offset <= count, "String index is out of bounds")
|
||||
let slice = self[0..<offset]
|
||||
let stride = slice.measureLastExtendedGraphemeCluster()
|
||||
_sanityCheck(stride > 0 && stride <= UInt16.max)
|
||||
return String.Index(
|
||||
encodedOffset: offset &- stride,
|
||||
characterStride: stride)
|
||||
/// Returns the character stride starting at code-unit offset `i`.
///
/// Foreign guts are forwarded to `_foreignOpaqueCharacterStride`; otherwise
/// the stride is measured over the buffer supplied by `withFastUTF8`.
@usableFromInline @inline(never)
@_effects(releasenone)
internal func _opaqueCharacterStride(startingAt i: Int) -> Int {
  guard _fastPath(!isForeign) else {
    return _foreignOpaqueCharacterStride(startingAt: i)
  }

  // TODO(UTF8 perf): grapheme breaking fast-paths...

  return withFastUTF8 { utf8 in
    _measureCharacterStride(of: utf8, startingAt: i)
  }
}
|
||||
|
||||
@inlinable
|
||||
internal func characterIndex(
|
||||
_ i: String.Index,
|
||||
offsetBy n: Int
|
||||
) -> String.Index {
|
||||
var i = i
|
||||
if n >= 0 {
|
||||
for _ in 0 ..< n {
|
||||
i = characterIndex(after: i)
|
||||
}
|
||||
} else {
|
||||
for _ in n ..< 0 {
|
||||
i = characterIndex(before: i)
|
||||
}
|
||||
@usableFromInline @inline(never)
|
||||
@_effects(releasenone)
|
||||
internal func _foreignOpaqueCharacterStride(startingAt i: Int) -> Int {
|
||||
_sanityCheck(isForeign)
|
||||
|
||||
// TODO(UTF8 perf): grapheme breaking fast-paths...
|
||||
|
||||
// TODO(UTF8 perf): local stack first, before nuclear solution
|
||||
// TODO(UTF8 perf): even nuclear solution should copy to larger arrays in a
|
||||
// loop
|
||||
|
||||
let count = _object.largeCount
|
||||
let cocoa = _object.cocoaObject
|
||||
var codeUnits = Array<UInt16>(repeating: 0, count: count)
|
||||
|
||||
codeUnits.withUnsafeMutableBufferPointer {
|
||||
_cocoaStringCopyCharacters(
|
||||
from: cocoa,
|
||||
range: 0..<count,
|
||||
into: $0.baseAddress._unsafelyUnwrappedUnchecked)
|
||||
}
|
||||
return codeUnits.withUnsafeBufferPointer {
|
||||
_measureCharacterStride(of: $0, startingAt: i)
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
@inlinable
|
||||
internal func characterIndex(
|
||||
_ i: String.Index,
|
||||
offsetBy n: Int,
|
||||
limitedBy limit: String.Index
|
||||
) -> String.Index? {
|
||||
var i = i
|
||||
if n >= 0 {
|
||||
for _ in 0 ..< n {
|
||||
// Note condition is >=, not ==: we do not want to jump
|
||||
// over limit if it's in the middle of a grapheme cluster.
|
||||
// https://bugs.swift.org/browse/SR-6545
|
||||
if i >= limit { return nil }
|
||||
i = characterIndex(after: i)
|
||||
}
|
||||
} else {
|
||||
for _ in n ..< 0 {
|
||||
if i <= limit { return nil } // See note above.
|
||||
i = characterIndex(before: i)
|
||||
}
|
||||
@usableFromInline @inline(never)
|
||||
@_effects(releasenone)
|
||||
internal func _opaqueCharacterStride(endingAt i: Int) -> Int {
|
||||
if _slowPath(isForeign) {
|
||||
return _foreignOpaqueCharacterStride(endingAt: i)
|
||||
}
|
||||
|
||||
// TODO(UTF8 perf): grapheme breaking fast-paths...
|
||||
|
||||
return self.withFastUTF8 {
|
||||
return _measureCharacterStride(of: $0, endingAt: i)
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
/// Counts the character steps needed to walk from `start` to `end`.
///
/// The result is positive when `start < end`, negative when `start > end`,
/// and zero when they are equal.
public func characterDistance(
  from start: String.Index,
  to end: String.Index
) -> Int {
  var cursor = start
  var distance = 0

  guard start < end else {
    // Walking backwards. As with the forward walk below, the loop compares
    // with `>` rather than `!=`.
    while cursor > end {
      distance -= 1
      cursor = characterIndex(before: cursor)
    }
    return distance
  }

  // The loop condition is deliberately not an equality check: we do not
  // want to jump over `end` if it's in the middle of a grapheme cluster.
  // https://bugs.swift.org/browse/SR-6546
  while cursor < end {
    distance += 1
    cursor = characterIndex(after: cursor)
  }
  return distance
}
|
||||
@usableFromInline @inline(never)
|
||||
@_effects(releasenone)
|
||||
internal func _foreignOpaqueCharacterStride(endingAt i: Int) -> Int {
|
||||
_sanityCheck(isForeign)
|
||||
|
||||
@inlinable
|
||||
internal func character(at i: String.Index) -> Character {
|
||||
let stride = _stride(at: i)
|
||||
let offset = i.encodedOffset
|
||||
if _slowPath(stride > 1) {
|
||||
return Character(_unverified: self.checkedSlice(offset..<offset + stride))
|
||||
// TODO(UTF8 perf): grapheme breaking fast-paths...
|
||||
|
||||
// TODO(UTF8 perf): local stack first, before nuclear solution
|
||||
// TODO(UTF8 perf): even nuclear solution should copy to larger arrays in a
|
||||
// loop
|
||||
|
||||
let count = _object.largeCount
|
||||
let cocoa = _object.cocoaObject
|
||||
var codeUnits = Array<UInt16>(repeating: 0, count: count)
|
||||
|
||||
codeUnits.withUnsafeMutableBufferPointer {
|
||||
_cocoaStringCopyCharacters(
|
||||
from: cocoa,
|
||||
range: 0..<count,
|
||||
into: $0.baseAddress._unsafelyUnwrappedUnchecked)
|
||||
}
|
||||
let u = self.codeUnit(atCheckedOffset: offset)
|
||||
if _slowPath(!UTF16._isScalar(u)) {
|
||||
return Character(Unicode.Scalar._replacementCharacter)
|
||||
return codeUnits.withUnsafeBufferPointer {
|
||||
_measureCharacterStride(of: $0, endingAt: i)
|
||||
}
|
||||
return Character(_singleCodeUnit: u)
|
||||
}
|
||||
}
|
||||
|
||||
extension _StringVariant {
  // NOTE: Because this function is inlineable, it should contain only the fast
  // paths of grapheme breaking that we have high confidence won't change.
  /// Returns the length of the first extended grapheme cluster in UTF-16
  /// code units.
  @inlinable
  internal
  func measureFirstExtendedGraphemeCluster() -> Int {
    switch count {
    case 0:
      // No more graphemes at end of string.
      return 0
    case 1:
      // A single remaining code unit is necessarily a grapheme of length 1.
      return 1
    default:
      break
    }

    if isASCII {
      _onFastPath() // Please aggressively inline
      // The only multi-scalar ASCII grapheme cluster is CR/LF.
      let startsWithCRLF = _slowPath(self[0] == _CR && self[1] == _LF)
      return startsWithCRLF ? 2 : 1
    }

    let breaksAfterFirstUnit = _fastPath(
      UTF16._quickCheckGraphemeBreakBetween(self[0], self[1]))
    guard breaksAfterFirstUnit else {
      return self._measureFirstExtendedGraphemeClusterSlow()
    }
    return 1
  }

  // NOTE: Because this function is inlineable, it should contain only the fast
  // paths of grapheme breaking that we have high confidence won't change.
  //
  /// Returns the length of the last extended grapheme cluster in UTF-16
  /// code units.
  @inlinable
  internal
  func measureLastExtendedGraphemeCluster() -> Int {
    let count = self.count
    switch count {
    case 0:
      // No more graphemes at end of string.
      return 0
    case 1:
      // A single remaining code unit is necessarily a grapheme of length 1.
      return 1
    default:
      break
    }

    if isASCII {
      _onFastPath() // Please aggressively inline
      // The only multi-scalar ASCII grapheme cluster is CR/LF.
      let endsWithCRLF = _slowPath(
        self[count-1] == _LF && self[count-2] == _CR)
      return endsWithCRLF ? 2 : 1
    }

    let breaksBeforeLastUnit = _fastPath(
      UTF16._quickCheckGraphemeBreakBetween(self[count - 2], self[count - 1]))
    guard breaksBeforeLastUnit else {
      return self._measureLastExtendedGraphemeClusterSlow()
    }
    return 1
  }
}
|
||||
|
||||
extension _UnmanagedString {
  /// Slow path for measuring the first extended grapheme cluster: views the
  /// storage at `rawStart` as `count` UTF-16 code units and defers to
  /// `UTF16._measureFirstExtendedGraphemeCluster(in:)`.
  @inline(never)
  @usableFromInline
  internal func _measureFirstExtendedGraphemeClusterSlow() -> Int {
    // ASCII case handled entirely on fast path.
    // FIXME: Have separate implementations for ASCII & UTF-16 views.
    _sanityCheck(CodeUnit.self == UInt16.self)
    let codeUnits = UnsafeBufferPointer(
      start: rawStart.assumingMemoryBound(to: UInt16.self),
      count: count)
    return UTF16._measureFirstExtendedGraphemeCluster(in: codeUnits)
  }

  /// Slow path for measuring the last extended grapheme cluster: views the
  /// storage at `rawStart` as `count` UTF-16 code units and defers to
  /// `UTF16._measureLastExtendedGraphemeCluster(in:)`.
  @inline(never)
  @usableFromInline
  internal func _measureLastExtendedGraphemeClusterSlow() -> Int {
    // ASCII case handled entirely on fast path.
    // FIXME: Have separate implementations for ASCII & UTF-16 views.
    _sanityCheck(CodeUnit.self == UInt16.self)
    let codeUnits = UnsafeBufferPointer(
      start: rawStart.assumingMemoryBound(to: UInt16.self),
      count: count)
    return UTF16._measureLastExtendedGraphemeCluster(in: codeUnits)
  }
}
|
||||
|
||||
extension _UnmanagedOpaqueString {
  /// Slow path for measuring the first extended grapheme cluster.
  ///
  /// First tries a fixed-size copy of the leading code units; if the
  /// measured length is not smaller than that buffer's capacity, the whole
  /// string is copied into a temporary allocation and measured again.
  @inline(never)
  @usableFromInline
  internal func _measureFirstExtendedGraphemeClusterSlow() -> Int {
    _sanityCheck(count >= 2, "should have at least two code units")

    // Pull out some code units into a fixed array and try to perform grapheme
    // breaking on that.
    typealias ShortBuffer = _FixedArray16<UInt16>
    let shortCount = Swift.min(ShortBuffer.capacity, count)
    var head = ShortBuffer(count: shortCount)
    head.withUnsafeMutableBufferPointer { destination in
      self.prefix(destination.count)._copy(into: destination)
    }
    let headLength = head.withUnsafeBufferPointer { codeUnits in
      UTF16._measureFirstExtendedGraphemeCluster(in: codeUnits)
    }
    guard !_fastPath(headLength < head.capacity) else {
      return headLength
    }

    // Nuclear option: copy out the rest of the string into a contiguous buffer.
    let scratch = UnsafeMutablePointer<UInt16>.allocate(capacity: count)
    defer { scratch.deallocate() }
    self._copy(into: UnsafeMutableBufferPointer(start: scratch, count: count))
    let everything = UnsafeBufferPointer(start: scratch, count: count)
    return UTF16._measureFirstExtendedGraphemeCluster(in: everything)
  }

  /// Slow path for measuring the last extended grapheme cluster.
  ///
  /// First tries a fixed-size copy of the trailing code units; if the
  /// measured length is not smaller than that buffer's capacity, the whole
  /// string is copied into a temporary allocation and measured again.
  @inline(never)
  @usableFromInline
  internal func _measureLastExtendedGraphemeClusterSlow() -> Int {
    _sanityCheck(count >= 2, "should have at least two code units")

    // Pull out some code units into a fixed array and try to perform grapheme
    // breaking on that.
    typealias ShortBuffer = _FixedArray16<UInt16>
    let shortCount = Swift.min(ShortBuffer.capacity, count)
    var tail = ShortBuffer(count: shortCount)
    tail.withUnsafeMutableBufferPointer { destination in
      self.suffix(destination.count)._copy(into: destination)
    }
    let tailLength = tail.withUnsafeBufferPointer { codeUnits in
      UTF16._measureLastExtendedGraphemeCluster(in: codeUnits)
    }
    guard !_fastPath(tailLength < tail.capacity) else {
      return tailLength
    }

    // Nuclear option: copy out the rest of the string into a contiguous buffer.
    let scratch = UnsafeMutablePointer<UInt16>.allocate(capacity: count)
    defer { scratch.deallocate() }
    self._copy(into: UnsafeMutableBufferPointer(start: scratch, count: count))
    let everything = UnsafeBufferPointer(start: scratch, count: count)
    return UTF16._measureLastExtendedGraphemeCluster(in: everything)
  }
}
|
||||
|
||||
extension Unicode.UTF16 {
  /// Fast check for a (stable) grapheme break between two UInt16 code units.
  ///
  /// Returns `true` only when a break is known to exist; `false` means the
  /// quick check could not decide.
  @inlinable // Safe to inline
  internal static func _quickCheckGraphemeBreakBetween(
    _ lhs: UInt16, _ rhs: UInt16
  ) -> Bool {
    guard lhs < 0x300 && rhs < 0x300 else {
      return _internalExtraCheckGraphemeBreakBetween(lhs, rhs)
    }
    // With the exception of CR-LF, there is always a grapheme break between two
    // sub-0x300 code units
    return lhs != UInt16(_CR) && rhs != UInt16(_LF)
  }

  /// Extended range-table check used by `_quickCheckGraphemeBreakBetween`
  /// when at least one code unit is 0x300 or above.
  @inline(never) // @inline(resilient_only)
  @usableFromInline
  internal static func _internalExtraCheckGraphemeBreakBetween(
    _ lhs: UInt16, _ rhs: UInt16
  ) -> Bool {
    _sanityCheck(
      !(lhs == _CR && rhs == _LF),
      "CR-LF special case handled by _quickCheckGraphemeBreakBetween")

    // Whether the given scalar, when it appears paired with another scalar
    // satisfying this property, has a grapheme break between it and the other
    // scalar.
    func hasBreakWhenPaired(_ unit: UInt16) -> Bool {
      // TODO: This doesn't generate optimal code, tune/re-write at a lower
      // level.
      //
      // NOTE: Order of case ranges affects codegen, and thus performance. All
      // things being equal, keep existing order below.
      switch unit {
      // Unified CJK Han ideographs, common and some supplemental, amongst
      // others: 0x3400-0xA4CF
      case 0x3400...0xa4cf:
        return true

      // Repeat sub-300 check, this is beneficial for common cases of Latin
      // characters embedded within non-Latin script (e.g. newlines, spaces,
      // proper nouns and/or jargon, punctuation).
      //
      // NOTE: CR-LF special case has already been checked.
      case 0x0000...0x02ff:
        return true

      // Non-combining kana: 0x3041-0x3096, 0x30A1-0x30FA
      case 0x3041...0x3096:
        return true
      case 0x30a1...0x30fa:
        return true

      // Non-combining modern (and some archaic) Cyrillic:
      // 0x0400-0x0482 (first half of Cyrillic block)
      case 0x0400...0x0482:
        return true

      // Modern Arabic, excluding extenders and prependers: 0x061D-0x064A
      case 0x061d...0x064a:
        return true

      // Precomposed Hangul syllables: 0xAC00-0xD7AF
      case 0xac00...0xd7af:
        return true

      // Common general use punctuation, excluding extenders: 0x2010-0x2029
      case 0x2010...0x2029:
        return true

      // CJK punctuation characters, excluding extenders: 0x3000-0x3029
      case 0x3000...0x3029:
        return true

      default:
        return false
      }
    }

    guard hasBreakWhenPaired(lhs) else { return false }
    return hasBreakWhenPaired(rhs)
  }

  /// Measures the first extended grapheme cluster of `buffer` by asking the
  /// thread-local break iterator for the boundary following offset 0.
  // NOT @usableFromInline
  internal static func _measureFirstExtendedGraphemeCluster(
    in buffer: UnsafeBufferPointer<CodeUnit>
  ) -> Int {
    // ICU can only handle 32-bit offsets; don't feed it more than that.
    // https://bugs.swift.org/browse/SR-6544
    let fitsInInt32 = _fastPath(buffer.count <= Int(Int32.max))
    let clampedCount: Int32 =
      fitsInInt32 ? Int32(truncatingIfNeeded: buffer.count) : Int32.max

    let breakIterator = _ThreadLocalStorage.getUBreakIterator(
      start: buffer.baseAddress!,
      count: clampedCount)
    let boundary = __swift_stdlib_ubrk_following(breakIterator, 0)

    // ubrk_following returns -1 (UBRK_DONE) when it hits the end of the buffer.
    guard _fastPath(boundary != -1) else {
      return Int(clampedCount)
    }
    // The offset into our buffer is the distance.
    _sanityCheck(boundary > 0, "zero-sized grapheme?")
    return Int(boundary)
  }

  /// Measures the last extended grapheme cluster of `buffer` by asking the
  /// thread-local break iterator for the boundary preceding the end of the
  /// (possibly clamped) window.
  // NOT @usableFromInline
  internal static func _measureLastExtendedGraphemeCluster(
    in buffer: UnsafeBufferPointer<CodeUnit>
  ) -> Int {
    // ICU can only handle 32-bit offsets; don't feed it more than that.
    // https://bugs.swift.org/browse/SR-6544
    let base = buffer.baseAddress!
    let clampedCount: Int32
    let windowStart: UnsafePointer<CodeUnit>
    if _fastPath(buffer.count <= Int(Int32.max)) {
      clampedCount = Int32(truncatingIfNeeded: buffer.count)
      windowStart = base
    } else {
      // Keep only the trailing Int32.max code units.
      clampedCount = Int32.max
      windowStart = base + buffer.count - Int(Int32.max)
    }

    let breakIterator = _ThreadLocalStorage.getUBreakIterator(
      start: windowStart,
      count: clampedCount)
    let boundary = __swift_stdlib_ubrk_preceding(breakIterator, clampedCount)

    // -1 (UBRK_DONE) is handled by treating the whole window as the result.
    guard _fastPath(boundary != -1) else {
      return Int(clampedCount)
    }
    // The distance from the boundary to the end of the window is the length.
    _sanityCheck(boundary < clampedCount, "zero-sized grapheme?")
    return Int(clampedCount - boundary)
  }
}
|
||||
|
||||
+292
-1273
File diff suppressed because it is too large
Load Diff
@@ -10,215 +10,3 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// TODO: describe
|
||||
//
|
||||
// HACK HACK HACK: For whatever reason, having this directly on String instead
|
||||
// of _StringGuts avoids a cascade of ARC. Also note, we can have a global
|
||||
// function that forwards, but that function **must not be on _StringGuts**,
|
||||
// else ARC.
|
||||
//
|
||||
extension String {
  /// Dispatches on the representation of `_guts` and calls exactly one of
  /// the supplied closures with an unmanaged view of the contents.
  ///
  /// Opaque guts are forwarded to `_visitOpaque`. Otherwise `ascii` receives
  /// `_guts._unmanagedASCIIView` when `_guts.isASCII`, and `utf16` receives
  /// `_guts._unmanagedUTF16View` when it is not. If `range` is supplied the
  /// view is sliced to it first, preceded by a bounds check when
  /// `performBoundsCheck` is `true`.
  @inlinable
  @inline(__always)
  func _visit<Result>(
    range: (Range<Int>, performBoundsCheck: Bool)? = nil,
    ascii: /*@convention(thin)*/ (_UnmanagedString<UInt8>) -> Result,
    utf16: /*@convention(thin)*/ (_UnmanagedString<UInt16>) -> Result,
    opaque: /*@convention(thin)*/ (_UnmanagedOpaqueString) -> Result
  ) -> Result {
    guard _fastPath(!_guts._isOpaque) else {
      return self._visitOpaque(
        range: range, ascii: ascii, utf16: utf16, opaque: opaque)
    }

    defer { _fixLifetime(self) }
    guard _guts.isASCII else {
      var wide = _guts._unmanagedUTF16View
      if let request = range {
        if request.performBoundsCheck {
          wide._boundsCheck(offsetRange: request.0)
        }
        wide = wide[request.0]
      }
      return utf16(wide)
    }

    var narrow = _guts._unmanagedASCIIView
    if let request = range {
      if request.performBoundsCheck {
        narrow._boundsCheck(offsetRange: request.0)
      }
      narrow = narrow[request.0]
    }
    return ascii(narrow)
  }

  /// Out-of-line half of `_visit` for opaque guts.
  ///
  /// Small guts are visited through `withUnmanagedASCII` or
  /// `withUnmanagedUTF16` on `_guts._smallUTF8String`; anything else is
  /// visited through `_guts._asOpaque()` and the `opaque` closure.
  @usableFromInline
  @_effects(readonly)
  @inline(never) // @_outlined
  func _visitOpaque<Result>(
    range: (Range<Int>, performBoundsCheck: Bool)? = nil,
    ascii: /*@convention(thin)*/ (_UnmanagedString<UInt8>) -> Result,
    utf16: /*@convention(thin)*/ (_UnmanagedString<UInt16>) -> Result,
    opaque: /*@convention(thin)*/ (_UnmanagedOpaqueString) -> Result
  ) -> Result {
    _sanityCheck(_guts._isOpaque)

    guard _guts._isSmall else {
      // TODO: But can it provide a pointer+length representation?
      defer { _fixLifetime(self) }
      var foreign = _guts._asOpaque()
      if let request = range {
        if request.performBoundsCheck {
          foreign._boundsCheck(offsetRange: request.0)
        }
        foreign = foreign[request.0]
      }
      return opaque(foreign)
    }

    _sanityCheck(_guts._object._isSmallUTF8, "no other small forms yet")
    let small = _guts._smallUTF8String
    guard small.isASCII else {
      return small.withUnmanagedUTF16 { unmanaged in
        var wide = unmanaged
        if let request = range {
          if request.performBoundsCheck {
            wide._boundsCheck(offsetRange: request.0)
          }
          wide = wide[request.0]
        }
        return utf16(wide)
      }
    }
    return small.withUnmanagedASCII { unmanaged in
      var narrow = unmanaged
      if let request = range {
        if request.performBoundsCheck {
          narrow._boundsCheck(offsetRange: request.0)
        }
        narrow = narrow[request.0]
      }
      return ascii(narrow)
    }
  }

  /// Variant of `_visit` that threads an extra argument `x` through to
  /// whichever closure is invoked.
  @inlinable
  @inline(__always)
  func _visit<T, Result>(
    range: (Range<Int>, performBoundsCheck: Bool)?,
    args x: T,
    ascii: /*@convention(thin)*/ (_UnmanagedString<UInt8>, T) -> Result,
    utf16: /*@convention(thin)*/ (_UnmanagedString<UInt16>, T) -> Result,
    opaque: /*@convention(thin)*/ (_UnmanagedOpaqueString, T) -> Result
  ) -> Result {
    guard _fastPath(!_guts._isOpaque) else {
      return self._visitOpaque(
        range: range, args: x, ascii: ascii, utf16: utf16, opaque: opaque)
    }

    defer { _fixLifetime(self) }
    guard _guts.isASCII else {
      var wide = _guts._unmanagedUTF16View
      if let request = range {
        if request.performBoundsCheck {
          wide._boundsCheck(offsetRange: request.0)
        }
        wide = wide[request.0]
      }
      return utf16(wide, x)
    }

    var narrow = _guts._unmanagedASCIIView
    if let request = range {
      if request.performBoundsCheck {
        narrow._boundsCheck(offsetRange: request.0)
      }
      narrow = narrow[request.0]
    }
    return ascii(narrow, x)
  }

  /// Out-of-line half of the argument-passing `_visit` for opaque guts.
  @usableFromInline // @opaque
  @_effects(readonly)
  @inline(never)
  func _visitOpaque<T, Result>(
    range: (Range<Int>, performBoundsCheck: Bool)?,
    args x: T,
    ascii: /*@convention(thin)*/ (_UnmanagedString<UInt8>, T) -> Result,
    utf16: /*@convention(thin)*/ (_UnmanagedString<UInt16>, T) -> Result,
    opaque: /*@convention(thin)*/ (_UnmanagedOpaqueString, T) -> Result
  ) -> Result {
    _sanityCheck(_guts._isOpaque)

    guard _fastPath(_guts._isSmall) else {
      // TODO: But can it provide a pointer+length representation?
      defer { _fixLifetime(self) }
      var foreign = _guts._asOpaque()
      if let request = range {
        if request.performBoundsCheck {
          foreign._boundsCheck(offsetRange: request.0)
        }
        foreign = foreign[request.0]
      }
      return opaque(foreign, x)
    }

    _sanityCheck(_guts._object._isSmallUTF8, "no other small forms yet")
    let small = _guts._smallUTF8String
    guard small.isASCII else {
      return small.withUnmanagedUTF16 { unmanaged in
        var wide = unmanaged
        if let request = range {
          if request.performBoundsCheck {
            wide._boundsCheck(offsetRange: request.0)
          }
          wide = wide[request.0]
        }
        return utf16(wide, x)
      }
    }
    return small.withUnmanagedASCII { unmanaged in
      var narrow = unmanaged
      if let request = range {
        if request.performBoundsCheck {
          narrow._boundsCheck(offsetRange: request.0)
        }
        narrow = narrow[request.0]
      }
      return ascii(narrow, x)
    }
  }
}
|
||||
|
||||
/// Free-function form of `String._visit(range:ascii:utf16:opaque:)`.
///
/// Wraps `guts` in a `String` and forwards every argument unchanged.
///
/// - Parameters:
///   - guts: The string guts to visit.
///   - range: Optional offset range (with a bounds-check flag) passed through
///     to `_visit`; defaults to `nil`.
///   - ascii: Closure passed through for the ASCII representation.
///   - utf16: Closure passed through for the UTF-16 representation.
///   - opaque: Closure passed through for the opaque representation.
/// - Returns: The value produced by `_visit`.
@inlinable
@inline(__always)
internal
func _visitGuts<Result>(
  _ guts: _StringGuts,
  range: (Range<Int>, performBoundsCheck: Bool)? = nil,
  ascii: /*@convention(thin)*/ (_UnmanagedString<UInt8>) -> Result,
  utf16: /*@convention(thin)*/ (_UnmanagedString<UInt16>) -> Result,
  opaque: /*@convention(thin)*/ (_UnmanagedOpaqueString) -> Result
) -> Result {
  let wrapped = String(guts)
  return wrapped._visit(
    range: range,
    ascii: ascii,
    utf16: utf16,
    opaque: opaque)
}
|
||||
|
||||
/// Free-function form of `String._visit(range:args:ascii:utf16:opaque:)`.
///
/// Wraps `guts` in a `String` and forwards every argument unchanged,
/// including the extra value `extra` handed to the selected closure.
///
/// - Parameters:
///   - guts: The string guts to visit.
///   - range: Optional offset range (with a bounds-check flag) passed through
///     to `_visit`; defaults to `nil`.
///   - extra: A value forwarded to whichever closure is called.
///   - ascii: Closure passed through for the ASCII representation.
///   - utf16: Closure passed through for the UTF-16 representation.
///   - opaque: Closure passed through for the opaque representation.
/// - Returns: The value produced by `_visit`.
@inlinable
@inline(__always)
internal
func _visitGuts<T, Result>(
  _ guts: _StringGuts,
  range: (Range<Int>, performBoundsCheck: Bool)? = nil,
  args extra: T,
  ascii: /*@convention(thin)*/ (_UnmanagedString<UInt8>, T) -> Result,
  utf16: /*@convention(thin)*/ (_UnmanagedString<UInt16>, T) -> Result,
  opaque: /*@convention(thin)*/ (_UnmanagedOpaqueString, T) -> Result
) -> Result {
  let wrapped = String(guts)
  return wrapped._visit(
    range: range,
    args: extra,
    ascii: ascii,
    utf16: utf16,
    opaque: opaque)
}
|
||||
|
||||
|
||||
@@ -12,190 +12,17 @@
|
||||
|
||||
import SwiftShims
|
||||
|
||||
extension _UnmanagedString where CodeUnit == UInt8 {
  /// Feeds this string's raw byte buffer into `core`.
  ///
  /// No terminator is appended here; callers add one when they need it.
  internal func hashASCII(into core: inout Hasher._BufferingCore) {
    let bytes = rawBuffer
    core.combine(bytes: bytes)
  }
}
|
||||
|
||||
extension BidirectionalCollection where Element == UInt16, SubSequence == Self {
  /// Feeds these UTF-16 code units into `core` as UTF-8 bytes.
  ///
  /// Code units are consumed one at a time for as long as each is ASCII
  /// (`<= 0x7F`) and `hasNormalizationBoundary(after:)` reports a boundary
  /// after it; each such unit is combined as a single byte. At the first
  /// code unit that fails either test, the remainder of the collection from
  /// that position is run through `_NormalizedCodeUnitIterator`, each
  /// resulting scalar is transcoded to UTF-8 and combined, and the function
  /// returns.
  internal func hashUTF16(into core: inout Hasher._BufferingCore) {
    for position in indices {
      let unit = self[position]
      let boundaryFollows = hasNormalizationBoundary(after: position)
      let unitIsASCII = unit <= 0x7F

      guard unitIsASCII && boundaryFollows else {
        // Slow path: normalize everything from here to the end, then stop.
        let normalizedTail = _NormalizedCodeUnitIterator(
          self[position..<endIndex])
        let scalars = Unicode._ParsingIterator(
          codeUnits: normalizedTail,
          parser: Unicode.UTF16.ForwardParser())
        for scalar in scalars {
          let utf8Scalar = Unicode.UTF8.transcode(
            scalar, from: Unicode.UTF16.self
          ).unsafelyUnwrapped // never fails
          let (bytes, count) = utf8Scalar._bytes
          core.combine(bytes: bytes, count: count)
        }
        return
      }

      core.combine(UInt8(truncatingIfNeeded: unit))
    }
  }
}
|
||||
|
||||
extension _UnmanagedString where CodeUnit == UInt8 {
  /// Feeds the ASCII bytes of this string into `hasher`, followed by a
  /// single `0xFF` terminator byte.
  internal func hash(into hasher: inout Hasher) {
    hashASCII(into: &hasher._core)
    let terminator: UInt8 = 0xFF
    hasher._core.combine(terminator) // terminator
  }

  /// Returns the hash of this string's raw byte buffer for the given seed.
  ///
  /// Unlike `hash(into:)`, no terminator byte is hashed.
  internal func _rawHashValue(seed: Int) -> Int {
    let bytes = rawBuffer
    return Hasher._hash(seed: seed, bytes: bytes)
  }
}
|
||||
|
||||
extension _UnmanagedString where CodeUnit == UInt16 {
  /// Feeds this UTF-16 string into `hasher` via `hashUTF16(into:)`, followed
  /// by a single `0xFF` terminator byte.
  internal func hash(into hasher: inout Hasher) {
    hashUTF16(into: &hasher._core)
    let terminator: UInt8 = 0xFF
    hasher._core.combine(terminator) // terminator
  }

  /// Returns the hash of this string for the given seed.
  ///
  /// A fresh buffering core is seeded, fed through `hashUTF16(into:)` and
  /// finalized; no terminator byte is hashed.
  internal func _rawHashValue(seed: Int) -> Int {
    var scratch = Hasher._BufferingCore(seed: seed)
    hashUTF16(into: &scratch)
    let digest = scratch.finalize()
    return Int(truncatingIfNeeded: digest)
  }
}
|
||||
|
||||
extension _UnmanagedOpaqueString {
  /// Feeds this opaque string into `hasher` via `hashUTF16(into:)`, followed
  /// by a single `0xFF` terminator byte.
  internal func hash(into hasher: inout Hasher) {
    hashUTF16(into: &hasher._core)
    let terminator: UInt8 = 0xFF
    hasher._core.combine(terminator) // terminator
  }

  /// Returns the hash of this string for the given seed.
  ///
  /// A fresh buffering core is seeded, fed through `hashUTF16(into:)` and
  /// finalized; no terminator byte is hashed.
  internal func _rawHashValue(seed: Int) -> Int {
    var scratch = Hasher._BufferingCore(seed: seed)
    hashUTF16(into: &scratch)
    let digest = scratch.finalize()
    return Int(truncatingIfNeeded: digest)
  }
}
|
||||
|
||||
extension _SmallUTF8String {
  /// Feeds this small string into `hasher`.
  ///
  /// ASCII contents are hashed through a temporary unmanaged ASCII view,
  /// anything else through a temporary unmanaged UTF-16 view. On i386 and
  /// arm builds this calls `unsupportedOn32bit()` instead.
  internal func hash(into hasher: inout Hasher) {
#if arch(i386) || arch(arm)
    unsupportedOn32bit()
#else
    if isASCII {
      withUnmanagedASCII { asciiView in asciiView.hash(into: &hasher) }
    } else {
      withUnmanagedUTF16 { utf16View in utf16View.hash(into: &hasher) }
    }
#endif // 64-bit
  }

  /// Returns the hash of this small string for the given seed.
  ///
  /// Dispatches on `isASCII` exactly as `hash(into:)` does. On i386 and arm
  /// builds this calls `unsupportedOn32bit()` instead.
  internal func _rawHashValue(seed: Int) -> Int {
#if arch(i386) || arch(arm)
    unsupportedOn32bit()
#else
    guard isASCII else {
      return withUnmanagedUTF16 { utf16View in
        utf16View._rawHashValue(seed: seed)
      }
    }
    return withUnmanagedASCII { asciiView in
      asciiView._rawHashValue(seed: seed)
    }
#endif // 64-bit
  }
}
|
||||
|
||||
extension _StringGuts {
  /// Feeds the whole string into `hasher`.
  ///
  /// Dispatch order: small string, then opaque, then ASCII, then UTF-16.
  @_effects(releasenone) // FIXME: Is this valid in the opaque case?
  @usableFromInline
  internal func hash(into hasher: inout Hasher) {
    guard !_isSmall else {
      _smallUTF8String.hash(into: &hasher)
      return
    }

    // Runs on exit, after the selected view has been hashed.
    defer { _fixLifetime(self) }
    if _slowPath(_isOpaque) {
      _asOpaque().hash(into: &hasher)
    } else if isASCII {
      _unmanagedASCIIView.hash(into: &hasher)
    } else {
      _unmanagedUTF16View.hash(into: &hasher)
    }
  }

  /// Feeds the code units in `range` into `hasher`.
  ///
  /// The selected representation is sliced with `range` before hashing.
  /// Dispatch order: small string, then opaque, then ASCII, then UTF-16.
  @_effects(releasenone) // FIXME: Is this valid in the opaque case?
  @usableFromInline
  internal func hash(_ range: Range<Int>, into hasher: inout Hasher) {
    guard !_isSmall else {
      _smallUTF8String[range].hash(into: &hasher)
      return
    }

    // Runs on exit, after the selected slice has been hashed.
    defer { _fixLifetime(self) }
    if _slowPath(_isOpaque) {
      _asOpaque()[range].hash(into: &hasher)
    } else if isASCII {
      _unmanagedASCIIView[range].hash(into: &hasher)
    } else {
      _unmanagedUTF16View[range].hash(into: &hasher)
    }
  }

  /// Returns the hash of the whole string for the given seed.
  ///
  /// Dispatch order: small string, then opaque, then ASCII, then UTF-16.
  @_effects(releasenone) // FIXME: Is this valid in the opaque case?
  @usableFromInline
  internal func _rawHashValue(seed: Int) -> Int {
    guard !_isSmall else {
      return _smallUTF8String._rawHashValue(seed: seed)
    }

    // Runs on exit, after the selected view has been hashed.
    defer { _fixLifetime(self) }
    if _slowPath(_isOpaque) {
      return _asOpaque()._rawHashValue(seed: seed)
    }
    guard isASCII else {
      return _unmanagedUTF16View._rawHashValue(seed: seed)
    }
    return _unmanagedASCIIView._rawHashValue(seed: seed)
  }

  /// Returns the hash of the code units in `range` for the given seed.
  ///
  /// The selected representation is sliced with `range` before hashing.
  /// Dispatch order: small string, then opaque, then ASCII, then UTF-16.
  @_effects(releasenone) // FIXME: Is this valid in the opaque case?
  @usableFromInline
  internal func _rawHashValue(_ range: Range<Int>, seed: Int) -> Int {
    guard !_isSmall else {
      return _smallUTF8String[range]._rawHashValue(seed: seed)
    }

    // Runs on exit, after the selected slice has been hashed.
    defer { _fixLifetime(self) }
    if _slowPath(_isOpaque) {
      return _asOpaque()[range]._rawHashValue(seed: seed)
    }
    guard isASCII else {
      return _unmanagedUTF16View[range]._rawHashValue(seed: seed)
    }
    return _unmanagedASCIIView[range]._rawHashValue(seed: seed)
  }
}
|
||||
|
||||
extension String : Hashable {
|
||||
/// Hashes the essential components of this value by feeding them into the
|
||||
/// given hasher.
|
||||
///
|
||||
/// - Parameter hasher: The hasher to use when combining the components
|
||||
/// of this instance.
|
||||
@inlinable
|
||||
@inlinable // For pre-normal fast paths
|
||||
public func hash(into hasher: inout Hasher) {
|
||||
_guts.hash(into: &hasher)
|
||||
}
|
||||
// TODO(UTF8 perf): pre-normal checks, fast-paths, etc.
|
||||
|
||||
@inlinable
|
||||
public func _rawHashValue(seed: Int) -> Int {
|
||||
return _guts._rawHashValue(seed: seed)
|
||||
_guts._normalizedHash(into: &hasher)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -207,11 +34,21 @@ extension StringProtocol {
|
||||
/// of this instance.
|
||||
@inlinable
|
||||
public func hash(into hasher: inout Hasher) {
|
||||
_wholeString._guts.hash(_encodedOffsetRange, into: &hasher)
|
||||
}
|
||||
|
||||
@inlinable
|
||||
public func _rawHashValue(seed: Int) -> Int {
|
||||
return _wholeString._guts._rawHashValue(_encodedOffsetRange, seed: seed)
|
||||
unimplemented_utf8()
|
||||
}
|
||||
}
|
||||
|
||||
extension _StringGuts {
  /// Feeds the normalized form of this string into `hasher`, followed by a
  /// single `0xFF` terminator byte.
  ///
  /// The string is rebuilt from these guts, normalized with `_normalize()`,
  /// and the resulting bytes are combined in one call.
  @usableFromInline // @opaque
  @inline(never) // slow-path
  internal func _normalizedHash(into hasher: inout Hasher) {
    // TODO(UTF8 perf): fast-paths, incremental (non-allocating) normalization,
    // etc. This approach is very slow.

    String(self)._normalize().withUnsafeBytes { normalizedBytes in
      hasher.combine(bytes: normalizedBytes)
    }
    let terminator: UInt8 = 0xFF
    hasher.combine(terminator) // terminator
  }
}
|
||||
|
||||
|
||||
@@ -9,66 +9,131 @@
|
||||
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
extension String {
|
||||
/// A position of a character or code unit in a string.
|
||||
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
public struct Index {
|
||||
@usableFromInline
|
||||
internal typealias _UTF8Buffer = UTF8.EncodedScalar
|
||||
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal var _compoundOffset: UInt64
|
||||
import SwiftShims
|
||||
|
||||
@usableFromInline
|
||||
internal var _utf8Buffer = _UTF8Buffer()
|
||||
/*
|
||||
|
||||
@usableFromInline
|
||||
internal var _graphemeStrideCache: UInt16 = 0
|
||||
}
|
||||
}
|
||||
String's Index has the following layout:
|
||||
|
||||
┌──────────┬───────────────────┬────────────────┬──────────┐
|
||||
│ b63:b16 │ b15:b14 │ b13:b8 │ b7:b0 │
|
||||
├──────────┼───────────────────┼────────────────┼──────────┤
|
||||
│ position │ transcoded offset │ grapheme cache │ reserved │
|
||||
└──────────┴───────────────────┴────────────────┴──────────┘
|
||||
|
||||
- grapheme cache: A 6-bit value remembering the distance to the next grapheme
|
||||
boundary
|
||||
- position aka `encodedOffset`: An offset into the string's code units
|
||||
- transcoded offset: a sub-scalar offset, derived from transcoding
|
||||
|
||||
The use and interpretation of both `reserved` and `grapheme cache` is not part
|
||||
of Index's ABI; it should be hidden behind non-inlinable calls. However, the
|
||||
position of the sequence of 14 bits allocated is part of Index's ABI, as well as
|
||||
the default value being `0`.
|
||||
|
||||
*/
|
||||
|
||||
/// Convenience accessors
|
||||
extension String.Index {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var utf8Buffer: String.Index._UTF8Buffer? {
|
||||
guard !_utf8Buffer.isEmpty else { return nil }
|
||||
return _utf8Buffer
|
||||
|
||||
}
|
||||
|
||||
extension String.Index {
|
||||
/// The raw bits of this index with the low 14 bits shifted out.
@inlinable
internal var orderingValue: UInt64 {
  @inline(__always) get {
    let discardedLowBits: UInt64 = 14
    return _rawBits &>> discardedLowBits
  }
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
/// The offset into a string's code units for this index.
|
||||
@inlinable
|
||||
public var encodedOffset: Int {
|
||||
@inline(__always) get { return Int(truncatingIfNeeded: _rawBits &>> 16) }
|
||||
}
|
||||
|
||||
/// The sub-scalar transcoded offset of this index: the low two bits of
/// `orderingValue`, i.e. a value in `0...3`.
@inlinable
internal var transcodedOffset: Int {
  @inline(__always) get {
    let lowTwoBits = orderingValue & 0x3
    return Int(truncatingIfNeeded: lowTwoBits)
  }
}
|
||||
|
||||
/// The cached distance to the next grapheme boundary, or `nil` when no
/// distance is cached (the cache field holds 0).
///
/// The cache is the 6-bit field at bits 13:8 of `_rawBits`; bits 63:16 hold
/// the position and bits 15:14 the transcoded offset, so the field must be
/// read with mask `0x3F00` and a shift of 8. Reading any higher bits would
/// misinterpret part of the position as a stride.
@usableFromInline
internal var characterStride: Int? {
  let value = (_rawBits & 0x3F00) &>> 8
  return value > 0 ? Int(truncatingIfNeeded: value) : nil
}
|
||||
|
||||
// TODO: Probably worth carving a bit for, or maybe a isSubScalar bit...
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var isUTF8: Bool {
|
||||
return self.utf8Buffer != nil || self.transcodedOffset > 0
|
||||
@inlinable @inline(__always)
|
||||
internal init(encodedOffset: Int, transcodedOffset: Int) {
|
||||
#if arch(i386) || arch(arm)
|
||||
unimplemented_utf8_32bit()
|
||||
#else
|
||||
_sanityCheck(encodedOffset == encodedOffset & 0x0000_FFFF_FFFF_FFFF)
|
||||
_sanityCheck(transcodedOffset <= 3)
|
||||
let pos = UInt64(truncatingIfNeeded: encodedOffset)
|
||||
let trans = UInt64(truncatingIfNeeded: transcodedOffset)
|
||||
|
||||
self.init((pos &<< 16) | (trans &<< 14))
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Creates a new index at the specified code unit offset, with a transcoded
/// offset of zero.
///
/// - Parameter encodedOffset: An offset in code units.
@inlinable @inline(__always)
public init(encodedOffset: Int) {
  let noTranscodedOffset = 0
  self.init(
    encodedOffset: encodedOffset,
    transcodedOffset: noTranscodedOffset)
}
|
||||
|
||||
/// Creates an index at the given position and transcoded offset, caching
/// `characterStride` as the distance to the next grapheme boundary.
///
/// The cache is the 6-bit field at bits 13:8 of `_rawBits` (bits 7:0 are
/// reserved), so the stride is shifted left by 8 before being merged in.
/// Strides greater than 63 do not fit in the field and are not cached; the
/// index is still valid, it just carries no stride.
///
/// - Parameters:
///   - encodedOffset: An offset in code units.
///   - transcodedOffset: The sub-scalar offset, forwarded to
///     `init(encodedOffset:transcodedOffset:)`.
///   - characterStride: The distance to cache.
@usableFromInline
internal init(
  encodedOffset: Int, transcodedOffset: Int, characterStride: Int
) {
  self.init(encodedOffset: encodedOffset, transcodedOffset: transcodedOffset)
  // Too large for the 6-bit cache: leave the field at its default of 0.
  if _slowPath(characterStride > 63) { return }

  _sanityCheck(characterStride == characterStride & 0x3F)
  // Place the stride in bits 13:8, not in the reserved low byte.
  self._rawBits |= UInt64(truncatingIfNeeded: characterStride) &<< 8
  self._invariantCheck()
}
|
||||
|
||||
/// Creates an index at the given code unit offset with a transcoded offset
/// of zero, forwarding `stride` as the character stride.
///
/// - Parameters:
///   - offset: An offset in code units.
///   - stride: The character stride passed on to
///     `init(encodedOffset:transcodedOffset:characterStride:)`.
@usableFromInline
internal init(encodedOffset offset: Int, characterStride stride: Int) {
  self.init(
    encodedOffset: offset,
    transcodedOffset: 0,
    characterStride: stride)
}
|
||||
}
|
||||
|
||||
extension String.Index : Equatable {
|
||||
// A combined code unit and transcoded offset, for comparison purposes
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var _orderingValue: UInt64 {
|
||||
return _compoundOffset
|
||||
// Creation helpers
|
||||
extension String.Index {
  /// Creates the index one transcoded code unit after `base`: same encoded
  /// offset, transcoded offset incremented by one.
  ///
  /// `base.transcodedOffset` is sanity-checked to be in `0...2`.
  @inlinable @inline(__always)
  internal init(transcodedAfter base: String.Index) {
    let current = base.transcodedOffset
    _sanityCheck((0...2) ~= current)
    self.init(
      encodedOffset: base.encodedOffset,
      transcodedOffset: current &+ 1)
  }

  /// Creates the index one transcoded code unit before `base`: same encoded
  /// offset, transcoded offset decremented by one.
  ///
  /// `base.transcodedOffset` is sanity-checked to be in `1...3`.
  @inlinable @inline(__always)
  internal init(transcodedBefore base: String.Index) {
    let current = base.transcodedOffset
    _sanityCheck((1...3) ~= current)
    self.init(
      encodedOffset: base.encodedOffset,
      transcodedOffset: current &- 1)
  }
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
extension String.Index: Equatable {
|
||||
@inlinable @inline(__always)
|
||||
public static func == (lhs: String.Index, rhs: String.Index) -> Bool {
|
||||
return lhs._orderingValue == rhs._orderingValue
|
||||
return lhs.orderingValue == rhs.orderingValue
|
||||
}
|
||||
}
|
||||
|
||||
extension String.Index : Comparable {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
extension String.Index: Comparable {
|
||||
@inlinable @inline(__always)
|
||||
public static func < (lhs: String.Index, rhs: String.Index) -> Bool {
|
||||
return lhs._orderingValue < rhs._orderingValue
|
||||
return lhs.orderingValue < rhs.orderingValue
|
||||
}
|
||||
}
|
||||
|
||||
extension String.Index : Hashable {
|
||||
extension String.Index: Hashable {
|
||||
/// Hashes the essential components of this value by feeding them into the
|
||||
/// given hasher.
|
||||
///
|
||||
@@ -76,66 +141,115 @@ extension String.Index : Hashable {
|
||||
/// of this instance.
|
||||
@inlinable
|
||||
public func hash(into hasher: inout Hasher) {
|
||||
hasher.combine(_orderingValue)
|
||||
hasher.combine(orderingValue)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(UTF8): restore these to StringIndexConversions.swift
|
||||
extension String.Index {
|
||||
@inline(__always)
|
||||
@inlinable
|
||||
internal init(encodedOffset: Int, transcodedOffset: Int) {
|
||||
let cuOffset = UInt64(truncatingIfNeeded: encodedOffset)
|
||||
_sanityCheck(
|
||||
cuOffset & 0xFFFF_0000_0000_0000 == 0, "String length capped at 48bits")
|
||||
let transOffset = UInt64(truncatingIfNeeded: transcodedOffset)
|
||||
_sanityCheck(transOffset <= 4, "UTF-8 max transcoding is 4 code units")
|
||||
|
||||
self._compoundOffset = cuOffset &<< 2 | transOffset
|
||||
}
|
||||
|
||||
@inline(__always)
|
||||
@inlinable
|
||||
internal init(from other: String.Index, adjustingEncodedOffsetBy adj: Int) {
|
||||
self.init(
|
||||
encodedOffset: other.encodedOffset &+ adj,
|
||||
transcodedOffset: other.transcodedOffset)
|
||||
self._utf8Buffer = other._utf8Buffer
|
||||
self._graphemeStrideCache = other._graphemeStrideCache
|
||||
}
|
||||
|
||||
/// Creates a new index at the specified UTF-16 offset.
|
||||
/// Creates an index in the given string that corresponds exactly to the
|
||||
/// specified position.
|
||||
///
|
||||
/// - Parameter offset: An offset in UTF-16 code units.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public init(encodedOffset offset: Int) {
|
||||
self.init(encodedOffset: offset, transcodedOffset: 0)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(
|
||||
encodedOffset offset: Int, transcodedOffset: Int, buffer: _UTF8Buffer
|
||||
/// If the index passed as `sourcePosition` represents the start of an
|
||||
/// extended grapheme cluster---the element type of a string---then the
|
||||
/// initializer succeeds.
|
||||
///
|
||||
/// The following example converts the position of the Unicode scalar `"e"`
|
||||
/// into its corresponding position in the string. The character at that
|
||||
/// position is the composed `"é"` character.
|
||||
///
|
||||
/// let cafe = "Cafe\u{0301}"
|
||||
/// print(cafe)
|
||||
/// // Prints "Café"
|
||||
///
|
||||
/// let scalarsIndex = cafe.unicodeScalars.firstIndex(of: "e")!
|
||||
/// let stringIndex = String.Index(scalarsIndex, within: cafe)!
|
||||
///
|
||||
/// print(cafe[...stringIndex])
|
||||
/// // Prints "Café"
|
||||
///
|
||||
/// If the index passed as `sourcePosition` doesn't have an exact
|
||||
/// corresponding position in `target`, the result of the initializer is
|
||||
/// `nil`. For example, an attempt to convert the position of the combining
|
||||
/// acute accent (`"\u{0301}"`) fails. Combining Unicode scalars do not have
|
||||
/// their own position in a string.
|
||||
///
|
||||
/// let nextScalarsIndex = cafe.unicodeScalars.index(after: scalarsIndex)
|
||||
/// let nextStringIndex = String.Index(nextScalarsIndex, within: cafe)
|
||||
///
|
||||
/// print(nextStringIndex)
|
||||
/// // Prints "nil"
|
||||
///
|
||||
/// - Parameters:
|
||||
/// - sourcePosition: A position in a view of the `target` parameter.
|
||||
/// `sourcePosition` must be a valid index of at least one of the views
|
||||
/// of `target`.
|
||||
/// - target: The string referenced by the resulting index.
|
||||
public init?(
|
||||
_ sourcePosition: String.Index,
|
||||
within target: String
|
||||
) {
|
||||
self.init(encodedOffset: offset, transcodedOffset: transcodedOffset)
|
||||
self._utf8Buffer = buffer
|
||||
}
|
||||
|
||||
@inlinable
|
||||
internal init(encodedOffset: Int, characterStride: Int) {
|
||||
self.init(encodedOffset: encodedOffset, transcodedOffset: 0)
|
||||
if characterStride < UInt16.max {
|
||||
self._graphemeStrideCache = UInt16(truncatingIfNeeded: characterStride)
|
||||
guard target._guts.isOnGraphemeClusterBoundary(sourcePosition) else {
|
||||
return nil
|
||||
}
|
||||
self = sourcePosition
|
||||
}
|
||||
|
||||
/// The offset into a string's UTF-16 encoding for this index.
|
||||
/// Returns the position in the given UTF-8 view that corresponds exactly to
|
||||
/// this index.
|
||||
///
|
||||
/// This example first finds the position of the character `"é"`, and then
|
||||
/// uses this method to find the same position in the string's `utf8` view.
|
||||
///
|
||||
/// let cafe = "Café"
|
||||
/// if let i = cafe.firstIndex(of: "é") {
|
||||
/// let j = i.samePosition(in: cafe.utf8)!
|
||||
/// print(Array(cafe.utf8[j...]))
|
||||
/// }
|
||||
/// // Prints "[195, 169]"
|
||||
///
|
||||
/// - Parameter utf8: The view to use for the index conversion. This index
|
||||
/// must be a valid index of at least one view of the string shared by
|
||||
/// `utf8`.
|
||||
/// - Returns: The position in `utf8` that corresponds exactly to this index.
|
||||
/// If this index does not have an exact corresponding position in `utf8`,
|
||||
/// this method returns `nil`. For example, an attempt to convert the
|
||||
/// position of a UTF-16 trailing surrogate returns `nil`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var encodedOffset : Int {
|
||||
return Int(truncatingIfNeeded: _compoundOffset &>> 2)
|
||||
public func samePosition(
|
||||
in utf8: String.UTF8View
|
||||
) -> String.UTF8View.Index? {
|
||||
return String.UTF8View.Index(self, within: utf8)
|
||||
}
|
||||
|
||||
/// The offset of this index within whatever encoding this is being viewed as
|
||||
/// Returns the position in the given UTF-16 view that corresponds exactly to
|
||||
/// this index.
|
||||
///
|
||||
/// The index must be a valid index of `String(utf16)`.
|
||||
///
|
||||
/// This example first finds the position of the character `"é"` and then
|
||||
/// uses this method to find the same position in the string's `utf16` view.
|
||||
///
|
||||
/// let cafe = "Café"
|
||||
/// if let i = cafe.firstIndex(of: "é") {
|
||||
/// let j = i.samePosition(in: cafe.utf16)!
|
||||
/// print(cafe.utf16[j])
|
||||
/// }
|
||||
/// // Prints "233"
|
||||
///
|
||||
/// - Parameter utf16: The view to use for the index conversion. This index
|
||||
/// must be a valid index of at least one view of the string shared by
|
||||
/// `utf16`.
|
||||
/// - Returns: The position in `utf16` that corresponds exactly to this
|
||||
/// index. If this index does not have an exact corresponding position in
|
||||
/// `utf16`, this method returns `nil`. For example, an attempt to convert
|
||||
/// the position of a UTF-8 continuation byte returns `nil`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var transcodedOffset: Int {
|
||||
return Int(truncatingIfNeeded: _compoundOffset & 0x3)
|
||||
public func samePosition(
|
||||
in utf16: String.UTF16View
|
||||
) -> String.UTF16View.Index? {
|
||||
return String.UTF16View.Index(self, within: utf16)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -9,110 +9,3 @@
|
||||
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
extension String.Index {
  /// Creates an index into the given string at exactly the specified
  /// position, if that position starts an extended grapheme cluster.
  ///
  /// Extended grapheme clusters are the elements of a string, so the
  /// initializer succeeds only when `sourcePosition` lies on a boundary
  /// between two of them.
  ///
  /// The following example converts the position of the Unicode scalar `"e"`
  /// into its corresponding position in the string. The character at that
  /// position is the composed `"é"` character.
  ///
  ///     let cafe = "Cafe\u{0301}"
  ///     print(cafe)
  ///     // Prints "Café"
  ///
  ///     let scalarsIndex = cafe.unicodeScalars.firstIndex(of: "e")!
  ///     let stringIndex = String.Index(scalarsIndex, within: cafe)!
  ///
  ///     print(cafe[...stringIndex])
  ///     // Prints "Café"
  ///
  /// When `sourcePosition` has no exact counterpart in `target`, the result
  /// is `nil`. Converting the position of the combining acute accent
  /// (`"\u{0301}"`) fails, for instance, because combining Unicode scalars do
  /// not have a position of their own in a string.
  ///
  ///     let nextScalarsIndex = cafe.unicodeScalars.index(after: scalarsIndex)
  ///     let nextStringIndex = String.Index(nextScalarsIndex, within: cafe)
  ///
  ///     print(nextStringIndex)
  ///     // Prints "nil"
  ///
  /// - Parameters:
  ///   - sourcePosition: A position in a view of the `target` parameter.
  ///     `sourcePosition` must be a valid index of at least one of the views
  ///     of `target`.
  ///   - target: The string referenced by the resulting index.
  public init?(
    _ sourcePosition: String.Index,
    within target: String
  ) {
    let scalars = target.unicodeScalars
    if !scalars._isOnGraphemeClusterBoundary(sourcePosition) {
      return nil
    }

    let offset = sourcePosition.encodedOffset
    self = target._index(atEncodedOffset: offset)
  }

  /// Returns the position in the given UTF-8 view that corresponds exactly to
  /// this index.
  ///
  /// This example first finds the position of the character `"é"`, and then
  /// uses this method to find the same position in the string's `utf8` view.
  ///
  ///     let cafe = "Café"
  ///     if let i = cafe.firstIndex(of: "é") {
  ///         let j = i.samePosition(in: cafe.utf8)!
  ///         print(Array(cafe.utf8[j...]))
  ///     }
  ///     // Prints "[195, 169]"
  ///
  /// - Parameter utf8: The view to use for the index conversion. This index
  ///   must be a valid index of at least one view of the string shared by
  ///   `utf8`.
  /// - Returns: The position in `utf8` that corresponds exactly to this index.
  ///   If this index does not have an exact corresponding position in `utf8`,
  ///   this method returns `nil`. For example, an attempt to convert the
  ///   position of a UTF-16 trailing surrogate returns `nil`.
  @inlinable // trivial-implementation
  public func samePosition(
    in utf8: String.UTF8View
  ) -> String.UTF8View.Index? {
    let converted = String.UTF8View.Index(self, within: utf8)
    return converted
  }

  /// Returns the position in the given UTF-16 view that corresponds exactly to
  /// this index.
  ///
  /// The index must be a valid index of `String(utf16)`.
  ///
  /// This example first finds the position of the character `"é"` and then
  /// uses this method to find the same position in the string's `utf16` view.
  ///
  ///     let cafe = "Café"
  ///     if let i = cafe.firstIndex(of: "é") {
  ///         let j = i.samePosition(in: cafe.utf16)!
  ///         print(cafe.utf16[j])
  ///     }
  ///     // Prints "233"
  ///
  /// - Parameter utf16: The view to use for the index conversion. This index
  ///   must be a valid index of at least one view of the string shared by
  ///   `utf16`.
  /// - Returns: The position in `utf16` that corresponds exactly to this
  ///   index. If this index does not have an exact corresponding position in
  ///   `utf16`, this method returns `nil`. For example, an attempt to convert
  ///   the position of a UTF-8 continuation byte returns `nil`.
  @inlinable // trivial-implementation
  public func samePosition(
    in utf16: String.UTF16View
  ) -> String.UTF16View.Index? {
    let converted = String.UTF16View.Index(self, within: utf16)
    return converted
  }
}
|
||||
|
||||
|
||||
@@ -197,14 +197,11 @@ extension DefaultStringInterpolation: CustomStringConvertible {
|
||||
extension DefaultStringInterpolation: TextOutputStream {
|
||||
@inlinable
|
||||
public mutating func write(_ string: String) {
|
||||
// Most interpolations will not append to an empty string, so we bypass the
|
||||
// empty-singleton check.
|
||||
_storage._guts._appendSlow(string._guts)
|
||||
_storage.append(string)
|
||||
}
|
||||
|
||||
@inlinable
|
||||
public mutating func _writeASCII(_ buffer: UnsafeBufferPointer<UInt8>) {
|
||||
_storage._guts.append(_UnmanagedString(buffer))
|
||||
_storage._guts.append(_StringGuts(buffer, isKnownASCII: true))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,25 +12,6 @@
|
||||
|
||||
import SwiftShims
|
||||
|
||||
extension _StringVariant {
  /// Returns native storage holding this string's code units repeated
  /// `count` times.
  ///
  /// The contents are first copied into storage with room for the remaining
  /// `count - 1` copies; each further copy is then initialized from the
  /// start of that storage, and the storage count is set to the total.
  ///
  /// - Parameter count: The number of repetitions; sanity-checked to be
  ///   greater than zero.
  @usableFromInline
  func _repeated(_ count: Int) -> _SwiftStringStorage<CodeUnit> {
    _sanityCheck(count > 0)
    let unitLength = self.count
    let extraCapacity = (count - 1) * unitLength
    let storage = _copyToNativeStorage(
      of: CodeUnit.self,
      unusedCapacity: extraCapacity)

    // The first copy is already in place; append the rest after it.
    var cursor = storage.start + unitLength
    for _ in 1 ..< count {
      cursor.initialize(from: storage.start, count: unitLength)
      cursor = cursor + unitLength
    }
    _sanityCheck(cursor == storage.start + count * unitLength)

    storage.count = cursor - storage.start
    return storage
  }
}
|
||||
|
||||
extension String {
|
||||
/// Creates a new string representing the given string repeated the specified
|
||||
/// number of times.
|
||||
@@ -46,49 +27,30 @@ extension String {
|
||||
/// - repeatedValue: The string to repeat.
|
||||
/// - count: The number of times to repeat `repeatedValue` in the resulting
|
||||
/// string.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public init(repeating repeatedValue: String, count: Int) {
|
||||
precondition(count >= 0, "Negative count not allowed")
|
||||
guard count > 1 else {
|
||||
self = count == 0 ? "" : repeatedValue
|
||||
// TODO(UTF8 merge): use string literal
|
||||
self = count == 0 ? String() : repeatedValue
|
||||
return
|
||||
}
|
||||
self = String(repeatedValue._guts._repeated(count))
|
||||
|
||||
// TODO(UTF8 perf): Not the fastest approach...
|
||||
var result = String()
|
||||
result.reserveCapacity(repeatedValue._guts.count &* count)
|
||||
for _ in 0..<count {
|
||||
result += repeatedValue
|
||||
}
|
||||
self = result
|
||||
}
|
||||
|
||||
/// A Boolean value indicating whether a string has no characters.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var isEmpty: Bool {
|
||||
return _guts.count == 0
|
||||
@inline(__always) get { return _guts.isEmpty }
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: since this is generally useful, make public via evolution proposal.
|
||||
extension BidirectionalCollection {
  /// Returns whether this collection ends with the elements of `suffix`,
  /// comparing element pairs with `areEquivalent`.
  ///
  /// Both collections are walked backwards from their ends in lock step. The
  /// result is `false` as soon as a pair is not equivalent, and otherwise
  /// `true` exactly when all of `suffix` was consumed. An empty `suffix`
  /// therefore always matches, and a `suffix` longer than this collection
  /// never does.
  ///
  /// - Parameters:
  ///   - suffix: The candidate suffix.
  ///   - areEquivalent: Called with an element of this collection first and
  ///     the corresponding element of `suffix` second.
  @inlinable
  internal func _ends<Suffix: BidirectionalCollection>(
    with suffix: Suffix, by areEquivalent: (Element,Element) -> Bool
  ) -> Bool where Suffix.Element == Element {
    var selfCursor = endIndex
    var suffixCursor = suffix.endIndex

    while selfCursor != startIndex && suffixCursor != suffix.startIndex {
      selfCursor = index(before: selfCursor)
      suffixCursor = suffix.index(before: suffixCursor)
      guard areEquivalent(self[selfCursor], suffix[suffixCursor]) else {
        return false
      }
    }

    return suffixCursor == suffix.startIndex
  }
}
|
||||
|
||||
extension BidirectionalCollection where Element: Equatable {
  /// Returns whether this collection ends with the elements of `suffix`,
  /// comparing elements with `==`.
  ///
  /// Forwards to `_ends(with:by:)`.
  @inlinable
  internal func _ends<Suffix: BidirectionalCollection>(
    with suffix: Suffix
  ) -> Bool where Suffix.Element == Element {
    return _ends(with: suffix, by: { lhs, rhs in lhs == rhs })
  }
}
|
||||
|
||||
|
||||
extension StringProtocol {
|
||||
/// Returns a Boolean value indicating whether the string begins with the
|
||||
/// specified prefix.
|
||||
@@ -155,100 +117,37 @@ extension StringProtocol {
|
||||
/// - Returns: `true` if the string ends with `suffix`; otherwise, `false`.
|
||||
@inlinable
|
||||
public func hasSuffix<Suffix: StringProtocol>(_ suffix: Suffix) -> Bool {
|
||||
return self._ends(with: suffix)
|
||||
return self.reversed().starts(with: suffix.reversed())
|
||||
}
|
||||
}
|
||||
|
||||
extension String {
|
||||
public func hasPrefix(_ prefix: String) -> Bool {
|
||||
let prefixCount = prefix._guts.count
|
||||
if prefixCount == 0 { return true }
|
||||
|
||||
// TODO: replace with 2-way visitor
|
||||
if self._guts._isSmall && prefix._guts._isSmall {
|
||||
let selfSmall = self._guts._smallUTF8String
|
||||
let prefixSmall = prefix._guts._smallUTF8String
|
||||
if selfSmall.isASCII && prefixSmall.isASCII {
|
||||
return selfSmall.withUnmanagedASCII { selfASCII in
|
||||
return prefixSmall.withUnmanagedASCII { prefixASCII in
|
||||
if prefixASCII.count > selfASCII.count { return false }
|
||||
return (0 as CInt) == _swift_stdlib_memcmp(
|
||||
selfASCII.rawStart,
|
||||
prefixASCII.rawStart,
|
||||
prefixASCII.count)
|
||||
}
|
||||
if _fastPath(self._guts.isNFCFastUTF8 && prefix._guts.isNFCFastUTF8) {
|
||||
guard prefix._guts.count <= self._guts.count else { return false }
|
||||
return prefix._guts.withFastUTF8 { nfcPrefix in
|
||||
let prefixEnd = nfcPrefix.count
|
||||
return self._guts.withFastUTF8(range: 0..<prefixEnd) { nfcSlicedSelf in
|
||||
return _binaryCompare(nfcSlicedSelf, nfcPrefix) == 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if _fastPath(!self._guts._isOpaque && !prefix._guts._isOpaque) {
|
||||
if self._guts.isASCII && prefix._guts.isASCII {
|
||||
let result: Bool
|
||||
let selfASCII = self._guts._unmanagedASCIIView
|
||||
let prefixASCII = prefix._guts._unmanagedASCIIView
|
||||
if prefixASCII.count > selfASCII.count {
|
||||
// Prefix is longer than self.
|
||||
result = false
|
||||
} else {
|
||||
result = (0 as CInt) == _swift_stdlib_memcmp(
|
||||
selfASCII.rawStart,
|
||||
prefixASCII.rawStart,
|
||||
prefixASCII.count)
|
||||
}
|
||||
_fixLifetime(self)
|
||||
_fixLifetime(prefix)
|
||||
return result
|
||||
}
|
||||
else {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return self.starts(with: prefix)
|
||||
return starts(with: prefix)
|
||||
}
|
||||
|
||||
public func hasSuffix(_ suffix: String) -> Bool {
|
||||
let suffixCount = suffix._guts.count
|
||||
if suffixCount == 0 { return true }
|
||||
|
||||
// TODO: replace with 2-way visitor
|
||||
if self._guts._isSmall && suffix._guts._isSmall {
|
||||
let selfSmall = self._guts._smallUTF8String
|
||||
let suffixSmall = suffix._guts._smallUTF8String
|
||||
if selfSmall.isASCII && suffixSmall.isASCII {
|
||||
return selfSmall.withUnmanagedASCII { selfASCII in
|
||||
return suffixSmall.withUnmanagedASCII { suffixASCII in
|
||||
if suffixASCII.count > selfASCII.count { return false }
|
||||
return (0 as CInt) == _swift_stdlib_memcmp(
|
||||
selfASCII.rawStart + (selfASCII.count - suffixASCII.count),
|
||||
suffixASCII.rawStart,
|
||||
suffixASCII.count)
|
||||
}
|
||||
if _fastPath(self._guts.isNFCFastUTF8 && suffix._guts.isNFCFastUTF8) {
|
||||
guard suffix._guts.count <= self._guts.count else { return false }
|
||||
return suffix._guts.withFastUTF8 { nfcSuffix in
|
||||
let suffixStart = self._guts.count - nfcSuffix.count
|
||||
return self._guts.withFastUTF8(range: suffixStart..<self._guts.count) {
|
||||
nfcSlicedSelf in return _binaryCompare(nfcSlicedSelf, nfcSuffix) == 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if _fastPath(!self._guts._isOpaque && !suffix._guts._isOpaque) {
|
||||
if self._guts.isASCII && suffix._guts.isASCII {
|
||||
let result: Bool
|
||||
let selfASCII = self._guts._unmanagedASCIIView
|
||||
let suffixASCII = suffix._guts._unmanagedASCIIView
|
||||
if suffixASCII.count > selfASCII.count {
|
||||
// Suffix is longer than self.
|
||||
result = false
|
||||
} else {
|
||||
result = (0 as CInt) == _swift_stdlib_memcmp(
|
||||
selfASCII.rawStart + (selfASCII.count - suffixASCII.count),
|
||||
suffixASCII.rawStart,
|
||||
suffixASCII.count)
|
||||
}
|
||||
_fixLifetime(self)
|
||||
_fixLifetime(suffix)
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
return self._ends(with: suffix)
|
||||
return self.reversed().starts(with: suffix.reversed())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -290,21 +189,3 @@ extension String {
|
||||
self = value._description(radix: radix, uppercase: uppercase)
|
||||
}
|
||||
}
|
||||
|
||||
extension _StringGuts {
  /// Returns guts holding the contents of `self` repeated `n` times.
  ///
  /// `n` must be greater than 1 (enforced with `_sanityCheck`).
  @inlinable
  func _repeated(_ n: Int) -> _StringGuts {
    _sanityCheck(n > 1)

    // TODO: visitor pattern for something like this...
    //
    // Small strings get first shot: if the small representation can produce
    // the repeated value itself, use that result directly.
    if self._isSmall, let small = self._smallUTF8String._repeated(n) {
      return _StringGuts(small)
    }

    // Otherwise dispatch on the underlying representation and build a large
    // string from the repeated view.
    return _visitGuts(self, range: nil, args: n,
      ascii: { view, times in
        return _StringGuts(_large: view._repeated(times)) },
      utf16: { view, times in
        return _StringGuts(_large: view._repeated(times)) },
      opaque: { view, times in
        return _StringGuts(_large: view._repeated(times)) })
  }
}
|
||||
|
||||
|
||||
@@ -12,10 +12,11 @@
|
||||
|
||||
import SwiftShims
|
||||
|
||||
// A namespace for various heuristics
|
||||
//
|
||||
internal enum _Normalization {
|
||||
|
||||
// ICU's NFC unorm2 instance
|
||||
//
|
||||
// TODO(UTF8 perf): Should we cache one on TLS? Is this an expensive call?
|
||||
internal static var _nfcNormalizer: OpaquePointer = {
|
||||
var err = __swift_stdlib_U_ZERO_ERROR
|
||||
let normalizer = __swift_stdlib_unorm2_getNFCInstance(&err)
|
||||
@@ -27,77 +28,59 @@ internal enum _Normalization {
|
||||
return normalizer
|
||||
}()
|
||||
|
||||
// Whether this buffer of code units satisfies the quickCheck=YES property for
|
||||
// normality checking under NFC.
|
||||
//
|
||||
// ICU provides a quickCheck, which may yield "YES", "NO", or "MAYBE". YES
|
||||
// means that the string was determined to definitely be normal under NFC. In
|
||||
// practice, the majority of Strings have this property. Checking for YES is
|
||||
// considerably faster than trying to distinguish between NO and MAYBE.
|
||||
// Returns true when ICU's NFC quick check spans the entire `buffer`, i.e.
// the reported span length equals `buffer.count`.
//
// Traps with `fatalError` if ICU reports a failure status.
internal static func _prenormalQuickCheckYes(
  _ buffer: UnsafeBufferPointer<UInt16>
) -> Bool {
  // An empty buffer is allowed to carry a nil base address. Bind the pointer
  // safely instead of force-unwrapping it unchecked; with nothing to scan the
  // answer is the same one the comparison below would give (0 == 0).
  guard let start = buffer.baseAddress else { return true }

  var err = __swift_stdlib_U_ZERO_ERROR
  let length = __swift_stdlib_unorm2_spanQuickCheckYes(
    _Normalization._nfcNormalizer,
    start,
    Int32(buffer.count),
    &err)

  guard err.isSuccess else {
    // This shouldn't be possible unless some deep (unrecoverable) system
    // invariants are violated
    fatalError("Unable to talk to ICU")
  }
  return length == buffer.count
}
|
||||
// Returns true when ICU's NFC quick check spans every code unit of `string`,
// i.e. the reported span length equals `string.count`.
//
// Traps with `fatalError` if ICU reports a failure status.
internal static func _prenormalQuickCheckYes(
  _ string: _UnmanagedString<UInt16>
) -> Bool {
  var status = __swift_stdlib_U_ZERO_ERROR
  let spanLength = __swift_stdlib_unorm2_spanQuickCheckYes(
    _Normalization._nfcNormalizer,
    string.start,
    Int32(string.count),
    &status)

  if !status.isSuccess {
    // This shouldn't be possible unless some deep (unrecoverable) system
    // invariants are violated
    fatalError("Unable to talk to ICU")
  }
  return spanLength == string.count
}
|
||||
}
|
||||
|
||||
extension UnicodeScalar {
  // Terminology used here:
  //
  // Normalization boundary - a position in a string such that the content to
  // its left can be normalized independently of the content to its right;
  // concatenating the two results equals normalizing the whole string.
  //
  // Normalization segment - the code units between two adjacent normalization
  // boundaries (no boundary in between). Normalizing a segment may expand it,
  // contract it, or even produce new sub-segments.

  // Whether this scalar value always has a normalization boundary before it,
  // as reported by the NFC normalizer instance.
  internal var _hasNormalizationBoundaryBefore: Bool {
    _sanityCheck(Int32(exactly: self.value) != nil, "top bit shouldn't be set")
    let codePoint = Int32(bitPattern: self.value)
    let boundaryFlag = __swift_stdlib_unorm2_hasBoundaryBefore(
      _Normalization._nfcNormalizer, codePoint)
    // The shim reports a C-style boolean; any non-zero value means "yes".
    return boundaryFlag != 0
  }
}
|
||||
|
||||
extension _Normalization {
  // Upper bound on how much a segment can grow when normalized to NFC (some
  // non-BMP musical notes, for example, expand). Each normal form caps this
  // growth; for NFC the cap is 3x.
  internal static let _maxNFCExpansionFactor: Int = 3

  // Small output buffer for normalizing one normalization segment. Large
  // enough for everything except pathological, arbitrarily long segments
  // (i.e. zalgo-segments).
  internal typealias _SegmentOutputBuffer = _FixedArray16<UInt16>
}
|
||||
|
||||
extension String {
  // TODO(UTF8 perf): Change into a lazy sequence with fast-paths...
  //
  // Produces UTF-8 code units for this string after running it through the
  // NFC normalizer. The work is done in three steps: transcode to UTF-16,
  // normalize into a scratch UTF-16 buffer, transcode the result to UTF-8.
  @inline(never) // slow-path
  internal func _normalize() -> Array<UInt8> {
    // Runs the NFC normalizer over `input`, writing into `outputBuffer`.
    // Yields the count reported by the normalizer, or nil when the call
    // reports a non-success status.
    func _tryNormalize(
      _ input: UnsafeBufferPointer<UInt16>,
      into outputBuffer: UnsafeMutableBufferPointer<UInt16>
    ) -> Int? {
      var status = __swift_stdlib_U_ZERO_ERROR
      let written = __swift_stdlib_unorm2_normalize(
        _Normalization._nfcNormalizer,
        input.baseAddress._unsafelyUnwrappedUnchecked,
        numericCast(input.count),
        outputBuffer.baseAddress._unsafelyUnwrappedUnchecked,
        numericCast(outputBuffer.count),
        &status
      )
      if !status.isSuccess {
        // The output buffer needs to grow
        return nil
      }
      return numericCast(written)
    }

    // Step 1: materialize the UTF-16 view.
    let utf16Input = Array(self.utf16)

    // Step 2: normalize into a scratch buffer sized for the maximum NFC
    // expansion (plus one spare slot), then trim it to the produced length.
    var normalizedUTF16 = Array<UInt16>(
      repeating: 0,
      count: 1 + utf16Input.count * _Normalization._maxNFCExpansionFactor)
    let producedCount: Int? = utf16Input.withUnsafeBufferPointer {
      (inputBufPtr) -> Int? in
      return normalizedUTF16.withUnsafeMutableBufferPointer { outputBufPtr in
        return _tryNormalize(inputBufPtr, into: outputBufPtr)
      }
    }
    guard let len = producedCount else {
      _sanityCheckFailure("normalization beyond max expansion factor")
    }
    _sanityCheck(len <= normalizedUTF16.count)
    normalizedUTF16.removeLast(normalizedUTF16.count - len)

    // Step 3: transcode the normalized UTF-16 to UTF-8.
    var codeUnits = Array<UInt8>()
    codeUnits.reserveCapacity(normalizedUTF16.count)
    _ = transcode(
      normalizedUTF16.makeIterator(),
      from: UTF16.self,
      to: UTF8.self,
      stoppingOnError: false,
      into: { codeUnits.append($0) })
    return codeUnits
  }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -30,8 +30,6 @@ public protocol StringProtocol
|
||||
|
||||
associatedtype UnicodeScalarView : BidirectionalCollection
|
||||
where UnicodeScalarView.Element == Unicode.Scalar
|
||||
|
||||
associatedtype SubSequence = Substring
|
||||
|
||||
var utf8: UTF8View { get }
|
||||
var utf16: UTF16View { get }
|
||||
@@ -112,75 +110,16 @@ public protocol StringProtocol
|
||||
encodedAs targetEncoding: Encoding.Type,
|
||||
_ body: (UnsafePointer<Encoding.CodeUnit>) throws -> Result
|
||||
) rethrows -> Result
|
||||
|
||||
/// The entire String onto whose slice this view is a projection.
|
||||
var _wholeString : String { get }
|
||||
/// The range of storage offsets of this view in `_wholeString`.
|
||||
var _encodedOffsetRange : Range<Int> { get }
|
||||
}
|
||||
|
||||
extension StringProtocol {
  /// Default implementation: a `String` created from `self`.
  public var _wholeString: String {
    let whole = String(self)
    return whole
  }

  /// Default implementation: the range from zero up to the number of UTF-16
  /// code units in `self`.
  public var _encodedOffsetRange: Range<Int> {
    let utf16Count: Int = numericCast(self.utf16.count)
    return 0 ..< utf16Count
  }
}
|
||||
|
||||
/// Gives fast access to a known representation of String.
///
/// Generic functions can specialize on this protocol instead of falling back
/// to grapheme breaking in order to vend individual characters.
@usableFromInline // FIXME(sil-serialize-all)
internal protocol _SwiftStringView {
  /// A `String` with the same contents as `self`. The result may be
  /// unsuitable for long-term storage.
  var _ephemeralContent : String { get }

  /// A `String` with the same contents as `self` that is suitable for
  /// long-term storage.
  //
  // FIXME: Remove once _StringGuts has append(contentsOf:).
  var _persistentContent : String { get }

  /// The complete String of which this view is a slice projection.
  var _wholeString : String { get }

  /// The storage-offset range that this view occupies within `_wholeString`.
  var _encodedOffsetRange : Range<Int> { get }
}
|
||||
|
||||
extension _SwiftStringView {
  /// Default implementation: the persistent content doubles as the ephemeral
  /// content.
  @inlinable // FIXME(sil-serialize-all)
  internal var _ephemeralContent : String {
    get { return _persistentContent }
  }
}
|
||||
|
||||
extension StringProtocol {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public // Used in the Foundation overlay
|
||||
var _ephemeralString : String {
|
||||
if _fastPath(self is _SwiftStringView) {
|
||||
return (self as! _SwiftStringView)._ephemeralContent
|
||||
// TODO(UTF8): Wean NSStringAPI.swift off of this
|
||||
public // @SPI(NSStringAPI.swift)
|
||||
var _ephemeralString: String {
|
||||
if let str = self as? String {
|
||||
return str
|
||||
}
|
||||
// TODO: Smol check and then shared storage substring
|
||||
return String(self)
|
||||
}
|
||||
}
|
||||
|
||||
extension String : _SwiftStringView {
  /// A `String` is its own persistent content.
  @inlinable // FIXME(sil-serialize-all)
  internal var _persistentContent : String {
    get { return self }
  }

  /// A `String` is its own whole string.
  @inlinable // FIXME(sil-serialize-all)
  public var _wholeString : String {
    get { return self }
  }

  /// The offsets from zero up to `_guts.count`.
  @inlinable // FIXME(sil-serialize-all)
  public var _encodedOffsetRange : Range<Int> {
    let upperBound = _guts.count
    return 0..<upperBound
  }
}
|
||||
|
||||
|
||||
@@ -10,7 +10,9 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
extension String : StringProtocol, RangeReplaceableCollection {
|
||||
extension String: StringProtocol {}
|
||||
|
||||
extension String: RangeReplaceableCollection {
|
||||
/// Creates a string representing the given character repeated the specified
|
||||
/// number of times.
|
||||
///
|
||||
@@ -25,9 +27,8 @@ extension String : StringProtocol, RangeReplaceableCollection {
|
||||
/// - repeatedValue: The character to repeat.
|
||||
/// - count: The number of times to repeat `repeatedValue` in the
|
||||
/// resulting string.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public init(repeating repeatedValue: Character, count: Int) {
|
||||
self.init(repeating: String(repeatedValue), count: count)
|
||||
self.init(repeating: repeatedValue._str, count: count)
|
||||
}
|
||||
|
||||
// This initializer disambiguates between the following initializers, now
|
||||
@@ -56,41 +57,6 @@ extension String : StringProtocol, RangeReplaceableCollection {
|
||||
self = other.description
|
||||
}
|
||||
|
||||
@inlinable
|
||||
@inline(__always)
|
||||
internal func _boundsCheck(_ index: Index) {
|
||||
_precondition(index.encodedOffset >= 0 && index.encodedOffset < _guts.count,
|
||||
"String index is out of bounds")
|
||||
}
|
||||
|
||||
@inlinable
|
||||
@inline(__always)
|
||||
internal func _boundsCheck(_ range: Range<Index>) {
|
||||
_precondition(
|
||||
range.lowerBound.encodedOffset >= 0 &&
|
||||
range.upperBound.encodedOffset <= _guts.count,
|
||||
"String index range is out of bounds")
|
||||
}
|
||||
|
||||
@inlinable
|
||||
@inline(__always)
|
||||
internal func _boundsCheck(_ range: ClosedRange<Index>) {
|
||||
_precondition(
|
||||
range.lowerBound.encodedOffset >= 0 &&
|
||||
range.upperBound.encodedOffset < _guts.count,
|
||||
"String index range is out of bounds")
|
||||
}
|
||||
|
||||
internal func _index(atEncodedOffset offset: Int) -> Index {
|
||||
return _visitGuts(_guts, args: offset,
|
||||
ascii: { ascii, offset in return ascii.characterIndex(atOffset: offset) },
|
||||
utf16: { utf16, offset in return utf16.characterIndex(atOffset: offset) },
|
||||
opaque: { opaque, offset in
|
||||
return opaque.characterIndex(atOffset: offset) })
|
||||
}
|
||||
}
|
||||
|
||||
extension String {
|
||||
/// Creates a new string containing the characters in the given sequence.
|
||||
///
|
||||
/// You can use this initializer to create a new string from the result of
|
||||
@@ -106,9 +72,9 @@ extension String {
|
||||
///
|
||||
/// - Parameter characters: A string instance or another sequence of
|
||||
/// characters.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable // specialize
|
||||
public init<S : Sequence>(_ characters: S)
|
||||
where S.Iterator.Element == Character {
|
||||
where S.Iterator.Element == Character {
|
||||
self = ""
|
||||
self.append(contentsOf: characters)
|
||||
}
|
||||
@@ -126,7 +92,7 @@ extension String {
|
||||
///
|
||||
/// - Complexity: O(*n*)
|
||||
public mutating func reserveCapacity(_ n: Int) {
|
||||
_guts.reserveCapacity(n)
|
||||
self._guts.reserveCapacity(n)
|
||||
}
|
||||
|
||||
/// Appends the given character to the string.
|
||||
@@ -140,39 +106,28 @@ extension String {
|
||||
///
|
||||
/// - Parameter c: The character to append to the string.
|
||||
public mutating func append(_ c: Character) {
|
||||
if let small = c._smallUTF16 {
|
||||
_guts.append(contentsOf: small)
|
||||
} else {
|
||||
_guts.append(c._largeUTF16!.unmanagedView)
|
||||
_fixLifetime(c)
|
||||
}
|
||||
self.append(c._str)
|
||||
}
|
||||
|
||||
public mutating func append(contentsOf newElements: String) {
|
||||
append(newElements)
|
||||
self.append(newElements)
|
||||
}
|
||||
|
||||
public mutating func append(contentsOf newElements: Substring) {
|
||||
_guts.append(
|
||||
newElements._wholeString._guts,
|
||||
range: newElements._encodedOffsetRange)
|
||||
// TODO(UTF8 perf): This is a horribly slow means...
|
||||
self.append(String(newElements))
|
||||
}
|
||||
|
||||
/// Appends the characters in the given sequence to the string.
|
||||
///
|
||||
/// - Parameter newElements: A sequence of characters.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public mutating func append<S : Sequence>(contentsOf newElements: S)
|
||||
where S.Iterator.Element == Character {
|
||||
if _fastPath(newElements is _SwiftStringView) {
|
||||
let v = newElements as! _SwiftStringView
|
||||
_guts.append(v._wholeString._guts, range: v._encodedOffsetRange)
|
||||
return
|
||||
}
|
||||
_guts.reserveUnusedCapacity(
|
||||
newElements.underestimatedCount,
|
||||
ascii: _guts.isASCII)
|
||||
for c in newElements { self.append(c) }
|
||||
where S.Iterator.Element == Character {
|
||||
// TODO(UTF8 perf): This is a horribly slow means...
|
||||
let scalars = String(
|
||||
decoding: newElements.map { $0.unicodeScalars }.joined().map { $0.value },
|
||||
as: UTF32.self)
|
||||
self.append(scalars)
|
||||
}
|
||||
|
||||
/// Replaces the text within the specified bounds with the given characters.
|
||||
@@ -189,15 +144,17 @@ extension String {
|
||||
/// `newElements`. If the call to `replaceSubrange(_:with:)` simply
|
||||
/// removes text at the end of the string, the complexity is O(*n*), where
|
||||
/// *n* is equal to `bounds.count`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@_specialize(where C == String)
|
||||
@_specialize(where C == Substring)
|
||||
@_specialize(where C == Array<Character>)
|
||||
public mutating func replaceSubrange<C>(
|
||||
_ bounds: Range<Index>,
|
||||
with newElements: C
|
||||
) where C : Collection, C.Iterator.Element == Character {
|
||||
let offsetRange: Range<Int> =
|
||||
bounds.lowerBound.encodedOffset ..< bounds.upperBound.encodedOffset
|
||||
let lazyUTF16 = newElements.lazy.flatMap { $0.utf16 }
|
||||
_guts.replaceSubrange(offsetRange, with: lazyUTF16)
|
||||
// TODO(UTF8 perf): This is a horribly slow means...
|
||||
let prefix = self[..<bounds.lowerBound]
|
||||
let suffix = self[bounds.upperBound...]
|
||||
self = prefix + String(newElements) + suffix
|
||||
}
|
||||
|
||||
/// Inserts a new character at the specified position.
|
||||
@@ -211,10 +168,9 @@ extension String {
|
||||
/// index, this method appends `newElement` to the string.
|
||||
///
|
||||
/// - Complexity: O(*n*), where *n* is the length of the string.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public mutating func insert(_ newElement: Character, at i: Index) {
|
||||
let offset = i.encodedOffset
|
||||
_guts.replaceSubrange(offset..<offset, with: newElement.utf16)
|
||||
// TODO(UTF8 perf): Operate on storage directly, sliding down elements
|
||||
self.replaceSubrange(i..<i, with: newElement._str)
|
||||
}
|
||||
|
||||
/// Inserts a collection of characters at the specified position.
|
||||
@@ -231,13 +187,15 @@ extension String {
|
||||
///
|
||||
/// - Complexity: O(*n*), where *n* is the combined length of the string and
|
||||
/// `newElements`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@_specialize(where S == String)
|
||||
@_specialize(where S == Substring)
|
||||
@_specialize(where S == Array<Character>)
|
||||
public mutating func insert<S : Collection>(
|
||||
contentsOf newElements: S, at i: Index
|
||||
) where S.Iterator.Element == Character {
|
||||
let offset = i.encodedOffset
|
||||
let utf16 = newElements.lazy.flatMap { $0.utf16 }
|
||||
_guts.replaceSubrange(offset..<offset, with: utf16)
|
||||
) where S.Element == Character {
|
||||
// TODO(UTF8 perf): Operate on storage directly, sliding down elements
|
||||
// TODO(UTF8 perf): This is a horribly slow means...
|
||||
self.replaceSubrange(i..<i, with: String(newElements))
|
||||
}
|
||||
|
||||
/// Removes and returns the character at the specified position.
|
||||
@@ -258,29 +216,12 @@ extension String {
|
||||
/// - Parameter i: The position of the character to remove. `i` must be a
|
||||
/// valid index of the string that is not equal to the string's end index.
|
||||
/// - Returns: The character that was removed.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@discardableResult
|
||||
public mutating func remove(at i: Index) -> Character {
|
||||
let offset = i.encodedOffset
|
||||
let stride = _stride(of: i)
|
||||
let range: Range<Int> = offset ..< offset + stride
|
||||
let old = Character(_unverified: _guts, range: range)
|
||||
_guts.replaceSubrange(range, with: EmptyCollection())
|
||||
return old
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal func _stride(of i: Index) -> Int {
|
||||
if let stride = i.characterStride { return stride }
|
||||
|
||||
let offset = i.encodedOffset
|
||||
return _visitGuts(_guts, args: offset,
|
||||
ascii: { ascii, offset in
|
||||
return ascii.characterStride(atOffset: offset) },
|
||||
utf16: { utf16, offset in
|
||||
return utf16.characterStride(atOffset: offset) },
|
||||
opaque: { opaque, offset in
|
||||
return opaque.characterStride(atOffset: offset) })
|
||||
// TODO(UTF8 perf): Operate on storage directly, sliding down elements
|
||||
let c = self[i]
|
||||
self.replaceSubrange(i..<i, with: String()) // TODO(UTF8): empty literal
|
||||
return c
|
||||
}
|
||||
|
||||
/// Removes the characters in the given range.
|
||||
@@ -293,11 +234,9 @@ extension String {
|
||||
/// equal to the string's end index.
|
||||
/// - Parameter bounds: The range of the elements to remove. The upper and
|
||||
/// lower bounds of `bounds` must be valid indices of the string.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public mutating func removeSubrange(_ bounds: Range<Index>) {
|
||||
let start = bounds.lowerBound.encodedOffset
|
||||
let end = bounds.upperBound.encodedOffset
|
||||
_guts.replaceSubrange(start..<end, with: EmptyCollection())
|
||||
// TODO(UTF8 perf): Operate on storage directly, sliding down elements
|
||||
self.replaceSubrange(bounds, with: String())
|
||||
}
|
||||
|
||||
/// Replaces this string with the empty string.
|
||||
@@ -309,13 +248,37 @@ extension String {
|
||||
/// string's allocated storage. Retaining the storage can be a useful
|
||||
/// optimization when you're planning to grow the string again. The
|
||||
/// default value is `false`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public mutating func removeAll(keepingCapacity keepCapacity: Bool = false) {
|
||||
if keepCapacity {
|
||||
_guts.replaceSubrange(0..<_guts.count, with: EmptyCollection())
|
||||
} else {
|
||||
_guts = _StringGuts()
|
||||
guard keepCapacity || _guts.capacity != nil else {
|
||||
self = String()
|
||||
return
|
||||
}
|
||||
|
||||
unimplemented_utf8()
|
||||
}
|
||||
}
|
||||
|
||||
extension String {
  /// Traps unless `index`'s encoded offset lies in `0 ..< _guts.count`.
  @inlinable @inline(__always)
  internal func _boundsCheck(_ index: Index) {
    let offset = index.encodedOffset
    _precondition(offset >= 0 && offset < _guts.count,
      "String index is out of bounds")
  }

  /// Traps unless the half-open `range` starts at a non-negative encoded
  /// offset and ends at or before `_guts.count`.
  @inlinable @inline(__always)
  internal func _boundsCheck(_ range: Range<Index>) {
    let lower = range.lowerBound.encodedOffset
    let upper = range.upperBound.encodedOffset
    _precondition(
      lower >= 0 && upper <= _guts.count,
      "String index range is out of bounds")
  }

  /// Traps unless the closed `range` starts at a non-negative encoded offset
  /// and its upper bound is strictly before `_guts.count`.
  @inlinable @inline(__always)
  internal func _boundsCheck(_ range: ClosedRange<Index>) {
    let lower = range.lowerBound.encodedOffset
    let upper = range.upperBound.encodedOffset
    _precondition(
      lower >= 0 && upper < _guts.count,
      "String index range is out of bounds")
  }
}
|
||||
|
||||
@@ -335,16 +298,14 @@ extension String {
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
extension Sequence where Element == String {
|
||||
@available(*, unavailable, message: "Operator '+' cannot be used to append a String to a sequence of strings")
|
||||
public static func + (lhs: Self, rhs: String) -> Never {
|
||||
fatalError()
|
||||
unimplemented_utf8()
|
||||
}
|
||||
|
||||
@available(*, unavailable, message: "Operator '+' cannot be used to append a String to a sequence of strings")
|
||||
public static func + (lhs: String, rhs: Self) -> Never {
|
||||
fatalError()
|
||||
unimplemented_utf8()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,308 +12,288 @@
|
||||
|
||||
import SwiftShims
|
||||
|
||||
// TODO(UTF8): We can drop the nonobjc annotations soon
|
||||
|
||||
@_fixed_layout
|
||||
@usableFromInline
|
||||
class _SwiftRawStringStorage : __SwiftNativeNSString {
|
||||
@nonobjc
|
||||
@usableFromInline
|
||||
final var capacity: Int
|
||||
|
||||
@nonobjc
|
||||
@usableFromInline
|
||||
final var count: Int
|
||||
|
||||
@nonobjc
|
||||
internal init(_doNotCallMe: ()) {
|
||||
_sanityCheckFailure("Use the create method")
|
||||
}
|
||||
|
||||
@inlinable
|
||||
@nonobjc
|
||||
internal var rawStart: UnsafeMutableRawPointer {
|
||||
_abstract()
|
||||
}
|
||||
|
||||
@inlinable
|
||||
@nonobjc
|
||||
final var unusedCapacity: Int {
|
||||
_sanityCheck(capacity >= count)
|
||||
return capacity - count
|
||||
}
|
||||
@objc
|
||||
internal class _AbstractStringStorage: _SwiftNativeNSString, _NSStringCore {
|
||||
// Abstract interface
|
||||
internal var asString: String { get { Builtin.unreachable() } }
|
||||
internal var count: Int { get { Builtin.unreachable() } }
|
||||
}
|
||||
|
||||
internal typealias _ASCIIStringStorage = _SwiftStringStorage<UInt8>
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal typealias _UTF16StringStorage = _SwiftStringStorage<UTF16.CodeUnit>
|
||||
|
||||
@_fixed_layout
|
||||
@usableFromInline
|
||||
final class _SwiftStringStorage<CodeUnit>
|
||||
: _SwiftRawStringStorage, _NSStringCore
|
||||
where CodeUnit : UnsignedInteger & FixedWidthInteger {
|
||||
|
||||
/// Create uninitialized storage of at least the specified capacity.
|
||||
@usableFromInline
|
||||
@nonobjc
|
||||
@_specialize(where CodeUnit == UInt8)
|
||||
@_specialize(where CodeUnit == UInt16)
|
||||
internal static func create(
|
||||
capacity: Int,
|
||||
count: Int = 0
|
||||
) -> _SwiftStringStorage<CodeUnit> {
|
||||
_sanityCheck(count >= 0 && count <= capacity)
|
||||
|
||||
#if arch(i386) || arch(arm)
|
||||
#else
|
||||
// TODO(SR-7594): Restore below invariant
|
||||
// _sanityCheck(
|
||||
// CodeUnit.self != UInt8.self || capacity > _SmallUTF8String.capacity,
|
||||
// "Should prefer a small representation")
|
||||
#endif // 64-bit
|
||||
|
||||
let storage = Builtin.allocWithTailElems_1(
|
||||
_SwiftStringStorage<CodeUnit>.self,
|
||||
capacity._builtinWordValue, CodeUnit.self)
|
||||
|
||||
let storageAddr = UnsafeMutableRawPointer(
|
||||
Builtin.bridgeToRawPointer(storage))
|
||||
let endAddr = (
|
||||
storageAddr + _swift_stdlib_malloc_size(storageAddr)
|
||||
).assumingMemoryBound(to: CodeUnit.self)
|
||||
storage.capacity = endAddr - storage.start
|
||||
storage.count = count
|
||||
_sanityCheck(storage.capacity >= capacity)
|
||||
return storage
|
||||
}
|
||||
|
||||
@inlinable
|
||||
@nonobjc
|
||||
internal override final var rawStart: UnsafeMutableRawPointer {
|
||||
return UnsafeMutableRawPointer(start)
|
||||
}
|
||||
|
||||
// ObjC interfaces
|
||||
#if _runtime(_ObjC)
|
||||
// NSString API
|
||||
|
||||
@objc(initWithCoder:)
|
||||
@usableFromInline
|
||||
convenience init(coder aDecoder: AnyObject) {
|
||||
_sanityCheckFailure("init(coder:) not implemented for _SwiftStringStorage")
|
||||
}
|
||||
|
||||
extension _AbstractStringStorage {
|
||||
@objc(length)
|
||||
@usableFromInline
|
||||
var length: Int {
|
||||
return count
|
||||
}
|
||||
final internal var length: Int { return asString._utf16Length() }
|
||||
|
||||
@objc(characterAtIndex:)
|
||||
@usableFromInline
|
||||
func character(at index: Int) -> UInt16 {
|
||||
defer { _fixLifetime(self) }
|
||||
precondition(index >= 0 && index < count, "Index out of bounds")
|
||||
return UInt16(start[index])
|
||||
final func character(at index: Int) -> UInt16 {
|
||||
return asString._utf16CodeUnitAtOffset(index)
|
||||
}
|
||||
|
||||
@objc(getCharacters:range:)
|
||||
@usableFromInline
|
||||
func getCharacters(
|
||||
_ buffer: UnsafeMutablePointer<UInt16>,
|
||||
range aRange: _SwiftNSRange
|
||||
) {
|
||||
final func getCharacters(
|
||||
_ buffer: UnsafeMutablePointer<UInt16>,
|
||||
range aRange: _SwiftNSRange) {
|
||||
_precondition(aRange.location >= 0 && aRange.length >= 0,
|
||||
"Range out of bounds")
|
||||
_precondition(aRange.location + aRange.length <= Int(count),
|
||||
"Range out of bounds")
|
||||
let slice = unmanagedView[
|
||||
aRange.location ..< aRange.location + aRange.length]
|
||||
slice._copy(
|
||||
into: UnsafeMutableBufferPointer<UTF16.CodeUnit>(
|
||||
start: buffer,
|
||||
count: aRange.length))
|
||||
_fixLifetime(self)
|
||||
|
||||
let range = Range(
|
||||
uncheckedBounds: (aRange.location, aRange.location+aRange.length))
|
||||
let slice = asString.utf16[asString._utf16OffsetToIndex(range)]
|
||||
let outputBufPtr = UnsafeMutableBufferPointer(
|
||||
start: buffer, count: range.count)
|
||||
|
||||
let _ = slice._copyContents(initializing: outputBufPtr)
|
||||
}
|
||||
|
||||
@objc(_fastCharacterContents)
|
||||
@usableFromInline
|
||||
func _fastCharacterContents() -> UnsafePointer<UInt16>? {
|
||||
guard CodeUnit.self == UInt16.self else { return nil }
|
||||
return UnsafePointer(rawStart.assumingMemoryBound(to: UInt16.self))
|
||||
final func _fastCharacterContents() -> UnsafePointer<UInt16>? {
|
||||
return nil
|
||||
}
|
||||
|
||||
@objc(_fastCStringContents)
|
||||
final func _fastCStringContents() -> UnsafePointer<CChar>? {
|
||||
if let native = self as? _StringStorage {
|
||||
return native.start._asCChar
|
||||
}
|
||||
|
||||
// TODO(UTF8 perf): shared from literals are nul-terminated...
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@objc(copyWithZone:)
|
||||
@usableFromInline
|
||||
func copy(with zone: _SwiftNSZone?) -> AnyObject {
|
||||
// While _SwiftStringStorage instances aren't immutable in general,
|
||||
final func copy(with zone: _SwiftNSZone?) -> AnyObject {
|
||||
// While _StringStorage instances aren't immutable in general,
|
||||
// mutations may only occur when instances are uniquely referenced.
|
||||
// Therefore, it is safe to return self here; any outstanding Objective-C
|
||||
// reference will make the instance non-unique.
|
||||
return self
|
||||
}
|
||||
}
|
||||
#endif // _runtime(_ObjC)
|
||||
}
|
||||
|
||||
extension _SwiftStringStorage {
|
||||
// Basic properties
|
||||
|
||||
@inlinable
|
||||
@_fixed_layout
|
||||
@usableFromInline
|
||||
final internal class _StringStorage: _AbstractStringStorage {
|
||||
@nonobjc
|
||||
internal final var start: UnsafeMutablePointer<CodeUnit> {
|
||||
return UnsafeMutablePointer(Builtin.projectTailElems(self, CodeUnit.self))
|
||||
@usableFromInline
|
||||
internal var capacity: Int
|
||||
|
||||
@nonobjc
|
||||
@usableFromInline
|
||||
internal var _count: Int
|
||||
|
||||
@nonobjc
|
||||
@inlinable
|
||||
override internal var count: Int { @inline(__always) get { return _count } }
|
||||
|
||||
@nonobjc
|
||||
@inlinable
|
||||
override internal var asString: String {
|
||||
@inline(__always) get { return String(_StringGuts(self)) }
|
||||
}
|
||||
|
||||
@inlinable
|
||||
@nonobjc
|
||||
internal final var end: UnsafeMutablePointer<CodeUnit> {
|
||||
return start + count
|
||||
}
|
||||
|
||||
@inlinable
|
||||
@nonobjc
|
||||
internal final var capacityEnd: UnsafeMutablePointer<CodeUnit> {
|
||||
return start + capacity
|
||||
}
|
||||
|
||||
@inlinable
|
||||
@nonobjc
|
||||
var usedBuffer: UnsafeMutableBufferPointer<CodeUnit> {
|
||||
return UnsafeMutableBufferPointer(start: start, count: count)
|
||||
}
|
||||
|
||||
@inlinable
|
||||
@nonobjc
|
||||
var unusedBuffer: UnsafeMutableBufferPointer<CodeUnit> {
|
||||
@inline(__always)
|
||||
get {
|
||||
return UnsafeMutableBufferPointer(start: end, count: capacity - count)
|
||||
}
|
||||
}
|
||||
|
||||
@inlinable
|
||||
@nonobjc
|
||||
var unmanagedView: _UnmanagedString<CodeUnit> {
|
||||
return _UnmanagedString(start: self.start, count: self.count)
|
||||
internal init(_doNotCallMe: ()) {
|
||||
_sanityCheckFailure("Use the create method")
|
||||
}
|
||||
}
|
||||
|
||||
extension _SwiftStringStorage {
|
||||
// Append operations
|
||||
|
||||
// Creation
|
||||
extension _StringStorage {
|
||||
@nonobjc
|
||||
internal final func _appendInPlace<OtherCodeUnit>(
|
||||
_ other: _UnmanagedString<OtherCodeUnit>
|
||||
)
|
||||
where OtherCodeUnit : FixedWidthInteger & UnsignedInteger {
|
||||
let otherCount = Int(other.count)
|
||||
_sanityCheck(self.count + otherCount <= self.capacity)
|
||||
other._copy(into: self.unusedBuffer)
|
||||
self.count += otherCount
|
||||
/// Allocates a storage object with tail-allocated room for at least
/// `capacity` code units plus a trailing nul byte.
///
/// - Parameters:
///   - capacity: Minimum number of code units requested; raised to at least
///     `_SmallUTF8String.capacity`.
///   - count: Value recorded as the storage's `_count`. The code units
///     themselves are NOT initialized here.
/// - Returns: The new storage, with a nul byte written at offset `count`.
internal static func create(
  capacity: Int, count: Int = 0
) -> _StringStorage {
  _sanityCheck(capacity >= count)

  // Reserve enough capacity for a trailing nul character
  let paddedCapacity = 1 + Swift.max(capacity, _SmallUTF8String.capacity)
  _sanityCheck(paddedCapacity > count)

  let storage = Builtin.allocWithTailElems_1(
    _StringStorage.self,
    paddedCapacity._builtinWordValue, UInt8.self)

  // Record the capacity from the size the allocator reports for the object,
  // measured from the first tail element to the end of the allocation.
  let objectBase = UnsafeRawPointer(Builtin.bridgeToRawPointer(storage))
  let allocationSize = _stdlib_malloc_size(objectBase)
  let allocationEnd =
    (objectBase + allocationSize).assumingMemoryBound(to: UInt8.self)

  storage.capacity = allocationEnd - storage.start
  storage._count = count
  _sanityCheck(storage.capacity >= paddedCapacity)
  storage.unusedStorage[0] = 0 // nul-terminated
  storage._invariantCheck()

  return storage
}
|
||||
|
||||
@nonobjc
|
||||
internal final func _appendInPlace(_ other: _UnmanagedOpaqueString) {
|
||||
let otherCount = Int(other.count)
|
||||
_sanityCheck(self.count + otherCount <= self.capacity)
|
||||
other._copy(into: self.unusedBuffer)
|
||||
self.count += otherCount
|
||||
/// Allocates storage with the requested capacity and copies the bytes of
/// `bufPtr` into it.
///
/// - Parameters:
///   - bufPtr: Source code units. Its base address is unwrapped without a
///     check, so it must not be `nil`.
///   - capacity: Requested capacity; must be at least `bufPtr.count`.
/// - Returns: Storage whose `count` equals `bufPtr.count`.
internal static func create(
  initializingFrom bufPtr: UnsafeBufferPointer<UInt8>, capacity: Int
) -> _StringStorage {
  let byteCount = bufPtr.count
  _sanityCheck(capacity >= byteCount)

  let storage = _StringStorage.create(capacity: capacity, count: byteCount)
  let source = bufPtr.baseAddress._unsafelyUnwrappedUnchecked
  storage.mutableStart.initialize(from: source, count: byteCount)
  return storage
}
|
||||
|
||||
@nonobjc
|
||||
internal final func _appendInPlace<C: Collection>(contentsOf other: C)
|
||||
where C.Element == CodeUnit {
|
||||
let otherCount = Int(other.count)
|
||||
_sanityCheck(self.count + otherCount <= self.capacity)
|
||||
var (remainder, writtenUpTo) =
|
||||
other._copyContents(initializing: self.unusedBuffer)
|
||||
_precondition(remainder.next() == nil, "Collection underreported its count")
|
||||
_precondition(writtenUpTo == otherCount, "Collection misreported its count")
|
||||
count += otherCount
|
||||
/// Convenience overload: allocates storage holding a copy of `bufPtr`,
/// requesting a capacity equal to the buffer's own length.
internal static func create(
  initializingFrom bufPtr: UnsafeBufferPointer<UInt8>
) -> _StringStorage {
  let requestedCapacity = bufPtr.count
  return _StringStorage.create(
    initializingFrom: bufPtr, capacity: requestedCapacity)
}
|
||||
|
||||
@_specialize(where C == Character._SmallUTF16, CodeUnit == UInt8)
|
||||
@nonobjc
|
||||
internal final func _appendInPlaceUTF16<C: Collection>(contentsOf other: C)
|
||||
where C.Element == UInt16 {
|
||||
let otherCount = Int(other.count)
|
||||
_sanityCheck(self.count + otherCount <= self.capacity)
|
||||
// TODO: Use _copyContents(initializing:) for UTF16->UTF16 case
|
||||
var it = other.makeIterator()
|
||||
for p in end ..< end + otherCount {
|
||||
p.pointee = CodeUnit(it.next()!)
|
||||
}
|
||||
_precondition(it.next() == nil, "Collection underreported its count")
|
||||
count += otherCount
|
||||
/// Allocates storage holding the bytes of `bufPtr` immediately followed by
/// the bytes of `secondBufPtr`.
///
/// Both base addresses are unwrapped without a check, so neither buffer may
/// have a `nil` base address.
///
/// - Returns: Storage whose `count` is the sum of the two buffer lengths.
internal static func create(
  initializingFrom bufPtr: UnsafeBufferPointer<UInt8>,
  andAppending secondBufPtr: UnsafeBufferPointer<UInt8>
) -> _StringStorage {
  let firstCount = bufPtr.count
  let secondCount = secondBufPtr.count
  let total = firstCount + secondCount
  let storage = _StringStorage.create(capacity: total, count: total)

  // First run goes at the start of the tail allocation.
  let firstSource = bufPtr.baseAddress._unsafelyUnwrappedUnchecked
  storage.mutableStart.initialize(from: firstSource, count: firstCount)

  // Second run goes directly after the first.
  let secondSource = secondBufPtr.baseAddress._unsafelyUnwrappedUnchecked
  let secondDestination = storage.mutableStart + firstCount
  secondDestination.initialize(from: secondSource, count: secondCount)
  return storage
}
|
||||
}
|
||||
|
||||
extension _SwiftStringStorage {
|
||||
@nonobjc
|
||||
internal final func _appendInPlace(_ other: _StringGuts, range: Range<Int>) {
|
||||
if _slowPath(other._isOpaque) {
|
||||
_opaqueAppendInPlace(opaqueOther: other, range: range)
|
||||
return
|
||||
}
|
||||
// TODO(UTF8 perf): Append helpers, which can keep nul-termination
|
||||
|
||||
defer { _fixLifetime(other) }
|
||||
if other.isASCII {
|
||||
_appendInPlace(other._unmanagedASCIIView[range])
|
||||
} else {
|
||||
_appendInPlace(other._unmanagedUTF16View[range])
|
||||
// Usage
|
||||
extension _StringStorage {
|
||||
/// Mutable pointer to the first tail-allocated `UInt8` element of this
/// object.
@nonobjc
@inlinable
internal var mutableStart: UnsafeMutablePointer<UInt8> {
  @inline(__always)
  get {
    let tailElements = Builtin.projectTailElems(self, UInt8.self)
    return UnsafeMutablePointer(tailElements)
  }
}
|
||||
|
||||
@usableFromInline // @opaque
|
||||
internal final func _opaqueAppendInPlace(
|
||||
opaqueOther other: _StringGuts, range: Range<Int>
|
||||
) {
|
||||
_sanityCheck(other._isOpaque)
|
||||
if other._isSmall {
|
||||
other._smallUTF8String[range].withUnmanagedUTF16 {
|
||||
self._appendInPlace($0)
|
||||
}
|
||||
return
|
||||
}
|
||||
defer { _fixLifetime(other) }
|
||||
_appendInPlace(other._asOpaque()[range])
|
||||
/// Mutable pointer one past the last counted code unit
/// (`mutableStart` advanced by `count`).
@nonobjc
@inlinable
internal var mutableEnd: UnsafeMutablePointer<UInt8> {
  @inline(__always)
  get {
    return mutableStart + count
  }
}
|
||||
|
||||
@nonobjc
|
||||
internal final func _appendInPlace(_ other: _StringGuts) {
|
||||
if _slowPath(other._isOpaque) {
|
||||
_opaqueAppendInPlace(opaqueOther: other)
|
||||
return
|
||||
}
|
||||
|
||||
defer { _fixLifetime(other) }
|
||||
if other.isASCII {
|
||||
_appendInPlace(other._unmanagedASCIIView)
|
||||
} else {
|
||||
_appendInPlace(other._unmanagedUTF16View)
|
||||
}
|
||||
}
|
||||
|
||||
@usableFromInline // @opaque
|
||||
internal final func _opaqueAppendInPlace(opaqueOther other: _StringGuts) {
|
||||
_sanityCheck(other._isOpaque)
|
||||
if other._isSmall {
|
||||
other._smallUTF8String.withUnmanagedUTF16 {
|
||||
self._appendInPlace($0)
|
||||
}
|
||||
return
|
||||
}
|
||||
defer { _fixLifetime(other) }
|
||||
_appendInPlace(other._asOpaque())
|
||||
/// Read-only view of `mutableStart`.
@inlinable
internal var start: UnsafePointer<UInt8> {
  @inline(__always)
  get {
    return UnsafePointer(mutableStart)
  }
}
|
||||
|
||||
@nonobjc
|
||||
internal final func _appendInPlace(_ other: String) {
|
||||
self._appendInPlace(other._guts)
|
||||
/// Read-only view of `mutableEnd`.
@inlinable
internal final var end: UnsafePointer<UInt8> {
  @inline(__always)
  get {
    return UnsafePointer(mutableEnd)
  }
}
|
||||
|
||||
@nonobjc
|
||||
internal final func _appendInPlace<S : StringProtocol>(_ other: S) {
|
||||
self._appendInPlace(
|
||||
other._wholeString._guts,
|
||||
range: other._encodedOffsetRange)
|
||||
/// The counted code units as a read-only buffer: `count` bytes beginning at
/// `start`.
@inlinable
internal var codeUnits: UnsafeBufferPointer<UInt8> {
  @inline(__always)
  get {
    let base = start
    let length = count
    return UnsafeBufferPointer(start: base, count: length)
  }
}
|
||||
|
||||
/// The tail region past the counted code units: `unusedCapacity` bytes
/// beginning at `mutableEnd`.
@inlinable
@nonobjc
internal var unusedStorage: UnsafeMutableBufferPointer<UInt8> {
  @inline(__always)
  get {
    let base = mutableEnd
    let length = unusedCapacity
    return UnsafeMutableBufferPointer(start: base, count: length)
  }
}
|
||||
|
||||
/// Number of bytes between `count` and `capacity`, computed with wrapping
/// (non-trapping) subtraction.
@nonobjc
@inlinable
internal var unusedCapacity: Int {
  @inline(__always)
  get {
    return capacity &- count
  }
}
|
||||
|
||||
/// Debug-only consistency checks; compiles to nothing unless
/// `INTERNAL_CHECKS_ENABLED` is set.
///
/// Verifies that the code units begin `_StringObject.nativeBias` bytes past
/// the object address, that there is room for a nul terminator, and that the
/// byte right after the counted code units is nul.
@nonobjc
@inlinable @inline(__always)
internal func _invariantCheck() {
  #if INTERNAL_CHECKS_ENABLED
  let objectAddress = UnsafeRawPointer(Builtin.bridgeToRawPointer(self))
  let tailAddress = UnsafeRawPointer(start)
  let expectedTailAddress = objectAddress + Int(_StringObject.nativeBias)
  _sanityCheck(expectedTailAddress == tailAddress)
  _sanityCheck(self.capacity > self.count, "no room for nul-terminator")
  _sanityCheck(self.unusedStorage[0] == 0, "not nul terminated")
  #endif
}
|
||||
}
|
||||
|
||||
// For bridging literals
|
||||
//
|
||||
// TODO(UTF8): Unify impls with _StringStorage
|
||||
//
|
||||
/// String storage that refers to a UTF-8 buffer supplied by the caller
/// instead of tail-allocating its own code units.
@_fixed_layout
@usableFromInline
final internal class _SharedStringStorage: _AbstractStringStorage {
  /// Object supplied alongside `contents`; `nil` for immortal buffers.
  /// NOTE(review): presumably retained to keep `contents` alive — confirm.
  @nonobjc
  @usableFromInline
  internal var owner: AnyObject?

  /// The code units this storage exposes.
  @nonobjc
  @usableFromInline
  internal var contents: UnsafeBufferPointer<UInt8>

  /// Base address of `contents`, unwrapped without a nil check.
  @nonobjc
  @usableFromInline
  internal var start: UnsafePointer<UInt8> {
    get {
      return contents.baseAddress._unsafelyUnwrappedUnchecked
    }
  }

  /// Number of code units in `contents`.
  @nonobjc
  @usableFromInline
  override internal var count: Int {
    get {
      return contents.count
    }
  }

  /// Creates storage for `bufPtr`, recording `owner` alongside it.
  @nonobjc
  internal init(owner: AnyObject, contents bufPtr: UnsafeBufferPointer<UInt8>) {
    self.owner = owner
    self.contents = bufPtr
    super.init()
    self._invariantCheck()
  }

  /// Creates storage for `bufPtr` with no associated owner object.
  @nonobjc
  internal init(immortal bufPtr: UnsafeBufferPointer<UInt8>) {
    self.owner = nil
    self.contents = bufPtr
    super.init()
    self._invariantCheck()
  }

  /// A `String` whose guts are built from this storage object.
  @nonobjc
  override internal var asString: String {
    get {
      let guts = _StringGuts(self)
      return String(guts)
    }
  }
}
|
||||
|
||||
extension _SharedStringStorage {
  /// Hook for debug-only consistency checks. No checks are implemented yet;
  /// the body is empty both with and without `INTERNAL_CHECKS_ENABLED`.
  @nonobjc
  @inlinable @inline(__always)
  internal func _invariantCheck() {
    #if INTERNAL_CHECKS_ENABLED
    #endif
  }
}
|
||||
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ struct _StringRepresentation {
|
||||
case _cocoa(object: AnyObject)
|
||||
case _native(object: AnyObject)
|
||||
case _immortal(address: UInt)
|
||||
// TODO: shared native
|
||||
}
|
||||
public var _form: _Form
|
||||
|
||||
@@ -37,27 +38,35 @@ struct _StringRepresentation {
|
||||
|
||||
extension String {
|
||||
public // @testable
|
||||
func _classify() -> _StringRepresentation {
|
||||
func _classify() -> _StringRepresentation { return _guts._classify() }
|
||||
}
|
||||
|
||||
extension _StringGuts {
|
||||
internal func _classify() -> _StringRepresentation {
|
||||
var result = _StringRepresentation(
|
||||
_isASCII: _guts._isASCIIOrSmallASCII,
|
||||
_count: _guts.count,
|
||||
_capacity: _guts.capacity,
|
||||
_isASCII: self.isKnownASCII,
|
||||
_count: self.count,
|
||||
_capacity: self.capacity ?? 0,
|
||||
_form: ._small
|
||||
)
|
||||
if _guts._isSmall {
|
||||
if _object.isSmall {
|
||||
return result
|
||||
}
|
||||
if _guts._isNative {
|
||||
result._form = ._native(object: _guts._owner!)
|
||||
if _object.largeIsCocoa {
|
||||
result._form = ._cocoa(object: _object.cocoaObject)
|
||||
return result
|
||||
}
|
||||
if _guts._isCocoa {
|
||||
result._form = ._cocoa(object: _guts._owner!)
|
||||
return result
|
||||
}
|
||||
if _guts._isUnmanaged {
|
||||
|
||||
// TODO: shared native
|
||||
_sanityCheck(_object.providesFastUTF8)
|
||||
_sanityCheck(_object.largeFastIsNative)
|
||||
if _object.isImmortal {
|
||||
result._form = ._immortal(
|
||||
address: UInt(bitPattern: _guts._unmanagedRawStart))
|
||||
address: UInt(bitPattern: _object.nativeUTF8Start))
|
||||
return result
|
||||
}
|
||||
if _object.hasNativeStorage {
|
||||
result._form = ._native(object: _object.nativeStorage)
|
||||
return result
|
||||
}
|
||||
fatalError()
|
||||
|
||||
@@ -10,6 +10,42 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// TODO(UTF8 merge): Find a common place for these helpers
|
||||
extension _StringGuts {
  /// Fetches the UTF-16 code unit at offset `i` of a foreign string by
  /// asking the underlying Cocoa object.
  ///
  /// - Parameter i: Offset passed through unchanged to
  ///   `_cocoaStringSubscript`.
  @_effects(releasenone)
  // TODO(UTF8): Should probably take a String.Index, assert no transcoding
  internal func foreignUTF16CodeUnit(at i: Int) -> UInt16 {
    // Currently, foreign means NSString
    let cocoa = _object.cocoaObject
    return _cocoaStringSubscript(cocoa, i)
  }
}
|
||||
|
||||
/// High six bits shared by every leading (high) surrogate, 0xD800...0xDBFF.
internal let _leadingSurrogateBias: UInt16 = 0xD800
/// High six bits shared by every trailing (low) surrogate, 0xDC00...0xDFFF.
internal let _trailingSurrogateBias: UInt16 = 0xDC00
/// Mask that keeps only the high six bits of a UTF-16 code unit.
internal let _surrogateMask: UInt16 = 0xFC00

/// Returns `true` when `cu` is a trailing (low) surrogate code unit.
@inline(__always)
internal func _isTrailingSurrogate(_ cu: UInt16) -> Bool {
  let highBits = cu & _surrogateMask
  return highBits == _trailingSurrogateBias
}

/// Returns `true` when `cu` is a leading (high) surrogate code unit.
@inline(__always)
internal func _isLeadingSurrogate(_ cu: UInt16) -> Bool {
  let highBits = cu & _surrogateMask
  return highBits == _leadingSurrogateBias
}
|
||||
|
||||
/// Number of UTF-8 code units needed for the scalar that begins with the
/// UTF-16 code unit `x`.
///
/// - Parameter x: A UTF-16 code unit that is not a trailing surrogate.
/// - Returns: 4 for a leading surrogate (the whole surrogate pair), 1 below
///   0x80, 2 below 0x800, and 3 otherwise.
internal func _numTranscodedUTF8CodeUnits(_ x: UInt16) -> Int {
  _sanityCheck(!_isTrailingSurrogate(x))

  if _slowPath(_isLeadingSurrogate(x)) { return 4 }

  if x < 0x80 { return 1 }
  if x < 0x0800 { return 2 }
  return 3
}
|
||||
|
||||
|
||||
// FIXME(ABI)#71 : The UTF-16 string view should have a custom iterator type to
|
||||
// allow performance optimizations of linear traversals.
|
||||
|
||||
@@ -99,230 +135,190 @@ extension String {
|
||||
/// }
|
||||
/// // Prints "Let it snow!"
|
||||
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
public struct UTF16View
|
||||
: BidirectionalCollection,
|
||||
CustomStringConvertible,
|
||||
CustomDebugStringConvertible {
|
||||
|
||||
public typealias Index = String.Index
|
||||
|
||||
/// The position of the first code unit if the `String` is
|
||||
/// nonempty; identical to `endIndex` otherwise.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var startIndex: Index {
|
||||
return Index(encodedOffset: _offset)
|
||||
}
|
||||
|
||||
/// The "past the end" position---that is, the position one greater than
|
||||
/// the last valid subscript argument.
|
||||
///
|
||||
/// In an empty UTF-16 view, `endIndex` is equal to `startIndex`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var endIndex: Index {
|
||||
return Index(encodedOffset: _offset + _length)
|
||||
}
|
||||
|
||||
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
public struct Indices {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(
|
||||
_elements: String.UTF16View, _startIndex: Index, _endIndex: Index
|
||||
) {
|
||||
self._elements = _elements
|
||||
self._startIndex = _startIndex
|
||||
self._endIndex = _endIndex
|
||||
}
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal var _elements: String.UTF16View
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal var _startIndex: Index
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal var _endIndex: Index
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var indices: Indices {
|
||||
return Indices(
|
||||
_elements: self, startIndex: startIndex, endIndex: endIndex)
|
||||
}
|
||||
|
||||
// TODO: swift-3-indexing-model - add docs
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func index(after i: Index) -> Index {
|
||||
// FIXME: swift-3-indexing-model: range check i?
|
||||
return Index(encodedOffset: _unsafePlus(i.encodedOffset, 1))
|
||||
}
|
||||
|
||||
// TODO: swift-3-indexing-model - add docs
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func index(before i: Index) -> Index {
|
||||
// FIXME: swift-3-indexing-model: range check i?
|
||||
return Index(encodedOffset: _unsafeMinus(i.encodedOffset, 1))
|
||||
}
|
||||
|
||||
// TODO: swift-3-indexing-model - add docs
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func index(_ i: Index, offsetBy n: Int) -> Index {
|
||||
// FIXME: swift-3-indexing-model: range check i?
|
||||
return Index(encodedOffset: i.encodedOffset.advanced(by: n))
|
||||
}
|
||||
|
||||
// TODO: swift-3-indexing-model - add docs
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func index(
|
||||
_ i: Index, offsetBy n: Int, limitedBy limit: Index
|
||||
) -> Index? {
|
||||
// FIXME: swift-3-indexing-model: range check i?
|
||||
let d = i.encodedOffset.distance(to: limit.encodedOffset)
|
||||
if (d >= 0) ? (d < n) : (d > n) {
|
||||
return nil
|
||||
}
|
||||
return Index(encodedOffset: i.encodedOffset.advanced(by: n))
|
||||
}
|
||||
|
||||
// TODO: swift-3-indexing-model - add docs
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func distance(from start: Index, to end: Index) -> Int {
|
||||
// FIXME: swift-3-indexing-model: range check start and end?
|
||||
return start.encodedOffset.distance(to: end.encodedOffset)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal func _internalIndex(at i: Int) -> Int {
|
||||
return _guts.startIndex + i
|
||||
}
|
||||
|
||||
/// Accesses the code unit at the given position.
|
||||
///
|
||||
/// The following example uses the subscript to print the value of a
|
||||
/// string's first UTF-16 code unit.
|
||||
///
|
||||
/// let greeting = "Hello, friend!"
|
||||
/// let i = greeting.utf16.startIndex
|
||||
/// print("First character's UTF-16 code unit: \(greeting.utf16[i])")
|
||||
/// // Prints "First character's UTF-16 code unit: 72"
|
||||
///
|
||||
/// - Parameter position: A valid index of the view. `position` must be
|
||||
/// less than the view's end index.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public subscript(i: Index) -> UTF16.CodeUnit {
|
||||
_precondition(i >= startIndex && i < endIndex,
|
||||
"out-of-range access on a UTF16View")
|
||||
|
||||
let index = _internalIndex(at: i.encodedOffset)
|
||||
let u = _guts.codeUnit(atCheckedOffset: index)
|
||||
if _fastPath(UTF16._isScalar(u)) {
|
||||
// Neither high-surrogate, nor low-surrogate -- well-formed sequence
|
||||
// of 1 code unit.
|
||||
return u
|
||||
}
|
||||
|
||||
if UTF16.isLeadSurrogate(u) {
|
||||
// Sequence is well-formed if `u` is followed by a low-surrogate.
|
||||
if _fastPath(
|
||||
index + 1 < _guts.count &&
|
||||
UTF16.isTrailSurrogate(_guts.codeUnit(atCheckedOffset: index + 1)))
|
||||
{
|
||||
return u
|
||||
}
|
||||
return UTF16._replacementCodeUnit
|
||||
}
|
||||
|
||||
// `u` is a low-surrogate. Sequence is well-formed if
|
||||
// previous code unit is a high-surrogate.
|
||||
if _fastPath(
|
||||
index != 0 &&
|
||||
UTF16.isLeadSurrogate(_guts.codeUnit(atCheckedOffset: index - 1)))
|
||||
{
|
||||
return u
|
||||
}
|
||||
return UTF16._replacementCodeUnit
|
||||
}
|
||||
|
||||
#if _runtime(_ObjC)
|
||||
// These may become less important once <rdar://problem/19255291> is addressed.
|
||||
|
||||
@available(
|
||||
*, unavailable,
|
||||
message: "Indexing a String's UTF16View requires a String.UTF16View.Index, which can be constructed from Int when Foundation is imported")
|
||||
public subscript(i: Int) -> UTF16.CodeUnit {
|
||||
Builtin.unreachable()
|
||||
}
|
||||
|
||||
@available(
|
||||
*, unavailable,
|
||||
message: "Slicing a String's UTF16View requires a Range<String.UTF16View.Index>, String.UTF16View.Index can be constructed from Int when Foundation is imported")
|
||||
public subscript(bounds: Range<Int>) -> UTF16View {
|
||||
Builtin.unreachable()
|
||||
}
|
||||
#endif
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(_ _guts: _StringGuts) {
|
||||
self.init(_guts, offset: 0, length: _guts.count)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(_ _guts: _StringGuts, offset: Int, length: Int) {
|
||||
self._offset = offset
|
||||
self._length = length
|
||||
self._guts = _guts
|
||||
}
|
||||
|
||||
public var description: String {
|
||||
return String(_guts._extractSlice(_encodedOffsetRange))
|
||||
}
|
||||
|
||||
public var debugDescription: String {
|
||||
return "StringUTF16(\(self.description.debugDescription))"
|
||||
}
|
||||
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal var _offset: Int
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal var _length: Int
|
||||
|
||||
public struct UTF16View {
|
||||
@usableFromInline
|
||||
internal var _guts: _StringGuts
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(_ guts: _StringGuts) {
|
||||
self._guts = guts
|
||||
_invariantCheck()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extension String.UTF16View {
  /// Hook for debug-only consistency checks. No checks are implemented yet;
  /// the body is empty both with and without `INTERNAL_CHECKS_ENABLED`.
  @inlinable @inline(__always)
  internal func _invariantCheck() {
    #if INTERNAL_CHECKS_ENABLED
    #endif
  }
}
|
||||
|
||||
extension String.UTF16View: BidirectionalCollection {
|
||||
public typealias Index = String.Index
|
||||
|
||||
/// The position of the first code unit when the view is nonempty; equal to
/// `endIndex` when it is empty.
@inlinable // FIXME(sil-serialize-all)
public var startIndex: Index {
  @inline(__always)
  get {
    return Index(encodedOffset: 0)
  }
}
|
||||
|
||||
/// The view's "past the end" position: one greater than the last valid
/// subscript argument.
///
/// For an empty UTF-16 view, `endIndex` equals `startIndex`.
@inlinable // FIXME(sil-serialize-all)
public var endIndex: Index {
  @inline(__always)
  get {
    let codeUnitCount = _guts.count
    return Index(encodedOffset: codeUnitCount)
  }
}
|
||||
|
||||
/// Returns the position immediately after `i`.
///
/// Foreign strings are handled by `_foreignIndex(after:)`. For fast UTF-8
/// strings the step depends on the UTF-8 length of the scalar at `i`.
@inlinable @inline(__always)
public func index(after i: Index) -> Index {
  // TODO(UTF8) known-ASCII fast path

  if _slowPath(_guts.isForeign) {
    return _foreignIndex(after: i)
  }

  // A BMP scalar (1-3 UTF-8 code units) is skipped in one step. A non-BMP
  // scalar (4 UTF-8 code units) is first stepped into via a transcoded
  // offset, and only skipped on the following call.
  let scalarLength = _guts.fastUTF8ScalarLength(startingAt: i.encodedOffset)
  let needsTranscodedStep = scalarLength == 4 && i.transcodedOffset == 0
  guard !needsTranscodedStep else {
    return Index(transcodedAfter: i)
  }
  return Index(encodedOffset: i.encodedOffset &+ scalarLength)
}
|
||||
|
||||
/// Returns the position immediately before `i`.
///
/// - Parameter i: A valid index of the view other than the start position.
///   An index with `encodedOffset == 0` and a nonzero `transcodedOffset` is
///   accepted: it is one step past the start of a 4-byte scalar at the
///   beginning of the string.
/// - Returns: For foreign strings, the result of `_foreignIndex(before:)`.
///   For fast UTF-8 strings: the scalar's start when `i` carries a
///   transcoded offset; a transcoded index into the preceding scalar when
///   that scalar is 4 UTF-8 code units long; otherwise the start of the
///   preceding scalar.
@inlinable @inline(__always)
public func index(before i: Index) -> Index {
  if _slowPath(_guts.isForeign) {
    precondition(i.encodedOffset > 0)
    return _foreignIndex(before: i)
  }

  // TODO(UTF8) known-ASCII fast path

  // This case must be handled before any `encodedOffset > 0` check. The
  // index (encodedOffset: 0, transcodedOffset: 1) is produced by this very
  // method when stepping back over a 4-byte scalar at offset 0, and
  // rejecting it would trap during reverse traversal.
  if i.transcodedOffset != 0 {
    _sanityCheck(i.transcodedOffset == 1)
    return Index(encodedOffset: i.encodedOffset)
  }

  // With no transcoded offset, offset 0 is the start position and has no
  // predecessor.
  precondition(i.encodedOffset > 0)

  let len = _guts.fastUTF8ScalarLength(endingAt: i.encodedOffset)
  if len == 4 {
    // Non-BMP scalar: land one step into it via a transcoded offset first.
    return Index(
      encodedOffset: i.encodedOffset &- len,
      transcodedOffset: 1)
  }

  _sanityCheck((1...3) ~= len)
  return Index(encodedOffset: i.encodedOffset &- len)
}
|
||||
|
||||
/// Returns the index offset from `i` by `n` positions.
///
/// Foreign strings are handled by `_foreignIndex(_:offsetBy:)`; all other
/// strings forward to `__index(_:offsetBy:)`.
@inlinable @inline(__always)
public func index(_ i: Index, offsetBy n: Int) -> Index {
  guard _slowPath(_guts.isForeign) else {
    // TODO(UTF8) known-ASCII fast path
    return __index(i, offsetBy: n)
  }
  return _foreignIndex(i, offsetBy: n)
}
|
||||
|
||||
/// Returns the index offset from `i` by `n` positions, or `nil` as decided
/// by the underlying implementation when `limit` is involved.
///
/// Foreign strings are handled by `_foreignIndex(_:offsetBy:limitedBy:)`;
/// all other strings forward to `__index(_:offsetBy:limitedBy:)`.
@inlinable @inline(__always)
public func index(
  _ i: Index, offsetBy n: Int, limitedBy limit: Index
) -> Index? {
  guard _slowPath(_guts.isForeign) else {
    // TODO(UTF8) known-ASCII fast paths
    return __index(i, offsetBy: n, limitedBy: limit)
  }
  return _foreignIndex(i, offsetBy: n, limitedBy: limit)
}
|
||||
|
||||
/// Returns the number of positions between `start` and `end`.
///
/// Foreign strings are handled by `_foreignDistance(from:to:)`; all other
/// strings forward to `__distance(from:to:)`.
@inlinable @inline(__always)
public func distance(from start: Index, to end: Index) -> Int {
  guard _slowPath(_guts.isForeign) else {
    // TODO(UTF8) known-ASCII fast paths
    return __distance(from: start, to: end)
  }
  return _foreignDistance(from: start, to: end)
}
|
||||
|
||||
/// Accesses the UTF-16 code unit at the given position.
///
/// The following example prints the value of a string's first UTF-16 code
/// unit:
///
///     let greeting = "Hello, friend!"
///     let i = greeting.utf16.startIndex
///     print("First character's UTF-16 code unit: \(greeting.utf16[i])")
///     // Prints "First character's UTF-16 code unit: 72"
///
/// - Parameter i: A valid index of the view. `i` must be less than the
///   view's end index.
@inlinable
public subscript(i: Index) -> UTF16.CodeUnit {
  @inline(__always) get {
    _precondition(i.encodedOffset >= 0 && i < endIndex)
    // TODO(UTF8): known-ASCII fast path

    guard _fastPath(_guts.isFastUTF8) else {
      return _foreignSubscript(position: i)
    }

    let scalar = _guts.fastUTF8Scalar(startingAt: i.encodedOffset)
    // A scalar above 0xFFFF encodes as two UTF-16 code units; the index's
    // transcoded offset selects which of the two to return.
    guard scalar.value > 0xFFFF else {
      return UInt16(truncatingIfNeeded: scalar.value)
    }
    return scalar.utf16[i.transcodedOffset]
  }
}
|
||||
}
|
||||
extension String.UTF16View: CustomStringConvertible {
  /// A `String` built from the same guts as this view.
  @inlinable
  public var description: String {
    @inline(__always)
    get {
      return String(_guts)
    }
  }
}
|
||||
|
||||
extension String.UTF16View: CustomDebugStringConvertible {
  /// The debug description of `description`, wrapped in `StringUTF16(...)`.
  public var debugDescription: String {
    get {
      return "StringUTF16(\(self.description.debugDescription))"
    }
  }
}
|
||||
|
||||
extension String {
|
||||
/// A UTF-16 encoding of `self`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var utf16: UTF16View {
|
||||
get {
|
||||
return UTF16View(_guts)
|
||||
}
|
||||
set {
|
||||
self = String(describing: newValue)
|
||||
}
|
||||
@inline(__always) get { return UTF16View(_guts) }
|
||||
@inline(__always) set { self = String(newValue._guts) }
|
||||
}
|
||||
|
||||
/// Creates a string corresponding to the given sequence of UTF-16 code units.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable @inline(__always)
|
||||
@available(swift, introduced: 4.0)
|
||||
public init(_ utf16: UTF16View) {
|
||||
self = String(utf16._guts)
|
||||
self.init(utf16._guts)
|
||||
}
|
||||
|
||||
/// The index type for subscripting a string.
|
||||
public typealias UTF16Index = UTF16View.Index
|
||||
}
|
||||
|
||||
extension String.UTF16View : _SwiftStringView {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var _ephemeralContent : String { return _persistentContent }
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var _persistentContent : String { return String(self._guts) }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
var _wholeString : String {
|
||||
return String(_guts)
|
||||
#if _runtime(_ObjC)
|
||||
// These may become less important once <rdar://problem/19255291> is addressed.
|
||||
@available(
|
||||
*, unavailable,
|
||||
message: "Indexing a String's UTF16View requires a String.UTF16View.Index, which can be constructed from Int when Foundation is imported")
|
||||
public subscript(i: Int) -> UTF16.CodeUnit {
|
||||
Builtin.unreachable()
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
var _encodedOffsetRange : Range<Int> {
|
||||
return _offset..<_offset+_length
|
||||
@available(
|
||||
*, unavailable,
|
||||
message: "Slicing a String's UTF16View requires a Range<String.UTF16View.Index>, String.UTF16View.Index can be constructed from Int when Foundation is imported")
|
||||
public subscript(bounds: Range<Int>) -> UTF16View {
|
||||
Builtin.unreachable()
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Index conversions
|
||||
@@ -399,112 +395,75 @@ extension String.UTF16View : CustomReflectable {
|
||||
}
|
||||
}
|
||||
|
||||
extension String.UTF16View.Indices : BidirectionalCollection {
|
||||
public typealias Index = String.UTF16View.Index
|
||||
public typealias Indices = String.UTF16View.Indices
|
||||
public typealias SubSequence = String.UTF16View.Indices
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(
|
||||
_elements: String.UTF16View,
|
||||
startIndex: Index,
|
||||
endIndex: Index
|
||||
) {
|
||||
self._elements = _elements
|
||||
self._startIndex = startIndex
|
||||
self._endIndex = endIndex
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var startIndex: Index {
|
||||
return _startIndex
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var endIndex: Index {
|
||||
return _endIndex
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var indices: Indices {
|
||||
return self
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public subscript(i: Index) -> Index {
|
||||
// FIXME: swift-3-indexing-model: range check.
|
||||
return i
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public subscript(bounds: Range<Index>) -> String.UTF16View.Indices {
|
||||
// FIXME: swift-3-indexing-model: range check.
|
||||
return String.UTF16View.Indices(
|
||||
_elements: _elements,
|
||||
startIndex: bounds.lowerBound,
|
||||
endIndex: bounds.upperBound)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func index(after i: Index) -> Index {
|
||||
// FIXME: swift-3-indexing-model: range check.
|
||||
return _elements.index(after: i)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func formIndex(after i: inout Index) {
|
||||
// FIXME: swift-3-indexing-model: range check.
|
||||
_elements.formIndex(after: &i)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func index(before i: Index) -> Index {
|
||||
// FIXME: swift-3-indexing-model: range check.
|
||||
return _elements.index(before: i)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func formIndex(before i: inout Index) {
|
||||
// FIXME: swift-3-indexing-model: range check.
|
||||
_elements.formIndex(before: &i)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func index(_ i: Index, offsetBy n: Int) -> Index {
|
||||
// FIXME: swift-3-indexing-model: range check i?
|
||||
return _elements.index(i, offsetBy: n)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func index(
|
||||
_ i: Index, offsetBy n: Int, limitedBy limit: Index
|
||||
) -> Index? {
|
||||
// FIXME: swift-3-indexing-model: range check i?
|
||||
return _elements.index(i, offsetBy: n, limitedBy: limit)
|
||||
}
|
||||
|
||||
// TODO: swift-3-indexing-model - add docs
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func distance(from start: Index, to end: Index) -> Int {
|
||||
// FIXME: swift-3-indexing-model: range check start and end?
|
||||
return _elements.distance(from: start, to: end)
|
||||
}
|
||||
}
|
||||
|
||||
//===--- Slicing Support --------------------------------------------------===//
|
||||
/// In Swift 3.2, in the absence of type context,
|
||||
///
|
||||
/// someString.utf16[someString.utf16.startIndex..<someString.utf16.endIndex]
|
||||
///
|
||||
/// was deduced to be of type `String.UTF16View`. Provide a more-specific
|
||||
/// Swift-3-only `subscript` overload that continues to produce
|
||||
/// `String.UTF16View`.
|
||||
// Slicing
|
||||
extension String.UTF16View {
|
||||
public typealias SubSequence = Substring.UTF16View
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@available(swift, introduced: 4)
|
||||
public subscript(bounds: Range<Index>) -> String.UTF16View.SubSequence {
|
||||
return String.UTF16View.SubSequence(self, _bounds: bounds)
|
||||
public subscript(r: Range<Index>) -> Substring.UTF16View {
|
||||
return Substring.UTF16View(self, _bounds: r)
|
||||
}
|
||||
}
|
||||
|
||||
// Foreign string support
|
||||
extension String.UTF16View {
  /// Returns the index one code unit after `i` for a foreign string.
  ///
  /// Sanity-checks that `_guts.isForeign`. Foreign currently means NSString,
  /// so the result is simply the encoded offset plus one.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(after i: Index) -> Index {
    _sanityCheck(_guts.isForeign)

    let successorOffset = i.encodedOffset + 1
    return Index(encodedOffset: successorOffset)
  }

  /// Returns the index one code unit before `i` for a foreign string.
  ///
  /// Sanity-checks that `_guts.isForeign`. Foreign currently means NSString,
  /// so the result is simply the encoded offset minus one.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(before i: Index) -> Index {
    _sanityCheck(_guts.isForeign)

    let predecessorOffset = i.encodedOffset - 1
    return Index(encodedOffset: predecessorOffset)
  }

  /// Reads the UTF-16 code unit at `i` from a foreign string via
  /// `_guts.foreignUTF16CodeUnit(at:)`.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignSubscript(position i: Index) -> UTF16.CodeUnit {
    _sanityCheck(_guts.isForeign)

    // Foreign currently means NSString.
    let offset = i.encodedOffset
    return _guts.foreignUTF16CodeUnit(at: offset)
  }

  /// Returns the signed difference between the encoded offsets of `end` and
  /// `start` for a foreign string.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignDistance(from start: Index, to end: Index) -> Int {
    _sanityCheck(_guts.isForeign)

    // Foreign currently means NSString.
    let span = end.encodedOffset - start.encodedOffset
    return span
  }

  /// Offsets `i` by `n` code units for a foreign string, returning `nil` when
  /// `limit` lies in the direction of travel and would be passed.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(
    _ i: Index, offsetBy n: Int, limitedBy limit: Index
  ) -> Index? {
    _sanityCheck(_guts.isForeign)

    // Foreign currently means NSString.
    let room = limit.encodedOffset - i.encodedOffset
    let passesLimit: Bool
    if n > 0 {
      // Moving forward: the limit only applies when it is at or after `i`.
      passesLimit = room >= 0 && room < n
    } else {
      // Moving backward (or not at all): the limit only applies when it is
      // at or before `i`.
      passesLimit = room <= 0 && n < room
    }
    guard !passesLimit else {
      return nil
    }
    return Index(encodedOffset: i.encodedOffset + n)
  }

  /// Offsets `i` by `n` code units for a foreign string, with no limit.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(_ i: Index, offsetBy n: Int) -> Index {
    _sanityCheck(_guts.isForeign)

    // Foreign currently means NSString.
    let targetOffset = i.encodedOffset + n
    return Index(encodedOffset: targetOffset)
  }

}
|
||||
|
||||
@@ -85,330 +85,247 @@ extension String {
|
||||
/// // Prints "-17"
|
||||
/// print(String(s1.utf8.prefix(15)))
|
||||
/// // Prints "They call me 'B"
|
||||
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
public struct UTF8View
|
||||
: BidirectionalCollection,
|
||||
CustomStringConvertible,
|
||||
CustomDebugStringConvertible {
|
||||
|
||||
/// Underlying UTF-16-compatible representation
|
||||
@_fixed_layout
|
||||
public struct UTF8View {
|
||||
@usableFromInline
|
||||
internal var _guts: _StringGuts
|
||||
|
||||
/// Distances to `(startIndex, endIndex)` from the endpoints of _guts,
|
||||
/// measured in UTF-8 code units.
|
||||
///
|
||||
/// Note: this is *only* here to support legacy Swift3-style slicing where
|
||||
/// `s.utf8[i..<j]` produces a `String.UTF8View`, and should be removed when
|
||||
/// those semantics are no longer supported.
|
||||
@usableFromInline
|
||||
internal let _legacyOffsets: (start: Int8, end: Int8)
|
||||
|
||||
/// Flags indicating whether the limits of this view did not originally fall
|
||||
/// on grapheme cluster boundaries in the original string. This is used to
|
||||
/// emulate (undocumented) Swift 3 behavior where String.init?(_:) returned
|
||||
/// nil in such cases.
|
||||
///
|
||||
/// Note: this is *only* here to support legacy Swift3-style slicing where
|
||||
/// `s.utf8[i..<j]` produces a `String.UTF8View`, and should be removed when
|
||||
/// those semantics are no longer supported.
|
||||
@usableFromInline
|
||||
internal let _legacyPartialCharacters: (start: Bool, end: Bool)
|
||||
|
||||
/// Creates a view over `_guts`, optionally trimmed by legacy offsets.
///
/// Each legacy offset is narrowed to `Int8` for storage, start first and
/// then end.
@inlinable // FIXME(sil-serialize-all)
internal init(
  _ _guts: _StringGuts,
  legacyOffsets: (Int, Int) = (0, 0),
  legacyPartialCharacters: (Bool, Bool) = (false, false)
) {
  let (startOffset, endOffset) = legacyOffsets
  self._guts = _guts
  self._legacyOffsets = (Int8(startOffset), Int8(endOffset))
  self._legacyPartialCharacters = legacyPartialCharacters
}
|
||||
|
||||
public typealias Index = String.Index
|
||||
|
||||
/// The position of the first code unit when the UTF-8 view is nonempty.
///
/// For an empty UTF-8 view, `startIndex` equals `endIndex`.
@inlinable // FIXME(sil-serialize-all)
public var startIndex: Index {
  // ASCII storage needs no transcoding buffer in the index; anything else
  // gets one built at encoded offset 0.
  let origin: Index
  if _fastPath(_guts._isASCIIOrSmallASCII) {
    origin = Index(encodedOffset: 0)
  } else {
    origin = _nonASCIIIndex(atEncodedOffset: 0)
  }
  _sanityCheck(origin.encodedOffset == 0)

  let legacyStart = _legacyOffsets.start
  if _fastPath(legacyStart == 0) {
    return origin
  }

  // Legacy sliced view: skip the leading code units that were cut off.
  return index(origin, offsetBy: numericCast(legacyStart))
}
|
||||
|
||||
/// The "past the end" position, one greater than the last valid subscript
/// argument.
///
/// For an empty UTF-8 view, `endIndex` equals `startIndex`.
@inlinable // FIXME(sil-serialize-all)
public var endIndex: Index {
  let legacyEnd = _legacyOffsets.end
  _sanityCheck(legacyEnd >= -3 && legacyEnd <= 0,
    "out of bounds legacy end")

  let pastEnd = Index(encodedOffset: _guts.endIndex)
  if _fastPath(legacyEnd == 0) {
    return pastEnd
  }

  // Legacy sliced view: step back one code unit per trimmed position.
  switch legacyEnd {
  case -1:
    return index(before: pastEnd)
  case -2:
    return index(before: index(before: pastEnd))
  case -3:
    return index(before: index(before: index(before: pastEnd)))
  default:
    Builtin.unreachable()
  }
}
|
||||
|
||||
/// Builds an index at encoded offset `n` for non-ASCII storage, carrying a
/// buffer of UTF-8 code units transcoded from the code units starting at `n`.
///
/// Returns `endIndex` when `n` equals `_guts.count`.
@inline(never)
@_effects(releasenone)
@usableFromInline
internal func _nonASCIIIndex(atEncodedOffset n: Int) -> Index {
  _sanityCheck(!_guts._isASCIIOrSmallASCII)
  let gutsCount = _guts.count
  guard n != gutsCount else { return endIndex }

  let transcoded: Index._UTF8Buffer = _visitGuts(
    _guts, range: (n..<gutsCount, performBoundsCheck: true),
    ascii: { _ in
      // Excluded by the sanity check above.
      Builtin.unreachable()
      /* return Index._UTF8Buffer() */ },
    utf16: { codeUnits in
      var source = codeUnits.makeIterator()
      return UTF8View._fillBuffer(from: &source) },
    opaque: { codeUnits in
      var source = codeUnits.makeIterator()
      return UTF8View._fillBuffer(from: &source) }
  )

  return Index(encodedOffset: n, transcodedOffset: 0, buffer: transcoded)
}
|
||||
|
||||
/// Transcodes UTF-16 code units from `i` into a UTF-8 buffer, stopping when
/// the input is exhausted or the next encoded scalar would not fit.
///
/// Invalid UTF-16 is represented by the encoded replacement character.
@inline(__always)
internal
static func _fillBuffer<Iter: IteratorProtocol>(
  from i: inout Iter
) -> Index._UTF8Buffer where Iter.Element == UInt16 {
  var parser = UTF16.ForwardParser()
  var filled = Index._UTF8Buffer()
  while true {
    // Pick the UTF-8 encoding of whatever the parser produced next.
    let encoded: Unicode.UTF8.EncodedScalar
    switch parser.parseScalar(from: &i) {
    case .emptyInput:
      return filled
    case .error:
      encoded = Unicode.UTF8.encodedReplacementCharacter
    case .valid(let scalar16):
      encoded = Unicode.UTF8.transcode(scalar16, from: Unicode.UTF16.self)
        ._unsafelyUnwrappedUnchecked
    }
    // Never split a scalar across buffers: stop if it does not fit whole.
    guard filled.count + encoded.count <= filled.capacity else {
      return filled
    }
    filled.append(contentsOf: encoded)
  }
}
|
||||
|
||||
/// Returns the position immediately following `i`.
///
/// - Precondition: The next position is representable.
@inlinable // FIXME(sil-serialize-all)
@inline(__always)
public func index(after i: Index) -> Index {
  guard _fastPath(_guts._isASCIIOrSmallASCII) else {
    return _nonASCIIIndex(after: i)
  }

  // ASCII: one code unit per position, so just bump the encoded offset.
  let offset = i.encodedOffset
  precondition(offset < _guts.count)
  return Index(encodedOffset: offset + 1)
}
|
||||
|
||||
/// Advances `i` by one UTF-8 code unit for non-ASCII storage.
///
/// The index carries a buffer of transcoded UTF-8; this either moves within
/// the current scalar, moves to the next scalar already in the buffer, or
/// refills the buffer at the next scalar's encoded offset.
@inline(never)
@_effects(releasenone)
@usableFromInline
internal func _nonASCIIIndex(after i: Index) -> Index {
  _sanityCheck(!_guts._isASCIIOrSmallASCII)

  var cursor = i

  // Make sure the cursor carries a UTF-8 buffer.
  if _slowPath(cursor.utf8Buffer == nil) {
    cursor = _nonASCIIIndex(atEncodedOffset: cursor.encodedOffset)
    precondition(cursor != endIndex, "Index out of bounds")
  }

  let cached = cursor.utf8Buffer._unsafelyUnwrappedUnchecked
  let leadByte = cached.first._unsafelyUnwrappedUnchecked
  // The count of leading 1 bits in the lead byte gives the scalar's UTF-8
  // length (0 for ASCII).
  let leadingOnes = (~leadByte).leadingZeroBitCount

  var remaining = cached
  var width16 = 1

  if _fastPath(leadingOnes == 0) {
    // ASCII in the buffer; just consume it.
    remaining.removeFirst()
  } else {
    // Number of bytes of this scalar consumed once we step.
    let consumed8 = cursor.transcodedOffset + 1
    if _fastPath(consumed8 < leadingOnes) {
      // Still inside the scalar: advance within it.
      return Index(
        encodedOffset: cursor.encodedOffset,
        transcodedOffset: consumed8, buffer: cached)
    }
    // Scalar boundary reached: derive the UTF-16 width from the UTF-8 width
    // (4 bytes -> 2 code units, otherwise 1).
    width16 = (consumed8 >> 2) + 1
    remaining.removeFirst(consumed8)
  }

  let nextOffset = cursor.encodedOffset + width16
  if _fastPath(!remaining.isEmpty) {
    return Index(
      encodedOffset: nextOffset,
      transcodedOffset: 0,
      buffer: remaining)
  }
  // Nothing left in the buffer; refill it.
  return _nonASCIIIndex(atEncodedOffset: nextOffset)
}
|
||||
|
||||
/// Returns the position immediately preceding `i`.
@inlinable // FIXME(sil-serialize-all)
public func index(before i: Index) -> Index {
  guard _fastPath(_guts._isASCIIOrSmallASCII) else {
    return _nonASCIIIndex(before: i)
  }

  // ASCII: one code unit per position, so just decrement the encoded offset.
  let offset = i.encodedOffset
  precondition(offset > 0)
  return Index(encodedOffset: offset - 1)
}
|
||||
|
||||
/// Steps `i` back by one UTF-8 code unit for non-ASCII storage.
@inline(never)
@_effects(releasenone)
@usableFromInline
internal func _nonASCIIIndex(before i: Index) -> Index {
  _sanityCheck(!_guts._isASCIIOrSmallASCII)

  // Mid-scalar: stay on the same scalar and back up within its buffer.
  let withinScalar = i.transcodedOffset
  guard withinScalar == 0 else {
    _sanityCheck(i.utf8Buffer != nil)
    return Index(
      encodedOffset: i.encodedOffset,
      transcodedOffset: withinScalar &- 1,
      buffer: i.utf8Buffer._unsafelyUnwrappedUnchecked)
  }

  // Scalar boundary (also the path for an index without a UTF-8 buffer).
  _precondition(i.encodedOffset > 0, "Can't move before startIndex")

  // Decode the scalar that ends here and land on its last UTF-8 byte.
  let scalar = _guts.unicodeScalar(endingAt: i.encodedOffset)
  let encoded = Unicode.UTF8.encode(scalar)._unsafelyUnwrappedUnchecked
  let width8 = encoded.count
  // A 4-byte UTF-8 scalar occupies two UTF-16 code units; shorter ones one.
  let width16 = width8 < 4 ? 1 : 2
  return Index(
    encodedOffset: i.encodedOffset &- width16,
    transcodedOffset: width8 &- 1,
    buffer: String.Index._UTF8Buffer(encoded))
}
|
||||
|
||||
/// Returns the number of UTF-8 code units between `i` and `j` (negative
/// when `j` precedes `i`).
@inlinable // FIXME(sil-serialize-all)
public func distance(from i: Index, to j: Index) -> Int {
  guard _fastPath(_guts._isASCIIOrSmallASCII) else {
    return _nonASCIIDistance(from: i, to: j)
  }
  // ASCII: encoded offsets are UTF-8 offsets.
  return j.encodedOffset - i.encodedOffset
}
|
||||
|
||||
/// Computes the signed UTF-8 distance between `i` and `j` for non-ASCII
/// storage.
///
/// The magnitude is the UTF-8 count of the encoded range between the two
/// indices, adjusted by the difference of their transcoded offsets.
@inline(never)
@_effects(releasenone)
@usableFromInline
internal func _nonASCIIDistance(from i: Index, to j: Index) -> Int {
  let forwards = j >= i

  // Always measure from the lower index to the upper one.
  let (lower, upper) = forwards ? (i, j) : (j, i)

  let intraScalar = upper.transcodedOffset - lower.transcodedOffset
  let magnitude = intraScalar
    + _gutsNonASCIIUTF8Count(lower.encodedOffset..<upper.encodedOffset)
  return forwards ? magnitude : -magnitude
}
|
||||
|
||||
/// Accesses the UTF-8 code unit at `position`.
///
/// This example prints the value of a string's first UTF-8 code unit:
///
///     let greeting = "Hello, friend!"
///     let i = greeting.utf8.startIndex
///     print("First character's UTF-8 code unit: \(greeting.utf8[i])")
///     // Prints "First character's UTF-8 code unit: 72"
///
/// - Parameter position: A valid index of the view. `position`
///   must be less than the view's end index.
@inlinable // FIXME(sil-serialize-all)
public subscript(position: Index) -> UTF8.CodeUnit {
  @inline(__always)
  get {
    guard _fastPath(_guts._isASCIIOrSmallASCII) else {
      return _nonASCIISubscript(position: position)
    }

    let unitOffset = position.encodedOffset
    _precondition(unitOffset < _guts.count, "Index out of bounds")

    // Small strings are read from the small form; otherwise read from the
    // unmanaged ASCII buffer.
    if _guts._isSmall {
      return _guts._smallUTF8String[unitOffset]
    } else {
      return _guts._unmanagedASCIIView.buffer[unitOffset]
    }
  }
}
|
||||
|
||||
/// Reads the UTF-8 code unit at `position` for non-ASCII storage.
///
/// If the index carries no UTF-8 buffer, one is built at its encoded offset
/// (trapping when that lands at or past `endIndex`) before reading.
@inline(never)
@_effects(releasenone)
@usableFromInline
internal func _nonASCIISubscript(position: Index) -> UTF8.CodeUnit {
  _sanityCheck(!_guts._isASCIIOrSmallASCII)
  var cursor = position
  while true {
    guard let cached = cursor.utf8Buffer else {
      // No transcoded buffer yet: build one, then try again.
      cursor = _nonASCIIIndex(atEncodedOffset: cursor.encodedOffset)
      precondition(cursor < endIndex, "Index out of bounds")
      continue
    }
    _onFastPath()
    let slot = cached.index(cached.startIndex, offsetBy: cursor.transcodedOffset)
    return cached[slot]
  }
}
|
||||
|
||||
/// A `String` constructed from this view's `_guts`.
@inlinable // FIXME(sil-serialize-all)
|
||||
public var description: String {
|
||||
return String(_guts)
|
||||
}
|
||||
|
||||
public var debugDescription: String {
|
||||
return "UTF8View(\(self.description.debugDescription))"
|
||||
/// Creates a UTF-8 view over `guts`, then runs `_invariantCheck()`.
@inlinable @inline(__always)
|
||||
internal init(_ guts: _StringGuts) {
|
||||
self._guts = guts
|
||||
_invariantCheck()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A UTF-8 encoding of `self`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var utf8: UTF8View {
|
||||
get {
|
||||
return UTF8View(self._guts)
|
||||
extension String.UTF8View {
|
||||
/// Hook for internal consistency checks on the view.
///
/// The body is currently empty: the `INTERNAL_CHECKS_ENABLED` section
/// contains no checks, so calling this has no effect in any configuration.
@inlinable @inline(__always)
|
||||
internal func _invariantCheck() {
|
||||
#if INTERNAL_CHECKS_ENABLED
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(UTF8 merge): when this refactoring lands on master and we can get a
|
||||
// toolchain, remove these and use the single-underscore ones. Will still need
|
||||
// to solve access control somehow, perhaps shims need to expose them...
|
||||
extension BidirectionalCollection {
  /// Do not use this method directly; call advanced(by: n) instead.
  ///
  /// Steps `i` forward `n` times with `formIndex(after:)`. Traps when `n` is
  /// negative.
  @inlinable
  @inline(__always)
  internal func __advanceForward(_ i: Index, by n: Int) -> Index {
    _precondition(n >= 0,
      "Only BidirectionalCollections can be advanced by a negative amount")

    var cursor = i
    var stepsLeft = n
    while stepsLeft > 0 {
      formIndex(after: &cursor)
      stepsLeft -= 1
    }
    return cursor
  }

  /// Do not use this method directly; call advanced(by: n, limit) instead.
  ///
  /// Steps `i` forward `n` times, returning `nil` if `limit` is reached
  /// while steps remain. Traps when `n` is negative.
  @inlinable
  @inline(__always)
  internal func __advanceForward(
    _ i: Index, by n: Int, limitedBy limit: Index
  ) -> Index? {
    _precondition(n >= 0,
      "Only BidirectionalCollections can be advanced by a negative amount")

    var cursor = i
    var stepsLeft = n
    while stepsLeft > 0 {
      guard cursor != limit else {
        return nil
      }
      formIndex(after: &cursor)
      stepsLeft -= 1
    }
    return cursor
  }

  /// Returns the index `n` steps from `i`, walking forward for non-negative
  /// `n` and backward otherwise.
  @inlinable // FIXME(sil-serialize-all)
  public func __index(_ i: Index, offsetBy n: Int) -> Index {
    guard n < 0 else {
      return __advanceForward(i, by: n)
    }
    var cursor = i
    var stepsLeft = n
    while stepsLeft < 0 {
      formIndex(before: &cursor)
      stepsLeft += 1
    }
    return cursor
  }

  /// Returns the index `n` steps from `i`, or `nil` if `limit` is reached
  /// while steps remain in the direction of travel.
  @inlinable // FIXME(sil-serialize-all)
  public func __index(
    _ i: Index, offsetBy n: Int, limitedBy limit: Index
  ) -> Index? {
    guard n < 0 else {
      return __advanceForward(i, by: n, limitedBy: limit)
    }
    var cursor = i
    var stepsLeft = n
    while stepsLeft < 0 {
      guard cursor != limit else {
        return nil
      }
      formIndex(before: &cursor)
      stepsLeft += 1
    }
    return cursor
  }

  /// Counts the steps from `start` to `end` by walking one index at a time;
  /// the result is negative when `end` precedes `start`.
  @inlinable // FIXME(sil-serialize-all)
  internal func __distance(from start: Index, to end: Index) -> Int {
    var cursor = start
    var steps = 0

    if cursor < end {
      repeat {
        steps += 1
        formIndex(after: &cursor)
      } while cursor != end
    } else if cursor > end {
      repeat {
        steps -= 1
        formIndex(before: &cursor)
      } while cursor != end
    }

    return steps
  }
}
|
||||
|
||||
extension String.UTF8View: BidirectionalCollection {
  public typealias Index = String.Index

  public typealias Element = UTF8.CodeUnit

  /// The position of the first code unit if the UTF-8 view is
  /// nonempty.
  ///
  /// If the UTF-8 view is empty, `startIndex` is equal to `endIndex`.
  @inlinable
  public var startIndex: Index {
    @inline(__always) get { return Index(encodedOffset: 0) }
  }

  /// The "past the end" position---that is, the position one
  /// greater than the last valid subscript argument.
  ///
  /// In an empty UTF-8 view, `endIndex` is equal to `startIndex`.
  @inlinable
  public var endIndex: Index {
    @inline(__always) get { return Index(encodedOffset: _guts.count) }
  }

  /// Returns the next consecutive position after `i`.
  ///
  /// - Precondition: The next position is representable.
  @inlinable @inline(__always)
  public func index(after i: Index) -> Index {
    if _fastPath(_guts.isFastUTF8) {
      // Fast UTF-8: positions are consecutive encoded offsets.
      return Index(encodedOffset: i.encodedOffset &+ 1)
    }

    return _foreignIndex(after: i)
  }

  /// Returns the position immediately before `i`.
  ///
  /// - Precondition: `i` is not at encoded offset zero.
  @inlinable @inline(__always)
  public func index(before i: Index) -> Index {
    // Use the stdlib-internal check, matching every other bounds check in
    // this extension.
    _precondition(i.encodedOffset > 0)
    if _fastPath(_guts.isFastUTF8) {
      return Index(encodedOffset: i.encodedOffset &- 1)
    }

    return _foreignIndex(before: i)
  }

  /// Returns the position `n` code units away from `i`.
  ///
  /// On the fast UTF-8 path the resulting offset must lie in
  /// `0..._guts.count`, otherwise this traps.
  @inlinable @inline(__always)
  public func index(_ i: Index, offsetBy n: Int) -> Index {
    if _fastPath(_guts.isFastUTF8) {
      let offset = i.encodedOffset + n
      _precondition(offset >= 0 && offset <= _guts.count)
      return Index(encodedOffset: offset)
    }

    return _foreignIndex(i, offsetBy: n)
  }

  /// Returns the position `n` code units away from `i`, or `nil` if that
  /// would move past `limit` in the direction of travel.
  ///
  /// A `limit` that lies behind `i` (relative to the direction of travel)
  /// is ignored.
  @inlinable @inline(__always)
  public func index(
    _ i: Index, offsetBy n: Int, limitedBy limit: Index
  ) -> Index? {
    if _fastPath(_guts.isFastUTF8) {
      // Check the limit: ignore limit if it precedes `i` (in the correct
      // direction), otherwise must not be beyond limit (in the correct
      // direction).
      let iOffset = i.encodedOffset
      let result = iOffset + n
      let limitOffset = limit.encodedOffset
      if n >= 0 {
        guard limitOffset < iOffset || result <= limitOffset else { return nil }
      } else {
        guard limitOffset > iOffset || result >= limitOffset else { return nil }
      }
      return Index(encodedOffset: result)
    }

    return _foreignIndex(i, offsetBy: n, limitedBy: limit)
  }

  /// Returns the number of code units from `i` to `j` (negative when `j`
  /// precedes `i`).
  @inlinable @inline(__always)
  public func distance(from i: Index, to j: Index) -> Int {
    if _fastPath(_guts.isFastUTF8) {
      return j.encodedOffset &- i.encodedOffset
    }
    return _foreignDistance(from: i, to: j)
  }

  /// Accesses the code unit at the given position.
  ///
  /// The following example uses the subscript to print the value of a
  /// string's first UTF-8 code unit.
  ///
  ///     let greeting = "Hello, friend!"
  ///     let i = greeting.utf8.startIndex
  ///     print("First character's UTF-8 code unit: \(greeting.utf8[i])")
  ///     // Prints "First character's UTF-8 code unit: 72"
  ///
  /// - Parameter position: A valid index of the view. `position`
  ///   must be less than the view's end index.
  @inlinable
  public subscript(i: Index) -> UTF8.CodeUnit {
    @inline(__always) get {
      _precondition(i.encodedOffset >= 0 && i < endIndex)
      if _fastPath(_guts.isFastUTF8) {
        return _guts.withFastUTF8 { utf8 in utf8[i.encodedOffset] }
      }

      return _foreignSubscript(position: i)
    }
  }
}
|
||||
|
||||
extension String.UTF8View: CustomStringConvertible {
  /// A `String` built from this view's underlying `_guts`.
  @inlinable
  public var description: String {
    // `String(_guts)` already yields a `String`; no second conversion needed.
    @inline(__always) get { return String(_guts) }
  }
}
|
||||
|
||||
extension String.UTF8View: CustomDebugStringConvertible {
|
||||
/// The debug description of `description`, wrapped as `UTF8View(...)`.
public var debugDescription: String {
|
||||
return "UTF8View(\(self.description.debugDescription))"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extension String {
|
||||
/// A UTF-8 encoding of `self`.
|
||||
@inlinable
|
||||
public var utf8: UTF8View {
|
||||
@inline(__always) get { return UTF8View(self._guts) }
|
||||
set {
|
||||
self = String(describing: newValue)
|
||||
unimplemented_utf8()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -427,208 +344,65 @@ extension String {
|
||||
/// }
|
||||
/// // Prints "6"
|
||||
public var utf8CString: ContiguousArray<CChar> {
  // Anything that is not fast UTF-8 goes through the element-by-element path.
  guard _fastPath(_guts.isFastUTF8) else {
    return _slowUTF8CString()
  }

  // Copy the UTF-8 storage reinterpreted as CChar, then add the terminator.
  var cString = _guts.withFastUTF8 { bytes in
    return ContiguousArray(bytes._asCChar)
  }
  cString.append(0)
  return cString
}
|
||||
|
||||
@usableFromInline @inline(never) // slow-path
|
||||
internal func _slowUTF8CString() -> ContiguousArray<CChar> {
|
||||
var result = ContiguousArray<CChar>()
|
||||
result.reserveCapacity(utf8.count + 1)
|
||||
for c in utf8 {
|
||||
result.reserveCapacity(self._guts.count + 1)
|
||||
for c in self.utf8 {
|
||||
result.append(CChar(bitPattern: c))
|
||||
}
|
||||
result.append(0)
|
||||
return result
|
||||
}
|
||||
|
||||
/// Calls `body` with a buffer of this string's UTF-8 code units.
///
/// ASCII strings pass their existing ASCII buffer directly. Other strings
/// are first copied into a temporary array that also carries a trailing
/// zero code unit.
@inlinable // FIXME(sil-serialize-all)
internal func _withUnsafeBufferPointerToUTF8<R>(
  _ body: (UnsafeBufferPointer<UTF8.CodeUnit>) throws -> R
) rethrows -> R {
  guard !_guts.isASCII else {
    return try body(_guts._unmanagedASCIIView.buffer)
  }

  var scratch = ContiguousArray<UTF8.CodeUnit>()
  scratch.reserveCapacity(utf8.count + 1)
  scratch.append(contentsOf: utf8)
  scratch.append(0)
  return try scratch.withUnsafeBufferPointer(body)
}
|
||||
|
||||
/// Creates a string corresponding to the given sequence of UTF-8 code units.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@available(swift, introduced: 4.0, message:
|
||||
"Please use failable String.init?(_:UTF8View) when in Swift 3.2 mode")
|
||||
"Please use failable String.init?(_:UTF8View) when in Swift 3.2 mode")
|
||||
@inlinable @inline(__always)
|
||||
public init(_ utf8: UTF8View) {
|
||||
self = String(utf8._guts)
|
||||
}
|
||||
|
||||
/// The index type for subscripting a string.
|
||||
public typealias UTF8Index = UTF8View.Index
|
||||
}
|
||||
|
||||
extension String.UTF8View : _SwiftStringView {
  /// A `String` built from this view's `_guts`.
  @inlinable // FIXME(sil-serialize-all)
  internal var _persistentContent: String {
    let whole = String(_guts)
    return whole
  }

  /// The whole string this view was taken from, built from `_guts`.
  @inlinable // FIXME(sil-serialize-all)
  var _wholeString: String {
    let whole = String(self._guts)
    return whole
  }

  /// The encoded-offset range covered by this view: all of `_guts`.
  @inlinable // FIXME(sil-serialize-all)
  var _encodedOffsetRange: Range<Int> {
    let upperBound = _guts.count
    return 0..<upperBound
  }
}
|
||||
// TODO(UTF8): design specialized iterator, rather than default indexing one
|
||||
//extension String.UTF8View {
|
||||
// @_fixed_layout // FIXME(sil-serialize-all)
|
||||
// public struct Iterator {
|
||||
// // TODO(UTF8):
|
||||
// }
|
||||
//
|
||||
// public func makeIterator() -> Iterator {
|
||||
// unimplemented_utf8()
|
||||
// }
|
||||
//}
|
||||
//
|
||||
//extension String.UTF8View.Iterator : IteratorProtocol {
|
||||
// public typealias Element = String.UTF8View.Element
|
||||
//
|
||||
// @inlinable @inline(__always)
|
||||
// public mutating func next() -> Unicode.UTF8.CodeUnit? {
|
||||
// unimplemented_utf8()
|
||||
// }
|
||||
//}
|
||||
|
||||
extension String.UTF8View {
  /// Iterator over the UTF-8 code units of a string.
  ///
  /// It tracks a position (`_nextOffset`) in the underlying guts up to
  /// `_endOffset`, plus a small buffer of already-encoded UTF-8 code units.
  @_fixed_layout // FIXME(sil-serialize-all)
  public struct Iterator {
    /// Storage type for code units that have been encoded but not yet
    /// returned.
    @usableFromInline
    internal typealias _OutputBuffer = _ValidUTF8Buffer<UInt64>

    /// The string storage being iterated.
    @usableFromInline
    internal let _guts: _StringGuts

    /// Offset in `_guts` at which iteration stops.
    @usableFromInline
    internal let _endOffset: Int

    /// Offset in `_guts` of the next code unit to consume.
    @usableFromInline // FIXME(sil-serialize-all)
    internal var _nextOffset: Int

    /// Encoded UTF-8 code units waiting to be handed out by `next()`.
    @usableFromInline // FIXME(sil-serialize-all)
    internal var _buffer: _OutputBuffer
  }

  /// Returns an iterator over this view's code units.
  public func makeIterator() -> Iterator {
    let iterator = Iterator(self)
    return iterator
  }
}
|
||||
|
||||
extension String.UTF8View.Iterator : IteratorProtocol {
  public typealias Element = String.UTF8View.Element

  /// Creates an iterator positioned at offset 0 of `utf8`'s guts, with an
  /// empty output buffer.
  @inlinable // FIXME(sil-serialize-all)
  internal init(_ utf8: String.UTF8View) {
    let guts = utf8._guts
    self._guts = guts
    self._endOffset = guts.count
    self._nextOffset = 0
    self._buffer = _OutputBuffer()
  }

  /// Puts the iterator into its exhausted state: position at the end and an
  /// empty output buffer.
  internal mutating func _clear() {
    self._buffer = _OutputBuffer()
    self._nextOffset = self._endOffset
  }

  /// Returns the next UTF-8 code unit, or `nil` once both the input and the
  /// output buffer are exhausted.
  @inlinable // FIXME(sil-serialize-all)
  @inline(__always)
  public mutating func next() -> Unicode.UTF8.CodeUnit? {
    // Done only when the input is consumed AND nothing is buffered.
    if _slowPath(_nextOffset == _endOffset) && _slowPath(_buffer.isEmpty) {
      return nil
    }

    // ASCII storage: read the code unit in place and advance.
    if _guts.isASCII {
      let unit = _guts._unmanagedASCIIView.buffer[_nextOffset]
      _nextOffset += 1
      return unit
    }
    // Small strings: read from the small form in place and advance.
    if _guts._isSmall {
      let unit = _guts._smallUTF8String[_nextOffset]
      _nextOffset += 1
      return unit
    }

    // Otherwise drain the output buffer, refilling it when it runs dry.
    guard _fastPath(!_buffer.isEmpty) else {
      return _fillBuffer()
    }
    return _buffer.removeFirst()
  }

  /// Refills the output buffer from the guts and returns its first code
  /// unit, dispatching to the opaque path when needed.
  @usableFromInline
  @inline(never)
  internal mutating func _fillBuffer() -> Unicode.UTF8.CodeUnit {
    _sanityCheck(!_guts.isASCII, "next() already checks for known ASCII")
    guard _fastPath(!_guts._isOpaque) else {
      return _opaqueFillBuffer()
    }

    let first = _fillBuffer(from: _guts._unmanagedUTF16View)
    _fixLifetime(_guts)
    return first
  }

  /// Refills the output buffer from opaque guts and returns its first code
  /// unit.
  @usableFromInline // @opaque
  internal mutating func _opaqueFillBuffer() -> Unicode.UTF8.CodeUnit {
    _sanityCheck(_guts._isOpaque)
    let first = _fillBuffer(from: _guts._asOpaque())
    _fixLifetime(_guts)
    return first
  }

  /// Encodes code units from `variant`, starting at `_nextOffset`, into the
  /// output buffer and returns the first buffered code unit.
  // NOT @usableFromInline
  internal mutating func _fillBuffer<V: _StringVariant>(
    from variant: V
  ) -> Unicode.UTF8.CodeUnit {
    // Eat as many ASCII characters as possible: each takes exactly one byte,
    // so at most `capacity` of them fit.
    let asciiEnd = Swift.min(_nextOffset + _buffer.capacity, _endOffset)
    let asciiCandidates = variant[_nextOffset..<asciiEnd]
    for unit in asciiCandidates {
      guard UTF16._isASCII(unit) else { break }
      _buffer.append(UInt8(truncatingIfNeeded: unit))
      _nextOffset += 1
    }
    if _nextOffset == asciiEnd {
      return _buffer.removeFirst()
    }

    // Decode UTF-16, encode UTF-8, stopping before a scalar that would not
    // fit whole.
    var scalars = variant[_nextOffset..<_endOffset].makeUnicodeScalarIterator()
    while let scalar = scalars.next() {
      let encoded = UTF8.encode(scalar)._unsafelyUnwrappedUnchecked
      let width8 = encoded.count
      if _buffer.count + width8 > _buffer.capacity { break }
      _buffer.append(contentsOf: encoded)
      // A 4-byte scalar consumed two UTF-16 code units; shorter ones one.
      _nextOffset += 1 &+ (width8 &>> 2)
    }
    return _buffer.removeFirst()
  }
}
|
||||
|
||||
// Contribution of one UTF-16 code unit to a running UTF-8 length. `prev` is
// the preceding code unit: a trailing surrogate that follows a leading
// surrogate completes a non-BMP scalar (3 + 1 = 4 bytes in total), while an
// unpaired one is counted as 3 bytes.
internal func _utf8Count(_ utf16CU: UInt16, prev: UInt16) -> Int {
  if utf16CU < 0x80 {
    return 1
  }
  if utf16CU < 0x800 {
    return 2
  }
  let isTrailSurrogate = utf16CU >= 0xDC00 && utf16CU < 0xE000
  if isTrailSurrogate && UTF16.isLeadSurrogate(prev) {
    return 1
  }
  return 3
}
|
||||
|
||||
extension String.UTF8View {
|
||||
/// Returns the number of UTF-8 code units needed for the UTF-16 code units
/// in `source`, summing `_utf8Count` over each unit and its predecessor.
internal static func _count<Source: RandomAccessCollection>(
  fromUTF16 source: Source
) -> Int where Source.Element == Unicode.UTF16.CodeUnit {
  var total = 0
  // The first code unit is paired with 0, which is not a lead surrogate.
  var previous: Unicode.UTF16.CodeUnit = 0
  var units = source.makeIterator()
  while let unit = units.next() {
    total += _utf8Count(unit, prev: previous)
    previous = unit
  }
  return total
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var count: Int {
|
||||
let gutsCount = _guts.count
|
||||
if _fastPath(_guts._isASCIIOrSmallASCII) { return gutsCount }
|
||||
return _gutsNonASCIIUTF8Count(0..<gutsCount)
|
||||
}
|
||||
|
||||
@inline(never)
|
||||
@_effects(releasenone)
|
||||
@usableFromInline
|
||||
internal func _gutsNonASCIIUTF8Count(
|
||||
_ range: Range<Int>
|
||||
) -> Int {
|
||||
_sanityCheck(!_guts._isASCIIOrSmallASCII)
|
||||
return _visitGuts(_guts, range: (range, performBoundsCheck: true),
|
||||
ascii: { ascii in return ascii.count },
|
||||
utf16: { utf16 in return String.UTF8View._count(fromUTF16: utf16) },
|
||||
opaque: { opaque in return String.UTF8View._count(fromUTF16: opaque) }
|
||||
)
|
||||
@inline(__always) get {
|
||||
if _fastPath(_guts.isFastUTF8) {
|
||||
return _guts.count
|
||||
}
|
||||
return _foreignCount()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -673,13 +447,7 @@ extension String.UTF8View.Index {
|
||||
/// - target: The `UTF8View` in which to find the new position.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public init?(_ idx: String.Index, within target: String.UTF8View) {
|
||||
guard idx.isUTF8 ||
|
||||
String.UnicodeScalarView(target._guts)._isOnUnicodeScalarBoundary(idx)
|
||||
else {
|
||||
return nil
|
||||
}
|
||||
|
||||
self = idx
|
||||
unimplemented_utf8()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -691,6 +459,7 @@ extension String.UTF8View : CustomReflectable {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(UTF8): Can we just unify this view?
|
||||
//===--- Slicing Support --------------------------------------------------===//
|
||||
/// In Swift 3.2, in the absence of type context,
|
||||
///
|
||||
@@ -702,10 +471,10 @@ extension String.UTF8View : CustomReflectable {
|
||||
extension String.UTF8View {
|
||||
public typealias SubSequence = Substring.UTF8View
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
@available(swift, introduced: 4)
|
||||
public subscript(r: Range<Index>) -> String.UTF8View.SubSequence {
|
||||
return String.UTF8View.SubSequence(self, _bounds: r)
|
||||
return Substring.UTF8View(self, _bounds: r)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -713,37 +482,119 @@ extension String.UTF8View {
|
||||
/// Copies `self` into the supplied buffer.
|
||||
///
|
||||
/// - Precondition: The memory in `self` is uninitialized. The buffer must
|
||||
/// contain sufficient uninitialized memory to accommodate `source.underestimatedCount`.
|
||||
/// contain sufficient uninitialized memory to accommodate
|
||||
/// `source.underestimatedCount`.
|
||||
///
|
||||
/// - Postcondition: The `Pointee`s at `buffer[startIndex..<returned index]` are
|
||||
/// initialized.
|
||||
/// - Postcondition: The `Pointee`s at `buffer[startIndex..<returned index]`
|
||||
/// are initialized.
|
||||
@inlinable @inline(__always)
|
||||
public func _copyContents(
|
||||
initializing buffer: UnsafeMutableBufferPointer<Iterator.Element>
|
||||
) -> (Iterator,UnsafeMutableBufferPointer<Iterator.Element>.Index) {
|
||||
guard var ptr = buffer.baseAddress else {
|
||||
) -> (Iterator, UnsafeMutableBufferPointer<Iterator.Element>.Index) {
|
||||
guard buffer.baseAddress != nil else {
|
||||
_preconditionFailure(
|
||||
"Attempt to copy string contents into nil buffer pointer")
|
||||
}
|
||||
var it = self.makeIterator()
|
||||
|
||||
if _guts.isASCII {
|
||||
defer { _fixLifetime(_guts) }
|
||||
let asciiView = _guts._unmanagedASCIIView
|
||||
_precondition(asciiView.count <= buffer.count,
|
||||
guard let written = _guts.copyUTF8(into: buffer) else {
|
||||
_preconditionFailure(
|
||||
"Insufficient space allocated to copy string contents")
|
||||
ptr.initialize(from: asciiView.start, count: asciiView.count)
|
||||
it._clear()
|
||||
return (it, buffer.index(buffer.startIndex, offsetBy: asciiView.count))
|
||||
}
|
||||
else {
|
||||
for idx in buffer.startIndex..<buffer.count {
|
||||
guard let x = it.next() else {
|
||||
return (it, idx)
|
||||
}
|
||||
ptr.initialize(to: x)
|
||||
ptr += 1
|
||||
}
|
||||
return (it,buffer.endIndex)
|
||||
}
|
||||
|
||||
let it = String().utf8.makeIterator()
|
||||
return (it, buffer.index(buffer.startIndex, offsetBy: written))
|
||||
}
|
||||
}
|
||||
|
||||
// Foreign string support
|
||||
extension String.UTF8View {
  /// Returns the position immediately after `i` when the string is foreign.
  ///
  /// The code unit at `i.encodedOffset` is read as UTF-16; positions inside
  /// its UTF-8 encoding are tracked through `i.transcodedOffset`.
  ///
  /// - Parameter i: A valid index of the view, less than `endIndex`.
  /// - Returns: The index of the next UTF-8 code unit.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(after i: Index) -> Index {
    _sanityCheck(_guts.isForeign)

    let cu = _guts.foreignUTF16CodeUnit(at: i.encodedOffset)
    // NOTE(review): presumably the UTF-8 length of the scalar that `cu`
    // begins -- confirm against `_numTranscodedUTF8CodeUnits`.
    let len = _numTranscodedUTF8CodeUnits(cu)

    if len == 1 {
      // A single-byte scalar has no interior positions.
      _sanityCheck(i.transcodedOffset == 0)
      return Index(encodedOffset: i.encodedOffset + 1)
    }

    // Check if we're still transcoding sub-scalar
    if i.transcodedOffset < len - 1 {
      return Index(transcodedAfter: i)
    }

    // Skip to the next scalar. A four-byte UTF-8 scalar occupies two UTF-16
    // code units (a surrogate pair); every shorter scalar occupies one.
    let scalarLen = len == 4 ? 2 : 1
    return Index(encodedOffset: i.encodedOffset + scalarLen)
  }

  /// Returns the position immediately before `i` when the string is foreign.
  ///
  /// - Parameter i: A valid index of the view, greater than `startIndex`.
  /// - Returns: The index of the previous UTF-8 code unit.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(before i: Index) -> Index {
    _sanityCheck(_guts.isForeign)

    if i.transcodedOffset != 0 {
      // Still inside a multi-byte scalar: step back within it.
      _sanityCheck((1...3) ~= i.transcodedOffset)
      return Index(transcodedBefore: i)
    }

    // Move to the start of the preceding scalar, skipping over the trailing
    // half of a surrogate pair if that is where we land.
    var offset = i.encodedOffset &- 1
    var cu = _guts.foreignUTF16CodeUnit(at: offset)
    if _isTrailingSurrogate(cu) {
      offset = offset &- 1
      _sanityCheck(offset >= 0)
      cu = _guts.foreignUTF16CodeUnit(at: offset)
    }
    let len = _numTranscodedUTF8CodeUnits(cu)

    // Land on the last UTF-8 code unit of that scalar.
    return Index(encodedOffset: offset, transcodedOffset: len &- 1)
  }

  /// Returns the UTF-8 code unit at position `i` when the string is foreign.
  ///
  /// The scalar starting at `i.encodedOffset` is decoded, re-encoded as
  /// UTF-8, and the code unit at `i.transcodedOffset` is returned.
  ///
  /// - Parameter i: A valid index of the view, less than `endIndex`.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignSubscript(position i: Index) -> UTF8.CodeUnit {
    _sanityCheck(_guts.isForeign)

    // Currently, foreign means NSString

    // TODO(UTF8 perf): Could probably work just off a single code unit
    let scalar = _guts.foreignScalar(startingAt: i.encodedOffset)
    let encoded = Unicode.UTF8.encode(scalar)._unsafelyUnwrappedUnchecked

    // The transcoded offset must address one of the encoded code units; an
    // offset equal to `encoded.count` would read past the end.
    _sanityCheck(i.transcodedOffset < encoded.count)
    return encoded[
      encoded.index(encoded.startIndex, offsetBy: i.transcodedOffset)]
  }

  /// Offsets `i` by `n` positions when the string is foreign, by forwarding
  /// to `__index(_:offsetBy:)`.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(_ i: Index, offsetBy n: Int) -> Index {
    _sanityCheck(_guts.isForeign)
    return __index(i, offsetBy: n)
  }

  /// Offsets `i` by `n` positions without passing `limit` when the string is
  /// foreign, by forwarding to `__index(_:offsetBy:limitedBy:)`.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(
    _ i: Index, offsetBy n: Int, limitedBy limit: Index
  ) -> Index? {
    _sanityCheck(_guts.isForeign)
    return __index(i, offsetBy: n, limitedBy: limit)
  }

  /// Returns the distance between `i` and `j` when the string is foreign, by
  /// forwarding to `__distance(from:to:)`.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignDistance(from i: Index, to j: Index) -> Int {
    _sanityCheck(_guts.isForeign)
    return __distance(from: i, to: j)
  }

  /// Returns the number of UTF-8 code units in a foreign string, computed as
  /// the distance from `startIndex` to `endIndex`.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignCount() -> Int {
    _sanityCheck(_guts.isForeign)
    return __distance(from: startIndex, to: endIndex)
  }
}
|
||||
|
||||
@@ -10,6 +10,184 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// TODO(UTF8 merge): Find a common place for these helpers
|
||||
/// Returns `true` when `x` has its high bit clear, i.e. it is a single-byte
/// (ASCII) UTF-8 code unit.
@inlinable @inline(__always)
internal func _isASCII(_ x: UInt8) -> Bool {
  return x < 0x80
}
|
||||
|
||||
/// Decodes a one-byte UTF-8 sequence into a Unicode scalar.
///
/// - Parameter x: An ASCII code unit.
@inlinable @inline(__always)
internal func _decodeUTF8(_ x: UInt8) -> Unicode.Scalar {
  _sanityCheck(_isASCII(x))
  let bits = UInt32(x)
  return Unicode.Scalar(_unchecked: bits)
}
|
||||
|
||||
/// Decodes a two-byte UTF-8 sequence into a Unicode scalar.
///
/// - Parameters:
///   - x: The lead byte of a two-byte sequence.
///   - y: The continuation byte.
@inlinable @inline(__always)
internal func _decodeUTF8(_ x: UInt8, _ y: UInt8) -> Unicode.Scalar {
  _sanityCheck(_utf8ScalarLength(x) == 2)
  _sanityCheck(_isContinuation(y))
  // The lead byte contributes its low five bits above the six payload bits
  // of the continuation byte.
  let leadBits = UInt32(x) & 0b0001_1111
  let bits = (leadBits &<< 6) | _continuationPayload(y)
  return Unicode.Scalar(_unchecked: bits)
}
|
||||
|
||||
/// Decodes a three-byte UTF-8 sequence into a Unicode scalar.
///
/// - Parameters:
///   - x: The lead byte of a three-byte sequence.
///   - y: The first continuation byte.
///   - z: The second continuation byte.
@inlinable @inline(__always)
internal func _decodeUTF8(
  _ x: UInt8, _ y: UInt8, _ z: UInt8
) -> Unicode.Scalar {
  _sanityCheck(_utf8ScalarLength(x) == 3)
  _sanityCheck(_isContinuation(y) && _isContinuation(z))
  // Four payload bits from the lead byte, then six from each continuation.
  let high = (UInt32(x) & 0b0000_1111) &<< 12
  let middle = _continuationPayload(y) &<< 6
  let low = _continuationPayload(z)
  return Unicode.Scalar(_unchecked: high | middle | low)
}
|
||||
|
||||
/// Decodes a four-byte UTF-8 sequence into a Unicode scalar.
///
/// - Parameters:
///   - x: The lead byte of a four-byte sequence.
///   - y: The first continuation byte.
///   - z: The second continuation byte.
///   - w: The third continuation byte.
@inlinable @inline(__always)
internal func _decodeUTF8(
  _ x: UInt8, _ y: UInt8, _ z: UInt8, _ w: UInt8
) -> Unicode.Scalar {
  _sanityCheck(_utf8ScalarLength(x) == 4)
  _sanityCheck(
    _isContinuation(y) && _isContinuation(z) && _isContinuation(w))
  let x = UInt32(x)
  // A four-byte lead has the form 0b11110xxx, so only its low three bits are
  // payload. Masking exactly those bits keeps the marker bits out of the
  // result; for every valid lead byte this equals the wider 4-bit mask.
  let value = ((x & 0b0000_0111) &<< 18)
    | (_continuationPayload(y) &<< 12)
    | (_continuationPayload(z) &<< 6)
    | _continuationPayload(w)
  return Unicode.Scalar(_unchecked: value)
}
|
||||
|
||||
/// Returns the length in code units of the UTF-8 sequence introduced by the
/// lead byte `x`.
///
/// - Parameter x: A code unit that is not a continuation byte.
@usableFromInline @inline(__always)
internal func _utf8ScalarLength(_ x: UInt8) -> Int {
  _sanityCheck(!_isContinuation(x))
  guard !_isASCII(x) else { return 1 }
  // For a multi-byte lead, the count of leading one bits is the sequence
  // length; inverting turns them into leading zeros.
  // TODO(UTF8): Not great codegen on x86
  let inverted = ~x
  return inverted.leadingZeroBitCount
}
|
||||
|
||||
/// Returns `true` when `x` is a UTF-8 continuation byte, i.e. its top two
/// bits are `10`.
@usableFromInline @inline(__always)
internal func _isContinuation(_ x: UInt8) -> Bool {
  return (x >> 6) == 0b10
}
|
||||
|
||||
/// Returns the low six bits of `x`, widened to `UInt32`. For a continuation
/// byte these are its payload bits.
@usableFromInline @inline(__always)
internal func _continuationPayload(_ x: UInt8) -> UInt32 {
  return UInt32(x) & 0b0011_1111
}
|
||||
|
||||
/// Combines a UTF-16 surrogate pair into the scalar value it encodes.
///
/// - Parameters:
///   - high: The leading surrogate.
///   - low: The trailing surrogate.
/// - Returns: The decoded value: the two 10-bit halves joined, plus `0x10000`.
@inline(__always)
internal func _decodeSurrogatePair(
  leading high: UInt16, trailing low: UInt16
) -> UInt32 {
  _sanityCheck(_isLeadingSurrogate(high) && _isTrailingSurrogate(low))

  // Strip each surrogate's bias to recover its ten payload bits.
  let upperHalf: UInt32 = UInt32(high) &- UInt32(_leadingSurrogateBias)
  _sanityCheck(
    upperHalf < 1<<10, "I said high 10. Not high, like, 20 or something")
  let lowerHalf: UInt32 = UInt32(low) &- UInt32(_trailingSurrogateBias)
  _sanityCheck(
    lowerHalf < 1<<10, "I said low 10. Not low, like, 20 or something")

  let joined = (upperHalf &<< 10) | lowerHalf
  return joined &+ 0x1_00_00
}
|
||||
|
||||
extension _StringGuts {
  /// Returns the length in UTF-8 code units of the scalar whose lead byte is
  /// at offset `i` of fast UTF-8 storage.
  // TODO(UTF8): Should probably take a String.Index, assert no transcoding
  @usableFromInline @inline(__always)
  internal func fastUTF8ScalarLength(startingAt i: Int) -> Int {
    _sanityCheck(isFastUTF8)
    let len = _utf8ScalarLength(self.withFastUTF8 { $0[i] })
    _sanityCheck((1...4) ~= len)
    return len
  }

  /// Returns the length in UTF-8 code units of the scalar that ends just
  /// before offset `i` of fast UTF-8 storage, found by walking backwards
  /// over continuation bytes.
  // TODO(UTF8): Should probably take a String.Index, assert no transcoding
  @usableFromInline @inline(__always)
  internal func fastUTF8ScalarLength(endingAt i: Int) -> Int {
    _sanityCheck(isFastUTF8)

    return self.withFastUTF8 { utf8 in
      // `i` itself must sit on a scalar boundary (or at the very end).
      _sanityCheck(i == utf8.count || !_isContinuation(utf8[i]))
      var len = 1
      while _isContinuation(utf8[i - len]) {
        // A continuation byte can never be the first code unit.
        _sanityCheck(i - len > 0)
        len += 1
      }
      _sanityCheck(len <= 4)
      return len
    }
  }

  /// Decodes the scalar whose lead byte is at offset `i` of fast UTF-8
  /// storage.
  // TODO(UTF8): Should probably take a String.Index, assert no transcoding
  @usableFromInline @inline(__always)
  internal func fastUTF8Scalar(startingAt i: Int) -> Unicode.Scalar {
    _sanityCheck(isFastUTF8)
    return self.withFastUTF8 { utf8 in
      let cu0 = utf8[i]
      switch _utf8ScalarLength(cu0) {
      case 1: return _decodeUTF8(cu0)
      case 2: return _decodeUTF8(cu0, utf8[i &+ 1])
      case 3: return _decodeUTF8(cu0, utf8[i &+ 1], utf8[i &+ 2])
      case 4: return _decodeUTF8(cu0, utf8[i &+ 1], utf8[i &+ 2], utf8[i &+ 3])
      default: Builtin.unreachable()
      }
    }
  }

  /// Decodes the scalar that starts at UTF-16 offset `i` of a foreign
  /// string, combining a surrogate pair when the code unit at `i` is a
  /// leading surrogate.
  // TODO(UTF8): Should probably take a String.Index, assert no transcoding
  @_effects(releasenone)
  internal func foreignScalar(startingAt i: Int) -> Unicode.Scalar {
    let cu = foreignUTF16CodeUnit(at: i)
    _sanityCheck(!_isTrailingSurrogate(cu))

    if _slowPath(_isLeadingSurrogate(cu)) {
      let trailing = foreignUTF16CodeUnit(at: i+1)
      return Unicode.Scalar(
        _unchecked: _decodeSurrogatePair(leading: cu, trailing: trailing))
    }

    return Unicode.Scalar(_unchecked: UInt32(cu))
  }

  /// Returns the length in UTF-16 code units (1 or 2) of the scalar that
  /// starts at offset `i` of a foreign string.
  // TODO(UTF8): Should probably take a String.Index, assert no transcoding
  @_effects(releasenone)
  internal func foreignScalarLength(startingAt i: Int) -> Int {
    let cu = foreignUTF16CodeUnit(at: i)
    _sanityCheck(!_isTrailingSurrogate(cu))

    if _slowPath(_isLeadingSurrogate(cu)) {
      return 2
    }
    return 1
  }

  /// Returns the length in UTF-16 code units (1 or 2) of the scalar that
  /// ends just before offset `i` of a foreign string.
  // TODO(UTF8): Should probably take a String.Index, assert no transcoding
  @_effects(releasenone)
  internal func foreignScalarLength(endingAt i: Int) -> Int {
    let cu = foreignUTF16CodeUnit(at: i &- 1)
    _sanityCheck(!_isLeadingSurrogate(cu))
    if _slowPath(_isTrailingSurrogate(cu)) {
      return 2
    }
    return 1
  }

  /// Returns `true` when `i` lies on a Unicode scalar boundary.
  ///
  /// An index with a nonzero `transcodedOffset` points part-way into a
  /// transcoded scalar and is never aligned. Otherwise the start and end of
  /// the string are always aligned, and an interior offset is aligned when
  /// the code unit there is not a UTF-8 continuation byte (fast UTF-8
  /// storage) or not a trailing surrogate (foreign storage).
  @_effects(releasenone)
  internal func isOnUnicodeScalarBoundary(_ i: String.Index) -> Bool {
    // TODO(UTF8 perf): isASCII check

    // TODO(UTF8): Guts bounds check helper, or something in terms of Index

    // Mid-scalar positions must be rejected before the offset checks below,
    // which would otherwise accept them at offset 0 or on a lead code unit.
    guard i.transcodedOffset == 0 else { return false }

    if i.encodedOffset == 0 || i.encodedOffset == self.count { return true }

    if _fastPath(isFastUTF8) {
      return self.withFastUTF8 { return !_isContinuation($0[i.encodedOffset]) }
    }
    let cu = foreignUTF16CodeUnit(at: i.encodedOffset)
    return !_isTrailingSurrogate(cu)
  }
}
|
||||
|
||||
extension String {
|
||||
/// A view of a string's contents as a collection of Unicode scalar values.
|
||||
///
|
||||
@@ -57,218 +235,162 @@ extension String {
|
||||
/// print(asciiPrefix)
|
||||
/// }
|
||||
/// // Prints "My favorite emoji is "
|
||||
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
public struct UnicodeScalarView :
|
||||
BidirectionalCollection,
|
||||
CustomStringConvertible,
|
||||
CustomDebugStringConvertible
|
||||
{
|
||||
@_fixed_layout
|
||||
public struct UnicodeScalarView {
|
||||
@usableFromInline
|
||||
internal var _guts: _StringGuts
|
||||
|
||||
/// The offset of this view's `_guts` from the start of an original string,
|
||||
/// in UTF-16 code units. This is here to support legacy Swift 3-style
|
||||
/// slicing where `s.unicodeScalars[i..<j]` produces a
|
||||
/// `String.UnicodeScalarView`. The offset should be subtracted from the
|
||||
/// `encodedOffset` of view indices before it is passed to `_guts`.
|
||||
///
|
||||
/// Note: This should be removed when Swift 3 semantics are no longer
|
||||
/// supported.
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal var _coreOffset: Int
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(_ _guts: _StringGuts, coreOffset: Int = 0) {
|
||||
@inlinable @inline(__always)
|
||||
internal init(_ _guts: _StringGuts) {
|
||||
self._guts = _guts
|
||||
self._coreOffset = coreOffset
|
||||
}
|
||||
|
||||
public typealias Index = String.Index
|
||||
|
||||
/// Translates a `_guts` index into a `UnicodeScalarIndex` using this
|
||||
/// view's `_coreOffset`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal func _fromCoreIndex(_ i: Int) -> Index {
|
||||
return Index(encodedOffset: i + _coreOffset)
|
||||
}
|
||||
|
||||
/// Translates a `UnicodeScalarIndex` into a `_guts` index using this
|
||||
/// view's `_coreOffset`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal func _toCoreIndex(_ i: Index) -> Int {
|
||||
return i.encodedOffset - _coreOffset
|
||||
}
|
||||
|
||||
/// The position of the first Unicode scalar value if the string is
|
||||
/// nonempty.
|
||||
///
|
||||
/// If the string is empty, `startIndex` is equal to `endIndex`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var startIndex: Index {
|
||||
return _fromCoreIndex(_guts.startIndex)
|
||||
}
|
||||
|
||||
/// The "past the end" position---that is, the position one greater than
|
||||
/// the last valid subscript argument.
|
||||
///
|
||||
/// In an empty Unicode scalars view, `endIndex` is equal to `startIndex`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var endIndex: Index {
|
||||
return _fromCoreIndex(_guts.endIndex)
|
||||
}
|
||||
|
||||
/// Returns the next consecutive location after `i`.
|
||||
///
|
||||
/// - Precondition: The next location exists.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func index(after i: Index) -> Index {
|
||||
let offset = _toCoreIndex(i)
|
||||
let length: Int = _visitGuts(_guts, args: offset,
|
||||
ascii: { (_,_) -> Int in return 1 },
|
||||
utf16: { utf16, offset in
|
||||
return utf16.unicodeScalarWidth(startingAt: offset) },
|
||||
opaque: { opaque, offset in
|
||||
return opaque.unicodeScalarWidth(startingAt: offset) }
|
||||
)
|
||||
return _fromCoreIndex(offset + length)
|
||||
}
|
||||
|
||||
/// Returns the previous consecutive location before `i`.
|
||||
///
|
||||
/// - Precondition: The previous location exists.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func index(before i: Index) -> Index {
|
||||
let offset = _toCoreIndex(i)
|
||||
let length: Int = _visitGuts(_guts, args: offset,
|
||||
ascii: { (_,_) -> Int in return 1 },
|
||||
utf16: { utf16, offset in
|
||||
return utf16.unicodeScalarWidth(endingAt: offset) },
|
||||
opaque: { opaque, offset in
|
||||
return opaque.unicodeScalarWidth(endingAt: offset) }
|
||||
)
|
||||
return _fromCoreIndex(offset - length)
|
||||
}
|
||||
|
||||
/// Accesses the Unicode scalar value at the given position.
|
||||
///
|
||||
/// The following example searches a string's Unicode scalars view for a
|
||||
/// capital letter and then prints the character and Unicode scalar value
|
||||
/// at the found index:
|
||||
///
|
||||
/// let greeting = "Hello, friend!"
|
||||
/// if let i = greeting.unicodeScalars.firstIndex(where: { "A"..."Z" ~= $0 }) {
|
||||
/// print("First capital letter: \(greeting.unicodeScalars[i])")
|
||||
/// print("Unicode scalar value: \(greeting.unicodeScalars[i].value)")
|
||||
/// }
|
||||
/// // Prints "First capital letter: H"
|
||||
/// // Prints "Unicode scalar value: 72"
|
||||
///
|
||||
/// - Parameter position: A valid index of the character view. `position`
|
||||
/// must be less than the view's end index.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public subscript(position: Index) -> Unicode.Scalar {
|
||||
let offset = position.encodedOffset
|
||||
return _guts.unicodeScalar(startingAt: offset)
|
||||
}
|
||||
|
||||
/// An iterator over the Unicode scalars that make up a `UnicodeScalarView`
|
||||
/// collection.
|
||||
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
public struct Iterator : IteratorProtocol {
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal var _guts: _StringGuts
|
||||
|
||||
// FIXME(TODO: JIRA): the below is absurdly wasteful.
|
||||
// UnicodeScalarView.Iterator should be able to be passed in-registers.
|
||||
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal var _asciiIterator: _UnmanagedASCIIString.UnicodeScalarIterator?
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal var _utf16Iterator: _UnmanagedUTF16String.UnicodeScalarIterator?
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal var _opaqueIterator: _UnmanagedOpaqueString.UnicodeScalarIterator?
|
||||
|
||||
@usableFromInline
|
||||
internal var _smallIterator: _SmallUTF8String.UnicodeScalarIterator?
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(_ guts: _StringGuts) {
|
||||
if _slowPath(guts._isOpaque) {
|
||||
self.init(_opaque: guts)
|
||||
return
|
||||
}
|
||||
self.init(_concrete: guts)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inline(__always)
|
||||
internal init(_concrete guts: _StringGuts) {
|
||||
_sanityCheck(!guts._isOpaque)
|
||||
self._guts = guts
|
||||
defer { _fixLifetime(self) }
|
||||
if _guts.isASCII {
|
||||
self._asciiIterator =
|
||||
_guts._unmanagedASCIIView.makeUnicodeScalarIterator()
|
||||
} else {
|
||||
self._utf16Iterator =
|
||||
_guts._unmanagedUTF16View.makeUnicodeScalarIterator()
|
||||
}
|
||||
}
|
||||
|
||||
@usableFromInline // @opaque
|
||||
init(_opaque _guts: _StringGuts) {
|
||||
_sanityCheck(_guts._isOpaque)
|
||||
defer { _fixLifetime(self) }
|
||||
self._guts = _guts
|
||||
// TODO: Replace the whole iterator scheme with a sensible solution.
|
||||
if self._guts._isSmall {
|
||||
self._smallIterator =
|
||||
_guts._smallUTF8String.makeUnicodeScalarIterator()
|
||||
} else {
|
||||
self._opaqueIterator = _guts._asOpaque().makeUnicodeScalarIterator()
|
||||
}
|
||||
}
|
||||
|
||||
/// Advances to the next element and returns it, or `nil` if no next
|
||||
/// element exists.
|
||||
///
|
||||
/// Once `nil` has been returned, all subsequent calls return `nil`.
|
||||
///
|
||||
/// - Precondition: `next()` has not been applied to a copy of `self`
|
||||
/// since the copy was made.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public mutating func next() -> Unicode.Scalar? {
|
||||
if _slowPath(_opaqueIterator != nil) {
|
||||
return _opaqueIterator!.next()
|
||||
}
|
||||
if _asciiIterator != nil {
|
||||
return _asciiIterator!.next()
|
||||
}
|
||||
if _guts._isSmall {
|
||||
return _smallIterator!.next()
|
||||
}
|
||||
return _utf16Iterator!.next()
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an iterator over the Unicode scalars that make up this view.
|
||||
///
|
||||
/// - Returns: An iterator over this collection's `Unicode.Scalar` elements.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func makeIterator() -> Iterator {
|
||||
return Iterator(_guts)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var description: String {
|
||||
return String(_guts)
|
||||
}
|
||||
|
||||
public var debugDescription: String {
|
||||
return "StringUnicodeScalarView(\(self.description.debugDescription))"
|
||||
_invariantCheck()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extension String.UnicodeScalarView {
|
||||
/// Hook for validating the view's internal invariants.
///
/// The body is empty: no checks are performed yet, even when
/// `INTERNAL_CHECKS_ENABLED` is defined.
@inlinable @inline(__always)
|
||||
internal func _invariantCheck() {
|
||||
#if INTERNAL_CHECKS_ENABLED
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
extension String.UnicodeScalarView: BidirectionalCollection {
  public typealias Index = String.Index

  /// The position of the first Unicode scalar value, or `endIndex` when the
  /// view is empty.
  @inlinable
  public var startIndex: Index {
    @inline(__always) get {
      let first = Index(encodedOffset: 0)
      return first
    }
  }

  /// The "past the end" position: one greater than the last valid subscript
  /// argument.
  ///
  /// For an empty view, `endIndex` equals `startIndex`.
  @inlinable
  public var endIndex: Index {
    @inline(__always) get {
      let pastEnd = Index(encodedOffset: _guts.count)
      return pastEnd
    }
  }

  /// Returns the position of the scalar that follows the one at `i`.
  ///
  /// - Precondition: The next location exists.
  @inlinable @inline(__always)
  public func index(after i: Index) -> Index {
    _sanityCheck(i < endIndex)
    // TODO(UTF8): isKnownASCII bit fast-path...

    guard _fastPath(_guts.isFastUTF8) else {
      return _foreignIndex(after: i)
    }

    // Advance by the width of the scalar that starts here.
    let start = i.encodedOffset
    let width = _guts.fastUTF8ScalarLength(startingAt: start)
    return Index(encodedOffset: start &+ width)
  }

  /// Returns the position of the scalar that precedes `i`.
  ///
  /// - Precondition: The previous location exists.
  @inlinable @inline(__always)
  public func index(before i: Index) -> Index {
    precondition(i.encodedOffset > 0)
    guard _fastPath(_guts.isFastUTF8) else {
      return _foreignIndex(before: i)
    }

    // TODO(UTF8): isKnownASCII bit fast-path...

    // Walk backwards over continuation bytes until the lead byte is found.
    let end = i.encodedOffset
    let width = _guts.withFastUTF8 { utf8 -> Int in
      var count = 1
      while _isContinuation(utf8[end &- count]) {
        count += 1
      }
      _sanityCheck(count == _utf8ScalarLength(utf8[end - count]))
      return count
    }
    _sanityCheck(width <= 4, "invalid UTF8")
    return Index(encodedOffset: end &- width)
  }

  /// Accesses the Unicode scalar value at the given position.
  ///
  /// The following example searches a string's Unicode scalars view for a
  /// capital letter and then prints the character and Unicode scalar value
  /// at the found index:
  ///
  ///     let greeting = "Hello, friend!"
  ///     if let i = greeting.unicodeScalars.firstIndex(where: { "A"..."Z" ~= $0 }) {
  ///         print("First capital letter: \(greeting.unicodeScalars[i])")
  ///         print("Unicode scalar value: \(greeting.unicodeScalars[i].value)")
  ///     }
  ///     // Prints "First capital letter: H"
  ///     // Prints "Unicode scalar value: 72"
  ///
  /// - Parameter position: A valid index of the view. `position` must be
  ///   less than the view's end index.
  @inlinable
  public subscript(position: Index) -> Unicode.Scalar {
    @inline(__always) get {
      guard _fastPath(_guts.isFastUTF8) else {
        return _foreignSubscript(position: position)
      }
      let offset = position.encodedOffset
      return _guts.fastUTF8Scalar(startingAt: offset)
    }
  }
}
|
||||
|
||||
// TODO(UTF8): design specialized iterator, rather than default indexing one
|
||||
// extension String.UnicodeScalarView {
|
||||
// /// An iterator over the Unicode scalars that make up a `UnicodeScalarView`
|
||||
// /// collection.
|
||||
// @_fixed_layout
|
||||
// public struct Iterator : IteratorProtocol {
|
||||
// // TODO:
|
||||
|
||||
// /// Advances to the next element and returns it, or `nil` if no next
|
||||
// /// element exists.
|
||||
// ///
|
||||
// /// Once `nil` has been returned, all subsequent calls return `nil`.
|
||||
// ///
|
||||
// /// - Precondition: `next()` has not been applied to a copy of `self`
|
||||
// /// since the copy was made.
|
||||
// @inlinable @inline(__always)
|
||||
// public mutating func next() -> Unicode.Scalar? {
|
||||
// unimplemented_utf8()
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// extension String.UnicodeScalarView {
|
||||
// /// Returns an iterator over the Unicode scalars that make up this view.
|
||||
// ///
|
||||
// /// - Returns: An iterator over this collection's `Unicode.Scalar` elements.
|
||||
// @inlinable @inline(__always)
|
||||
// public func makeIterator() -> Iterator {
|
||||
// unimplemented_utf8()
|
||||
// }
|
||||
// }
|
||||
|
||||
extension String.UnicodeScalarView: CustomStringConvertible {
  /// The view's contents as a `String`, built from the same underlying guts.
  @inlinable
  public var description: String {
    @inline(__always) get {
      let text = String(_guts)
      return text
    }
  }
}
|
||||
|
||||
extension String.UnicodeScalarView: CustomDebugStringConvertible {
  /// A debugging representation: the debug description of the view's string
  /// contents, wrapped in `StringUnicodeScalarView(...)`.
  public var debugDescription: String {
    get {
      return "StringUnicodeScalarView(\(self.description.debugDescription))"
    }
  }
}
|
||||
|
||||
extension String {
|
||||
/// Creates a string corresponding to the given collection of Unicode
|
||||
/// scalars.
|
||||
///
|
||||
@@ -286,74 +408,27 @@ extension String {
|
||||
/// slice of the `picnicGuest.unicodeScalars` view.
|
||||
///
|
||||
/// - Parameter unicodeScalars: A collection of Unicode scalar values.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable @inline(__always)
|
||||
public init(_ unicodeScalars: UnicodeScalarView) {
|
||||
self.init(unicodeScalars._guts)
|
||||
}
|
||||
|
||||
/// The index type for a string's `unicodeScalars` view.
|
||||
public typealias UnicodeScalarIndex = UnicodeScalarView.Index
|
||||
}
|
||||
|
||||
extension _StringGuts {
|
||||
@inlinable
|
||||
internal func unicodeScalar(startingAt offset: Int) -> Unicode.Scalar {
|
||||
return _visitGuts(self, args: offset,
|
||||
ascii: { ascii, offset in
|
||||
let u = ascii.codeUnit(atCheckedOffset: offset)
|
||||
return Unicode.Scalar(_unchecked: UInt32(u)) },
|
||||
utf16: { utf16, offset in
|
||||
return utf16.unicodeScalar(startingAt: offset) },
|
||||
opaque: { opaque, offset in
|
||||
return opaque.unicodeScalar(startingAt: offset) })
|
||||
}
|
||||
|
||||
@inlinable
|
||||
internal func unicodeScalar(endingAt offset: Int) -> Unicode.Scalar {
|
||||
return _visitGuts(self, args: offset,
|
||||
ascii: { ascii, offset in
|
||||
let u = ascii.codeUnit(atCheckedOffset: offset &- 1)
|
||||
return Unicode.Scalar(_unchecked: UInt32(u)) },
|
||||
utf16: { utf16, offset in
|
||||
return utf16.unicodeScalar(endingAt: offset) },
|
||||
opaque: { opaque, offset in
|
||||
return opaque.unicodeScalar(endingAt: offset) })
|
||||
}
|
||||
}
|
||||
|
||||
extension String.UnicodeScalarView : _SwiftStringView {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var _persistentContent : String { return String(_guts) }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
var _wholeString : String {
|
||||
return String(_guts)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
var _encodedOffsetRange : Range<Int> {
|
||||
return 0..<_guts.count
|
||||
}
|
||||
}
|
||||
|
||||
extension String {
|
||||
/// The string's value represented as a collection of Unicode scalar values.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var unicodeScalars: UnicodeScalarView {
|
||||
get {
|
||||
return UnicodeScalarView(_guts)
|
||||
}
|
||||
set {
|
||||
_guts = newValue._guts
|
||||
}
|
||||
@inline(__always) get { return UnicodeScalarView(_guts) }
|
||||
@inline(__always) set { _guts = newValue._guts }
|
||||
}
|
||||
}
|
||||
|
||||
extension String.UnicodeScalarView : RangeReplaceableCollection {
|
||||
/// Creates an empty view instance.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable @inline(__always)
|
||||
public init() {
|
||||
self = String.UnicodeScalarView(_StringGuts())
|
||||
self.init(_StringGuts())
|
||||
}
|
||||
|
||||
/// Reserves enough space in the view's underlying storage to store the
|
||||
@@ -369,34 +444,15 @@ extension String.UnicodeScalarView : RangeReplaceableCollection {
|
||||
///
|
||||
/// - Complexity: O(*n*), where *n* is the capacity being reserved.
|
||||
public mutating func reserveCapacity(_ n: Int) {
|
||||
_guts.reserveCapacity(n)
|
||||
self._guts.reserveCapacity(n)
|
||||
}
|
||||
|
||||
/// Appends the given Unicode scalar to the view.
|
||||
///
|
||||
/// - Parameter c: The character to append to the string.
|
||||
public mutating func append(_ c: Unicode.Scalar) {
|
||||
if _fastPath(_guts.isASCII && c.value <= 0x7f) {
|
||||
_guts.withMutableASCIIStorage(unusedCapacity: 1) { storage in
|
||||
unowned(unsafe) let s = storage._value
|
||||
s.end.pointee = UInt8(c.value)
|
||||
s.count += 1
|
||||
}
|
||||
} else {
|
||||
let width = UTF16.width(c)
|
||||
_guts.withMutableUTF16Storage(unusedCapacity: width) { storage in
|
||||
unowned(unsafe) let s = storage._value
|
||||
_sanityCheck(s.count + width <= s.capacity)
|
||||
if _fastPath(width == 1) {
|
||||
s.end.pointee = UTF16.CodeUnit(c.value)
|
||||
} else {
|
||||
_sanityCheck(width == 2)
|
||||
s.end[0] = UTF16.leadSurrogate(c)
|
||||
s.end[1] = UTF16.trailSurrogate(c)
|
||||
}
|
||||
s.count += width
|
||||
}
|
||||
}
|
||||
// TODO(UTF8 perf): This is a horribly slow means...
|
||||
self.append(contentsOf: [c])
|
||||
}
|
||||
|
||||
/// Appends the Unicode scalar values in the given sequence to the view.
|
||||
@@ -406,30 +462,9 @@ extension String.UnicodeScalarView : RangeReplaceableCollection {
|
||||
/// - Complexity: O(*n*), where *n* is the length of the resulting view.
|
||||
public mutating func append<S : Sequence>(contentsOf newElements: S)
|
||||
where S.Element == Unicode.Scalar {
|
||||
// FIXME: Keep ASCII storage if possible
|
||||
_guts.reserveUnusedCapacity(newElements.underestimatedCount)
|
||||
var it = newElements.makeIterator()
|
||||
var next = it.next()
|
||||
while let n = next {
|
||||
_guts.withMutableUTF16Storage(unusedCapacity: UTF16.width(n)) { storage in
|
||||
var p = storage._value.end
|
||||
let limit = storage._value.capacityEnd
|
||||
while let n = next {
|
||||
let w = UTF16.width(n)
|
||||
guard p + w <= limit else { break }
|
||||
if w == 1 {
|
||||
p.pointee = UTF16.CodeUnit(n.value)
|
||||
} else {
|
||||
_sanityCheck(w == 2)
|
||||
p[0] = UTF16.leadSurrogate(n)
|
||||
p[1] = UTF16.trailSurrogate(n)
|
||||
}
|
||||
p += w
|
||||
next = it.next()
|
||||
}
|
||||
storage._value.count = p - storage._value.start
|
||||
}
|
||||
}
|
||||
// TODO(UTF8 perf): This is a horribly slow means...
|
||||
let scalars = String(decoding: newElements.map { $0.value }, as: UTF32.self)
|
||||
self = (String(self._guts) + scalars).unicodeScalars
|
||||
}
|
||||
|
||||
/// Replaces the elements within the specified bounds with the given Unicode
|
||||
@@ -451,10 +486,18 @@ extension String.UnicodeScalarView : RangeReplaceableCollection {
|
||||
_ bounds: Range<Index>,
|
||||
with newElements: C
|
||||
) where C : Collection, C.Element == Unicode.Scalar {
|
||||
let rawSubRange: Range<Int> = _toCoreIndex(bounds.lowerBound) ..<
|
||||
_toCoreIndex(bounds.upperBound)
|
||||
let lazyUTF16 = newElements.lazy.flatMap { $0.utf16 }
|
||||
_guts.replaceSubrange(rawSubRange, with: lazyUTF16)
|
||||
// TODO(UTF8 perf): This is a horribly slow means...
|
||||
//
|
||||
// TODO(UTF8 perf): Consider storing a string directly, or implementing RSR
|
||||
// on guts.
|
||||
|
||||
let utf8Replacement = newElements.flatMap { String($0).utf8 }
|
||||
let replacement = utf8Replacement.withUnsafeBufferPointer {
|
||||
return String._uncheckedFromUTF8($0)
|
||||
}
|
||||
var copy = String(_guts)
|
||||
copy.replaceSubrange(bounds, with: replacement)
|
||||
self = copy.unicodeScalars
|
||||
}
|
||||
}
|
||||
|
||||
@@ -481,16 +524,18 @@ extension String.UnicodeScalarIndex {
|
||||
/// the trailing surrogate of a UTF-16 surrogate pair results in `nil`.
|
||||
///
|
||||
/// - Parameters:
|
||||
/// - sourcePosition: A position in the `utf16` view of a string. `utf16Index`
|
||||
/// must be an element of `String(unicodeScalars).utf16.indices`.
|
||||
/// - sourcePosition: A position in the `utf16` view of a string.
|
||||
/// `sourcePosition` must be an element of
|
||||
/// `String(unicodeScalars).utf16.indices`.
|
||||
/// - unicodeScalars: The `UnicodeScalarView` in which to find the new
|
||||
/// position.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public init?(
|
||||
_ sourcePosition: String.UTF16Index,
|
||||
_ sourcePosition: String.Index,
|
||||
within unicodeScalars: String.UnicodeScalarView
|
||||
) {
|
||||
if !unicodeScalars._isOnUnicodeScalarBoundary(sourcePosition) { return nil }
|
||||
guard unicodeScalars._guts.isOnUnicodeScalarBoundary(sourcePosition) else {
|
||||
return nil
|
||||
}
|
||||
self = sourcePosition
|
||||
}
|
||||
|
||||
@@ -514,42 +559,11 @@ extension String.UnicodeScalarIndex {
|
||||
/// position in `characters`, this method returns `nil`. For example,
|
||||
/// an attempt to convert the position of a UTF-8 continuation byte
|
||||
/// returns `nil`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func samePosition(in characters: String) -> String.Index? {
|
||||
return String.Index(self, within: characters)
|
||||
}
|
||||
}
|
||||
|
||||
extension String.UnicodeScalarView {
  /// Reports whether `i` falls on a Unicode scalar boundary of this view.
  ///
  /// All-ASCII guts, the start index, and the end index are accepted
  /// immediately. An index with a non-zero `transcodedOffset` is rejected.
  /// Otherwise the UTF-16 code unit at the index is inspected: anything that
  /// is not a trail surrogate is a boundary, and a trail surrogate is a
  /// boundary only when it is not preceded by a lead surrogate.
  @inlinable // FIXME(sil-serialize-all)
  internal func _isOnUnicodeScalarBoundary(_ i: Index) -> Bool {
    if _fastPath(_guts.isASCII) { return true }
    guard i != startIndex, i != endIndex else { return true }
    guard i.transcodedOffset == 0 else { return false }

    let offset = _toCoreIndex(i)
    let unit = _guts.codeUnit(atCheckedOffset: offset)
    if _fastPath(!UTF16.isTrailSurrogate(unit)) { return true }

    // `unit` is a trail surrogate: it only starts a scalar when no lead
    // surrogate sits directly in front of it.
    if offset == 0 { return true }
    let previous = _guts.codeUnit(atCheckedOffset: offset &- 1)
    return !UTF16.isLeadSurrogate(previous)
  }

  /// Reports whether `i` falls on an extended grapheme cluster boundary.
  ///
  /// The start and end indices always qualify. Any other index must be on a
  /// scalar boundary and must survive stepping one `Character` forward and
  /// then one `Character` back in the equivalent `String`.
  ///
  // NOTE: Don't make this function inlinable. Grapheme cluster
  // segmentation uses a completely different algorithm in Unicode 9.0.
  // NOTE(review): the attribute below contradicts the note above; it is
  // preserved as found.
  @inlinable // FIXME(sil-serialize-all)
  internal func _isOnGraphemeClusterBoundary(_ i: Index) -> Bool {
    guard i != startIndex, i != endIndex else { return true }
    guard _isOnUnicodeScalarBoundary(i) else { return false }

    let whole = String(_guts)
    let roundTripped = whole.index(before: whole.index(after: i))
    return roundTripped == i
  }
}
|
||||
|
||||
// Reflection
|
||||
extension String.UnicodeScalarView : CustomReflectable {
|
||||
/// Returns a mirror that reflects the Unicode scalars view of a string.
|
||||
@@ -558,6 +572,7 @@ extension String.UnicodeScalarView : CustomReflectable {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//===--- Slicing Support --------------------------------------------------===//
|
||||
/// In Swift 3.2, in the absence of type context,
|
||||
///
|
||||
@@ -571,9 +586,37 @@ extension String.UnicodeScalarView : CustomReflectable {
|
||||
extension String.UnicodeScalarView {
|
||||
public typealias SubSequence = Substring.UnicodeScalarView
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@available(swift, introduced: 4)
|
||||
public subscript(bounds: Range<Index>) -> String.UnicodeScalarView.SubSequence {
|
||||
return String.UnicodeScalarView.SubSequence(self, _bounds: bounds)
|
||||
public subscript(r: Range<Index>) -> String.UnicodeScalarView.SubSequence {
|
||||
return String.UnicodeScalarView.SubSequence(self, _bounds: r)
|
||||
}
|
||||
}
|
||||
|
||||
// Foreign string Support
|
||||
extension String.UnicodeScalarView {
  /// Slow path of `index(after:)` for foreign guts: moves the encoded offset
  /// forward by the length `_guts.foreignScalarLength(startingAt:)` reports.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(after i: Index) -> Index {
    _sanityCheck(_guts.isForeign)

    let origin = i.encodedOffset
    let scalarLength = _guts.foreignScalarLength(startingAt: origin)
    return Index(encodedOffset: origin + scalarLength)
  }

  /// Slow path of `index(before:)` for foreign guts: moves the encoded offset
  /// back by the length `_guts.foreignScalarLength(endingAt:)` reports.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(before i: Index) -> Index {
    _sanityCheck(_guts.isForeign)

    let origin = i.encodedOffset
    let scalarLength = _guts.foreignScalarLength(endingAt: origin)
    return Index(encodedOffset: origin - scalarLength)
  }

  /// Slow path of the element subscript for foreign guts: returns the scalar
  /// that `_guts.foreignScalar(startingAt:)` produces at `i`'s encoded offset.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignSubscript(position i: Index) -> Unicode.Scalar {
    _sanityCheck(_guts.isForeign)

    let origin = i.encodedOffset
    return _guts.foreignScalar(startingAt: origin)
  }
}
|
||||
|
||||
@@ -10,174 +10,3 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// A random-access view over string storage whose elements are UTF-16 code
/// units and whose slices have the same type as the view itself.
@usableFromInline
internal protocol _StringVariant : RandomAccessCollection
where Element == Unicode.UTF16.CodeUnit, SubSequence == Self {
  // FIXME associatedtype Encoding : _UnicodeEncoding

  /// The integer type of a stored code unit.
  associatedtype CodeUnit : FixedWidthInteger & UnsignedInteger

  /// The iterator type that yields this variant's Unicode scalars.
  associatedtype UnicodeScalarIterator : IteratorProtocol
    where UnicodeScalarIterator.Element == Unicode.Scalar

  var isASCII: Bool { get }

  // The two subscripts below take plain integer offsets within `0..<count`,
  // independent of whatever the collection's `Index` type is.
  subscript(offset: Int) -> Element { get }
  subscript(offsetRange: Range<Int>) -> Self { get }

  func makeUnicodeScalarIterator() -> UnicodeScalarIterator

  /// Length, in UTF-16 code units, of the first extended grapheme cluster
  /// in `self`.
  func measureFirstExtendedGraphemeCluster() -> Int

  /// Length, in UTF-16 code units, of the last extended grapheme cluster
  /// in `self`.
  func measureLastExtendedGraphemeCluster() -> Int

  /// Slow path for `measureFirstExtendedGraphemeCluster()`.
  func _measureFirstExtendedGraphemeClusterSlow() -> Int

  /// Slow path for `measureLastExtendedGraphemeCluster()`.
  func _measureLastExtendedGraphemeClusterSlow() -> Int

  func _copy<TargetCodeUnit>(
    into target: UnsafeMutableBufferPointer<TargetCodeUnit>
  ) where TargetCodeUnit : FixedWidthInteger & UnsignedInteger
}
|
||||
|
||||
extension _StringVariant {
  /// Creates native storage with room for `count + unusedCapacity` code
  /// units of the requested width and copies this variant's contents into
  /// its used buffer.
  @inlinable // FIXME(sil-serialize-all)
  internal func _copyToNativeStorage<TargetCodeUnit>(
    of codeUnit: TargetCodeUnit.Type = TargetCodeUnit.self,
    unusedCapacity: Int = 0
  ) -> _SwiftStringStorage<TargetCodeUnit>
  where TargetCodeUnit : FixedWidthInteger & UnsignedInteger {
    let usedCount = count
    let result = _SwiftStringStorage<TargetCodeUnit>.create(
      capacity: usedCount + unusedCapacity,
      count: usedCount)
    _copy(into: result.usedBuffer)
    return result
  }

  /// Traps unless `i` lies in `startIndex..<endIndex`.
  @inlinable
  @inline(__always)
  func _boundsCheck(_ i: Index) {
    _precondition(startIndex <= i && i < endIndex,
      "String index is out of bounds")
  }

  /// Traps unless `range` lies within `startIndex...endIndex`.
  @inlinable
  @inline(__always)
  func _boundsCheck(_ range: Range<Index>) {
    _precondition(
      range.lowerBound >= startIndex && range.upperBound <= endIndex,
      "String index range is out of bounds")
  }

  /// Traps unless the integer offset `i` lies in `0..<count`.
  @inlinable
  @inline(__always)
  func _boundsCheck(offset i: Int) {
    _precondition(0 <= i && i < count,
      "String index is out of bounds")
  }

  /// Traps unless the integer offset range lies within `0...count`.
  @inlinable
  @inline(__always)
  func _boundsCheck(offsetRange range: Range<Int>) {
    _precondition(range.lowerBound >= 0,
      "String index range is out of bounds")
    _precondition(range.upperBound <= count,
      "String index range is out of bounds")
  }

  /// Returns the code unit at `index` after bounds-checking it.
  @inlinable
  internal func codeUnit(atCheckedIndex index: Index) -> Element {
    _boundsCheck(index)
    let unit = self[index]
    return unit
  }

  /// Returns the code unit at integer `offset` after bounds-checking it.
  @inlinable
  internal func codeUnit(atCheckedOffset offset: Int) -> Element {
    _boundsCheck(offset: offset)
    let unit = self[offset]
    return unit
  }

  /// Returns the slice covering the integer offset `range` after
  /// bounds-checking it.
  @inlinable
  internal func checkedSlice(_ range: Range<Int>) -> Self {
    _boundsCheck(offsetRange: range)
    return self[range]
  }

  /// Returns the bounds-checked slice from `startOffset` through the end.
  @inlinable
  internal func checkedSlice(from startOffset: Int) -> Self {
    let tail = startOffset..<count
    _boundsCheck(offsetRange: tail)
    return self[tail]
  }

  /// Returns the bounds-checked slice from the start up to `endOffset`.
  @inlinable
  internal func checkedSlice(upTo endOffset: Int) -> Self {
    let head = 0..<endOffset
    _boundsCheck(offsetRange: head)
    return self[head]
  }
}
|
||||
|
||||
extension _StringVariant {
  /// Width, in UTF-16 code units, of the scalar that begins at `offset`:
  /// 2 for a lead surrogate immediately followed by a trail surrogate,
  /// otherwise 1.
  @inlinable
  internal func unicodeScalarWidth(startingAt offset: Int) -> Int {
    _boundsCheck(offset: offset)
    guard _slowPath(UTF16.isLeadSurrogate(self[offset])) else { return 1 }

    let following = offset + 1
    guard following < self.count,
      UTF16.isTrailSurrogate(self[following]) else { return 1 }
    return 2
  }

  /// Width, in UTF-16 code units, of the scalar that ends just before
  /// `offset`: 2 for a trail surrogate immediately preceded by a lead
  /// surrogate, otherwise 1.
  @inlinable
  func unicodeScalarWidth(endingAt offset: Int) -> Int {
    _boundsCheck(offset: offset - 1)
    guard _slowPath(UTF16.isTrailSurrogate(self[offset - 1])) else { return 1 }

    guard offset >= 2,
      UTF16.isLeadSurrogate(self[offset - 2]) else { return 1 }
    return 2
  }

  /// Decodes the scalar that begins at `offset`.
  ///
  /// A code unit that is itself a scalar is returned directly, and a
  /// well-formed surrogate pair is combined. Any other input (an unpaired
  /// surrogate) yields the replacement character.
  @inlinable
  func unicodeScalar(startingAt offset: Int) -> Unicode.Scalar {
    let first = self.codeUnit(atCheckedOffset: offset)
    if _fastPath(UTF16._isScalar(first)) {
      return Unicode.Scalar(_unchecked: UInt32(first))
    }

    guard UTF16.isLeadSurrogate(first) && offset + 1 < count else {
      return Unicode.Scalar._replacementCharacter
    }
    let second = self[offset + 1]
    guard UTF16.isTrailSurrogate(second) else {
      return Unicode.Scalar._replacementCharacter
    }
    return UTF16._decodeSurrogates(first, second)
  }

  /// Decodes the scalar that ends just before `offset`.
  ///
  /// A code unit that is itself a scalar is returned directly, and a
  /// well-formed surrogate pair is combined. Any other input (an unpaired
  /// surrogate) yields the replacement character.
  @inlinable
  func unicodeScalar(endingAt offset: Int) -> Unicode.Scalar {
    let last = self.codeUnit(atCheckedOffset: offset - 1)
    if _fastPath(UTF16._isScalar(last)) {
      return Unicode.Scalar(_unchecked: UInt32(last))
    }

    guard UTF16.isTrailSurrogate(last) && offset >= 2 else {
      return Unicode.Scalar._replacementCharacter
    }
    let preceding = self[offset - 2]
    guard UTF16.isLeadSurrogate(preceding) else {
      return Unicode.Scalar._replacementCharacter
    }
    return UTF16._decodeSurrogates(preceding, last)
  }
}
|
||||
|
||||
+186
-262
@@ -18,9 +18,11 @@ extension String {
|
||||
/// instance.
|
||||
///
|
||||
/// - Complexity: O(*n*), where *n* is the length of `substring`.
|
||||
@inlinable
|
||||
public init(_ substring: Substring) {
|
||||
let wholeGuts = substring._wholeString._guts
|
||||
self.init(wholeGuts._extractSlice(substring._encodedOffsetRange))
|
||||
self = substring.withUnsafeBytes {
|
||||
return String._uncheckedFromUTF8($0._asUInt8)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -92,88 +94,94 @@ extension String {
|
||||
/// when there is no other reference to the original string. Storing
|
||||
/// substrings may, therefore, prolong the lifetime of string data that is
|
||||
/// no longer otherwise accessible, which can appear to be memory leakage.
|
||||
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
public struct Substring : StringProtocol {
|
||||
@_fixed_layout
public struct Substring {
  /// The slice of a `String` that backs this substring.
  @usableFromInline
  internal var _slice: Slice<String>

  /// Wraps `slice` and then runs the invariant check.
  @inlinable @inline(__always)
  internal init(_ slice: Slice<String>) {
    _slice = slice
    self._invariantCheck()
  }

  /// Creates an empty substring.
  @inlinable @inline(__always)
  public init() {
    let emptySlice = Slice<String>()
    self.init(emptySlice)
  }
}
|
||||
|
||||
extension Substring {
  /// The guts of the whole string this substring slices.
  @inlinable
  internal var wholeGuts: _StringGuts {
    @inline(__always) get {
      let base = _slice.base
      return base._guts
    }
  }

  /// The whole string this substring slices, rebuilt from `wholeGuts`.
  @inlinable
  internal var wholeString: String {
    @inline(__always) get { return String(wholeGuts) }
  }

  @usableFromInline // TODO(UTF8 merge): for testing, drop this decl after merge
  internal var _wholeString: String { return self.wholeString }

  /// The slice's bounds expressed as encoded offsets.
  ///
  /// Both bounds are sanity-checked to carry no transcoded offset.
  @inlinable
  internal var offsetRange: Range<Int> {
    let lower = _slice.startIndex
    let upper = _slice.endIndex
    _sanityCheck(lower.transcodedOffset == 0)
    _sanityCheck(upper.transcodedOffset == 0)

    let bounds = (lower: lower.encodedOffset, upper: upper.encodedOffset)
    return Range(uncheckedBounds: bounds)
  }

  /// Runs the whole string's invariant check when internal checks are
  /// compiled in; otherwise does nothing.
  @inlinable @inline(__always)
  internal func _invariantCheck() {
    #if INTERNAL_CHECKS_ENABLED
    wholeString._invariantCheck()
    #endif
  }
}
|
||||
|
||||
extension Substring: StringProtocol {
|
||||
public typealias Index = String.Index
|
||||
public typealias SubSequence = Substring
|
||||
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
internal var _slice: Slice<String>
|
||||
|
||||
/// Creates an empty substring.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public init() {
|
||||
_slice = Slice()
|
||||
@inlinable
|
||||
public var startIndex: Index {
|
||||
@inline(__always) get { return _slice.startIndex }
|
||||
}
|
||||
@inlinable
|
||||
public var endIndex: Index {
|
||||
@inline(__always) get { return _slice.endIndex }
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(_slice: Slice<String>) {
|
||||
self._slice = _slice
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init(_ guts: _StringGuts, _ offsetRange: Range<Int>) {
|
||||
self.init(
|
||||
_base: String(guts),
|
||||
Index(encodedOffset: offsetRange.lowerBound) ..<
|
||||
Index(encodedOffset: offsetRange.upperBound))
|
||||
}
|
||||
|
||||
/// Creates a substring with the specified bounds within the given string.
|
||||
///
|
||||
/// - Parameters:
|
||||
/// - base: The string to create a substring of.
|
||||
/// - bounds: The range of `base` to use for the substring. The lower and
|
||||
/// upper bounds of `bounds` must be valid indices of `base`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public init(_base base: String, _ bounds: Range<Index>) {
|
||||
_slice = Slice(base: base, bounds: bounds)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal init<R: RangeExpression>(
|
||||
_base base: String, _ bounds: R
|
||||
) where R.Bound == Index {
|
||||
self.init(_base: base, bounds.relative(to: base))
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var startIndex: Index { return _slice.startIndex }
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var endIndex: Index { return _slice.endIndex }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable @inline(__always)
|
||||
public func index(after i: Index) -> Index {
|
||||
_precondition(i < endIndex, "Cannot increment beyond endIndex")
|
||||
_precondition(i >= startIndex, "Cannot increment an invalid index")
|
||||
// FIXME(strings): slice types currently lack necessary bound checks
|
||||
return _slice.index(after: i)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable @inline(__always)
|
||||
public func index(before i: Index) -> Index {
|
||||
_precondition(i <= endIndex, "Cannot decrement an invalid index")
|
||||
_precondition(i > startIndex, "Cannot decrement beyond startIndex")
|
||||
// FIXME(strings): slice types currently lack necessary bound checks
|
||||
return _slice.index(before: i)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable @inline(__always)
|
||||
public func index(_ i: Index, offsetBy n: Int) -> Index {
|
||||
let result = _slice.index(i, offsetBy: n)
|
||||
// FIXME(strings): slice types currently lack necessary bound checks
|
||||
_precondition(
|
||||
(_slice._startIndex ... _slice.endIndex).contains(result),
|
||||
"Operation results in an invalid index")
|
||||
return result
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable @inline(__always)
|
||||
public func index(
|
||||
_ i: Index, offsetBy n: Int, limitedBy limit: Index
|
||||
) -> Index? {
|
||||
let result = _slice.index(i, offsetBy: n, limitedBy: limit)
|
||||
// FIXME(strings): slice types currently lack necessary bound checks
|
||||
_precondition(result.map {
|
||||
(_slice._startIndex ... _slice.endIndex).contains($0)
|
||||
} ?? true,
|
||||
@@ -181,26 +189,22 @@ public struct Substring : StringProtocol {
|
||||
return result
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable @inline(__always)
|
||||
public func distance(from start: Index, to end: Index) -> Int {
|
||||
return _slice.distance(from: start, to: end)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public subscript(i: Index) -> Character {
|
||||
return _slice[i]
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public mutating func replaceSubrange<C>(
|
||||
_ bounds: Range<Index>,
|
||||
with newElements: C
|
||||
) where C : Collection, C.Iterator.Element == Iterator.Element {
|
||||
// FIXME(strings): slice types currently lack necessary bound checks
|
||||
_slice.replaceSubrange(bounds, with: newElements)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public mutating func replaceSubrange(
|
||||
_ bounds: Range<Index>, with newElements: Substring
|
||||
) {
|
||||
@@ -215,7 +219,7 @@ public struct Substring : StringProtocol {
|
||||
/// specified in `sourceEncoding`.
|
||||
/// - sourceEncoding: The encoding in which `codeUnits` should be
|
||||
/// interpreted.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable // specialization
|
||||
public init<C: Collection, Encoding: _UnicodeEncoding>(
|
||||
decoding codeUnits: C, as sourceEncoding: Encoding.Type
|
||||
) where C.Iterator.Element == Encoding.CodeUnit {
|
||||
@@ -227,7 +231,6 @@ public struct Substring : StringProtocol {
|
||||
///
|
||||
/// - Parameter nullTerminatedUTF8: A pointer to a sequence of contiguous,
|
||||
/// UTF-8 encoded bytes ending just before the first zero byte.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public init(cString nullTerminatedUTF8: UnsafePointer<CChar>) {
|
||||
self.init(String(cString: nullTerminatedUTF8))
|
||||
}
|
||||
@@ -241,7 +244,7 @@ public struct Substring : StringProtocol {
|
||||
/// before the first zero code unit.
|
||||
/// - sourceEncoding: The encoding in which the code units should be
|
||||
/// interpreted.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable // specialization
|
||||
public init<Encoding: _UnicodeEncoding>(
|
||||
decodingCString nullTerminatedCodeUnits: UnsafePointer<Encoding.CodeUnit>,
|
||||
as sourceEncoding: Encoding.Type
|
||||
@@ -263,14 +266,12 @@ public struct Substring : StringProtocol {
|
||||
/// `withCString(_:)` method. The pointer argument is valid only for the
|
||||
/// duration of the method's execution.
|
||||
/// - Returns: The return value, if any, of the `body` closure parameter.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable // specialization
|
||||
public func withCString<Result>(
|
||||
_ body: (UnsafePointer<CChar>) throws -> Result) rethrows -> Result {
|
||||
return try _wholeString._guts._withCSubstringAndLength(
|
||||
in: _encodedOffsetRange,
|
||||
encoding: UTF8.self) { p, length in
|
||||
try p.withMemoryRebound(to: CChar.self, capacity: length, body)
|
||||
}
|
||||
// TODO(UTF8 perf): Detect when we cover the rest of a nul-terminated
|
||||
// String, and thus can avoid a copy.
|
||||
return try String(self).withCString(body)
|
||||
}
|
||||
|
||||
/// Calls the given closure with a pointer to the contents of the string,
|
||||
@@ -289,68 +290,48 @@ public struct Substring : StringProtocol {
|
||||
/// - targetEncoding: The encoding in which the code units should be
|
||||
/// interpreted.
|
||||
/// - Returns: The return value, if any, of the `body` closure parameter.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable // specialization
|
||||
public func withCString<Result, TargetEncoding: _UnicodeEncoding>(
|
||||
encodedAs targetEncoding: TargetEncoding.Type,
|
||||
_ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
|
||||
) rethrows -> Result {
|
||||
return try _wholeString._guts._withCSubstring(
|
||||
in: _encodedOffsetRange,
|
||||
encoding: targetEncoding,
|
||||
body)
|
||||
// TODO(UTF8 perf): Detect when we cover the rest of a nul-terminated
|
||||
// String, and thus can avoid a copy.
|
||||
return try String(self).withCString(encodedAs: targetEncoding, body)
|
||||
}
|
||||
}
|
||||
|
||||
extension Substring : _SwiftStringView {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var _persistentContent: String {
|
||||
return String(self)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public // @testable
|
||||
var _ephemeralContent: String {
|
||||
return _persistentContent
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var _wholeString: String {
|
||||
return _slice._base
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public var _encodedOffsetRange: Range<Int> {
|
||||
return startIndex.encodedOffset..<endIndex.encodedOffset
|
||||
}
|
||||
}
|
||||
|
||||
extension Substring : CustomReflectable {
|
||||
public var customMirror: Mirror {
|
||||
return String(self).customMirror
|
||||
}
|
||||
}
|
||||
//extension Substring : CustomReflectable {
|
||||
// public var customMirror: Mirror { return String(self).customMirror }
|
||||
//}
|
||||
|
||||
//extension Substring : CustomPlaygroundQuickLookable {
|
||||
// @available(*, deprecated, message: "Substring.customPlaygroundQuickLook will be removed in a future Swift version")
|
||||
// public var customPlaygroundQuickLook: PlaygroundQuickLook {
|
||||
// return String(self).customPlaygroundQuickLook
|
||||
// }
|
||||
//}
|
||||
//
|
||||
extension Substring : CustomStringConvertible {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var description: String {
|
||||
return String(self)
|
||||
@inline(__always) get { return String(self) }
|
||||
}
|
||||
}
|
||||
|
||||
extension Substring : CustomDebugStringConvertible {
|
||||
public var debugDescription: String {
|
||||
return String(self).debugDescription
|
||||
}
|
||||
}
|
||||
|
||||
extension Substring : LosslessStringConvertible {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public init(_ content: String) {
|
||||
self.init(_base: content, content.startIndex ..< content.endIndex)
|
||||
}
|
||||
public var debugDescription: String { return String(self).debugDescription }
|
||||
}
|
||||
|
||||
//extension Substring : LosslessStringConvertible {
|
||||
// @inlinable @inline(__always)
|
||||
// public init(_ content: String) {
|
||||
// self.init(
|
||||
// Slice(base: content, bounds: content.startIndex..<content.endIndex))
|
||||
// }
|
||||
//}
|
||||
|
||||
// TODO(UTF8 merge): Can we just unify all these?
|
||||
extension Substring {
|
||||
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
public struct UTF8View {
|
||||
@@ -366,60 +347,49 @@ extension Substring.UTF8View : BidirectionalCollection {
|
||||
public typealias SubSequence = Substring.UTF8View
|
||||
|
||||
/// Creates an instance that slices `base` at `_bounds`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
internal init(_ base: String.UTF8View, _bounds: Range<Index>) {
|
||||
_slice = Slice(
|
||||
base: String(base._guts).utf8,
|
||||
bounds: _bounds)
|
||||
}
|
||||
|
||||
/// The entire String onto whose slice this view is a projection.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var _wholeString: String {
|
||||
return String(_slice._base._guts)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var _encodedOffsetRange: Range<Int> {
|
||||
return startIndex.encodedOffset..<endIndex.encodedOffset
|
||||
}
|
||||
|
||||
//
|
||||
// Plumb slice operations through
|
||||
//
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var startIndex: Index { return _slice.startIndex }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var endIndex: Index { return _slice.endIndex }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public subscript(index: Index) -> Element { return _slice[index] }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var indices: Indices { return _slice.indices }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func index(after i: Index) -> Index { return _slice.index(after: i) }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func formIndex(after i: inout Index) {
|
||||
_slice.formIndex(after: &i)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func index(_ i: Index, offsetBy n: Int) -> Index {
|
||||
return _slice.index(i, offsetBy: n)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func index(
|
||||
_ i: Index, offsetBy n: Int, limitedBy limit: Index
|
||||
) -> Index? {
|
||||
return _slice.index(i, offsetBy: n, limitedBy: limit)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func distance(from start: Index, to end: Index) -> Int {
|
||||
return _slice.distance(from: start, to: end)
|
||||
}
|
||||
@@ -436,28 +406,25 @@ extension Substring.UTF8View : BidirectionalCollection {
|
||||
_slice._failEarlyRangeCheck(range, bounds: bounds)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func index(before i: Index) -> Index { return _slice.index(before: i) }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func formIndex(before i: inout Index) {
|
||||
_slice.formIndex(before: &i)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public subscript(r: Range<Index>) -> Substring.UTF8View {
|
||||
// FIXME(strings): tests.
|
||||
_precondition(r.lowerBound >= startIndex && r.upperBound <= endIndex,
|
||||
"UTF8View index range out of bounds")
|
||||
return Substring.UTF8View(_wholeString.utf8, _bounds: r)
|
||||
return Substring.UTF8View(_slice.base, _bounds: r)
|
||||
}
|
||||
}
|
||||
|
||||
extension Substring {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var utf8: UTF8View {
|
||||
get {
|
||||
return UTF8View(_wholeString.utf8, _bounds: startIndex..<endIndex)
|
||||
return wholeString.utf8[startIndex..<endIndex]
|
||||
}
|
||||
set {
|
||||
self = Substring(newValue)
|
||||
@@ -467,9 +434,10 @@ extension Substring {
|
||||
/// Creates a Substring having the given content.
|
||||
///
|
||||
/// - Complexity: O(1)
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public init(_ content: UTF8View) {
|
||||
self = content._wholeString[content.startIndex..<content.endIndex]
|
||||
self = String(
|
||||
content._slice.base._guts
|
||||
)[content.startIndex..<content.endIndex]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -480,20 +448,20 @@ extension String {
|
||||
///
|
||||
/// - Complexity: O(N), where N is the length of the resulting `String`'s
|
||||
/// UTF-16.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public init?(_ codeUnits: Substring.UTF8View) {
|
||||
let wholeString = codeUnits._wholeString
|
||||
guard
|
||||
codeUnits.startIndex.samePosition(in: wholeString.unicodeScalars) != nil
|
||||
&& codeUnits.endIndex.samePosition(in: wholeString.unicodeScalars) != nil
|
||||
else { return nil }
|
||||
let guts = codeUnits._slice.base._guts
|
||||
guard guts.isOnUnicodeScalarBoundary(codeUnits._slice.startIndex),
|
||||
guts.isOnUnicodeScalarBoundary(codeUnits._slice.endIndex) else {
|
||||
return nil
|
||||
}
|
||||
|
||||
self = String(Substring(codeUnits))
|
||||
}
|
||||
}
|
||||
extension Substring {
|
||||
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
@_fixed_layout
|
||||
public struct UTF16View {
|
||||
@usableFromInline // FIXME(sil-serialize-all)
|
||||
@usableFromInline
|
||||
internal var _slice: Slice<String.UTF16View>
|
||||
}
|
||||
}
|
||||
@@ -505,98 +473,84 @@ extension Substring.UTF16View : BidirectionalCollection {
|
||||
public typealias SubSequence = Substring.UTF16View
|
||||
|
||||
/// Creates an instance that slices `base` at `_bounds`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
internal init(_ base: String.UTF16View, _bounds: Range<Index>) {
|
||||
_slice = Slice(
|
||||
base: String(base._guts).utf16,
|
||||
bounds: _bounds)
|
||||
}
|
||||
|
||||
/// The entire String onto whose slice this view is a projection.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var _wholeString: String {
|
||||
return String(_slice._base._guts)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var _encodedOffsetRange: Range<Int> {
|
||||
return startIndex.encodedOffset..<endIndex.encodedOffset
|
||||
}
|
||||
|
||||
//
|
||||
// Plumb slice operations through
|
||||
//
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var startIndex: Index { return _slice.startIndex }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var endIndex: Index { return _slice.endIndex }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public subscript(index: Index) -> Element { return _slice[index] }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var indices: Indices { return _slice.indices }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func index(after i: Index) -> Index { return _slice.index(after: i) }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func formIndex(after i: inout Index) {
|
||||
_slice.formIndex(after: &i)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func index(_ i: Index, offsetBy n: Int) -> Index {
|
||||
return _slice.index(i, offsetBy: n)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func index(
|
||||
_ i: Index, offsetBy n: Int, limitedBy limit: Index
|
||||
) -> Index? {
|
||||
return _slice.index(i, offsetBy: n, limitedBy: limit)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func distance(from start: Index, to end: Index) -> Int {
|
||||
return _slice.distance(from: start, to: end)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func _failEarlyRangeCheck(_ index: Index, bounds: Range<Index>) {
|
||||
_slice._failEarlyRangeCheck(index, bounds: bounds)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func _failEarlyRangeCheck(
|
||||
_ range: Range<Index>, bounds: Range<Index>
|
||||
) {
|
||||
_slice._failEarlyRangeCheck(range, bounds: bounds)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func index(before i: Index) -> Index { return _slice.index(before: i) }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func formIndex(before i: inout Index) {
|
||||
_slice.formIndex(before: &i)
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public subscript(r: Range<Index>) -> Substring.UTF16View {
|
||||
// FIXME(strings): tests.
|
||||
_precondition(r.lowerBound >= startIndex && r.upperBound <= endIndex,
|
||||
"UTF16View index range out of bounds")
|
||||
return Substring.UTF16View(_wholeString.utf16, _bounds: r)
|
||||
return Substring.UTF16View(_slice.base, _bounds: r)
|
||||
}
|
||||
}
|
||||
|
||||
extension Substring {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var utf16: UTF16View {
|
||||
get {
|
||||
return UTF16View(_wholeString.utf16, _bounds: startIndex..<endIndex)
|
||||
return wholeString.utf16[startIndex..<endIndex]
|
||||
}
|
||||
set {
|
||||
self = Substring(newValue)
|
||||
@@ -606,9 +560,10 @@ extension Substring {
|
||||
/// Creates a Substring having the given content.
|
||||
///
|
||||
/// - Complexity: O(1)
/// - Parameter content: The UTF-16 view whose `startIndex..<endIndex` bounds
///   are used to slice the string it was taken from.
// NOTE(review): `self` is assigned twice below. The first assignment slices
// `content._wholeString`; the second rebuilds a `String` from
// `content._slice.base._guts` and slices that. These look like the before and
// after sides of a rendered diff — confirm which is current.
@inlinable // FIXME(sil-serialize-all)
|
||||
public init(_ content: UTF16View) {
|
||||
self = content._wholeString[content.startIndex..<content.endIndex]
|
||||
self = String(
|
||||
content._slice.base._guts
|
||||
)[content.startIndex..<content.endIndex]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -619,20 +574,20 @@ extension String {
|
||||
///
|
||||
/// - Complexity: O(N), where N is the length of the resulting `String`'s
|
||||
/// UTF-16.
|
||||
// Failable conversion from a UTF-16 substring view: returns `nil` when either
// end of `codeUnits` does not fall on a Unicode scalar boundary, otherwise
// builds the result through `String(Substring(codeUnits))`.
// NOTE(review): the body contains two boundary checks — one using
// `samePosition(in: wholeString.unicodeScalars)`, one using
// `guts.isOnUnicodeScalarBoundary` — which look like the before/after sides of
// a rendered diff. Confirm which is current.
@inlinable // FIXME(sil-serialize-all)
|
||||
public init?(_ codeUnits: Substring.UTF16View) {
|
||||
let wholeString = codeUnits._wholeString
|
||||
guard
|
||||
codeUnits.startIndex.samePosition(in: wholeString.unicodeScalars) != nil
|
||||
&& codeUnits.endIndex.samePosition(in: wholeString.unicodeScalars) != nil
|
||||
else { return nil }
|
||||
let guts = codeUnits._slice.base._guts
|
||||
guard guts.isOnUnicodeScalarBoundary(codeUnits._slice.startIndex),
|
||||
guts.isOnUnicodeScalarBoundary(codeUnits._slice.endIndex) else {
|
||||
return nil
|
||||
}
|
||||
|
||||
self = String(Substring(codeUnits))
|
||||
}
|
||||
}
|
||||
// Declares `Substring.UnicodeScalarView`, a fixed-layout wrapper whose only
// stored property is a `Slice` over `String.UnicodeScalarView`.
extension Substring {
|
||||
@_fixed_layout // FIXME(sil-serialize-all)
|
||||
@_fixed_layout
|
||||
public struct UnicodeScalarView {
|
||||
// Backing slice; the collection members of this view forward to it.
@usableFromInline // FIXME(sil-serialize-all)
|
||||
@usableFromInline
|
||||
internal var _slice: Slice<String.UnicodeScalarView>
|
||||
}
|
||||
}
|
||||
@@ -644,98 +599,84 @@ extension Substring.UnicodeScalarView : BidirectionalCollection {
|
||||
public typealias SubSequence = Substring.UnicodeScalarView
|
||||
|
||||
/// Creates an instance that slices `base` at `_bounds`.
///
/// The slice's base is not `base` itself but the `unicodeScalars` view of a
/// `String` rebuilt from `base._guts`.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
internal init(_ base: String.UnicodeScalarView, _bounds: Range<Index>) {
|
||||
_slice = Slice(
|
||||
base: String(base._guts).unicodeScalars,
|
||||
bounds: _bounds)
|
||||
}
|
||||
|
||||
/// The entire String onto whose slice this view is a projection.
///
/// Built on each access from the guts of the slice's base view.
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var _wholeString: String {
|
||||
return String(_slice._base._guts)
|
||||
}
|
||||
|
||||
/// The half-open range formed by the `encodedOffset` of `startIndex` and of
/// `endIndex`.
@inlinable // FIXME(sil-serialize-all)
|
||||
internal var _encodedOffsetRange: Range<Int> {
|
||||
return startIndex.encodedOffset..<endIndex.encodedOffset
|
||||
}
|
||||
|
||||
//
|
||||
// Plumb slice operations through
|
||||
//
|
||||
/// The start index of the underlying slice.
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var startIndex: Index { return _slice.startIndex }
|
||||
|
||||
/// The end index of the underlying slice.
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var endIndex: Index { return _slice.endIndex }
|
||||
|
||||
/// Returns the element that `_slice` holds at `index`.
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public subscript(index: Index) -> Element { return _slice[index] }
|
||||
|
||||
/// The indices of the underlying slice.
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var indices: Indices { return _slice.indices }
|
||||
|
||||
/// Returns the index following `i`, as computed by `_slice`.
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func index(after i: Index) -> Index { return _slice.index(after: i) }
|
||||
|
||||
/// Advances `i` by one position in place by forwarding to `_slice`.
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func formIndex(after i: inout Index) {
|
||||
_slice.formIndex(after: &i)
|
||||
}
|
||||
|
||||
/// Returns the index `n` positions away from `i`, as computed by `_slice`.
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func index(_ i: Index, offsetBy n: Int) -> Index {
|
||||
return _slice.index(i, offsetBy: n)
|
||||
}
|
||||
|
||||
/// Forwards to `_slice.index(_:offsetBy:limitedBy:)` and returns its result
/// unchanged (an optional index).
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func index(
|
||||
_ i: Index, offsetBy n: Int, limitedBy limit: Index
|
||||
) -> Index? {
|
||||
return _slice.index(i, offsetBy: n, limitedBy: limit)
|
||||
}
|
||||
|
||||
/// Returns the distance between `start` and `end` as computed by `_slice`.
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func distance(from start: Index, to end: Index) -> Int {
|
||||
return _slice.distance(from: start, to: end)
|
||||
}
|
||||
|
||||
/// Single-index range check; delegates entirely to `_slice`.
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func _failEarlyRangeCheck(_ index: Index, bounds: Range<Index>) {
|
||||
_slice._failEarlyRangeCheck(index, bounds: bounds)
|
||||
}
|
||||
|
||||
/// Range-in-bounds check; delegates entirely to `_slice`.
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func _failEarlyRangeCheck(
|
||||
_ range: Range<Index>, bounds: Range<Index>
|
||||
) {
|
||||
_slice._failEarlyRangeCheck(range, bounds: bounds)
|
||||
}
|
||||
|
||||
/// Returns the index preceding `i`, as computed by `_slice`.
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func index(before i: Index) -> Index { return _slice.index(before: i) }
|
||||
|
||||
/// Moves `i` back by one position in place by forwarding to `_slice`.
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public func formIndex(before i: inout Index) {
|
||||
_slice.formIndex(before: &i)
|
||||
}
|
||||
|
||||
/// Returns the `Substring.UnicodeScalarView` covering `r`.
///
/// Traps with "UnicodeScalarView index range out of bounds" unless `r` lies
/// within `startIndex...endIndex` of this view.
// NOTE(review): two consecutive `return` statements follow. They look like the
// before/after sides of a rendered diff; as written only the first would run.
// Confirm which base (`_wholeString.unicodeScalars` or `_slice.base`) is
// current.
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public subscript(r: Range<Index>) -> Substring.UnicodeScalarView {
|
||||
// FIXME(strings): tests.
|
||||
_precondition(r.lowerBound >= startIndex && r.upperBound <= endIndex,
|
||||
"UnicodeScalarView index range out of bounds")
|
||||
return Substring.UnicodeScalarView(_wholeString.unicodeScalars, _bounds: r)
|
||||
return Substring.UnicodeScalarView(_slice.base, _bounds: r)
|
||||
}
|
||||
}
|
||||
|
||||
extension Substring {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public var unicodeScalars: UnicodeScalarView {
|
||||
get {
|
||||
return UnicodeScalarView(_wholeString.unicodeScalars, _bounds: startIndex..<endIndex)
|
||||
return wholeString.unicodeScalars[startIndex..<endIndex]
|
||||
}
|
||||
set {
|
||||
self = Substring(newValue)
|
||||
@@ -745,9 +686,10 @@ extension Substring {
|
||||
/// Creates a Substring having the given content.
|
||||
///
|
||||
/// - Complexity: O(1)
/// - Parameter content: The Unicode-scalar view whose `startIndex..<endIndex`
///   bounds are used to slice the string it was taken from.
// NOTE(review): `self` is assigned twice below. The first assignment slices
// `content._wholeString`; the second rebuilds a `String` from
// `content._slice.base._guts` and slices that. These look like the before and
// after sides of a rendered diff — confirm which is current.
@inlinable // FIXME(sil-serialize-all)
|
||||
public init(_ content: UnicodeScalarView) {
|
||||
self = content._wholeString[content.startIndex..<content.endIndex]
|
||||
self = String(
|
||||
content._slice.base._guts
|
||||
)[content.startIndex..<content.endIndex]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -756,7 +698,6 @@ extension String {
|
||||
///
|
||||
/// - Complexity: O(N), where N is the length of the resulting `String`'s
|
||||
/// UTF-16.
|
||||
// Converts the scalar view to a `Substring` first, then copies that into a new
// `String`.
@inlinable // FIXME(sil-serialize-all)
|
||||
public init(_ content: Substring.UnicodeScalarView) {
|
||||
self = String(Substring(content))
|
||||
}
|
||||
@@ -764,10 +705,9 @@ extension String {
|
||||
|
||||
// FIXME: The other String views should be RangeReplaceable too.
|
||||
extension Substring.UnicodeScalarView : RangeReplaceableCollection {
|
||||
/// Creates a view backed by a default-initialized `Slice`.
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public init() { _slice = Slice.init() }
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public mutating func replaceSubrange<C : Collection>(
|
||||
_ target: Range<Index>, with replacement: C
|
||||
) where C.Element == Element {
|
||||
@@ -776,18 +716,12 @@ extension Substring.UnicodeScalarView : RangeReplaceableCollection {
|
||||
}
|
||||
|
||||
extension Substring : RangeReplaceableCollection {
|
||||
/// Creates a Substring from any sequence of `Character`s.
// NOTE(review): the body holds two alternatives that look like the before and
// after sides of a rendered diff. One is a fast path that reuses the guts of a
// `_SwiftStringView`, falling back to `String(elements)`. The other is the
// single statement `self = String(elements)[...]`. Confirm which is current.
@inlinable // FIXME(sil-serialize-all)
|
||||
public init<S : Sequence>(_ elements: S)
|
||||
where S.Element == Character {
|
||||
let e0 = elements as? _SwiftStringView
|
||||
if _fastPath(e0 != nil), let e = e0 {
|
||||
self.init(e._wholeString._guts, e._encodedOffsetRange)
|
||||
} else {
|
||||
self.init(String(elements))
|
||||
}
|
||||
self = String(elements)[...]
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable // specialize
|
||||
public mutating func append<S : Sequence>(contentsOf elements: S)
|
||||
where S.Element == Character {
|
||||
var string = String(self)
|
||||
@@ -798,17 +732,14 @@ extension Substring : RangeReplaceableCollection {
|
||||
}
|
||||
|
||||
extension Substring {
|
||||
/// Copies this substring into a `String` and returns that string's
/// `lowercased()` result.
@inlinable // FIXME(sil-serialize-all)
|
||||
public func lowercased() -> String {
|
||||
return String(self).lowercased()
|
||||
}
|
||||
|
||||
/// Copies this substring into a `String` and returns that string's
/// `uppercased()` result.
@inlinable // FIXME(sil-serialize-all)
|
||||
public func uppercased() -> String {
|
||||
return String(self).uppercased()
|
||||
}
|
||||
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
public func filter(
|
||||
_ isIncluded: (Element) throws -> Bool
|
||||
) rethrows -> String {
|
||||
@@ -817,61 +748,54 @@ extension Substring {
|
||||
}
|
||||
|
||||
// `TextOutputStream` conformance: writing to a Substring appends to it.
extension Substring : TextOutputStream {
|
||||
/// Appends `other` to this substring via `append(contentsOf:)`.
@inlinable // FIXME(sil-serialize-all)
|
||||
public mutating func write(_ other: String) {
|
||||
append(contentsOf: other)
|
||||
}
|
||||
}
|
||||
|
||||
// `TextOutputStreamable` conformance for Substring.
extension Substring : TextOutputStreamable {
|
||||
/// Writes a `String` copy of this substring to `target`.
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable // specializable
|
||||
public func write<Target : TextOutputStream>(to target: inout Target) {
|
||||
target.write(String(self))
|
||||
}
|
||||
}
|
||||
|
||||
// Lets a Substring be written as a Unicode scalar literal; the literal arrives
// as a `String`.
// NOTE(review): two `self.init` calls appear in the body, which look like the
// before/after sides of a rendered diff — confirm which is current.
extension Substring : ExpressibleByUnicodeScalarLiteral {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public init(unicodeScalarLiteral value: String) {
|
||||
self.init(_base: value, value.startIndex ..< value.endIndex)
|
||||
self.init(value)
|
||||
}
|
||||
}
|
||||
// Lets a Substring be written as an extended-grapheme-cluster literal; the
// literal arrives as a `String`.
// NOTE(review): two `self.init` calls appear in the body, which look like the
// before/after sides of a rendered diff — confirm which is current.
extension Substring : ExpressibleByExtendedGraphemeClusterLiteral {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public init(extendedGraphemeClusterLiteral value: String) {
|
||||
self.init(_base: value, value.startIndex ..< value.endIndex)
|
||||
self.init(value)
|
||||
}
|
||||
}
|
||||
|
||||
// Lets a Substring be written as a string literal; the literal arrives as a
// `String`.
// NOTE(review): two `self.init` calls appear in the body, which look like the
// before/after sides of a rendered diff — confirm which is current.
extension Substring : ExpressibleByStringLiteral {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
public init(stringLiteral value: String) {
|
||||
self.init(_base: value, value.startIndex ..< value.endIndex)
|
||||
self.init(value)
|
||||
}
|
||||
}
|
||||
|
||||
//===--- String/Substring Slicing Support ---------------------------------===//
|
||||
/// In Swift 3.2, in the absence of type context,
|
||||
///
|
||||
/// someString[someString.startIndex..<someString.endIndex]
|
||||
///
|
||||
/// was deduced to be of type `String`. Therefore have a more-specific
|
||||
/// Swift-3-only `subscript` overload on `String` (and `Substring`) that
|
||||
/// continues to produce `String`.
|
||||
// String/Substring Slicing
|
||||
// Range subscript on `String`: bounds-checks `r`, then wraps
// `Slice(base: self, bounds: r)` in a `Substring`.
// NOTE(review): two `return` statements appear (one using the `_slice:` label,
// one unlabeled), which look like the before/after sides of a rendered diff —
// confirm which initializer is current.
extension String {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
@available(swift, introduced: 4)
|
||||
public subscript(r: Range<Index>) -> Substring {
|
||||
_boundsCheck(r)
|
||||
return Substring(
|
||||
_slice: Slice(base: self, bounds: r))
|
||||
return Substring(Slice(base: self, bounds: r))
|
||||
}
|
||||
}
|
||||
|
||||
// Range subscript on `Substring`: re-slices the stored `_slice` with `r` and
// wraps the result in a new `Substring`.
// NOTE(review): two `return` statements appear (one using the `_slice:` label,
// one unlabeled), which look like the before/after sides of a rendered diff —
// confirm which initializer is current.
extension Substring {
|
||||
@inlinable // FIXME(sil-serialize-all)
|
||||
@inlinable
|
||||
@available(swift, introduced: 4)
|
||||
public subscript(r: Range<Index>) -> Substring {
|
||||
return Substring(_slice: _slice[r])
|
||||
return Substring(_slice[r])
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -71,14 +71,40 @@ internal struct _ThreadLocalStorage {
|
||||
}
|
||||
|
||||
/// Returns the `uBreakIterator` held in the storage returned by
/// `getPointer()`, after pointing it at the given UTF-16 code units.
///
/// The visible post-change path opens the stored `uText` over the buffer with
/// `__swift_stdlib_utext_openUChars`, then attaches it to the iterator with
/// `__swift_stdlib_ubrk_setUText`. Each step traps if the ICU error code does
/// not report success.
// NOTE(review): both a `start:`/`count:` parameter pair and a `_ bufPtr:`
// parameter appear, along with a `__swift_stdlib_ubrk_setText` call; these look
// like interleaved before/after sides of a rendered diff. Confirm the current
// signature.
// NOTE(review): `baseAddress` is unwrapped unchecked, so this presumably
// expects a buffer with a non-nil base address — confirm against callers.
internal static func getUBreakIterator(
|
||||
start: UnsafePointer<UTF16.CodeUnit>,
|
||||
count: Int32
|
||||
_ bufPtr: UnsafeBufferPointer<UTF16.CodeUnit>
|
||||
) -> OpaquePointer {
|
||||
let tlsPtr = getPointer()
|
||||
let brkIter = tlsPtr[0].uBreakIterator
|
||||
let utext = tlsPtr[0].uText
|
||||
|
||||
var err = __swift_stdlib_U_ZERO_ERROR
|
||||
__swift_stdlib_ubrk_setText(brkIter, start, count, &err)
|
||||
|
||||
let start = bufPtr.baseAddress._unsafelyUnwrappedUnchecked
|
||||
_ = __swift_stdlib_utext_openUChars(
|
||||
utext, start, Int64(bufPtr.count), &err)
|
||||
_precondition(err.isSuccess, "Unexpected utext_openUChars failure")
|
||||
|
||||
__swift_stdlib_ubrk_setUText(brkIter, utext, &err)
|
||||
_precondition(err.isSuccess, "Unexpected ubrk_setUText failure")
|
||||
|
||||
return brkIter
|
||||
}
|
||||
|
||||
internal static func getUBreakIterator(
|
||||
_ bufPtr: UnsafeBufferPointer<UTF8.CodeUnit>
|
||||
) -> OpaquePointer {
|
||||
let tlsPtr = getPointer()
|
||||
let brkIter = tlsPtr[0].uBreakIterator
|
||||
let utext = tlsPtr[0].uText
|
||||
|
||||
var err = __swift_stdlib_U_ZERO_ERROR
|
||||
|
||||
let start = bufPtr.baseAddress._unsafelyUnwrappedUnchecked._asCChar
|
||||
_ = __swift_stdlib_utext_openUTF8(
|
||||
utext, start, Int64(bufPtr.count), &err)
|
||||
_precondition(err.isSuccess, "Unexpected utext_openUChars failure")
|
||||
|
||||
__swift_stdlib_ubrk_setUText(brkIter, utext, &err)
|
||||
_precondition(err.isSuccess, "Unexpected ubrk_setUText failure")
|
||||
|
||||
return brkIter
|
||||
|
||||
@@ -690,47 +690,37 @@ extension Unicode.Scalar.Properties {
|
||||
/// all current case mappings. In the event more space is needed, it will be
|
||||
/// allocated on the heap.
|
||||
// Applies the case-mapping function `u_strTo` to this scalar's UTF-16 code
// units and returns the mapped text as a `String`.
//
// NOTE(review): this span is a rendered diff with the before and after sides
// interleaved, so it does not parse as written. Three variants are visible:
//   1. a scratch-buffer fast path using `_Normalization._SegmentOutputBuffer`
//      that tolerates `U_BUFFER_OVERFLOW_ERROR`;
//   2. a heap `Array<UInt16>` path fed by `_scalar.withUTF16CodeUnits`;
//   3. a fixed 64-element `Array<UInt16>` path that passes the
//      `_utf16CodeUnits` tuple through `withUnsafePointer` and
//      `withMemoryRebound`, then builds the result with
//      `String._uncheckedFromUTF16`.
// Confirm which variant is current before relying on any one line.
// In the third variant any non-success ICU error, including buffer overflow,
// reaches `fatalError`; the in-code TODO notes the real count is not yet
// detected.
internal func _applyMapping(_ u_strTo: _U_StrToX) -> String {
|
||||
var scratchBuffer = _Normalization._SegmentOutputBuffer(allZeros: ())
|
||||
let count = scratchBuffer.withUnsafeMutableBufferPointer { bufPtr -> Int in
|
||||
return _scalar.withUTF16CodeUnits { utf16 in
|
||||
var err = __swift_stdlib_U_ZERO_ERROR
|
||||
let correctSize = u_strTo(
|
||||
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
|
||||
Int32(bufPtr.count),
|
||||
utf16.baseAddress._unsafelyUnwrappedUnchecked,
|
||||
Int32(utf16.count),
|
||||
"",
|
||||
&err)
|
||||
guard err.isSuccess ||
|
||||
err == __swift_stdlib_U_BUFFER_OVERFLOW_ERROR else {
|
||||
fatalError("Unexpected error case-converting Unicode scalar.")
|
||||
}
|
||||
return Int(correctSize)
|
||||
}
|
||||
}
|
||||
let utf16Length = UnicodeScalar(UInt32(_value))!.utf16.count
|
||||
var utf16 = _utf16CodeUnits
|
||||
|
||||
if _fastPath(count <= scratchBuffer.count) {
|
||||
scratchBuffer.count = count
|
||||
return String._fromWellFormedUTF16CodeUnits(scratchBuffer)
|
||||
}
|
||||
// TODO(UTF8 perf): Stack buffer first and then detect real count
|
||||
let count = 64
|
||||
var array = Array<UInt16>(repeating: 0, count: count)
|
||||
array.withUnsafeMutableBufferPointer { bufPtr in
|
||||
return _scalar.withUTF16CodeUnits { utf16 in
|
||||
var err = __swift_stdlib_U_ZERO_ERROR
|
||||
let correctSize = u_strTo(
|
||||
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
|
||||
Int32(bufPtr.count),
|
||||
utf16.baseAddress._unsafelyUnwrappedUnchecked,
|
||||
Int32(utf16.count),
|
||||
"",
|
||||
&err)
|
||||
guard err.isSuccess else {
|
||||
fatalError("Unexpected error case-converting Unicode scalar.")
|
||||
let len: Int = array.withUnsafeMutableBufferPointer { bufPtr in
|
||||
return withUnsafePointer(to: &utf16) {
|
||||
(tuplePtr) -> Int in
|
||||
return tuplePtr.withMemoryRebound(to: UInt16.self, capacity: 2) {
|
||||
(utf16Pointer) -> Int in
|
||||
var err = __swift_stdlib_U_ZERO_ERROR
|
||||
let correctSize = u_strTo(
|
||||
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
|
||||
Int32(bufPtr.count),
|
||||
utf16Pointer,
|
||||
Int32(utf16Length),
|
||||
"",
|
||||
&err)
|
||||
guard err.isSuccess else {
|
||||
fatalError("Unexpected error case-converting Unicode scalar.")
|
||||
}
|
||||
// TODO: _sanityCheck(count == correctSize, "inconsistent ICU behavior")
|
||||
return Int(correctSize)
|
||||
}
|
||||
_sanityCheck(count == correctSize, "inconsistent ICU behavior")
|
||||
}
|
||||
}
|
||||
return String._fromWellFormedUTF16CodeUnits(array[..<count])
|
||||
// TODO: replace `len` with `count`
|
||||
return array[..<len].withUnsafeBufferPointer {
|
||||
return String._uncheckedFromUTF16($0)
|
||||
}
|
||||
}
|
||||
|
||||
/// The lowercase mapping of the scalar.
|
||||
|
||||
@@ -10,390 +10,3 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Class-constrained interface to string storage that is addressed by integer
/// offset and read as `UInt16` units.
///
/// A conformer reports its `length`, returns the unit at a given offset, and
/// can copy a range of units into caller-provided memory.
public protocol _OpaqueString: class {
  /// Number of units addressable through `character(at:)`.
  // NOTE(review): presumably the UTF-16 length, mirroring NSString — confirm.
  var length: Int { get }

  /// Returns the unit stored at `index`.
  func character(at index: Int) -> UInt16

  // FIXME: This is not an NSString method; I'd like to use
  // `getCharacters(_:,range:)`, but it would be weird to define
  // `_SwiftNSRange` without an Objective-C runtime.
  /// Copies the units in `range` into memory beginning at `dest`.
  func copyCodeUnits(from range: Range<Int>, into dest: UnsafeMutablePointer<UInt16>)
}
|
||||
|
||||
/// An `unowned(unsafe)` reference to an opaque string object, together with the
/// `range` of offsets this value covers and a flag recording whether that
/// range is a slice of the object.
///
/// On Objective-C runtimes the object is a `_CocoaString`; elsewhere it is an
/// `_OpaqueString`.
@usableFromInline
@_fixed_layout
internal struct _UnmanagedOpaqueString {
#if _runtime(_ObjC) // FIXME unify
  @usableFromInline
  unowned(unsafe) let object: _CocoaString
#else
  @usableFromInline
  unowned(unsafe) let object: _OpaqueString
#endif

  /// Offsets into `object` covered by this value.
  @usableFromInline
  let range: Range<Int>

  /// `false` when created through the whole-object initializers below.
  @usableFromInline
  let isSlice: Bool

#if _runtime(_ObjC) // FIXME unify
  /// Memberwise initializer.
  @inlinable
  init(_ object: _CocoaString, range: Range<Int>, isSlice: Bool) {
    self.isSlice = isSlice
    self.range = range
    self.object = object
  }

  /// Covers the whole of `object`, asking CoreFoundation for its length.
  @inline(never)
  init(_ object: _CocoaString) {
    self.init(object, count: _stdlib_binary_CFStringGetLength(object))
  }

  /// Covers offsets `0..<count` of `object`, marked as not a slice.
  @inlinable
  init(_ object: _CocoaString, count: Int) {
    let whole: Range<Int> = 0..<count
    self.init(object, range: whole, isSlice: false)
  }
#else
  /// Memberwise initializer.
  @inlinable
  init(_ object: _OpaqueString, range: Range<Int>, isSlice: Bool) {
    self.isSlice = isSlice
    self.range = range
    self.object = object
  }

  /// Covers the whole of `object`, using its reported `length`.
  @inline(never)
  init(_ object: _OpaqueString) {
    let count = object.length
    self.init(object, count: count)
  }

  /// Covers offsets `0..<count` of `object`, marked as not a slice.
  @inlinable
  init(_ object: _OpaqueString, count: Int) {
    let whole: Range<Int> = 0..<count
    self.init(object, range: whole, isSlice: false)
  }
#endif
}
|
||||
|
||||
/// Sequence conformance: iterates the UTF-16 code units in `range`, pulling
/// them from the underlying object a chunk at a time through a fixed buffer.
extension _UnmanagedOpaqueString : Sequence {
  typealias Element = UTF16.CodeUnit

  /// Returns an iterator positioned at the lower bound of `range`.
  @inlinable
  func makeIterator() -> Iterator {
    let first = range.lowerBound
    return Iterator(self, startingAt: first)
  }

  /// Returns an iterator that begins at offset `position` and stops at
  /// `range.upperBound`.
  @inlinable // FIXME(sil-serialize-all)
  internal func makeIterator(startingAt position: Int) -> Iterator {
    return Iterator(self, startingAt: position)
  }

  @usableFromInline
  @_fixed_layout
  struct Iterator : IteratorProtocol {
    @usableFromInline
    internal typealias Element = UTF16.CodeUnit

#if _runtime(_ObjC) // FIXME unify
    @usableFromInline
    internal let _object: _CocoaString
#else
    @usableFromInline
    internal let _object: _OpaqueString
#endif

    /// Offsets that have not yet been copied into `_buffer`.
    @usableFromInline
    internal var _range: Range<Int>

    /// Most recently fetched chunk of code units.
    @usableFromInline
    internal var _buffer = _FixedArray16<Element>()

    /// Position within `_buffer` of the next unit to hand out.
    @usableFromInline
    internal var _bufferIndex: Int8 = 0

    @inlinable
    init(_ string: _UnmanagedOpaqueString, startingAt start: Int) {
      self._range = start..<string.range.upperBound
      self._object = string.object
    }

    /// Hands out the next buffered unit, refilling the buffer when it is
    /// exhausted; returns `nil` once no offsets remain.
    @inlinable
    @inline(__always)
    mutating func next() -> Element? {
      guard _fastPath(_bufferIndex < _buffer.count) else {
        if _slowPath(_range.isEmpty) { return nil }
        return _nextOnSlowPath()
      }
      let unit = _buffer[Int(_bufferIndex)]
      _bufferIndex += 1
      return unit
    }

    /// Refills `_buffer` with up to `_buffer.capacity` units from the front of
    /// `_range`, and returns the first of them.
    @usableFromInline
    @inline(never)
    mutating func _nextOnSlowPath() -> Element {
      // Fill buffer
      _sanityCheck(!_range.isEmpty)
      let chunkStart = _range.lowerBound
      let chunkEnd = Swift.min(
        chunkStart + _buffer.capacity,
        _range.upperBound)
      let chunk: Range<Int> = chunkStart..<chunkEnd
      let source = _UnmanagedOpaqueString(_object, range: chunk, isSlice: true)
      _buffer.count = chunk.count
      _buffer.withUnsafeMutableBufferPointer { dest in
        _sanityCheck(dest.count == chunk.count)
        source._copy(into: dest)
      }
      // Element 0 is returned directly below, so the next read starts at 1.
      _bufferIndex = 1
      _range = chunk.upperBound ..< _range.upperBound
      _fixLifetime(_object)
      return _buffer[0]
    }
  }
}
|
||||
|
||||
/// RandomAccessCollection conformance. Indices wrap absolute offsets into the
/// underlying object, so a slice shares index values with the string it was
/// taken from.
extension _UnmanagedOpaqueString : RandomAccessCollection {
  internal typealias IndexDistance = Int
  internal typealias Indices = Range<Index>

  @usableFromInline // FIXME(sil-serialize-all)
  internal typealias SubSequence = _UnmanagedOpaqueString

  /// Strideable wrapper around an integer offset.
  @_fixed_layout
  @usableFromInline
  struct Index : Strideable {
    @usableFromInline
    internal var _value: Int

    @inlinable
    @inline(__always)
    init(_ value: Int) {
      _value = value
    }

    /// Signed number of steps from `self` to `other`.
    @inlinable
    @inline(__always)
    func distance(to other: Index) -> Int {
      let delta = other._value - _value
      return delta
    }

    /// The index `n` steps away from `self`.
    @inlinable
    @inline(__always)
    func advanced(by n: Int) -> Index {
      var moved = self
      moved._value += n
      return moved
    }
  }

  @inlinable
  var startIndex: Index {
    let lower = range.lowerBound
    return Index(lower)
  }

  @inlinable
  var endIndex: Index {
    let upper = range.upperBound
    return Index(upper)
  }

  @inlinable
  var count: Int {
    return range.count
  }

  /// Returns the code unit at `position`, which must lie inside `range`.
  @inlinable // FIXME(sil-serialize-all)
  subscript(position: Index) -> UTF16.CodeUnit {
    let offset = position._value
    _sanityCheck(offset >= range.lowerBound)
    _sanityCheck(offset < range.upperBound)
#if _runtime(_ObjC) // FIXME unify
    return _cocoaStringSubscript(object, offset)
#else
    return object.character(at: offset)
#endif
  }

  /// Returns the sub-string covering `bounds`. The result is flagged as a
  /// slice if this value already is one or if `bounds` is shorter than `range`.
  @inlinable // FIXME(sil-serialize-all)
  subscript(bounds: Range<Index>) -> _UnmanagedOpaqueString {
    let lower = bounds.lowerBound._value
    let upper = bounds.upperBound._value
    _sanityCheck(lower >= range.lowerBound)
    _sanityCheck(upper <= range.upperBound)
    let sliced: Range<Int> = lower ..< upper
    let newSlice = isSlice || sliced.count != range.count
    return _UnmanagedOpaqueString(object, range: sliced, isSlice: newSlice)
  }
}
|
||||
|
||||
/// `_StringVariant` conformance: bounds checks, offset-based subscripts
/// (offsets are relative to `range.lowerBound`), bulk copies, and a
/// Unicode-scalar iterator.
extension _UnmanagedOpaqueString : _StringVariant {
  @usableFromInline internal typealias Encoding = Unicode.UTF16
  @usableFromInline internal typealias CodeUnit = Encoding.CodeUnit

  /// Always `false` for opaque strings.
  @inlinable
  var isASCII: Bool {
    @inline(__always) get { return false }
  }

  /// Traps unless `i` addresses an element inside `range`.
  @inlinable
  @inline(__always)
  func _boundsCheck(_ i: Index) {
    let offset = i._value
    _precondition(range.lowerBound <= offset && offset < range.upperBound,
      "String index is out of bounds")
  }

  /// Traps unless `range` (the argument) lies within this value's range.
  @inlinable
  @inline(__always)
  func _boundsCheck(_ range: Range<Index>) {
    let own = self.range
    _precondition(
      own.lowerBound <= range.lowerBound._value &&
      own.upperBound >= range.upperBound._value,
      "String index range is out of bounds")
  }

  /// Traps unless `offset` is in `0..<range.count`.
  @inlinable
  @inline(__always)
  func _boundsCheck(offset: Int) {
    _precondition(0 <= offset && offset < range.count,
      "String index is out of bounds")
  }

  /// Traps unless `range` (the argument) lies within `0...count`.
  @inlinable
  @inline(__always)
  func _boundsCheck(offsetRange range: Range<Int>) {
    _precondition(0 <= range.lowerBound && count >= range.upperBound,
      "String index range is out of bounds")
  }

  /// Returns the code unit `offset` positions past the start of `range`.
  @inlinable // FIXME(sil-serialize-all)
  subscript(offset: Int) -> UTF16.CodeUnit {
    _sanityCheck(offset >= 0 && offset < count)
    let absolute = range.lowerBound + offset
#if _runtime(_ObjC) // FIXME unify
    return _cocoaStringSubscript(object, absolute)
#else
    return object.character(at: absolute)
#endif
  }

  /// Returns the sub-string for a relative offset range.
  @inlinable // FIXME(sil-serialize-all)
  subscript(offsetRange: Range<Int>) -> _UnmanagedOpaqueString {
    _sanityCheck(offsetRange.lowerBound >= 0)
    _sanityCheck(offsetRange.upperBound <= range.count)
    let origin = range.lowerBound
    let absolute: Range<Int> =
      origin + offsetRange.lowerBound ..< origin + offsetRange.upperBound
    let newSlice = isSlice || absolute.count != range.count
    return _UnmanagedOpaqueString(object, range: absolute, isSlice: newSlice)
  }

  /// Returns the prefix ending before relative offset `offsetRange.upperBound`.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(offsetRange: PartialRangeUpTo<Int>) -> SubSequence {
    _sanityCheck(offsetRange.upperBound <= range.count)
    let origin = range.lowerBound
    let absolute: Range<Int> = origin ..< origin + offsetRange.upperBound
    let newSlice = isSlice || absolute.count != range.count
    return _UnmanagedOpaqueString(object, range: absolute, isSlice: newSlice)
  }

  /// Returns the prefix ending at (and including) relative offset
  /// `offsetRange.upperBound`.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(offsetRange: PartialRangeThrough<Int>) -> SubSequence {
    _sanityCheck(offsetRange.upperBound <= range.count)
    let origin = range.lowerBound
    let absolute: Range<Int> = origin ..< origin + offsetRange.upperBound + 1
    let newSlice = isSlice || absolute.count != range.count
    return _UnmanagedOpaqueString(object, range: absolute, isSlice: newSlice)
  }

  /// Returns the suffix starting at relative offset `offsetRange.lowerBound`.
  // NOTE(review): the check uses `<`, so a suffix starting exactly at
  // `range.count` (an empty tail) fails the sanity check — confirm intended.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(offsetRange: PartialRangeFrom<Int>) -> SubSequence {
    _sanityCheck(offsetRange.lowerBound < range.count)
    let absolute: Range<Int> =
      range.lowerBound + offsetRange.lowerBound ..< range.upperBound
    let newSlice = isSlice || absolute.count != range.count
    return _UnmanagedOpaqueString(object, range: absolute, isSlice: newSlice)
  }

  /// Copies the code units in `range` to the start of `dest`, which must hold
  /// at least `range.count` elements. Does nothing for an empty range.
  @inlinable // FIXME(sil-serialize-all)
  internal func _copy(
    into dest: UnsafeMutableBufferPointer<UTF16.CodeUnit>
  ) {
    _sanityCheck(dest.count >= range.count)
    if range.isEmpty { return }
    let target = dest.baseAddress!
#if _runtime(_ObjC) // FIXME unify
    _cocoaStringCopyCharacters(
      from: object,
      range: range,
      into: target)
#else
    object.copyCodeUnits(from: range, into: target)
#endif
  }

  /// Generic copy; only 16-bit targets are supported. Any other width hits
  /// `_sanityCheckFailure`.
  @inlinable // FIXME(sil-serialize-all)
  internal func _copy<TargetCodeUnit>(
    into dest: UnsafeMutableBufferPointer<TargetCodeUnit>
  )
  where TargetCodeUnit : FixedWidthInteger & UnsignedInteger {
    if TargetCodeUnit.bitWidth != 16 {
      _sanityCheckFailure("Narrowing copy from opaque strings is not implemented")
    }
    _sanityCheck(dest.count >= range.count)
    if range.isEmpty { return }
    // Reinterpret the 16-bit destination as UTF-16 code units.
    let raw = UnsafeMutableRawPointer(dest.baseAddress!)
    let d = raw.assumingMemoryBound(to: UTF16.CodeUnit.self)
#if _runtime(_ObjC) // FIXME unify
    _cocoaStringCopyCharacters(from: object, range: range, into: d)
#else
    object.copyCodeUnits(from: range, into: d)
#endif
  }

  /// Decodes Unicode scalars from the code-unit iterator using one unit of
  /// lookahead. Unpaired surrogates yield the replacement character.
  @usableFromInline // FIXME(sil-serialize-all)
  @_fixed_layout // FIXME(resilience)
  internal struct UnicodeScalarIterator : IteratorProtocol {
    var _base: _UnmanagedOpaqueString.Iterator
    var _peek: UTF16.CodeUnit?

    @usableFromInline // FIXME(sil-serialize-all)
    init(_ base: _UnmanagedOpaqueString) {
      self._base = base.makeIterator()
      self._peek = _base.next()
    }

    @usableFromInline // FIXME(sil-serialize-all)
    mutating func next() -> Unicode.Scalar? {
      if _slowPath(_peek == nil) { return nil }
      let lead = _peek._unsafelyUnwrappedUnchecked
      _peek = _base.next()
      if _fastPath(UTF16._isScalar(lead)) {
        return Unicode.Scalar(_unchecked: UInt32(lead))
      }
      // A surrogate pair needs a lead unit followed by a trail unit.
      guard UTF16.isLeadSurrogate(lead),
        let trail = _peek,
        UTF16.isTrailSurrogate(trail)
      else {
        return Unicode.Scalar._replacementCharacter
      }
      _peek = _base.next()
      return UTF16._decodeSurrogates(lead, trail)
    }
  }

  /// Returns a scalar iterator over this string.
  @usableFromInline // FIXME(sil-serialize-all)
  @inline(never)
  func makeUnicodeScalarIterator() -> UnicodeScalarIterator {
    return UnicodeScalarIterator(self)
  }
}
|
||||
|
||||
#if _runtime(_ObjC)
extension _UnmanagedOpaqueString {
  /// Returns a Cocoa string for this value: the underlying object itself when
  /// this value is not a slice, otherwise the result of slicing it to `range`.
  @usableFromInline
  @inline(never)
  internal func cocoaSlice() -> _CocoaString {
    if isSlice {
      // FIXME: This usually copies storage; maybe add an NSString subclass
      // for opaque slices?
      return _cocoaStringSlice(object, range)
    }
    return object
  }
}
#endif
|
||||
|
||||
@@ -11,289 +11,3 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
import SwiftShims
|
||||
|
||||
// `_UnmanagedString` specialized to 8-bit (`UInt8`) code units.
@usableFromInline
|
||||
internal typealias _UnmanagedASCIIString = _UnmanagedString<UInt8>
|
||||
|
||||
// `_UnmanagedString` specialized to UTF-16 code units.
@usableFromInline
|
||||
internal typealias _UnmanagedUTF16String = _UnmanagedString<UTF16.CodeUnit>
|
||||
|
||||
/// Copies `count` elements from `src` to `dst`, widening each one to the
/// larger `Target` type.
///
/// - Parameters:
///   - dst: Destination; must have room for `count` elements.
///   - src: Source; must provide `count` elements.
///   - count: Number of elements to copy; must not be negative.
@inlinable
@inline(__always)
internal
func memcpy_zext<Target, Source>(
  dst: UnsafeMutablePointer<Target>, src: UnsafePointer<Source>, count: Int
) where
  Target: FixedWidthInteger & UnsignedInteger,
  Source: FixedWidthInteger & UnsignedInteger
{
  _sanityCheck(Source.bitWidth < Target.bitWidth)
  _sanityCheck(count >= 0)
  // Don't use the for-in-range syntax to avoid precondition checking in Range.
  // This enables vectorization of the memcpy loop.
  var offset = 0
  while offset < count {
    dst[offset] = Target(src[offset])
    offset &+= 1
  }
}
|
||||
|
||||
/// Copies `count` elements from `src` to `dst`, truncating each one to the
/// narrower `Target` type (high bits are discarded).
///
/// - Parameters:
///   - dst: Destination; must have room for `count` elements.
///   - src: Source; must provide `count` elements.
///   - count: Number of elements to copy; must not be negative.
@inlinable
@inline(__always)
internal
func memcpy_trunc<Target, Source>(
  dst: UnsafeMutablePointer<Target>, src: UnsafePointer<Source>, count: Int
) where
  Target: FixedWidthInteger & UnsignedInteger,
  Source: FixedWidthInteger & UnsignedInteger
{
  _sanityCheck(Source.bitWidth > Target.bitWidth)
  _sanityCheck(count >= 0)
  // Don't use the for-in-range syntax to avoid precondition checking in Range.
  // This enables vectorization of the memcpy loop.
  var offset = 0
  while offset < count {
    dst[offset] = Target(truncatingIfNeeded: src[offset])
    offset &+= 1
  }
}
|
||||
|
||||
/// Copies `count` elements of the same integer type from `src` to `dst`, one
/// element at a time. A `count` of zero or less copies nothing.
@inlinable
@inline(__always)
internal
func memcpy_<Source>(
  dst: UnsafeMutablePointer<Source>, src: UnsafePointer<Source>, count: Int
) where Source: FixedWidthInteger & UnsignedInteger {
  // Don't use the for-in-range syntax to avoid precondition checking in Range.
  // This enables vectorization of the memcpy loop.
  var offset = 0
  while offset < count {
    dst[offset] = src[offset]
    offset &+= 1
  }
}
|
||||
|
||||
/// A non-owning view of `count` contiguous code units beginning at `start`.
///
/// The memberwise initializer sanity-checks that `CodeUnit` is `UInt8` or
/// `UInt16`.
@_fixed_layout
@usableFromInline
internal
struct _UnmanagedString<CodeUnit>
  where CodeUnit : FixedWidthInteger & UnsignedInteger {
  // TODO: Use the extra 13 bits
  //
  // StringGuts when representing UnmanagedStrings should have an extra 13 bits
  // *at least* to store whatever we want, e.g. flags. x86_64 ABI has at least
  // 13 bits due to:
  //   * 8 bits from count: 56-bit (max) address spaces means we need at most
  //     56-bit count
  //   * 5 bits from BridgeObject: 64 - 2 tagging - 56-bit address space - 1 bit
  //     designating UnsafeString
  //

  /// Address of the first code unit.
  @usableFromInline
  internal var start: UnsafePointer<CodeUnit>

  /// Number of code units in the view.
  @usableFromInline
  internal var count: Int

  @inlinable
  init(start: UnsafePointer<CodeUnit>, count: Int) {
    _sanityCheck(CodeUnit.self == UInt8.self || CodeUnit.self == UInt16.self)
    self.count = count
    self.start = start
  }

  /// Wraps the memory described by `bufPtr`. The base address is unwrapped
  /// without a check.
  @inlinable
  init(_ bufPtr: UnsafeBufferPointer<CodeUnit>) {
    let base = bufPtr.baseAddress._unsafelyUnwrappedUnchecked
    self.init(start: base, count: bufPtr.count)
  }
}
|
||||
|
||||
/// Derived pointer and buffer views over the same memory.
extension _UnmanagedString {
  /// Pointer one past the last code unit.
  @inlinable
  internal var end: UnsafePointer<CodeUnit> {
    return start.advanced(by: count)
  }

  /// `start` as an untyped pointer.
  @inlinable
  internal var rawStart: UnsafeRawPointer {
    let raw = UnsafeRawPointer(start)
    return raw
  }

  /// `end` as an untyped pointer.
  @inlinable
  internal var rawEnd: UnsafeRawPointer {
    let raw = UnsafeRawPointer(end)
    return raw
  }

  /// Typed buffer covering all `count` code units.
  @inlinable
  internal var buffer: UnsafeBufferPointer<CodeUnit> {
    return UnsafeBufferPointer(start: start, count: count)
  }

  /// Untyped buffer covering the bytes from `rawStart` up to `rawEnd`.
  @inlinable
  internal var rawBuffer: UnsafeRawBufferPointer {
    let byteCount = rawStart.distance(to: rawEnd)
    return UnsafeRawBufferPointer(start: rawStart, count: byteCount)
  }
}
|
||||
|
||||
extension _UnmanagedString : RandomAccessCollection {
  internal typealias Element = UTF16.CodeUnit
  // An integer offset cannot serve as the Index type, because Collection
  // requires a SubSequence to share indices with the collection it was sliced
  // from. Pointers are used as indices instead; integer-offset subscripts are
  // still offered as a convenience, in a separate extension below.
  @usableFromInline // FIXME(sil-serialize-all)
  internal typealias Index = UnsafePointer<CodeUnit>
  internal typealias IndexDistance = Int
  internal typealias Indices = Range<Index>

  @usableFromInline // FIXME(sil-serialize-all)
  internal typealias SubSequence = _UnmanagedString

  /// The first valid index, which is `start`.
  @inlinable
  internal
  var startIndex: Index {
    return start
  }

  /// The past-the-end index, which is `end`.
  @inlinable
  internal
  var endIndex: Index {
    return end
  }

  /// The code unit stored at `position`, converted to a UTF-16 code unit.
  @inlinable
  internal subscript(position: Index) -> UTF16.CodeUnit {
    @inline(__always)
    get {
      _sanityCheck(position >= start && position < end)
      let unit = position.pointee
      return UTF16.CodeUnit(unit)
    }
  }

  /// A view of the code units between the two pointers in `bounds`.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(_ bounds: Range<Index>) -> SubSequence {
    let lower = bounds.lowerBound
    let upper = bounds.upperBound
    _sanityCheck(lower >= start && upper <= end)
    // The element count is the typed-pointer distance between the bounds.
    return _UnmanagedString(start: lower, count: lower.distance(to: upper))
  }
}
|
||||
|
||||
extension _UnmanagedString : _StringVariant {
  /// Whether this string is treated as ASCII, decided purely by the width of
  /// `CodeUnit`.
  @inlinable
  internal var isASCII: Bool {
    // NOTE: For now, single byte means ASCII. Might change in future
    return CodeUnit.bitWidth == 8
  }

  /// The code unit at integer `offset` from `start`, converted to a UTF-16
  /// code unit.
  @inlinable
  internal subscript(offset: Int) -> UTF16.CodeUnit {
    @inline(__always)
    get {
      _sanityCheck(offset >= 0 && offset < count)
      return UTF16.CodeUnit(start[offset])
    }
  }

  /// A view of the code units in the half-open offset range `offsetRange`.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(offsetRange: Range<Int>) -> _UnmanagedString {
    _sanityCheck(offsetRange.lowerBound >= 0 && offsetRange.upperBound <= count)
    return _UnmanagedString(
      start: start + offsetRange.lowerBound,
      count: offsetRange.count)
  }

  /// A view of the code units from `offsetRange.lowerBound` to the end.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(offsetRange: PartialRangeFrom<Int>) -> SubSequence {
    // The lower bound must not exceed `count`; otherwise the computed count
    // below would be negative.
    _sanityCheck(
      offsetRange.lowerBound >= 0 && offsetRange.lowerBound <= count)
    return _UnmanagedString(
      start: start + offsetRange.lowerBound,
      count: self.count - offsetRange.lowerBound
    )
  }

  /// A view of the code units before offset `offsetRange.upperBound`.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(offsetRange: PartialRangeUpTo<Int>) -> SubSequence {
    // The upper bound becomes the new count, so it must be non-negative.
    _sanityCheck(
      offsetRange.upperBound >= 0 && offsetRange.upperBound <= count)
    return _UnmanagedString(
      start: start,
      count: offsetRange.upperBound
    )
  }

  /// A view of the code units up to and including offset
  /// `offsetRange.upperBound`.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(offsetRange: PartialRangeThrough<Int>) -> SubSequence {
    // The upper bound is inclusive, so it must name an existing offset.
    _sanityCheck(
      offsetRange.upperBound >= 0 && offsetRange.upperBound < count)
    return _UnmanagedString(
      start: start,
      count: offsetRange.upperBound + 1
    )
  }

  /// Copies all code units of this string into `target`.
  ///
  /// Does nothing when the string is empty. Otherwise the sanity checks
  /// expect `target` to hold at least `count` elements and `TargetCodeUnit`
  /// to be `UInt8` or `UInt16`. Equal-width code units are copied with
  /// `_memcpy`; differing widths go through `memcpy_zext` (8-bit source) or
  /// `memcpy_trunc` (16-bit source), which are presumably element-wise
  /// widening and narrowing copies.
  @inlinable // FIXME(sil-serialize-all)
  @inline(__always)
  internal func _copy<TargetCodeUnit>(
    into target: UnsafeMutableBufferPointer<TargetCodeUnit>
  ) where TargetCodeUnit : FixedWidthInteger & UnsignedInteger {
    _sanityCheck(
      TargetCodeUnit.self == UInt8.self || TargetCodeUnit.self == UInt16.self)
    guard count > 0 else { return }
    _sanityCheck(target.count >= self.count)
    if CodeUnit.bitWidth == TargetCodeUnit.bitWidth {
      // Same element width: a plain byte copy of count * stride bytes.
      _memcpy(
        dest: target.baseAddress!,
        src: self.start,
        size: UInt(self.count * MemoryLayout<CodeUnit>.stride))
    } else if CodeUnit.bitWidth == 8 {
      _sanityCheck(TargetCodeUnit.bitWidth == 16)
      memcpy_zext(
        dst: target.baseAddress._unsafelyUnwrappedUnchecked,
        src: start,
        count: self.count)
    } else {
      _sanityCheck(CodeUnit.bitWidth == 16 && TargetCodeUnit.bitWidth == 8)
      // NOTE(review): this predicate only rejects code units >= 255, which is
      // looser than the "ASCII only" message suggests -- confirm the intent.
      _sanityCheck(self.filter { $0 >= UInt8.max }.isEmpty, "ASCII only")
      memcpy_trunc(
        dst: target.baseAddress._unsafelyUnwrappedUnchecked,
        src: start,
        count: self.count)
    }
  }

  /// Iterates over the Unicode scalars of an `_UnmanagedString`, reading its
  /// code units as UTF-16 and substituting the replacement character for
  /// unpaired surrogates.
  @_fixed_layout
  @usableFromInline // FIXME(sil-serialize-all)
  internal struct UnicodeScalarIterator : IteratorProtocol {
    /// The string being iterated.
    @usableFromInline // FIXME(sil-serialize-all)
    let _base: _UnmanagedString
    /// Offset, in code units, of the next unit to read.
    @usableFromInline // FIXME(sil-serialize-all)
    var _offset: Int

    /// Creates an iterator positioned at the first code unit of `base`.
    @inlinable // FIXME(sil-serialize-all)
    init(_ base: _UnmanagedString) {
      self._base = base
      self._offset = 0
    }

    /// Returns the next scalar, or `nil` once every code unit is consumed.
    @inlinable // FIXME(sil-serialize-all)
    mutating func next() -> Unicode.Scalar? {
      if _slowPath(_offset == _base.count) { return nil }
      let u0 = _base[_offset]
      // Fast path: 8-bit storage, or a unit that is a scalar on its own.
      if _fastPath(CodeUnit.bitWidth == 8 || UTF16._isScalar(u0)) {
        _offset += 1
        return Unicode.Scalar(u0)
      }
      // A lead surrogate immediately followed by a trail surrogate decodes
      // to a single scalar and consumes both units.
      if UTF16.isLeadSurrogate(u0) && _offset + 1 < _base.count {
        let u1 = _base[_offset + 1]
        if UTF16.isTrailSurrogate(u1) {
          _offset += 2
          return UTF16._decodeSurrogates(u0, u1)
        }
      }
      // Unpaired surrogate: consume one unit and emit the replacement
      // character.
      _offset += 1
      return Unicode.Scalar._replacementCharacter
    }
  }

  /// Returns an iterator over this string's Unicode scalars.
  @inlinable
  func makeUnicodeScalarIterator() -> UnicodeScalarIterator {
    return UnicodeScalarIterator(self)
  }
}
|
||||
|
||||
Reference in New Issue
Block a user