[String] Drop in initial UTF-8 String prototype

This is a giant squashing of a lot of individual changes prototyping a
switch of String in Swift 5 to be natively encoded as UTF-8. It
includes what's necessary for a functional prototype, dropping some
history, but still leaves plenty of history available for future
commits.

My apologies to anyone trying to do code archeology between this
commit and the one prior. This was the lesser of evils.
This commit is contained in:
Michael Ilseman
2018-11-03 16:23:37 -07:00
parent b2f60bf978
commit 4ab45dfe20
46 changed files with 4320 additions and 10315 deletions
@@ -28,7 +28,7 @@ import Foundation
//
func findSubstring(_ haystack: Substring, _ needle: String) -> String.Index? {
return findSubstring(String(haystack._ephemeralContent), needle)
return findSubstring(haystack._ephemeralString, needle)
}
func findSubstring(_ string: String, _ substring: String) -> String.Index? {
@@ -66,6 +66,21 @@ extension Optional {
}
#endif
/// Copies a null-terminated C string into a newly created array of `CChar`,
/// so the bytes remain available after a pointer with possibly-transient
/// lifetime goes away.
///
/// - Parameter p: A pointer to a null-terminated string, or `nil`.
/// - Returns: The code units that precede the terminator followed by a
///   single zero, or `nil` if `p` is `nil`.
internal func _persistCString(_ p: UnsafePointer<CChar>?) -> [CChar]? {
  guard let source = p else { return nil }
  // Number of code units in front of the terminating zero.
  let length = UTF8._nullCodeUnitOffset(in: source)
  var persisted = [CChar](UnsafeBufferPointer(start: source, count: length))
  persisted.append(0)
  return persisted
}
extension String {
//===--- Class Methods --------------------------------------------------===//
//===--------------------------------------------------------------------===//
+2 -2
View File
@@ -72,7 +72,7 @@ extension Substring : _ObjectiveCBridgeable {
result: inout Substring?
) {
let s = String(x)
result = Substring(_base: s, s.startIndex ..< s.endIndex)
result = s[...]
}
public static func _conditionallyBridgeFromObjectiveC(
@@ -91,7 +91,7 @@ extension Substring : _ObjectiveCBridgeable {
// string; map it to an empty substring.
if _slowPath(source == nil) { return Substring() }
let s = String(source!)
return Substring(_base: s, s.startIndex ..< s.endIndex)
return s[...]
}
}
+1 -1
View File
@@ -39,7 +39,6 @@ set(SWIFTLIB_ESSENTIAL
Builtin.swift
BuiltinMath.swift.gyb
Character.swift
CharacterUnicodeScalars.swift
CocoaArray.swift
Codable.swift.gyb
Collection.swift
@@ -47,6 +46,7 @@ set(SWIFTLIB_ESSENTIAL
Comparable.swift
CompilerProtocols.swift
ContiguousArray.swift
ContiguouslyStored.swift
ClosedRange.swift
ContiguousArrayBuffer.swift
CString.swift
+48 -84
View File
@@ -44,7 +44,9 @@ extension String {
///
/// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
public init(cString: UnsafePointer<CChar>) {
self = _decodeValidCString(cString, repair: true)
let len = UTF8._nullCodeUnitOffset(in: cString)
self = String._fromUTF8Repairing(
UnsafeBufferPointer(start: cString._asUInt8, count: len)).0
}
/// Creates a new string by copying the null-terminated UTF-8 data referenced
@@ -53,7 +55,9 @@ extension String {
/// This is identical to `init(cString: UnsafePointer<CChar>)` but operates on an
/// unsigned sequence of bytes.
public init(cString: UnsafePointer<UInt8>) {
self = _decodeValidCString(cString, repair: true)
let len = UTF8._nullCodeUnitOffset(in: cString)
self = String._fromUTF8Repairing(
UnsafeBufferPointer(start: cString, count: len)).0
}
/// Creates a new string by copying and validating the null-terminated UTF-8
@@ -83,9 +87,11 @@ extension String {
///
/// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
public init?(validatingUTF8 cString: UnsafePointer<CChar>) {
guard let str = _decodeCString(cString, repair: false) else {
return nil
}
let len = UTF8._nullCodeUnitOffset(in: cString)
guard let str = String._tryFromUTF8(
UnsafeBufferPointer(start: cString._asUInt8, count: len))
else { return nil }
self = str
}
@@ -133,92 +139,50 @@ extension String {
/// ill-formed sequence is detected, this method returns `nil`.
@_specialize(where Encoding == Unicode.UTF8)
@_specialize(where Encoding == Unicode.UTF16)
@inlinable // Fold away specializations
public static func decodeCString<Encoding : _UnicodeEncoding>(
_ cString: UnsafePointer<Encoding.CodeUnit>?,
as encoding: Encoding.Type,
repairingInvalidCodeUnits isRepairing: Bool = true)
-> (result: String, repairsMade: Bool)? {
repairingInvalidCodeUnits isRepairing: Bool = true
) -> (result: String, repairsMade: Bool)? {
guard let cPtr = cString else { return nil }
guard let cString = cString else {
return nil
if _fastPath(encoding == Unicode.UTF8.self) {
let ptr = UnsafeRawPointer(cPtr).assumingMemoryBound(to: UInt8.self)
let len = UTF8._nullCodeUnitOffset(in: ptr)
let codeUnits = UnsafeBufferPointer(start: ptr, count: len)
if isRepairing {
return String._fromUTF8Repairing(codeUnits)
} else {
guard let str = String._tryFromUTF8(codeUnits) else { return nil }
return (str, false)
}
}
var end = cString
var end = cPtr
while end.pointee != 0 { end += 1 }
let len = end - cString
return _decodeCString(
cString, as: encoding, length: len,
repairingInvalidCodeUnits: isRepairing)
let len = end - cPtr
let codeUnits = UnsafeBufferPointer(start: cPtr, count: len)
return String._fromCodeUnits(
codeUnits, encoding: encoding, repair: isRepairing)
}
}
/// Copies a null-terminated C string into a newly created array of `CChar`,
/// so the bytes remain available after a pointer with possibly-transient
/// lifetime goes away.
///
/// - Parameter p: A pointer to a null-terminated string, or `nil`.
/// - Returns: The code units that precede the terminator followed by a
///   single zero, or `nil` if `p` is `nil`.
public func _persistCString(_ p: UnsafePointer<CChar>?) -> [CChar]? {
  guard let source = p else { return nil }
  // Length in code units, not counting the terminating zero.
  let length = Int(_swift_stdlib_strlen(source))
  var persisted = [CChar](UnsafeBufferPointer(start: source, count: length))
  persisted.append(0)
  return persisted
}
internal func _decodeValidCString(
_ cString: UnsafePointer<Int8>, repair: Bool
) -> String {
let len = UTF8._nullCodeUnitOffset(in: cString)
return cString.withMemoryRebound(to: UInt8.self, capacity: len) {
(ptr: UnsafePointer<UInt8>) -> String in
let bufPtr = UnsafeBufferPointer(start: ptr, count: len)
return String._fromWellFormedUTF8(bufPtr, repair: repair)
/// Creates a string from the null-terminated sequence of bytes at the given
/// pointer.
///
/// - Parameters:
/// - nullTerminatedCodeUnits: A pointer to a sequence of contiguous code
/// units in the encoding specified in `sourceEncoding`, ending just
/// before the first zero code unit.
/// - sourceEncoding: The encoding in which the code units should be
/// interpreted.
@_specialize(where Encoding == Unicode.UTF8)
@_specialize(where Encoding == Unicode.UTF16)
@inlinable // Fold away specializations
public init<Encoding: Unicode.Encoding>(
decodingCString ptr: UnsafePointer<Encoding.CodeUnit>,
as sourceEncoding: Encoding.Type
) {
self = String.decodeCString(ptr, as: sourceEncoding)!.0
}
}
/// Builds a `String` from the null-terminated UTF-8 code units at `cString`.
///
/// The code units in front of the first zero are handed, together with
/// `repair`, to `String._fromWellFormedUTF8`.
internal func _decodeValidCString(
  _ cString: UnsafePointer<UInt8>, repair: Bool
) -> String {
  let codeUnits = UnsafeBufferPointer(
    start: cString, count: UTF8._nullCodeUnitOffset(in: cString))
  return String._fromWellFormedUTF8(codeUnits, repair: repair)
}
/// Decodes the null-terminated `Int8` code units at `cString` as UTF-8.
///
/// The memory is rebound to `UInt8` for the duration of the call and the
/// code units in front of the first zero are handed, together with `repair`,
/// to `String._fromUTF8`, whose optional result is returned unchanged.
internal func _decodeCString(
  _ cString: UnsafePointer<Int8>, repair: Bool
) -> String? {
  let byteCount = UTF8._nullCodeUnitOffset(in: cString)
  return cString.withMemoryRebound(to: UInt8.self, capacity: byteCount) {
    (bytes: UnsafePointer<UInt8>) -> String? in
    return String._fromUTF8(
      UnsafeBufferPointer(start: bytes, count: byteCount), repair: repair)
  }
}
/// Decodes the null-terminated UTF-8 code units at `cString`.
///
/// The code units in front of the first zero are handed, together with
/// `repair`, to `String._fromUTF8`, whose optional result is returned
/// unchanged.
internal func _decodeCString(
  _ cString: UnsafePointer<UInt8>, repair: Bool
) -> String? {
  let codeUnits = UnsafeBufferPointer(
    start: cString, count: UTF8._nullCodeUnitOffset(in: cString))
  return String._fromUTF8(codeUnits, repair: repair)
}
/// Decodes `length` code units starting at `cString` in the given encoding.
///
/// Unlike the pointer-only variants, this internal helper is told the length
/// by its caller instead of scanning for a terminator.
///
/// - Returns: The decoded string paired with the error flag reported by
///   `_StringGuts.fromCodeUnits`, or `nil` when that call yields no guts.
internal func _decodeCString<Encoding : _UnicodeEncoding>(
  _ cString: UnsafePointer<Encoding.CodeUnit>,
  as encoding: Encoding.Type, length: Int,
  repairingInvalidCodeUnits isRepairing: Bool = true)
  -> (result: String, repairsMade: Bool)? {
  let codeUnits = UnsafeBufferPointer<Encoding.CodeUnit>(
    start: cString, count: length)
  let decoded = _StringGuts.fromCodeUnits(
    codeUnits, encoding: encoding, repairIllFormedSequences: isRepairing)
  guard let guts = decoded.0 else { return nil }
  return (result: String(guts), repairsMade: decoded.1)
}
+72 -320
View File
@@ -63,74 +63,61 @@
/// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value
@_fixed_layout
public struct Character {
// Fundamentally, it is just a String, but it is optimized for the common case
// where the UTF-16 representation fits in 63 bits. The remaining bit is used
// to discriminate between small and large representations. Since a grapheme
// cluster cannot have U+0000 anywhere but in its first scalar, we can store
// zero in empty code units above the first one.
@_frozen // FIXME(sil-serialize-all)
@usableFromInline
internal enum Representation {
case smallUTF16(Builtin.Int63)
case large(_UTF16StringStorage)
}
internal var _str: String
@inlinable @inline(__always)
internal init(unchecked str: String) {
self._str = str
_invariantCheck()
}
}
extension Character {
/// Validates the backing string when `INTERNAL_CHECKS_ENABLED` is defined:
/// `_str.count` must be 1 and its guts must report `isFastUTF8`. Without
/// that flag the body is empty.
@inlinable @inline(__always)
internal func _invariantCheck() {
#if INTERNAL_CHECKS_ENABLED
_sanityCheck(_str.count == 1)
_sanityCheck(_str._guts.isFastUTF8)
#endif
}
}
extension Character {
@usableFromInline
internal var _representation: Representation
typealias UTF8View = String.UTF8View
// FIXME(sil-serialize-all): Should be @inlinable
// <rdar://problem/34557187>
internal static func _smallValue(_ value: Builtin.Int63) -> UInt64 {
return UInt64(Builtin.zext_Int63_Int64(value))
@inlinable
internal var utf8: UTF8View {
return _str.utf8
}
@usableFromInline // FIXME(sil-serialize-all)
@usableFromInline
typealias UTF16View = String.UTF16View
@inlinable // FIXME(sil-serialize-all)
@inlinable
internal var utf16: UTF16View {
return String(self).utf16
return _str.utf16
}
@inlinable // FIXME(sil-serialize-all)
internal init(_smallRepresentation b: _SmallUTF16) {
_sanityCheck(Int64(b._storage) >= 0)
_representation = .smallUTF16(
Builtin.trunc_Int64_Int63(b._storage._value))
}
@inlinable // FIXME(sil-serialize-all)
internal init(_largeRepresentation storage: _UTF16StringStorage) {
_representation = .large(storage)
}
/// Creates a Character from a String that is already known to require the
/// large representation.
///
/// - Note: `s` should contain only a single grapheme, but we can't require
/// that formally because of grapheme cluster literals and the shifting
/// sands of Unicode. https://bugs.swift.org/browse/SR-4955
@inlinable // FIXME(sil-serialize-all)
internal init(_largeRepresentationString s: String) {
let storage = s._guts._extractNativeStorage(of: UTF16.CodeUnit.self)
self.init(_largeRepresentation: storage)
public typealias UnicodeScalarView = String.UnicodeScalarView
@inlinable
public var unicodeScalars: UnicodeScalarView {
return _str.unicodeScalars
}
}
extension Character
: _ExpressibleByBuiltinUTF16ExtendedGraphemeClusterLiteral,
ExpressibleByExtendedGraphemeClusterLiteral
: _ExpressibleByBuiltinUTF16ExtendedGraphemeClusterLiteral,
ExpressibleByExtendedGraphemeClusterLiteral
{
/// Creates a character containing the given Unicode scalar value.
///
/// - Parameter content: The Unicode scalar value to convert into a character.
@inlinable // FIXME(sil-serialize-all)
@inlinable @inline(__always)
public init(_ content: Unicode.Scalar) {
let content16 = UTF16.encode(content)._unsafelyUnwrappedUnchecked
_representation = .smallUTF16(
Builtin.zext_Int32_Int63(content16._storage._value))
self.init(unchecked: String(content))
}
@inlinable // FIXME(sil-serialize-all)
@inlinable @inline(__always)
@_effects(readonly)
public init(_builtinUnicodeScalarLiteral value: Builtin.Int32) {
self.init(Unicode.Scalar(_builtinUnicodeScalarLiteral: value))
@@ -138,94 +125,30 @@ extension Character
// Inlining ensures that the whole constructor can be folded away to a single
// integer constant in case of small character literals.
@inlinable // FIXME(sil-serialize-all)
@inline(__always)
@inlinable @inline(__always)
@_effects(readonly)
public init(
_builtinExtendedGraphemeClusterLiteral start: Builtin.RawPointer,
utf8CodeUnitCount: Builtin.Word,
isASCII: Builtin.Int1
) {
let utf8 = UnsafeBufferPointer(
start: UnsafePointer<Unicode.UTF8.CodeUnit>(start),
count: Int(utf8CodeUnitCount))
if utf8.count == 1 {
_representation = .smallUTF16(
Builtin.zext_Int8_Int63(utf8.first._unsafelyUnwrappedUnchecked._value))
return
}
FastPath:
repeat {
var shift = 0
let maxShift = 64 - 16
var bits: UInt64 = 0
for s8 in Unicode._ParsingIterator(
codeUnits: utf8.makeIterator(), parser: UTF8.ForwardParser()) {
let s16
= UTF16.transcode(s8, from: UTF8.self)._unsafelyUnwrappedUnchecked
for u16 in s16 {
guard _fastPath(shift <= maxShift) else { break FastPath }
bits |= UInt64(u16) &<< shift
shift += 16
}
}
guard _fastPath(Int64(truncatingIfNeeded: bits) >= 0) else {
break FastPath
}
_representation = .smallUTF16(Builtin.trunc_Int64_Int63(bits._value))
return
}
while false
// For anything that doesn't fit in 63 bits, build the large
// representation.
self = Character(_largeRepresentationString:
String(
_builtinExtendedGraphemeClusterLiteral: start,
utf8CodeUnitCount: utf8CodeUnitCount,
isASCII: isASCII))
self.init(unchecked: String(
_builtinExtendedGraphemeClusterLiteral: start,
utf8CodeUnitCount: utf8CodeUnitCount,
isASCII: isASCII))
}
// Inlining ensures that the whole constructor can be folded away to a single
// integer constant in case of small character literals.
@inlinable // FIXME(sil-serialize-all)
@inline(__always)
@inlinable @inline(__always)
@_effects(readonly)
public init(
_builtinExtendedGraphemeClusterLiteral start: Builtin.RawPointer,
utf16CodeUnitCount: Builtin.Word
) {
let utf16 = _UnmanagedString<UTF16.CodeUnit>(
start: UnsafePointer(start),
count: Int(utf16CodeUnitCount))
switch utf16.count {
case 1:
_representation = .smallUTF16(Builtin.zext_Int16_Int63(utf16[0]._value))
case 2:
let bits = UInt32(utf16[0]) | UInt32(utf16[1]) &<< 16
_representation = .smallUTF16(Builtin.zext_Int32_Int63(bits._value))
case 3:
let bits = UInt64(utf16[0])
| UInt64(utf16[1]) &<< 16
| UInt64(utf16[2]) &<< 32
_representation = .smallUTF16(Builtin.trunc_Int64_Int63(bits._value))
case 4 where utf16[3] < 0x8000:
let bits = UInt64(utf16[0])
| UInt64(utf16[1]) &<< 16
| UInt64(utf16[2]) &<< 32
| UInt64(utf16[3]) &<< 48
_representation = .smallUTF16(Builtin.trunc_Int64_Int63(bits._value))
default:
// TODO(SSO): small check
_representation = .large(
_StringGuts(_large: utf16)._extractNativeStorage())
}
self.init(unchecked: String(
_builtinUTF16StringLiteral: start,
utf16CodeUnitCount: utf16CodeUnitCount))
}
/// Creates a character with the specified value.
@@ -240,9 +163,9 @@ extension Character
///
/// The assignment to the `oBreve` constant calls this initializer behind the
/// scenes.
@inlinable // FIXME(sil-serialize-all)
@inlinable @inline(__always)
public init(extendedGraphemeClusterLiteral value: Character) {
self = value
self.init(unchecked: value._str)
}
/// Creates a character from a single-character string.
@@ -255,229 +178,53 @@ extension Character
///
/// - Parameter s: The single-character string to convert to a `Character`
/// instance. `s` must contain exactly one extended grapheme cluster.
@inlinable // FIXME(sil-serialize-all)
@inlinable @inline(__always)
public init(_ s: String) {
let count = s._guts.count
_precondition(count != 0,
_precondition(!s.isEmpty,
"Can't form a Character from an empty String")
_debugPrecondition(s.index(after: s.startIndex) == s.endIndex,
"Can't form a Character from a String containing more than one extended grapheme cluster")
self.init(_unverified: s._guts)
}
/// Construct a Character from a _StringGuts, assuming it consists of exactly
/// one extended grapheme cluster.
@inlinable // FIXME(sil-serialize-all)
internal init(_unverified guts: _StringGuts) {
self = _visitGuts(guts,
ascii: { ascii in
if _fastPath(ascii.count == 1) {
return Character(_singleCodeUnit: ascii[0])
}
// The only multi-scalar ASCII grapheme cluster is CR/LF.
_sanityCheck(ascii.count == 2)
_sanityCheck(ascii.start[0] == _CR)
_sanityCheck(ascii.start[1] == _LF)
return Character(_codeUnitPair: UInt16(_CR), UInt16(_LF))
},
utf16: { utf16 in return Character(_unverified: utf16) },
opaque: { opaque in return Character(_unverified: opaque) })
}
/// Construct a Character from a slice of a _StringGuts, assuming
/// the specified range covers exactly one extended grapheme cluster.
@inlinable // FIXME(sil-serialize-all)
internal init(_unverified guts: _StringGuts, range: Range<Int>) {
self = _visitGuts(
guts, range: (range, performBoundsCheck: true),
ascii: { ascii in
if _fastPath(ascii.count == 1) {
return Character(_singleCodeUnit: ascii[0])
}
// The only multi-scalar ASCII grapheme cluster is CR/LF.
_sanityCheck(ascii.count == 2)
_sanityCheck(ascii.start[0] == _CR)
_sanityCheck(ascii.start[1] == _LF)
return Character(_codeUnitPair: UInt16(_CR), UInt16(_LF))
},
utf16: { utf16 in return Character(_unverified: utf16) },
opaque: { opaque in return Character(_unverified: opaque) })
}
@inlinable
internal
init(_singleCodeUnit cu: UInt16) {
_sanityCheck(UTF16._isScalar(cu))
_representation = .smallUTF16(
Builtin.zext_Int16_Int63(Builtin.reinterpretCast(cu)))
}
@inlinable
internal
init(_codeUnitPair first: UInt16, _ second: UInt16) {
_sanityCheck(
(UTF16._isScalar(first) && UTF16._isScalar(second)) ||
(UTF16.isLeadSurrogate(first) && UTF16.isTrailSurrogate(second)))
_representation = .smallUTF16(
Builtin.zext_Int32_Int63(
Builtin.reinterpretCast(
UInt32(first) | UInt32(second) &<< 16)))
}
@inlinable
internal
init(_unverified storage: _SwiftStringStorage<Unicode.UTF16.CodeUnit>) {
if _fastPath(storage.count <= 4) {
_sanityCheck(storage.count > 0)
let b = _SmallUTF16(storage.unmanagedView)
if _fastPath(Int64(bitPattern: b._storage) >= 0) {
self.init(_smallRepresentation: b)
_fixLifetime(storage)
return
}
}
// FIXME: We may want to make a copy if storage.unusedCapacity > 0
self.init(_largeRepresentation: storage)
}
@inlinable
internal
init<V: _StringVariant>(_unverified variant: V) {
if _fastPath(variant.count <= 4) {
_sanityCheck(variant.count > 0)
let b = _SmallUTF16(variant)
if _fastPath(Int64(bitPattern: b._storage) >= 0) {
self.init(_smallRepresentation: b)
return
}
}
self.init(_largeRepresentation: variant._copyToNativeStorage())
self.init(unchecked: s)
}
}
extension Character : CustomStringConvertible {
@inlinable // FIXME(sil-serialize-all)
public var description: String {
return String(self)
}
@inlinable
public var description: String {
return _str
}
}
extension Character : LosslessStringConvertible { }
extension Character : CustomDebugStringConvertible {
/// A textual representation of the character, suitable for debugging.
public var debugDescription: String {
return String(self).debugDescription
}
}
extension Character {
@usableFromInline
internal typealias _SmallUTF16 = _UIntBuffer<UInt64, Unicode.UTF16.CodeUnit>
@inlinable // FIXME(sil-serialize-all)
internal var _smallUTF16 : _SmallUTF16? {
guard case .smallUTF16(let _63bits) = _representation else { return nil }
_onFastPath()
let bits = UInt64(Builtin.zext_Int63_Int64(_63bits))
let minBitWidth = type(of: bits).bitWidth - bits.leadingZeroBitCount
return _SmallUTF16(
_storage: bits,
_bitCount: UInt8(
truncatingIfNeeded: 16 * Swift.max(1, (minBitWidth + 15) / 16))
)
}
@inlinable // FIXME(sil-serialize-all)
internal var _largeUTF16 : _UTF16StringStorage? {
guard case .large(let storage) = _representation else { return nil }
return storage
}
@usableFromInline // @testable
internal var _isSmall: Bool {
guard case .smallUTF16(_) = _representation else { return false }
return true
}
}
extension Character {
@inlinable // FIXME(sil-serialize-all)
internal var _count : Int {
if let small = _smallUTF16 { return small.count }
return _largeUTF16._unsafelyUnwrappedUnchecked.count
}
/// A textual representation of the character, suitable for debugging.
public var debugDescription: String {
return _str.debugDescription
}
}
extension String {
/// Creates a string containing the given character.
///
/// - Parameter c: The character to convert to a string.
@inlinable // FIXME(sil-serialize-all)
@inlinable @inline(__always)
public init(_ c: Character) {
if let utf16 = c._smallUTF16 {
if let small = _SmallUTF8String(utf16) {
self = String(_StringGuts(small))
} else {
// FIXME: Remove when we support UTF-8 in small string
self = String(decoding: utf16, as: Unicode.UTF16.self)
}
}
else {
// TODO(SSO): small check. For now, since we only do ASCII, this won't hit
self = String(_StringGuts(_large: c._largeUTF16!))
}
}
}
/// `.small` characters are stored in an Int63 with their UTF-8 representation,
/// with any unused bytes set to 0xFF. ASCII characters will have all bytes set
/// to 0xFF except for the lowest byte, which will store the ASCII value. Since
/// 0x7FFFFFFFFFFFFF80 or greater is an invalid UTF-8 sequence, we know if a
/// value is ASCII by checking if it is greater than or equal to
/// 0x7FFFFFFFFFFFFF00.
// FIXME(sil-serialize-all): Should be @inlinable
// <rdar://problem/34557187>
internal var _minASCIICharReprBuiltin: Builtin.Int63 {
@inline(__always) get {
let x: Int64 = 0x7FFFFFFFFFFFFF00
return Builtin.truncOrBitCast_Int64_Int63(x._value)
self.init(c._str._guts)
}
}
extension Character : Equatable {
@inlinable
@inlinable @inline(__always)
public static func == (lhs: Character, rhs: Character) -> Bool {
let l0 = lhs._smallUTF16
if _fastPath(l0 != nil), let l = l0?._storage {
let r0 = rhs._smallUTF16
if _fastPath(r0 != nil), let r = r0?._storage {
if (l | r) < 0x300 { return l == r }
if l == r { return true }
}
}
// FIXME(performance): constructing two temporary strings is extremely
// wasteful and inefficient.
return String(lhs) == String(rhs)
return lhs._str == rhs._str
}
}
extension Character : Comparable {
@inlinable
@inlinable @inline(__always)
public static func < (lhs: Character, rhs: Character) -> Bool {
let l0 = lhs._smallUTF16
if _fastPath(l0 != nil), let l = l0?._storage {
let r0 = rhs._smallUTF16
if _fastPath(r0 != nil), let r = r0?._storage {
if (l | r) < 0x80 { return l < r }
if l == r { return false }
}
}
// FIXME(performance): constructing two temporary strings is extremely
// wasteful and inefficient.
return String(lhs) < String(rhs)
return lhs._str < rhs._str
}
}
@@ -490,8 +237,13 @@ extension Character: Hashable {
/// of this instance.
@_effects(releasenone)
public func hash(into hasher: inout Hasher) {
// FIXME(performance): constructing a temporary string is extremely
// wasteful and inefficient.
hasher.combine(String(self))
_str.hash(into: &hasher)
}
}
extension Character {
/// Whether the backing string's `_guts._object` reports `isSmall`.
/// Kept `@usableFromInline` so tests can reach it (see the marker below).
@usableFromInline // @testable
internal var _isSmall: Bool {
return _str._guts._object.isSmall
}
}
@@ -1,194 +0,0 @@
//===--- CharacterUnicodeScalars.swift ------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
extension Character {
/// A view over the Unicode scalars of a `Character`. The view stores
/// nothing but the character it was created from.
@_fixed_layout // FIXME(sil-serialize-all)
public struct UnicodeScalarView {
// The character whose scalars this view exposes.
@usableFromInline // FIXME(sil-serialize-all)
internal let _base: Character
@inlinable // FIXME(sil-serialize-all)
internal init(_base: Character) {
self._base = _base
}
}
/// A view of this character's Unicode scalars, created by wrapping `self`.
@inlinable // FIXME(sil-serialize-all)
public var unicodeScalars : UnicodeScalarView {
return UnicodeScalarView(_base: self)
}
}
extension Character.UnicodeScalarView {
/// The iterator type for the view: a thin wrapper around an
/// `IndexingIterator` over the view itself.
@_fixed_layout // FIXME(sil-serialize-all)
public struct Iterator {
// The wrapped indexing iterator that does the actual traversal.
@usableFromInline // FIXME(sil-serialize-all)
internal var _base: IndexingIterator<Character.UnicodeScalarView>
@inlinable // FIXME(sil-serialize-all)
internal init(_base: IndexingIterator<Character.UnicodeScalarView>) {
self._base = _base
}
}
}
extension Character.UnicodeScalarView.Iterator : IteratorProtocol {
/// Returns whatever the wrapped indexing iterator's `next()` returns.
@inlinable // FIXME(sil-serialize-all)
public mutating func next() -> UnicodeScalar? {
return _base.next()
}
}
extension Character.UnicodeScalarView : Sequence {
/// Returns an `Iterator` wrapping an `IndexingIterator` over this view.
@inlinable // FIXME(sil-serialize-all)
public __consuming func makeIterator() -> Iterator {
return Iterator(_base: IndexingIterator(_elements: self))
}
}
extension Character.UnicodeScalarView {
/// A position in the view. Besides its offset, an index carries the encoded
/// scalar found at that position and how many code units it spans, so the
/// subscript can decode it and `index(after:)` can locate the next scalar.
@_fixed_layout // FIXME(sil-serialize-all)
public struct Index {
// Offset at which this index's scalar starts; the only field that
// participates in `==` and `<`.
@usableFromInline // FIXME(sil-serialize-all)
internal let _encodedOffset: Int
// The UTF-16 encoded scalar at this position (an empty value for the
// placeholder and end indices built by `startIndex`/`endIndex`).
@usableFromInline // FIXME(sil-serialize-all)
internal let _scalar: Unicode.UTF16.EncodedScalar
// Distance from `_encodedOffset` to the start of the next scalar; 0 for
// the placeholder and end indices.
@usableFromInline // FIXME(sil-serialize-all)
internal let _stride: UInt8
@inlinable // FIXME(sil-serialize-all)
internal init(
_encodedOffset: Int,
_scalar: Unicode.UTF16.EncodedScalar, _stride: UInt8
) {
self._encodedOffset = _encodedOffset
self._scalar = _scalar
self._stride = _stride
}
}
}
extension Character.UnicodeScalarView.Index : Equatable {
/// Two indices are equal when their `_encodedOffset`s match; the cached
/// scalar and stride are not compared.
@inlinable // FIXME(sil-serialize-all)
public static func == (
lhs: Character.UnicodeScalarView.Index,
rhs: Character.UnicodeScalarView.Index
) -> Bool {
return lhs._encodedOffset == rhs._encodedOffset
}
}
extension Character.UnicodeScalarView.Index : Comparable {
/// Indices are ordered by `_encodedOffset` alone; the cached scalar and
/// stride are not compared.
@inlinable // FIXME(sil-serialize-all)
public static func < (
lhs: Character.UnicodeScalarView.Index,
rhs: Character.UnicodeScalarView.Index
) -> Bool {
return lhs._encodedOffset < rhs._encodedOffset
}
}
extension Character.UnicodeScalarView : Collection {
/// The position of the first scalar.
///
/// Obtained by advancing from a placeholder index at offset 0 with stride
/// 0, which makes `index(after:)` parse the scalar that starts at offset 0.
@inlinable // FIXME(sil-serialize-all)
public var startIndex: Index {
return index(
after: Index(
_encodedOffset: 0,
_scalar: Unicode.UTF16.EncodedScalar(),
_stride: 0
))
}
/// The past-the-end position: offset `_base._count`, with an empty scalar
/// and stride 0.
@inlinable // FIXME(sil-serialize-all)
public var endIndex: Index {
return Index(
_encodedOffset: _base._count,
_scalar: Unicode.UTF16.EncodedScalar(),
_stride: 0
)
}
/// Returns the index of the scalar that starts where `i`'s scalar ends.
///
/// The next scalar is parsed from the character's UTF-16 code units,
/// starting at `i._encodedOffset + i._stride`. A parse error produces an
/// index holding the replacement character with stride 1. Running out of
/// input yields `endIndex` when `i` held a scalar (non-zero stride) and
/// traps otherwise.
@inlinable // FIXME(sil-serialize-all)
public func index(after i: Index) -> Index {
var parser = Unicode.UTF16.ForwardParser()
let startOfNextScalar = i._encodedOffset + numericCast(i._stride)
let r: Unicode.ParseResult<Unicode.UTF16.EncodedScalar>
// Pick the code-unit source: the small inline buffer when available,
// otherwise the large storage.
let small_ = _base._smallUTF16
if _fastPath(small_ != nil), let u16 = small_ {
// NOTE: this `i` is a code-unit iterator that shadows the index
// parameter inside this branch only.
var i = u16[u16.index(u16.startIndex, offsetBy: startOfNextScalar)...]
.makeIterator()
r = parser.parseScalar(from: &i)
}
else {
let c = _base._largeUTF16!.unmanagedView
// Same shadowing of the parameter `i` as in the branch above.
var i = c[c.index(c.startIndex, offsetBy: startOfNextScalar)...]
.makeIterator()
r = parser.parseScalar(from: &i)
}
// From here on `i` is the index parameter again.
switch r {
case .valid(let s):
return Index(
_encodedOffset: startOfNextScalar, _scalar: s,
_stride: UInt8(truncatingIfNeeded: s.count))
case .error:
return Index(
_encodedOffset: startOfNextScalar,
_scalar: Unicode.UTF16.encodedReplacementCharacter,
_stride: 1)
case .emptyInput:
if i._stride != 0 { return endIndex }
fatalError("no position after end of Character's last Unicode.Scalar")
}
}
/// Decodes and returns the scalar cached in `i`; the view's storage is not
/// consulted.
@inlinable // FIXME(sil-serialize-all)
public subscript(_ i: Index) -> UnicodeScalar {
return Unicode.UTF16.decode(i._scalar)
}
}
extension Character.UnicodeScalarView : BidirectionalCollection {
  /// Returns the index of the scalar that ends where `i` begins.
  ///
  /// The code units in front of `i._encodedOffset` are parsed in reverse to
  /// find the preceding scalar. A parse error produces an index one code unit
  /// back that holds the replacement character with stride 1.
  ///
  /// - Precondition: There is at least one code unit in front of `i`; asking
  ///   for the position before the first scalar traps.
  @inlinable // FIXME(sil-serialize-all)
  public func index(before i: Index) -> Index {
    var parser = Unicode.UTF16.ReverseParser()
    let r: Unicode.ParseResult<Unicode.UTF16.EncodedScalar>
    // Pick the code-unit source: the small inline buffer when available,
    // otherwise the large storage.
    let small_ = _base._smallUTF16
    if _fastPath(small_ != nil), let u16 = small_ {
      // NOTE: this `i` is a code-unit iterator that shadows the index
      // parameter inside this branch only; the slice bound on the right-hand
      // side still refers to the parameter.
      var i = u16[..<u16.index(u16.startIndex, offsetBy: i._encodedOffset)]
        .reversed().makeIterator()
      r = parser.parseScalar(from: &i)
    }
    else {
      let c = _base._largeUTF16!.unmanagedView
      // Same shadowing of the parameter `i` as in the branch above.
      var i = c[..<c.index(c.startIndex, offsetBy: i._encodedOffset)]
        .reversed().makeIterator()
      r = parser.parseScalar(from: &i)
    }
    // From here on `i` is the index parameter again.
    switch r {
    case .valid(let s):
      return Index(
        _encodedOffset: i._encodedOffset - s.count, _scalar: s,
        _stride: UInt8(truncatingIfNeeded: s.count))
    case .error:
      return Index(
        _encodedOffset: i._encodedOffset - 1,
        _scalar: Unicode.UTF16.encodedReplacementCharacter,
        _stride: 1)
    case .emptyInput:
      // Nothing precedes `i`, so `i` is the start of the view: the missing
      // position is the one before the *first* scalar.
      fatalError("no position before Character's first Unicode.Scalar")
    }
  }
}
@@ -0,0 +1,95 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
@usableFromInline
internal protocol _HasContiguousBytes {
/// Calls `body` with a raw-byte view of the conforming value's contents and
/// returns (or rethrows) whatever `body` produces.
func withUnsafeBytes<R>(
_ body: (UnsafeRawBufferPointer) throws -> R
) rethrows -> R
/// NOTE(review): judging by the name and by the `String` conformance,
/// presumably `true` when `withUnsafeBytes` can expose the bytes without
/// first copying them; confirm against the callers.
var _providesContiguousBytesNoCopy: Bool { get }
}
extension _HasContiguousBytes {
/// Default implementation: conforming types report `true` unless they
/// provide their own `_providesContiguousBytesNoCopy`.
@inlinable
var _providesContiguousBytesNoCopy: Bool {
@inline(__always) get { return true }
}
}
extension Array: _HasContiguousBytes {
/// Currently always `true`. Per the TODO below, the intended answer is a
/// query of `_buffer._isNative`, so treat this value as provisional.
@inlinable
var _providesContiguousBytesNoCopy: Bool {
// TODO(UTF8 merge): Query `_buffer._isNative`, which is internal
@inline(__always) get { return true }
}
}
// Adds no members of its own: `_providesContiguousBytesNoCopy` comes from the
// protocol extension's default, and the `withUnsafeBytes` requirement has to
// be met by a declaration outside this file.
extension ContiguousArray: _HasContiguousBytes {}
extension UnsafeBufferPointer: _HasContiguousBytes {
  /// Calls `body` with a raw-byte view of the memory this buffer references.
  ///
  /// The view spans `count * MemoryLayout<Element>.stride` bytes. A buffer
  /// whose `baseAddress` is `nil` yields a raw buffer with a `nil` base
  /// address instead of unwrapping the missing pointer.
  ///
  /// - Parameter body: A closure that receives the raw bytes; the pointer
  ///   must not be used after the closure returns.
  /// - Returns: The value returned by `body`.
  @inlinable @inline(__always)
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R {
    // `baseAddress` may legitimately be nil (empty buffer), so convert it via
    // the optional-accepting initializer rather than an unchecked unwrap,
    // which would be undefined behavior in that case.
    let ptr = UnsafeRawPointer(self.baseAddress)
    let len = self.count &* MemoryLayout<Element>.stride
    return try body(UnsafeRawBufferPointer(start: ptr, count: len))
  }
}
extension UnsafeMutableBufferPointer: _HasContiguousBytes {
  /// Calls `body` with a read-only raw-byte view of the memory this buffer
  /// references.
  ///
  /// The view spans `count * MemoryLayout<Element>.stride` bytes. A buffer
  /// whose `baseAddress` is `nil` yields a raw buffer with a `nil` base
  /// address instead of unwrapping the missing pointer.
  ///
  /// - Parameter body: A closure that receives the raw bytes; the pointer
  ///   must not be used after the closure returns.
  /// - Returns: The value returned by `body`.
  @inlinable @inline(__always)
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R {
    // `baseAddress` may legitimately be nil (empty buffer), so convert it via
    // the optional-accepting initializer rather than an unchecked unwrap,
    // which would be undefined behavior in that case.
    let ptr = UnsafeRawPointer(self.baseAddress)
    let len = self.count &* MemoryLayout<Element>.stride
    return try body(UnsafeRawBufferPointer(start: ptr, count: len))
  }
}
extension String: _HasContiguousBytes {
  /// `true` when the string's guts report `isFastUTF8`, which is exactly the
  /// case in which `withUnsafeBytes` exposes the existing storage instead of
  /// a temporary copy.
  @inlinable
  var _providesContiguousBytesNoCopy: Bool {
    @inline(__always) get { return self._guts.isFastUTF8 }
  }

  /// Calls `body` with the string's UTF-8 code units as raw bytes.
  ///
  /// - Parameter body: A closure that receives the raw bytes.
  /// - Returns: The value returned by `body`.
  @inlinable @inline(__always)
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R {
    guard _fastPath(self._guts.isFastUTF8) else {
      // Slow path: gather the UTF-8 view into contiguous storage first.
      return try ContiguousArray(self.utf8).withUnsafeBytes { try body($0) }
    }
    return try self._guts.withFastUTF8 { fastUTF8 in
      try body(UnsafeRawBufferPointer(fastUTF8))
    }
  }
}
extension Substring: _HasContiguousBytes {
  /// `true` when the guts of the whole underlying string report
  /// `isFastUTF8`, which is exactly the case in which `withUnsafeBytes`
  /// exposes the existing storage instead of a temporary copy.
  @inlinable
  var _providesContiguousBytesNoCopy: Bool {
    @inline(__always) get { return self.wholeGuts.isFastUTF8 }
  }

  /// Calls `body` with the substring's UTF-8 code units as raw bytes.
  ///
  /// - Parameter body: A closure that receives the raw bytes.
  /// - Returns: The value returned by `body`.
  @inlinable @inline(__always)
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R {
    // TODO(UTF8): less error prone to have Substring and/or slice provide a
    // sliced fastUTF8
    guard _fastPath(self.wholeGuts.isFastUTF8) else {
      // Slow path: gather the UTF-8 view into contiguous storage first.
      return try ContiguousArray(self.utf8).withUnsafeBytes { try body($0) }
    }
    // Narrow the whole string's code units down to this substring's bounds.
    let lower = self.startIndex.encodedOffset
    let upper = self.endIndex.encodedOffset
    return try self.wholeGuts.withFastUTF8() {
      try body(UnsafeRawBufferPointer(
        UnsafeBufferPointer(rebasing: $0[lower..<upper])))
    }
  }
}
+3 -3
View File
@@ -7,7 +7,6 @@
"ASCII.swift",
"CString.swift",
"Character.swift",
"CharacterUnicodeScalars.swift",
"ICU.swift",
"NormalizedCodeUnitIterator.swift",
"SmallString.swift",
@@ -21,7 +20,7 @@
"StringObject.swift",
"StringGuts.swift",
"StringGutsVisitor.swift",
"StringGraphemeBreaking.swift",
"StringGraphemeBreaking.swift",
"StringHashable.swift",
"StringIndex.swift",
"StringIndexConversions.swift",
@@ -36,7 +35,7 @@
"StringUTF16View.swift",
"StringUTF8View.swift",
"StringUnicodeScalarView.swift",
"StringVariant.swift",
"StringVariant.swift",
"Substring.swift",
"Unicode.swift",
"UnicodeEncoding.swift",
@@ -108,6 +107,7 @@
"CocoaArray.swift",
"ContiguousArray.swift",
"ContiguousArrayBuffer.swift",
"ContiguouslyStored.swift",
"FixedArray.swift",
"SliceBuffer.swift",
"SwiftNativeNSArray.swift"],
+2 -3
View File
@@ -64,9 +64,8 @@ public func readLine(strippingNewline: Bool = true) -> String? {
}
}
}
let result = String._fromUTF8(
UnsafeBufferPointer(start: linePtr, count: readBytes),
repair: true)!
let result = String._fromUTF8Repairing(
UnsafeBufferPointer(start: linePtr, count: readBytes)).0
_swift_stdlib_free(linePtr)
return result
}
+22 -20
View File
@@ -139,29 +139,31 @@ extension FixedWidthInteger {
/// `radix`.
/// - radix: The radix, or base, to use for converting `text` to an integer
/// value. `radix` must be in the range `2...36`. The default is 10.
@inlinable // FIXME(sil-serialize-all)
@inlinable // @specializable
@_semantics("optimize.sil.specialize.generic.partial.never")
public init?<S : StringProtocol>(_ text: S, radix: Int = 10) {
// Trap up front when the radix is outside 2...36.
_precondition(2...36 ~= radix, "Radix not in range 2...36")
// NOTE(review): two complete parsing strategies follow back to back (a
// `_visitGuts` dispatch, then a fast-UTF-8 / `utf8`-iterator path), and
// `@inlinable` is listed twice above. This looks like the before and after
// of one change interleaved -- confirm which version is intended.
let r = Self(radix)
let range = text._encodedOffsetRange
let guts = text._wholeString._guts
let result: Self?
// Hands the guts to `_visitGuts` with one closure per labelled case. Only
// the `ascii` closure calls `_parseASCII`; `utf16` and `opaque` both call
// `_parseASCIISlowPath`.
result = _visitGuts(guts,
range: (range, false), args: r,
ascii: { view, radix in
var i = view.makeIterator()
return _parseASCII(codeUnits: &i, radix: radix) },
utf16: { view, radix in
var i = view.makeIterator()
return Self._parseASCIISlowPath(codeUnits: &i, radix: radix) },
opaque: { view, radix in
var i = view.makeIterator()
return Self._parseASCIISlowPath(codeUnits: &i, radix: radix) }
)
// Fail the initializer when no value was parsed.
guard _fastPath(result != nil) else { return nil }
self = result._unsafelyUnwrappedUnchecked
// When `text` is a `String` whose guts report fast UTF-8, parse straight
// from that UTF-8 buffer with `_parseASCII`.
if let str = text as? String, str._guts.isFastUTF8 {
guard let ret = str._guts.withFastUTF8 ({ utf8 -> Self? in
var iter = utf8.makeIterator()
return _parseASCII(codeUnits: &iter, radix: Self(radix))
}) else {
return nil
}
self = ret
return
}
// TODO(String performance): We can provide fast paths for common radices,
// native UTF-8 storage, etc.
// Otherwise feed an iterator over `text.utf8` to the slow-path parser.
var iter = text.utf8.makeIterator()
guard let ret = Self._parseASCIISlowPath(
codeUnits: &iter, radix: Self(radix)
) else { return nil }
self = ret
}
/// Creates a new integer value from the given string.
@@ -182,7 +184,7 @@ extension FixedWidthInteger {
/// Int("10000000000000000000000000") // Out of range
///
/// - Parameter description: The ASCII representation of a number.
@inlinable // FIXME(sil-serialize-all)
@inlinable
@_semantics("optimize.sil.specialize.generic.partial.never")
@inline(__always)
public init?(_ description: String) {
-25
View File
@@ -44,31 +44,6 @@ public func withExtendedLifetime<T, Result>(
return try body(x)
}
extension String {
  /// Invokes `body` with a pointer to this string's contents encoded as a
  /// null-terminated sequence of UTF-8 code units.
  ///
  /// The pointer handed to `body` is only valid while `withCString(_:)` is
  /// executing. Do not store it or return it for later use.
  ///
  /// - Parameter body: A closure that receives a pointer to a
  ///   null-terminated sequence of UTF-8 code units. Whatever `body` returns
  ///   becomes the return value of `withCString(_:)`. The pointer argument is
  ///   valid only for the duration of the method's execution.
  /// - Returns: The return value, if any, of the `body` closure parameter.
  @inlinable
  public func withCString<Result>(
    _ body: (UnsafePointer<Int8>) throws -> Result
  ) rethrows -> Result {
    // `utf8CString` is a contiguous, null-terminated copy of the contents.
    let nulTerminated = self.utf8CString
    return try nulTerminated.withUnsafeBufferPointer { cString in
      try body(cString.baseAddress!)
    }
  }
}
// Fix the lifetime of the given instruction so that the ARC optimizer does not
// shorten the lifetime of x to be before this point.
@_transparent
+3 -8
View File
@@ -582,19 +582,14 @@ extension String.UTF8View {
// Range subscripts that return `String`; both are marked obsoleted as of
// Swift 4.
extension String {
@available(swift, obsoleted: 4)
public subscript(bounds: Range<Index>) -> String {
// TODO: Make unreachable when the Foundation overlay is off of Swift 3
_boundsCheck(bounds)
// NOTE(review): two `return` statements follow; the second can never run.
// This looks like the old and new bodies interleaved -- confirm which one
// is intended.
return String(Substring(_slice: Slice(base: self, bounds: bounds)))
return String(self[bounds])
}
@available(swift, obsoleted: 4)
public subscript(bounds: ClosedRange<Index>) -> String {
// TODO: Make unreachable when the Foundation overlay is off of Swift 3
// Convert the closed range to a half-open one relative to this string, then
// bounds-check it and copy that slice out as a `String`.
let r = bounds.relative(to: self)
_boundsCheck(r)
return String(Substring(_slice: Slice(
base: self,
bounds: r)))
// NOTE(review): everything below the `return` above is unreachable; it
// appears to be the replacement body -- confirm which one is intended.
_boundsCheck(bounds)
return String(self[bounds])
}
}
@@ -10,229 +10,4 @@
//
//===----------------------------------------------------------------------===//
/// Iterates the UTF-16 code units of a string after normalizing it one
/// segment at a time.
///
/// Code units are pulled from `source` up to a normalization boundary, run
/// through `_tryNormalize`, and then handed out one at a time from either the
/// fixed-size `segmentBuffer` or, once a segment does not fit, the
/// array-backed `overflowBuffer`.
internal
struct _NormalizedCodeUnitIterator: IteratorProtocol {
  /// Fixed-size storage for the most recently normalized segment.
  var segmentBuffer = _FixedArray16<CodeUnit>(allZeros:())
  /// Normalized output used once a segment did not fit the fixed-size buffer.
  var overflowBuffer: [CodeUnit]? = nil
  /// Scratch input for normalization when the overflow path is in use.
  var normalizationBuffer: [CodeUnit]? = nil
  /// Where un-normalized code units come from.
  var source: _SegmentSource

  // Read position and number of valid code units for each output buffer.
  var segmentBufferIndex = 0
  var segmentBufferCount = 0
  var overflowBufferIndex = 0
  var overflowBufferCount = 0

  typealias CodeUnit = UInt16

  /// Creates an iterator over the normalized form of `collection`.
  init<Source: BidirectionalCollection>
    (_ collection: Source)
    where Source.Element == UInt16, Source.SubSequence == Source
  {
    source = _CollectionSource(collection)
  }

  /// Creates an iterator over the normalized form of the code units of
  /// `guts` that fall inside `range`, beginning `startIndex` code units
  /// past `range.lowerBound`.
  init(_ guts: _StringGuts, _ range: Range<Int>, startIndex: Int = 0) {
    source = _StringGutsSource(guts, range, start: startIndex)
  }

  /// Compares the remaining normalized code units of `self` against those of
  /// `other`, code unit by code unit.
  ///
  /// Both iterators are consumed through copies, so neither `self` nor
  /// `other` is advanced by this call.
  mutating func compare(with other: _NormalizedCodeUnitIterator) -> _Ordering {
    var mutableOther = other
    for cu in IteratorSequence(self) {
      if let otherCU = mutableOther.next() {
        let result = _lexicographicalCompare(cu, otherCU)
        if result == .equal {
          continue
        } else {
          return result
        }
      } else {
        //other returned nil, we are greater
        return .greater
      }
    }

    //we ran out of code units, either we are equal, or only we ran out and
    //other is greater
    if let _ = mutableOther.next() {
      return .less
    } else {
      return .equal
    }
  }

  /// A segment source backed by a bidirectional collection of UTF-16 code
  /// units.
  struct _CollectionSource<Source: BidirectionalCollection>: _SegmentSource
    where Source.Element == UInt16, Source.SubSequence == Source
  {
    /// Number of code units between the current position and the end.
    var remaining: Int {
      @_specialize(where Source == _UnmanagedString<UInt16>)
      @_specialize(where Source == _UnmanagedOpaqueString)
      get {
        return collection.distance(from: index, to: collection.endIndex)
      }
    }
    var collection: Source
    var index: Source.Index

    init(_ collection: Source) {
      self.collection = collection
      index = collection.startIndex
    }

    /// Copies code units into `buffer` until a normalization boundary or the
    /// end of the collection is reached, returning how many were copied.
    /// Returns `nil`, with the position rewound, if `buffer` is too small for
    /// the segment.
    @_specialize(where Source == _UnmanagedString<UInt16>)
    @_specialize(where Source == _UnmanagedOpaqueString)
    mutating func tryFill(buffer: UnsafeMutableBufferPointer<UInt16>) -> Int? {
      var bufferIndex = 0
      let originalIndex = index
      repeat {
        guard index != collection.endIndex else {
          break
        }

        guard bufferIndex < buffer.count else {
          //The buffer isn't big enough for the current segment
          index = originalIndex
          return nil
        }

        let cu = collection[index]
        buffer[bufferIndex] = cu
        index = collection.index(after: index)
        bufferIndex += 1
      } while !collection.hasNormalizationBoundary(after: collection.index(before: index))

      return bufferIndex
    }
  }

  /// A segment source backed by a range of code-unit offsets into a
  /// `_StringGuts`.
  struct _StringGutsSource: _SegmentSource {
    /// Number of code units between the current position and the end of
    /// `range`.
    var remaining: Int {
      // `index` is an absolute offset into `guts` (it starts at
      // `range.lowerBound`), so it has to be measured against
      // `range.upperBound`. Using the relative `range.count` here
      // under-reports whenever `range.lowerBound` is non-zero.
      return range.upperBound - index
    }
    var guts: _StringGuts
    /// Absolute code-unit offset into `guts` of the next unit to read.
    var index: Int
    var range: Range<Int>

    init(_ guts: _StringGuts, _ range: Range<Int>, start: Int = 0) {
      self.guts = guts
      self.range = range
      index = range.lowerBound + start
    }

    /// Copies code units into `buffer` until a normalization boundary or the
    /// end of `range` is reached, returning how many were copied. Returns
    /// `nil`, with the position rewound, if `buffer` is too small for the
    /// segment.
    mutating func tryFill(buffer: UnsafeMutableBufferPointer<UInt16>) -> Int? {
      var bufferIndex = 0
      let originalIndex = index
      repeat {
        // Stop at the end of the range. `index` is absolute, so the limit is
        // `range.upperBound` rather than `range.count`.
        guard index < range.upperBound else {
          break
        }

        guard bufferIndex < buffer.count else {
          //The buffer isn't big enough for the current segment
          index = originalIndex
          return nil
        }

        let cu = guts.codeUnit(atCheckedOffset: index)
        buffer[bufferIndex] = cu
        index += 1
        bufferIndex += 1
      } while !guts.hasNormalizationBoundary(after: index - 1)

      return bufferIndex
    }
  }

  /// Returns the next normalized code unit, or `nil` once the source and both
  /// output buffers are exhausted.
  mutating func next() -> CodeUnit? {
    // Reset whichever output buffer has been fully consumed.
    if segmentBufferCount == segmentBufferIndex {
      segmentBuffer = _FixedArray16<CodeUnit>(allZeros:())
      segmentBufferCount = 0
      segmentBufferIndex = 0
    }

    if overflowBufferCount == overflowBufferIndex {
      overflowBufferCount = 0
      overflowBufferIndex = 0
    }

    if source.remaining <= 0
      && segmentBufferCount == 0
      && overflowBufferCount == 0 {
      // Our source of code units to normalize is empty and our buffers from
      // previous normalizations are also empty.
      return nil
    }
    if segmentBufferCount == 0 && overflowBufferCount == 0 {
      //time to fill a buffer if possible. Otherwise we are done, return nil
      // Normalize segment, and then compare first code unit
      var intermediateBuffer = _FixedArray16<CodeUnit>(allZeros:())
      if overflowBuffer == nil,
        let filled = source.tryFill(buffer: &intermediateBuffer)
      {
        // The segment fit the fixed-size buffer: normalize it in place.
        guard let count = _tryNormalize(
          _castOutputBuffer(&intermediateBuffer,
                            endingAt: filled),
          into: &segmentBuffer
        )
        else {
          fatalError("Output buffer was not big enough, this should not happen")
        }
        segmentBufferCount = count
      } else {
        // Either the segment did not fit, or the overflow buffers already
        // exist; from here on normalization goes through the arrays. They are
        // sized once, from what is left in the source at that moment.
        let size = source.remaining * _Normalization._maxNFCExpansionFactor
        if overflowBuffer == nil {
          overflowBuffer = Array(repeating: 0, count: size)
          normalizationBuffer = Array(repeating:0, count: size)
        }

        guard let count = normalizationBuffer!.withUnsafeMutableBufferPointer({
          (normalizationBufferPtr) -> Int? in
          guard let filled = source.tryFill(buffer: normalizationBufferPtr)
          else {
            fatalError("Invariant broken, buffer should have space")
          }
          return overflowBuffer!.withUnsafeMutableBufferPointer {
            (overflowBufferPtr) -> Int? in
            return _tryNormalize(
              UnsafeBufferPointer(rebasing: normalizationBufferPtr[..<filled]),
              into: overflowBufferPtr
            )
          }
        }) else {
          fatalError("Invariant broken, overflow buffer should have space")
        }

        overflowBufferCount = count
      }
    }

    //exactly one of the buffers should have code units for us to return
    _sanityCheck((segmentBufferCount == 0)
      != ((overflowBuffer?.count ?? 0) == 0))

    if segmentBufferIndex < segmentBufferCount {
      let index = segmentBufferIndex
      segmentBufferIndex += 1
      return segmentBuffer[index]
    } else if overflowBufferIndex < overflowBufferCount {
      _sanityCheck(overflowBufferIndex < overflowBuffer!.count)
      let index = overflowBufferIndex
      overflowBufferIndex += 1
      return overflowBuffer![index]
    } else {
      return nil
    }
  }
}
// A supplier of un-normalized UTF-16 code units, consumed one segment at a
// time.
protocol _SegmentSource {
// Number of code units not yet handed out. The conformers in this file
// compute it as the distance from their current position to the end.
var remaining: Int { get }
// Writes code units into `buffer` and returns how many were written. The
// conformers in this file return `nil`, after rewinding to where they
// started, when `buffer` is too small for the current segment.
mutating func tryFill(buffer: UnsafeMutableBufferPointer<UInt16>) -> Int?
}
extension _SegmentSource {
  /// Convenience overload that fills a segment output buffer passed by
  /// pointer: the pointer is reinterpreted as a buffer of UTF-16 code units
  /// and forwarded to the protocol's `tryFill(buffer:)` requirement.
  mutating func tryFill(
    buffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
  ) -> Int? {
    let codeUnits: UnsafeMutableBufferPointer<UInt16> =
      _castOutputBuffer(buffer)
    return tryFill(buffer: codeUnits)
  }
}
+3 -11
View File
@@ -80,7 +80,6 @@ extension TextOutputStream {
// Locking hook with an empty body: this implementation does nothing.
public mutating func _lock() {}
// Unlocking hook with an empty body: this implementation does nothing.
public mutating func _unlock() {}
@inlinable
public mutating func _writeASCII(_ buffer: UnsafeBufferPointer<UInt8>) {
// Builds a `String` from the buffer with `String._fromASCII` and forwards it
// to this stream's `write(_:)`.
write(String._fromASCII(buffer))
}
@@ -522,15 +521,8 @@ internal struct _Stdout : TextOutputStream {
internal mutating func write(_ string: String) {
// Nothing to emit for an empty string.
if string.isEmpty { return }
// ASCII fast path: hand the unmanaged ASCII view to a single stdout write.
// `_fixLifetime` is deferred so `string` stays alive across that call.
if _fastPath(string._guts.isASCII) {
defer { _fixLifetime(string) }
let ascii = string._guts._unmanagedASCIIView
_swift_stdlib_fwrite_stdout(ascii.start, ascii.count, 1)
return
}
// NOTE(review): as written, the bulk `_withUTF8` write below sits inside the
// per-code-unit loop, next to the `putchar` call, so the whole string would
// be written once per code unit. This looks like the old per-byte loop and
// its bulk-write replacement interleaved -- confirm which one is intended.
for c in string.utf8 {
_swift_stdlib_putchar_unlocked(Int32(c))
_ = string._withUTF8 { utf8 in
_swift_stdlib_fwrite_stdout(utf8.baseAddress!, 1, utf8.count)
}
}
}
@@ -544,7 +536,7 @@ extension String : TextOutputStream {
}
public mutating func _writeASCII(_ buffer: UnsafeBufferPointer<UInt8>) {
// NOTE(review): the buffer is appended twice here, once wrapped in
// `_UnmanagedString` and once in `_StringGuts(_:isKnownASCII:)`. This looks
// like the old and new statements interleaved -- confirm which one is
// intended.
self._guts.append(_UnmanagedString(buffer))
self._guts.append(_StringGuts(buffer, isKnownASCII: true))
}
}
File diff suppressed because it is too large Load Diff
+1 -7
View File
@@ -253,13 +253,7 @@ public struct StaticString
/// A string representation of the static string.
public var description: String {
// Builds the `String` from the static string's UTF-8 buffer, choosing the
// ASCII or well-formed-UTF-8 constructor according to `isASCII`.
return withUTF8Buffer { (buffer) in
if isASCII {
return String._fromASCII(buffer)
} else {
return String._fromWellFormedUTF8(buffer)
}
}
// NOTE(review): this second `return` is unreachable after the one above; it
// appears to be the replacement body -- confirm which one is intended.
return withUTF8Buffer { String._uncheckedFromUTF8($0) }
}
/// A textual representation of the static string, suitable for debugging.
File diff suppressed because it is too large Load Diff
+132 -241
View File
@@ -20,52 +20,35 @@ import SwiftShims
/// Effectively an untyped NSString that doesn't require foundation.
public typealias _CocoaString = AnyObject
// NOTE(review): two declaration headers for the same function are interleaved
// below (a `public` one and an `internal` one with different attributes).
// Confirm which header is intended; only one can stand.
@inlinable // FIXME(sil-serialize-all)
public // @testable
func _stdlib_binary_CFStringCreateCopy(
@usableFromInline // @testable
@_effects(releasenone)
internal func _stdlib_binary_CFStringCreateCopy(
_ source: _CocoaString
) -> _CocoaString {
// Forwards to the `_swift_stdlib_CFStringCreateCopy` shim (first argument
// `nil`) and returns the result as `AnyObject`.
let result = _swift_stdlib_CFStringCreateCopy(nil, source) as AnyObject
return result
}
// NOTE(review): two declaration headers for the same function are interleaved
// below (a `public` one and an `internal` one with different attributes).
// Confirm which header is intended; only one can stand.
@inlinable // FIXME(sil-serialize-all)
@usableFromInline // @testable
@_effects(readonly)
public // @testable
func _stdlib_binary_CFStringGetLength(
internal func _stdlib_binary_CFStringGetLength(
_ source: _CocoaString
) -> Int {
// Forwards directly to the `_swift_stdlib_CFStringGetLength` shim.
return _swift_stdlib_CFStringGetLength(source)
}
// NOTE(review): two declaration headers for the same function are interleaved
// below (a `public` one and an `internal` one with different attributes).
// Confirm which header is intended; only one can stand.
@inlinable // FIXME(sil-serialize-all)
public // @testable
func _stdlib_binary_CFStringGetCharactersPtr(
@usableFromInline // @testable
@_effects(readonly)
internal func _stdlib_binary_CFStringGetCharactersPtr(
_ source: _CocoaString
) -> UnsafeMutablePointer<UTF16.CodeUnit>? {
// Asks the `_swift_stdlib_CFStringGetCharactersPtr` shim for a pointer and
// re-types the result as a mutable pointer.
return UnsafeMutablePointer(
mutating: _swift_stdlib_CFStringGetCharactersPtr(source))
}
/// Loading Foundation initializes these function variables
/// with useful values
/// Reads every UTF-16 code unit of a _CocoaString into `destination`, which
/// must be contiguous storage of sufficient capacity.
@usableFromInline // FIXME(sil-serialize-all)
@inline(never) // Hide the CF dependency
internal func _cocoaStringReadAll(
  _ source: _CocoaString, _ destination: UnsafeMutablePointer<UTF16.CodeUnit>
) {
  // The range to copy always starts at 0 and covers the full reported length.
  let codeUnitCount = _swift_stdlib_CFStringGetLength(source)
  let wholeString = _swift_shims_CFRange(location: 0, length: codeUnitCount)
  _swift_stdlib_CFStringGetCharacters(source, wholeString, destination)
}
/// Copies a slice of a _CocoaString into contiguous storage of
/// sufficient capacity.
@usableFromInline // FIXME(sil-serialize-all)
@inline(never) // Hide the CF dependency
@_effects(releasenone)
internal func _cocoaStringCopyCharacters(
from source: _CocoaString,
range: Range<Int>,
@@ -77,8 +60,8 @@ internal func _cocoaStringCopyCharacters(
destination)
}
@usableFromInline // FIXME(sil-serialize-all)
@inline(never) // Hide the CF dependency
@_effects(releasenone)
internal func _cocoaStringSlice(
_ target: _CocoaString, _ bounds: Range<Int>
) -> _CocoaString {
@@ -95,16 +78,12 @@ internal func _cocoaStringSlice(
return cfResult
}
@usableFromInline // FIXME(sil-serialize-all)
@inline(never) // Hide the CF dependency
@_effects(readonly)
internal func _cocoaStringSubscript(
_ target: _CocoaString, _ position: Int
) -> UTF16.CodeUnit {
// Returns the UTF-16 code unit the CF shim reports at `position`.
let cfSelf: _swift_shims_CFStringRef = target
// Sanity check only: a string that exposes a characters pointer is not
// expected to reach this path.
_sanityCheck(_swift_stdlib_CFStringGetCharactersPtr(cfSelf) == nil,
"Known contiguously stored strings should already be converted to Swift")
return _swift_stdlib_CFStringGetCharacterAtIndex(cfSelf, position)
}
@@ -112,23 +91,23 @@ internal func _cocoaStringSubscript(
// Conversion from NSString to Swift's native representation
//
// Encoding constant passed to the CF string shims when ASCII is requested.
// Presumably mirrors the Core Foundation constant of the same name -- verify
// the value against the CF headers.
@inlinable // FIXME(sil-serialize-all)
internal var kCFStringEncodingASCII : _swift_shims_CFStringEncoding {
@inline(__always) get { return 0x0600 }
}
// Encoding constant passed to the CF string shims when UTF-8 is requested.
// Presumably mirrors the Core Foundation constant of the same name -- verify
// the value against the CF headers.
@inlinable // FIXME(sil-serialize-all)
internal var kCFStringEncodingUTF8 : _swift_shims_CFStringEncoding {
@inline(__always) get { return 0x8000100 }
}
@usableFromInline // @opaque
internal func _bridgeASCIICocoaString(
// Resiliently write a tagged cocoa string's contents into a buffer
@_effects(readonly) // @opaque
internal func _bridgeTagged(
_ cocoa: _CocoaString,
intoUTF8 bufPtr: UnsafeMutableRawBufferPointer
intoUTF8 bufPtr: UnsafeMutableBufferPointer<UInt8>
) -> Int? {
let ptr = bufPtr.baseAddress._unsafelyUnwrappedUnchecked.assumingMemoryBound(
to: UInt8.self)
_sanityCheck(_isObjCTaggedPointer(cocoa))
let ptr = bufPtr.baseAddress._unsafelyUnwrappedUnchecked
let length = _stdlib_binary_CFStringGetLength(cocoa)
_sanityCheck(length <= _SmallUTF8String.capacity)
var count = 0
let numCharWritten = _swift_stdlib_CFStringGetBytes(
cocoa, _swift_shims_CFRange(location: 0, length: length),
@@ -136,26 +115,26 @@ internal func _bridgeASCIICocoaString(
return length == numCharWritten ? count : nil
}
@usableFromInline
@_effects(releasenone)
internal func _bridgeToCocoa(_ small: _SmallUTF8String) -> _CocoaString {
// Creates a CF string from the small string's code units, passing the ASCII
// encoding constant when `small.isASCII` and the UTF-8 one otherwise.
return small.withUTF8CodeUnits { bufPtr in
return _swift_stdlib_CFStringCreateWithBytes(
nil, bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
bufPtr.count,
small.isASCII ? kCFStringEncodingASCII : kCFStringEncodingUTF8, 0)
as AnyObject
}
// NOTE(review): this call follows an unconditional `return` and so can never
// run; it looks like a replacement body interleaved with the old one --
// confirm which one is intended.
unimplemented_utf8()
}
/// Returns the pointer that `_swift_stdlib_CFStringGetCStringPtr` reports for
/// `str` under the UTF-8 encoding, re-typed to `UInt8`, or `nil` when the
/// shim returns no pointer.
internal func _cocoaUTF8Pointer(_ str: _CocoaString) -> UnsafePointer<UInt8>? {
  // TODO(UTF8): Is there a better interface here? This requires nul
  // termination and may assume ASCII.
  let cString = _swift_stdlib_CFStringGetCStringPtr(
    str, kCFStringEncodingUTF8)
  return cString?._asUInt8
}
@_effects(readonly)
internal func _getCocoaStringPointer(
_ cfImmutableValue: _CocoaString
) -> (UnsafeRawPointer?, isUTF16: Bool) {
// Look first for null-terminated ASCII
// Note: the code in clownfish appears to guarantee
// nul-termination, but I'm waiting for an answer from Chris Kane
// about whether we can count on it for all time or not.
let nulTerminatedASCII = _swift_stdlib_CFStringGetCStringPtr(
cfImmutableValue, kCFStringEncodingASCII)
let nulTerminatedASCII = _cocoaUTF8Pointer(cfImmutableValue)
// start will hold the base pointer of contiguous storage, if it
// is found.
@@ -171,65 +150,83 @@ internal func _getCocoaStringPointer(
}
// NOTE(review): two functions are interleaved in this span:
// `_makeCocoaStringGuts` (whose `if` chain is never closed) and
// `_bridgeCocoaString`. Several steps appear twice in old and new form.
// Confirm which version is intended before relying on any single line.
@usableFromInline
@inline(never) // Hide the CF dependency
internal
func _makeCocoaStringGuts(_ cocoaString: _CocoaString) -> _StringGuts {
// Objects that are already one of the Swift-native storage classes are
// unwrapped directly instead of going through CF.
if let ascii = cocoaString as? _ASCIIStringStorage {
return _StringGuts(_large: ascii)
} else if let utf16 = cocoaString as? _UTF16StringStorage {
return _StringGuts(_large: utf16)
} else if let wrapped = cocoaString as? __NSContiguousString {
return wrapped._guts
@_effects(releasenone) // @opaque
internal func _bridgeCocoaString(_ cocoaString: _CocoaString) -> _StringGuts {
if let abstract = cocoaString as? _AbstractStringStorage {
return abstract.asString._guts
} else if _isObjCTaggedPointer(cocoaString) {
// Tagged-pointer strings are converted to the small-string form.
guard let small = _SmallUTF8String(_cocoaString: cocoaString) else {
fatalError("Internal invariant violated: large tagged NSStrings")
}
return _StringGuts(small)
return _StringGuts(_SmallUTF8String(taggedCocoa: cocoaString))
}
// "copy" it into a value to be sure nobody will modify behind
// our backs. In practice, when value is already immutable, this
// just does a retain.
//
// TODO: Only in certain circumstances should we emit this call:
// 1) If it's immutable, just retain it.
// 2) If it's mutable with no associated information, then a copy must
// happen; might as well eagerly bridge it in.
// 3) If it's mutable with associated information, must make the call
//
let immutableCopy
= _stdlib_binary_CFStringCreateCopy(cocoaString) as AnyObject
// The copy can itself be a tagged pointer, so that case is checked again.
if _isObjCTaggedPointer(immutableCopy) {
guard let small = _SmallUTF8String(_cocoaString: cocoaString) else {
fatalError("Internal invariant violated: large tagged NSStrings")
}
return _StringGuts(small)
return _StringGuts(_SmallUTF8String(taggedCocoa: immutableCopy))
}
let (start, isUTF16) = _getCocoaStringPointer(immutableCopy)
let length = _stdlib_binary_CFStringGetLength(immutableCopy)
let length = _StringGuts.getCocoaLength(
_unsafeBitPattern: Builtin.reinterpretCast(immutableCopy))
// Detect fast-UTF8 Cocoa
let fastUTF8 = !isUTF16 && start != nil
return _StringGuts(
_largeNonTaggedCocoaObject: immutableCopy,
count: length,
isSingleByte: !isUTF16,
start: start)
cocoa: immutableCopy, providesFastUTF8: fastUTF8, length: length)
}
extension String {
// Creates a `String` whose guts are produced from a Cocoa string object.
public // SPI(Foundation)
init(_cocoaString: AnyObject) {
// NOTE(review): `_guts` is assigned twice, through `_makeCocoaStringGuts`
// and then `_bridgeCocoaString`; this looks like the old and new statements
// interleaved -- confirm which one is intended.
self._guts = _makeCocoaStringGuts(_cocoaString)
self._guts = _bridgeCocoaString(_cocoaString)
}
}
// At runtime, this class is derived from `__SwiftNativeNSStringBase`,
extension String {
  /// Produces the object used to represent this string on the Objective-C
  /// side.
  ///
  /// Small strings are copied into a newly created CF string, immortal
  /// strings are wrapped in `_SharedStringStorage`, and every other form is
  /// expected to already carry a bridgeable object, which is returned as is.
  @_effects(releasenone)
  public // SPI(Foundation)
  func _bridgeToObjectiveCImpl() -> AnyObject {
    // TODO(UTF8): create and use a visit pattern on _StringGuts to handle each
    // form, rather than querying object directly. Presumably there will be
    // other such visitors.
    guard !_guts._object.isSmall else {
      return _guts._object.asSmallString.withUTF8 { utf8 in
        // TODO(UTF8 perf): worth isKnownASCII check for different encoding?
        return _swift_stdlib_CFStringCreateWithBytes(
          nil, utf8.baseAddress._unsafelyUnwrappedUnchecked,
          utf8.count,
          kCFStringEncodingUTF8, 0)
          as AnyObject
      }
    }

    guard !_guts._object.isImmortal else {
      return _SharedStringStorage(immortal: _guts._object.fastUTF8)
    }

    _sanityCheck(_guts._object.hasObjCBridgeableObject,
      "Unknown non-bridgeable object case")
    return _guts._object.objCBridgeableObject
  }
}
// At runtime, this class is derived from `_SwiftNativeNSStringBase`,
// which is derived from `NSString`.
//
// The @_swift_native_objc_runtime_base attribute
// This allows us to subclass an Objective-C class and use the fast Swift
// memory allocator.
//
// NOTE: older runtimes called this _SwiftNativeNSString. The two must
// coexist, so it was renamed. The old name must not be used in the new
// runtime.
@_fixed_layout // FIXME(sil-serialize-all)
@objc @_swift_native_objc_runtime_base(__SwiftNativeNSStringBase)
public class __SwiftNativeNSString {
@objc @_swift_native_objc_runtime_base(_SwiftNativeNSStringBase)
public class _SwiftNativeNSString {
@usableFromInline // FIXME(sil-serialize-all)
@objc
internal init() {}
@@ -252,166 +249,18 @@ public protocol _NSStringCore : _NSCopying /* _NSFastEnumeration */ {
@objc(characterAtIndex:)
func character(at index: Int) -> UInt16
// We also override the following methods for efficiency.
// We also override the following methods for efficiency.
@objc(getCharacters:range:)
func getCharacters(
_ buffer: UnsafeMutablePointer<UInt16>,
range aRange: _SwiftNSRange)
_ buffer: UnsafeMutablePointer<UInt16>,
range aRange: _SwiftNSRange)
@objc(_fastCharacterContents)
func _fastCharacterContents() -> UnsafePointer<UInt16>?
}
/// An `NSString` built around a slice of contiguous Swift `String` storage.
///
/// NOTE: older runtimes called this _NSContiguousString. The two must
/// coexist, so it was renamed. The old name must not be used in the new
/// runtime.
@_fixed_layout // FIXME(sil-serialize-all)
public final class __NSContiguousString : __SwiftNativeNSString, _NSStringCore {
// The Swift string guts this object wraps.
public let _guts: _StringGuts
// Wraps `_guts` as is; opaque guts are rejected by the sanity check.
@inlinable // FIXME(sil-serialize-all)
public init(_ _guts: _StringGuts) {
_sanityCheck(!_guts._isOpaque,
"__NSContiguousString requires contiguous storage")
self._guts = _guts
super.init()
}
// Wraps guts rebuilt from the unmanaged ASCII or UTF-16 view of `guts`,
// chosen by `guts.isASCII`.
@inlinable // FIXME(sil-serialize-all)
public init(_unmanaged guts: _StringGuts) {
_sanityCheck(!guts._isOpaque,
"__NSContiguousString requires contiguous storage")
if guts.isASCII {
self._guts = _StringGuts(_large: guts._unmanagedASCIIView)
} else {
self._guts = _StringGuts(_large: guts._unmanagedUTF16View)
}
super.init()
}
// Same as `init(_unmanaged:)`, but only the `range` slice of the unmanaged
// view is wrapped.
@inlinable // FIXME(sil-serialize-all)
public init(_unmanaged guts: _StringGuts, range: Range<Int>) {
_sanityCheck(!guts._isOpaque,
"__NSContiguousString requires contiguous storage")
if guts.isASCII {
self._guts = _StringGuts(_large: guts._unmanagedASCIIView[range])
} else {
self._guts = _StringGuts(_large: guts._unmanagedUTF16View[range])
}
super.init()
}
// Not supported: always hits `_sanityCheckFailure`.
@usableFromInline // FIXME(sil-serialize-all)
@objc
init(coder aDecoder: AnyObject) {
_sanityCheckFailure("init(coder:) not implemented for __NSContiguousString")
}
@inlinable // FIXME(sil-serialize-all)
deinit {}
// Reports `_guts.count` as the string's length.
@inlinable
@objc(length)
public var length: Int {
return _guts.count
}
// Returns the code unit at `index`, using the checked-offset accessor.
// `_fixLifetime` is deferred so `self` stays alive across the read.
@inlinable
@objc(characterAtIndex:)
public func character(at index: Int) -> UInt16 {
defer { _fixLifetime(self) }
return _guts.codeUnit(atCheckedOffset: index)
}
// Copies the code units in `aRange` into `buffer`. The range must have a
// non-negative location and length and must end within `_guts.count`;
// both conditions are enforced with `_precondition`.
@inlinable
@objc(getCharacters:range:)
public func getCharacters(
_ buffer: UnsafeMutablePointer<UInt16>,
range aRange: _SwiftNSRange) {
_precondition(aRange.location >= 0 && aRange.length >= 0)
let range: Range<Int> = aRange.location ..< aRange.location + aRange.length
_precondition(range.upperBound <= Int(_guts.count))
// Copy from whichever unmanaged view matches the storage.
if _guts.isASCII {
_guts._unmanagedASCIIView[range]._copy(
into: UnsafeMutableBufferPointer(start: buffer, count: range.count))
} else {
_guts._unmanagedUTF16View[range]._copy(
into: UnsafeMutableBufferPointer(start: buffer, count: range.count))
}
_fixLifetime(self)
}
// Returns the start of the unmanaged UTF-16 view, or `nil` when the guts are
// ASCII.
@inlinable
@objc(_fastCharacterContents)
public func _fastCharacterContents() -> UnsafePointer<UInt16>? {
guard !_guts.isASCII else { return nil }
return _guts._unmanagedUTF16View.start
}
@objc(copyWithZone:)
public func copy(with zone: _SwiftNSZone?) -> AnyObject {
// Since this string is immutable we can just return ourselves.
return self
}
/// The caller of this function guarantees that the closure 'body' does not
/// escape the object referenced by the opaque pointer passed to it or
/// anything transitively reachable from this object. Doing so
/// will result in undefined behavior.
@inlinable // FIXME(sil-serialize-all)
@_semantics("self_no_escaping_closure")
func _unsafeWithNotEscapedSelfPointer<Result>(
_ body: (OpaquePointer) throws -> Result
) rethrows -> Result {
// Bit-cast `self` to an opaque pointer and keep `self` alive until `body`
// has returned.
let selfAsPointer = unsafeBitCast(self, to: OpaquePointer.self)
defer {
_fixLifetime(self)
}
return try body(selfAsPointer)
}
/// The caller of this function guarantees that the closure 'body' does not
/// escape either object referenced by the opaque pointer pair passed to it or
/// transitively reachable objects. Doing so will result in undefined
/// behavior.
@inlinable // FIXME(sil-serialize-all)
@_semantics("pair_no_escaping_closure")
func _unsafeWithNotEscapedSelfPointerPair<Result>(
_ rhs: __NSContiguousString,
_ body: (OpaquePointer, OpaquePointer) throws -> Result
) rethrows -> Result {
// Bit-cast both objects to opaque pointers and keep both alive until `body`
// has returned.
let selfAsPointer = unsafeBitCast(self, to: OpaquePointer.self)
let rhsAsPointer = unsafeBitCast(rhs, to: OpaquePointer.self)
defer {
_fixLifetime(self)
_fixLifetime(rhs)
}
return try body(selfAsPointer, rhsAsPointer)
}
}
extension String {
/// Same as `_bridgeToObjectiveC()`, but located inside the core standard
/// library.
///
/// Small strings are bridged through `_bridgeToCocoa`, strings that already
/// wrap a Cocoa object hand that object back, and everything else is wrapped
/// in a new `__NSContiguousString`.
@inlinable // FIXME(sil-serialize-all)
public func _stdlib_binary_bridgeToObjectiveCImpl() -> AnyObject {
  guard !_guts._isSmall else {
    return _bridgeToCocoa(_guts._smallUTF8String)
  }
  guard let existingCocoa = _guts._underlyingCocoaString else {
    return __NSContiguousString(_guts)
  }
  return existingCocoa
}
// Non-inlined entry point that simply forwards to
// `_stdlib_binary_bridgeToObjectiveCImpl()`.
@inline(never) // Hide the CF dependency
public func _bridgeToObjectiveCImpl() -> AnyObject {
return _stdlib_binary_bridgeToObjectiveCImpl()
}
@objc(_fastCStringContents)
func _fastCStringContents() -> UnsafePointer<CChar>?
}
// Called by the SwiftObject implementation to get the description of a value
@@ -421,11 +270,10 @@ public func _getDescription<T>(_ x: T) -> AnyObject {
return String(reflecting: x)._bridgeToObjectiveCImpl()
}
#else // !_runtime(_ObjC)
@_fixed_layout // FIXME(sil-serialize-all)
public class __SwiftNativeNSString {
public class _SwiftNativeNSString {
@usableFromInline // FIXME(sil-serialize-all)
internal init() {}
deinit {}
@@ -434,3 +282,46 @@ public class __SwiftNativeNSString {
public protocol _NSStringCore: class {}
#endif
extension String {
  // Maps a UTF-16 code unit offset to an index by walking the UTF-16 view
  // from its start.
  //
  // @opaque
  internal func _utf16OffsetToIndex(_ offset: Int) -> Index {
    // TODO(UTF8): Track known ASCII
    // TODO(UTF8): Leave breadcrumbs, and more efficient impl
    let view = utf16
    return view.index(view.startIndex, offsetBy: offset)
  }

  // Maps a range of UTF-16 code unit offsets to a range of indices by
  // converting each bound on its own.
  //
  // @opaque
  internal func _utf16OffsetToIndex(_ range: Range<Int>) -> Range<Index> {
    // TODO(UTF8): Can be more efficient for a range
    let lower = _utf16OffsetToIndex(range.lowerBound)
    let upper = _utf16OffsetToIndex(range.upperBound)
    return lower ..< upper
  }

  // Number of UTF-16 code units in the string, taken from the UTF-16 view.
  //
  // @opaque
  internal func _utf16Length() -> Int {
    // TODO(UTF8): Track known ASCII
    // TODO(UTF8): Leave breadcrumbs, and more efficient impl. Perhaps even
    // store it.
    let view = utf16
    return view.count
  }

  // `characterAtIndex`-style access: the UTF-16 code unit at a given UTF-16
  // offset.
  //
  // @opaque
  internal func _utf16CodeUnitAtOffset(_ offset: Int) -> UInt16 {
    let position = _utf16OffsetToIndex(offset)
    return utf16[position]
  }
}
+122 -61
View File
@@ -1,16 +1,25 @@
//===----------------------------------------------------------------------===//
//===--- StringCharacterView.swift - String's Collection of Characters ----===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// String is-not-a Sequence or Collection, but it exposes a
// collection of characters.
//
//===----------------------------------------------------------------------===//
// FIXME(ABI)#70 : The character string view should have a custom iterator type
// to allow performance optimizations of linear traversals.
import SwiftShims
// String is a bidirectional collection of `Character`s, aka graphemes
extension String: BidirectionalCollection {
/// A type that represents the number of steps between two `String.Index`
/// values, where one value is reachable from the other.
@@ -21,22 +30,28 @@ extension String: BidirectionalCollection {
public typealias SubSequence = Substring
public typealias Element = Character
/// The position of the first character in a nonempty string.
///
/// In an empty string, `startIndex` is equal to `endIndex`.
@inlinable // FIXME(sil-serialize-all)
public var startIndex: Index { return Index(encodedOffset: 0) }
// NOTE(review): `startIndex` is declared twice in a row. Both forms return
// the index at encoded offset 0; the second only adds an `@inline(__always)`
// getter. This looks like old and new versions interleaved -- confirm which
// one is intended.
@inlinable
public var startIndex: Index {
@inline(__always) get { return Index(encodedOffset: 0) }
}
/// A string's "past the end" position---that is, the position one greater
/// than the last valid subscript argument.
///
/// In an empty string, `endIndex` is equal to `startIndex`.
@inlinable // FIXME(sil-serialize-all)
public var endIndex: Index { return Index(encodedOffset: _guts.count) }
// NOTE(review): `endIndex` is declared twice in a row. Both forms return the
// index at encoded offset `_guts.count`; the second only adds an
// `@inline(__always)` getter. This looks like old and new versions
// interleaved -- confirm which one is intended.
@inlinable
public var endIndex: Index {
@inline(__always) get { return Index(encodedOffset: _guts.count) }
}
/// The number of characters in a string.
public var count: Int {
// Both lines below compute the same value: the distance from `startIndex`
// to `endIndex`.
// NOTE(review): an implicit-getter `return` and an explicit `get` accessor
// are mixed in one body; this looks like old and new versions interleaved --
// confirm which one is intended.
return distance(from: startIndex, to: endIndex)
@inline(__always) get { return distance(from: startIndex, to: endIndex) }
}
/// Returns the position immediately after the given index.
@@ -45,10 +60,10 @@ extension String: BidirectionalCollection {
/// `endIndex`.
/// - Returns: The index value immediately after `i`.
public func index(after i: Index) -> Index {
// Dispatches to `characterIndex(after:)` on whichever view `_visitGuts`
// selects.
return _visitGuts(_guts, args: i,
ascii: { ascii, i in ascii.characterIndex(after: i) },
utf16: { utf16, i in utf16.characterIndex(after: i) },
opaque: { opaque, i in opaque.characterIndex(after: i) })
// NOTE(review): the statements below follow an unconditional `return` and so
// are unreachable; they appear to be the replacement body (advance the
// encoded offset by the stride of the character starting at `i`) -- confirm
// which one is intended.
// TODO(UTF8): populate the stride cache in the resultant iterator
let stride = _characterStride(startingAt: i)
return Index(encodedOffset: i.encodedOffset &+ stride)
}
/// Returns the position immediately before the given index.
@@ -57,12 +72,13 @@ extension String: BidirectionalCollection {
/// `startIndex`.
/// - Returns: The index value immediately before `i`.
public func index(before i: Index) -> Index {
// Dispatches to `characterIndex(before:)` on whichever view `_visitGuts`
// selects.
return _visitGuts(_guts, args: i,
ascii: { ascii, i in ascii.characterIndex(before: i) },
utf16: { utf16, i in utf16.characterIndex(before: i) },
opaque: { opaque, i in opaque.characterIndex(before: i) })
}
// NOTE(review): the lines below sit after the function's closing brace. They
// appear to be the replacement body (step the encoded offset back by the
// stride of the character ending at `i`), interleaved with the old one --
// confirm which one is intended.
// TODO: known-ASCII fast path, single-scalar-grapheme fast path, etc.
// TODO(UTF8): populate the stride cache in the resultant iterator
let stride = _characterStride(endingAt: i)
return Index(encodedOffset: i.encodedOffset &- stride)
}
/// Returns an index that is the specified distance from the given index.
///
/// The following example obtains an index advanced four positions from a
@@ -73,27 +89,22 @@ extension String: BidirectionalCollection {
/// print(s[i])
/// // Prints "t"
///
/// The value passed as `distance` must not offset `i` beyond the bounds of
/// the collection.
/// The value passed as `n` must not offset `i` beyond the bounds of the
/// collection.
///
/// - Parameters:
/// - i: A valid index of the collection.
/// - distance: The distance to offset `i`.
/// - Returns: An index offset by `distance` from the index `i`. If
/// `distance` is positive, this is the same value as the result of
/// `distance` calls to `index(after:)`. If `distance` is negative, this
/// is the same value as the result of `abs(distance)` calls to
/// `index(before:)`.
/// - n: The distance to offset `i`.
/// - Returns: An index offset by `n` from the index `i`. If `n` is positive,
/// this is the same value as the result of `n` calls to `index(after:)`.
/// If `n` is negative, this is the same value as the result of `-n` calls
/// to `index(before:)`.
///
/// - Complexity: O(*k*), where *k* is the absolute value of `distance`.
public func index(_ i: Index, offsetBy distance: IndexDistance) -> Index {
return _visitGuts(_guts, args: (i, distance),
ascii: { ascii, args in let (i, n) = args
return ascii.characterIndex(i, offsetBy: n) },
utf16: { utf16, args in let (i, n) = args
return utf16.characterIndex(i, offsetBy: n) },
opaque: { opaque, args in let (i, n) = args
return opaque.characterIndex(i, offsetBy: n) })
/// - Complexity: O(*n*), where *n* is the absolute value of `n`.
@inlinable @inline(__always)
public func index(_ i: Index, offsetBy n: IndexDistance) -> Index {
// TODO: known-ASCII and single-scalar-grapheme fast path, etc.
return __index(i, offsetBy: n)
}
/// Returns an index that is the specified distance from the given index,
@@ -118,31 +129,27 @@ extension String: BidirectionalCollection {
/// print(j)
/// // Prints "nil"
///
/// The value passed as `distance` must not offset `i` beyond the bounds of the
/// The value passed as `n` must not offset `i` beyond the bounds of the
/// collection, unless the index passed as `limit` prevents offsetting
/// beyond those bounds.
///
/// - Parameters:
/// - i: A valid index of the collection.
/// - distance: The distance to offset `i`.
/// - limit: A valid index of the collection to use as a limit. If `distance > 0`,
/// a limit that is less than `i` has no effect. Likewise, if `distance < 0`, a
/// - n: The distance to offset `i`.
/// - limit: A valid index of the collection to use as a limit. If `n > 0`,
/// a limit that is less than `i` has no effect. Likewise, if `n < 0`, a
/// limit that is greater than `i` has no effect.
/// - Returns: An index offset by `distance` from the index `i`, unless that index
/// - Returns: An index offset by `n` from the index `i`, unless that index
/// would be beyond `limit` in the direction of movement. In that case,
/// the method returns `nil`.
///
/// - Complexity: O(*k*), where *k* is the absolute value of `distance`.
/// - Complexity: O(*n*), where *n* is the absolute value of `n`.
@inlinable @inline(__always)
public func index(
_ i: Index, offsetBy distance: IndexDistance, limitedBy limit: Index
_ i: Index, offsetBy n: IndexDistance, limitedBy limit: Index
) -> Index? {
return _visitGuts(_guts, args: (i, distance, limit),
ascii: { ascii, args in let (i, n, limit) = args
return ascii.characterIndex(i, offsetBy: n, limitedBy: limit) },
utf16: { utf16, args in let (i, n, limit) = args
return utf16.characterIndex(i, offsetBy: n, limitedBy: limit) },
opaque: { opaque, args in let (i, n, limit) = args
return opaque.characterIndex(i, offsetBy: n, limitedBy: limit) })
// TODO: known-ASCII and single-scalar-grapheme fast path, etc.
return __index(i, offsetBy: n, limitedBy: limit)
}
/// Returns the distance between two indices.
@@ -153,15 +160,11 @@ extension String: BidirectionalCollection {
/// `start`, the result is zero.
/// - Returns: The distance between `start` and `end`.
///
/// - Complexity: O(*k*), where *k* is the resulting distance.
/// - Complexity: O(*n*), where *n* is the resulting distance.
@inlinable @inline(__always)
public func distance(from start: Index, to end: Index) -> IndexDistance {
return _visitGuts(_guts, args: (start, end),
ascii: { ascii, args in let (start, end) = args
return ascii.characterDistance(from: start, to: end) },
utf16: { utf16, args in let (start, end) = args
return utf16.characterDistance(from: start, to: end) },
opaque: { opaque, args in let (start, end) = args
return opaque.characterDistance(from: start, to: end) })
// TODO: known-ASCII and single-scalar-grapheme fast path, etc.
return __distance(from: start, to: end)
}
/// Accesses the character at the given position.
@@ -179,10 +182,68 @@ extension String: BidirectionalCollection {
///
/// - Parameter i: A valid index of the string. `i` must be less than the
/// string's end index.
@inlinable
public subscript(i: Index) -> Character {
return _visitGuts(_guts, args: i,
ascii: { ascii, i in return ascii.character(at: i) },
utf16: { utf16, i in return utf16.character(at: i) },
opaque: { opaque, i in return opaque.character(at: i) })
@inline(__always) get {
// FIXME(UTF8): bounds checking
// TODO: known-ASCII and single-scalar-grapheme fast path, etc.
let distance = _characterStride(startingAt: i)
// TODO(UTF8): Probably worth making into `extractRange` on StringGuts.
if _fastPath(_guts.isFastUTF8) {
return _guts.withFastUTF8 { utf8 in
let start = i.encodedOffset
let end = start + distance
let cus = UnsafeBufferPointer(rebasing: utf8[start..<end])
return Character(unchecked: String._uncheckedFromUTF8(cus))
}
}
return _foreignSubscript(position: i, distance: distance)
}
}
}
/// Returns the stride of the character that starts at `i`, expressed in the
/// same units as `Index.encodedOffset`.
///
/// If the index already carries a measured stride, that value is reused;
/// otherwise the measurement is delegated to the non-inlinable
/// `_StringGuts._opaqueCharacterStride(startingAt:)`.
@inlinable @inline(__always)
internal func _characterStride(startingAt i: Index) -> Int {
  // TODO: Known-single-scalar-grapheme fast path
  guard let cachedStride = i.characterStride else {
    // Not measured yet: take the resilient (opaque) path.
    return _guts._opaqueCharacterStride(startingAt: i.encodedOffset)
  }
  return cachedStride
}
/// Returns the stride of the character that ends at `i`, expressed in the
/// same units as `Index.encodedOffset`.
///
/// Unlike the `startingAt:` variant, no cached stride on the index is
/// consulted; the measurement is always delegated to
/// `_StringGuts._opaqueCharacterStride(endingAt:)`.
@inlinable @inline(__always)
internal func _characterStride(endingAt i: Index) -> Int {
// TODO: Known-single-scalar-grapheme fast path
return _guts._opaqueCharacterStride(endingAt: i.encodedOffset)
}
}
// Foreign string support
extension String {
  /// Builds the `Character` covering `distance` code units starting at
  /// `position` in a foreign (Cocoa-backed) string.
  ///
  /// The UTF-16 code units in that range are copied out of the Cocoa object
  /// into a temporary array, which is then converted to a `String` without
  /// validation and wrapped in a `Character`.
  ///
  /// - Parameters:
  ///   - position: Index whose `encodedOffset` marks the first code unit.
  ///   - distance: Number of code units the character spans.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignSubscript(position: Index, distance: Int) -> Character {
    _sanityCheck(_guts.isForeign)

    let lowerBound = position.encodedOffset
    let upperBound = lowerBound + distance

    // TODO(UTF8 perf): Stack buffer if small enough...
    var scratch = [UInt16](repeating: 0, count: upperBound - lowerBound)
    scratch.withUnsafeMutableBufferPointer { destination in
      _cocoaStringCopyCharacters(
        from: _guts._object.cocoaObject,
        range: lowerBound..<upperBound,
        into: destination.baseAddress._unsafelyUnwrappedUnchecked)
    }

    return scratch.withUnsafeBufferPointer { codeUnits in
      return Character(String._uncheckedFromUTF16(codeUnits))
    }
  }
}
+30 -129
View File
@@ -12,162 +12,63 @@
import SwiftShims
extension _StringGuts {
@inline(__always)
@inlinable
public func _bitwiseEqualTo(_ other: _StringGuts) -> Bool {
return self.rawBits == other.rawBits
}
@inlinable
internal static func isEqual(
_ left: _StringGuts, to right: _StringGuts
) -> Bool {
// Bitwise equality implies string equality
if left._bitwiseEqualTo(right) {
return true
}
if left._isSmall && right._isSmall {
// TODO: Ensure normality when adding UTF-8 support
_sanityCheck(left._isASCIIOrSmallASCII && right._isASCIIOrSmallASCII,
"Need to ensure normality")
// Equal small strings should be bitwise equal if ASCII
return false
}
return compare(left, to: right) == 0
}
@inlinable
internal static func isEqual(
_ left: _StringGuts, _ leftRange: Range<Int>,
to right: _StringGuts, _ rightRange: Range<Int>
) -> Bool {
// Bitwise equality implies string equality
if left._bitwiseEqualTo(right) && leftRange == rightRange {
return true
}
return compare(left, leftRange, to: right, rightRange) == 0
}
@inlinable
internal static func isLess(
_ left: _StringGuts, than right: _StringGuts
) -> Bool {
// Bitwise equality implies string equality
if left._bitwiseEqualTo(right) {
return false
}
if left._isSmall && right._isSmall {
// Small strings compare lexicographically if ASCII
return left._smallUTF8String._compare(right._smallUTF8String) == .less
}
return compare(left, to: right) == -1
}
@inlinable
internal static func isLess(
_ left: _StringGuts, _ leftRange: Range<Int>,
than right: _StringGuts, _ rightRange: Range<Int>
) -> Bool {
// Bitwise equality implies string equality
if left._bitwiseEqualTo(right) && leftRange == rightRange {
return false
}
return compare(left, leftRange, to: right, rightRange) == -1
}
@inlinable
internal static func compare(
_ left: _StringGuts, _ leftRange: Range<Int>,
to right: _StringGuts, _ rightRange: Range<Int>
) -> Int {
defer { _fixLifetime(left) }
defer { _fixLifetime(right) }
if left.isASCII && right.isASCII {
let leftASCII = left._unmanagedASCIIView[leftRange]
let rightASCII = right._unmanagedASCIIView[rightRange]
let result = leftASCII.compareASCII(to: rightASCII)
return result
}
let leftBits = left.rawBits
let rightBits = right.rawBits
return _compareUnicode(leftBits, leftRange, rightBits, rightRange)
}
@inlinable
internal static func compare(
_ left: _StringGuts, to right: _StringGuts
) -> Int {
defer { _fixLifetime(left) }
defer { _fixLifetime(right) }
if left.isASCII && right.isASCII {
let leftASCII = left._unmanagedASCIIView
let rightASCII = right._unmanagedASCIIView
let result = leftASCII.compareASCII(to: rightASCII)
return result
}
let leftBits = left.rawBits
let rightBits = right.rawBits
return _compareUnicode(leftBits, rightBits)
}
}
extension StringProtocol {
@inlinable // FIXME(sil-serialize-all)
@inlinable
@inline(__always) // de-virtualize
public static func ==<S: StringProtocol>(lhs: Self, rhs: S) -> Bool {
return _StringGuts.isEqual(
lhs._wholeString._guts, lhs._encodedOffsetRange,
to: rhs._wholeString._guts, rhs._encodedOffsetRange)
// TODO(UTF8 perf): This is a horribly slow means...
return String(lhs) == String(rhs)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
@inline(__always) // de-virtualize
public static func !=<S: StringProtocol>(lhs: Self, rhs: S) -> Bool {
return !(lhs == rhs)
// TODO(UTF8 perf): This is a horribly slow means...
return String(lhs) != String(rhs)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
@inline(__always) // de-virtualize
public static func < <R: StringProtocol>(lhs: Self, rhs: R) -> Bool {
return _StringGuts.isLess(
lhs._wholeString._guts, lhs._encodedOffsetRange,
than: rhs._wholeString._guts, rhs._encodedOffsetRange)
// TODO(UTF8 perf): This is a horribly slow means...
return String(lhs) < String(rhs)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
@inline(__always) // de-virtualize
public static func > <R: StringProtocol>(lhs: Self, rhs: R) -> Bool {
return rhs < lhs
// TODO(UTF8 perf): This is a horribly slow means...
return String(lhs) > String(rhs)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
@inline(__always) // de-virtualize
public static func <= <R: StringProtocol>(lhs: Self, rhs: R) -> Bool {
return !(rhs < lhs)
// TODO(UTF8 perf): This is a horribly slow means...
return String(lhs) <= String(rhs)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
@inline(__always) // de-virtualize
public static func >= <R: StringProtocol>(lhs: Self, rhs: R) -> Bool {
return !(lhs < rhs)
// TODO(UTF8 perf): This is a horribly slow means...
return String(lhs) >= String(rhs)
}
}
extension String : Equatable {
// FIXME: Why do I need this? If I drop it, I get "ambiguous use of operator"
@inlinable // FIXME(sil-serialize-all)
@inlinable @inline(__always) // For the bitwise comparison
public static func ==(lhs: String, rhs: String) -> Bool {
return _StringGuts.isEqual(lhs._guts, to: rhs._guts)
// Bitwise-identical guts imply equal strings; skip the full comparison.
if lhs._guts.rawBits == rhs._guts.rawBits { return true }
return _compareStringsEqual(lhs, rhs)
}
}
extension String : Comparable {
// FIXME: Why do I need this? If I drop it, I get "ambiguous use of operator"
@inlinable // FIXME(sil-serialize-all)
@inlinable @inline(__always) // For the bitwise comparison
public static func < (lhs: String, rhs: String) -> Bool {
return _StringGuts.isLess(lhs._guts, than: rhs._guts)
// Bitwise-identical guts are equal strings, so neither is less.
if lhs._guts.rawBits == rhs._guts.rawBits { return false }
return _compareStringsLess(lhs, rhs)
}
}
File diff suppressed because it is too large Load Diff
+119 -159
View File
@@ -9,176 +9,136 @@
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
// String Creation Helpers
//===----------------------------------------------------------------------===//
extension String {
/// Constructs a `String` in `resultStorage` containing the given UTF-8.
///
/// Low-level construction interface used by introspection
/// implementation in the runtime library.
@inlinable
@_silgen_name("swift_stringFromUTF8InRawMemory")
public // COMPILER_INTRINSIC
static func _fromUTF8InRawMemory(
_ resultStorage: UnsafeMutablePointer<String>,
start: UnsafeMutablePointer<UTF8.CodeUnit>,
utf8CodeUnitCount: Int
) {
resultStorage.initialize(to:
String._fromWellFormedUTF8(
UnsafeBufferPointer(start: start, count: utf8CodeUnitCount)))
}
@usableFromInline
static func _fromUTF8(
_ input: UnsafeBufferPointer<UInt8>, repair: Bool
) -> String? {
if _isAllASCII(input) {
return _fromASCII(input)
}
return _fromNonASCIIUTF8(input, repair: repair)
}
@usableFromInline
static func _fromASCII(_ input: UnsafeBufferPointer<UInt8>) -> String {
if let smol = _SmallUTF8String(input) {
return String(_StringGuts(smol))
}
let storage = _SwiftStringStorage<UInt8>.create(
capacity: input.count, count: input.count)
_sanityCheck(storage.count == input.count)
storage.start.initialize(
from: input.baseAddress._unsafelyUnwrappedUnchecked, count: input.count)
return String(_StringGuts(_large: storage))
}
@usableFromInline
static func _fromWellFormedUTF8(
_ input: UnsafeBufferPointer<UInt8>, repair: Bool = false
internal static func _fromASCII(
_ input: UnsafeBufferPointer<UInt8>
) -> String {
return String._fromUTF8(input, repair: repair)!
}
@inlinable
static func _fromWellFormedUTF16CodeUnits<C : RandomAccessCollection>(
_ input: C, repair: Bool = false
) -> String where C.Element == UTF16.CodeUnit {
if let smol = _SmallUTF8String(input) {
if let smol = _SmallString(input) {
return String(_StringGuts(smol))
}
return String._fromCodeUnits(
input, encoding: UTF16.self, repairIllFormedSequences: repair)!
// TODO(UTF8): Do we want to remember ASCII-ness?
let storage = _StringStorage.create(initializingFrom: input)
return storage.asString
}
@inlinable
internal static func _fromCodeUnits<
Input: Collection, Encoding: Unicode.Encoding
>(
_ input: Input, encoding: Encoding.Type, repairIllFormedSequences: Bool
) -> String?
where Input.Element == Encoding.CodeUnit {
/// Creates a `String` from `input` only if it is well-formed UTF-8.
///
/// The bytes are run through a UTF-8 to UTF-8 `transcode` that stops at the
/// first error; if an error is reported the result is `nil`, otherwise the
/// copied bytes are handed to `_uncheckedFromUTF8`.
@usableFromInline
internal static func _tryFromUTF8(
  _ input: UnsafeBufferPointer<UInt8>
) -> String? {
  // TODO(UTF8 perf): More efficient validation
  // TODO(UTF8 perf): Skip intermediary array
  var validated: [UInt8] = []
  validated.reserveCapacity(input.count)

  // `transcode` reports `true` when it encountered ill-formed input.
  let hadError = transcode(
    input.makeIterator(),
    from: UTF8.self,
    to: UTF8.self,
    stoppingOnError: true,
    into: { validated.append($0) })
  if hadError { return nil }

  return validated.withUnsafeBufferPointer { bytes in
    String._uncheckedFromUTF8(bytes)
  }
}
/// Creates a `String` from `input`, continuing past ill-formed sequences
/// instead of failing.
///
/// - Returns: The resulting string paired with the flag reported by
///   `transcode`, i.e. whether any error was encountered in `input`.
@usableFromInline
internal static func _fromUTF8Repairing(
  _ input: UnsafeBufferPointer<UInt8>
) -> (String, Bool) {
  // TODO(UTF8 perf): More efficient validation
  // TODO(UTF8 perf): Skip intermediary array
  var repairedBytes: [UInt8] = []
  repairedBytes.reserveCapacity(input.count)

  let repairsMade = transcode(
    input.makeIterator(),
    from: UTF8.self,
    to: UTF8.self,
    stoppingOnError: false,
    into: { repairedBytes.append($0) })

  let result = repairedBytes.withUnsafeBufferPointer { bytes in
    String._uncheckedFromUTF8(bytes)
  }
  return (result, repairsMade)
}
/// Creates a `String` from `input` without validating it.
///
/// A small-string representation is used when `_SmallString(input)`
/// succeeds; otherwise new `_StringStorage` is created from the bytes.
@usableFromInline
internal static func _uncheckedFromUTF8(
  _ input: UnsafeBufferPointer<UInt8>
) -> String {
  guard let small = _SmallString(input) else {
    // TODO(UTF8): Do we want to do an ascii scan?
    return _StringStorage.create(initializingFrom: input).asString
  }
  return String(_StringGuts(small))
}
/// Creates a `String` from UTF-16 `input` by transcoding it to UTF-8.
///
/// The input is expected to be well-formed: an error reported by the
/// transcoder only trips a sanity check, and the transcoded bytes are then
/// passed to `_uncheckedFromUTF8`.
@usableFromInline
internal static func _uncheckedFromUTF16(
  _ input: UnsafeBufferPointer<UInt16>
) -> String {
  // TODO(UTF8): smol strings
  // TODO(UTF8): Faster transcoding...
  // TODO(UTF8): Skip intermediary array
  var utf8Bytes: [UInt8] = []
  utf8Bytes.reserveCapacity(input.count)

  let hadError = transcode(
    input.makeIterator(),
    from: UTF16.self,
    to: UTF8.self,
    stoppingOnError: false,
    into: { utf8Bytes.append($0) })
  _sanityCheck(!hadError, "Error present")

  return utf8Bytes.withUnsafeBufferPointer { bytes in
    String._uncheckedFromUTF8(bytes)
  }
}
/// Calls `body` with a buffer pointer to this string's UTF-8 code units.
///
/// - An empty string yields a zero-count buffer backed by a local dummy byte.
/// - A string whose guts are fast UTF-8 yields its native storage.
/// - Any other representation is not implemented yet.
///
/// - Parameter body: Closure that receives the buffer; the pointer must not
///   escape the closure.
/// - Returns: Whatever `body` returns.
internal func _withUnsafeBufferPointerToUTF8<R>(
  _ body: (UnsafeBufferPointer<UTF8.CodeUnit>) throws -> R
) rethrows -> R {
  if isEmpty {
    var nothing: UInt8 = 0
    // Scope the pointer explicitly: a pointer produced by passing `&nothing`
    // straight to the `UnsafeBufferPointer` initializer is only valid for the
    // duration of that initializer call, so `body` would see a dangling
    // pointer.
    return try withUnsafePointer(to: &nothing) { placeholder in
      try body(UnsafeBufferPointer(start: placeholder, count: 0))
    }
  }
  if _fastPath(_guts.isFastUTF8) {
    return try _guts.withFastUTF8(body)
  }
  unimplemented_utf8()
}
@usableFromInline @inline(never) // slow-path
internal static func _fromCodeUnits<
Input: Collection,
Encoding: Unicode.Encoding
>(
_ input: Input,
encoding: Encoding.Type,
repair: Bool
) -> (String, repairsMade: Bool)?
where Input.Element == Encoding.CodeUnit {
// TODO(SSO): small check
// Determine how many UTF-16 code units we'll need
let inputStream = input.makeIterator()
guard let (utf16Count, isASCII) = UTF16.transcodedLength(
of: inputStream,
decodedAs: encoding,
repairingIllFormedSequences: repairIllFormedSequences) else {
return nil
}
let capacity = utf16Count
if isASCII {
if let small = _SmallUTF8String(
_fromCodeUnits: input,
utf16Length: utf16Count,
isASCII: true,
Encoding.self
) {
return String(_StringGuts(small))
}
let storage = _SwiftStringStorage<UInt8>.create(
capacity: capacity,
count: utf16Count)
var p = storage.start
let sink: (UTF32.CodeUnit) -> Void = {
p.pointee = UTF8.CodeUnit($0)
p += 1
}
let hadError = transcode(
input.makeIterator(),
from: encoding, to: UTF32.self,
stoppingOnError: true,
into: sink)
_sanityCheck(!hadError,
"string cannot be ASCII if there were decoding errors")
return String(_largeStorage: storage)
} else {
// TODO(SSO): Small transcoded string
let storage = _SwiftStringStorage<UTF16.CodeUnit>.create(
capacity: capacity,
count: utf16Count)
var p = storage.start
let sink: (UTF16.CodeUnit) -> Void = {
p.pointee = $0
p += 1
}
_ = transcode(
input.makeIterator(),
from: encoding, to: UTF16.self,
stoppingOnError: !repairIllFormedSequences,
into: sink)
return String(_largeStorage: storage)
}
}
internal static func _fromNonASCIIUTF8(
_ input: UnsafeBufferPointer<UInt8>, repair: Bool
) -> String? {
if let smol = _SmallUTF8String(input) {
return String(_StringGuts(smol))
}
// Determine how many UTF-16 code units we'll need
let inputStream = input.makeIterator()
// TODO: Replace with much, much faster length check
guard let (utf16Count, isASCII) = UTF16.transcodedLength(
of: inputStream,
decodedAs: UTF8.self,
repairingIllFormedSequences: repair) else {
return nil
}
let capacity = utf16Count
_sanityCheck(!isASCII, "was given ASCII UTF-8")
let storage = _SwiftStringStorage<UTF16.CodeUnit>.create(
capacity: capacity,
count: utf16Count)
var p = storage.start
let sink: (UTF16.CodeUnit) -> Void = {
p.pointee = $0
p += 1
}
// TODO: Replace with much, much faster transcoding
_ = transcode(
// TODO(UTF8): Skip intermediary array
var contents: [UInt8] = []
contents.reserveCapacity(input.underestimatedCount)
let repaired = transcode(
input.makeIterator(),
from: UTF8.self, to: UTF16.self,
stoppingOnError: !repair,
into: sink)
return String(_largeStorage: storage)
}
from: Encoding.self,
to: UTF8.self,
stoppingOnError: false,
into: { contents.append($0) })
guard repair || !repaired else { return nil }
// For testing purposes only, allow ourselves to have invalid contents
@usableFromInline // @testable
static internal
func _fromInvalidUTF16(_ cus: UnsafeBufferPointer<UInt16>) -> String {
let storage = _SwiftStringStorage<UTF16.CodeUnit>.create(
capacity: cus.count, count: cus.count)
_ = storage._initialize(fromCodeUnits: cus, encoding: UTF16.self)
return String(_StringGuts(_large: storage))
let str = contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) }
return (str, repaired)
}
}
}
+135 -385
View File
@@ -12,411 +12,161 @@
import SwiftShims
/// CR and LF are common special cases in grapheme breaking logic
@inlinable // FIXME(sil-serialize-all)
internal var _CR: UInt8 { return 0x0d }
@inlinable // FIXME(sil-serialize-all)
internal var _LF: UInt8 { return 0x0a }
extension _StringVariant {
@inlinable
internal func _stride(at i: String.Index) -> Int {
if let stride = i.characterStride { return stride }
return characterStride(atOffset: i.encodedOffset)
/// Measures the grapheme cluster that starts at offset `i` in `utf8`.
///
/// Asks the thread-local ICU break iterator for the boundary following `i`
/// and returns the distance from `i` to that boundary. If the iterator
/// reports no further boundary, the remainder of the buffer is the stride.
@_effects(releasenone)
internal func _measureCharacterStride(
  of utf8: UnsafeBufferPointer<UInt8>, startingAt i: Int
) -> Int {
  let breakIterator = _ThreadLocalStorage.getUBreakIterator(utf8)
  let boundary = __swift_stdlib_ubrk_following(
    breakIterator, Int32(truncatingIfNeeded: i))

  // ubrk_following returns -1 (UBRK_DONE) when it hits the end of the buffer.
  guard _fastPath(boundary != -1) else {
    return utf8.count &- i
  }

  // The boundary is an offset into our buffer; its distance from `i` is the
  // stride.
  _sanityCheck(boundary > i, "zero-sized grapheme?")
  return Int(truncatingIfNeeded: boundary) &- i
}
@inlinable
internal func characterStride(atOffset offset: Int) -> Int {
let slice = self.checkedSlice(from: offset)
return slice.measureFirstExtendedGraphemeCluster()
/// Measures the grapheme cluster that starts at offset `i` in `utf16`.
///
/// Asks the thread-local ICU break iterator for the boundary following `i`
/// and returns the distance from `i` to that boundary. If the iterator
/// reports no further boundary, the remainder of the buffer is the stride.
@_effects(releasenone)
internal func _measureCharacterStride(
  of utf16: UnsafeBufferPointer<UInt16>, startingAt i: Int
) -> Int {
  let breakIterator = _ThreadLocalStorage.getUBreakIterator(utf16)
  let boundary = __swift_stdlib_ubrk_following(
    breakIterator, Int32(truncatingIfNeeded: i))

  // ubrk_following returns -1 (UBRK_DONE) when it hits the end of the buffer.
  guard _fastPath(boundary != -1) else {
    return utf16.count &- i
  }

  // The boundary is an offset into our buffer; its distance from `i` is the
  // stride.
  _sanityCheck(boundary > i, "zero-sized grapheme?")
  return Int(truncatingIfNeeded: boundary) &- i
}
@inlinable
internal func characterIndex(atOffset offset: Int) -> String.Index {
let stride = self.characterStride(atOffset: offset)
return String.Index(encodedOffset: offset, characterStride: stride)
/// Measures the grapheme cluster that ends at offset `i` in `utf8`.
///
/// Asks the thread-local ICU break iterator for the boundary preceding `i`
/// and returns the distance from that boundary to `i`.
///
/// - Returns: A non-negative stride. When the iterator reports that no
///   boundary precedes `i`, the stride reaches back to the start of the
///   buffer, i.e. `i` itself.
@_effects(releasenone)
internal func _measureCharacterStride(
  of utf8: UnsafeBufferPointer<UInt8>, endingAt i: Int
) -> Int {
  let iterator = _ThreadLocalStorage.getUBreakIterator(utf8)
  let offset = __swift_stdlib_ubrk_preceding(
    iterator, Int32(truncatingIfNeeded: i))
  // ubrk_preceding returns -1 (UBRK_DONE) when there is no boundary before
  // the given offset.
  if _fastPath(offset != -1) {
    // The distance from the preceding boundary to `i` is the stride.
    _sanityCheck(offset < i, "zero-sized grapheme?")
    return i &- Int(truncatingIfNeeded: offset)
  }
  // No earlier boundary: the character starts at offset 0, so the stride is
  // `i`. (`i &- utf8.count` would be zero or negative.)
  return i
}
@inlinable
internal func characterIndex(after i: String.Index) -> String.Index {
/// Measures the grapheme cluster that ends at offset `i` in `utf16`.
///
/// Asks the thread-local ICU break iterator for the boundary preceding `i`
/// and returns the distance from that boundary to `i`.
///
/// - Returns: A non-negative stride. When the iterator reports that no
///   boundary precedes `i`, the stride reaches back to the start of the
///   buffer, i.e. `i` itself.
@_effects(releasenone)
internal func _measureCharacterStride(
  of utf16: UnsafeBufferPointer<UInt16>, endingAt i: Int
) -> Int {
  let iterator = _ThreadLocalStorage.getUBreakIterator(utf16)
  let offset = __swift_stdlib_ubrk_preceding(
    iterator, Int32(truncatingIfNeeded: i))
  // ubrk_preceding returns -1 (UBRK_DONE) when there is no boundary before
  // the given offset.
  if _fastPath(offset != -1) {
    // The distance from the preceding boundary to `i` is the stride.
    _sanityCheck(offset < i, "zero-sized grapheme?")
    return i &- Int(truncatingIfNeeded: offset)
  }
  // No earlier boundary: the character starts at offset 0, so the stride is
  // `i`. (`i &- utf16.count` would be zero or negative.)
  return i
}
extension _StringGuts {
@usableFromInline @inline(never)
@_effects(releasenone)
internal func isOnGraphemeClusterBoundary(_ i: String.Index) -> Bool {
let offset = i.encodedOffset
_precondition(offset >= 0, "String index is out of bounds")
_precondition(offset < count, "Can't advance past endIndex")
// Find the current grapheme distance
let slice = self[offset..<count]
let stride1 = _stride(at: i)
// Calculate and cache the next grapheme distance
let stride2 = slice.dropFirst(stride1).measureFirstExtendedGraphemeCluster()
return String.Index(
encodedOffset: offset &+ stride1,
characterStride: stride2)
if offset == 0 || offset == self.count { return true }
guard isOnUnicodeScalarBoundary(i) else { return false }
let str = String(self)
return i == str.index(before: str.index(after: i))
}
@inlinable
internal func characterIndex(before i: String.Index) -> String.Index {
let offset = i.encodedOffset
_precondition(offset > 0, "Can't move before startIndex")
_precondition(offset <= count, "String index is out of bounds")
let slice = self[0..<offset]
let stride = slice.measureLastExtendedGraphemeCluster()
_sanityCheck(stride > 0 && stride <= UInt16.max)
return String.Index(
encodedOffset: offset &- stride,
characterStride: stride)
/// Measures the stride of the character starting at code-unit offset `i`.
///
/// Foreign strings are routed to `_foreignOpaqueCharacterStride`; all other
/// strings are measured directly over their fast UTF-8 buffer.
@usableFromInline @inline(never)
@_effects(releasenone)
internal func _opaqueCharacterStride(startingAt i: Int) -> Int {
  guard !_slowPath(isForeign) else {
    return _foreignOpaqueCharacterStride(startingAt: i)
  }

  // TODO(UTF8 perf): grapheme breaking fast-paths...
  return self.withFastUTF8 { utf8 in
    _measureCharacterStride(of: utf8, startingAt: i)
  }
}
@inlinable
internal func characterIndex(
_ i: String.Index,
offsetBy n: Int
) -> String.Index {
var i = i
if n >= 0 {
for _ in 0 ..< n {
i = characterIndex(after: i)
}
} else {
for _ in n ..< 0 {
i = characterIndex(before: i)
}
@usableFromInline @inline(never)
@_effects(releasenone)
internal func _foreignOpaqueCharacterStride(startingAt i: Int) -> Int {
_sanityCheck(isForeign)
// TODO(UTF8 perf): grapheme breaking fast-paths...
// TODO(UTF8 perf): local stack first, before nuclear solution
// TODO(UTF8 perf): even nuclear solution should copy to larger arrays in a
// loop
let count = _object.largeCount
let cocoa = _object.cocoaObject
var codeUnits = Array<UInt16>(repeating: 0, count: count)
codeUnits.withUnsafeMutableBufferPointer {
_cocoaStringCopyCharacters(
from: cocoa,
range: 0..<count,
into: $0.baseAddress._unsafelyUnwrappedUnchecked)
}
return codeUnits.withUnsafeBufferPointer {
_measureCharacterStride(of: $0, startingAt: i)
}
return i
}
@inlinable
internal func characterIndex(
_ i: String.Index,
offsetBy n: Int,
limitedBy limit: String.Index
) -> String.Index? {
var i = i
if n >= 0 {
for _ in 0 ..< n {
// Note condition is >=, not ==: we do not want to jump
// over limit if it's in the middle of a grapheme cluster.
// https://bugs.swift.org/browse/SR-6545
if i >= limit { return nil }
i = characterIndex(after: i)
}
} else {
for _ in n ..< 0 {
if i <= limit { return nil } // See note above.
i = characterIndex(before: i)
}
@usableFromInline @inline(never)
@_effects(releasenone)
internal func _opaqueCharacterStride(endingAt i: Int) -> Int {
if _slowPath(isForeign) {
return _foreignOpaqueCharacterStride(endingAt: i)
}
// TODO(UTF8 perf): grapheme breaking fast-paths...
return self.withFastUTF8 {
return _measureCharacterStride(of: $0, endingAt: i)
}
return i
}
public func characterDistance(
from start: String.Index,
to end: String.Index
) -> Int {
var i = start
var count = 0
if start < end {
// Note that the loop condition isn't just an equality check: we do not
// want to jump over `end` if it's in the middle of a grapheme cluster.
// https://bugs.swift.org/browse/SR-6546
while i < end {
count += 1
i = characterIndex(after: i)
}
} else {
while i > end { // See note above.
count -= 1
i = characterIndex(before: i)
}
}
return count
}
@usableFromInline @inline(never)
@_effects(releasenone)
internal func _foreignOpaqueCharacterStride(endingAt i: Int) -> Int {
_sanityCheck(isForeign)
@inlinable
internal func character(at i: String.Index) -> Character {
let stride = _stride(at: i)
let offset = i.encodedOffset
if _slowPath(stride > 1) {
return Character(_unverified: self.checkedSlice(offset..<offset + stride))
// TODO(UTF8 perf): grapheme breaking fast-paths...
// TODO(UTF8 perf): local stack first, before nuclear solution
// TODO(UTF8 perf): even nuclear solution should copy to larger arrays in a
// loop
let count = _object.largeCount
let cocoa = _object.cocoaObject
var codeUnits = Array<UInt16>(repeating: 0, count: count)
codeUnits.withUnsafeMutableBufferPointer {
_cocoaStringCopyCharacters(
from: cocoa,
range: 0..<count,
into: $0.baseAddress._unsafelyUnwrappedUnchecked)
}
let u = self.codeUnit(atCheckedOffset: offset)
if _slowPath(!UTF16._isScalar(u)) {
return Character(Unicode.Scalar._replacementCharacter)
return codeUnits.withUnsafeBufferPointer {
_measureCharacterStride(of: $0, endingAt: i)
}
return Character(_singleCodeUnit: u)
}
}
extension _StringVariant {
// NOTE: Because this function is inlineable, it should contain only the fast
// paths of grapheme breaking that we have high confidence won't change.
/// Returns the length of the first extended grapheme cluster in UTF-16
/// code units.
@inlinable
internal
func measureFirstExtendedGraphemeCluster() -> Int {
// No more graphemes at end of string.
if count == 0 { return 0 }
// If there is a single code unit left, the grapheme length must be 1.
if count == 1 { return 1 }
if isASCII {
_onFastPath() // Please agressively inline
// The only multi-scalar ASCII grapheme cluster is CR/LF.
if _slowPath(self[0] == _CR && self[1] == _LF) {
return 2
}
return 1
}
if _fastPath(
UTF16._quickCheckGraphemeBreakBetween(self[0], self[1])) {
return 1
}
return self._measureFirstExtendedGraphemeClusterSlow()
}
// NOTE: Because this function is inlineable, it should contain only the fast
// paths of grapheme breaking that we have high confidence won't change.
//
/// Returns the length of the last extended grapheme cluster in UTF-16
/// code units.
@inlinable
internal
func measureLastExtendedGraphemeCluster() -> Int {
let count = self.count
// No more graphemes at end of string.
if count == 0 { return 0 }
// If there is a single code unit left, the grapheme length must be 1.
if count == 1 { return 1 }
if isASCII {
_onFastPath() // Please agressively inline
// The only multi-scalar ASCII grapheme cluster is CR/LF.
if _slowPath(self[count-1] == _LF && self[count-2] == _CR) {
return 2
}
return 1
}
if _fastPath(
UTF16._quickCheckGraphemeBreakBetween(self[count - 2], self[count - 1])) {
return 1
}
return self._measureLastExtendedGraphemeClusterSlow()
}
}
extension _UnmanagedString {
@inline(never)
@usableFromInline
internal func _measureFirstExtendedGraphemeClusterSlow() -> Int {
// ASCII case handled entirely on fast path.
// FIXME: Have separate implementations for ASCII & UTF-16 views.
_sanityCheck(CodeUnit.self == UInt16.self)
return UTF16._measureFirstExtendedGraphemeCluster(
in: UnsafeBufferPointer(
start: rawStart.assumingMemoryBound(to: UInt16.self),
count: count))
}
@inline(never)
@usableFromInline
internal func _measureLastExtendedGraphemeClusterSlow() -> Int {
// ASCII case handled entirely on fast path.
// FIXME: Have separate implementations for ASCII & UTF-16 views.
_sanityCheck(CodeUnit.self == UInt16.self)
return UTF16._measureLastExtendedGraphemeCluster(
in: UnsafeBufferPointer(
start: rawStart.assumingMemoryBound(to: UInt16.self),
count: count))
}
}
extension _UnmanagedOpaqueString {
  /// Measures the extended grapheme cluster at the start of this string when
  /// its code units are not available as contiguous memory.
  ///
  /// Up to sixteen leading UTF-16 code units are first copied into a
  /// fixed-size scratch array and measured there. If the measured cluster
  /// fills the scratch array's entire capacity, the whole string is copied
  /// into a temporary heap buffer and measured again.
  ///
  /// - Returns: The value produced by
  ///   `UTF16._measureFirstExtendedGraphemeCluster(in:)` for the buffer that
  ///   was measured.
  @inline(never)
  @usableFromInline
  internal func _measureFirstExtendedGraphemeClusterSlow() -> Int {
    _sanityCheck(count >= 2, "should have at least two code units")

    // Cheap attempt: measure a small, stack-resident copy of the prefix.
    typealias Scratch = _FixedArray16<UInt16>
    let scratchCount = Swift.min(Scratch.capacity, count)
    var scratch = Scratch(count: scratchCount)
    scratch.withUnsafeMutableBufferPointer { destination in
      self.prefix(destination.count)._copy(into: destination)
    }
    let measured = scratch.withUnsafeBufferPointer { units in
      UTF16._measureFirstExtendedGraphemeCluster(in: units)
    }
    guard _slowPath(!(measured < scratch.capacity)) else {
      return measured
    }

    // Expensive attempt: the cluster may run past the scratch array, so copy
    // every code unit into one contiguous allocation and measure that.
    let base = UnsafeMutablePointer<UInt16>.allocate(capacity: count)
    defer { base.deallocate() }
    let whole = UnsafeMutableBufferPointer(start: base, count: count)
    self._copy(into: whole)
    return UTF16._measureFirstExtendedGraphemeCluster(
      in: UnsafeBufferPointer(start: base, count: count))
  }

  /// Measures the extended grapheme cluster at the end of this string when
  /// its code units are not available as contiguous memory.
  ///
  /// Mirrors `_measureFirstExtendedGraphemeClusterSlow()`, except that the
  /// scratch array is filled from the string's suffix and the measurement is
  /// done by `UTF16._measureLastExtendedGraphemeCluster(in:)`.
  ///
  /// - Returns: The value produced by
  ///   `UTF16._measureLastExtendedGraphemeCluster(in:)` for the buffer that
  ///   was measured.
  @inline(never)
  @usableFromInline
  internal func _measureLastExtendedGraphemeClusterSlow() -> Int {
    _sanityCheck(count >= 2, "should have at least two code units")

    // Cheap attempt: measure a small, stack-resident copy of the suffix.
    typealias Scratch = _FixedArray16<UInt16>
    let scratchCount = Swift.min(Scratch.capacity, count)
    var scratch = Scratch(count: scratchCount)
    scratch.withUnsafeMutableBufferPointer { destination in
      self.suffix(destination.count)._copy(into: destination)
    }
    let measured = scratch.withUnsafeBufferPointer { units in
      UTF16._measureLastExtendedGraphemeCluster(in: units)
    }
    guard _slowPath(!(measured < scratch.capacity)) else {
      return measured
    }

    // Expensive attempt: the cluster may start before the scratch array, so
    // copy every code unit into one contiguous allocation and measure that.
    let base = UnsafeMutablePointer<UInt16>.allocate(capacity: count)
    defer { base.deallocate() }
    let whole = UnsafeMutableBufferPointer(start: base, count: count)
    self._copy(into: whole)
    return UTF16._measureLastExtendedGraphemeCluster(
      in: UnsafeBufferPointer(start: base, count: count))
  }
}
// Grapheme-breaking helpers that operate directly on UTF-16 code units: two
// table-free "quick checks" for a break between a pair of code units, and two
// ICU-backed measurements of the first/last grapheme cluster in a buffer.
extension Unicode.UTF16 {
/// Fast check for a (stable) grapheme break between two UInt16 code units
///
/// Returns `true` only for pairs this check can positively classify as having
/// a break between them. Note that for two sub-0x300 code units the result is
/// `false` whenever `lhs` is CR *or* `rhs` is LF, not only for the exact CR-LF
/// pair.
@inlinable // Safe to inline
internal static func _quickCheckGraphemeBreakBetween(
_ lhs: UInt16, _ rhs: UInt16
) -> Bool {
// With the exception of CR-LF, there is always a grapheme break between two
// sub-0x300 code units
if lhs < 0x300 && rhs < 0x300 {
return lhs != UInt16(_CR) && rhs != UInt16(_LF)
}
return _internalExtraCheckGraphemeBreakBetween(lhs, rhs)
}
/// Out-of-line continuation of `_quickCheckGraphemeBreakBetween`.
///
/// Returns `true` when both `lhs` and `rhs` fall into one of the hard-coded
/// code unit ranges listed in `hasBreakWhenPaired`; otherwise returns `false`.
/// The CR-LF pair must not be passed here (checked by `_sanityCheck`).
@inline(never) // @inline(resilient_only)
@usableFromInline
internal static func _internalExtraCheckGraphemeBreakBetween(
_ lhs: UInt16, _ rhs: UInt16
) -> Bool {
_sanityCheck(
lhs != _CR || rhs != _LF,
"CR-LF special case handled by _quickCheckGraphemeBreakBetween")
// Whether the given scalar, when it appears paired with another scalar
// satisfying this property, has a grapheme break between it and the other
// scalar.
func hasBreakWhenPaired(_ x: UInt16) -> Bool {
// TODO: This doesn't generate optimal code, tune/re-write at a lower
// level.
//
// NOTE: Order of case ranges affects codegen, and thus performance. All
// things being equal, keep existing order below.
switch x {
// Unified CJK Han ideographs, common and some supplemental, amongst
// others:
// 0x3400-0xA4CF
case 0x3400...0xa4cf: return true
// Repeat sub-300 check, this is beneficial for common cases of Latin
// characters embedded within non-Latin script (e.g. newlines, spaces,
// proper nouns and/or jargon, punctuation).
//
// NOTE: CR-LF special case has already been checked.
case 0x0000...0x02ff: return true
// Non-combining kana:
// 0x3041-0x3096
// 0x30A1-0x30FA
case 0x3041...0x3096: return true
case 0x30a1...0x30fa: return true
// Non-combining modern (and some archaic) Cyrillic:
// 0x0400-0x0482 (first half of Cyrillic block)
case 0x0400...0x0482: return true
// Modern Arabic, excluding extenders and prependers:
// 0x061D-0x064A
case 0x061d...0x064a: return true
// Precomposed Hangul syllables:
// 0xAC00-0xD7AF
case 0xac00...0xd7af: return true
// Common general use punctuation, excluding extenders:
// 0x2010-0x2029
case 0x2010...0x2029: return true
// CJK punctuation characters, excluding extenders:
// 0x3000-0x3029
case 0x3000...0x3029: return true
default: return false
}
}
return hasBreakWhenPaired(lhs) && hasBreakWhenPaired(rhs)
}
/// Measures the first extended grapheme cluster in `buffer` using the
/// thread-local ICU break iterator.
///
/// - Parameter buffer: UTF-16 code units to examine. Its `baseAddress` is
///   force-unwrapped, so it must not be `nil`. Only the first `Int32.max`
///   code units are handed to ICU.
/// - Returns: The offset, in code units, of the first boundary ICU reports
///   after offset 0. If ICU reports none, the (32-bit clamped) length of the
///   buffer.
// NOT @usableFromInline
internal static func _measureFirstExtendedGraphemeCluster(
in buffer: UnsafeBufferPointer<CodeUnit>
) -> Int {
// ICU can only handle 32-bit offsets; don't feed it more than that.
// https://bugs.swift.org/browse/SR-6544
let count: Int32
if _fastPath(buffer.count <= Int(Int32.max)) {
count = Int32(truncatingIfNeeded: buffer.count)
} else {
count = Int32.max
}
let iterator = _ThreadLocalStorage.getUBreakIterator(
start: buffer.baseAddress!,
count: count)
let offset = __swift_stdlib_ubrk_following(iterator, 0)
// ubrk_following returns -1 (UBRK_DONE) when it hits the end of the buffer.
if _fastPath(offset != -1) {
// The offset into our buffer is the distance.
_sanityCheck(offset > 0, "zero-sized grapheme?")
return Int(offset)
}
return Int(count)
}
/// Measures the last extended grapheme cluster in `buffer` using the
/// thread-local ICU break iterator.
///
/// - Parameter buffer: UTF-16 code units to examine. Its `baseAddress` is
///   force-unwrapped, so it must not be `nil`. If the buffer holds more than
///   `Int32.max` code units, only the trailing `Int32.max` are handed to ICU.
/// - Returns: The distance, in code units, from the last boundary ICU reports
///   before the end of the examined range to that end. If ICU reports none,
///   the (32-bit clamped) length of the buffer.
// NOT @usableFromInline
internal static func _measureLastExtendedGraphemeCluster(
in buffer: UnsafeBufferPointer<CodeUnit>
) -> Int {
// ICU can only handle 32-bit offsets; don't feed it more than that.
// https://bugs.swift.org/browse/SR-6544
let count: Int32
let start: UnsafePointer<CodeUnit>
if _fastPath(buffer.count <= Int(Int32.max)) {
count = Int32(truncatingIfNeeded: buffer.count)
start = buffer.baseAddress!
} else {
count = Int32.max
// Keep the *end* of the buffer in view, since that is where we measure.
start = buffer.baseAddress! + buffer.count - Int(Int32.max)
}
let iterator = _ThreadLocalStorage.getUBreakIterator(
start: start,
count: count)
let offset = __swift_stdlib_ubrk_preceding(iterator, count)
// ubrk_preceding returns -1 (UBRK_DONE) when there is no boundary before
// the given offset.
if _fastPath(offset != -1) {
// The distance from that boundary to the end of our buffer is the length.
_sanityCheck(offset < count, "zero-sized grapheme?")
return Int(count - offset)
}
return Int(count)
}
}
File diff suppressed because it is too large Load Diff
-212
View File
@@ -10,215 +10,3 @@
//
//===----------------------------------------------------------------------===//
// TODO: describe
//
// HACK HACK HACK: For whatever reason, having this directly on String instead
// of _StringGuts avoids a cascade of ARC. Also note, we can have a global
// function that forwards, but that function **must not be on _StringGuts**,
// else ARC.
//
// Visitor-style dispatch over a string's underlying representation. Each
// `_visit` overload picks exactly one of the supplied closures (`ascii`,
// `utf16`, or `opaque`) based on the state of `_guts`, optionally slicing the
// view to `range` first.
extension String {
/// Invokes the closure matching this string's representation and returns its
/// result.
///
/// - Parameters:
///   - range: If non-`nil`, the code unit offset range to slice the view to
///     before it is passed to the closure. When `performBoundsCheck` is
///     `true`, `_boundsCheck(offsetRange:)` is called on the view first.
///   - ascii: Called with `_guts._unmanagedASCIIView` when `_guts.isASCII`.
///   - utf16: Called with `_guts._unmanagedUTF16View` otherwise.
///   - opaque: Only reachable through `_visitOpaque`, which is used when
///     `_guts._isOpaque` is `true`.
/// - Returns: Whatever the selected closure returns.
@inlinable
@inline(__always)
func _visit<Result>(
range: (Range<Int>, performBoundsCheck: Bool)? = nil,
ascii: /*@convention(thin)*/ (_UnmanagedString<UInt8>) -> Result,
utf16: /*@convention(thin)*/ (_UnmanagedString<UInt16>) -> Result,
opaque: /*@convention(thin)*/ (_UnmanagedOpaqueString) -> Result
) -> Result {
// Opaque guts (which include the small form, see `_visitOpaque`) are
// handled out of line.
if _slowPath(_guts._isOpaque) {
return self._visitOpaque(
range: range, ascii: ascii, utf16: utf16, opaque: opaque)
}
// The unmanaged views below do not retain the storage; keep `self` alive
// until the closure has returned.
defer { _fixLifetime(self) }
if _guts.isASCII {
var view = _guts._unmanagedASCIIView
if let (range, boundsCheck) = range {
if boundsCheck {
view._boundsCheck(offsetRange: range)
}
view = view[range]
}
return ascii(view)
} else {
var view = _guts._unmanagedUTF16View
if let (range, boundsCheck) = range {
if boundsCheck {
view._boundsCheck(offsetRange: range)
}
view = view[range]
}
return utf16(view)
}
}
/// Out-of-line half of `_visit(range:ascii:utf16:opaque:)` for strings whose
/// guts report `_isOpaque`.
///
/// Small strings are presented to `ascii` or `utf16` through
/// `withUnmanagedASCII` / `withUnmanagedUTF16`; every other opaque string is
/// presented to `opaque` via `_guts._asOpaque()`. `range` is applied the same
/// way as in `_visit`.
@usableFromInline
@_effects(readonly)
@inline(never) // @_outlined
func _visitOpaque<Result>(
range: (Range<Int>, performBoundsCheck: Bool)? = nil,
ascii: /*@convention(thin)*/ (_UnmanagedString<UInt8>) -> Result,
utf16: /*@convention(thin)*/ (_UnmanagedString<UInt16>) -> Result,
opaque: /*@convention(thin)*/ (_UnmanagedOpaqueString) -> Result
) -> Result {
_sanityCheck(_guts._isOpaque)
// NOTE(review): the `args:` overload of `_visitOpaque` wraps this same
// condition in `_fastPath`; this one does not.
if _guts._isSmall {
_sanityCheck(_guts._object._isSmallUTF8, "no other small forms yet")
let small = _guts._smallUTF8String
if small.isASCII {
return small.withUnmanagedASCII { view in
var view = view
if let (range, boundsCheck) = range {
if boundsCheck {
view._boundsCheck(offsetRange: range)
}
view = view[range]
}
return ascii(view)
}
}
return small.withUnmanagedUTF16 { view in
var view = view
if let (range, boundsCheck) = range {
if boundsCheck {
view._boundsCheck(offsetRange: range)
}
view = view[range]
}
return utf16(view)
}
}
// TODO: But can it provide a pointer+length representation?
defer { _fixLifetime(self) }
var view = _guts._asOpaque()
if let (range, boundsCheck) = range {
if boundsCheck {
view._boundsCheck(offsetRange: range)
}
view = view[range]
}
return opaque(view)
}
/// Same dispatch as `_visit(range:ascii:utf16:opaque:)`, but forwards an
/// extra value `x` as the second argument of whichever closure is invoked.
///
/// - Parameters:
///   - range: Optional offset range to slice to, with optional bounds check.
///   - x: Value passed through unchanged to the selected closure.
///   - ascii: Called for non-opaque strings with `_guts.isASCII`.
///   - utf16: Called for all other non-opaque strings.
///   - opaque: Only reachable through `_visitOpaque`.
/// - Returns: Whatever the selected closure returns.
@inlinable
@inline(__always)
func _visit<T, Result>(
range: (Range<Int>, performBoundsCheck: Bool)?,
args x: T,
ascii: /*@convention(thin)*/ (_UnmanagedString<UInt8>, T) -> Result,
utf16: /*@convention(thin)*/ (_UnmanagedString<UInt16>, T) -> Result,
opaque: /*@convention(thin)*/ (_UnmanagedOpaqueString, T) -> Result
) -> Result {
if _slowPath(_guts._isOpaque) {
return self._visitOpaque(
range: range, args: x, ascii: ascii, utf16: utf16, opaque: opaque)
}
// Keep `self` alive while the unmanaged view is in use.
defer { _fixLifetime(self) }
if _guts.isASCII {
var view = _guts._unmanagedASCIIView
if let (range, boundsCheck) = range {
if boundsCheck {
view._boundsCheck(offsetRange: range)
}
view = view[range]
}
return ascii(view, x)
} else {
var view = _guts._unmanagedUTF16View
if let (range, boundsCheck) = range {
if boundsCheck {
view._boundsCheck(offsetRange: range)
}
view = view[range]
}
return utf16(view, x)
}
}
/// Out-of-line half of `_visit(range:args:ascii:utf16:opaque:)` for strings
/// whose guts report `_isOpaque`; forwards `x` to the selected closure.
@usableFromInline // @opaque
@_effects(readonly)
@inline(never)
func _visitOpaque<T, Result>(
range: (Range<Int>, performBoundsCheck: Bool)?,
args x: T,
ascii: /*@convention(thin)*/ (_UnmanagedString<UInt8>, T) -> Result,
utf16: /*@convention(thin)*/ (_UnmanagedString<UInt16>, T) -> Result,
opaque: /*@convention(thin)*/ (_UnmanagedOpaqueString, T) -> Result
) -> Result {
_sanityCheck(_guts._isOpaque)
if _fastPath(_guts._isSmall) {
_sanityCheck(_guts._object._isSmallUTF8, "no other small forms yet")
let small = _guts._smallUTF8String
if small.isASCII {
return small.withUnmanagedASCII { view in
var view = view
if let (range, boundsCheck) = range {
if boundsCheck {
view._boundsCheck(offsetRange: range)
}
view = view[range]
}
return ascii(view, x)
}
}
return small.withUnmanagedUTF16 { view in
var view = view
if let (range, boundsCheck) = range {
if boundsCheck {
view._boundsCheck(offsetRange: range)
}
view = view[range]
}
return utf16(view, x)
}
}
// TODO: But can it provide a pointer+length representation?
defer { _fixLifetime(self) }
var view = _guts._asOpaque()
if let (range, boundsCheck) = range {
if boundsCheck {
view._boundsCheck(offsetRange: range)
}
view = view[range]
}
return opaque(view, x)
}
}
/// Global forwarding wrapper: wraps `guts` in a `String` and calls
/// `String._visit(range:ascii:utf16:opaque:)` on it.
///
/// - Parameters:
///   - guts: The string guts to visit.
///   - range: Optional offset range (and whether to bounds-check it) passed
///     through to `_visit`.
///   - ascii: Closure passed through to `_visit`.
///   - utf16: Closure passed through to `_visit`.
///   - opaque: Closure passed through to `_visit`.
/// - Returns: The result of the closure `_visit` selects.
@inlinable
@inline(__always)
internal
func _visitGuts<Result>(
_ guts: _StringGuts,
range: (Range<Int>, performBoundsCheck: Bool)? = nil,
ascii: /*@convention(thin)*/ (_UnmanagedString<UInt8>) -> Result,
utf16: /*@convention(thin)*/ (_UnmanagedString<UInt16>) -> Result,
opaque: /*@convention(thin)*/ (_UnmanagedOpaqueString) -> Result
) -> Result {
return String(guts)._visit(
range: range, ascii: ascii, utf16: utf16, opaque: opaque)
}
/// Global forwarding wrapper: wraps `guts` in a `String` and calls
/// `String._visit(range:args:ascii:utf16:opaque:)` on it, passing `x` along.
///
/// - Parameters:
///   - guts: The string guts to visit.
///   - range: Optional offset range (and whether to bounds-check it) passed
///     through to `_visit`.
///   - x: Extra value handed to the selected closure as its second argument.
///   - ascii: Closure passed through to `_visit`.
///   - utf16: Closure passed through to `_visit`.
///   - opaque: Closure passed through to `_visit`.
/// - Returns: The result of the closure `_visit` selects.
@inlinable
@inline(__always)
internal
func _visitGuts<T, Result>(
_ guts: _StringGuts,
range: (Range<Int>, performBoundsCheck: Bool)? = nil,
args x: T,
ascii: /*@convention(thin)*/ (_UnmanagedString<UInt8>, T) -> Result,
utf16: /*@convention(thin)*/ (_UnmanagedString<UInt16>, T) -> Result,
opaque: /*@convention(thin)*/ (_UnmanagedOpaqueString, T) -> Result
) -> Result {
return String(guts)._visit(
range: range, args: x, ascii: ascii, utf16: utf16, opaque: opaque)
}
+19 -182
View File
@@ -12,190 +12,17 @@
import SwiftShims
extension _UnmanagedString where CodeUnit == UInt8 {
  /// Feeds this string's raw byte buffer, as is, into `core`.
  ///
  /// - Parameter core: The buffering hasher core that receives the bytes.
  internal func hashASCII(into core: inout Hasher._BufferingCore) {
    let bytes = rawBuffer
    core.combine(bytes: bytes)
  }
}
extension BidirectionalCollection where Element == UInt16, SubSequence == Self {
  /// Feeds this UTF-16 code unit sequence into `core` as bytes.
  ///
  /// Code units are visited in order. While each one is ASCII (`<= 0x7F`) and
  /// `hasNormalizationBoundary(after:)` reports `true` for its index, it is
  /// combined as a single byte. At the first code unit where that does not
  /// hold, everything from that index to the end is run through
  /// `_NormalizedCodeUnitIterator`, parsed as UTF-16, transcoded to UTF-8, and
  /// combined scalar by scalar; the method then returns.
  ///
  /// - Parameter core: The buffering hasher core that receives the bytes.
  internal func hashUTF16(into core: inout Hasher._BufferingCore) {
    for position in self.indices {
      let unit = self[position]
      let unitIsASCII = unit <= 0x7F
      let boundaryFollows = self.hasNormalizationBoundary(after: position)

      // Fast path: this code unit can be hashed directly as one byte.
      guard !(unitIsASCII && boundaryFollows) else {
        core.combine(UInt8(truncatingIfNeeded: unit))
        continue
      }

      // Slow path: hash the normalized remainder and stop.
      let remainder = self[position..<endIndex]
      var scalars = Unicode._ParsingIterator(
        codeUnits: _NormalizedCodeUnitIterator(remainder),
        parser: Unicode.UTF16.ForwardParser()
      )
      while let scalar = scalars.next() {
        let utf8 = Unicode.UTF8.transcode(
          scalar, from: Unicode.UTF16.self
        ).unsafelyUnwrapped // never fails
        let (bytes, count) = utf8._bytes
        core.combine(bytes: bytes, count: count)
      }
      return
    }
  }
}
extension _UnmanagedString where CodeUnit == UInt8 {
  /// Hashes this ASCII string into `hasher`, followed by a `0xFF` terminator
  /// byte.
  internal func hash(into hasher: inout Hasher) {
    hashASCII(into: &hasher._core)
    let terminator: UInt8 = 0xFF
    hasher._core.combine(terminator) // terminator
  }

  /// Returns `Hasher._hash(seed:bytes:)` over this string's raw bytes.
  ///
  /// Unlike `hash(into:)`, no terminator byte is appended here.
  internal func _rawHashValue(seed: Int) -> Int {
    let bytes = rawBuffer
    return Hasher._hash(seed: seed, bytes: bytes)
  }
}
extension _UnmanagedString where CodeUnit == UInt16 {
  /// Hashes this UTF-16 string into `hasher` via `hashUTF16(into:)`, followed
  /// by a `0xFF` terminator byte.
  internal func hash(into hasher: inout Hasher) {
    hashUTF16(into: &hasher._core)
    let terminator: UInt8 = 0xFF
    hasher._core.combine(terminator) // terminator
  }

  /// Hashes this string with a fresh buffering core seeded with `seed` and
  /// returns the finalized value truncated to `Int`.
  ///
  /// Unlike `hash(into:)`, no terminator byte is appended here.
  internal func _rawHashValue(seed: Int) -> Int {
    var core = Hasher._BufferingCore(seed: seed)
    hashUTF16(into: &core)
    let digest = core.finalize()
    return Int(truncatingIfNeeded: digest)
  }
}
extension _UnmanagedOpaqueString {
  /// Hashes this opaque string into `hasher` via `hashUTF16(into:)`, followed
  /// by a `0xFF` terminator byte.
  internal func hash(into hasher: inout Hasher) {
    hashUTF16(into: &hasher._core)
    let terminator: UInt8 = 0xFF
    hasher._core.combine(terminator) // terminator
  }

  /// Hashes this string with a fresh buffering core seeded with `seed` and
  /// returns the finalized value truncated to `Int`.
  ///
  /// Unlike `hash(into:)`, no terminator byte is appended here.
  internal func _rawHashValue(seed: Int) -> Int {
    var core = Hasher._BufferingCore(seed: seed)
    hashUTF16(into: &core)
    let digest = core.finalize()
    return Int(truncatingIfNeeded: digest)
  }
}
extension _SmallUTF8String {
  /// Hashes this small string into `hasher` by delegating to the ASCII or
  /// UTF-16 unmanaged view, depending on `isASCII`.
  ///
  /// On i386 and arm this calls `unsupportedOn32bit()` instead.
  internal func hash(into hasher: inout Hasher) {
#if arch(i386) || arch(arm)
    unsupportedOn32bit()
#else
    if isASCII {
      self.withUnmanagedASCII { ascii in ascii.hash(into: &hasher) }
    } else {
      self.withUnmanagedUTF16 { utf16 in utf16.hash(into: &hasher) }
    }
#endif // 64-bit
  }

  /// Returns the raw hash value of this small string for `seed` by delegating
  /// to the ASCII or UTF-16 unmanaged view, depending on `isASCII`.
  ///
  /// On i386 and arm this calls `unsupportedOn32bit()` instead.
  internal func _rawHashValue(seed: Int) -> Int {
#if arch(i386) || arch(arm)
    unsupportedOn32bit()
#else
    guard isASCII else {
      return self.withUnmanagedUTF16 { utf16 in
        utf16._rawHashValue(seed: seed)
      }
    }
    return self.withUnmanagedASCII { ascii in
      ascii._rawHashValue(seed: seed)
    }
#endif // 64-bit
  }
}
extension _StringGuts {
  /// Hashes the whole string into `hasher`, dispatching on representation:
  /// small, opaque, ASCII, or UTF-16 (checked in that order).
  @_effects(releasenone) // FIXME: Is this valid in the opaque case?
  @usableFromInline
  internal func hash(into hasher: inout Hasher) {
    guard !_isSmall else {
      _smallUTF8String.hash(into: &hasher)
      return
    }
    // Keep the storage alive while an unmanaged view of it is being hashed.
    defer { _fixLifetime(self) }
    if _slowPath(_isOpaque) {
      _asOpaque().hash(into: &hasher)
    } else if isASCII {
      _unmanagedASCIIView.hash(into: &hasher)
    } else {
      _unmanagedUTF16View.hash(into: &hasher)
    }
  }

  /// Hashes the code units in `range` into `hasher`, dispatching on
  /// representation: small, opaque, ASCII, or UTF-16 (checked in that order).
  ///
  /// - Parameter range: Offset range used to subscript the selected view.
  @_effects(releasenone) // FIXME: Is this valid in the opaque case?
  @usableFromInline
  internal func hash(_ range: Range<Int>, into hasher: inout Hasher) {
    guard !_isSmall else {
      _smallUTF8String[range].hash(into: &hasher)
      return
    }
    // Keep the storage alive while an unmanaged view of it is being hashed.
    defer { _fixLifetime(self) }
    if _slowPath(_isOpaque) {
      _asOpaque()[range].hash(into: &hasher)
    } else if isASCII {
      _unmanagedASCIIView[range].hash(into: &hasher)
    } else {
      _unmanagedUTF16View[range].hash(into: &hasher)
    }
  }

  /// Returns the raw hash value of the whole string for `seed`, dispatching
  /// on representation: small, opaque, ASCII, or UTF-16 (in that order).
  @_effects(releasenone) // FIXME: Is this valid in the opaque case?
  @usableFromInline
  internal func _rawHashValue(seed: Int) -> Int {
    guard !_isSmall else {
      return _smallUTF8String._rawHashValue(seed: seed)
    }
    // Keep the storage alive while an unmanaged view of it is being hashed.
    defer { _fixLifetime(self) }
    if _slowPath(_isOpaque) {
      return _asOpaque()._rawHashValue(seed: seed)
    } else if isASCII {
      return _unmanagedASCIIView._rawHashValue(seed: seed)
    } else {
      return _unmanagedUTF16View._rawHashValue(seed: seed)
    }
  }

  /// Returns the raw hash value of the code units in `range` for `seed`,
  /// dispatching on representation: small, opaque, ASCII, or UTF-16 (in that
  /// order).
  ///
  /// - Parameter range: Offset range used to subscript the selected view.
  @_effects(releasenone) // FIXME: Is this valid in the opaque case?
  @usableFromInline
  internal func _rawHashValue(_ range: Range<Int>, seed: Int) -> Int {
    guard !_isSmall else {
      return _smallUTF8String[range]._rawHashValue(seed: seed)
    }
    // Keep the storage alive while an unmanaged view of it is being hashed.
    defer { _fixLifetime(self) }
    if _slowPath(_isOpaque) {
      return _asOpaque()[range]._rawHashValue(seed: seed)
    } else if isASCII {
      return _unmanagedASCIIView[range]._rawHashValue(seed: seed)
    } else {
      return _unmanagedUTF16View[range]._rawHashValue(seed: seed)
    }
  }
}
extension String : Hashable {
/// Hashes the essential components of this value by feeding them into the
/// given hasher.
///
/// - Parameter hasher: The hasher to use when combining the components
/// of this instance.
@inlinable
@inlinable // For pre-normal fast paths
public func hash(into hasher: inout Hasher) {
_guts.hash(into: &hasher)
}
// TODO(UTF8 perf): pre-normal checks, fast-paths, etc.
@inlinable
public func _rawHashValue(seed: Int) -> Int {
return _guts._rawHashValue(seed: seed)
_guts._normalizedHash(into: &hasher)
}
}
@@ -207,11 +34,21 @@ extension StringProtocol {
/// of this instance.
@inlinable
public func hash(into hasher: inout Hasher) {
_wholeString._guts.hash(_encodedOffsetRange, into: &hasher)
}
@inlinable
public func _rawHashValue(seed: Int) -> Int {
return _wholeString._guts._rawHashValue(_encodedOffsetRange, seed: seed)
unimplemented_utf8()
}
}
extension _StringGuts {
  /// Hashes the normalized form of this string into `hasher`.
  ///
  /// The guts are wrapped in a `String`, `_normalize()` is called on it, the
  /// resulting bytes are combined into `hasher`, and a `0xFF` terminator byte
  /// is combined last.
  ///
  /// - Parameter hasher: The hasher that receives the bytes.
  @usableFromInline // @opaque
  @inline(never) // slow-path
  internal func _normalizedHash(into hasher: inout Hasher) {
    // TODO(UTF8 perf): fast-paths, incremental (non-allocating) normalization,
    // etc. This approach is very slow.
    let normalized = String(self)._normalize()
    normalized.withUnsafeBytes { bytes in
      hasher.combine(bytes: bytes)
    }
    let terminator: UInt8 = 0xFF
    hasher.combine(terminator) // terminator
  }
}
+199 -85
View File
@@ -9,66 +9,131 @@
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
extension String {
/// A position of a character or code unit in a string.
@_fixed_layout // FIXME(sil-serialize-all)
public struct Index {
@usableFromInline
internal typealias _UTF8Buffer = UTF8.EncodedScalar
@usableFromInline // FIXME(sil-serialize-all)
internal var _compoundOffset: UInt64
import SwiftShims
@usableFromInline
internal var _utf8Buffer = _UTF8Buffer()
/*
@usableFromInline
internal var _graphemeStrideCache: UInt16 = 0
}
}
String's Index has the following layout:
b63:b16 b15:b14 b13:b8 b7:b0
position transcoded offset grapheme cache reserved
- grapheme cache: A 6-bit value remembering the distance to the next grapheme
boundary
- position aka `encodedOffset`: An offset into the string's code units
- transcoded offset: a sub-scalar offset, derived from transcoding
The use and interpretation of both `reserved` and `grapheme cache` is not part
of Index's ABI; it should be hidden behind non-inlinable calls. However, the
position of the sequence of 14 bits allocated is part of Index's ABI, as well as
the default value being `0`.
*/
/// Convenience accessors
extension String.Index {
@inlinable // FIXME(sil-serialize-all)
internal var utf8Buffer: String.Index._UTF8Buffer? {
guard !_utf8Buffer.isEmpty else { return nil }
return _utf8Buffer
}
extension String.Index {
@inlinable
internal var orderingValue: UInt64 {
@inline(__always) get { return _rawBits &>> 14 }
}
@inlinable // FIXME(sil-serialize-all)
/// The offset into a string's code units for this index.
@inlinable
public var encodedOffset: Int {
@inline(__always) get { return Int(truncatingIfNeeded: _rawBits &>> 16) }
}
@inlinable
internal var transcodedOffset: Int {
@inline(__always) get {
return Int(truncatingIfNeeded: orderingValue & 0x3)
}
}
/// The cached distance to the next grapheme boundary, or `nil` if no
/// distance is cached.
///
/// The value lives in the 6-bit "grapheme cache" field of `_rawBits`, which
/// the layout description for `String.Index` places at b13:b8. A stored
/// value of `0` means "nothing cached".
@usableFromInline
internal var characterStride: Int? {
  // Extract b13:b8. These bits sit below the `orderingValue` shift (14), so
  // they never take part in index comparison or hashing.
  let value = (_rawBits & 0x3F00) &>> 8
  return value > 0 ? Int(truncatingIfNeeded: value) : nil
}
// TODO: Probably worth carving a bit for, or maybe a isSubScalar bit...
@inlinable // FIXME(sil-serialize-all)
internal var isUTF8: Bool {
return self.utf8Buffer != nil || self.transcodedOffset > 0
@inlinable @inline(__always)
internal init(encodedOffset: Int, transcodedOffset: Int) {
#if arch(i386) || arch(arm)
unimplemented_utf8_32bit()
#else
_sanityCheck(encodedOffset == encodedOffset & 0x0000_FFFF_FFFF_FFFF)
_sanityCheck(transcodedOffset <= 3)
let pos = UInt64(truncatingIfNeeded: encodedOffset)
let trans = UInt64(truncatingIfNeeded: transcodedOffset)
self.init((pos &<< 16) | (trans &<< 14))
#endif
}
/// Creates a new index at the specified code unit offset.
///
/// - Parameter offset: An offset in code units.
@inlinable @inline(__always)
public init(encodedOffset: Int) {
self.init(encodedOffset: encodedOffset, transcodedOffset: 0)
}
/// Creates an index at the given code unit offset and transcoded offset,
/// additionally caching `characterStride` when it fits in six bits.
///
/// - Parameters:
///   - encodedOffset: Offset into the string's code units.
///   - transcodedOffset: Sub-scalar offset derived from transcoding.
///   - characterStride: Distance to the next grapheme boundary. Values above
///     63 do not fit the 6-bit cache field and are simply not cached.
@usableFromInline
internal init(
  encodedOffset: Int, transcodedOffset: Int, characterStride: Int
) {
  self.init(encodedOffset: encodedOffset, transcodedOffset: transcodedOffset)
  if _slowPath(characterStride > 63) { return }
  _sanityCheck(characterStride == characterStride & 0x3F)
  // Store the stride in the grapheme cache field at b13:b8 (see the layout
  // description for `String.Index`); b7:b0 are reserved.
  self._rawBits |= UInt64(truncatingIfNeeded: characterStride) &<< 8
  self._invariantCheck()
}
@usableFromInline
internal init(encodedOffset pos: Int, characterStride char: Int) {
self.init(encodedOffset: pos, transcodedOffset: 0, characterStride: char)
}
}
extension String.Index : Equatable {
// A combined code unit and transcoded offset, for comparison purposes
@inlinable // FIXME(sil-serialize-all)
internal var _orderingValue: UInt64 {
return _compoundOffset
// Creation helpers
extension String.Index {
  /// Creates the index one transcoded position after `i`: same
  /// `encodedOffset`, `transcodedOffset` increased by one.
  ///
  /// - Parameter i: An index whose `transcodedOffset` is in `0...2`.
  @inlinable @inline(__always)
  internal init(transcodedAfter i: String.Index) {
    let subOffset = i.transcodedOffset
    _sanityCheck((0...2) ~= subOffset)
    let position = i.encodedOffset
    self.init(encodedOffset: position, transcodedOffset: subOffset &+ 1)
  }

  /// Creates the index one transcoded position before `i`: same
  /// `encodedOffset`, `transcodedOffset` decreased by one.
  ///
  /// - Parameter i: An index whose `transcodedOffset` is in `1...3`.
  @inlinable @inline(__always)
  internal init(transcodedBefore i: String.Index) {
    let subOffset = i.transcodedOffset
    _sanityCheck((1...3) ~= subOffset)
    let position = i.encodedOffset
    self.init(encodedOffset: position, transcodedOffset: subOffset &- 1)
  }
}
@inlinable // FIXME(sil-serialize-all)
extension String.Index: Equatable {
@inlinable @inline(__always)
public static func == (lhs: String.Index, rhs: String.Index) -> Bool {
return lhs._orderingValue == rhs._orderingValue
return lhs.orderingValue == rhs.orderingValue
}
}
extension String.Index : Comparable {
@inlinable // FIXME(sil-serialize-all)
extension String.Index: Comparable {
@inlinable @inline(__always)
public static func < (lhs: String.Index, rhs: String.Index) -> Bool {
return lhs._orderingValue < rhs._orderingValue
return lhs.orderingValue < rhs.orderingValue
}
}
extension String.Index : Hashable {
extension String.Index: Hashable {
/// Hashes the essential components of this value by feeding them into the
/// given hasher.
///
@@ -76,66 +141,115 @@ extension String.Index : Hashable {
/// of this instance.
@inlinable
public func hash(into hasher: inout Hasher) {
hasher.combine(_orderingValue)
hasher.combine(orderingValue)
}
}
// TODO(UTF8): restore these to StringIndexConversions.swift
extension String.Index {
@inline(__always)
@inlinable
internal init(encodedOffset: Int, transcodedOffset: Int) {
let cuOffset = UInt64(truncatingIfNeeded: encodedOffset)
_sanityCheck(
cuOffset & 0xFFFF_0000_0000_0000 == 0, "String length capped at 48bits")
let transOffset = UInt64(truncatingIfNeeded: transcodedOffset)
_sanityCheck(transOffset <= 4, "UTF-8 max transcoding is 4 code units")
self._compoundOffset = cuOffset &<< 2 | transOffset
}
@inline(__always)
@inlinable
internal init(from other: String.Index, adjustingEncodedOffsetBy adj: Int) {
self.init(
encodedOffset: other.encodedOffset &+ adj,
transcodedOffset: other.transcodedOffset)
self._utf8Buffer = other._utf8Buffer
self._graphemeStrideCache = other._graphemeStrideCache
}
/// Creates a new index at the specified UTF-16 offset.
/// Creates an index in the given string that corresponds exactly to the
/// specified position.
///
/// - Parameter offset: An offset in UTF-16 code units.
@inlinable // FIXME(sil-serialize-all)
public init(encodedOffset offset: Int) {
self.init(encodedOffset: offset, transcodedOffset: 0)
}
@inlinable // FIXME(sil-serialize-all)
internal init(
encodedOffset offset: Int, transcodedOffset: Int, buffer: _UTF8Buffer
/// If the index passed as `sourcePosition` represents the start of an
/// extended grapheme cluster---the element type of a string---then the
/// initializer succeeds.
///
/// The following example converts the position of the Unicode scalar `"e"`
/// into its corresponding position in the string. The character at that
/// position is the composed `"é"` character.
///
/// let cafe = "Cafe\u{0301}"
/// print(cafe)
/// // Prints "Café"
///
/// let scalarsIndex = cafe.unicodeScalars.firstIndex(of: "e")!
/// let stringIndex = String.Index(scalarsIndex, within: cafe)!
///
/// print(cafe[...stringIndex])
/// // Prints "Café"
///
/// If the index passed as `sourcePosition` doesn't have an exact
/// corresponding position in `target`, the result of the initializer is
/// `nil`. For example, an attempt to convert the position of the combining
/// acute accent (`"\u{0301}"`) fails. Combining Unicode scalars do not have
/// their own position in a string.
///
/// let nextScalarsIndex = cafe.unicodeScalars.index(after: scalarsIndex)
/// let nextStringIndex = String.Index(nextScalarsIndex, within: cafe)
///
/// print(nextStringIndex)
/// // Prints "nil"
///
/// - Parameters:
/// - sourcePosition: A position in a view of the `target` parameter.
/// `sourcePosition` must be a valid index of at least one of the views
/// of `target`.
/// - target: The string referenced by the resulting index.
public init?(
_ sourcePosition: String.Index,
within target: String
) {
self.init(encodedOffset: offset, transcodedOffset: transcodedOffset)
self._utf8Buffer = buffer
}
@inlinable
internal init(encodedOffset: Int, characterStride: Int) {
self.init(encodedOffset: encodedOffset, transcodedOffset: 0)
if characterStride < UInt16.max {
self._graphemeStrideCache = UInt16(truncatingIfNeeded: characterStride)
guard target._guts.isOnGraphemeClusterBoundary(sourcePosition) else {
return nil
}
self = sourcePosition
}
/// The offset into a string's UTF-16 encoding for this index.
/// Returns the position in the given UTF-8 view that corresponds exactly to
/// this index.
///
/// This example first finds the position of the character `"é"`, and then
/// uses this method to find the same position in the string's `utf8` view.
///
/// let cafe = "Café"
/// if let i = cafe.firstIndex(of: "é") {
/// let j = i.samePosition(in: cafe.utf8)!
/// print(Array(cafe.utf8[j...]))
/// }
/// // Prints "[195, 169]"
///
/// - Parameter utf8: The view to use for the index conversion. This index
/// must be a valid index of at least one view of the string shared by
/// `utf8`.
/// - Returns: The position in `utf8` that corresponds exactly to this index.
/// If this index does not have an exact corresponding position in `utf8`,
/// this method returns `nil`. For example, an attempt to convert the
/// position of a UTF-16 trailing surrogate returns `nil`.
@inlinable // FIXME(sil-serialize-all)
public var encodedOffset : Int {
return Int(truncatingIfNeeded: _compoundOffset &>> 2)
public func samePosition(
in utf8: String.UTF8View
) -> String.UTF8View.Index? {
return String.UTF8View.Index(self, within: utf8)
}
/// The offset of this index within whatever encoding this is being viewed as
/// Returns the position in the given UTF-16 view that corresponds exactly to
/// this index.
///
/// The index must be a valid index of `String(utf16)`.
///
/// This example first finds the position of the character `"é"` and then
/// uses this method to find the same position in the string's `utf16` view.
///
/// let cafe = "Café"
/// if let i = cafe.firstIndex(of: "é") {
/// let j = i.samePosition(in: cafe.utf16)!
/// print(cafe.utf16[j])
/// }
/// // Prints "233"
///
/// - Parameter utf16: The view to use for the index conversion. This index
/// must be a valid index of at least one view of the string shared by
/// `utf16`.
/// - Returns: The position in `utf16` that corresponds exactly to this
/// index. If this index does not have an exact corresponding position in
/// `utf16`, this method returns `nil`. For example, an attempt to convert
/// the position of a UTF-8 continuation byte returns `nil`.
@inlinable // FIXME(sil-serialize-all)
internal var transcodedOffset: Int {
return Int(truncatingIfNeeded: _compoundOffset & 0x3)
public func samePosition(
in utf16: String.UTF16View
) -> String.UTF16View.Index? {
return String.UTF16View.Index(self, within: utf16)
}
}
@@ -9,110 +9,3 @@
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
extension String.Index {
  /// Creates an index in the given string that corresponds exactly to the
  /// specified position.
  ///
  /// The initializer succeeds when `sourcePosition` is the start of an
  /// extended grapheme cluster, which is the element type of a string.
  ///
  /// The following example converts the position of the Unicode scalar `"e"`
  /// into its corresponding position in the string. The character at that
  /// position is the composed `"é"` character.
  ///
  ///     let cafe = "Cafe\u{0301}"
  ///     print(cafe)
  ///     // Prints "Café"
  ///
  ///     let scalarsIndex = cafe.unicodeScalars.firstIndex(of: "e")!
  ///     let stringIndex = String.Index(scalarsIndex, within: cafe)!
  ///
  ///     print(cafe[...stringIndex])
  ///     // Prints "Café"
  ///
  /// The result is `nil` when `sourcePosition` has no exact corresponding
  /// position in `target`. For example, converting the position of the
  /// combining acute accent (`"\u{0301}"`) fails, because combining Unicode
  /// scalars do not have their own position in a string.
  ///
  ///     let nextScalarsIndex = cafe.unicodeScalars.index(after: scalarsIndex)
  ///     let nextStringIndex = String.Index(nextScalarsIndex, within: cafe)
  ///
  ///     print(nextStringIndex)
  ///     // Prints "nil"
  ///
  /// - Parameters:
  ///   - sourcePosition: A position in a view of the `target` parameter.
  ///     `sourcePosition` must be a valid index of at least one of the views
  ///     of `target`.
  ///   - target: The string referenced by the resulting index.
  public init?(
    _ sourcePosition: String.Index,
    within target: String
  ) {
    if !target.unicodeScalars._isOnGraphemeClusterBoundary(sourcePosition) {
      return nil
    }
    let offset = sourcePosition.encodedOffset
    self = target._index(atEncodedOffset: offset)
  }

  /// Returns the position in the given UTF-8 view that corresponds exactly to
  /// this index.
  ///
  /// This example first finds the position of the character `"é"`, and then
  /// uses this method to find the same position in the string's `utf8` view.
  ///
  ///     let cafe = "Café"
  ///     if let i = cafe.firstIndex(of: "é") {
  ///         let j = i.samePosition(in: cafe.utf8)!
  ///         print(Array(cafe.utf8[j...]))
  ///     }
  ///     // Prints "[195, 169]"
  ///
  /// - Parameter utf8: The view to use for the index conversion. This index
  ///   must be a valid index of at least one view of the string shared by
  ///   `utf8`.
  /// - Returns: The position in `utf8` that corresponds exactly to this index.
  ///   If this index does not have an exact corresponding position in `utf8`,
  ///   this method returns `nil`. For example, an attempt to convert the
  ///   position of a UTF-16 trailing surrogate returns `nil`.
  @inlinable // trivial-implementation
  public func samePosition(
    in utf8: String.UTF8View
  ) -> String.UTF8View.Index? {
    let converted: String.UTF8View.Index? =
      String.UTF8View.Index(self, within: utf8)
    return converted
  }

  /// Returns the position in the given UTF-16 view that corresponds exactly to
  /// this index.
  ///
  /// The index must be a valid index of `String(utf16)`.
  ///
  /// This example first finds the position of the character `"é"` and then
  /// uses this method to find the same position in the string's `utf16` view.
  ///
  ///     let cafe = "Café"
  ///     if let i = cafe.firstIndex(of: "é") {
  ///         let j = i.samePosition(in: cafe.utf16)!
  ///         print(cafe.utf16[j])
  ///     }
  ///     // Prints "233"
  ///
  /// - Parameter utf16: The view to use for the index conversion. This index
  ///   must be a valid index of at least one view of the string shared by
  ///   `utf16`.
  /// - Returns: The position in `utf16` that corresponds exactly to this
  ///   index. If this index does not have an exact corresponding position in
  ///   `utf16`, this method returns `nil`. For example, an attempt to convert
  ///   the position of a UTF-8 continuation byte returns `nil`.
  @inlinable // trivial-implementation
  public func samePosition(
    in utf16: String.UTF16View
  ) -> String.UTF16View.Index? {
    let converted: String.UTF16View.Index? =
      String.UTF16View.Index(self, within: utf16)
    return converted
  }
}
+2 -5
View File
@@ -197,14 +197,11 @@ extension DefaultStringInterpolation: CustomStringConvertible {
extension DefaultStringInterpolation: TextOutputStream {
@inlinable
public mutating func write(_ string: String) {
// Most interpolations will not append to an empty string, so we bypass the
// empty-singleton check.
_storage._guts._appendSlow(string._guts)
_storage.append(string)
}
@inlinable
public mutating func _writeASCII(_ buffer: UnsafeBufferPointer<UInt8>) {
_storage._guts.append(_UnmanagedString(buffer))
_storage._guts.append(_StringGuts(buffer, isKnownASCII: true))
}
}
+27 -146
View File
@@ -12,25 +12,6 @@
import SwiftShims
extension _StringVariant {
@usableFromInline
func _repeated(_ count: Int) -> _SwiftStringStorage<CodeUnit> {
_sanityCheck(count > 0)
let c = self.count
let storage = _copyToNativeStorage(
of: CodeUnit.self,
unusedCapacity: (count - 1) * c)
var p = storage.start + c
for _ in 1 ..< count {
p.initialize(from: storage.start, count: c)
p += c
}
_sanityCheck(p == storage.start + count * c)
storage.count = p - storage.start
return storage
}
}
extension String {
/// Creates a new string representing the given string repeated the specified
/// number of times.
@@ -46,49 +27,30 @@ extension String {
/// - repeatedValue: The string to repeat.
/// - count: The number of times to repeat `repeatedValue` in the resulting
/// string.
public init(repeating repeatedValue: String, count: Int) {
  precondition(count >= 0, "Negative count not allowed")
  // Zero or one copy needs no concatenation. Repeating an empty string also
  // yields the (empty) string itself, so skip the O(count) loop for it.
  guard count > 1 && !repeatedValue.isEmpty else {
    // TODO(UTF8 merge): use string literal
    self = count == 0 ? String() : repeatedValue
    return
  }

  // TODO(UTF8 perf): Not the fastest approach...
  var result = String()
  // Reserve once up front so the appends below do not reallocate repeatedly.
  result.reserveCapacity(repeatedValue._guts.count &* count)
  for _ in 0..<count {
    result += repeatedValue
  }
  self = result
}
/// A Boolean value indicating whether a string has no characters.
@inlinable
public var isEmpty: Bool {
  // Ask the guts directly rather than comparing a code-unit count with zero.
  @inline(__always) get { return _guts.isEmpty }
}
}
// TODO: since this is generally useful, make public via evolution proposal.
extension BidirectionalCollection {
  /// Returns whether the trailing elements of this collection match
  /// `suffix`, using `areEquivalent` as the equivalence test.
  ///
  /// - Parameters:
  ///   - suffix: The candidate suffix.
  ///   - areEquivalent: Called with an element of `self` and the element of
  ///     `suffix` at the same distance from the end.
  /// - Returns: `true` if every element of `suffix` was matched, walking
  ///   both collections backwards from their ends.
  @inlinable
  internal func _ends<Suffix: BidirectionalCollection>(
    with suffix: Suffix, by areEquivalent: (Element,Element) -> Bool
  ) -> Bool where Suffix.Element == Element {
    var selfCursor = self.endIndex
    var suffixCursor = suffix.endIndex
    while selfCursor != self.startIndex && suffixCursor != suffix.startIndex {
      selfCursor = self.index(before: selfCursor)
      suffixCursor = suffix.index(before: suffixCursor)
      guard areEquivalent(self[selfCursor], suffix[suffixCursor]) else {
        return false
      }
    }
    // If `self` ran out first, part of `suffix` is still unmatched.
    return suffixCursor == suffix.startIndex
  }
}
extension BidirectionalCollection where Element: Equatable {
  /// Returns whether this collection ends with the elements of `suffix`,
  /// comparing elements with `==`.
  ///
  /// - Parameter suffix: The candidate suffix.
  @inlinable
  internal func _ends<Suffix: BidirectionalCollection>(
    with suffix: Suffix
  ) -> Bool where Suffix.Element == Element {
    let matches = _ends(with: suffix, by: ==)
    return matches
  }
}
extension StringProtocol {
/// Returns a Boolean value indicating whether the string begins with the
/// specified prefix.
@@ -155,100 +117,37 @@ extension StringProtocol {
/// - Returns: `true` if the string ends with `suffix`; otherwise, `false`.
@inlinable
public func hasSuffix<Suffix: StringProtocol>(_ suffix: Suffix) -> Bool {
  // Walk both strings Character-by-Character from the back. A single return
  // replaces the earlier pair, whose second statement was unreachable.
  return self.reversed().starts(with: suffix.reversed())
}
}
extension String {
  /// Returns whether this string begins with `prefix`.
  ///
  /// When both strings report `isNFCFastUTF8`, the leading
  /// `prefix._guts.count` code units of `self` are compared bytewise with
  /// `prefix`. Otherwise the comparison falls back to the element-wise
  /// `starts(with:)`.
  ///
  /// - Parameter prefix: The candidate prefix.
  public func hasPrefix(_ prefix: String) -> Bool {
    if _fastPath(self._guts.isNFCFastUTF8 && prefix._guts.isNFCFastUTF8) {
      // A prefix longer than the string can never match.
      guard prefix._guts.count <= self._guts.count else { return false }
      // NOTE(review): this path compares code units only and does not check
      // that the match ends on a Character boundary in `self`, unlike the
      // `starts(with:)` fallback - confirm that is intended.
      return prefix._guts.withFastUTF8 { nfcPrefix in
        let prefixEnd = nfcPrefix.count
        return self._guts.withFastUTF8(range: 0..<prefixEnd) { nfcSlicedSelf in
          return _binaryCompare(nfcSlicedSelf, nfcPrefix) == 0
        }
      }
    }

    return starts(with: prefix)
  }

  /// Returns whether this string ends with `suffix`.
  ///
  /// When both strings report `isNFCFastUTF8`, the trailing
  /// `suffix._guts.count` code units of `self` are compared bytewise with
  /// `suffix`. Otherwise both strings are compared element-wise from the back.
  ///
  /// - Parameter suffix: The candidate suffix.
  public func hasSuffix(_ suffix: String) -> Bool {
    if _fastPath(self._guts.isNFCFastUTF8 && suffix._guts.isNFCFastUTF8) {
      // A suffix longer than the string can never match; this also keeps
      // `suffixStart` below from going negative.
      guard suffix._guts.count <= self._guts.count else { return false }
      // NOTE(review): as in `hasPrefix`, only code units are compared here;
      // the start of the match is not checked against Character boundaries.
      return suffix._guts.withFastUTF8 { nfcSuffix in
        let selfCount = self._guts.count
        let suffixStart = selfCount - nfcSuffix.count
        return self._guts.withFastUTF8(range: suffixStart..<selfCount) {
          nfcSlicedSelf in return _binaryCompare(nfcSlicedSelf, nfcSuffix) == 0
        }
      }
    }

    return self.reversed().starts(with: suffix.reversed())
  }
}
@@ -290,21 +189,3 @@ extension String {
self = value._description(radix: radix, uppercase: uppercase)
}
}
extension _StringGuts {
  /// Returns guts holding this string's contents repeated `n` times.
  ///
  /// - Parameter n: The number of copies; must be greater than one.
  @inlinable
  func _repeated(_ n: Int) -> _StringGuts {
    _sanityCheck(n > 1)
    // TODO: visitor pattern for something like this...
    // Try to keep the result small; `_repeated` on the small form returns
    // nil when it cannot, and we fall through to the large path.
    if self._isSmall, let small = self._smallUTF8String._repeated(n) {
      return _StringGuts(small)
    }
    return _visitGuts(
      self,
      range: nil,
      args: n,
      ascii: { view, times in
        return _StringGuts(_large: view._repeated(times))
      },
      utf16: { view, times in
        return _StringGuts(_large: view._repeated(times))
      },
      opaque: { view, times in
        return _StringGuts(_large: view._repeated(times))
      })
  }
}
+54 -71
View File
@@ -12,10 +12,11 @@
import SwiftShims
// A namespace for various heuristics
//
internal enum _Normalization {
// ICU's NFC unorm2 instance
//
// TODO(UTF8 perf): Should we cache one on TLS? Is this an expensive call?
internal static var _nfcNormalizer: OpaquePointer = {
var err = __swift_stdlib_U_ZERO_ERROR
let normalizer = __swift_stdlib_unorm2_getNFCInstance(&err)
@@ -27,77 +28,59 @@ internal enum _Normalization {
return normalizer
}()
// Whether this buffer of code units satisfies the quickCheck=YES property for
// normality checking under NFC.
//
// ICU provides a quickCheck, which may yield "YES", "NO", or "MAYBE". YES
// means that the string was determined to definitely be normal under NFC. In
// practice, the majority of Strings have this property. Checking for YES is
// considerably faster than trying to distinguish between NO and MAYBE.
internal static func _prenormalQuickCheckYes(
  _ buffer: UnsafeBufferPointer<UInt16>
) -> Bool {
  var status = __swift_stdlib_U_ZERO_ERROR
  // ICU returns the length of the leading span that passes the quick check.
  let quickCheckedLength = __swift_stdlib_unorm2_spanQuickCheckYes(
    _Normalization._nfcNormalizer,
    buffer.baseAddress._unsafelyUnwrappedUnchecked,
    Int32(buffer.count),
    &status)

  if !status.isSuccess {
    // This shouldn't be possible unless some deep (unrecoverable) system
    // invariants are violated
    fatalError("Unable to talk to ICU")
  }
  // The buffer qualifies only if that span covers all of it.
  return quickCheckedLength == buffer.count
}
internal static func _prenormalQuickCheckYes(
  _ string: _UnmanagedString<UInt16>
) -> Bool {
  var status = __swift_stdlib_U_ZERO_ERROR
  // ICU returns the length of the leading span that passes the quick check.
  let quickCheckedLength = __swift_stdlib_unorm2_spanQuickCheckYes(
    _Normalization._nfcNormalizer,
    string.start,
    Int32(string.count),
    &status)

  if !status.isSuccess {
    // This shouldn't be possible unless some deep (unrecoverable) system
    // invariants are violated
    fatalError("Unable to talk to ICU")
  }
  // The string qualifies only if that span covers all of it.
  return quickCheckedLength == string.count
}
}
extension UnicodeScalar {
  // Terminology used below:
  //
  // Normalization boundary - a position in a string such that everything to
  // its left can be normalized independently of everything to its right;
  // concatenating the two results equals normalizing the whole string.
  //
  // Normalization segment - the code units between two adjacent
  // normalization boundaries. While being normalized, a segment may expand,
  // contract, or even produce new sub-segments.

  // Whether this scalar value always has a normalization boundary before it.
  internal var _hasNormalizationBoundaryBefore: Bool {
    _sanityCheck(Int32(exactly: self.value) != nil, "top bit shouldn't be set")
    let codePoint = Int32(bitPattern: self.value)
    let answer = __swift_stdlib_unorm2_hasBoundaryBefore(
      _Normalization._nfcNormalizer, codePoint)
    // The shim reports its result as an integer; non-zero means "yes".
    return answer != 0
  }
}
extension _Normalization {
// When normalized in NFC, some segments may expand in size (e.g. some non-BMP
// musical notes). This expansion is capped by the maximum expansion factor of
// the normal form. For NFC, that is 3x.
//
// `String._normalize()` uses this as a multiplier on the input's UTF-16
// code-unit count when sizing its output buffer.
internal static let _maxNFCExpansionFactor = 3
// A small output buffer to use for normalizing a single normalization
// segment. Fits all but pathological arbitrary-length segments (i.e. zalgo-
// segments)
//
// NOTE(review): presumably holds 16 UTF-16 code units, judging by the
// `_FixedArray16<UInt16>` name - confirm against that type's definition.
internal typealias _SegmentOutputBuffer = _FixedArray16<UInt16>
}
extension String {
  // TODO(UTF8 perf): Change into a lazy sequence with fast-paths...
  /// Returns this string's contents normalized with the NFC normalizer,
  /// encoded as UTF-8 code units.
  ///
  /// The contents are transcoded to UTF-16, normalized by ICU into a buffer
  /// of `1 + count * _Normalization._maxNFCExpansionFactor` code units, and
  /// transcoded back to UTF-8.
  @inline(never) // slow-path
  internal func _normalize() -> [UInt8] {
    // Normalizes `input` into `outputBuffer`. Returns the number of code
    // units written, or `nil` if ICU reported a failure.
    func _tryNormalize(
      _ input: UnsafeBufferPointer<UInt16>,
      into outputBuffer: UnsafeMutableBufferPointer<UInt16>
    ) -> Int? {
      var err = __swift_stdlib_U_ZERO_ERROR
      let count = __swift_stdlib_unorm2_normalize(
        _Normalization._nfcNormalizer,
        input.baseAddress._unsafelyUnwrappedUnchecked,
        numericCast(input.count),
        outputBuffer.baseAddress._unsafelyUnwrappedUnchecked,
        numericCast(outputBuffer.count),
        &err
      )
      guard err.isSuccess else {
        // The output buffer needs to grow
        return nil
      }
      return numericCast(count)
    }

    let transcoded = Array(self.utf16)
    // An empty string is already normalized. Returning early also avoids
    // unchecked-unwrapping the base address of an empty buffer, which is
    // not guaranteed to be non-nil.
    guard !transcoded.isEmpty else { return [] }

    let normalized: [UInt16] = transcoded.withUnsafeBufferPointer {
      (inputBufPtr) -> [UInt16] in
      // Size for the worst-case expansion so one ICU call always suffices.
      var output = [UInt16](
        repeating: 0,
        count: 1 + inputBufPtr.count * _Normalization._maxNFCExpansionFactor)
      let lenOpt = output.withUnsafeMutableBufferPointer { outputBufPtr in
        return _tryNormalize(inputBufPtr, into: outputBufPtr)
      }
      guard let len = lenOpt else {
        _sanityCheckFailure("normalization beyond max expansion factor")
      }
      _sanityCheck(len <= output.count)
      // Trim the unused tail of the worst-case buffer.
      output.removeLast(output.count - len)
      return output
    }

    var codeUnits = [UInt8]()
    codeUnits.reserveCapacity(normalized.count)
    _ = transcode(
      normalized.makeIterator(),
      from: UTF16.self,
      to: UTF8.self,
      stoppingOnError: false,
      into: { codeUnits.append($0) })
    return codeUnits
  }
}
File diff suppressed because it is too large Load Diff
+6 -67
View File
@@ -30,8 +30,6 @@ public protocol StringProtocol
associatedtype UnicodeScalarView : BidirectionalCollection
where UnicodeScalarView.Element == Unicode.Scalar
associatedtype SubSequence = Substring
var utf8: UTF8View { get }
var utf16: UTF16View { get }
@@ -112,75 +110,16 @@ public protocol StringProtocol
encodedAs targetEncoding: Encoding.Type,
_ body: (UnsafePointer<Encoding.CodeUnit>) throws -> Result
) rethrows -> Result
/// The entire String onto whose slice this view is a projection.
var _wholeString : String { get }
/// The range of storage offsets of this view in `_wholeString`.
var _encodedOffsetRange : Range<Int> { get }
}
extension StringProtocol {
  /// Default: a fresh `String` built from `self`.
  public var _wholeString: String {
    let copy = String(self)
    return copy
  }

  /// Default: `0` up to the number of UTF-16 code units in `self`.
  public var _encodedOffsetRange: Range<Int> {
    let utf16Length: Int = numericCast(self.utf16.count)
    return 0 ..< utf16Length
  }
}
/// A protocol that provides fast access to a known representation of String.
///
/// Can be used to specialize generic functions that would otherwise end up
/// doing grapheme breaking to vend individual characters.
///
/// Conformers only have to supply `_persistentContent`, `_wholeString` and
/// `_encodedOffsetRange`; a protocol extension provides `_ephemeralContent`
/// by returning `_persistentContent`.
@usableFromInline // FIXME(sil-serialize-all)
internal protocol _SwiftStringView {
/// A `String`, having the same contents as `self`, that may be unsuitable for
/// long-term storage.
var _ephemeralContent : String { get }
/// A `String`, having the same contents as `self`, that is suitable for
/// long-term storage.
//
// FIXME: Remove once _StringGuts has append(contentsOf:).
var _persistentContent : String { get }
/// The entire String onto whose slice this view is a projection.
var _wholeString : String { get }
/// The range of storage offsets of this view in `_wholeString`.
var _encodedOffsetRange : Range<Int> { get }
}
extension _SwiftStringView {
  /// Default implementation: the persistent content is returned as the
  /// ephemeral content.
  @inlinable // FIXME(sil-serialize-all)
  internal var _ephemeralContent : String {
    get { return self._persistentContent }
  }
}
extension StringProtocol {
  // TODO(UTF8): Wean NSStringAPI.swift off of this
  /// A `String` with the same contents as `self`.
  ///
  /// When `self` already is a `String` it is returned as-is; any other
  /// conformer is converted with `String(self)`.
  public // @SPI(NSStringAPI.swift)
  var _ephemeralString: String {
    if let str = self as? String {
      return str
    }
    // TODO: Smol check and then shared storage substring
    return String(self)
  }
}
extension String : _SwiftStringView {
  /// A `String` is its own persistent content.
  @inlinable // FIXME(sil-serialize-all)
  internal var _persistentContent : String {
    get { return self }
  }

  /// A `String` is not a slice, so the whole string is `self`.
  @inlinable // FIXME(sil-serialize-all)
  public var _wholeString : String {
    get { return self }
  }

  /// Covers every storage offset, from `0` up to `_guts.count`.
  @inlinable // FIXME(sil-serialize-all)
  public var _encodedOffsetRange : Range<Int> {
    let upperBound = _guts.count
    return 0..<upperBound
  }
}
@@ -10,7 +10,9 @@
//
//===----------------------------------------------------------------------===//
extension String : StringProtocol, RangeReplaceableCollection {
extension String: StringProtocol {}
extension String: RangeReplaceableCollection {
/// Creates a string representing the given character repeated the specified
/// number of times.
///
@@ -25,9 +27,8 @@ extension String : StringProtocol, RangeReplaceableCollection {
/// - repeatedValue: The character to repeat.
/// - count: The number of times to repeat `repeatedValue` in the
/// resulting string.
public init(repeating repeatedValue: Character, count: Int) {
  // Forward the character's `_str` to the `String`-repeating initializer.
  // Delegate exactly once; the earlier `String(repeatedValue)` delegation
  // was the pre-UTF-8 implementation.
  self.init(repeating: repeatedValue._str, count: count)
}
// This initializer disambiguates between the following initializers, now
@@ -56,41 +57,6 @@ extension String : StringProtocol, RangeReplaceableCollection {
self = other.description
}
@inlinable
@inline(__always)
internal func _boundsCheck(_ index: Index) {
_precondition(index.encodedOffset >= 0 && index.encodedOffset < _guts.count,
"String index is out of bounds")
}
@inlinable
@inline(__always)
internal func _boundsCheck(_ range: Range<Index>) {
_precondition(
range.lowerBound.encodedOffset >= 0 &&
range.upperBound.encodedOffset <= _guts.count,
"String index range is out of bounds")
}
@inlinable
@inline(__always)
internal func _boundsCheck(_ range: ClosedRange<Index>) {
_precondition(
range.lowerBound.encodedOffset >= 0 &&
range.upperBound.encodedOffset < _guts.count,
"String index range is out of bounds")
}
internal func _index(atEncodedOffset offset: Int) -> Index {
  // Dispatch on the guts' representation; each view computes the character
  // index for the same offset.
  return _visitGuts(
    _guts,
    args: offset,
    ascii: { view, position in
      return view.characterIndex(atOffset: position)
    },
    utf16: { view, position in
      return view.characterIndex(atOffset: position)
    },
    opaque: { view, position in
      return view.characterIndex(atOffset: position)
    })
}
}
extension String {
/// Creates a new string containing the characters in the given sequence.
///
/// You can use this initializer to create a new string from the result of
@@ -106,9 +72,9 @@ extension String {
///
/// - Parameter characters: A string instance or another sequence of
/// characters.
@inlinable // specialize
public init<S : Sequence>(_ characters: S)
where S.Iterator.Element == Character {
  // Start from the empty string and append every character of the sequence.
  self = ""
  self.append(contentsOf: characters)
}
@@ -126,7 +92,7 @@ extension String {
///
/// - Complexity: O(*n*)
public mutating func reserveCapacity(_ n: Int) {
  // Forward to the guts once; a second, identical call was redundant.
  self._guts.reserveCapacity(n)
}
/// Appends the given character to the string.
@@ -140,39 +106,28 @@ extension String {
///
/// - Parameter c: The character to append to the string.
public mutating func append(_ c: Character) {
  // Append the character's `_str` exactly once. The earlier
  // `_smallUTF16` / `_largeUTF16` branches were the pre-UTF-8
  // implementation; keeping them would append the character twice.
  self.append(c._str)
}
/// Appends the contents of `newElements` to the string.
///
/// - Parameter newElements: The string to append.
public mutating func append(contentsOf newElements: String) {
  // Forward once to `append(_:)`; two calls would append the text twice.
  self.append(newElements)
}
/// Appends the contents of `newElements` to the string.
///
/// - Parameter newElements: The substring to append.
public mutating func append(contentsOf newElements: Substring) {
  // TODO(UTF8 perf): This is a horribly slow means...
  // Convert the slice to a String and append it exactly once.
  self.append(String(newElements))
}
/// Appends the characters in the given sequence to the string.
///
/// - Parameter newElements: A sequence of characters.
public mutating func append<S : Sequence>(contentsOf newElements: S)
where S.Iterator.Element == Character {
  // TODO(UTF8 perf): This is a horribly slow means...
  // Flatten the characters into their Unicode scalar values, decode those
  // as UTF-32 into a temporary String, and append it exactly once.
  let scalarValues = newElements.flatMap { character in
    character.unicodeScalars.map { $0.value }
  }
  self.append(String(decoding: scalarValues, as: UTF32.self))
}
/// Replaces the text within the specified bounds with the given characters.
@@ -189,15 +144,17 @@ extension String {
/// `newElements`. If the call to `replaceSubrange(_:with:)` simply
/// removes text at the end of the string, the complexity is O(*n*), where
/// *n* is equal to `bounds.count`.
@_specialize(where C == String)
@_specialize(where C == Substring)
@_specialize(where C == Array<Character>)
public mutating func replaceSubrange<C>(
  _ bounds: Range<Index>,
  with newElements: C
) where C : Collection, C.Iterator.Element == Character {
  // TODO(UTF8 perf): This is a horribly slow means...
  // Rebuild the string as head + replacement + tail. Replace exactly once;
  // the earlier `_guts.replaceSubrange` call was the pre-UTF-8 path.
  let prefix = self[..<bounds.lowerBound]
  let suffix = self[bounds.upperBound...]
  self = prefix + String(newElements) + suffix
}
/// Inserts a new character at the specified position.
@@ -211,10 +168,9 @@ extension String {
/// index, this method appends `newElement` to the string.
///
/// - Complexity: O(*n*), where *n* is the length of the string.
public mutating func insert(_ newElement: Character, at i: Index) {
  // TODO(UTF8 perf): Operate on storage directly, sliding down elements
  // Insert by replacing the empty range at `i`, exactly once.
  self.replaceSubrange(i..<i, with: newElement._str)
}
/// Inserts a collection of characters at the specified position.
@@ -231,13 +187,15 @@ extension String {
///
/// - Complexity: O(*n*), where *n* is the combined length of the string and
/// `newElements`.
@_specialize(where S == String)
@_specialize(where S == Substring)
@_specialize(where S == Array<Character>)
public mutating func insert<S : Collection>(
  contentsOf newElements: S, at i: Index
) where S.Element == Character {
  // TODO(UTF8 perf): Operate on storage directly, sliding down elements
  // Insert by replacing the empty range at `i`. `replaceSubrange` accepts
  // any Character collection and converts it itself, so no intermediate
  // String copy is made here.
  self.replaceSubrange(i..<i, with: newElements)
}
/// Removes and returns the character at the specified position.
@@ -258,29 +216,12 @@ extension String {
/// - Parameter i: The position of the character to remove. `i` must be a
/// valid index of the string that is not equal to the string's end index.
/// - Returns: The character that was removed.
@discardableResult
public mutating func remove(at i: Index) -> Character {
  // TODO(UTF8 perf): Operate on storage directly, sliding down elements
  let removed = self[i]
  // The range must span the whole character. Replacing the empty range
  // `i..<i` leaves the string untouched, so nothing would be removed.
  self.replaceSubrange(i..<self.index(after: i), with: String())
  return removed
}
/// Removes the characters in the given range.
@@ -293,11 +234,9 @@ extension String {
/// equal to the string's end index.
/// - Parameter bounds: The range of the elements to remove. The upper and
/// lower bounds of `bounds` must be valid indices of the string.
public mutating func removeSubrange(_ bounds: Range<Index>) {
  // TODO(UTF8 perf): Operate on storage directly, sliding down elements
  // Remove by replacing `bounds` with the empty string, exactly once.
  self.replaceSubrange(bounds, with: String())
}
/// Replaces this string with the empty string.
@@ -309,13 +248,37 @@ extension String {
/// string's allocated storage. Retaining the storage can be a useful
/// optimization when you're planning to grow the string again. The
/// default value is `false`.
public mutating func removeAll(keepingCapacity keepCapacity: Bool = false) {
  // There is only something to preserve when the caller asked for it AND
  // the guts report a capacity. The previous condition
  // (`keepCapacity || capacity != nil`) sent a plain `removeAll()` on any
  // string that reports a capacity into the unimplemented path.
  // NOTE(review): assumes `_guts.capacity` is `Int?`, as suggested by the
  // earlier `!= nil` comparison - confirm against `_StringGuts`.
  guard keepCapacity, let capacity = _guts.capacity else {
    self = String()
    return
  }
  // TODO(UTF8 perf): Clear the existing storage in place instead of
  // reallocating an equally sized buffer.
  self = String()
  self.reserveCapacity(capacity)
}
}
extension String {
  /// Traps unless `index` addresses an existing code unit, i.e. its encoded
  /// offset lies in `0 ..< _guts.count`.
  @inlinable @inline(__always)
  internal func _boundsCheck(_ index: Index) {
    let offset = index.encodedOffset
    _precondition(offset >= 0 && offset < _guts.count,
      "String index is out of bounds")
  }

  /// Traps unless `range` lies within `0 ... _guts.count`. The upper bound
  /// may equal the count because a half-open range excludes it.
  @inlinable @inline(__always)
  internal func _boundsCheck(_ range: Range<Index>) {
    let lower = range.lowerBound.encodedOffset
    let upper = range.upperBound.encodedOffset
    _precondition(
      lower >= 0 && upper <= _guts.count,
      "String index range is out of bounds")
  }

  /// Traps unless `range` lies within `0 ..< _guts.count`. The upper bound
  /// must be strictly below the count because a closed range includes it.
  @inlinable @inline(__always)
  internal func _boundsCheck(_ range: ClosedRange<Index>) {
    let lower = range.lowerBound.encodedOffset
    let upper = range.upperBound.encodedOffset
    _precondition(
      lower >= 0 && upper < _guts.count,
      "String index range is out of bounds")
  }
}
@@ -335,16 +298,14 @@ extension String {
}
}
//===----------------------------------------------------------------------===//
extension Sequence where Element == String {
  /// Unavailable overload that exists only to surface the diagnostic below
  /// when a `String` is appended to a sequence of strings with `+`.
  @available(*, unavailable, message: "Operator '+' cannot be used to append a String to a sequence of strings")
  public static func + (lhs: Self, rhs: String) -> Never {
    // Never callable from valid code; one terminating call is enough.
    fatalError()
  }

  /// Unavailable overload that exists only to surface the diagnostic below
  /// when a sequence of strings is appended to a `String` with `+`.
  @available(*, unavailable, message: "Operator '+' cannot be used to append a String to a sequence of strings")
  public static func + (lhs: String, rhs: Self) -> Never {
    // Never callable from valid code; one terminating call is enough.
    fatalError()
  }
}
+222 -242
View File
@@ -12,308 +12,288 @@
import SwiftShims
// TODO(UTF8): We can drop the nonobjc annotations soon
/// Common base class of the string storage classes.
///
/// Both properties are abstract: the base getters are
/// `Builtin.unreachable()`, so every concrete subclass must override them.
@objc
internal class _AbstractStringStorage: _SwiftNativeNSString, _NSStringCore {
  // Abstract interface

  /// A `String` view of this storage.
  internal var asString: String { get { Builtin.unreachable() } }

  /// The number of stored code units.
  internal var count: Int { get { Builtin.unreachable() } }
}
// ObjC interfaces
#if _runtime(_ObjC)
extension _AbstractStringStorage {
  /// The length of the string in UTF-16 code units.
  @objc(length)
  final internal var length: Int { return asString._utf16Length() }

  /// The UTF-16 code unit at the UTF-16 offset `index`.
  @objc(characterAtIndex:)
  final func character(at index: Int) -> UInt16 {
    return asString._utf16CodeUnitAtOffset(index)
  }

  /// Copies the UTF-16 code units in `aRange` into `buffer`.
  ///
  /// - Parameters:
  ///   - buffer: Destination with room for at least `aRange.length` units.
  ///   - aRange: A range of UTF-16 offsets into the string.
  @objc(getCharacters:range:)
  final func getCharacters(
    _ buffer: UnsafeMutablePointer<UInt16>,
    range aRange: _SwiftNSRange
  ) {
    _precondition(aRange.location >= 0 && aRange.length >= 0,
      "Range out of bounds")
    // `aRange` is in UTF-16 offsets, so it is checked against the UTF-16
    // `length`. `count` is the number of stored UTF-8 code units, which can
    // be larger and would let out-of-range offsets through.
    _precondition(aRange.location + aRange.length <= length,
      "Range out of bounds")

    let range = Range(
      uncheckedBounds: (aRange.location, aRange.location+aRange.length))
    let slice = asString.utf16[asString._utf16OffsetToIndex(range)]
    let outputBufPtr = UnsafeMutableBufferPointer(
      start: buffer, count: range.count)
    let _ = slice._copyContents(initializing: outputBufPtr)
  }

  /// Always `nil`: no contiguous UTF-16 buffer is offered.
  @objc(_fastCharacterContents)
  final func _fastCharacterContents() -> UnsafePointer<UInt16>? {
    return nil
  }

  /// A pointer to the stored bytes as C characters when this object is a
  /// `_StringStorage`; `nil` for any other subclass.
  @objc(_fastCStringContents)
  final func _fastCStringContents() -> UnsafePointer<CChar>? {
    if let native = self as? _StringStorage {
      return native.start._asCChar
    }

    // TODO(UTF8 perf): shared from literals are nul-terminated...
    return nil
  }

  @objc(copyWithZone:)
  final func copy(with zone: _SwiftNSZone?) -> AnyObject {
    // While _StringStorage instances aren't immutable in general,
    // mutations may only occur when instances are uniquely referenced.
    // Therefore, it is safe to return self here; any outstanding Objective-C
    // reference will make the instance non-unique.
    return self
  }
}
#endif // _runtime(_ObjC)
/// String storage whose code units live in tail-allocated `UInt8`
/// elements (see `mutableStart`).
///
/// Instances are produced by the static `create` methods; the designated
/// initializer only traps.
@_fixed_layout
@usableFromInline
final internal class _StringStorage: _AbstractStringStorage {
  /// Number of tail-allocated code units available, as measured by
  /// `create(capacity:count:)` from the allocation size.
  @nonobjc
  @usableFromInline
  internal var capacity: Int

  /// Backing store for `count`.
  @nonobjc
  @usableFromInline
  internal var _count: Int

  /// Number of code units currently in use.
  @nonobjc
  @inlinable
  override internal var count: Int { @inline(__always) get { return _count } }

  /// A `String` whose guts wrap this storage object.
  @nonobjc
  @inlinable
  override internal var asString: String {
    @inline(__always) get { return String(_StringGuts(self)) }
  }

  @nonobjc
  internal init(_doNotCallMe: ()) {
    _sanityCheckFailure("Use the create method")
  }
}
extension _SwiftStringStorage {
// Append operations
// Creation
extension _StringStorage {
@nonobjc
internal final func _appendInPlace<OtherCodeUnit>(
_ other: _UnmanagedString<OtherCodeUnit>
)
where OtherCodeUnit : FixedWidthInteger & UnsignedInteger {
let otherCount = Int(other.count)
_sanityCheck(self.count + otherCount <= self.capacity)
other._copy(into: self.unusedBuffer)
self.count += otherCount
internal static func create(
capacity: Int, count: Int = 0
) -> _StringStorage {
_sanityCheck(capacity >= count)
// Reserve enough capacity for a trailing nul character
let capacity = 1 + Swift.max(capacity, _SmallUTF8String.capacity)
_sanityCheck(capacity > count)
let storage = Builtin.allocWithTailElems_1(
_StringStorage.self,
capacity._builtinWordValue, UInt8.self)
let storageAddr = UnsafeRawPointer(
Builtin.bridgeToRawPointer(storage))
let endAddr = (
storageAddr + _stdlib_malloc_size(storageAddr)
).assumingMemoryBound(to: UInt8.self)
storage.capacity = endAddr - storage.start
storage._count = count
_sanityCheck(storage.capacity >= capacity)
storage.unusedStorage[0] = 0 // nul-terminated
storage._invariantCheck()
return storage
}
@nonobjc
internal final func _appendInPlace(_ other: _UnmanagedOpaqueString) {
let otherCount = Int(other.count)
_sanityCheck(self.count + otherCount <= self.capacity)
other._copy(into: self.unusedBuffer)
self.count += otherCount
internal static func create(
initializingFrom bufPtr: UnsafeBufferPointer<UInt8>, capacity: Int
) -> _StringStorage {
_sanityCheck(capacity >= bufPtr.count)
let storage = _StringStorage.create(
capacity: capacity, count: bufPtr.count)
let addr = bufPtr.baseAddress._unsafelyUnwrappedUnchecked
storage.mutableStart.initialize(from: addr, count: bufPtr.count)
return storage
}
@nonobjc
internal final func _appendInPlace<C: Collection>(contentsOf other: C)
where C.Element == CodeUnit {
let otherCount = Int(other.count)
_sanityCheck(self.count + otherCount <= self.capacity)
var (remainder, writtenUpTo) =
other._copyContents(initializing: self.unusedBuffer)
_precondition(remainder.next() == nil, "Collection underreported its count")
_precondition(writtenUpTo == otherCount, "Collection misreported its count")
count += otherCount
internal static func create(
initializingFrom bufPtr: UnsafeBufferPointer<UInt8>
) -> _StringStorage {
return _StringStorage.create(
initializingFrom: bufPtr, capacity: bufPtr.count)
}
@_specialize(where C == Character._SmallUTF16, CodeUnit == UInt8)
@nonobjc
internal final func _appendInPlaceUTF16<C: Collection>(contentsOf other: C)
where C.Element == UInt16 {
let otherCount = Int(other.count)
_sanityCheck(self.count + otherCount <= self.capacity)
// TODO: Use _copyContents(initializing:) for UTF16->UTF16 case
var it = other.makeIterator()
for p in end ..< end + otherCount {
p.pointee = CodeUnit(it.next()!)
}
_precondition(it.next() == nil, "Collection underreported its count")
count += otherCount
/// Creates storage holding the contents of `bufPtr` immediately followed by
/// the contents of `secondBufPtr`.
///
/// - Parameters:
///   - bufPtr: Code units copied to the start of the new storage.
///   - secondBufPtr: Code units copied directly after the first run.
/// - Returns: Storage requested with capacity and count both equal to the
///   combined length. Both base addresses are unwrapped without a check.
internal static func create(
  initializingFrom bufPtr: UnsafeBufferPointer<UInt8>,
  andAppending secondBufPtr: UnsafeBufferPointer<UInt8>
) -> _StringStorage {
  let firstCount = bufPtr.count
  let secondCount = secondBufPtr.count
  let total = firstCount + secondCount
  let result = _StringStorage.create(capacity: total, count: total)

  // Copy the first run to the front of the tail allocation.
  let firstSource = bufPtr.baseAddress._unsafelyUnwrappedUnchecked
  result.mutableStart.initialize(from: firstSource, count: firstCount)

  // Copy the second run directly behind the first.
  let secondSource = secondBufPtr.baseAddress._unsafelyUnwrappedUnchecked
  let secondDestination = result.mutableStart + firstCount
  secondDestination.initialize(from: secondSource, count: secondCount)
  return result
}
}
extension _SwiftStringStorage {
@nonobjc
internal final func _appendInPlace(_ other: _StringGuts, range: Range<Int>) {
if _slowPath(other._isOpaque) {
_opaqueAppendInPlace(opaqueOther: other, range: range)
return
}
// TODO(UTF8 perf): Append helpers, which can keep nul-termination
defer { _fixLifetime(other) }
if other.isASCII {
_appendInPlace(other._unmanagedASCIIView[range])
} else {
_appendInPlace(other._unmanagedUTF16View[range])
// Usage
extension _StringStorage {
/// Mutable pointer to the first `UInt8` tail element of this object.
@nonobjc
@inlinable
internal var mutableStart: UnsafeMutablePointer<UInt8> {
  @inline(__always) get {
    let tailElements = Builtin.projectTailElems(self, UInt8.self)
    return UnsafeMutablePointer(tailElements)
  }
}
@usableFromInline // @opaque
internal final func _opaqueAppendInPlace(
opaqueOther other: _StringGuts, range: Range<Int>
) {
_sanityCheck(other._isOpaque)
if other._isSmall {
other._smallUTF8String[range].withUnmanagedUTF16 {
self._appendInPlace($0)
}
return
}
defer { _fixLifetime(other) }
_appendInPlace(other._asOpaque()[range])
/// Mutable pointer `count` elements past `mutableStart`, i.e. one past the
/// last stored code unit.
@nonobjc
@inlinable
internal var mutableEnd: UnsafeMutablePointer<UInt8> {
  @inline(__always) get {
    let base = mutableStart
    return base + count
  }
}
@nonobjc
internal final func _appendInPlace(_ other: _StringGuts) {
if _slowPath(other._isOpaque) {
_opaqueAppendInPlace(opaqueOther: other)
return
}
defer { _fixLifetime(other) }
if other.isASCII {
_appendInPlace(other._unmanagedASCIIView)
} else {
_appendInPlace(other._unmanagedUTF16View)
}
}
@usableFromInline // @opaque
internal final func _opaqueAppendInPlace(opaqueOther other: _StringGuts) {
_sanityCheck(other._isOpaque)
if other._isSmall {
other._smallUTF8String.withUnmanagedUTF16 {
self._appendInPlace($0)
}
return
}
defer { _fixLifetime(other) }
_appendInPlace(other._asOpaque())
/// Read-only view of `mutableStart`.
@inlinable
internal var start: UnsafePointer<UInt8> {
  @inline(__always) get {
    let mutable = mutableStart
    return UnsafePointer(mutable)
  }
}
@nonobjc
internal final func _appendInPlace(_ other: String) {
self._appendInPlace(other._guts)
/// Read-only view of `mutableEnd`.
@inlinable
internal final var end: UnsafePointer<UInt8> {
  @inline(__always) get {
    let mutable = mutableEnd
    return UnsafePointer(mutable)
  }
}
@nonobjc
internal final func _appendInPlace<S : StringProtocol>(_ other: S) {
self._appendInPlace(
other._wholeString._guts,
range: other._encodedOffsetRange)
/// The stored code units: `count` bytes beginning at `start`.
@inlinable
internal var codeUnits: UnsafeBufferPointer<UInt8> {
  @inline(__always) get {
    let base = start
    let length = count
    return UnsafeBufferPointer(start: base, count: length)
  }
}
/// The region after the stored code units: `unusedCapacity` bytes beginning
/// at `mutableEnd`.
@inlinable
@nonobjc
internal var unusedStorage: UnsafeMutableBufferPointer<UInt8> {
  @inline(__always) get {
    let firstUnused = mutableEnd
    let remaining = unusedCapacity
    return UnsafeMutableBufferPointer(start: firstUnused, count: remaining)
  }
}
/// Number of code units of capacity not occupied by `count`, computed with
/// wrapping subtraction.
@nonobjc
@inlinable
internal var unusedCapacity: Int {
  @inline(__always) get {
    let total = capacity
    return total &- count
  }
}
/// Verifies this storage's layout and termination invariants. Compiles to
/// nothing unless `INTERNAL_CHECKS_ENABLED` is set.
@nonobjc
@inlinable @inline(__always)
internal func _invariantCheck() {
  #if INTERNAL_CHECKS_ENABLED
  // The code units must begin exactly `nativeBias` bytes past the object.
  let objectAddress = UnsafeRawPointer(Builtin.bridgeToRawPointer(self))
  let expectedStart = objectAddress + Int(_StringObject.nativeBias)
  _sanityCheck(expectedStart == UnsafeRawPointer(start))

  // There must be a spare byte after the contents, and it must be nul.
  _sanityCheck(self.capacity > self.count, "no room for nul-terminator")
  _sanityCheck(self.unusedStorage[0] == 0, "not nul terminated")
  #endif
}
}
// For bridging literals
//
// TODO(UTF8): Unify impls with _StringStorage
//
/// String storage whose code units live in a buffer that this object does not
/// allocate itself: it only records a pointer/count pair (`contents`) and an
/// optional `owner` object.
@_fixed_layout
@usableFromInline
final internal class _SharedStringStorage: _AbstractStringStorage {
/// Object stored alongside `contents`; `nil` when created via
/// `init(immortal:)`.
// NOTE(review): presumably retained to keep `contents` alive — confirm.
@nonobjc
@usableFromInline
internal var owner: AnyObject?
/// The UTF-8 code units this storage refers to.
@nonobjc
@usableFromInline
internal var contents: UnsafeBufferPointer<UInt8>
/// Pointer to the first code unit of `contents`.
///
/// The base address is unwrapped without a check, so `contents` must have a
/// non-nil base address.
@nonobjc
@usableFromInline
internal var start: UnsafePointer<UInt8> {
return contents.baseAddress._unsafelyUnwrappedUnchecked
}
/// Number of code units in `contents`.
@nonobjc
@usableFromInline
override internal var count: Int { return contents.count }
/// Creates storage referring to `bufPtr`, recording `owner` alongside it.
///
/// - Parameters:
///   - owner: Object stored in `owner`.
///   - bufPtr: The code units this storage refers to; not copied.
@nonobjc
internal init(owner: AnyObject, contents bufPtr: UnsafeBufferPointer<UInt8>) {
self.owner = owner
self.contents = bufPtr
super.init()
self._invariantCheck()
}
/// Creates storage referring to `bufPtr` with no owner.
///
/// - Parameter bufPtr: The code units this storage refers to; not copied.
// NOTE(review): "immortal" suggests the buffer must outlive this object —
// confirm against callers.
@nonobjc
internal init(immortal bufPtr: UnsafeBufferPointer<UInt8>) {
self.owner = nil
self.contents = bufPtr
super.init()
self._invariantCheck()
}
/// A `String` built from guts that wrap this storage object.
@nonobjc
override internal var asString: String { return String(_StringGuts(self)) }
}
extension _SharedStringStorage {
/// Hook for validating this storage's invariants.
///
/// Currently a no-op: the `INTERNAL_CHECKS_ENABLED` section contains no
/// checks.
@nonobjc
@inlinable @inline(__always)
internal func _invariantCheck() {
#if INTERNAL_CHECKS_ENABLED
// No invariants are checked yet.
#endif
}
}
+22 -13
View File
@@ -23,6 +23,7 @@ struct _StringRepresentation {
case _cocoa(object: AnyObject)
case _native(object: AnyObject)
case _immortal(address: UInt)
// TODO: shared native
}
public var _form: _Form
@@ -37,27 +38,35 @@ struct _StringRepresentation {
extension String {
public // @testable
func _classify() -> _StringRepresentation {
func _classify() -> _StringRepresentation { return _guts._classify() }
}
extension _StringGuts {
internal func _classify() -> _StringRepresentation {
var result = _StringRepresentation(
_isASCII: _guts._isASCIIOrSmallASCII,
_count: _guts.count,
_capacity: _guts.capacity,
_isASCII: self.isKnownASCII,
_count: self.count,
_capacity: self.capacity ?? 0,
_form: ._small
)
if _guts._isSmall {
if _object.isSmall {
return result
}
if _guts._isNative {
result._form = ._native(object: _guts._owner!)
if _object.largeIsCocoa {
result._form = ._cocoa(object: _object.cocoaObject)
return result
}
if _guts._isCocoa {
result._form = ._cocoa(object: _guts._owner!)
return result
}
if _guts._isUnmanaged {
// TODO: shared native
_sanityCheck(_object.providesFastUTF8)
_sanityCheck(_object.largeFastIsNative)
if _object.isImmortal {
result._form = ._immortal(
address: UInt(bitPattern: _guts._unmanagedRawStart))
address: UInt(bitPattern: _object.nativeUTF8Start))
return result
}
if _object.hasNativeStorage {
result._form = ._native(object: _object.nativeStorage)
return result
}
fatalError()
+270 -311
View File
@@ -10,6 +10,42 @@
//
//===----------------------------------------------------------------------===//
// TODO(UTF8 merge): Find a common place for these helpers
extension _StringGuts {
  /// Fetches the UTF-16 code unit at offset `i` from the underlying Cocoa
  /// string object.
  ///
  /// - Parameter i: Offset passed straight through to
  ///   `_cocoaStringSubscript`.
  /// - Returns: The code unit reported by the Cocoa object.
  @_effects(releasenone)
  // TODO(UTF8): Should probably take a String.Index, assert no transcoding
  internal func foreignUTF16CodeUnit(at i: Int) -> UInt16 {
    // Currently, foreign means NSString
    let cocoa = _object.cocoaObject
    return _cocoaStringSubscript(cocoa, i)
  }
}
/// Value of the masked bits for a leading (high) surrogate: 0xD800...0xDBFF.
internal let _leadingSurrogateBias: UInt16 = 0xd800
/// Value of the masked bits for a trailing (low) surrogate: 0xDC00...0xDFFF.
internal let _trailingSurrogateBias: UInt16 = 0xdc00
/// Selects the top six bits of a UTF-16 code unit, which identify the
/// surrogate kind.
internal let _surrogateMask: UInt16 = 0xfc00

/// Returns `true` if `cu` is a trailing (low) surrogate code unit.
@inline(__always)
internal func _isTrailingSurrogate(_ cu: UInt16) -> Bool {
  let kindBits = cu & _surrogateMask
  return kindBits == _trailingSurrogateBias
}

/// Returns `true` if `cu` is a leading (high) surrogate code unit.
@inline(__always)
internal func _isLeadingSurrogate(_ cu: UInt16) -> Bool {
  let kindBits = cu & _surrogateMask
  return kindBits == _leadingSurrogateBias
}
/// Returns the number of UTF-8 code units produced when transcoding the
/// scalar that the UTF-16 code unit `x` begins.
///
/// - Parameter x: A UTF-16 code unit that is not a trailing surrogate.
/// - Returns: 4 for a leading surrogate (covering the whole surrogate pair),
///   otherwise 1, 2, or 3 depending on the magnitude of `x`.
internal func _numTranscodedUTF8CodeUnits(_ x: UInt16) -> Int {
  _sanityCheck(!_isTrailingSurrogate(x))
  if _slowPath(_isLeadingSurrogate(x)) { return 4 }
  if x < 0x80 { return 1 }
  if x < 0x0800 { return 2 }
  return 3
}
// FIXME(ABI)#71 : The UTF-16 string view should have a custom iterator type to
// allow performance optimizations of linear traversals.
@@ -99,230 +135,190 @@ extension String {
/// }
/// // Prints "Let it snow!"
@_fixed_layout // FIXME(sil-serialize-all)
public struct UTF16View
: BidirectionalCollection,
CustomStringConvertible,
CustomDebugStringConvertible {
public typealias Index = String.Index
/// The position of the first code unit if the `String` is
/// nonempty; identical to `endIndex` otherwise.
@inlinable // FIXME(sil-serialize-all)
public var startIndex: Index {
return Index(encodedOffset: _offset)
}
/// The "past the end" position---that is, the position one greater than
/// the last valid subscript argument.
///
/// In an empty UTF-16 view, `endIndex` is equal to `startIndex`.
@inlinable // FIXME(sil-serialize-all)
public var endIndex: Index {
return Index(encodedOffset: _offset + _length)
}
@_fixed_layout // FIXME(sil-serialize-all)
public struct Indices {
@inlinable // FIXME(sil-serialize-all)
internal init(
_elements: String.UTF16View, _startIndex: Index, _endIndex: Index
) {
self._elements = _elements
self._startIndex = _startIndex
self._endIndex = _endIndex
}
@usableFromInline // FIXME(sil-serialize-all)
internal var _elements: String.UTF16View
@usableFromInline // FIXME(sil-serialize-all)
internal var _startIndex: Index
@usableFromInline // FIXME(sil-serialize-all)
internal var _endIndex: Index
}
@inlinable // FIXME(sil-serialize-all)
public var indices: Indices {
return Indices(
_elements: self, startIndex: startIndex, endIndex: endIndex)
}
// TODO: swift-3-indexing-model - add docs
@inlinable // FIXME(sil-serialize-all)
public func index(after i: Index) -> Index {
// FIXME: swift-3-indexing-model: range check i?
return Index(encodedOffset: _unsafePlus(i.encodedOffset, 1))
}
// TODO: swift-3-indexing-model - add docs
@inlinable // FIXME(sil-serialize-all)
public func index(before i: Index) -> Index {
// FIXME: swift-3-indexing-model: range check i?
return Index(encodedOffset: _unsafeMinus(i.encodedOffset, 1))
}
// TODO: swift-3-indexing-model - add docs
@inlinable // FIXME(sil-serialize-all)
public func index(_ i: Index, offsetBy n: Int) -> Index {
// FIXME: swift-3-indexing-model: range check i?
return Index(encodedOffset: i.encodedOffset.advanced(by: n))
}
// TODO: swift-3-indexing-model - add docs
@inlinable // FIXME(sil-serialize-all)
public func index(
_ i: Index, offsetBy n: Int, limitedBy limit: Index
) -> Index? {
// FIXME: swift-3-indexing-model: range check i?
let d = i.encodedOffset.distance(to: limit.encodedOffset)
if (d >= 0) ? (d < n) : (d > n) {
return nil
}
return Index(encodedOffset: i.encodedOffset.advanced(by: n))
}
// TODO: swift-3-indexing-model - add docs
@inlinable // FIXME(sil-serialize-all)
public func distance(from start: Index, to end: Index) -> Int {
// FIXME: swift-3-indexing-model: range check start and end?
return start.encodedOffset.distance(to: end.encodedOffset)
}
@inlinable // FIXME(sil-serialize-all)
internal func _internalIndex(at i: Int) -> Int {
return _guts.startIndex + i
}
/// Accesses the code unit at the given position.
///
/// The following example uses the subscript to print the value of a
/// string's first UTF-16 code unit.
///
/// let greeting = "Hello, friend!"
/// let i = greeting.utf16.startIndex
/// print("First character's UTF-16 code unit: \(greeting.utf16[i])")
/// // Prints "First character's UTF-16 code unit: 72"
///
/// - Parameter position: A valid index of the view. `position` must be
/// less than the view's end index.
@inlinable // FIXME(sil-serialize-all)
public subscript(i: Index) -> UTF16.CodeUnit {
_precondition(i >= startIndex && i < endIndex,
"out-of-range access on a UTF16View")
let index = _internalIndex(at: i.encodedOffset)
let u = _guts.codeUnit(atCheckedOffset: index)
if _fastPath(UTF16._isScalar(u)) {
// Neither high-surrogate, nor low-surrogate -- well-formed sequence
// of 1 code unit.
return u
}
if UTF16.isLeadSurrogate(u) {
// Sequence is well-formed if `u` is followed by a low-surrogate.
if _fastPath(
index + 1 < _guts.count &&
UTF16.isTrailSurrogate(_guts.codeUnit(atCheckedOffset: index + 1)))
{
return u
}
return UTF16._replacementCodeUnit
}
// `u` is a low-surrogate. Sequence is well-formed if
// previous code unit is a high-surrogate.
if _fastPath(
index != 0 &&
UTF16.isLeadSurrogate(_guts.codeUnit(atCheckedOffset: index - 1)))
{
return u
}
return UTF16._replacementCodeUnit
}
#if _runtime(_ObjC)
// These may become less important once <rdar://problem/19255291> is addressed.
@available(
*, unavailable,
message: "Indexing a String's UTF16View requires a String.UTF16View.Index, which can be constructed from Int when Foundation is imported")
public subscript(i: Int) -> UTF16.CodeUnit {
Builtin.unreachable()
}
@available(
*, unavailable,
message: "Slicing a String's UTF16View requires a Range<String.UTF16View.Index>, String.UTF16View.Index can be constructed from Int when Foundation is imported")
public subscript(bounds: Range<Int>) -> UTF16View {
Builtin.unreachable()
}
#endif
@inlinable // FIXME(sil-serialize-all)
internal init(_ _guts: _StringGuts) {
self.init(_guts, offset: 0, length: _guts.count)
}
@inlinable // FIXME(sil-serialize-all)
internal init(_ _guts: _StringGuts, offset: Int, length: Int) {
self._offset = offset
self._length = length
self._guts = _guts
}
public var description: String {
return String(_guts._extractSlice(_encodedOffsetRange))
}
public var debugDescription: String {
return "StringUTF16(\(self.description.debugDescription))"
}
@usableFromInline // FIXME(sil-serialize-all)
internal var _offset: Int
@usableFromInline // FIXME(sil-serialize-all)
internal var _length: Int
public struct UTF16View {
@usableFromInline
internal var _guts: _StringGuts
@inlinable // FIXME(sil-serialize-all)
internal init(_ guts: _StringGuts) {
self._guts = guts
_invariantCheck()
}
}
}
extension String.UTF16View {
/// Hook for validating the view's invariants.
///
/// Currently a no-op: the `INTERNAL_CHECKS_ENABLED` section contains no
/// checks.
@inlinable @inline(__always)
internal func _invariantCheck() {
#if INTERNAL_CHECKS_ENABLED
// No invariants are checked yet.
#endif
}
}
extension String.UTF16View: BidirectionalCollection {
public typealias Index = String.Index
/// The position of the first code unit if the `String` is nonempty;
/// identical to `endIndex` otherwise.
///
/// Always the index with encoded offset zero.
@inlinable // FIXME(sil-serialize-all)
public var startIndex: Index {
  @inline(__always) get {
    let firstOffset = 0
    return Index(encodedOffset: firstOffset)
  }
}
/// The "past the end" position---that is, the position one greater than
/// the last valid subscript argument.
///
/// In an empty UTF-16 view, `endIndex` is equal to `startIndex`. The index
/// is formed from the guts' `count` used as an encoded offset.
@inlinable // FIXME(sil-serialize-all)
public var endIndex: Index {
  @inline(__always) get {
    let pastEndOffset = _guts.count
    return Index(encodedOffset: pastEndOffset)
  }
}
/// Returns the position immediately after `i`.
///
/// - Parameter i: A valid index of the view, less than `endIndex`.
/// - Returns: The index of the next UTF-16 code unit.
@inlinable @inline(__always)
public func index(after i: Index) -> Index {
  // TODO(UTF8) known-ASCII fast path
  guard _fastPath(!_guts.isForeign) else { return _foreignIndex(after: i) }

  // A scalar stored in 1-3 UTF-8 code units is a single UTF-16 code unit, so
  // step past it. A 4-byte scalar is two UTF-16 code units: the first step
  // only moves to a transcoded position, the second steps past the scalar.
  let scalarLength = _guts.fastUTF8ScalarLength(startingAt: i.encodedOffset)
  let atFirstHalfOfPair = scalarLength == 4 && i.transcodedOffset == 0
  guard !atFirstHalfOfPair else {
    return Index(transcodedAfter: i)
  }
  return Index(encodedOffset: i.encodedOffset &+ scalarLength)
}
/// Returns the position immediately before `i`.
///
/// - Parameter i: A valid index of the view, greater than `startIndex`.
/// - Returns: The index of the preceding UTF-16 code unit.
///
/// Traps if `i` is at the start of the view.
@inlinable @inline(__always)
public func index(before i: Index) -> Index {
  if _slowPath(_guts.isForeign) {
    // Foreign indices step by plain encoded offsets, so offset 0 is the start.
    precondition(i.encodedOffset > 0)
    return _foreignIndex(before: i)
  }

  // TODO(UTF8) known-ASCII fast path
  if i.transcodedOffset != 0 {
    // `i` is the second UTF-16 code unit of a 4-byte scalar; the previous
    // position is the start of that same scalar. This must be handled before
    // the encoded-offset bounds check: when the scalar is the first one in
    // the string, `i.encodedOffset` is 0 even though `i` is past the start.
    _sanityCheck(i.transcodedOffset == 1)
    return Index(encodedOffset: i.encodedOffset)
  }

  // `i` is on a scalar boundary, so there must be a scalar before it.
  precondition(i.encodedOffset > 0)

  let len = _guts.fastUTF8ScalarLength(endingAt: i.encodedOffset)
  if len == 4 {
    // The preceding scalar is a surrogate pair in UTF-16; land on its second
    // code unit.
    return Index(
      encodedOffset: i.encodedOffset &- len,
      transcodedOffset: 1)
  }

  _sanityCheck((1...3) ~= len)
  return Index(encodedOffset: i.encodedOffset &- len)
}
/// Returns the index `n` UTF-16 code units away from `i`.
///
/// - Parameters:
///   - i: A valid index of the view.
///   - n: The distance to offset `i`.
/// - Returns: The offset index. Foreign strings use `_foreignIndex`; all
///   others use `__index(_:offsetBy:)`.
@inlinable @inline(__always)
public func index(_ i: Index, offsetBy n: Int) -> Index {
  guard _fastPath(!_guts.isForeign) else {
    return _foreignIndex(i, offsetBy: n)
  }

  // TODO(UTF8) known-ASCII fast path
  return __index(i, offsetBy: n)
}
/// Returns the index `n` UTF-16 code units away from `i`, or `nil` if the
/// underlying implementation reports that `limit` was reached first.
///
/// - Parameters:
///   - i: A valid index of the view.
///   - n: The distance to offset `i`.
///   - limit: A valid index of the view to use as a limit.
/// - Returns: The offset index, or `nil`. Foreign strings use
///   `_foreignIndex`; all others use `__index(_:offsetBy:limitedBy:)`.
@inlinable @inline(__always)
public func index(
  _ i: Index, offsetBy n: Int, limitedBy limit: Index
) -> Index? {
  guard _fastPath(!_guts.isForeign) else {
    return _foreignIndex(i, offsetBy: n, limitedBy: limit)
  }

  // TODO(UTF8) known-ASCII fast paths
  return __index(i, offsetBy: n, limitedBy: limit)
}
/// Returns the number of UTF-16 code units between two indices.
///
/// - Parameters:
///   - start: A valid index of the view.
///   - end: Another valid index of the view.
/// - Returns: The distance from `start` to `end`. Foreign strings use
///   `_foreignDistance`; all others use `__distance(from:to:)`.
@inlinable @inline(__always)
public func distance(from start: Index, to end: Index) -> Int {
  guard _fastPath(!_guts.isForeign) else {
    return _foreignDistance(from: start, to: end)
  }

  // TODO(UTF8) known-ASCII fast paths
  return __distance(from: start, to: end)
}
/// Accesses the UTF-16 code unit at the given position.
///
/// The following example uses the subscript to print the value of a
/// string's first UTF-16 code unit.
///
///     let greeting = "Hello, friend!"
///     let i = greeting.utf16.startIndex
///     print("First character's UTF-16 code unit: \(greeting.utf16[i])")
///     // Prints "First character's UTF-16 code unit: 72"
///
/// - Parameter i: A valid index of the view. `i` must be less than the
///   view's end index.
@inlinable
public subscript(i: Index) -> UTF16.CodeUnit {
  @inline(__always) get {
    _precondition(i.encodedOffset >= 0 && i < endIndex)

    // TODO(UTF8): known-ASCII fast path
    guard _fastPath(_guts.isFastUTF8) else {
      return _foreignSubscript(position: i)
    }

    let scalar = _guts.fastUTF8Scalar(startingAt: i.encodedOffset)
    guard scalar.value > 0xFFFF else {
      // A BMP scalar is exactly one UTF-16 code unit.
      return UInt16(truncatingIfNeeded: scalar.value)
    }
    // A non-BMP scalar is a surrogate pair; the transcoded offset selects
    // which half.
    return scalar.utf16[i.transcodedOffset]
  }
}
}
extension String.UTF16View: CustomStringConvertible {
  /// The string built from this view's underlying guts.
  @inlinable
  public var description: String {
    @inline(__always) get {
      let underlying = _guts
      return String(underlying)
    }
  }
}
extension String.UTF16View: CustomDebugStringConvertible {
  /// The debug description of `description`, wrapped in `StringUTF16(...)`.
  public var debugDescription: String {
    let quoted = self.description.debugDescription
    return "StringUTF16(\(quoted))"
  }
}
extension String {
/// A UTF-16 encoding of `self`.
@inlinable // FIXME(sil-serialize-all)
public var utf16: UTF16View {
get {
return UTF16View(_guts)
}
set {
self = String(describing: newValue)
}
@inline(__always) get { return UTF16View(_guts) }
@inline(__always) set { self = String(newValue._guts) }
}
/// Creates a string corresponding to the given sequence of UTF-16 code units.
@inlinable // FIXME(sil-serialize-all)
@inlinable @inline(__always)
@available(swift, introduced: 4.0)
public init(_ utf16: UTF16View) {
self = String(utf16._guts)
self.init(utf16._guts)
}
/// The index type for subscripting a string.
public typealias UTF16Index = UTF16View.Index
}
extension String.UTF16View : _SwiftStringView {
@inlinable // FIXME(sil-serialize-all)
internal var _ephemeralContent : String { return _persistentContent }
@inlinable // FIXME(sil-serialize-all)
internal var _persistentContent : String { return String(self._guts) }
@inlinable // FIXME(sil-serialize-all)
var _wholeString : String {
return String(_guts)
#if _runtime(_ObjC)
// These may become less important once <rdar://problem/19255291> is addressed.
@available(
*, unavailable,
message: "Indexing a String's UTF16View requires a String.UTF16View.Index, which can be constructed from Int when Foundation is imported")
public subscript(i: Int) -> UTF16.CodeUnit {
Builtin.unreachable()
}
@inlinable // FIXME(sil-serialize-all)
var _encodedOffsetRange : Range<Int> {
return _offset..<_offset+_length
@available(
*, unavailable,
message: "Slicing a String's UTF16View requires a Range<String.UTF16View.Index>, String.UTF16View.Index can be constructed from Int when Foundation is imported")
public subscript(bounds: Range<Int>) -> UTF16View {
Builtin.unreachable()
}
#endif
}
// Index conversions
@@ -399,112 +395,75 @@ extension String.UTF16View : CustomReflectable {
}
}
extension String.UTF16View.Indices : BidirectionalCollection {
public typealias Index = String.UTF16View.Index
public typealias Indices = String.UTF16View.Indices
public typealias SubSequence = String.UTF16View.Indices
@inlinable // FIXME(sil-serialize-all)
internal init(
_elements: String.UTF16View,
startIndex: Index,
endIndex: Index
) {
self._elements = _elements
self._startIndex = startIndex
self._endIndex = endIndex
}
@inlinable // FIXME(sil-serialize-all)
public var startIndex: Index {
return _startIndex
}
@inlinable // FIXME(sil-serialize-all)
public var endIndex: Index {
return _endIndex
}
@inlinable // FIXME(sil-serialize-all)
public var indices: Indices {
return self
}
@inlinable // FIXME(sil-serialize-all)
public subscript(i: Index) -> Index {
// FIXME: swift-3-indexing-model: range check.
return i
}
@inlinable // FIXME(sil-serialize-all)
public subscript(bounds: Range<Index>) -> String.UTF16View.Indices {
// FIXME: swift-3-indexing-model: range check.
return String.UTF16View.Indices(
_elements: _elements,
startIndex: bounds.lowerBound,
endIndex: bounds.upperBound)
}
@inlinable // FIXME(sil-serialize-all)
public func index(after i: Index) -> Index {
// FIXME: swift-3-indexing-model: range check.
return _elements.index(after: i)
}
@inlinable // FIXME(sil-serialize-all)
public func formIndex(after i: inout Index) {
// FIXME: swift-3-indexing-model: range check.
_elements.formIndex(after: &i)
}
@inlinable // FIXME(sil-serialize-all)
public func index(before i: Index) -> Index {
// FIXME: swift-3-indexing-model: range check.
return _elements.index(before: i)
}
@inlinable // FIXME(sil-serialize-all)
public func formIndex(before i: inout Index) {
// FIXME: swift-3-indexing-model: range check.
_elements.formIndex(before: &i)
}
@inlinable // FIXME(sil-serialize-all)
public func index(_ i: Index, offsetBy n: Int) -> Index {
// FIXME: swift-3-indexing-model: range check i?
return _elements.index(i, offsetBy: n)
}
@inlinable // FIXME(sil-serialize-all)
public func index(
_ i: Index, offsetBy n: Int, limitedBy limit: Index
) -> Index? {
// FIXME: swift-3-indexing-model: range check i?
return _elements.index(i, offsetBy: n, limitedBy: limit)
}
// TODO: swift-3-indexing-model - add docs
@inlinable // FIXME(sil-serialize-all)
public func distance(from start: Index, to end: Index) -> Int {
// FIXME: swift-3-indexing-model: range check start and end?
return _elements.distance(from: start, to: end)
}
}
//===--- Slicing Support --------------------------------------------------===//
/// In Swift 3.2, in the absence of type context,
///
/// someString.utf16[someString.utf16.startIndex..<someString.utf16.endIndex]
///
/// was deduced to be of type `String.UTF16View`. Provide a more-specific
/// Swift-3-only `subscript` overload that continues to produce
/// `String.UTF16View`.
// Slicing
extension String.UTF16View {
public typealias SubSequence = Substring.UTF16View
@inlinable // FIXME(sil-serialize-all)
@available(swift, introduced: 4)
public subscript(bounds: Range<Index>) -> String.UTF16View.SubSequence {
return String.UTF16View.SubSequence(self, _bounds: bounds)
public subscript(r: Range<Index>) -> Substring.UTF16View {
return Substring.UTF16View(self, _bounds: r)
}
}
// Foreign string support
extension String.UTF16View {
  /// Slow path of `index(after:)` for foreign strings: advances the encoded
  /// offset by one.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(after i: Index) -> Index {
    _sanityCheck(_guts.isForeign)

    // Currently, foreign means NSString
    let nextOffset = i.encodedOffset + 1
    return Index(encodedOffset: nextOffset)
  }

  /// Slow path of `index(before:)` for foreign strings: moves the encoded
  /// offset back by one.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(before i: Index) -> Index {
    _sanityCheck(_guts.isForeign)

    // Currently, foreign means NSString
    let previousOffset = i.encodedOffset - 1
    return Index(encodedOffset: previousOffset)
  }

  /// Slow path of the subscript for foreign strings: asks the guts for the
  /// UTF-16 code unit at `i`'s encoded offset.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignSubscript(position i: Index) -> UTF16.CodeUnit {
    _sanityCheck(_guts.isForeign)

    // Currently, foreign means NSString
    let offset = i.encodedOffset
    return _guts.foreignUTF16CodeUnit(at: offset)
  }

  /// Slow path of `distance(from:to:)` for foreign strings: the difference
  /// of the two encoded offsets.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignDistance(from start: Index, to end: Index) -> Int {
    _sanityCheck(_guts.isForeign)

    // Currently, foreign means NSString
    let lower = start.encodedOffset
    let upper = end.encodedOffset
    return upper - lower
  }

  /// Slow path of `index(_:offsetBy:limitedBy:)` for foreign strings.
  ///
  /// Returns `nil` when `limit` lies in the direction of travel and is
  /// closer to `i` than `n` code units; otherwise offsets the encoded offset
  /// by `n`.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(
    _ i: Index, offsetBy n: Int, limitedBy limit: Index
  ) -> Index? {
    _sanityCheck(_guts.isForeign)

    // Currently, foreign means NSString
    let distanceToLimit = limit.encodedOffset - i.encodedOffset
    let limitReachedFirst: Bool
    if n > 0 {
      limitReachedFirst = distanceToLimit >= 0 && distanceToLimit < n
    } else {
      limitReachedFirst = distanceToLimit <= 0 && n < distanceToLimit
    }
    guard !limitReachedFirst else { return nil }
    return Index(encodedOffset: i.encodedOffset + n)
  }

  /// Slow path of `index(_:offsetBy:)` for foreign strings: offsets the
  /// encoded offset by `n`.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(_ i: Index, offsetBy n: Int) -> Index {
    _sanityCheck(_guts.isForeign)

    // Currently, foreign means NSString
    let targetOffset = i.encodedOffset + n
    return Index(encodedOffset: targetOffset)
  }
}
+385 -534
View File
@@ -85,330 +85,247 @@ extension String {
/// // Prints "-17"
/// print(String(s1.utf8.prefix(15)))
/// // Prints "They call me 'B"
@_fixed_layout // FIXME(sil-serialize-all)
public struct UTF8View
: BidirectionalCollection,
CustomStringConvertible,
CustomDebugStringConvertible {
/// Underlying UTF-16-compatible representation
@_fixed_layout
public struct UTF8View {
@usableFromInline
internal var _guts: _StringGuts
/// Distances to `(startIndex, endIndex)` from the endpoints of _guts,
/// measured in UTF-8 code units.
///
/// Note: this is *only* here to support legacy Swift3-style slicing where
/// `s.utf8[i..<j]` produces a `String.UTF8View`, and should be removed when
/// those semantics are no longer supported.
@usableFromInline
internal let _legacyOffsets: (start: Int8, end: Int8)
/// Flags indicating whether the limits of this view did not originally fall
/// on grapheme cluster boundaries in the original string. This is used to
/// emulate (undocumented) Swift 3 behavior where String.init?(_:) returned
/// nil in such cases.
///
/// Note: this is *only* here to support legacy Swift3-style slicing where
/// `s.utf8[i..<j]` produces a `String.UTF8View`, and should be removed when
/// those semantics are no longer supported.
@usableFromInline
internal let _legacyPartialCharacters: (start: Bool, end: Bool)
@inlinable // FIXME(sil-serialize-all)
internal init(
_ _guts: _StringGuts,
legacyOffsets: (Int, Int) = (0, 0),
legacyPartialCharacters: (Bool, Bool) = (false, false)
) {
self._guts = _guts
self._legacyOffsets = (Int8(legacyOffsets.0), Int8(legacyOffsets.1))
self._legacyPartialCharacters = legacyPartialCharacters
}
public typealias Index = String.Index
/// The position of the first code unit if the UTF-8 view is
/// nonempty.
///
/// If the UTF-8 view is empty, `startIndex` is equal to `endIndex`.
@inlinable // FIXME(sil-serialize-all)
public var startIndex: Index {
let r: Index
if _fastPath(_guts._isASCIIOrSmallASCII) {
r = Index(encodedOffset: 0)
} else {
r = _nonASCIIIndex(atEncodedOffset: 0)
}
_sanityCheck(r.encodedOffset == 0)
if _fastPath(_legacyOffsets.start == 0) { return r }
return index(r, offsetBy: numericCast(_legacyOffsets.start))
}
/// The "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
///
/// In an empty UTF-8 view, `endIndex` is equal to `startIndex`.
@inlinable // FIXME(sil-serialize-all)
public var endIndex: Index {
_sanityCheck(_legacyOffsets.end >= -3 && _legacyOffsets.end <= 0,
"out of bounds legacy end")
var r = Index(encodedOffset: _guts.endIndex)
if _fastPath(_legacyOffsets.end == 0) {
return r
}
switch _legacyOffsets.end {
case -3: r = index(before: r); fallthrough
case -2: r = index(before: r); fallthrough
case -1: return index(before: r)
default: Builtin.unreachable()
}
}
@inline(never)
@_effects(releasenone)
@usableFromInline
internal func _nonASCIIIndex(atEncodedOffset n: Int) -> Index {
_sanityCheck(!_guts._isASCIIOrSmallASCII)
let count = _guts.count
if n == count { return endIndex }
let buffer: Index._UTF8Buffer = _visitGuts(
_guts, range: (n..<count, performBoundsCheck: true),
ascii: { _ in
Builtin.unreachable()
/* return Index._UTF8Buffer() */ },
utf16: { utf16 in
var i = utf16.makeIterator()
return UTF8View._fillBuffer(from: &i) },
opaque: { opaque in
var i = opaque.makeIterator()
return UTF8View._fillBuffer(from: &i)}
)
return Index(encodedOffset: n, transcodedOffset: 0, buffer: buffer)
}
@inline(__always)
internal
static func _fillBuffer<Iter: IteratorProtocol>(
from i: inout Iter
) -> Index._UTF8Buffer where Iter.Element == UInt16 {
var p = UTF16.ForwardParser()
var buffer = Index._UTF8Buffer()
while true {
switch p.parseScalar(from: &i) {
case .valid(let u16):
let u8 = Unicode.UTF8.transcode(u16, from: Unicode.UTF16.self)
._unsafelyUnwrappedUnchecked
if buffer.count + u8.count > buffer.capacity {
return buffer
}
buffer.append(contentsOf: u8)
case .error:
let u8 = Unicode.UTF8.encodedReplacementCharacter
if buffer.count + u8.count > buffer.capacity {
return buffer
}
buffer.append(contentsOf: u8)
case .emptyInput:
return buffer
}
}
}
/// Returns the next consecutive position after `i`.
///
/// - Precondition: The next position is representable.
@inlinable // FIXME(sil-serialize-all)
@inline(__always)
public func index(after i: Index) -> Index {
if _fastPath(_guts._isASCIIOrSmallASCII) {
precondition(i.encodedOffset < _guts.count)
return Index(encodedOffset: i.encodedOffset + 1)
}
return _nonASCIIIndex(after: i)
}
@inline(never)
@_effects(releasenone)
@usableFromInline
internal func _nonASCIIIndex(after i: Index) -> Index {
_sanityCheck(!_guts._isASCIIOrSmallASCII)
var j = i
// Ensure j's cache is utf8
if _slowPath(j.utf8Buffer == nil) {
j = _nonASCIIIndex(atEncodedOffset: j.encodedOffset)
precondition(j != endIndex, "Index out of bounds")
}
let buffer = j.utf8Buffer._unsafelyUnwrappedUnchecked
var scalarLength16 = 1
let b0 = buffer.first._unsafelyUnwrappedUnchecked
var nextBuffer = buffer
let leading1s = (~b0).leadingZeroBitCount
if _fastPath(leading1s == 0) { // ASCII in buffer; just consume it
nextBuffer.removeFirst()
}
else {
// Number of bytes consumed in this scalar
let n8 = j.transcodedOffset + 1
// If we haven't reached a scalar boundary...
if _fastPath(n8 < leading1s) {
// Advance to the next position in this scalar
return Index(
encodedOffset: j.encodedOffset,
transcodedOffset: n8, buffer: buffer)
}
// We reached a scalar boundary; compute the underlying utf16's width
// based on the number of utf8 code units
scalarLength16 = n8 >> 2 + 1
nextBuffer.removeFirst(n8)
}
if _fastPath(!nextBuffer.isEmpty) {
return Index(
encodedOffset: j.encodedOffset + scalarLength16,
transcodedOffset: 0,
buffer: nextBuffer)
}
// If nothing left in the buffer, refill it.
return _nonASCIIIndex(atEncodedOffset: j.encodedOffset + scalarLength16)
}
@inlinable // FIXME(sil-serialize-all)
public func index(before i: Index) -> Index {
if _fastPath(_guts._isASCIIOrSmallASCII) {
precondition(i.encodedOffset > 0)
return Index(encodedOffset: i.encodedOffset - 1)
}
return _nonASCIIIndex(before: i)
}
@inline(never)
@_effects(releasenone)
@usableFromInline
internal func _nonASCIIIndex(before i: Index) -> Index {
_sanityCheck(!_guts._isASCIIOrSmallASCII)
if i.transcodedOffset != 0 {
_sanityCheck(i.utf8Buffer != nil)
return Index(
encodedOffset: i.encodedOffset,
transcodedOffset: i.transcodedOffset &- 1,
buffer: i.utf8Buffer._unsafelyUnwrappedUnchecked)
}
// Handle the scalar boundary the same way as the not-a-utf8-index case.
_precondition(i.encodedOffset > 0, "Can't move before startIndex")
// Parse a single scalar
let u = _guts.unicodeScalar(endingAt: i.encodedOffset)
let u8 = Unicode.UTF8.encode(u)._unsafelyUnwrappedUnchecked
return Index(
encodedOffset: i.encodedOffset &- (u8.count < 4 ? 1 : 2),
transcodedOffset: u8.count &- 1,
buffer: String.Index._UTF8Buffer(u8))
}
@inlinable // FIXME(sil-serialize-all)
public func distance(from i: Index, to j: Index) -> Int {
if _fastPath(_guts._isASCIIOrSmallASCII) {
return j.encodedOffset - i.encodedOffset
}
return _nonASCIIDistance(from: i, to: j)
}
@inline(never)
@_effects(releasenone)
@usableFromInline
internal func _nonASCIIDistance(from i: Index, to j: Index) -> Int {
let forwards = j >= i
let start, end: Index
if forwards {
start = i
end = j
} else {
start = j
end = i
}
let countAbs = end.transcodedOffset - start.transcodedOffset
+ _gutsNonASCIIUTF8Count(start.encodedOffset..<end.encodedOffset)
return forwards ? countAbs : -countAbs
}
/// Accesses the code unit at the given position.
///
/// The following example uses the subscript to print the value of a
/// string's first UTF-8 code unit.
///
///     let greeting = "Hello, friend!"
///     let i = greeting.utf8.startIndex
///     print("First character's UTF-8 code unit: \(greeting.utf8[i])")
///     // Prints "First character's UTF-8 code unit: 72"
///
/// - Parameter position: A valid index of the view. `position`
///   must be less than the view's end index.
@inlinable // FIXME(sil-serialize-all)
public subscript(position: Index) -> UTF8.CodeUnit {
  @inline(__always)
  get {
    // Anything that is not ASCII / small ASCII goes through the slow path.
    guard _fastPath(_guts._isASCIIOrSmallASCII) else {
      return _nonASCIISubscript(position: position)
    }
    let codeUnitOffset = position.encodedOffset
    _precondition(codeUnitOffset < _guts.count, "Index out of bounds")
    // Small strings are read from the small-string view, all others from the
    // unmanaged ASCII buffer.
    if _guts._isSmall {
      return _guts._smallUTF8String[codeUnitOffset]
    } else {
      return _guts._unmanagedASCIIView.buffer[codeUnitOffset]
    }
  }
}
/// Reads the UTF-8 code unit at `position` for strings that did not take the
/// ASCII fast path.
///
/// An index that already carries a transcoded UTF-8 buffer is answered
/// straight from that buffer at `transcodedOffset`. Otherwise the index is
/// rebuilt from its encoded offset (presumably attaching a buffer) and the
/// lookup is retried.
///
/// - Precondition: the rebuilt index is less than `endIndex`.
@inline(never)
@_effects(releasenone)
@usableFromInline
internal func _nonASCIISubscript(position: Index) -> UTF8.CodeUnit {
  _sanityCheck(!_guts._isASCIIOrSmallASCII)
  var j = position
  while true {
    if let buffer = j.utf8Buffer {
      _onFastPath()
      return buffer[
        buffer.index(buffer.startIndex, offsetBy: j.transcodedOffset)]
    }
    j = _nonASCIIIndex(atEncodedOffset: j.encodedOffset)
    // Use the library-internal `_precondition`, matching the bounds check on
    // the ASCII path of the subscript.
    _precondition(j < endIndex, "Index out of bounds")
  }
}
/// A `String` constructed from the same `_guts` that back this view.
@inlinable // FIXME(sil-serialize-all)
public var description: String {
return String(_guts)
}
public var debugDescription: String {
return "UTF8View(\(self.description.debugDescription))"
@inlinable @inline(__always)
internal init(_ guts: _StringGuts) {
self._guts = guts
_invariantCheck()
}
}
}
/// A UTF-8 encoding of `self`.
@inlinable // FIXME(sil-serialize-all)
public var utf8: UTF8View {
get {
return UTF8View(self._guts)
extension String.UTF8View {
/// Consistency hook for the view.
///
/// The `INTERNAL_CHECKS_ENABLED` section contains no checks, so this is
/// currently a no-op in every build configuration.
@inlinable @inline(__always)
internal func _invariantCheck() {
#if INTERNAL_CHECKS_ENABLED
#endif
}
}
// TODO(UTF8 merge): when this refactoring lands on master and we can get a
// toolchain, remove these and use the single-underscore ones. Will still need
// to solve access control somehow, perhaps shims need to expose them...
extension BidirectionalCollection {
/// Do not use this method directly; call advanced(by: n) instead.
///
/// Steps `i` forward `n` times with `formIndex(after:)` and returns the
/// resulting index.
///
/// - Precondition: `n >= 0`.
@inlinable
@inline(__always)
internal func __advanceForward(_ i: Index, by n: Int) -> Index {
  _precondition(n >= 0,
    "Only BidirectionalCollections can be advanced by a negative amount")
  var position = i
  var stepsLeft = n
  while stepsLeft > 0 {
    formIndex(after: &position)
    stepsLeft -= 1
  }
  return position
}
/// Do not use this method directly; call advanced(by: n, limit) instead.
///
/// Steps `i` forward up to `n` times. Returns `nil` as soon as the current
/// position equals `limit` while steps still remain.
///
/// - Precondition: `n >= 0`.
@inlinable
@inline(__always)
internal func __advanceForward(
  _ i: Index, by n: Int, limitedBy limit: Index
) -> Index? {
  _precondition(n >= 0,
    "Only BidirectionalCollections can be advanced by a negative amount")
  var position = i
  var stepsLeft = n
  while stepsLeft > 0 {
    guard position != limit else { return nil }
    formIndex(after: &position)
    stepsLeft -= 1
  }
  return position
}
/// Returns the index `n` positions away from `i`.
///
/// Non-negative offsets are forwarded to `__advanceForward`; negative offsets
/// walk backwards one position at a time with `formIndex(before:)`.
@inlinable // FIXME(sil-serialize-all)
public func __index(_ i: Index, offsetBy n: Int) -> Index {
  guard n < 0 else {
    return __advanceForward(i, by: n)
  }
  var position = i
  var stepsLeft = n
  while stepsLeft < 0 {
    formIndex(before: &position)
    stepsLeft += 1
  }
  return position
}
/// Returns the index `n` positions away from `i`, or `nil` if `limit` is
/// reached before all steps have been taken.
///
/// Non-negative offsets are forwarded to `__advanceForward`; negative offsets
/// walk backwards, checking against `limit` before every step.
@inlinable // FIXME(sil-serialize-all)
public func __index(
  _ i: Index, offsetBy n: Int, limitedBy limit: Index
) -> Index? {
  guard n < 0 else {
    return __advanceForward(i, by: n, limitedBy: limit)
  }
  var position = i
  var stepsLeft = n
  while stepsLeft < 0 {
    guard position != limit else { return nil }
    formIndex(before: &position)
    stepsLeft += 1
  }
  return position
}
/// Counts the steps needed to walk from `start` to `end`.
///
/// The result is positive when `start < end`, negative when `start > end`,
/// and zero when the two are equal.
@inlinable // FIXME(sil-serialize-all)
internal func __distance(from start: Index, to end: Index) -> Int {
  var cursor = start
  var steps = 0
  if cursor < end {
    // `cursor < end` guarantees at least one step.
    repeat {
      steps += 1
      formIndex(after: &cursor)
    } while cursor != end
  } else if cursor > end {
    repeat {
      steps -= 1
      formIndex(before: &cursor)
    } while cursor != end
  }
  return steps
}
}
extension String.UTF8View: BidirectionalCollection {
public typealias Index = String.Index
public typealias Element = UTF8.CodeUnit
/// The position of the first code unit if the UTF-8 view is
/// nonempty.
///
/// If the UTF-8 view is empty, `startIndex` is equal to `endIndex`.
@inlinable
public var startIndex: Index {
// Always encoded offset 0; no storage query is needed.
@inline(__always) get { return Index(encodedOffset: 0) }
}
/// The "past the end" position---that is, the position one
/// greater than the last valid subscript argument.
///
/// In an empty UTF-8 view, `endIndex` is equal to `startIndex`.
@inlinable
public var endIndex: Index {
// Built directly from `_guts.count` as an encoded offset.
@inline(__always) get { return Index(encodedOffset: _guts.count) }
}
/// Returns the next consecutive position after `i`.
///
/// Natively stored UTF-8 advances the encoded offset by one; foreign strings
/// are delegated to `_foreignIndex(after:)`.
///
/// - Precondition: The next position is representable.
@inlinable @inline(__always)
public func index(after i: Index) -> Index {
  guard _fastPath(_guts.isFastUTF8) else {
    return _foreignIndex(after: i)
  }
  let nextOffset = i.encodedOffset &+ 1
  return Index(encodedOffset: nextOffset)
}
/// Returns the position immediately before `i`.
///
/// Natively stored UTF-8 steps the encoded offset back by one. Foreign
/// strings are delegated to `_foreignIndex(before:)`, which can also step
/// back within a scalar's transcoded UTF-8 code units.
///
/// - Precondition: `i` is not the first position of the view.
@inlinable @inline(__always)
public func index(before i: Index) -> Index {
  if _fastPath(_guts.isFastUTF8) {
    _precondition(i.encodedOffset > 0, "Can't move before startIndex")
    return Index(encodedOffset: i.encodedOffset &- 1)
  }
  // A foreign index may sit inside the transcoded code units of the very
  // first scalar: encoded offset 0 with a nonzero transcoded offset is a
  // legal position to step back from, so only reject the true start.
  _precondition(
    i.encodedOffset > 0 || i.transcodedOffset > 0,
    "Can't move before startIndex")
  return _foreignIndex(before: i)
}
/// Returns the position `n` code units away from `i`.
///
/// Natively stored UTF-8 is plain offset arithmetic, bounds-checked against
/// `0..._guts.count`; foreign strings are delegated to `_foreignIndex`.
@inlinable @inline(__always)
public func index(_ i: Index, offsetBy n: Int) -> Index {
  guard _fastPath(_guts.isFastUTF8) else {
    return _foreignIndex(i, offsetBy: n)
  }
  let target = i.encodedOffset + n
  _precondition(target >= 0 && target <= _guts.count)
  return Index(encodedOffset: target)
}
/// Returns the position `n` code units away from `i`, or `nil` if that would
/// move past `limit`.
///
/// The limit is ignored when it lies behind `i` relative to the direction of
/// travel. Foreign strings are delegated to `_foreignIndex`.
@inlinable @inline(__always)
public func index(
  _ i: Index, offsetBy n: Int, limitedBy limit: Index
) -> Index? {
  guard _fastPath(_guts.isFastUTF8) else {
    return _foreignIndex(i, offsetBy: n, limitedBy: limit)
  }
  let origin = i.encodedOffset
  let target = origin + n
  let bound = limit.encodedOffset
  // The limit only counts when it is at or beyond `i` in the direction of
  // travel; in that case the target must not go past it.
  let overshoots: Bool
  if n >= 0 {
    overshoots = bound >= origin && target > bound
  } else {
    overshoots = bound <= origin && target < bound
  }
  return overshoots ? nil : Index(encodedOffset: target)
}
/// Returns the distance from `i` to `j` in UTF-8 code units.
///
/// Natively stored UTF-8 subtracts encoded offsets; foreign strings are
/// delegated to `_foreignDistance`.
@inlinable @inline(__always)
public func distance(from i: Index, to j: Index) -> Int {
  guard _fastPath(_guts.isFastUTF8) else {
    return _foreignDistance(from: i, to: j)
  }
  return j.encodedOffset &- i.encodedOffset
}
/// Accesses the code unit at the given position.
///
/// The following example uses the subscript to print the value of a
/// string's first UTF-8 code unit.
///
///     let greeting = "Hello, friend!"
///     let i = greeting.utf8.startIndex
///     print("First character's UTF-8 code unit: \(greeting.utf8[i])")
///     // Prints "First character's UTF-8 code unit: 72"
///
/// - Parameter position: A valid index of the view. `position`
///   must be less than the view's end index.
@inlinable
public subscript(i: Index) -> UTF8.CodeUnit {
  @inline(__always) get {
    _precondition(i.encodedOffset >= 0 && i < endIndex)
    guard _fastPath(_guts.isFastUTF8) else {
      return _foreignSubscript(position: i)
    }
    let offset = i.encodedOffset
    return _guts.withFastUTF8 { $0[offset] }
  }
}
}
extension String.UTF8View: CustomStringConvertible {
  /// A `String` constructed from the same `_guts` that back this view.
  @inlinable
  public var description: String {
    // One conversion is enough; wrapping the result in a second `String(...)`
    // only produced a copy of the same value.
    @inline(__always) get { return String(_guts) }
  }
}
extension String.UTF8View: CustomDebugStringConvertible {
/// The view's `description`, rendered through its own `debugDescription` and
/// wrapped in `UTF8View(...)`.
public var debugDescription: String {
return "UTF8View(\(self.description.debugDescription))"
}
}
extension String {
/// A UTF-8 encoding of `self`.
///
/// Assigning a view replaces the string with one built from the view's guts.
@inlinable
public var utf8: UTF8View {
  @inline(__always) get { return UTF8View(self._guts) }
  set {
    // The assignment is the whole implementation, so the setter must not go
    // on to call the "unimplemented" placeholder after doing the work.
    self = String(newValue._guts)
  }
}
@@ -427,208 +344,65 @@ extension String {
/// }
/// // Prints "6"
public var utf8CString: ContiguousArray<CChar> {
// Fast path: natively stored UTF-8 is copied out in one step and then
// null-terminated.
if _fastPath(_guts.isFastUTF8) {
var result = _guts.withFastUTF8 { return ContiguousArray($0._asCChar) }
result.append(0)
return result
}
// Everything else is built one code unit at a time.
return _slowUTF8CString()
}
/// Slow path of `utf8CString`: builds the null-terminated C string by walking
/// the UTF-8 view one code unit at a time.
///
/// - Returns: The string's UTF-8 code units reinterpreted as `CChar`,
///   followed by a single `0` terminator.
@usableFromInline @inline(never) // slow-path
internal func _slowUTF8CString() -> ContiguousArray<CChar> {
  var result = ContiguousArray<CChar>()
  // Reserve once, from the stored count plus the terminator. This is only a
  // capacity hint; the array still grows if the UTF-8 form is longer.
  result.reserveCapacity(self._guts.count + 1)
  for c in self.utf8 {
    result.append(CChar(bitPattern: c))
  }
  result.append(0)
  return result
}
/// Calls `body` with a buffer of this string's UTF-8 code units and returns
/// its result, rethrowing anything `body` throws.
///
/// ASCII guts hand `body` their existing buffer directly. Any other string is
/// first copied into a temporary array with a trailing `0` appended.
///
/// NOTE(review): the temporary array's terminating `0` is part of the buffer
/// `body` sees, while the ASCII path passes the view's buffer as-is. Confirm
/// callers do not rely on the buffer's `count` meaning the same thing on both
/// paths.
@inlinable // FIXME(sil-serialize-all)
internal func _withUnsafeBufferPointerToUTF8<R>(
_ body: (UnsafeBufferPointer<UTF8.CodeUnit>) throws -> R
) rethrows -> R {
if _guts.isASCII {
return try body(_guts._unmanagedASCIIView.buffer)
}
var nullTerminatedUTF8 = ContiguousArray<UTF8.CodeUnit>()
nullTerminatedUTF8.reserveCapacity(utf8.count + 1)
nullTerminatedUTF8 += utf8
nullTerminatedUTF8.append(0)
return try nullTerminatedUTF8.withUnsafeBufferPointer(body)
}
/// Creates a string corresponding to the given sequence of UTF-8 code units.
///
/// The new string is built directly from the view's guts.
///
/// - Parameter utf8: The UTF-8 view to convert.
@available(swift, introduced: 4.0, message:
  "Please use failable String.init?(_:UTF8View) when in Swift 3.2 mode")
@inlinable @inline(__always)
public init(_ utf8: UTF8View) {
  self = String(utf8._guts)
}
/// The index type for subscripting a string.
public typealias UTF8Index = UTF8View.Index
}
extension String.UTF8View : _SwiftStringView {
/// A `String` constructed from this view's guts.
@inlinable // FIXME(sil-serialize-all)
internal var _persistentContent : String {
return String(self._guts)
}
/// The whole string backing this view; built the same way as
/// `_persistentContent`.
@inlinable // FIXME(sil-serialize-all)
var _wholeString : String {
return String(_guts)
}
/// The view's extent as encoded offsets: always `0..<_guts.count`.
@inlinable // FIXME(sil-serialize-all)
var _encodedOffsetRange : Range<Int> {
return 0..<_guts.count
}
}
// TODO(UTF8): design specialized iterator, rather than default indexing one
//extension String.UTF8View {
// @_fixed_layout // FIXME(sil-serialize-all)
// public struct Iterator {
// // TODO(UTF8):
// }
//
// public func makeIterator() -> Iterator {
// unimplemented_utf8()
// }
//}
//
//extension String.UTF8View.Iterator : IteratorProtocol {
// public typealias Element = String.UTF8View.Element
//
// @inlinable @inline(__always)
// public mutating func next() -> Unicode.UTF8.CodeUnit? {
// unimplemented_utf8()
// }
//}
extension String.UTF8View {
/// An iterator over the UTF-8 code units of a `String.UTF8View`.
@_fixed_layout // FIXME(sil-serialize-all)
public struct Iterator {
// Staging area for transcoded UTF-8 code units, backed by a `UInt64`.
@usableFromInline
internal typealias _OutputBuffer = _ValidUTF8Buffer<UInt64>
// The string storage being iterated.
@usableFromInline
internal let _guts: _StringGuts
// Encoded offset at which iteration stops.
@usableFromInline
internal let _endOffset: Int
// Encoded offset of the next code unit to read from `_guts`.
@usableFromInline // FIXME(sil-serialize-all)
internal var _nextOffset: Int
// UTF-8 code units already produced but not yet returned by `next()`.
@usableFromInline // FIXME(sil-serialize-all)
internal var _buffer: _OutputBuffer
}
/// Returns an iterator over this view's code units, starting at offset 0.
public func makeIterator() -> Iterator {
return Iterator(self)
}
}
extension String.UTF8View.Iterator : IteratorProtocol {
public typealias Element = String.UTF8View.Element
/// Creates an iterator positioned at offset 0 of `utf8`'s guts, with an
/// empty output buffer and an end offset of `utf8._guts.count`.
@inlinable // FIXME(sil-serialize-all)
internal init(_ utf8: String.UTF8View) {
self._guts = utf8._guts
self._nextOffset = 0
self._buffer = _OutputBuffer()
self._endOffset = utf8._guts.count
}
/// Exhausts the iterator: moves the read position to the end and discards
/// any buffered code units.
internal mutating func _clear() {
self._nextOffset = self._endOffset
self._buffer = _OutputBuffer()
}
/// Returns the next UTF-8 code unit, or `nil` once both the source and the
/// output buffer are exhausted.
@inlinable // FIXME(sil-serialize-all)
@inline(__always)
public mutating func next() -> Unicode.UTF8.CodeUnit? {
// Done only when the source is consumed AND nothing is left buffered.
if _slowPath(_nextOffset == _endOffset) {
if _slowPath(_buffer.isEmpty) {
return nil
}
}
// ASCII and small strings are read directly, one code unit per call.
if _guts.isASCII {
defer { _nextOffset += 1 }
return _guts._unmanagedASCIIView.buffer[_nextOffset]
}
if _guts._isSmall {
defer { _nextOffset += 1 }
return _guts._smallUTF8String[_nextOffset]
}
// Otherwise drain the buffer, refilling it when it runs dry.
if _fastPath(!_buffer.isEmpty) {
return _buffer.removeFirst()
}
return _fillBuffer()
}
/// Refills the output buffer from non-ASCII storage and returns its first
/// code unit. Opaque guts are routed through `_opaqueFillBuffer()`.
@usableFromInline
@inline(never)
internal mutating func _fillBuffer() -> Unicode.UTF8.CodeUnit {
_sanityCheck(!_guts.isASCII, "next() already checks for known ASCII")
if _slowPath(_guts._isOpaque) {
return _opaqueFillBuffer()
}
defer { _fixLifetime(_guts) }
return _fillBuffer(from: _guts._unmanagedUTF16View)
}
/// Refills the output buffer from opaque guts and returns its first code
/// unit.
@usableFromInline // @opaque
internal mutating func _opaqueFillBuffer() -> Unicode.UTF8.CodeUnit {
_sanityCheck(_guts._isOpaque)
defer { _fixLifetime(_guts) }
return _fillBuffer(from: _guts._asOpaque())
}
/// Transcodes as much of `variant` (starting at `_nextOffset`) as fits into
/// the output buffer, advances `_nextOffset` past what was consumed, and
/// returns the first buffered code unit.
// NOT @usableFromInline
internal mutating func _fillBuffer<V: _StringVariant>(
from variant: V
) -> Unicode.UTF8.CodeUnit {
// Eat as many ASCII characters as possible
let asciiEnd = Swift.min(_nextOffset + _buffer.capacity, _endOffset)
for cu in variant[_nextOffset..<asciiEnd] {
if !UTF16._isASCII(cu) { break }
_buffer.append(UInt8(truncatingIfNeeded: cu))
_nextOffset += 1
}
// The ASCII run filled the whole window; nothing left to transcode now.
if _nextOffset == asciiEnd {
return _buffer.removeFirst()
}
// Decode UTF-16, encode UTF-8
for scalar in IteratorSequence(
variant[_nextOffset..<_endOffset].makeUnicodeScalarIterator()) {
let u8 = UTF8.encode(scalar)._unsafelyUnwrappedUnchecked
let c8 = u8.count
// Stop before a scalar that would not fit in the remaining capacity.
guard _buffer.count + c8 <= _buffer.capacity else { break }
_buffer.append(contentsOf: u8)
// Advance by 1 source unit, or by 2 when the scalar took 4 UTF-8 code
// units (`c8 &>> 2` is 1 only for `c8 == 4`).
_nextOffset += 1 &+ (c8 &>> 2)
}
return _buffer.removeFirst()
}
}
// Returns how many UTF-8 code units a UTF-16 code unit adds to a running
// count. The previous code unit matters only for trailing surrogates: one
// that follows a leading surrogate adds 1 (the leading surrogate already
// contributed 3), while an unpaired one counts as 3.
internal func _utf8Count(_ utf16CU: UInt16, prev: UInt16) -> Int {
  if utf16CU < 0x80 { return 1 }
  if utf16CU < 0x800 { return 2 }
  let isTrailingSurrogate = utf16CU >= 0xDC00 && utf16CU < 0xE000
  if isTrailingSurrogate && UTF16.isLeadSurrogate(prev) { return 1 }
  return 3
}
extension String.UTF8View {
/// Returns the number of UTF-8 code units needed for the UTF-16 code units
/// in `source`, summing `_utf8Count` over each unit together with its
/// predecessor (the first unit is paired with `0`).
internal static func _count<Source: RandomAccessCollection>(
  fromUTF16 source: Source
) -> Int where Source.Element == Unicode.UTF16.CodeUnit {
  let initial: (total: Int, previous: Unicode.UTF16.CodeUnit) = (0, 0)
  let folded = source.reduce(into: initial) { state, unit in
    state.total += _utf8Count(unit, prev: state.previous)
    state.previous = unit
  }
  return folded.total
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public var count: Int {
let gutsCount = _guts.count
if _fastPath(_guts._isASCIIOrSmallASCII) { return gutsCount }
return _gutsNonASCIIUTF8Count(0..<gutsCount)
}
@inline(never)
@_effects(releasenone)
@usableFromInline
internal func _gutsNonASCIIUTF8Count(
_ range: Range<Int>
) -> Int {
_sanityCheck(!_guts._isASCIIOrSmallASCII)
return _visitGuts(_guts, range: (range, performBoundsCheck: true),
ascii: { ascii in return ascii.count },
utf16: { utf16 in return String.UTF8View._count(fromUTF16: utf16) },
opaque: { opaque in return String.UTF8View._count(fromUTF16: opaque) }
)
@inline(__always) get {
if _fastPath(_guts.isFastUTF8) {
return _guts.count
}
return _foreignCount()
}
}
}
@@ -673,13 +447,7 @@ extension String.UTF8View.Index {
/// - target: The `UTF8View` in which to find the new position.
@inlinable // FIXME(sil-serialize-all)
public init?(_ idx: String.Index, within target: String.UTF8View) {
// Accept `idx` when it is already a UTF-8 index, or when it falls on a
// Unicode scalar boundary of `target`; otherwise fail.
guard idx.isUTF8 ||
String.UnicodeScalarView(target._guts)._isOnUnicodeScalarBoundary(idx)
else {
return nil
}
self = idx
// NOTE(review): this call follows a complete implementation above. If
// `unimplemented_utf8()` traps, as its name suggests (its definition is not
// visible here), the initializer can never return successfully. Confirm
// which of the two is intended.
unimplemented_utf8()
}
}
@@ -691,6 +459,7 @@ extension String.UTF8View : CustomReflectable {
}
}
// TODO(UTF8): Can we just unify this view?
//===--- Slicing Support --------------------------------------------------===//
/// In Swift 3.2, in the absence of type context,
///
@@ -702,10 +471,10 @@ extension String.UTF8View : CustomReflectable {
extension String.UTF8View {
  public typealias SubSequence = Substring.UTF8View

  /// Accesses the code units in the given range of indices as a
  /// `Substring.UTF8View` over this view.
  ///
  /// - Parameter r: The range of indices to slice.
  @inlinable
  @available(swift, introduced: 4)
  public subscript(r: Range<Index>) -> String.UTF8View.SubSequence {
    return Substring.UTF8View(self, _bounds: r)
  }
}
@@ -713,37 +482,119 @@ extension String.UTF8View {
/// Copies `self` into the supplied buffer.
///
/// - Precondition: The memory in `buffer` is uninitialized. The buffer must
///   contain sufficient uninitialized memory to accommodate
///   `source.underestimatedCount`.
///
/// - Postcondition: The `Pointee`s at `buffer[startIndex..<returned index]`
///   are initialized.
///
/// - Returns: An iterator over an empty view (everything has already been
///   copied) and the index just past the last initialized element.
@inlinable @inline(__always)
public func _copyContents(
  initializing buffer: UnsafeMutableBufferPointer<Iterator.Element>
) -> (Iterator, UnsafeMutableBufferPointer<Iterator.Element>.Index) {
  guard buffer.baseAddress != nil else {
    _preconditionFailure(
      "Attempt to copy string contents into nil buffer pointer")
  }
  // `copyUTF8(into:)` yields `nil` when the buffer cannot hold the contents.
  guard let written = _guts.copyUTF8(into: buffer) else {
    _preconditionFailure(
      "Insufficient space allocated to copy string contents")
  }
  // Nothing is left to iterate, so hand back an iterator over an empty
  // string's view.
  let it = String().utf8.makeIterator()
  return (it, buffer.index(buffer.startIndex, offsetBy: written))
}
}
// Foreign string support
extension String.UTF8View {
/// Returns the position one UTF-8 code unit after `i` in a foreign string.
///
/// The UTF-16 code unit at `i.encodedOffset` determines how many UTF-8 code
/// units its scalar transcodes to. While `i` is still inside that scalar the
/// result only advances the transcoded offset; once the last transcoded code
/// unit is passed, the result moves to the next scalar's encoded offset.
@usableFromInline @inline(never)
@_effects(releasenone)
internal func _foreignIndex(after i: Index) -> Index {
_sanityCheck(_guts.isForeign)
let cu = _guts.foreignUTF16CodeUnit(at: i.encodedOffset)
let len = _numTranscodedUTF8CodeUnits(cu)
if len == 1 {
_sanityCheck(i.transcodedOffset == 0)
return Index(encodedOffset: i.encodedOffset + 1)
}
// Check if we're still transcoding sub-scalar
if i.transcodedOffset < len - 1 {
return Index(transcodedAfter: i)
}
// Skip to the next scalar
// A scalar with 4 UTF-8 code units spans 2 encoded (UTF-16) units.
let scalarLen = len == 4 ? 2 : 1
return Index(encodedOffset: i.encodedOffset + scalarLen)
}
/// Returns the position one UTF-8 code unit before `i` in a foreign string.
///
/// Inside a transcoded scalar only the transcoded offset is stepped back.
/// At a scalar boundary the preceding scalar is located (skipping back over
/// a trailing surrogate to its leading one) and the result addresses that
/// scalar's last transcoded UTF-8 code unit.
@usableFromInline @inline(never)
@_effects(releasenone)
internal func _foreignIndex(before i: Index) -> Index {
_sanityCheck(_guts.isForeign)
if i.transcodedOffset != 0 {
_sanityCheck((1...3) ~= i.transcodedOffset)
return Index(transcodedBefore: i)
}
// NOTE(review): reads offset `encodedOffset - 1` with wrapping arithmetic;
// relies on the caller having rejected the start position.
var offset = i.encodedOffset &- 1
var cu = _guts.foreignUTF16CodeUnit(at: offset)
if _isTrailingSurrogate(cu) {
offset = offset &- 1
_sanityCheck(offset >= 0)
cu = _guts.foreignUTF16CodeUnit(at: offset)
}
let len = _numTranscodedUTF8CodeUnits(cu)
return Index(encodedOffset: offset, transcodedOffset: len &- 1)
}
/// Reads the UTF-8 code unit at `i` in a foreign string.
///
/// The scalar starting at `i.encodedOffset` is decoded and re-encoded as
/// UTF-8; the result is the code unit at `i.transcodedOffset` within that
/// encoding.
@usableFromInline @inline(never)
@_effects(releasenone)
internal func _foreignSubscript(position i: Index) -> UTF8.CodeUnit {
  _sanityCheck(_guts.isForeign)

  // Currently, foreign means NSString
  // TODO(UTF8 perf): Could probably work just off a single code unit
  let scalar = _guts.foreignScalar(startingAt: i.encodedOffset)
  let encoded = Unicode.UTF8.encode(scalar)._unsafelyUnwrappedUnchecked
  // Valid transcoded offsets are 0..<encoded.count; an offset equal to the
  // count would index one past the encoded scalar.
  _sanityCheck(i.transcodedOffset < encoded.count)
  return encoded[
    encoded.index(encoded.startIndex, offsetBy: i.transcodedOffset)]
}
/// Foreign-string slow path for `index(_:offsetBy:)`: forwards to the
/// step-by-step `__index(_:offsetBy:)`.
@usableFromInline @inline(never)
@_effects(releasenone)
internal func _foreignIndex(_ i: Index, offsetBy n: Int) -> Index {
_sanityCheck(_guts.isForeign)
return __index(i, offsetBy: n)
}
/// Foreign-string slow path for `index(_:offsetBy:limitedBy:)`: forwards to
/// the step-by-step `__index(_:offsetBy:limitedBy:)`.
@usableFromInline @inline(never)
@_effects(releasenone)
internal func _foreignIndex(
_ i: Index, offsetBy n: Int, limitedBy limit: Index
) -> Index? {
_sanityCheck(_guts.isForeign)
return __index(i, offsetBy: n, limitedBy: limit)
}
/// Foreign-string slow path for `distance(from:to:)`: forwards to the
/// step-counting `__distance(from:to:)`.
@usableFromInline @inline(never)
@_effects(releasenone)
internal func _foreignDistance(from i: Index, to j: Index) -> Int {
_sanityCheck(_guts.isForeign)
return __distance(from: i, to: j)
}
/// Foreign-string slow path for `count`: walks from `startIndex` to
/// `endIndex`, counting steps.
@usableFromInline @inline(never)
@_effects(releasenone)
internal func _foreignCount() -> Int {
_sanityCheck(_guts.isForeign)
return __distance(from: startIndex, to: endIndex)
}
}
+390 -347
View File
@@ -10,6 +10,184 @@
//
//===----------------------------------------------------------------------===//
// TODO(UTF8 merge): Find a common place for these helpers
/// Returns `true` when `x` is an ASCII code unit, i.e. its high bit is clear.
@inlinable @inline(__always)
internal func _isASCII(_ x: UInt8) -> Bool {
  return x < 0x80
}
/// Decodes a single-byte (ASCII) UTF-8 sequence into a scalar.
///
/// The byte is asserted to be ASCII and its value is used as the scalar
/// value unchanged.
@inlinable @inline(__always)
internal func _decodeUTF8(_ x: UInt8) -> Unicode.Scalar {
_sanityCheck(_isASCII(x))
return Unicode.Scalar(_unchecked: UInt32(x))
}
/// Decodes a two-byte UTF-8 sequence into a scalar.
///
/// The lead byte contributes its low 5 bits and the continuation byte its
/// 6-bit payload.
@inlinable @inline(__always)
internal func _decodeUTF8(_ x: UInt8, _ y: UInt8) -> Unicode.Scalar {
  _sanityCheck(_utf8ScalarLength(x) == 2)
  _sanityCheck(_isContinuation(y))
  let leadBits = UInt32(x) & 0b0001_1111
  let tailBits = _continuationPayload(y)
  return Unicode.Scalar(_unchecked: (leadBits &<< 6) | tailBits)
}
/// Decodes a three-byte UTF-8 sequence into a scalar.
///
/// The lead byte contributes its low 4 bits and each continuation byte its
/// 6-bit payload.
@inlinable @inline(__always)
internal func _decodeUTF8(
  _ x: UInt8, _ y: UInt8, _ z: UInt8
) -> Unicode.Scalar {
  _sanityCheck(_utf8ScalarLength(x) == 3)
  _sanityCheck(_isContinuation(y) && _isContinuation(z))
  let high = (UInt32(x) & 0b0000_1111) &<< 12
  let middle = _continuationPayload(y) &<< 6
  let low = _continuationPayload(z)
  return Unicode.Scalar(_unchecked: high | middle | low)
}
/// Decodes a four-byte UTF-8 sequence into a scalar.
///
/// The lead byte (`11110xxx`) contributes its low 3 bits and each of the
/// three continuation bytes its 6-bit payload.
@inlinable @inline(__always)
internal func _decodeUTF8(
  _ x: UInt8, _ y: UInt8, _ z: UInt8, _ w: UInt8
) -> Unicode.Scalar {
  _sanityCheck(_utf8ScalarLength(x) == 4)
  _sanityCheck(
    _isContinuation(y) && _isContinuation(z) && _isContinuation(w))
  let x = UInt32(x)
  // Only the low 3 bits of a 4-byte lead are payload; bit 3 is the `0` that
  // terminates the length prefix and must not leak into the scalar value.
  let value = ((x & 0b0000_0111) &<< 18)
    | (_continuationPayload(y) &<< 12)
    | (_continuationPayload(z) &<< 6)
    | _continuationPayload(w)
  return Unicode.Scalar(_unchecked: value)
}
/// Returns the length in code units of the UTF-8 sequence introduced by the
/// lead byte `x`.
///
/// ASCII bytes have length 1. For any other lead byte the length is the
/// number of leading 1 bits, computed as the leading-zero count of the
/// complement. `x` must not be a continuation byte.
@usableFromInline @inline(__always)
internal func _utf8ScalarLength(_ x: UInt8) -> Int {
_sanityCheck(!_isContinuation(x))
if _isASCII(x) { return 1 }
// TODO(UTF8): Not great codegen on x86
return (~x).leadingZeroBitCount
}
/// Returns `true` when `x` is a UTF-8 continuation byte, i.e. its top two
/// bits are `10`.
@usableFromInline @inline(__always)
internal func _isContinuation(_ x: UInt8) -> Bool {
  let topTwoBits = x >> 6
  return topTwoBits == 0b10
}
/// Returns the low 6 bits of `x`, widened to `UInt32`: the payload carried by
/// a UTF-8 continuation byte.
@usableFromInline @inline(__always)
internal func _continuationPayload(_ x: UInt8) -> UInt32 {
  let widened = UInt32(x)
  return widened & 0x3F
}
/// Combines a UTF-16 surrogate pair into the scalar value it encodes.
///
/// Each surrogate has its bias subtracted to recover 10 payload bits; the
/// leading surrogate supplies the high 10 bits, the trailing one the low 10,
/// and `0x10000` is added to the combined 20-bit value.
///
/// - Parameters:
///   - high: The leading surrogate.
///   - low: The trailing surrogate.
/// - Returns: The decoded scalar value.
@inline(__always)
internal func _decodeSurrogatePair(
leading high: UInt16, trailing low: UInt16
) -> UInt32 {
_sanityCheck(_isLeadingSurrogate(high) && _isTrailingSurrogate(low))
let hi10: UInt32 = UInt32(high) &- UInt32(_leadingSurrogateBias)
_sanityCheck(hi10 < 1<<10, "I said high 10. Not high, like, 20 or something")
let lo10: UInt32 = UInt32(low) &- UInt32(_trailingSurrogateBias)
_sanityCheck(lo10 < 1<<10, "I said low 10. Not low, like, 20 or something")
return ((hi10 &<< 10) | lo10) &+ 0x1_00_00
}
extension _StringGuts {
// TODO(UTF8): Should probably take a String.Index, assert no transcoding
/// Returns the length in UTF-8 code units of the scalar whose lead byte is at
/// offset `i` of fast UTF-8 storage.
@usableFromInline @inline(__always)
internal func fastUTF8ScalarLength(startingAt i: Int) -> Int {
_sanityCheck(isFastUTF8)
let len = _utf8ScalarLength(self.withFastUTF8 { $0[i] })
_sanityCheck((1...4) ~= len)
return len
}
// TODO(UTF8): Should probably take a String.Index, assert no transcoding
/// Returns the length in UTF-8 code units of the scalar that ends just before
/// offset `i` of fast UTF-8 storage.
///
/// Walks backwards from `i` over continuation bytes until it reaches a
/// non-continuation byte. `i` itself must be the end of the storage or the
/// start of another scalar.
@usableFromInline @inline(__always)
internal func fastUTF8ScalarLength(endingAt i: Int) -> Int {
_sanityCheck(isFastUTF8)
return self.withFastUTF8 { utf8 in
_sanityCheck(i == utf8.count || !_isContinuation(utf8[i]))
var len = 1
while _isContinuation(utf8[i - len]) {
// A continuation byte must be preceded by at least one more byte.
_sanityCheck(i - len > 0)
len += 1
}
_sanityCheck(len <= 4)
return len
}
}
// TODO(UTF8): Should probably take a String.Index, assert no transcoding
/// Decodes the scalar whose lead byte is at offset `i` of fast UTF-8 storage.
///
/// The lead byte selects the sequence length (1 through 4), and the matching
/// `_decodeUTF8` overload is applied to that many consecutive code units.
@usableFromInline @inline(__always)
internal func fastUTF8Scalar(startingAt i: Int) -> Unicode.Scalar {
_sanityCheck(isFastUTF8)
return self.withFastUTF8 { utf8 in
let cu0 = utf8[i]
switch _utf8ScalarLength(cu0) {
case 1: return _decodeUTF8(cu0)
case 2: return _decodeUTF8(cu0, utf8[i &+ 1])
case 3: return _decodeUTF8(cu0, utf8[i &+ 1], utf8[i &+ 2])
case 4: return _decodeUTF8(cu0, utf8[i &+ 1], utf8[i &+ 2], utf8[i &+ 3])
// Any other length is treated as impossible.
default: Builtin.unreachable()
}
}
}
// TODO(UTF8): Should probably take a String.Index, assert no transcoding
/// Decodes the scalar that starts at UTF-16 offset `i` of a foreign string.
///
/// A leading surrogate is combined with the code unit that follows it; any
/// other code unit is used as the scalar value directly.
@_effects(releasenone)
internal func foreignScalar(startingAt i: Int) -> Unicode.Scalar {
  let lead = foreignUTF16CodeUnit(at: i)
  _sanityCheck(!_isTrailingSurrogate(lead))
  guard _slowPath(_isLeadingSurrogate(lead)) else {
    return Unicode.Scalar(_unchecked: UInt32(lead))
  }
  let trail = foreignUTF16CodeUnit(at: i+1)
  let value = _decodeSurrogatePair(leading: lead, trailing: trail)
  return Unicode.Scalar(_unchecked: value)
}
// TODO(UTF8): Should probably take a String.Index, assert no transcoding
/// Returns the length in UTF-16 code units (1 or 2) of the scalar that starts
/// at offset `i` of a foreign string: 2 when it begins with a leading
/// surrogate, otherwise 1.
@_effects(releasenone)
internal func foreignScalarLength(startingAt i: Int) -> Int {
  let first = foreignUTF16CodeUnit(at: i)
  _sanityCheck(!_isTrailingSurrogate(first))
  return _slowPath(_isLeadingSurrogate(first)) ? 2 : 1
}
// TODO(UTF8): Should probably take a String.Index, assert no transcoding
/// Returns the length in UTF-16 code units (1 or 2) of the scalar that ends
/// just before offset `i` of a foreign string: 2 when its last code unit is a
/// trailing surrogate, otherwise 1.
@_effects(releasenone)
internal func foreignScalarLength(endingAt i: Int) -> Int {
  let last = foreignUTF16CodeUnit(at: i &- 1)
  _sanityCheck(!_isLeadingSurrogate(last))
  return _slowPath(_isTrailingSurrogate(last)) ? 2 : 1
}
/// Returns `true` when `i`'s encoded offset falls on a Unicode scalar
/// boundary.
///
/// The start and end offsets are always boundaries. Elsewhere, fast UTF-8
/// storage requires a non-continuation byte at the offset, and foreign
/// storage requires a code unit that is not a trailing surrogate.
@_effects(releasenone)
internal func isOnUnicodeScalarBoundary(_ i: String.Index) -> Bool {
  // TODO(UTF8 perf): isASCII check

  // TODO(UTF8): Guts bounds check helper, or something in terms of Index
  let offset = i.encodedOffset
  guard offset != 0 && offset != self.count else { return true }

  guard _fastPath(isFastUTF8) else {
    let unit = foreignUTF16CodeUnit(at: offset)
    return !_isTrailingSurrogate(unit)
  }
  return self.withFastUTF8 { !_isContinuation($0[offset]) }
}
}
extension String {
/// A view of a string's contents as a collection of Unicode scalar values.
///
@@ -57,218 +235,162 @@ extension String {
/// print(asciiPrefix)
/// }
/// // Prints "My favorite emoji is "
@_fixed_layout // FIXME(sil-serialize-all)
public struct UnicodeScalarView :
BidirectionalCollection,
CustomStringConvertible,
CustomDebugStringConvertible
{
@_fixed_layout
public struct UnicodeScalarView {
@usableFromInline
internal var _guts: _StringGuts
/// The offset of this view's `_guts` from the start of an original string,
/// in UTF-16 code units. This is here to support legacy Swift 3-style
/// slicing where `s.unicodeScalars[i..<j]` produces a
/// `String.UnicodeScalarView`. The offset should be subtracted from the
/// `encodedOffset` of view indices before it is passed to `_guts`.
///
/// Note: This should be removed when Swift 3 semantics are no longer
/// supported.
@usableFromInline // FIXME(sil-serialize-all)
internal var _coreOffset: Int
@inlinable // FIXME(sil-serialize-all)
internal init(_ _guts: _StringGuts, coreOffset: Int = 0) {
@inlinable @inline(__always)
internal init(_ _guts: _StringGuts) {
self._guts = _guts
self._coreOffset = coreOffset
}
public typealias Index = String.Index
/// Converts a `_guts` offset into a view index by adding this view's
/// `_coreOffset`.
@inlinable // FIXME(sil-serialize-all)
internal func _fromCoreIndex(_ i: Int) -> Index {
  let viewOffset = i + _coreOffset
  return Index(encodedOffset: viewOffset)
}
/// Converts a view index into a `_guts` offset by subtracting this view's
/// `_coreOffset` from the index's encoded offset.
@inlinable // FIXME(sil-serialize-all)
internal func _toCoreIndex(_ i: Index) -> Int {
  let viewOffset = i.encodedOffset
  return viewOffset - _coreOffset
}
/// The position of the first Unicode scalar value if the string is
/// nonempty.
///
/// If the string is empty, `startIndex` is equal to `endIndex`.
@inlinable // FIXME(sil-serialize-all)
public var startIndex: Index {
// `_guts.startIndex` is in guts coordinates; shift it by `_coreOffset`.
return _fromCoreIndex(_guts.startIndex)
}
/// The "past the end" position---that is, the position one greater than
/// the last valid subscript argument.
///
/// In an empty Unicode scalars view, `endIndex` is equal to `startIndex`.
@inlinable // FIXME(sil-serialize-all)
public var endIndex: Index {
// `_guts.endIndex` is in guts coordinates; shift it by `_coreOffset`.
return _fromCoreIndex(_guts.endIndex)
}
/// Returns the next consecutive location after `i`.
///
/// The index is translated into guts coordinates, advanced by the width of
/// the scalar starting there (always 1 for ASCII storage), and translated
/// back.
///
/// - Precondition: The next location exists.
@inlinable // FIXME(sil-serialize-all)
public func index(after i: Index) -> Index {
let offset = _toCoreIndex(i)
let length: Int = _visitGuts(_guts, args: offset,
ascii: { (_,_) -> Int in return 1 },
utf16: { utf16, offset in
return utf16.unicodeScalarWidth(startingAt: offset) },
opaque: { opaque, offset in
return opaque.unicodeScalarWidth(startingAt: offset) }
)
return _fromCoreIndex(offset + length)
}
/// Returns the previous consecutive location before `i`.
///
/// The index is translated into guts coordinates, moved back by the width of
/// the scalar ending there (always 1 for ASCII storage), and translated
/// back.
///
/// - Precondition: The previous location exists.
@inlinable // FIXME(sil-serialize-all)
public func index(before i: Index) -> Index {
let offset = _toCoreIndex(i)
let length: Int = _visitGuts(_guts, args: offset,
ascii: { (_,_) -> Int in return 1 },
utf16: { utf16, offset in
return utf16.unicodeScalarWidth(endingAt: offset) },
opaque: { opaque, offset in
return opaque.unicodeScalarWidth(endingAt: offset) }
)
return _fromCoreIndex(offset - length)
}
/// Accesses the Unicode scalar value at the given position.
///
/// The following example searches a string's Unicode scalars view for a
/// capital letter and then prints the character and Unicode scalar value
/// at the found index:
///
///     let greeting = "Hello, friend!"
///     if let i = greeting.unicodeScalars.firstIndex(where: { "A"..."Z" ~= $0 }) {
///         print("First capital letter: \(greeting.unicodeScalars[i])")
///         print("Unicode scalar value: \(greeting.unicodeScalars[i].value)")
///     }
///     // Prints "First capital letter: H"
///     // Prints "Unicode scalar value: 72"
///
/// - Parameter position: A valid index of the character view. `position`
///   must be less than the view's end index.
@inlinable // FIXME(sil-serialize-all)
public subscript(position: Index) -> Unicode.Scalar {
  // View indices are offset by `_coreOffset`; translate into guts
  // coordinates before reading, exactly as `index(after:)` and
  // `index(before:)` do. With a zero offset this is the same position.
  let offset = _toCoreIndex(position)
  return _guts.unicodeScalar(startingAt: offset)
}
/// An iterator over the Unicode scalars that make up a `UnicodeScalarView`
/// collection.
///
/// Exactly one of the optional per-representation iterators below is set by
/// the initializers; `next()` forwards to whichever one that is.
@_fixed_layout // FIXME(sil-serialize-all)
public struct Iterator : IteratorProtocol {
// The string storage; kept so the unmanaged iterators below stay valid.
@usableFromInline // FIXME(sil-serialize-all)
internal var _guts: _StringGuts
// FIXME(TODO: JIRA): the below is absurdly wasteful.
// UnicodeScalarView.Iterator should be able to be passed in-registers.
@usableFromInline // FIXME(sil-serialize-all)
internal var _asciiIterator: _UnmanagedASCIIString.UnicodeScalarIterator?
@usableFromInline // FIXME(sil-serialize-all)
internal var _utf16Iterator: _UnmanagedUTF16String.UnicodeScalarIterator?
@usableFromInline // FIXME(sil-serialize-all)
internal var _opaqueIterator: _UnmanagedOpaqueString.UnicodeScalarIterator?
@usableFromInline
internal var _smallIterator: _SmallUTF8String.UnicodeScalarIterator?
/// Creates an iterator over `guts`, dispatching on whether the guts are
/// opaque.
@inlinable // FIXME(sil-serialize-all)
internal init(_ guts: _StringGuts) {
if _slowPath(guts._isOpaque) {
self.init(_opaque: guts)
return
}
self.init(_concrete: guts)
}
/// Creates an iterator over non-opaque guts, using the ASCII iterator for
/// ASCII storage and the UTF-16 iterator otherwise.
@inlinable // FIXME(sil-serialize-all)
@inline(__always)
internal init(_concrete guts: _StringGuts) {
_sanityCheck(!guts._isOpaque)
self._guts = guts
defer { _fixLifetime(self) }
if _guts.isASCII {
self._asciiIterator =
_guts._unmanagedASCIIView.makeUnicodeScalarIterator()
} else {
self._utf16Iterator =
_guts._unmanagedUTF16View.makeUnicodeScalarIterator()
}
}
/// Creates an iterator over opaque guts, using the small-string iterator
/// for small strings and the opaque iterator otherwise.
@usableFromInline // @opaque
init(_opaque _guts: _StringGuts) {
_sanityCheck(_guts._isOpaque)
defer { _fixLifetime(self) }
self._guts = _guts
// TODO: Replace the whole iterator scheme with a sensible solution.
if self._guts._isSmall {
self._smallIterator =
_guts._smallUTF8String.makeUnicodeScalarIterator()
} else {
self._opaqueIterator = _guts._asOpaque().makeUnicodeScalarIterator()
}
}
/// Advances to the next element and returns it, or `nil` if no next
/// element exists.
///
/// Once `nil` has been returned, all subsequent calls return `nil`.
///
/// - Precondition: `next()` has not been applied to a copy of `self`
/// since the copy was made.
@inlinable // FIXME(sil-serialize-all)
public mutating func next() -> Unicode.Scalar? {
// Checked in this order: opaque, ASCII, small, and finally UTF-16. The
// force-unwraps in the last two branches rely on the initializers having
// set the matching iterator.
if _slowPath(_opaqueIterator != nil) {
return _opaqueIterator!.next()
}
if _asciiIterator != nil {
return _asciiIterator!.next()
}
if _guts._isSmall {
return _smallIterator!.next()
}
return _utf16Iterator!.next()
}
}
/// Returns an iterator over the Unicode scalars that make up this view.
///
/// The iterator is built from the view's `_guts`.
///
/// - Returns: An iterator over this collection's `Unicode.Scalar` elements.
@inlinable // FIXME(sil-serialize-all)
public func makeIterator() -> Iterator {
return Iterator(_guts)
}
/// A `String` constructed from the same `_guts` that back this view.
@inlinable // FIXME(sil-serialize-all)
public var description: String {
return String(_guts)
}
public var debugDescription: String {
return "StringUnicodeScalarView(\(self.description.debugDescription))"
_invariantCheck()
}
}
}
extension String.UnicodeScalarView {
/// Consistency hook for the view.
///
/// The `INTERNAL_CHECKS_ENABLED` section contains no checks, so this is
/// currently a no-op in every build configuration.
@inlinable @inline(__always)
internal func _invariantCheck() {
#if INTERNAL_CHECKS_ENABLED
#endif
}
}
extension String.UnicodeScalarView: BidirectionalCollection {
public typealias Index = String.Index
/// The position of the first Unicode scalar value if the string is
/// nonempty.
///
/// If the string is empty, `startIndex` is equal to `endIndex`.
@inlinable
public var startIndex: Index {
// Always encoded offset 0; no storage query is needed.
@inline(__always) get { return Index(encodedOffset: 0) }
}
/// The "past the end" position---that is, the position one greater than
/// the last valid subscript argument.
///
/// In an empty Unicode scalars view, `endIndex` is equal to `startIndex`.
@inlinable
public var endIndex: Index {
// Built directly from `_guts.count` as an encoded offset.
@inline(__always) get { return Index(encodedOffset: _guts.count) }
}
/// Returns the next consecutive location after `i`.
///
/// Natively stored UTF-8 advances by the length of the scalar starting at
/// `i`; foreign strings are delegated to `_foreignIndex(after:)`.
///
/// - Precondition: The next location exists.
@inlinable @inline(__always)
public func index(after i: Index) -> Index {
  _sanityCheck(i < endIndex)
  // TODO(UTF8): isKnownASCII bit fast-path...
  guard _fastPath(_guts.isFastUTF8) else {
    return _foreignIndex(after: i)
  }
  let start = i.encodedOffset
  let width = _guts.fastUTF8ScalarLength(startingAt: start)
  return Index(encodedOffset: start &+ width)
}
/// Returns the previous consecutive location before `i`.
///
/// - Precondition: The previous location exists.
@inlinable @inline(__always)
public func index(before i: Index) -> Index {
precondition(i.encodedOffset > 0)
if _fastPath(_guts.isFastUTF8) {
// TODO(UTF8): isKnownASCII bit fast-path...
let len = _guts.withFastUTF8 { utf8 -> Int in
var len = 1
while _isContinuation(utf8[i.encodedOffset &- len]) {
len += 1
}
_sanityCheck(len == _utf8ScalarLength(utf8[i.encodedOffset - len]))
return len
}
_sanityCheck(len <= 4, "invalid UTF8")
return Index(encodedOffset: i.encodedOffset &- len)
}
return _foreignIndex(before: i)
}
/// Accesses the Unicode scalar value at the given position.
///
/// The following example searches a string's Unicode scalars view for a
/// capital letter and then prints the character and Unicode scalar value
/// at the found index:
///
/// let greeting = "Hello, friend!"
/// if let i = greeting.unicodeScalars.firstIndex(where: { "A"..."Z" ~= $0 }) {
/// print("First capital letter: \(greeting.unicodeScalars[i])")
/// print("Unicode scalar value: \(greeting.unicodeScalars[i].value)")
/// }
/// // Prints "First capital letter: H"
/// // Prints "Unicode scalar value: 72"
///
/// - Parameter position: A valid index of the character view. `position`
/// must be less than the view's end index.
@inlinable
public subscript(position: Index) -> Unicode.Scalar {
@inline(__always) get {
if _fastPath(_guts.isFastUTF8) {
return _guts.fastUTF8Scalar(startingAt: position.encodedOffset)
}
return _foreignSubscript(position: position)
}
}
}
// TODO(UTF8): design specialized iterator, rather than default indexing one
// extension String.UnicodeScalarView {
// /// An iterator over the Unicode scalars that make up a `UnicodeScalarView`
// /// collection.
// @_fixed_layout
// public struct Iterator : IteratorProtocol {
// // TODO:
// /// Advances to the next element and returns it, or `nil` if no next
// /// element exists.
// ///
// /// Once `nil` has been returned, all subsequent calls return `nil`.
// ///
// /// - Precondition: `next()` has not been applied to a copy of `self`
// /// since the copy was made.
// @inlinable @inline(__always)
// public mutating func next() -> Unicode.Scalar? {
// unimplemented_utf8()
// }
// }
// }
// extension String.UnicodeScalarView {
// /// Returns an iterator over the Unicode scalars that make up this view.
// ///
// /// - Returns: An iterator over this collection's `Unicode.Scalar` elements.
// @inlinable @inline(__always)
// public func makeIterator() -> Iterator {
// unimplemented_utf8()
// }
// }
extension String.UnicodeScalarView: CustomStringConvertible {
/// A `String` created from the same `_guts` that back this view.
@inlinable
public var description: String {
@inline(__always) get { return String(_guts) }
}
}
extension String.UnicodeScalarView: CustomDebugStringConvertible {
/// The debug description of the view's `description`, wrapped in
/// `StringUnicodeScalarView(...)`.
public var debugDescription: String {
return "StringUnicodeScalarView(\(self.description.debugDescription))"
}
}
extension String {
/// Creates a string corresponding to the given collection of Unicode
/// scalars.
///
@@ -286,74 +408,27 @@ extension String {
/// slice of the `picnicGuest.unicodeScalars` view.
///
/// - Parameter unicodeScalars: A collection of Unicode scalar values.
@inlinable // FIXME(sil-serialize-all)
@inlinable @inline(__always)
public init(_ unicodeScalars: UnicodeScalarView) {
self.init(unicodeScalars._guts)
}
/// The index type for a string's `unicodeScalars` view.
public typealias UnicodeScalarIndex = UnicodeScalarView.Index
}
// Scalar decoding at a code-unit offset, dispatched through `_visitGuts` to
// one of three representations: ASCII, UTF-16, or opaque.
extension _StringGuts {
/// Returns the Unicode scalar that starts at `offset`.
///
/// For ASCII storage the (bounds-checked) code unit at `offset` is widened
/// to a scalar without validation; the UTF-16 and opaque cases forward to
/// the variant's own `unicodeScalar(startingAt:)`.
@inlinable
internal func unicodeScalar(startingAt offset: Int) -> Unicode.Scalar {
return _visitGuts(self, args: offset,
ascii: { ascii, offset in
let u = ascii.codeUnit(atCheckedOffset: offset)
return Unicode.Scalar(_unchecked: UInt32(u)) },
utf16: { utf16, offset in
return utf16.unicodeScalar(startingAt: offset) },
opaque: { opaque, offset in
return opaque.unicodeScalar(startingAt: offset) })
}
/// Returns the Unicode scalar that ends just before `offset`.
///
/// For ASCII storage this is the (bounds-checked) code unit at
/// `offset - 1`; the UTF-16 and opaque cases forward to the variant's own
/// `unicodeScalar(endingAt:)`.
@inlinable
internal func unicodeScalar(endingAt offset: Int) -> Unicode.Scalar {
return _visitGuts(self, args: offset,
ascii: { ascii, offset in
// Wrapping subtraction; the checked accessor validates the result.
let u = ascii.codeUnit(atCheckedOffset: offset &- 1)
return Unicode.Scalar(_unchecked: UInt32(u)) },
utf16: { utf16, offset in
return utf16.unicodeScalar(endingAt: offset) },
opaque: { opaque, offset in
return opaque.unicodeScalar(endingAt: offset) })
}
}
// `_SwiftStringView` conformance: the view always spans its entire backing
// string, so every property is derived directly from `_guts`.
extension String.UnicodeScalarView : _SwiftStringView {
// A `String` built from this view's guts.
@inlinable // FIXME(sil-serialize-all)
internal var _persistentContent : String { return String(_guts) }
// The whole backing string; same construction as `_persistentContent`.
@inlinable // FIXME(sil-serialize-all)
var _wholeString : String {
return String(_guts)
}
// The covered code-unit offsets: everything from 0 up to `_guts.count`.
@inlinable // FIXME(sil-serialize-all)
var _encodedOffsetRange : Range<Int> {
return 0..<_guts.count
}
}
extension String {
  /// The string's value represented as a collection of Unicode scalar values.
  ///
  /// Reading the property wraps the string's `_guts` in a
  /// `UnicodeScalarView`; assigning a view replaces the string's `_guts` with
  /// the view's.
  @inlinable
  public var unicodeScalars: UnicodeScalarView {
    // A single getter/setter pair: the block previously carried two copies of
    // each accessor (and a duplicated `@inlinable`), which cannot compile.
    @inline(__always) get { return UnicodeScalarView(_guts) }
    @inline(__always) set { _guts = newValue._guts }
  }
}
extension String.UnicodeScalarView : RangeReplaceableCollection {
/// Creates an empty view instance.
@inlinable // FIXME(sil-serialize-all)
@inlinable @inline(__always)
public init() {
self = String.UnicodeScalarView(_StringGuts())
self.init(_StringGuts())
}
/// Reserves enough space in the view's underlying storage to store the
@@ -369,34 +444,15 @@ extension String.UnicodeScalarView : RangeReplaceableCollection {
///
/// - Complexity: O(*n*), where *n* is the capacity being reserved.
public mutating func reserveCapacity(_ n: Int) {
_guts.reserveCapacity(n)
self._guts.reserveCapacity(n)
}
/// Appends the given Unicode scalar to the view.
///
/// - Parameter c: The character to append to the string.
public mutating func append(_ c: Unicode.Scalar) {
if _fastPath(_guts.isASCII && c.value <= 0x7f) {
_guts.withMutableASCIIStorage(unusedCapacity: 1) { storage in
unowned(unsafe) let s = storage._value
s.end.pointee = UInt8(c.value)
s.count += 1
}
} else {
let width = UTF16.width(c)
_guts.withMutableUTF16Storage(unusedCapacity: width) { storage in
unowned(unsafe) let s = storage._value
_sanityCheck(s.count + width <= s.capacity)
if _fastPath(width == 1) {
s.end.pointee = UTF16.CodeUnit(c.value)
} else {
_sanityCheck(width == 2)
s.end[0] = UTF16.leadSurrogate(c)
s.end[1] = UTF16.trailSurrogate(c)
}
s.count += width
}
}
// TODO(UTF8 perf): This is a horribly slow means...
self.append(contentsOf: [c])
}
/// Appends the Unicode scalar values in the given sequence to the view.
@@ -406,30 +462,9 @@ extension String.UnicodeScalarView : RangeReplaceableCollection {
/// - Complexity: O(*n*), where *n* is the length of the resulting view.
public mutating func append<S : Sequence>(contentsOf newElements: S)
where S.Element == Unicode.Scalar {
// FIXME: Keep ASCII storage if possible
_guts.reserveUnusedCapacity(newElements.underestimatedCount)
var it = newElements.makeIterator()
var next = it.next()
while let n = next {
_guts.withMutableUTF16Storage(unusedCapacity: UTF16.width(n)) { storage in
var p = storage._value.end
let limit = storage._value.capacityEnd
while let n = next {
let w = UTF16.width(n)
guard p + w <= limit else { break }
if w == 1 {
p.pointee = UTF16.CodeUnit(n.value)
} else {
_sanityCheck(w == 2)
p[0] = UTF16.leadSurrogate(n)
p[1] = UTF16.trailSurrogate(n)
}
p += w
next = it.next()
}
storage._value.count = p - storage._value.start
}
}
// TODO(UTF8 perf): This is a horribly slow means...
let scalars = String(decoding: newElements.map { $0.value }, as: UTF32.self)
self = (String(self._guts) + scalars).unicodeScalars
}
/// Replaces the elements within the specified bounds with the given Unicode
@@ -451,10 +486,18 @@ extension String.UnicodeScalarView : RangeReplaceableCollection {
_ bounds: Range<Index>,
with newElements: C
) where C : Collection, C.Element == Unicode.Scalar {
let rawSubRange: Range<Int> = _toCoreIndex(bounds.lowerBound) ..<
_toCoreIndex(bounds.upperBound)
let lazyUTF16 = newElements.lazy.flatMap { $0.utf16 }
_guts.replaceSubrange(rawSubRange, with: lazyUTF16)
// TODO(UTF8 perf): This is a horribly slow means...
//
// TODO(UTF8 perf): Consider storing a string directly, or implementing RSR
// on guts.
let utf8Replacement = newElements.flatMap { String($0).utf8 }
let replacement = utf8Replacement.withUnsafeBufferPointer {
return String._uncheckedFromUTF8($0)
}
var copy = String(_guts)
copy.replaceSubrange(bounds, with: replacement)
self = copy.unicodeScalars
}
}
@@ -481,16 +524,18 @@ extension String.UnicodeScalarIndex {
/// the trailing surrogate of a UTF-16 surrogate pair results in `nil`.
///
/// - Parameters:
/// - sourcePosition: A position in the `utf16` view of a string. `sourcePosition`
/// must be an element of `String(unicodeScalars).utf16.indices`.
/// - sourcePosition: A position in the `utf16` view of a string.
/// `sourcePosition` must be an element of
/// `String(unicodeScalars).utf16.indices`.
/// - unicodeScalars: The `UnicodeScalarView` in which to find the new
/// position.
@inlinable // FIXME(sil-serialize-all)
public init?(
_ sourcePosition: String.UTF16Index,
_ sourcePosition: String.Index,
within unicodeScalars: String.UnicodeScalarView
) {
if !unicodeScalars._isOnUnicodeScalarBoundary(sourcePosition) { return nil }
guard unicodeScalars._guts.isOnUnicodeScalarBoundary(sourcePosition) else {
return nil
}
self = sourcePosition
}
@@ -514,42 +559,11 @@ extension String.UnicodeScalarIndex {
/// position in `characters`, this method returns `nil`. For example,
/// an attempt to convert the position of a UTF-8 continuation byte
/// returns `nil`.
@inlinable // FIXME(sil-serialize-all)
public func samePosition(in characters: String) -> String.Index? {
return String.Index(self, within: characters)
}
}
// Boundary predicates over UTF-16 code-unit offsets.
extension String.UnicodeScalarView {
// Returns whether `i` falls on a Unicode scalar boundary.
//
// ASCII strings, `startIndex` and `endIndex` are always boundaries. An
// index with a nonzero `transcodedOffset` never is. Otherwise `i` is a
// boundary unless it addresses a trail surrogate that is immediately
// preceded by a lead surrogate (i.e. the middle of a surrogate pair).
@inlinable // FIXME(sil-serialize-all)
internal func _isOnUnicodeScalarBoundary(_ i: Index) -> Bool {
if _fastPath(_guts.isASCII) { return true }
if i == startIndex || i == endIndex {
return true
}
if i.transcodedOffset != 0 { return false }
let i2 = _toCoreIndex(i)
// Common case: the code unit at `i` is not a trail surrogate.
if _fastPath(
!UTF16.isTrailSurrogate(_guts.codeUnit(atCheckedOffset: i2))) {
return true
}
// A trail surrogate is a boundary only when unpaired: at offset 0 or not
// preceded by a lead surrogate.
return i2 == 0 || !UTF16.isLeadSurrogate(
_guts.codeUnit(atCheckedOffset:i2 &- 1))
}
// NOTE: Don't make this function inlineable. Grapheme cluster
// segmentation uses a completely different algorithm in Unicode 9.0.
// NOTE(review): the declaration below is nevertheless marked `@inlinable`,
// which contradicts the note above -- confirm which one is intended.
//
// Returns whether `i` falls on a grapheme cluster boundary: true for the
// start and end indices, false for non-scalar boundaries, and otherwise true
// exactly when stepping forward then backward in a `String` built from the
// same guts lands back on `i`.
@inlinable // FIXME(sil-serialize-all)
internal func _isOnGraphemeClusterBoundary(_ i: Index) -> Bool {
if i == startIndex || i == endIndex {
return true
}
if !_isOnUnicodeScalarBoundary(i) { return false }
let str = String(_guts)
return i == str.index(before: str.index(after: i))
}
}
// Reflection
extension String.UnicodeScalarView : CustomReflectable {
/// Returns a mirror that reflects the Unicode scalars view of a string.
@@ -558,6 +572,7 @@ extension String.UnicodeScalarView : CustomReflectable {
}
}
//===--- Slicing Support --------------------------------------------------===//
/// In Swift 3.2, in the absence of type context,
///
@@ -571,9 +586,37 @@ extension String.UnicodeScalarView : CustomReflectable {
extension String.UnicodeScalarView {
public typealias SubSequence = Substring.UnicodeScalarView
@inlinable // FIXME(sil-serialize-all)
@available(swift, introduced: 4)
public subscript(bounds: Range<Index>) -> String.UnicodeScalarView.SubSequence {
return String.UnicodeScalarView.SubSequence(self, _bounds: bounds)
public subscript(r: Range<Index>) -> String.UnicodeScalarView.SubSequence {
return String.UnicodeScalarView.SubSequence(self, _bounds: r)
}
}
// Foreign string Support
extension String.UnicodeScalarView {
  /// Out-of-line `index(after:)` for strings whose guts are foreign.
  ///
  /// Moves forward by the length that `_guts.foreignScalarLength(startingAt:)`
  /// reports for the scalar at `i`.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(after i: Index) -> Index {
    _sanityCheck(_guts.isForeign)
    let origin = i.encodedOffset
    let width = _guts.foreignScalarLength(startingAt: origin)
    return Index(encodedOffset: origin + width)
  }

  /// Out-of-line `index(before:)` for strings whose guts are foreign.
  ///
  /// Moves backward by the length that `_guts.foreignScalarLength(endingAt:)`
  /// reports for the scalar ending at `i`.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(before i: Index) -> Index {
    _sanityCheck(_guts.isForeign)
    let origin = i.encodedOffset
    let width = _guts.foreignScalarLength(endingAt: origin)
    return Index(encodedOffset: origin - width)
  }

  /// Out-of-line element access for strings whose guts are foreign.
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignSubscript(position i: Index) -> Unicode.Scalar {
    _sanityCheck(_guts.isForeign)
    let origin = i.encodedOffset
    return _guts.foreignScalar(startingAt: origin)
  }
}
-171
View File
@@ -10,174 +10,3 @@
//
//===----------------------------------------------------------------------===//
// Internal abstraction over a string representation: a random-access,
// self-slicing collection whose elements are UTF-16 code units.
@usableFromInline
internal protocol _StringVariant : RandomAccessCollection
where
Element == Unicode.UTF16.CodeUnit,
SubSequence == Self {
// FIXME associatedtype Encoding : _UnicodeEncoding
// The stored code unit type; note `Element` is fixed to a UTF-16 code unit
// regardless of this type.
associatedtype CodeUnit : FixedWidthInteger & UnsignedInteger
// Iterator type that yields the variant's contents as Unicode scalars.
associatedtype UnicodeScalarIterator : IteratorProtocol
where UnicodeScalarIterator.Element == Unicode.Scalar
var isASCII: Bool { get }
// Offset-based subscripts allow integer offsets within 0..<count,
// regardless of what the Index type is.
subscript(offset: Int) -> Element { get }
subscript(offsetRange: Range<Int>) -> Self { get }
func makeUnicodeScalarIterator() -> UnicodeScalarIterator
// Measure the length in UTF-16 code units of the first extended grapheme
// cluster in self.
func measureFirstExtendedGraphemeCluster() -> Int
// Measure the length in UTF-16 code units of the last extended grapheme
// cluster in self.
func measureLastExtendedGraphemeCluster() -> Int
// Slow path for measuring the length in UTF-16 code units of the first
// extended grapheme cluster in self.
func _measureFirstExtendedGraphemeClusterSlow() -> Int
// Slow path for measuring the length in UTF-16 code units of the last
// extended grapheme cluster in self.
func _measureLastExtendedGraphemeClusterSlow() -> Int
// Copies the variant's code units into `target`, whose element type may
// differ from `CodeUnit`.
func _copy<TargetCodeUnit>(
into target: UnsafeMutableBufferPointer<TargetCodeUnit>
) where TargetCodeUnit : FixedWidthInteger & UnsignedInteger
}
extension _StringVariant {
  /// Allocates native storage holding a copy of this variant's code units,
  /// with room for `unusedCapacity` additional code units.
  @inlinable // FIXME(sil-serialize-all)
  internal func _copyToNativeStorage<TargetCodeUnit>(
    of codeUnit: TargetCodeUnit.Type = TargetCodeUnit.self,
    unusedCapacity: Int = 0
  ) -> _SwiftStringStorage<TargetCodeUnit>
  where TargetCodeUnit : FixedWidthInteger & UnsignedInteger {
    let length = count
    let native = _SwiftStringStorage<TargetCodeUnit>.create(
      capacity: length + unusedCapacity,
      count: length)
    _copy(into: native.usedBuffer)
    return native
  }

  /// Traps unless `i` lies in `startIndex..<endIndex`.
  @inlinable
  @inline(__always)
  func _boundsCheck(_ i: Index) {
    _precondition(i >= startIndex, "String index is out of bounds")
    _precondition(i < endIndex, "String index is out of bounds")
  }

  /// Traps unless `range` lies within `startIndex...endIndex`.
  @inlinable
  @inline(__always)
  func _boundsCheck(_ range: Range<Index>) {
    _precondition(
      range.lowerBound >= startIndex && range.upperBound <= endIndex,
      "String index range is out of bounds")
  }

  /// Traps unless the integer offset `i` lies in `0..<count`.
  @inlinable
  @inline(__always)
  func _boundsCheck(offset i: Int) {
    _precondition(i >= 0, "String index is out of bounds")
    _precondition(i < count, "String index is out of bounds")
  }

  /// Traps unless the integer offset range lies within `0...count`.
  @inlinable
  @inline(__always)
  func _boundsCheck(offsetRange range: Range<Int>) {
    _precondition(range.lowerBound >= 0,
      "String index range is out of bounds")
    _precondition(range.upperBound <= count,
      "String index range is out of bounds")
  }

  /// Returns the code unit at `index` after bounds-checking it.
  @inlinable
  internal func codeUnit(atCheckedIndex index: Index) -> Element {
    _boundsCheck(index)
    let unit = self[index]
    return unit
  }

  /// Returns the code unit at integer `offset` after bounds-checking it.
  @inlinable
  internal func codeUnit(atCheckedOffset offset: Int) -> Element {
    _boundsCheck(offset: offset)
    let unit = self[offset]
    return unit
  }

  /// Returns the slice covering `range` after bounds-checking it.
  @inlinable
  internal func checkedSlice(_ range: Range<Int>) -> Self {
    _boundsCheck(offsetRange: range)
    return self[range]
  }

  /// Returns the bounds-checked slice from `startOffset` to the end.
  @inlinable
  internal func checkedSlice(from startOffset: Int) -> Self {
    return checkedSlice(startOffset..<count)
  }

  /// Returns the bounds-checked slice from the start up to `endOffset`.
  @inlinable
  internal func checkedSlice(upTo endOffset: Int) -> Self {
    return checkedSlice(0..<endOffset)
  }
}
extension _StringVariant {
  /// Returns the width, in UTF-16 code units, of the scalar starting at
  /// `offset`: 2 for a lead surrogate followed by a trail surrogate,
  /// otherwise 1.
  @inlinable
  internal func unicodeScalarWidth(startingAt offset: Int) -> Int {
    _boundsCheck(offset: offset)
    guard _slowPath(UTF16.isLeadSurrogate(self[offset])) else { return 1 }
    let next = offset + 1
    guard next < self.count && UTF16.isTrailSurrogate(self[next]) else {
      return 1
    }
    return 2
  }

  /// Returns the width, in UTF-16 code units, of the scalar ending just
  /// before `offset`: 2 for a trail surrogate preceded by a lead surrogate,
  /// otherwise 1.
  @inlinable
  func unicodeScalarWidth(endingAt offset: Int) -> Int {
    _boundsCheck(offset: offset - 1)
    guard _slowPath(UTF16.isTrailSurrogate(self[offset - 1])) else { return 1 }
    guard offset >= 2 && UTF16.isLeadSurrogate(self[offset - 2]) else {
      return 1
    }
    return 2
  }

  /// Decodes the scalar starting at `offset`, yielding the replacement
  /// character for an unpaired surrogate.
  @inlinable
  func unicodeScalar(startingAt offset: Int) -> Unicode.Scalar {
    let lead = self.codeUnit(atCheckedOffset: offset)
    if _fastPath(UTF16._isScalar(lead)) {
      return Unicode.Scalar(_unchecked: UInt32(lead))
    }
    // `lead` is a surrogate: it only decodes when it is a lead surrogate
    // that is directly followed by a trail surrogate.
    guard UTF16.isLeadSurrogate(lead) && offset + 1 < count else {
      return Unicode.Scalar._replacementCharacter
    }
    let trail = self[offset + 1]
    guard UTF16.isTrailSurrogate(trail) else {
      return Unicode.Scalar._replacementCharacter
    }
    return UTF16._decodeSurrogates(lead, trail)
  }

  /// Decodes the scalar ending just before `offset`, yielding the
  /// replacement character for an unpaired surrogate.
  @inlinable
  func unicodeScalar(endingAt offset: Int) -> Unicode.Scalar {
    let trail = self.codeUnit(atCheckedOffset: offset - 1)
    if _fastPath(UTF16._isScalar(trail)) {
      return Unicode.Scalar(_unchecked: UInt32(trail))
    }
    // `trail` is a surrogate: it only decodes when it is a trail surrogate
    // that is directly preceded by a lead surrogate.
    guard UTF16.isTrailSurrogate(trail) && offset >= 2 else {
      return Unicode.Scalar._replacementCharacter
    }
    let lead = self[offset - 2]
    guard UTF16.isLeadSurrogate(lead) else {
      return Unicode.Scalar._replacementCharacter
    }
    return UTF16._decodeSurrogates(lead, trail)
  }
}
+186 -262
View File
@@ -18,9 +18,11 @@ extension String {
/// instance.
///
/// - Complexity: O(*n*), where *n* is the length of `substring`.
@inlinable
public init(_ substring: Substring) {
let wholeGuts = substring._wholeString._guts
self.init(wholeGuts._extractSlice(substring._encodedOffsetRange))
self = substring.withUnsafeBytes {
return String._uncheckedFromUTF8($0._asUInt8)
}
}
}
@@ -92,88 +94,94 @@ extension String {
/// when there is no other reference to the original string. Storing
/// substrings may, therefore, prolong the lifetime of string data that is
/// no longer otherwise accessible, which can appear to be memory leakage.
@_fixed_layout // FIXME(sil-serialize-all)
public struct Substring : StringProtocol {
@_fixed_layout
public struct Substring {
@usableFromInline
internal var _slice: Slice<String>
@inlinable @inline(__always)
internal init(_ slice: Slice<String>) {
self._slice = slice
_invariantCheck()
}
/// Creates an empty substring.
@inlinable @inline(__always)
public init() {
self.init(Slice())
}
}
// Internal accessors for the string a substring is sliced from.
extension Substring {
// The guts of the base string of `_slice` (the whole string, not just the
// sliced portion).
@inlinable
internal var wholeGuts: _StringGuts {
@inline(__always) get { return _slice.base._guts }
}
// A `String` built from `wholeGuts`.
@inlinable
internal var wholeString: String {
@inline(__always) get { return String(self.wholeGuts) }
}
@usableFromInline // TODO(UTF8 merge): for testing, drop this decl after merge
internal var _wholeString: String { return wholeString }
// The slice's bounds expressed as encoded offsets into the base string.
// Both bounds are expected to have a zero `transcodedOffset`.
@inlinable
internal var offsetRange: Range<Int> {
let start = _slice.startIndex
let end = _slice.endIndex
_sanityCheck(start.transcodedOffset == 0 && end.transcodedOffset == 0)
// Built with `uncheckedBounds`, so `start <= end` is not re-validated here.
return Range(uncheckedBounds: (start.encodedOffset, end.encodedOffset))
}
// When `INTERNAL_CHECKS_ENABLED` is set, runs the base string's invariant
// check; otherwise does nothing.
@inlinable @inline(__always)
internal func _invariantCheck() {
#if INTERNAL_CHECKS_ENABLED
self.wholeString._invariantCheck()
#endif
}
}
extension Substring: StringProtocol {
public typealias Index = String.Index
public typealias SubSequence = Substring
@usableFromInline // FIXME(sil-serialize-all)
internal var _slice: Slice<String>
/// Creates an empty substring.
@inlinable // FIXME(sil-serialize-all)
public init() {
_slice = Slice()
@inlinable
public var startIndex: Index {
@inline(__always) get { return _slice.startIndex }
}
@inlinable
public var endIndex: Index {
@inline(__always) get { return _slice.endIndex }
}
@inlinable // FIXME(sil-serialize-all)
internal init(_slice: Slice<String>) {
self._slice = _slice
}
@inlinable // FIXME(sil-serialize-all)
internal init(_ guts: _StringGuts, _ offsetRange: Range<Int>) {
self.init(
_base: String(guts),
Index(encodedOffset: offsetRange.lowerBound) ..<
Index(encodedOffset: offsetRange.upperBound))
}
/// Creates a substring with the specified bounds within the given string.
///
/// - Parameters:
/// - base: The string to create a substring of.
/// - bounds: The range of `base` to use for the substring. The lower and
/// upper bounds of `bounds` must be valid indices of `base`.
@inlinable // FIXME(sil-serialize-all)
public init(_base base: String, _ bounds: Range<Index>) {
_slice = Slice(base: base, bounds: bounds)
}
@inlinable // FIXME(sil-serialize-all)
internal init<R: RangeExpression>(
_base base: String, _ bounds: R
) where R.Bound == Index {
self.init(_base: base, bounds.relative(to: base))
}
@inlinable // FIXME(sil-serialize-all)
public var startIndex: Index { return _slice.startIndex }
@inlinable // FIXME(sil-serialize-all)
public var endIndex: Index { return _slice.endIndex }
@inlinable // FIXME(sil-serialize-all)
@inlinable @inline(__always)
public func index(after i: Index) -> Index {
_precondition(i < endIndex, "Cannot increment beyond endIndex")
_precondition(i >= startIndex, "Cannot increment an invalid index")
// FIXME(strings): slice types currently lack necessary bound checks
return _slice.index(after: i)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable @inline(__always)
public func index(before i: Index) -> Index {
_precondition(i <= endIndex, "Cannot decrement an invalid index")
_precondition(i > startIndex, "Cannot decrement beyond startIndex")
// FIXME(strings): slice types currently lack necessary bound checks
return _slice.index(before: i)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable @inline(__always)
public func index(_ i: Index, offsetBy n: Int) -> Index {
let result = _slice.index(i, offsetBy: n)
// FIXME(strings): slice types currently lack necessary bound checks
_precondition(
(_slice._startIndex ... _slice.endIndex).contains(result),
"Operation results in an invalid index")
return result
}
@inlinable // FIXME(sil-serialize-all)
@inlinable @inline(__always)
public func index(
_ i: Index, offsetBy n: Int, limitedBy limit: Index
) -> Index? {
let result = _slice.index(i, offsetBy: n, limitedBy: limit)
// FIXME(strings): slice types currently lack necessary bound checks
_precondition(result.map {
(_slice._startIndex ... _slice.endIndex).contains($0)
} ?? true,
@@ -181,26 +189,22 @@ public struct Substring : StringProtocol {
return result
}
@inlinable // FIXME(sil-serialize-all)
@inlinable @inline(__always)
public func distance(from start: Index, to end: Index) -> Int {
return _slice.distance(from: start, to: end)
}
@inlinable // FIXME(sil-serialize-all)
public subscript(i: Index) -> Character {
return _slice[i]
}
@inlinable // FIXME(sil-serialize-all)
public mutating func replaceSubrange<C>(
_ bounds: Range<Index>,
with newElements: C
) where C : Collection, C.Iterator.Element == Iterator.Element {
// FIXME(strings): slice types currently lack necessary bound checks
_slice.replaceSubrange(bounds, with: newElements)
}
@inlinable // FIXME(sil-serialize-all)
public mutating func replaceSubrange(
_ bounds: Range<Index>, with newElements: Substring
) {
@@ -215,7 +219,7 @@ public struct Substring : StringProtocol {
/// specified in `sourceEncoding`.
/// - sourceEncoding: The encoding in which `codeUnits` should be
/// interpreted.
@inlinable // FIXME(sil-serialize-all)
@inlinable // specialization
public init<C: Collection, Encoding: _UnicodeEncoding>(
decoding codeUnits: C, as sourceEncoding: Encoding.Type
) where C.Iterator.Element == Encoding.CodeUnit {
@@ -227,7 +231,6 @@ public struct Substring : StringProtocol {
///
/// - Parameter nullTerminatedUTF8: A pointer to a sequence of contiguous,
/// UTF-8 encoded bytes ending just before the first zero byte.
@inlinable // FIXME(sil-serialize-all)
public init(cString nullTerminatedUTF8: UnsafePointer<CChar>) {
self.init(String(cString: nullTerminatedUTF8))
}
@@ -241,7 +244,7 @@ public struct Substring : StringProtocol {
/// before the first zero code unit.
/// - sourceEncoding: The encoding in which the code units should be
/// interpreted.
@inlinable // FIXME(sil-serialize-all)
@inlinable // specialization
public init<Encoding: _UnicodeEncoding>(
decodingCString nullTerminatedCodeUnits: UnsafePointer<Encoding.CodeUnit>,
as sourceEncoding: Encoding.Type
@@ -263,14 +266,12 @@ public struct Substring : StringProtocol {
/// `withCString(_:)` method. The pointer argument is valid only for the
/// duration of the method's execution.
/// - Returns: The return value, if any, of the `body` closure parameter.
@inlinable // FIXME(sil-serialize-all)
@inlinable // specialization
public func withCString<Result>(
_ body: (UnsafePointer<CChar>) throws -> Result) rethrows -> Result {
return try _wholeString._guts._withCSubstringAndLength(
in: _encodedOffsetRange,
encoding: UTF8.self) { p, length in
try p.withMemoryRebound(to: CChar.self, capacity: length, body)
}
// TODO(UTF8 perf): Detect when we cover the rest of a nul-terminated
// String, and thus can avoid a copy.
return try String(self).withCString(body)
}
/// Calls the given closure with a pointer to the contents of the string,
@@ -289,68 +290,48 @@ public struct Substring : StringProtocol {
/// - targetEncoding: The encoding in which the code units should be
/// interpreted.
/// - Returns: The return value, if any, of the `body` closure parameter.
@inlinable // FIXME(sil-serialize-all)
@inlinable // specialization
public func withCString<Result, TargetEncoding: _UnicodeEncoding>(
encodedAs targetEncoding: TargetEncoding.Type,
_ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
) rethrows -> Result {
return try _wholeString._guts._withCSubstring(
in: _encodedOffsetRange,
encoding: targetEncoding,
body)
// TODO(UTF8 perf): Detect when we cover the rest of a nul-terminated
// String, and thus can avoid a copy.
return try String(self).withCString(encodedAs: targetEncoding, body)
}
}
// `_SwiftStringView` conformance for `Substring`.
extension Substring : _SwiftStringView {
// A new `String` created from this substring.
@inlinable // FIXME(sil-serialize-all)
internal var _persistentContent: String {
return String(self)
}
// Currently identical to `_persistentContent`.
@inlinable // FIXME(sil-serialize-all)
public // @testable
var _ephemeralContent: String {
return _persistentContent
}
// The base string that `_slice` was taken from.
@inlinable // FIXME(sil-serialize-all)
public var _wholeString: String {
return _slice._base
}
// The substring's bounds as encoded offsets within `_wholeString`.
@inlinable // FIXME(sil-serialize-all)
public var _encodedOffsetRange: Range<Int> {
return startIndex.encodedOffset..<endIndex.encodedOffset
}
}
extension Substring : CustomReflectable {
/// The mirror of a `String` created from this substring.
public var customMirror: Mirror {
return String(self).customMirror
}
}
//extension Substring : CustomReflectable {
// public var customMirror: Mirror { return String(self).customMirror }
//}
//extension Substring : CustomPlaygroundQuickLookable {
// @available(*, deprecated, message: "Substring.customPlaygroundQuickLook will be removed in a future Swift version")
// public var customPlaygroundQuickLook: PlaygroundQuickLook {
// return String(self).customPlaygroundQuickLook
// }
//}
//
extension Substring : CustomStringConvertible {
@inlinable // FIXME(sil-serialize-all)
@inlinable
public var description: String {
return String(self)
@inline(__always) get { return String(self) }
}
}
extension Substring : CustomDebugStringConvertible {
public var debugDescription: String {
return String(self).debugDescription
}
}
extension Substring : LosslessStringConvertible {
@inlinable // FIXME(sil-serialize-all)
public init(_ content: String) {
self.init(_base: content, content.startIndex ..< content.endIndex)
}
public var debugDescription: String { return String(self).debugDescription }
}
//extension Substring : LosslessStringConvertible {
// @inlinable @inline(__always)
// public init(_ content: String) {
// self.init(
// Slice(base: content, bounds: content.startIndex..<content.endIndex))
// }
//}
// TODO(UTF8 merge): Can we just unify all these?
extension Substring {
@_fixed_layout // FIXME(sil-serialize-all)
public struct UTF8View {
@@ -366,60 +347,49 @@ extension Substring.UTF8View : BidirectionalCollection {
public typealias SubSequence = Substring.UTF8View
/// Creates an instance that slices `base` at `_bounds`.
@inlinable // FIXME(sil-serialize-all)
@inlinable
internal init(_ base: String.UTF8View, _bounds: Range<Index>) {
_slice = Slice(
base: String(base._guts).utf8,
bounds: _bounds)
}
/// The entire String onto whose slice this view is a projection.
@inlinable // FIXME(sil-serialize-all)
internal var _wholeString: String {
return String(_slice._base._guts)
}
@inlinable // FIXME(sil-serialize-all)
internal var _encodedOffsetRange: Range<Int> {
return startIndex.encodedOffset..<endIndex.encodedOffset
}
//
// Plumb slice operations through
//
@inlinable // FIXME(sil-serialize-all)
@inlinable
public var startIndex: Index { return _slice.startIndex }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public var endIndex: Index { return _slice.endIndex }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public subscript(index: Index) -> Element { return _slice[index] }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public var indices: Indices { return _slice.indices }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func index(after i: Index) -> Index { return _slice.index(after: i) }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func formIndex(after i: inout Index) {
_slice.formIndex(after: &i)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func index(_ i: Index, offsetBy n: Int) -> Index {
return _slice.index(i, offsetBy: n)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func index(
_ i: Index, offsetBy n: Int, limitedBy limit: Index
) -> Index? {
return _slice.index(i, offsetBy: n, limitedBy: limit)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func distance(from start: Index, to end: Index) -> Int {
return _slice.distance(from: start, to: end)
}
@@ -436,28 +406,25 @@ extension Substring.UTF8View : BidirectionalCollection {
_slice._failEarlyRangeCheck(range, bounds: bounds)
}
@inlinable // FIXME(sil-serialize-all)
public func index(before i: Index) -> Index { return _slice.index(before: i) }
@inlinable // FIXME(sil-serialize-all)
public func formIndex(before i: inout Index) {
_slice.formIndex(before: &i)
}
@inlinable // FIXME(sil-serialize-all)
public subscript(r: Range<Index>) -> Substring.UTF8View {
// FIXME(strings): tests.
_precondition(r.lowerBound >= startIndex && r.upperBound <= endIndex,
"UTF8View index range out of bounds")
return Substring.UTF8View(_wholeString.utf8, _bounds: r)
return Substring.UTF8View(_slice.base, _bounds: r)
}
}
extension Substring {
@inlinable // FIXME(sil-serialize-all)
@inlinable
public var utf8: UTF8View {
get {
return UTF8View(_wholeString.utf8, _bounds: startIndex..<endIndex)
return wholeString.utf8[startIndex..<endIndex]
}
set {
self = Substring(newValue)
@@ -467,9 +434,10 @@ extension Substring {
/// Creates a Substring having the given content.
///
/// - Complexity: O(1)
@inlinable // FIXME(sil-serialize-all)
public init(_ content: UTF8View) {
self = content._wholeString[content.startIndex..<content.endIndex]
self = String(
content._slice.base._guts
)[content.startIndex..<content.endIndex]
}
}
@@ -480,20 +448,20 @@ extension String {
///
/// - Complexity: O(N), where N is the length of the resulting `String`'s
/// UTF-16.
@inlinable // FIXME(sil-serialize-all)
public init?(_ codeUnits: Substring.UTF8View) {
let wholeString = codeUnits._wholeString
guard
codeUnits.startIndex.samePosition(in: wholeString.unicodeScalars) != nil
&& codeUnits.endIndex.samePosition(in: wholeString.unicodeScalars) != nil
else { return nil }
let guts = codeUnits._slice.base._guts
guard guts.isOnUnicodeScalarBoundary(codeUnits._slice.startIndex),
guts.isOnUnicodeScalarBoundary(codeUnits._slice.endIndex) else {
return nil
}
self = String(Substring(codeUnits))
}
}
extension Substring {
  /// A view of a substring's contents as UTF-16 code units, stored as a
  /// slice of the base string's `String.UTF16View`.
  // Each attribute appears exactly once; the block previously repeated
  // `@_fixed_layout` and `@usableFromInline`.
  @_fixed_layout
  public struct UTF16View {
    @usableFromInline
    internal var _slice: Slice<String.UTF16View>
  }
}
@@ -505,98 +473,84 @@ extension Substring.UTF16View : BidirectionalCollection {
public typealias SubSequence = Substring.UTF16View
/// Creates an instance that slices `base` at `_bounds`.
@inlinable // FIXME(sil-serialize-all)
@inlinable
internal init(_ base: String.UTF16View, _bounds: Range<Index>) {
_slice = Slice(
base: String(base._guts).utf16,
bounds: _bounds)
}
/// The entire String onto whose slice this view is a projection.
@inlinable // FIXME(sil-serialize-all)
internal var _wholeString: String {
return String(_slice._base._guts)
}
@inlinable // FIXME(sil-serialize-all)
internal var _encodedOffsetRange: Range<Int> {
return startIndex.encodedOffset..<endIndex.encodedOffset
}
//
// Plumb slice operations through
//
@inlinable // FIXME(sil-serialize-all)
@inlinable
public var startIndex: Index { return _slice.startIndex }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public var endIndex: Index { return _slice.endIndex }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public subscript(index: Index) -> Element { return _slice[index] }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public var indices: Indices { return _slice.indices }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func index(after i: Index) -> Index { return _slice.index(after: i) }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func formIndex(after i: inout Index) {
_slice.formIndex(after: &i)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func index(_ i: Index, offsetBy n: Int) -> Index {
return _slice.index(i, offsetBy: n)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func index(
_ i: Index, offsetBy n: Int, limitedBy limit: Index
) -> Index? {
return _slice.index(i, offsetBy: n, limitedBy: limit)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func distance(from start: Index, to end: Index) -> Int {
return _slice.distance(from: start, to: end)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func _failEarlyRangeCheck(_ index: Index, bounds: Range<Index>) {
_slice._failEarlyRangeCheck(index, bounds: bounds)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func _failEarlyRangeCheck(
_ range: Range<Index>, bounds: Range<Index>
) {
_slice._failEarlyRangeCheck(range, bounds: bounds)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func index(before i: Index) -> Index { return _slice.index(before: i) }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func formIndex(before i: inout Index) {
_slice.formIndex(before: &i)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public subscript(r: Range<Index>) -> Substring.UTF16View {
// FIXME(strings): tests.
_precondition(r.lowerBound >= startIndex && r.upperBound <= endIndex,
"UTF16View index range out of bounds")
return Substring.UTF16View(_wholeString.utf16, _bounds: r)
return Substring.UTF16View(_slice.base, _bounds: r)
}
}
extension Substring {
@inlinable // FIXME(sil-serialize-all)
@inlinable
public var utf16: UTF16View {
get {
return UTF16View(_wholeString.utf16, _bounds: startIndex..<endIndex)
return wholeString.utf16[startIndex..<endIndex]
}
set {
self = Substring(newValue)
@@ -606,9 +560,10 @@ extension Substring {
/// Creates a Substring having the given content.
///
/// - Complexity: O(1)
@inlinable // FIXME(sil-serialize-all)
public init(_ content: UTF16View) {
self = content._wholeString[content.startIndex..<content.endIndex]
self = String(
content._slice.base._guts
)[content.startIndex..<content.endIndex]
}
}
@@ -619,20 +574,20 @@ extension String {
///
/// - Complexity: O(N), where N is the length of the resulting `String`'s
/// UTF-16.
@inlinable // FIXME(sil-serialize-all)
public init?(_ codeUnits: Substring.UTF16View) {
let wholeString = codeUnits._wholeString
guard
codeUnits.startIndex.samePosition(in: wholeString.unicodeScalars) != nil
&& codeUnits.endIndex.samePosition(in: wholeString.unicodeScalars) != nil
else { return nil }
let guts = codeUnits._slice.base._guts
guard guts.isOnUnicodeScalarBoundary(codeUnits._slice.startIndex),
guts.isOnUnicodeScalarBoundary(codeUnits._slice.endIndex) else {
return nil
}
self = String(Substring(codeUnits))
}
}
extension Substring {
@_fixed_layout // FIXME(sil-serialize-all)
@_fixed_layout
public struct UnicodeScalarView {
@usableFromInline // FIXME(sil-serialize-all)
@usableFromInline
internal var _slice: Slice<String.UnicodeScalarView>
}
}
@@ -644,98 +599,84 @@ extension Substring.UnicodeScalarView : BidirectionalCollection {
public typealias SubSequence = Substring.UnicodeScalarView
/// Creates an instance that slices `base` at `_bounds`.
@inlinable // FIXME(sil-serialize-all)
@inlinable
internal init(_ base: String.UnicodeScalarView, _bounds: Range<Index>) {
_slice = Slice(
base: String(base._guts).unicodeScalars,
bounds: _bounds)
}
/// The entire String onto whose slice this view is a projection.
@inlinable // FIXME(sil-serialize-all)
internal var _wholeString: String {
return String(_slice._base._guts)
}
@inlinable // FIXME(sil-serialize-all)
internal var _encodedOffsetRange: Range<Int> {
return startIndex.encodedOffset..<endIndex.encodedOffset
}
//
// Plumb slice operations through
//
@inlinable // FIXME(sil-serialize-all)
@inlinable
public var startIndex: Index { return _slice.startIndex }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public var endIndex: Index { return _slice.endIndex }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public subscript(index: Index) -> Element { return _slice[index] }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public var indices: Indices { return _slice.indices }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func index(after i: Index) -> Index { return _slice.index(after: i) }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func formIndex(after i: inout Index) {
_slice.formIndex(after: &i)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func index(_ i: Index, offsetBy n: Int) -> Index {
return _slice.index(i, offsetBy: n)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func index(
_ i: Index, offsetBy n: Int, limitedBy limit: Index
) -> Index? {
return _slice.index(i, offsetBy: n, limitedBy: limit)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func distance(from start: Index, to end: Index) -> Int {
return _slice.distance(from: start, to: end)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func _failEarlyRangeCheck(_ index: Index, bounds: Range<Index>) {
_slice._failEarlyRangeCheck(index, bounds: bounds)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func _failEarlyRangeCheck(
_ range: Range<Index>, bounds: Range<Index>
) {
_slice._failEarlyRangeCheck(range, bounds: bounds)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func index(before i: Index) -> Index { return _slice.index(before: i) }
@inlinable // FIXME(sil-serialize-all)
@inlinable
public func formIndex(before i: inout Index) {
_slice.formIndex(before: &i)
}
@inlinable // FIXME(sil-serialize-all)
@inlinable
public subscript(r: Range<Index>) -> Substring.UnicodeScalarView {
// FIXME(strings): tests.
_precondition(r.lowerBound >= startIndex && r.upperBound <= endIndex,
"UnicodeScalarView index range out of bounds")
return Substring.UnicodeScalarView(_wholeString.unicodeScalars, _bounds: r)
return Substring.UnicodeScalarView(_slice.base, _bounds: r)
}
}
extension Substring {
@inlinable // FIXME(sil-serialize-all)
@inlinable
public var unicodeScalars: UnicodeScalarView {
get {
return UnicodeScalarView(_wholeString.unicodeScalars, _bounds: startIndex..<endIndex)
return wholeString.unicodeScalars[startIndex..<endIndex]
}
set {
self = Substring(newValue)
@@ -745,9 +686,10 @@ extension Substring {
/// Creates a Substring having the given content.
///
/// - Complexity: O(1)
@inlinable // FIXME(sil-serialize-all)
public init(_ content: UnicodeScalarView) {
self = content._wholeString[content.startIndex..<content.endIndex]
self = String(
content._slice.base._guts
)[content.startIndex..<content.endIndex]
}
}
@@ -756,7 +698,6 @@ extension String {
///
/// - Complexity: O(N), where N is the length of the resulting `String`'s
/// UTF-16.
@inlinable // FIXME(sil-serialize-all)
public init(_ content: Substring.UnicodeScalarView) {
self = String(Substring(content))
}
@@ -764,10 +705,9 @@ extension String {
// FIXME: The other String views should be RangeReplaceable too.
extension Substring.UnicodeScalarView : RangeReplaceableCollection {
@inlinable // FIXME(sil-serialize-all)
@inlinable
public init() { _slice = Slice.init() }
@inlinable // FIXME(sil-serialize-all)
public mutating func replaceSubrange<C : Collection>(
_ target: Range<Index>, with replacement: C
) where C.Element == Element {
@@ -776,18 +716,12 @@ extension Substring.UnicodeScalarView : RangeReplaceableCollection {
}
extension Substring : RangeReplaceableCollection {
@inlinable // FIXME(sil-serialize-all)
public init<S : Sequence>(_ elements: S)
where S.Element == Character {
let e0 = elements as? _SwiftStringView
if _fastPath(e0 != nil), let e = e0 {
self.init(e._wholeString._guts, e._encodedOffsetRange)
} else {
self.init(String(elements))
}
self = String(elements)[...]
}
@inlinable // FIXME(sil-serialize-all)
@inlinable // specialize
public mutating func append<S : Sequence>(contentsOf elements: S)
where S.Element == Character {
var string = String(self)
@@ -798,17 +732,14 @@ extension Substring : RangeReplaceableCollection {
}
extension Substring {
@inlinable // FIXME(sil-serialize-all)
public func lowercased() -> String {
return String(self).lowercased()
}
@inlinable // FIXME(sil-serialize-all)
public func uppercased() -> String {
return String(self).uppercased()
}
@inlinable // FIXME(sil-serialize-all)
public func filter(
_ isIncluded: (Element) throws -> Bool
) rethrows -> String {
@@ -817,61 +748,54 @@ extension Substring {
}
extension Substring : TextOutputStream {
@inlinable // FIXME(sil-serialize-all)
public mutating func write(_ other: String) {
append(contentsOf: other)
}
}
extension Substring : TextOutputStreamable {
@inlinable // FIXME(sil-serialize-all)
@inlinable // specializable
public func write<Target : TextOutputStream>(to target: inout Target) {
target.write(String(self))
}
}
extension Substring : ExpressibleByUnicodeScalarLiteral {
@inlinable // FIXME(sil-serialize-all)
@inlinable
public init(unicodeScalarLiteral value: String) {
self.init(_base: value, value.startIndex ..< value.endIndex)
self.init(value)
}
}
extension Substring : ExpressibleByExtendedGraphemeClusterLiteral {
@inlinable // FIXME(sil-serialize-all)
@inlinable
public init(extendedGraphemeClusterLiteral value: String) {
self.init(_base: value, value.startIndex ..< value.endIndex)
self.init(value)
}
}
extension Substring : ExpressibleByStringLiteral {
@inlinable // FIXME(sil-serialize-all)
@inlinable
public init(stringLiteral value: String) {
self.init(_base: value, value.startIndex ..< value.endIndex)
self.init(value)
}
}
//===--- String/Substring Slicing Support ---------------------------------===//
/// In Swift 3.2, in the absence of type context,
///
/// someString[someString.startIndex..<someString.endIndex]
///
/// was deduced to be of type `String`. Therefore have a more-specific
/// Swift-3-only `subscript` overload on `String` (and `Substring`) that
/// continues to produce `String`.
// String/Substring Slicing
extension String {
@inlinable // FIXME(sil-serialize-all)
@inlinable
@available(swift, introduced: 4)
public subscript(r: Range<Index>) -> Substring {
_boundsCheck(r)
return Substring(
_slice: Slice(base: self, bounds: r))
return Substring(Slice(base: self, bounds: r))
}
}
extension Substring {
@inlinable // FIXME(sil-serialize-all)
@inlinable
@available(swift, introduced: 4)
public subscript(r: Range<Index>) -> Substring {
return Substring(_slice: _slice[r])
return Substring(_slice[r])
}
}
+29 -3
View File
@@ -71,14 +71,40 @@ internal struct _ThreadLocalStorage {
}
internal static func getUBreakIterator(
start: UnsafePointer<UTF16.CodeUnit>,
count: Int32
_ bufPtr: UnsafeBufferPointer<UTF16.CodeUnit>
) -> OpaquePointer {
let tlsPtr = getPointer()
let brkIter = tlsPtr[0].uBreakIterator
let utext = tlsPtr[0].uText
var err = __swift_stdlib_U_ZERO_ERROR
__swift_stdlib_ubrk_setText(brkIter, start, count, &err)
let start = bufPtr.baseAddress._unsafelyUnwrappedUnchecked
_ = __swift_stdlib_utext_openUChars(
utext, start, Int64(bufPtr.count), &err)
_precondition(err.isSuccess, "Unexpected utext_openUChars failure")
__swift_stdlib_ubrk_setUText(brkIter, utext, &err)
_precondition(err.isSuccess, "Unexpected ubrk_setUText failure")
return brkIter
}
/// Points the ICU break iterator held in the storage returned by
/// `getPointer()` at the UTF-8 code units in `bufPtr`, and returns that
/// break iterator.
///
/// The buffer is first wrapped in the stored `uText` via
/// `__swift_stdlib_utext_openUTF8`; that text is then attached to the break
/// iterator with `__swift_stdlib_ubrk_setUText`. A failure reported by either
/// call traps.
///
/// - Parameter bufPtr: The UTF-8 code units to iterate over. The base address
///   is unwrapped without a check, so callers must pass a buffer with a
///   non-nil base address.
/// - Returns: The stored break iterator, now set to `bufPtr`'s contents.
internal static func getUBreakIterator(
  _ bufPtr: UnsafeBufferPointer<UTF8.CodeUnit>
) -> OpaquePointer {
  let tlsPtr = getPointer()
  let brkIter = tlsPtr[0].uBreakIterator
  let utext = tlsPtr[0].uText
  var err = __swift_stdlib_U_ZERO_ERROR
  let start = bufPtr.baseAddress._unsafelyUnwrappedUnchecked._asCChar
  _ = __swift_stdlib_utext_openUTF8(
    utext, start, Int64(bufPtr.count), &err)
  // Name the call that actually failed (this overload opens UTF-8 text, not
  // UChars), so a trap here is not confused with the UTF-16 overload.
  _precondition(err.isSuccess, "Unexpected utext_openUTF8 failure")
  __swift_stdlib_ubrk_setUText(brkIter, utext, &err)
  _precondition(err.isSuccess, "Unexpected ubrk_setUText failure")
  return brkIter
@@ -690,47 +690,37 @@ extension Unicode.Scalar.Properties {
/// all current case mappings. In the event more space is needed, it will be
/// allocated on the heap.
internal func _applyMapping(_ u_strTo: _U_StrToX) -> String {
var scratchBuffer = _Normalization._SegmentOutputBuffer(allZeros: ())
let count = scratchBuffer.withUnsafeMutableBufferPointer { bufPtr -> Int in
return _scalar.withUTF16CodeUnits { utf16 in
var err = __swift_stdlib_U_ZERO_ERROR
let correctSize = u_strTo(
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
Int32(bufPtr.count),
utf16.baseAddress._unsafelyUnwrappedUnchecked,
Int32(utf16.count),
"",
&err)
guard err.isSuccess ||
err == __swift_stdlib_U_BUFFER_OVERFLOW_ERROR else {
fatalError("Unexpected error case-converting Unicode scalar.")
}
return Int(correctSize)
}
}
let utf16Length = UnicodeScalar(UInt32(_value))!.utf16.count
var utf16 = _utf16CodeUnits
if _fastPath(count <= scratchBuffer.count) {
scratchBuffer.count = count
return String._fromWellFormedUTF16CodeUnits(scratchBuffer)
}
// TODO(UTF8 perf): Stack buffer first and then detect real count
let count = 64
var array = Array<UInt16>(repeating: 0, count: count)
array.withUnsafeMutableBufferPointer { bufPtr in
return _scalar.withUTF16CodeUnits { utf16 in
var err = __swift_stdlib_U_ZERO_ERROR
let correctSize = u_strTo(
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
Int32(bufPtr.count),
utf16.baseAddress._unsafelyUnwrappedUnchecked,
Int32(utf16.count),
"",
&err)
guard err.isSuccess else {
fatalError("Unexpected error case-converting Unicode scalar.")
let len: Int = array.withUnsafeMutableBufferPointer { bufPtr in
return withUnsafePointer(to: &utf16) {
(tuplePtr) -> Int in
return tuplePtr.withMemoryRebound(to: UInt16.self, capacity: 2) {
(utf16Pointer) -> Int in
var err = __swift_stdlib_U_ZERO_ERROR
let correctSize = u_strTo(
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
Int32(bufPtr.count),
utf16Pointer,
Int32(utf16Length),
"",
&err)
guard err.isSuccess else {
fatalError("Unexpected error case-converting Unicode scalar.")
}
// TODO: _sanityCheck(count == correctSize, "inconsistent ICU behavior")
return Int(correctSize)
}
_sanityCheck(count == correctSize, "inconsistent ICU behavior")
}
}
return String._fromWellFormedUTF16CodeUnits(array[..<count])
// TODO: replace `len` with `count`
return array[..<len].withUnsafeBufferPointer {
return String._uncheckedFromUTF16($0)
}
}
/// The lowercase mapping of the scalar.
@@ -10,390 +10,3 @@
//
//===----------------------------------------------------------------------===//
/// A class-bound source of UTF-16 code units addressed by integer offset.
///
/// `_UnmanagedOpaqueString` uses this as its backing object type when the
/// Objective-C runtime is not available.
public protocol _OpaqueString: class {
  /// The number of code units in the string; used as the upper bound of the
  /// full offset range when wrapping the whole object.
  var length: Int { get }

  /// Returns the code unit at offset `index`.
  func character(at index: Int) -> UInt16

  // FIXME: This is not an NSString method; I'd like to use
  // `getCharacters(_:,range:)`, but it would be weird to define
  // `_SwiftNSRange` without an Objective-C runtime.
  /// Copies the code units in `range` into the memory starting at `dest`.
  func copyCodeUnits(
    from range: Range<Int>,
    into dest: UnsafeMutablePointer<UInt16>)
}
/// A view of an offset range within an opaque string object, holding the
/// object without retaining it.
///
/// The backing object type is `_CocoaString` when the Objective-C runtime is
/// available and `_OpaqueString` otherwise; the two branches below are
/// otherwise parallel.
@usableFromInline
@_fixed_layout
internal struct _UnmanagedOpaqueString {
#if _runtime(_ObjC) // FIXME unify
  /// The backing object. Declared `unowned(unsafe)`, so this value does not
  /// keep it alive.
  @usableFromInline
  unowned(unsafe) let object: _CocoaString
#else
  /// The backing object. Declared `unowned(unsafe)`, so this value does not
  /// keep it alive.
  @usableFromInline
  unowned(unsafe) let object: _OpaqueString
#endif

  /// The offsets into `object` that this value covers.
  @usableFromInline
  let range: Range<Int>

  /// `false` when created over a whole object by the initializers below;
  /// the slicing subscripts set it once the covered range shrinks.
  @usableFromInline
  let isSlice: Bool

#if _runtime(_ObjC) // FIXME unify
  /// Memberwise initializer.
  @inlinable
  init(_ object: _CocoaString, range: Range<Int>, isSlice: Bool) {
    self.object = object
    self.range = range
    self.isSlice = isSlice
  }

  /// Wraps all of `object`, asking it for its length.
  @inline(never)
  init(_ object: _CocoaString) {
    let count = _stdlib_binary_CFStringGetLength(object)
    self.init(object, count: count)
  }

  /// Wraps offsets `0..<count` of `object` as a non-slice.
  @inlinable
  init(_ object: _CocoaString, count: Int) {
    self.init(object, range: 0..<count, isSlice: false)
  }
#else
  /// Memberwise initializer.
  @inlinable
  init(_ object: _OpaqueString, range: Range<Int>, isSlice: Bool) {
    self.object = object
    self.range = range
    self.isSlice = isSlice
  }

  /// Wraps all of `object`, using its `length`.
  @inline(never)
  init(_ object: _OpaqueString) {
    self.init(object, count: object.length)
  }

  /// Wraps offsets `0..<count` of `object` as a non-slice.
  @inlinable
  init(_ object: _OpaqueString, count: Int) {
    self.init(object, range: 0..<count, isSlice: false)
  }
#endif
}
extension _UnmanagedOpaqueString : Sequence {
  typealias Element = UTF16.CodeUnit

  /// Returns an iterator over every code unit in `range`.
  @inlinable
  func makeIterator() -> Iterator {
    return Iterator(self, startingAt: range.lowerBound)
  }

  /// Returns an iterator over the code units from offset `position` up to
  /// the end of `range`.
  @inlinable // FIXME(sil-serialize-all)
  internal func makeIterator(startingAt position: Int) -> Iterator {
    return Iterator(self, startingAt: position)
  }

  /// An iterator that fetches code units from the backing object in chunks
  /// into a small fixed-size buffer and then serves them from that buffer.
  @usableFromInline
  @_fixed_layout
  struct Iterator : IteratorProtocol {
    @usableFromInline
    internal typealias Element = UTF16.CodeUnit

#if _runtime(_ObjC) // FIXME unify
    @usableFromInline
    internal let _object: _CocoaString
#else
    @usableFromInline
    internal let _object: _OpaqueString
#endif

    /// The offsets that have not been copied into `_buffer` yet.
    @usableFromInline
    internal var _range: Range<Int>

    /// The most recently fetched chunk.
    @usableFromInline
    internal var _buffer = _FixedArray16<Element>()

    /// Position in `_buffer` of the next code unit to hand out.
    @usableFromInline
    internal var _bufferIndex: Int8 = 0

    @inlinable
    init(_ string: _UnmanagedOpaqueString, startingAt start: Int) {
      _object = string.object
      _range = start..<string.range.upperBound
    }

    /// Returns the next code unit, refilling the buffer when it has been
    /// used up, or `nil` once both the buffer and the remaining range are
    /// exhausted.
    @inlinable
    @inline(__always)
    mutating func next() -> Element? {
      guard _fastPath(_bufferIndex < _buffer.count) else {
        // Buffer drained: either we are done, or another chunk is needed.
        if _slowPath(_range.isEmpty) { return nil }
        return _nextOnSlowPath()
      }
      let unit = _buffer[Int(_bufferIndex)]
      _bufferIndex += 1
      return unit
    }

    /// Copies the next chunk of the remaining range into the buffer and
    /// returns its first code unit.
    @usableFromInline
    @inline(never)
    mutating func _nextOnSlowPath() -> Element {
      _sanityCheck(!_range.isEmpty)

      // The chunk is as large as the buffer allows, clamped to what is left.
      let chunkStart = _range.lowerBound
      let chunkEnd = Swift.min(
        chunkStart + _buffer.capacity,
        _range.upperBound)
      let chunk: Range<Int> = chunkStart..<chunkEnd

      let source = _UnmanagedOpaqueString(_object, range: chunk, isSlice: true)
      _buffer.count = chunk.count
      _buffer.withUnsafeMutableBufferPointer { destination in
        _sanityCheck(destination.count == chunk.count)
        source._copy(into: destination)
      }

      // Element 0 is returned below, so the next read starts at 1.
      _bufferIndex = 1
      _range = chunk.upperBound ..< _range.upperBound
      _fixLifetime(_object)
      return _buffer[0]
    }
  }
}
extension _UnmanagedOpaqueString : RandomAccessCollection {
  internal typealias IndexDistance = Int
  internal typealias Indices = Range<Index>
  @usableFromInline // FIXME(sil-serialize-all)
  internal typealias SubSequence = _UnmanagedOpaqueString

  /// An index wrapping an integer offset into the backing object.
  @_fixed_layout
  @usableFromInline
  struct Index : Strideable {
    @usableFromInline
    internal var _value: Int

    @inlinable
    @inline(__always)
    init(_ value: Int) {
      _value = value
    }

    @inlinable
    @inline(__always)
    func distance(to other: Index) -> Int {
      return other._value - _value
    }

    @inlinable
    @inline(__always)
    func advanced(by n: Int) -> Index {
      return Index(_value + n)
    }
  }

  /// The index of the first offset in `range`.
  @inlinable
  var startIndex: Index {
    return Index(range.lowerBound)
  }

  /// The index one past the last offset in `range`.
  @inlinable
  var endIndex: Index {
    return Index(range.upperBound)
  }

  /// The number of code units covered by `range`.
  @inlinable
  var count: Int {
    return range.count
  }

  /// Returns the code unit that the backing object reports at `position`.
  @inlinable // FIXME(sil-serialize-all)
  subscript(position: Index) -> UTF16.CodeUnit {
    let offset = position._value
    _sanityCheck(offset >= range.lowerBound)
    _sanityCheck(offset < range.upperBound)
#if _runtime(_ObjC) // FIXME unify
    return _cocoaStringSubscript(object, offset)
#else
    return object.character(at: offset)
#endif
  }

  /// Returns a view of the same object restricted to `bounds`.
  ///
  /// The result is marked as a slice if this value already was one, or if
  /// `bounds` covers fewer code units than `range`.
  @inlinable // FIXME(sil-serialize-all)
  subscript(bounds: Range<Index>) -> _UnmanagedOpaqueString {
    let lower = bounds.lowerBound._value
    let upper = bounds.upperBound._value
    _sanityCheck(lower >= range.lowerBound)
    _sanityCheck(upper <= range.upperBound)
    let narrowed: Range<Int> = lower ..< upper
    return _UnmanagedOpaqueString(
      object,
      range: narrowed,
      isSlice: isSlice || narrowed.count != range.count)
  }
}
extension _UnmanagedOpaqueString : _StringVariant {
  @usableFromInline internal typealias Encoding = Unicode.UTF16
  @usableFromInline internal typealias CodeUnit = Encoding.CodeUnit

  /// Always `false` for opaque strings.
  @inlinable
  var isASCII: Bool {
    @inline(__always) get { return false }
  }

  /// Traps unless `i` addresses a code unit inside `range`.
  @inlinable
  @inline(__always)
  func _boundsCheck(_ i: Index) {
    _precondition(i._value >= range.lowerBound && i._value < range.upperBound,
      "String index is out of bounds")
  }

  /// Traps unless `range` lies within this value's own range.
  @inlinable
  @inline(__always)
  func _boundsCheck(_ range: Range<Index>) {
    _precondition(
      range.lowerBound._value >= self.range.lowerBound &&
      range.upperBound._value <= self.range.upperBound,
      "String index range is out of bounds")
  }

  /// Traps unless `offset`, counted from the start of `range`, addresses a
  /// code unit.
  @inlinable
  @inline(__always)
  func _boundsCheck(offset: Int) {
    _precondition(offset >= 0 && offset < range.count,
      "String index is out of bounds")
  }

  /// Traps unless `range`, counted from the start of this value's range,
  /// lies within `0...count`.
  @inlinable
  @inline(__always)
  func _boundsCheck(offsetRange range: Range<Int>) {
    _precondition(range.lowerBound >= 0 && range.upperBound <= count,
      "String index range is out of bounds")
  }

  /// Returns the code unit `offset` positions after the start of `range`.
  @inlinable // FIXME(sil-serialize-all)
  subscript(offset: Int) -> UTF16.CodeUnit {
    _sanityCheck(offset >= 0 && offset < count)
#if _runtime(_ObjC) // FIXME unify
    return _cocoaStringSubscript(object, range.lowerBound + offset)
#else
    return object.character(at: range.lowerBound + offset)
#endif
  }

  /// Returns the sub-view covering `offsetRange`, where offsets are counted
  /// from the start of `range`.
  @inlinable // FIXME(sil-serialize-all)
  subscript(offsetRange: Range<Int>) -> _UnmanagedOpaqueString {
    _sanityCheck(offsetRange.lowerBound >= 0)
    _sanityCheck(offsetRange.upperBound <= range.count)
    let b: Range<Int> =
      range.lowerBound + offsetRange.lowerBound ..<
      range.lowerBound + offsetRange.upperBound
    let newSlice = self.isSlice || b.count != range.count
    return _UnmanagedOpaqueString(object, range: b, isSlice: newSlice)
  }

  /// Returns the sub-view covering the first `offsetRange.upperBound` code
  /// units.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(offsetRange: PartialRangeUpTo<Int>) -> SubSequence {
    _sanityCheck(offsetRange.upperBound <= range.count)
    let b: Range<Int> =
      range.lowerBound ..<
      range.lowerBound + offsetRange.upperBound
    let newSlice = self.isSlice || b.count != range.count
    return _UnmanagedOpaqueString(object, range: b, isSlice: newSlice)
  }

  /// Returns the sub-view covering offsets `0...offsetRange.upperBound`.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(offsetRange: PartialRangeThrough<Int>) -> SubSequence {
    // The upper bound is inclusive, so it must address an existing code
    // unit; `upperBound == count` would produce a range one past the end.
    _sanityCheck(offsetRange.upperBound < range.count)
    let b: Range<Int> =
      range.lowerBound ..<
      range.lowerBound + offsetRange.upperBound + 1
    let newSlice = self.isSlice || b.count != range.count
    return _UnmanagedOpaqueString(object, range: b, isSlice: newSlice)
  }

  /// Returns the sub-view from offset `offsetRange.lowerBound` to the end.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(offsetRange: PartialRangeFrom<Int>) -> SubSequence {
    // Mirror the `Range<Int>` overload: a negative lower bound would reach
    // before `range`, while `lowerBound == count` is the valid empty tail.
    _sanityCheck(offsetRange.lowerBound >= 0)
    _sanityCheck(offsetRange.lowerBound <= range.count)
    let b: Range<Int> =
      range.lowerBound + offsetRange.lowerBound ..<
      range.upperBound
    let newSlice = self.isSlice || b.count != range.count
    return _UnmanagedOpaqueString(object, range: b, isSlice: newSlice)
  }

  /// Copies the code units in `range` into `dest`.
  ///
  /// Does nothing when `range` is empty. `dest` must have room for at least
  /// `range.count` elements (checked only by `_sanityCheck`).
  @inlinable // FIXME(sil-serialize-all)
  internal func _copy(
    into dest: UnsafeMutableBufferPointer<UTF16.CodeUnit>
  ) {
    _sanityCheck(dest.count >= range.count)
    guard range.count > 0 else { return }
#if _runtime(_ObjC) // FIXME unify
    _cocoaStringCopyCharacters(
      from: object,
      range: range,
      into: dest.baseAddress!)
#else
    object.copyCodeUnits(from: range, into: dest.baseAddress!)
#endif
  }

  /// Copies the code units in `range` into `dest`, which must have 16-bit
  /// elements; any other element width hits `_sanityCheckFailure`.
  @inlinable // FIXME(sil-serialize-all)
  internal func _copy<TargetCodeUnit>(
    into dest: UnsafeMutableBufferPointer<TargetCodeUnit>
  )
  where TargetCodeUnit : FixedWidthInteger & UnsignedInteger {
    guard TargetCodeUnit.bitWidth == 16 else {
      _sanityCheckFailure("Narrowing copy from opaque strings is not implemented")
    }
    _sanityCheck(dest.count >= range.count)
    guard range.count > 0 else { return }
    // The width was verified above, so the destination is viewed as UTF-16.
    let d = UnsafeMutableRawPointer(dest.baseAddress!)
      .assumingMemoryBound(to: UTF16.CodeUnit.self)
#if _runtime(_ObjC) // FIXME unify
    _cocoaStringCopyCharacters(from: object, range: range, into: d)
#else
    object.copyCodeUnits(from: range, into: d)
#endif
  }

  /// Decodes Unicode scalars from the code units of an opaque string, using
  /// one code unit of lookahead to pair surrogates.
  @usableFromInline // FIXME(sil-serialize-all)
  @_fixed_layout // FIXME(resilience)
  internal struct UnicodeScalarIterator : IteratorProtocol {
    var _base: _UnmanagedOpaqueString.Iterator
    /// The next code unit not yet consumed, or `nil` at the end.
    var _peek: UTF16.CodeUnit?

    @usableFromInline // FIXME(sil-serialize-all)
    init(_ base: _UnmanagedOpaqueString) {
      self._base = base.makeIterator()
      self._peek = _base.next()
    }

    /// Returns the next scalar, or `nil` when the code units are exhausted.
    ///
    /// A lead surrogate followed by a trail surrogate is decoded as a pair;
    /// any other non-scalar code unit yields the replacement character.
    @usableFromInline // FIXME(sil-serialize-all)
    mutating func next() -> Unicode.Scalar? {
      if _slowPath(_peek == nil) { return nil }
      let u0 = _peek._unsafelyUnwrappedUnchecked
      _peek = _base.next()
      if _fastPath(UTF16._isScalar(u0)) {
        return Unicode.Scalar(_unchecked: UInt32(u0))
      }
      if UTF16.isLeadSurrogate(u0) && _peek != nil {
        let u1 = _peek._unsafelyUnwrappedUnchecked
        if UTF16.isTrailSurrogate(u1) {
          // Consume the trail surrogate as well.
          _peek = _base.next()
          return UTF16._decodeSurrogates(u0, u1)
        }
      }
      return Unicode.Scalar._replacementCharacter
    }
  }

  /// Returns an iterator over the Unicode scalars of this string.
  @usableFromInline // FIXME(sil-serialize-all)
  @inline(never)
  func makeUnicodeScalarIterator() -> UnicodeScalarIterator {
    return UnicodeScalarIterator(self)
  }
}
#if _runtime(_ObjC)
extension _UnmanagedOpaqueString {
  /// Returns a Cocoa string holding exactly the code units in `range`.
  ///
  /// When this value is not a slice the backing object is returned as-is;
  /// otherwise a slice of it is requested from `_cocoaStringSlice`.
  @usableFromInline
  @inline(never)
  internal func cocoaSlice() -> _CocoaString {
    if isSlice {
      // FIXME: This usually copies storage; maybe add an NSString subclass
      // for opaque slices?
      return _cocoaStringSlice(object, range)
    }
    return object
  }
}
#endif
-286
View File
@@ -11,289 +11,3 @@
//===----------------------------------------------------------------------===//
import SwiftShims
/// An unmanaged string whose code units are 8 bits wide.
@usableFromInline
internal typealias _UnmanagedASCIIString = _UnmanagedString<UInt8>
/// An unmanaged string whose code units are UTF-16 code units.
@usableFromInline
internal typealias _UnmanagedUTF16String = _UnmanagedString<UTF16.CodeUnit>
/// Copies `count` elements from `src` to `dst`, widening each one to the
/// larger `Target` type.
///
/// - Parameters:
///   - dst: Destination with room for at least `count` elements.
///   - src: Source holding at least `count` elements.
///   - count: Number of elements to copy; must not be negative.
@inlinable
@inline(__always)
internal
func memcpy_zext<
  Target: FixedWidthInteger & UnsignedInteger,
  Source: FixedWidthInteger & UnsignedInteger
>(
  dst: UnsafeMutablePointer<Target>, src: UnsafePointer<Source>, count: Int
) {
  _sanityCheck(Source.bitWidth < Target.bitWidth)
  _sanityCheck(count >= 0)
  // A manual counter with wrapping increment is used instead of for-in over
  // a range: that avoids Range's precondition checks, which lets the loop
  // be vectorized.
  var offset = 0
  while offset < count {
    dst[offset] = Target(src[offset])
    offset &+= 1
  }
}
/// Copies `count` elements from `src` to `dst`, truncating each one to the
/// narrower `Target` type.
///
/// - Parameters:
///   - dst: Destination with room for at least `count` elements.
///   - src: Source holding at least `count` elements.
///   - count: Number of elements to copy; must not be negative.
@inlinable
@inline(__always)
internal
func memcpy_trunc<
  Target: FixedWidthInteger & UnsignedInteger,
  Source: FixedWidthInteger & UnsignedInteger
>(
  dst: UnsafeMutablePointer<Target>, src: UnsafePointer<Source>, count: Int
) {
  _sanityCheck(Source.bitWidth > Target.bitWidth)
  _sanityCheck(count >= 0)
  // A manual counter with wrapping increment is used instead of for-in over
  // a range: that avoids Range's precondition checks, which lets the loop
  // be vectorized.
  var offset = 0
  while offset < count {
    dst[offset] = Target(truncatingIfNeeded: src[offset])
    offset &+= 1
  }
}
/// Copies `count` elements from `src` to `dst` without changing their type.
///
/// Nothing is copied when `count` is zero or negative.
@inlinable
@inline(__always)
internal
func memcpy_<
  Source: FixedWidthInteger & UnsignedInteger
>(
  dst: UnsafeMutablePointer<Source>, src: UnsafePointer<Source>, count: Int
) {
  // A manual counter with wrapping increment is used instead of for-in over
  // a range: that avoids Range's precondition checks, which lets the loop
  // be vectorized.
  var offset = 0
  while offset < count {
    dst[offset] = src[offset]
    offset &+= 1
  }
}
/// A pointer-and-count view of contiguous string code units.
///
/// The initializer sanity-checks that `CodeUnit` is `UInt8` or `UInt16`.
@_fixed_layout
@usableFromInline
internal
struct _UnmanagedString<CodeUnit>
  where CodeUnit : FixedWidthInteger & UnsignedInteger {
  // TODO: Use the extra 13 bits
  //
  // StringGuts when representing UnmanagedStrings should have an extra 13 bits
  // *at least* to store whatever we want, e.g. flags. x86_64 ABI has at least
  // 13 bits due to:
  // * 8 bits from count: 56-bit (max) address spaces means we need at most
  // 56-bit count
  // * 5 bits from BridgeObject: 64 - 2 tagging - 56-bit address space - 1 bit
  // designating UnsafeString
  //

  /// Address of the first code unit.
  @usableFromInline
  internal var start: UnsafePointer<CodeUnit>

  /// Number of code units starting at `start`.
  @usableFromInline
  internal var count: Int

  /// Creates a view of `count` code units beginning at `start`.
  @inlinable
  init(start: UnsafePointer<CodeUnit>, count: Int) {
    _sanityCheck(CodeUnit.self == UInt8.self || CodeUnit.self == UInt16.self)
    self.start = start
    self.count = count
  }

  /// Creates a view of the contents of `bufPtr`.
  ///
  /// The base address is unwrapped without a check, so `bufPtr` must have a
  /// non-nil base address.
  @inlinable
  init(_ bufPtr: UnsafeBufferPointer<CodeUnit>) {
    self.init(
      start: bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
      count: bufPtr.count)
  }
}
extension _UnmanagedString {
  /// The address one past the last code unit.
  @inlinable
  internal var end: UnsafePointer<CodeUnit> {
    return start.advanced(by: count)
  }

  /// `start` as an untyped pointer.
  @inlinable
  internal var rawStart: UnsafeRawPointer {
    return UnsafeRawPointer(start)
  }

  /// `end` as an untyped pointer.
  @inlinable
  internal var rawEnd: UnsafeRawPointer {
    return UnsafeRawPointer(end)
  }

  /// The code units as a typed buffer.
  @inlinable
  internal var buffer: UnsafeBufferPointer<CodeUnit> {
    return UnsafeBufferPointer(start: start, count: count)
  }

  /// The bytes between `rawStart` and `rawEnd` as an untyped buffer.
  @inlinable
  internal var rawBuffer: UnsafeRawBufferPointer {
    let first = rawStart
    let byteCount = first.distance(to: rawEnd)
    return UnsafeRawBufferPointer(start: first, count: byteCount)
  }
}
extension _UnmanagedString : RandomAccessCollection {
  internal typealias Element = UTF16.CodeUnit
  // Note that the Index type can't be an integer offset because Collection
  // requires that SubSequence share indices with the original collection.
  // Therefore, we use pointers as the index type; however, we also provide
  // integer subscripts as a convenience, in a separate extension below.
  @usableFromInline // FIXME(sil-serialize-all)
  internal typealias Index = UnsafePointer<CodeUnit>
  internal typealias IndexDistance = Int
  internal typealias Indices = Range<Index>
  @usableFromInline // FIXME(sil-serialize-all)
  internal typealias SubSequence = _UnmanagedString

  /// The address of the first code unit.
  @inlinable
  internal
  var startIndex: Index { return start }

  /// The address one past the last code unit.
  @inlinable
  internal
  var endIndex: Index { return end }

  /// Returns the code unit stored at `position`, widened to a UTF-16 code
  /// unit.
  @inlinable
  internal subscript(position: Index) -> UTF16.CodeUnit {
    @inline(__always)
    get {
      _sanityCheck(position >= start && position < end)
      let stored: CodeUnit = position.pointee
      return UTF16.CodeUnit(stored)
    }
  }

  /// Returns a view of the code units between the two addresses in `bounds`.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(_ bounds: Range<Index>) -> SubSequence {
    let first = bounds.lowerBound
    _sanityCheck(first >= start && bounds.upperBound <= end)
    return _UnmanagedString(start: first, count: bounds.count)
  }
}
extension _UnmanagedString : _StringVariant {
  /// `true` when the code units are 8 bits wide.
  @inlinable
  internal var isASCII: Bool {
    // NOTE: For now, single byte means ASCII. Might change in future
    return CodeUnit.bitWidth == 8
  }

  /// Returns the code unit at `offset` from `start`, widened to a UTF-16
  /// code unit.
  @inlinable
  internal subscript(offset: Int) -> UTF16.CodeUnit {
    @inline(__always)
    get {
      _sanityCheck(offset >= 0 && offset < count)
      return UTF16.CodeUnit(start[offset])
    }
  }

  /// Returns the sub-view covering `offsetRange`, counted from `start`.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(offsetRange: Range<Int>) -> _UnmanagedString {
    _sanityCheck(offsetRange.lowerBound >= 0 && offsetRange.upperBound <= count)
    return _UnmanagedString(
      start: start + offsetRange.lowerBound,
      count: offsetRange.count)
  }

  /// Returns the sub-view from offset `offsetRange.lowerBound` to the end.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(offsetRange: PartialRangeFrom<Int>) -> SubSequence {
    // The lower bound must not pass the end either; otherwise the count
    // computed below would be negative.
    _sanityCheck(
      offsetRange.lowerBound >= 0 && offsetRange.lowerBound <= count)
    return _UnmanagedString(
      start: start + offsetRange.lowerBound,
      count: self.count - offsetRange.lowerBound
    )
  }

  /// Returns the sub-view covering the first `offsetRange.upperBound` code
  /// units.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(offsetRange: PartialRangeUpTo<Int>) -> SubSequence {
    // The upper bound becomes the new count, so it must not be negative.
    _sanityCheck(
      offsetRange.upperBound >= 0 && offsetRange.upperBound <= count)
    return _UnmanagedString(
      start: start,
      count: offsetRange.upperBound
    )
  }

  /// Returns the sub-view covering offsets `0...offsetRange.upperBound`.
  @inlinable // FIXME(sil-serialize-all)
  internal subscript(offsetRange: PartialRangeThrough<Int>) -> SubSequence {
    _sanityCheck(offsetRange.upperBound < count)
    return _UnmanagedString(
      start: start,
      count: offsetRange.upperBound + 1
    )
  }

  /// Copies all code units into `target`, converting between 8-bit and
  /// 16-bit widths as needed.
  ///
  /// Does nothing when the string is empty. `target` must have room for at
  /// least `count` elements (checked only by `_sanityCheck`).
  @inlinable // FIXME(sil-serialize-all)
  @inline(__always)
  internal func _copy<TargetCodeUnit>(
    into target: UnsafeMutableBufferPointer<TargetCodeUnit>
  ) where TargetCodeUnit : FixedWidthInteger & UnsignedInteger {
    _sanityCheck(
      TargetCodeUnit.self == UInt8.self || TargetCodeUnit.self == UInt16.self)
    guard count > 0 else { return }
    _sanityCheck(target.count >= self.count)
    if CodeUnit.bitWidth == TargetCodeUnit.bitWidth {
      // Same width: plain byte copy.
      _memcpy(
        dest: target.baseAddress!,
        src: self.start,
        size: UInt(self.count * MemoryLayout<CodeUnit>.stride))
    } else if CodeUnit.bitWidth == 8 {
      // 8-bit source into a 16-bit destination: widen each unit.
      _sanityCheck(TargetCodeUnit.bitWidth == 16)
      memcpy_zext(
        dst: target.baseAddress._unsafelyUnwrappedUnchecked,
        src: start,
        count: self.count)
    } else {
      // 16-bit source into an 8-bit destination: truncate each unit.
      _sanityCheck(CodeUnit.bitWidth == 16 && TargetCodeUnit.bitWidth == 8)
      // NOTE(review): the message says "ASCII only", but this rejects only
      // units >= 0xFF; values 0x80...0xFE pass. Confirm whether `> 0x7F`
      // was intended.
      _sanityCheck(self.filter { $0 >= UInt8.max }.isEmpty, "ASCII only")
      memcpy_trunc(
        dst: target.baseAddress._unsafelyUnwrappedUnchecked,
        src: start,
        count: self.count)
    }
  }

  /// Decodes Unicode scalars from the code units of an unmanaged string by
  /// walking an integer offset.
  @_fixed_layout
  @usableFromInline // FIXME(sil-serialize-all)
  internal struct UnicodeScalarIterator : IteratorProtocol {
    @usableFromInline // FIXME(sil-serialize-all)
    let _base: _UnmanagedString
    /// Offset of the next code unit to decode.
    @usableFromInline // FIXME(sil-serialize-all)
    var _offset: Int

    @inlinable // FIXME(sil-serialize-all)
    init(_ base: _UnmanagedString) {
      self._base = base
      self._offset = 0
    }

    /// Returns the next scalar, or `nil` once every code unit is consumed.
    ///
    /// With 8-bit code units, or when the unit is a scalar on its own, one
    /// unit is consumed. A lead surrogate followed by a trail surrogate is
    /// decoded as a pair; any other unit yields the replacement character.
    @inlinable // FIXME(sil-serialize-all)
    mutating func next() -> Unicode.Scalar? {
      if _slowPath(_offset == _base.count) { return nil }
      let u0 = _base[_offset]
      if _fastPath(CodeUnit.bitWidth == 8 || UTF16._isScalar(u0)) {
        _offset += 1
        return Unicode.Scalar(u0)
      }
      if UTF16.isLeadSurrogate(u0) && _offset + 1 < _base.count {
        let u1 = _base[_offset + 1]
        if UTF16.isTrailSurrogate(u1) {
          _offset += 2
          return UTF16._decodeSurrogates(u0, u1)
        }
      }
      // Unpaired surrogate: skip it and substitute.
      _offset += 1
      return Unicode.Scalar._replacementCharacter
    }
  }

  /// Returns an iterator over the Unicode scalars of this string.
  @inlinable
  func makeUnicodeScalarIterator() -> UnicodeScalarIterator {
    return UnicodeScalarIterator(self)
  }
}