mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
This is a giant squashing of a lot of individual changes prototyping a switch of String in Swift 5 to be natively encoded as UTF-8. It includes what's necessary for a functional prototype, dropping some history, but still leaves plenty of history available for future commits. My apologies to anyone trying to do code archeology between this commit and the one prior. This was the lesser of evils.
751 lines
22 KiB
Swift
751 lines
22 KiB
Swift
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
// NOTE: This is a prototype, it does not have e.g. 32-bit support yet.
|
|
//
|
|
|
|
//
|
|
// StringObject abstracts the bit-level interpretation and creation of the
|
|
// String struct.
|
|
//
|
|
|
|
@_fixed_layout
|
|
@usableFromInline
|
|
internal struct _StringObject {
|
|
//
|
|
// Laid out as (_otherBits, _object), which allows small string contents to
|
|
// naturally start on vector-alignment.
|
|
//
|
|
|
|
@usableFromInline
|
|
internal var _otherBits: UInt
|
|
|
|
@usableFromInline
|
|
internal var _object: Builtin.BridgeObject
|
|
|
|
// @inlinable @inline(__always)
|
|
// internal init(_ otherBits: UInt, _ object: Builtin.BridgeObject) {
|
|
// self._otherBits = otherBits
|
|
// self._object = object
|
|
// _invariantCheck()
|
|
// }
|
|
|
|
@inlinable @inline(__always)
|
|
internal init(zero: ()) {
|
|
self._otherBits = 0
|
|
self._object = Builtin.reinterpretCast(0)
|
|
}
|
|
}
|
|
|
|
// Raw
|
|
extension _StringObject {
|
|
@usableFromInline
|
|
internal typealias RawBitPattern = (UInt, UInt)
|
|
|
|
@inlinable
|
|
internal var rawBits: RawBitPattern {
|
|
@inline(__always) get { return (_otherBits, objectRawBits) }
|
|
}
|
|
|
|
@inlinable @inline(__always)
|
|
internal init(rawObject: UInt, rawDiscrim: UInt, otherBits: UInt) {
|
|
let builtinRawObject: Builtin.Int64 = Builtin.reinterpretCast(rawObject)
|
|
let builtinDiscrim: Builtin.Int64 = Builtin.reinterpretCast(rawDiscrim)
|
|
self._object = Builtin.reinterpretCast(
|
|
Builtin.stringObjectOr_Int64(builtinRawObject, builtinDiscrim))
|
|
|
|
self._otherBits = otherBits
|
|
_invariantCheck()
|
|
}
|
|
|
|
@inlinable @inline(__always)
|
|
internal init(raw bits: RawBitPattern) {
|
|
self.init(zero:())
|
|
self._otherBits = bits.0
|
|
self._object = Builtin.reinterpretCast(bits.1)
|
|
_sanityCheck(self.rawBits == bits)
|
|
_invariantCheck()
|
|
}
|
|
|
|
@inlinable @_transparent
|
|
internal var objectRawBits: UInt {
|
|
@inline(__always) get { return Builtin.reinterpretCast(_object) }
|
|
}
|
|
|
|
@inlinable @_transparent
|
|
internal var undiscriminatedObjectRawBits: UInt {
|
|
@inline(__always) get {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
return (objectRawBits & 0x00FF_FFFF_FFFF_FFFF)
|
|
#endif
|
|
}
|
|
}
|
|
|
|
// Namespace to hold magic numbers
|
|
@usableFromInline @_frozen
|
|
enum Nibbles {}
|
|
}
|
|
|
|
/*
|
|
|
|
Encoding is optimized for common fast creation. The canonical empty string,
|
|
ASCII small strings, as well as most literals, have all consecutive 1s in their
|
|
high nibble mask, and thus can all be encoded as a logical immediate operand
|
|
on arm64.
|
|
|
|
The high nibble of the bridge object has the following encoding:
|
|
|
|
┌───────┬──────────┬───────┬─────┬──────┐
|
|
│ Form │ b63 │ b62 │ b61 │ b60 │
|
|
├───────┼──────────┼───────┼─────┼──────┤
|
|
│ Small │ 1 │ ASCII │ 1 │ 0 │
|
|
│ Large │ Immortal │ ObjC │ 0 │ Slow │
|
|
└───────┴──────────┴───────┴─────┴──────┘
|
|
|
|
Immortal (b63): Should the Swift runtime skip ARC
|
|
- Small strings are just values, always immortal
|
|
- Large strings can sometimes be immortal, e.g. literals
|
|
ObjC (b62): Should the runtime perform ObjC ARC rather than Swift ARC
|
|
- Lazily-bridged NSStrings set this bit, are managed by ObjC ARC
|
|
- Note: Small strings repurpose this bit to denote ASCII-only contents
|
|
isSmall (b61): Dedicated bit to denote small strings
|
|
Slow (b60): If set, string is unable to provide access to contiguous UTF-8
|
|
- Small strings can spill to the stack
|
|
- Native strings are UTF-8
|
|
- Lazily bridged Cocoa strings or other foreign strings may be able to
|
|
|
|
The canonical empty string is the zero-sized small string. It has a leading
|
|
nibble of 1110, and all other bits are 0.
|
|
|
|
*/
|
|
extension _StringObject.Nibbles {
|
|
// The canonical empty sting is an empty small string
|
|
@usableFromInline
|
|
internal static var emptyString: UInt {
|
|
@inline(__always) get { return _StringObject.Nibbles.small(isASCII: true) }
|
|
}
|
|
}
|
|
|
|
/*
|
|
|
|
Large strings can either be "native", "shared", or "foreign".
|
|
|
|
Native strings have tail-allocated storage, which begins at an offset of
|
|
`nativeBias` from the storage object's address. String literals, which reside
|
|
in the constant section, are encoded as their start address minus `nativeBias`,
|
|
unifying code paths for both literals ("immortal native") and native strings.
|
|
Native Strings are always managed by the Swift runtime.
|
|
|
|
Shared strings do not have tail-allocated storage, but can provide access
|
|
upon query to contiguous UTF-8 code units. Lazily-bridged NSStrings capable of
|
|
providing access to contiguous ASCII/UTF-8 set the ObjC bit. Accessing shared
|
|
string's pointer should always be behind a resilience barrier, permitting
|
|
future evolution.
|
|
|
|
Foreign strings, currently, only encompass lazily-bridged NSStrings that
|
|
cannot be treated as "shared". Such strings may provide access to contiguous
|
|
UTF-16, or may be discontiguous in storage. Accessing foreign strings should
|
|
remain behind a resilience barrier for future evolution.
|
|
|
|
┌────────────┐
|
|
│ nativeBias │
|
|
├────────────┤
|
|
│ 32 │
|
|
└────────────┘
|
|
|
|
|
|
The rest of the "object" bit representation for large strings:
|
|
|
|
┌──────────┬─────────┬─────┬─────┬─────┬─────┬────────────┐
|
|
│ Form │ b63:b60 │ b59 │ b58 │ b57 │ b56 │ b55:b0 │
|
|
├──────────┼─────────┼─────┼─────┼─────┼─────┼────────────┤
|
|
│ Native │ x 0 0 0 │ 0 │ TBD │ TBD │ TBD │ objectAddr │
|
|
│ Shared │ x x 0 0 │ 1 │ TBD │ TBD │ TBD │ objectAddr │
|
|
│ Foreign │ x 0 0 1 │ 1 │ TBD │ TBD │ TBD │ objectAddr │
|
|
└──────────┴─────────┴─────┴─────┴─────┴─────┴────────────┘
|
|
|
|
b59: Set if not a typical tail-allocated native Swift string
|
|
- Native Swift strings are tail-allocated contiguous UTF-8
|
|
- Shared strings can provide access to contiguous UTF-8
|
|
- Foreign string cannot
|
|
|
|
TODO(UTF8): Allocate the rest of the bits appropriately
|
|
|
|
objectAddr: The address of the beginning of the potentially-managed object.
|
|
|
|
Other foreign forms are reserved for the future.
|
|
|
|
All forms share the same structure for the rest of the bits:
|
|
|
|
┌────────────┬───────┐
|
|
│ b63:48 │ b47:0 │
|
|
├────────────┼───────┤
|
|
│ perf flags │ count │
|
|
└────────────┴───────┘
|
|
|
|
Allocation of performance flags is TBD, un-used bits will be reserved for
|
|
future use. Count stores the number of code units: corresponds to `endIndex`.
|
|
|
|
*/
|
|
extension _StringObject.Nibbles {
|
|
// Discriminator for small strings
|
|
@inlinable @inline(__always)
|
|
internal static func small(isASCII: Bool) -> UInt {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
return isASCII ? 0xE000_0000_0000_0000 : 0xA000_0000_0000_0000
|
|
#endif
|
|
}
|
|
|
|
// Discriminator for large, immortal, swift-native strings
|
|
@inlinable @inline(__always)
|
|
internal static func largeImmortal() -> UInt {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
return 0x8000_0000_0000_0000
|
|
#endif
|
|
}
|
|
|
|
// Discriminator for large, mortal (i.e. managed), swift-native strings
|
|
@inlinable @inline(__always)
|
|
internal static func largeMortal() -> UInt {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
return 0x0000_0000_0000_0000
|
|
#endif
|
|
}
|
|
|
|
// Discriminator for large, shared, mortal (i.e. managed), swift-native
|
|
// strings
|
|
@inlinable @inline(__always)
|
|
internal static func largeSharedMortal() -> UInt {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
return 0x0800_0000_0000_0000
|
|
#endif
|
|
}
|
|
|
|
internal static func largeCocoa(providesFastUTF8: Bool) -> UInt {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
return providesFastUTF8 ? 0x4800_0000_0000_0000 : 0x5800_0000_0000_0000
|
|
#endif
|
|
}
|
|
}
|
|
|
|
extension _StringObject {
|
|
@inlinable
|
|
internal static var nativeBias: UInt { @inline(__always) get { return 32 } }
|
|
|
|
@inlinable
|
|
internal var isImmortal: Bool {
|
|
@inline(__always) get {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
return (objectRawBits & 0x8000_0000_0000_0000) != 0
|
|
#endif
|
|
}
|
|
}
|
|
|
|
@inlinable
|
|
internal var isMortal: Bool {
|
|
@inline(__always) get { return !isImmortal }
|
|
}
|
|
|
|
@inlinable
|
|
internal var isSmall: Bool {
|
|
@inline(__always) get {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
return (objectRawBits & 0x2000_0000_0000_0000) != 0
|
|
#endif
|
|
}
|
|
}
|
|
|
|
@inlinable
|
|
internal var isLarge: Bool { @inline(__always) get { return !isSmall } }
|
|
|
|
// Whether this string can provide access to contiguous UTF-8 code units:
|
|
// - Small strings can by spilling to the stack
|
|
// - Large native strings can through an offset
|
|
// - Shared strings can:
|
|
// - Cocoa strings which respond to e.g. CFStringGetCStringPtr()
|
|
// - TODO(UTF8): non-Cocoa shared strings
|
|
@inlinable
|
|
internal var providesFastUTF8: Bool {
|
|
@inline(__always) get {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
return (objectRawBits & 0x1000_0000_0000_0000) == 0
|
|
#endif
|
|
}
|
|
}
|
|
|
|
@inlinable
|
|
internal var isForeign: Bool {
|
|
@inline(__always) get { return !providesFastUTF8 }
|
|
}
|
|
|
|
// Whether we are a mortal, native string
|
|
@inlinable
|
|
internal var hasNativeStorage: Bool {
|
|
@inline(__always) get {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
return (objectRawBits & 0xF800_0000_0000_0000) == 0
|
|
#endif
|
|
}
|
|
}
|
|
|
|
// Whether we are a mortal, shared string (managed by Swift runtime)
|
|
internal var hasSharedStorage: Bool {
|
|
@inline(__always) get {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
return (objectRawBits & 0xF800_0000_0000_0000)
|
|
== Nibbles.largeSharedMortal()
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
|
|
// Queries conditional on being in a large or fast form.
|
|
extension _StringObject {
|
|
// Whether this string is native, presupposing it is both large and fast
|
|
@inlinable
|
|
internal var largeFastIsNative: Bool {
|
|
@inline(__always) get {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
_sanityCheck(isLarge && providesFastUTF8)
|
|
return (objectRawBits & 0x0800_0000_0000_0000) == 0
|
|
#endif
|
|
}
|
|
}
|
|
// Whether this string is shared, presupposing it is both large and fast
|
|
@inlinable
|
|
internal var largeFastIsShared: Bool {
|
|
@inline(__always) get { return !largeFastIsNative }
|
|
}
|
|
|
|
// Whether this string is a lazily-bridged NSString, presupposing it is large
|
|
@inlinable
|
|
internal var largeIsCocoa: Bool {
|
|
@inline(__always) get {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
_sanityCheck(isLarge)
|
|
return (objectRawBits & 0x4000_0000_0000_0000) != 0
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
|
|
Small strings have the following per-byte layout. When stored in memory
|
|
(little-endian), their first character ('a') is in the lowest address and their
|
|
top-nibble and count is in the highest address.
|
|
|
|
┌───┬───┬───┬───┬───┬───┬───┬───┬────┬────┬────┬────┬────┬────┬───┬───────────┐
|
|
│ 7 │ 6 │ 5 │ 4 │ 3 │ 2 │ 1 │ 0 │ 15 │ 14 │ 13 │ 12 │ 11 │ 10 │ 9 │ 8 │
|
|
├───┼───┼───┼───┼───┼───┼───┼───┼────┼────┼────┼────┼────┼────┼───┼───────────┤
|
|
│ h │ g │ f │ e │ d │ c │ b │ a │ p │ o │ n │ l │ k │ j │ i │ 1x0x count│
|
|
└───┴───┴───┴───┴───┴───┴───┴───┴────┴────┴────┴────┴────┴────┴───┴───────────┘
|
|
|
|
*/
|
|
extension _StringObject {
|
|
@inlinable
|
|
internal var smallCount: Int {
|
|
@inline(__always)
|
|
get {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
_sanityCheck(isSmall)
|
|
return Int(bitPattern: (objectRawBits & 0x0F00_0000_0000_0000) &>> 56)
|
|
#endif
|
|
}
|
|
}
|
|
|
|
@inlinable @inline(__always)
|
|
mutating internal func setSmallCount(_ count: Int, isASCII: Bool) {
|
|
// TODO(UTF8 codegen): Ensure this is just a couple simple ops
|
|
_sanityCheck(isSmall && count <= _SmallString.capacity)
|
|
|
|
let rawObject = self.undiscriminatedObjectRawBits
|
|
let discrim = Nibbles.small(isASCII: isASCII)
|
|
| (UInt(bitPattern: count) &<< 56)
|
|
self = _StringObject(
|
|
rawObject: rawObject, rawDiscrim: discrim, otherBits: self._otherBits)
|
|
}
|
|
|
|
@inlinable
|
|
internal var smallIsASCII: Bool {
|
|
@inline(__always)
|
|
get {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
_sanityCheck(isSmall)
|
|
return objectRawBits & 0x4000_0000_0000_0000 != 0
|
|
#endif
|
|
}
|
|
}
|
|
@inlinable
|
|
internal var asSmallString: _SmallString {
|
|
@inline(__always)
|
|
get {
|
|
_sanityCheck(isSmall)
|
|
return _SmallString(raw:rawBits)
|
|
}
|
|
}
|
|
|
|
@inlinable @inline(__always)
|
|
internal init(_ small: _SmallString) {
|
|
self.init(raw: small.rawBits)
|
|
}
|
|
|
|
// Canonical empty pattern: small zero-length string
|
|
@inlinable @inline(__always)
|
|
internal init(empty:()) {
|
|
self._otherBits = 0
|
|
self._object = Builtin.reinterpretCast(Nibbles.emptyString)
|
|
|
|
_sanityCheck(self.smallCount == 0)
|
|
_invariantCheck()
|
|
}
|
|
}
|
|
|
|
// Extract
|
|
extension _StringObject {
|
|
@inlinable
|
|
internal var largeCount: Int {
|
|
@inline(__always) get {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
_sanityCheck(isLarge)
|
|
return Int(bitPattern: (_otherBits & 0x0000_FFFF_FFFF_FFFF))
|
|
#endif
|
|
}
|
|
@inline(__always) set {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
_sanityCheck(largeCount == 0)
|
|
_sanityCheck(newValue == newValue & 0x0000_FFFF_FFFF_FFFF, "too large")
|
|
_otherBits |= UInt(bitPattern: newValue)
|
|
_sanityCheck(newValue == largeCount)
|
|
#endif
|
|
}
|
|
}
|
|
|
|
@inlinable
|
|
internal var largeAddressBits: UInt {
|
|
@inline(__always) get {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
_sanityCheck(isLarge)
|
|
return undiscriminatedObjectRawBits
|
|
#endif
|
|
}
|
|
}
|
|
|
|
@inlinable
|
|
internal var nativeUTF8Start: UnsafePointer<UInt8> {
|
|
@inline(__always) get {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
_sanityCheck(largeFastIsNative)
|
|
let largeAddressBits = objectRawBits & 0x00FF_FFFF_FFFF_FFFF
|
|
|
|
return UnsafePointer(
|
|
bitPattern: largeAddressBits &+ _StringObject.nativeBias
|
|
)._unsafelyUnwrappedUnchecked
|
|
#endif
|
|
}
|
|
}
|
|
@inlinable
|
|
internal var nativeUTF8: UnsafeBufferPointer<UInt8> {
|
|
@inline(__always) get {
|
|
_sanityCheck(largeFastIsNative)
|
|
return UnsafeBufferPointer(start: nativeUTF8Start, count: largeCount)
|
|
}
|
|
}
|
|
|
|
// Resilient way to fetch a pointer
|
|
@usableFromInline @inline(never)
|
|
@_effects(releasenone)
|
|
internal func getSharedUTF8Start() -> UnsafePointer<UInt8> {
|
|
_sanityCheck(largeFastIsShared)
|
|
if largeIsCocoa {
|
|
return _cocoaUTF8Pointer(cocoaObject)._unsafelyUnwrappedUnchecked
|
|
}
|
|
|
|
return sharedStorage.start
|
|
}
|
|
|
|
@inlinable
|
|
internal var sharedUTF8: UnsafeBufferPointer<UInt8> {
|
|
@inline(__always) get {
|
|
_sanityCheck(largeFastIsShared)
|
|
let start = self.getSharedUTF8Start()
|
|
return UnsafeBufferPointer(start: start, count: largeCount)
|
|
}
|
|
}
|
|
|
|
@inlinable
|
|
internal var nativeStorage: _StringStorage {
|
|
@inline(__always) get {
|
|
_sanityCheck(hasNativeStorage)
|
|
return Builtin.reinterpretCast(largeAddressBits)
|
|
}
|
|
}
|
|
|
|
internal var sharedStorage: _SharedStringStorage {
|
|
@inline(__always) get {
|
|
_sanityCheck(largeFastIsShared && !largeIsCocoa)
|
|
_sanityCheck(hasSharedStorage)
|
|
return Builtin.reinterpretCast(largeAddressBits)
|
|
}
|
|
}
|
|
|
|
internal var cocoaObject: AnyObject {
|
|
@inline(__always) get {
|
|
_sanityCheck(largeIsCocoa && !isImmortal)
|
|
return Builtin.reinterpretCast(largeAddressBits)
|
|
}
|
|
}
|
|
|
|
@_fixed_layout @usableFromInline
|
|
internal struct _StringObjectPerfFlags {
|
|
@usableFromInline
|
|
internal var bits: UInt16
|
|
}
|
|
|
|
@inlinable
|
|
internal var largePerfFlags: _StringObjectPerfFlags {
|
|
@inline(__always) get {
|
|
#if arch(i386) || arch(arm)
|
|
unimplemented_utf8_32bit()
|
|
#else
|
|
_sanityCheck(isLarge)
|
|
return Builtin.reinterpretCast(
|
|
UInt16(
|
|
truncatingIfNeeded: _otherBits & 0xFFFF_0000_0000_0000) &>> 48)
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
|
|
// Aggregate queries / abstractions
|
|
extension _StringObject {
|
|
// The number of code units stored
|
|
//
|
|
// TODO(UTF8 compiles): Check generated code
|
|
@inlinable
|
|
internal var count: Int {
|
|
@inline(__always) get { return isSmall ? smallCount : largeCount }
|
|
}
|
|
|
|
// Whether the string is all ASCII
|
|
//
|
|
//
|
|
@inlinable
|
|
internal var isASCII: Bool {
|
|
@inline(__always) get {
|
|
if isSmall { return smallIsASCII }
|
|
// TODO(UTF8 perf and testing): track known asciiness
|
|
return false
|
|
}
|
|
}
|
|
|
|
// Get access to fast UTF-8 contents for large strings which provide it.
|
|
@inlinable
|
|
internal var fastUTF8: UnsafeBufferPointer<UInt8> {
|
|
@inline(__always) get {
|
|
_sanityCheck(self.isLarge && self.providesFastUTF8)
|
|
let start: UnsafePointer<UInt8>
|
|
let count = self.largeCount
|
|
if _slowPath(self.largeFastIsShared) {
|
|
start = getSharedUTF8Start()
|
|
} else {
|
|
start = self.nativeUTF8Start
|
|
}
|
|
return UnsafeBufferPointer(start: start, count: count)
|
|
}
|
|
}
|
|
|
|
// Whether the object stored can be bridged directly as a NSString
|
|
@usableFromInline // @opaque
|
|
internal var hasObjCBridgeableObject: Bool {
|
|
// Currently, all mortal objects can zero-cost bridge
|
|
return !self.isImmortal
|
|
}
|
|
|
|
// Fetch the stored subclass of NSString for bridging
|
|
@inlinable
|
|
internal var objCBridgeableObject: AnyObject {
|
|
@inline(__always) get {
|
|
_sanityCheck(hasObjCBridgeableObject)
|
|
return Builtin.reinterpretCast(largeAddressBits)
|
|
}
|
|
}
|
|
|
|
// Whether the object provides fast UTF-8 contents that are nul-terminated
|
|
@inlinable
|
|
internal var isFastZeroTerminated: Bool {
|
|
if _slowPath(!providesFastUTF8) { return false }
|
|
|
|
// Small strings nul-terminate when spilling for contiguous access
|
|
if isSmall { return true }
|
|
|
|
// TODO(UTF8 perf): Use performance flag, which could be more inclusive. For
|
|
// now, we only know native strings and small strings (when accessed) are.
|
|
// We could also know about some shared strings.
|
|
|
|
return largeFastIsNative
|
|
}
|
|
}
|
|
|
|
// Object creation
|
|
extension _StringObject {
|
|
@inlinable @inline(__always)
|
|
init(start: UnsafePointer<UInt8>, discrim: UInt, largeCount: Int) {
|
|
// TODO: Use with a perf flags helper
|
|
self.init(
|
|
rawObject: UInt(bitPattern: start) &- _StringObject.nativeBias,
|
|
rawDiscrim: discrim,
|
|
otherBits: UInt(bitPattern: largeCount))
|
|
}
|
|
|
|
@inlinable @inline(__always)
|
|
init(immortal bufPtr: UnsafeBufferPointer<UInt8>, isASCII: Bool) {
|
|
// TODO(UTF8) perf flags to track known ascii
|
|
self.init(
|
|
start: bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
|
|
discrim: Nibbles.largeImmortal(),
|
|
largeCount: bufPtr.count)
|
|
}
|
|
@inlinable @inline(__always)
|
|
init(_ storage: _StringStorage, isASCII: Bool) {
|
|
// TODO(UTF8) perf flags to track known ascii
|
|
self.init(
|
|
start: storage.start,
|
|
discrim: Nibbles.largeMortal(),
|
|
largeCount: storage.count)
|
|
}
|
|
|
|
@inlinable @inline(__always)
|
|
init(_ storage: _SharedStringStorage, isASCII: Bool) {
|
|
// TODO(UTF8) perf flags to track known ascii
|
|
self.init(
|
|
rawObject: Builtin.reinterpretCast(storage),
|
|
rawDiscrim: Nibbles.largeSharedMortal(),
|
|
otherBits: UInt(bitPattern: storage.count))
|
|
}
|
|
|
|
init(cocoa: AnyObject, providesFastUTF8: Bool, length: Int) {
|
|
let discrim = Nibbles.largeCocoa(providesFastUTF8: providesFastUTF8)
|
|
|
|
// TODO: Use with a perf flags helper
|
|
self.init(
|
|
rawObject: Builtin.reinterpretCast(cocoa),
|
|
rawDiscrim: discrim,
|
|
otherBits: UInt(bitPattern: length))
|
|
|
|
_sanityCheck(self.largeAddressBits == Builtin.reinterpretCast(cocoa))
|
|
_sanityCheck(self.providesFastUTF8 == providesFastUTF8)
|
|
_sanityCheck(self.largeCount == length)
|
|
}
|
|
}
|
|
|
|
// Internal invariants
|
|
extension _StringObject {
|
|
@inlinable @inline(__always)
|
|
internal func _invariantCheck() {
|
|
#if INTERNAL_CHECKS_ENABLED
|
|
_sanityCheck(MemoryLayout<_StringObject>.size == 16)
|
|
if isForeign {
|
|
_sanityCheck(largeIsCocoa, "No other foreign forms yet")
|
|
}
|
|
|
|
if isSmall {
|
|
_sanityCheck(isImmortal)
|
|
_sanityCheck(smallCount <= 15)
|
|
_sanityCheck(smallCount == count)
|
|
_sanityCheck(!hasObjCBridgeableObject)
|
|
} else {
|
|
_sanityCheck(isLarge)
|
|
_sanityCheck(largeCount == count)
|
|
if providesFastUTF8 && largeFastIsNative {
|
|
_sanityCheck(!isSmall)
|
|
_sanityCheck(!largeIsCocoa)
|
|
|
|
if isImmortal {
|
|
_sanityCheck(!hasNativeStorage)
|
|
_sanityCheck(!hasObjCBridgeableObject)
|
|
} else {
|
|
_sanityCheck(hasNativeStorage)
|
|
_sanityCheck(hasObjCBridgeableObject)
|
|
}
|
|
}
|
|
if largeIsCocoa {
|
|
_sanityCheck(hasObjCBridgeableObject)
|
|
_sanityCheck(!isSmall)
|
|
if isForeign {
|
|
|
|
} else {
|
|
_sanityCheck(largeFastIsShared)
|
|
}
|
|
}
|
|
}
|
|
#endif // INTERNAL_CHECKS_ENABLED
|
|
}
|
|
|
|
internal func _dump() {
|
|
// TODO(UTF8): Print out any useful internal information
|
|
#if INTERNAL_CHECKS_ENABLED
|
|
if isSmall {
|
|
asSmallString._dump()
|
|
return
|
|
}
|
|
if providesFastUTF8 && largeFastIsNative {
|
|
print("Native")
|
|
} else if largeIsCocoa {
|
|
print("Cocoa")
|
|
} else {
|
|
print("TODO...")
|
|
}
|
|
#endif // INTERNAL_CHECKS_ENABLED
|
|
}
|
|
}
|