//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2023 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

// The code units in _SmallString are always stored in memory in the same order
// that they would be stored in an array. This means that on big-endian
// platforms the order of the bytes in storage is reversed compared to
// _StringObject whereas on little-endian platforms the order is the same.
//
// Memory layout:
//
// |0 1 2 3 4 5 6 7 8 9 A B C D E F| ← hexadecimal offset in bytes
// |  _storage.0   |  _storage.1   | ← raw bits
// |          code units         | | ← encoded layout
//  ↑                             ↑
//  first (leftmost) code unit    discriminator (incl. count)
//
// On Android AArch64, there is one fewer byte available because the
// discriminator is stored in the penultimate code unit instead, to match where
// it's stored for large strings.
// A small string stored inline as two raw 64-bit words. The code units occupy
// the leading bytes in memory order; the discriminator (which also encodes the
// count and the ASCII flag) lives in the trailing word.
@frozen @usableFromInline
internal struct _SmallString {
  @usableFromInline
  internal typealias RawBitPattern = (UInt64, UInt64)

  // Small strings are values; store them raw
  @usableFromInline
  internal var _storage: RawBitPattern

  // The stored words, returned unchanged.
  @inlinable @inline(__always)
  internal var rawBits: RawBitPattern { return _storage }

  // The first stored word (`_storage.0`).
  @inlinable
  internal var leadingRawBits: UInt64 {
    @inline(__always) get { return _storage.0 }
    @inline(__always) set { _storage.0 = newValue }
  }

  // The second stored word (`_storage.1`), which also carries the
  // discriminator.
  @inlinable
  internal var trailingRawBits: UInt64 {
    @inline(__always) get { return _storage.1 }
    @inline(__always) set { _storage.1 = newValue }
  }

  // Stores `bits` without running the invariant check.
  @inlinable @inline(__always)
  internal init(rawUnchecked bits: RawBitPattern) {
    self._storage = bits
  }

  // Stores `bits` and then runs `_invariantCheck()`.
  @inlinable @inline(__always)
  internal init(raw bits: RawBitPattern) {
    self.init(rawUnchecked: bits)
    _invariantCheck()
  }

  // Creates a small string from a `_StringObject` that must be small.
  @inlinable @inline(__always)
  internal init(_ object: _StringObject) {
    _internalInvariant(object.isSmall)
    // On big-endian platforms the byte order is the reverse of _StringObject.
    let leading = object.rawBits.0.littleEndian
    let trailing = object.rawBits.1.littleEndian
    self.init(raw: (leading, trailing))
  }

  // Creates the empty small string.
  @inlinable @inline(__always)
  internal init() {
    self.init(_StringObject(empty:()))
  }
}

extension _SmallString {
  // The maximum number of code units a small string can hold on the current
  // platform.
  @inlinable @inline(__always)
  internal static var capacity: Int {
#if os(watchOS) && _pointerBitWidth(_32)
    return 10
#elseif _pointerBitWidth(_32) || _pointerBitWidth(_16)
    // Note: changed from 10 for contiguous storage.
    return 8
#elseif os(Android) && arch(arm64)
    return 14
#elseif _pointerBitWidth(_64)
    return 15
#else
#error("Unknown platform")
#endif
  }

  // `capacity`, reduced by two on 32-bit watchOS and unchanged elsewhere.
  @_alwaysEmitIntoClient @inline(__always)
  internal static func contiguousCapacity() -> Int {
#if os(watchOS) && _pointerBitWidth(_32)
    return capacity &- 2
#else
    return capacity
#endif
  }

  // Get an integer equivalent to the _StringObject.discriminatedObjectRawBits
  // computed property.
  @inlinable @inline(__always)
  internal var rawDiscriminatedObject: UInt64 {
    // Reverse the bytes on big-endian systems.
    return _storage.1.littleEndian
  }

  // Instance-level convenience for `_SmallString.capacity`.
  @inlinable @inline(__always)
  internal var capacity: Int { return _SmallString.capacity }

  // The number of stored code units, decoded from the discriminator.
  @inlinable @inline(__always)
  internal var count: Int {
    return _StringObject.getSmallCount(fromRaw: rawDiscriminatedObject)
  }

  // How many more code units fit before `capacity` is reached.
  @inlinable @inline(__always)
  internal var unusedCapacity: Int { return capacity &- count }

  // The ASCII flag as recorded in the discriminator (not recomputed).
  @inlinable @inline(__always)
  internal var isASCII: Bool {
    return _StringObject.getSmallIsASCII(fromRaw: rawDiscriminatedObject)
  }

  // Give raw, nul-terminated code units. This is only for limited internal
  // usage: it always clears the discriminator and count (in case it's full)
  @inlinable @inline(__always)
  internal var zeroTerminatedRawCodeUnits: RawBitPattern {
#if os(Android) && arch(arm64)
    let smallStringCodeUnitMask = ~UInt64(0xFFFF).bigEndian // zero last two bytes
#else
    let smallStringCodeUnitMask = ~UInt64(0xFF).bigEndian // zero last byte
#endif
    return (self._storage.0, self._storage.1 & smallStringCodeUnitMask)
  }

  // Recomputes ASCII-ness from the code units themselves: true when no byte
  // has its high bit set once the discriminator bytes are masked off.
  internal func computeIsASCII() -> Bool {
    let asciiMask: UInt64 = 0x8080_8080_8080_8080
    let raw = zeroTerminatedRawCodeUnits
    return (raw.0 | raw.1) & asciiMask == 0
  }
}

// Internal invariants
extension _SmallString {
#if !INTERNAL_CHECKS_ENABLED
  // Checks are compiled out: this is a no-op.
  @inlinable @inline(__always) internal func _invariantCheck() {}
#else
  // Verifies that the count fits, that the stored ASCII flag matches the code
  // units, and that the unused bytes are zero.
  @usableFromInline @inline(never) @_effects(releasenone)
  internal func _invariantCheck() {
    _internalInvariant(count <= _SmallString.capacity)
    _internalInvariant(isASCII == computeIsASCII())

    // No bits should be set between the last code unit and the discriminator
    var copy = self
    unsafe withUnsafeBytes(of: &copy._storage) {
      unsafe _internalInvariant(
        $0[count..<_SmallString.capacity].allSatisfy { $0 == 0 })
    }
  }
#endif // INTERNAL_CHECKS_ENABLED

  // Prints the count and the code units in hexadecimal; does nothing unless
  // INTERNAL_CHECKS_ENABLED is set.
  internal func _dump() {
#if INTERNAL_CHECKS_ENABLED
    print("""
      smallUTF8: count: \(self.count), codeUnits: \(
        self.map { String($0, radix: 16) }.joined()
      )
      """)
#endif // INTERNAL_CHECKS_ENABLED
  }
}

// Provide a RAC interface
extension _SmallString: RandomAccessCollection, MutableCollection {
  @usableFromInline
  internal typealias Index = Int

  @usableFromInline
  internal typealias Element = UInt8

  @usableFromInline
  internal typealias SubSequence = _SmallString

  @inlinable @inline(__always)
  internal var startIndex: Int { return 0 }

  @inlinable @inline(__always)
  internal var endIndex: Int { return count }

  // Reads or writes the byte at memory offset `idx`. Offsets 0...7 live in the
  // leading word and 8...15 in the trailing word.
  @inlinable
  internal subscript(_ idx: Int) -> UInt8 {
    @inline(__always) get {
      _internalInvariant(idx >= 0 && idx <= 15)
      if idx < 8 {
        return leadingRawBits._uncheckedGetByte(at: idx)
      } else {
        return trailingRawBits._uncheckedGetByte(at: idx &- 8)
      }
    }
    @inline(__always) set {
      _internalInvariant(idx >= 0 && idx <= 15)
      if idx < 8 {
        leadingRawBits._uncheckedSetByte(at: idx, to: newValue)
      } else {
        trailingRawBits._uncheckedSetByte(at: idx &- 8, to: newValue)
      }
    }
  }

  // Returns a new small string built from the code units in `bounds`.
  @inlinable @inline(__always)
  internal subscript(_ bounds: Range<Int>) -> SubSequence {
    get {
      // TODO(String performance): In-vector-register operation
      return self.withUTF8 { utf8 in
        let rebased = unsafe UnsafeBufferPointer(rebasing: utf8[bounds])
        return unsafe _SmallString(rebased)._unsafelyUnwrappedUnchecked
      }
    }
    // This setter is required for _SmallString to be a valid MutableCollection.
    // Since _SmallString is internal and this setter unused, we cheat.
    @_alwaysEmitIntoClient set { fatalError() }
    @_alwaysEmitIntoClient _modify { fatalError() }
  }
}

extension _SmallString {
  // Calls `f` with a buffer over the `count` code units and returns its
  // result. The buffer points at a local copy of the code units (with the
  // discriminator cleared), so it is only meaningful inside `f`.
  @inlinable @inline(__always)
  @safe
  internal func withUTF8<Result>(
    _ f: (UnsafeBufferPointer<UInt8>) throws -> Result
  ) rethrows -> Result {
    let count = self.count
    var raw = self.zeroTerminatedRawCodeUnits
    return try unsafe Swift._withUnprotectedUnsafeBytes(of: &raw) {
      let rawPtr = unsafe $0.baseAddress._unsafelyUnwrappedUnchecked
      // Rebind the underlying (UInt64, UInt64) tuple to UInt8 for the
      // duration of the closure. Accessing self after this rebind is undefined.
      let ptr = unsafe rawPtr.bindMemory(to: UInt8.self, capacity: count)
      defer {
        // Restore the memory type of self._storage
        _ = unsafe rawPtr.bindMemory(to: RawBitPattern.self, capacity: 1)
      }
      return try unsafe f(unsafe UnsafeBufferPointer(_uncheckedStart: ptr, count: count))
    }
  }

  // Overwrite stored code units, including uninitialized. `f` should return the
  // new count. This will re-establish the invariant after `f` that all bits
  // between the last code unit and the discriminator are unset.
  @inline(__always)
  fileprivate mutating func withMutableCapacity(
    _ f: (UnsafeMutableRawBufferPointer) throws -> Int
  ) rethrows {
    let len = try unsafe withUnsafeMutableBytes(of: &_storage) {
      try unsafe f(.init(start: $0.baseAddress, count: _SmallString.capacity))
    }

    // A zero count resets to the empty string; a negative count is only
    // diagnosed by the debug precondition and is otherwise treated the same.
    if len <= 0 {
      _debugPrecondition(len == 0)
      self = _SmallString()
      return
    }
    _SmallString.zeroTrailingBytes(of: &_storage, from: len)
    self = _SmallString(leading: _storage.0, trailing: _storage.1, count: len)
  }

  // Clears every byte of `storage` at memory offset `index` and above, keeping
  // the first `index` bytes. `index` must be in `1...capacity`.
  @inlinable
  @_alwaysEmitIntoClient
  internal static func zeroTrailingBytes(
    of storage: inout RawBitPattern, from index: Int
  ) {
    _internalInvariant(index > 0)
    _internalInvariant(index <= _SmallString.capacity)
    // FIXME: Verify this on big-endian architecture
    let mask0 = (UInt64(bitPattern: ~0) &>> (8 &* ( 8 &- Swift.min(index, 8))))
    let mask1 = (UInt64(bitPattern: ~0) &>> (8 &* (16 &- Swift.max(index, 8))))
    // `&>>` wraps its shift amount, so a shift by 64 would leave the mask all
    // ones; the explicit comparisons select zero for those cases instead.
    storage.0 &= (index <= 0) ? 0 : mask0.littleEndian
    storage.1 &= (index <= 8) ? 0 : mask1.littleEndian
  }
}

// Creation
extension _SmallString {
  // Builds a small string from two words of code units and a count, adding
  // the discriminator. `trailing` must not have any discriminator bits set.
  @inlinable @inline(__always)
  internal init(leading: UInt64, trailing: UInt64, count: Int) {
    _internalInvariant(count <= _SmallString.capacity)

    let isASCII = (leading | trailing) & 0x8080_8080_8080_8080 == 0
    let discriminator = _StringObject.Nibbles
      .small(withCount: count, isASCII: isASCII)
      .littleEndian // reversed byte order on big-endian platforms
    _internalInvariant(trailing & discriminator == 0)

    self.init(raw: (leading, trailing | discriminator))
    _internalInvariant(self.count == count)
  }

  // Direct from UTF-8
  //
  // Returns nil when `input` holds more than `capacity` code units; an empty
  // buffer produces the empty small string.
  @inlinable @inline(__always)
  internal init?(_ input: UnsafeBufferPointer<UInt8>) {
    if input.isEmpty {
      self.init()
      return
    }

    let count = input.count
    guard count <= _SmallString.capacity else { return nil }

    // TODO(SIMD): The below can be replaced with just a masked unaligned
    // vector load
    let ptr = unsafe input.baseAddress._unsafelyUnwrappedUnchecked
    let leading = unsafe _bytesToUInt64(ptr, Swift.min(input.count, 8))
    let trailing = unsafe count > 8 ? _bytesToUInt64(ptr + 8, count &- 8) : 0

    self.init(leading: leading, trailing: trailing, count: count)
  }

  // Starts from the empty string and lets `initializer` write UTF-8 into a
  // buffer of `capacity` bytes; `initializer` returns the number of code units
  // it wrote.
  @inline(__always)
  internal init(
    initializingUTF8With initializer: (
      _ buffer: UnsafeMutableBufferPointer<UInt8>
    ) throws -> Int
  ) rethrows {
    self.init()
    try unsafe self.withMutableCapacity {
      try unsafe $0.withMemoryRebound(to: UInt8.self, initializer)
    }
    self._invariantCheck()
  }

  // Concatenation; returns nil when the combined count exceeds `capacity`.
  @usableFromInline // @testable
  internal init?(_ base: _SmallString, appending other: _SmallString) {
    let totalCount = base.count + other.count
    guard totalCount <= _SmallString.capacity else { return nil }

    // TODO(SIMD): The below can be replaced with just a couple vector ops

    var result = base
    var writeIdx = base.count
    for readIdx in 0..
    // NOTE(review): the source text is lost from this point up to the
    // `UInt8 {` below. The rest of this initializer, anything declared after
    // it, and the header of the byte-reading function that follows are
    // missing and must be restored from the upstream file; they are not
    // reconstructed here.
  UInt8 {
    _internalInvariant(i >= 0 && i < MemoryLayout<UInt64>.stride)
#if _endian(big)
    let shift = (7 - UInt64(truncatingIfNeeded: i)) &* 8
#else
    let shift = UInt64(truncatingIfNeeded: i) &* 8
#endif
    return UInt8(truncatingIfNeeded: (self &>> shift))
  }

  // Sets the `i`th byte in memory order. On little-endian systems the byte
  // at i=0 is the least significant byte (LSB) while on big-endian systems the
  // byte at i=7 is the LSB.
  @inlinable @inline(__always)
  internal mutating func _uncheckedSetByte(at i: Int, to value: UInt8) {
    _internalInvariant(i >= 0 && i < MemoryLayout<UInt64>.stride)
#if _endian(big)
    let shift = (7 - UInt64(truncatingIfNeeded: i)) &* 8
#else
    let shift = UInt64(truncatingIfNeeded: i) &* 8
#endif
    // Clear the target byte, then OR the new value into its place.
    let valueMask: UInt64 = 0xFF &<< shift
    self = (self & ~valueMask) | (UInt64(truncatingIfNeeded: value) &<< shift)
  }
}

@inlinable @inline(__always)
internal func _bytesToUInt64(
  _ input: UnsafePointer<UInt8>,
  _ c: Int
) -> UInt64 {
  // FIXME: This should be unified with _loadPartialUnalignedUInt64LE.
  // Unfortunately that causes regressions in literal concatenation tests. (Some
  // owned to guaranteed specializations don't get inlined.)
  var r: UInt64 = 0
  var shift: Int = 0
  for idx in 0..
  // NOTE(review): the source text ends here; the remainder of this loop and
  // function is missing and must be restored from the upstream file.