Files
swift-mirror/stdlib/public/core/StringCreate.swift
Stephen Canon d10b3f82fc Optimization pass over String and UTF8Span's allASCII helper (#82540)
This ranges between parity (for very small strings) and 5x faster (for
32-63B strings) in benchmarking on M1 MBP. For largeish strings it
delivers a roughly 2x speedup; further increase in blocksize nets a
small win in microbenchmarks that I do not expect would translate to
real world usage due to codesize impact and the fact that most strings
are smallish.

There's some opportunity for further work here; in particular, if people
start building Swift for a baseline of AVX2 or AVX512, we should have
paths for that (and we should also implement them if/when we get better
multiversioning dispatch machinery in the language). Span adoption would
be interesting. It's likely we should have a dedicated "small core"
implementation that uses only aligned accesses. Still, this is a
significant improvement as-is, and we should land it.


![allASCII](https://github.com/user-attachments/assets/ebbc45ba-5ba8-42dd-bf63-31ca77844fca)
2025-07-11 12:08:17 -04:00

476 lines
16 KiB
Swift

//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2023 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
// String Creation Helpers
//===----------------------------------------------------------------------===//
internal func _allASCII(_ input: UnsafeBufferPointer<UInt8>) -> Bool {
//--------------- Implementation building blocks ---------------------------//
#if arch(arm64_32)
typealias Word = UInt64
#else
typealias Word = UInt
#endif
let mask = Word(truncatingIfNeeded: 0x80808080_80808080 as UInt64)
#if (arch(i386) || arch(x86_64)) && SWIFT_STDLIB_ENABLE_VECTOR_TYPES
// TODO: Should consider AVX2 / AVX512 / AVX10 path here
typealias Block = (SIMD16<UInt8>, SIMD16<UInt8>)
@_transparent func pmovmskb(_ vec: SIMD16<UInt8>) -> UInt16 {
UInt16(Builtin.bitcast_Vec16xInt1_Int16(
Builtin.cmp_slt_Vec16xInt8(vec._storage._value, Builtin.zeroInitializer())
))
}
#elseif (arch(arm64) || arch(arm64_32)) && SWIFT_STDLIB_ENABLE_VECTOR_TYPES
typealias Block = (SIMD16<UInt8>, SIMD16<UInt8>)
@_transparent func umaxv(_ vec: SIMD16<UInt8>) -> UInt8 {
UInt8(Builtin.int_vector_reduce_umax_Vec16xInt8(vec._storage._value))
}
#else
typealias Block = (Word, Word, Word, Word)
#endif
@_transparent
func allASCII(wordAt pointer: UnsafePointer<UInt8>) -> Bool {
let word = unsafe UnsafeRawPointer(pointer).loadUnaligned(as: Word.self)
return word & mask == 0
}
@_transparent
func allASCII(blockAt pointer: UnsafePointer<UInt8>) -> Bool {
let block = unsafe UnsafeRawPointer(pointer).loadUnaligned(as: Block.self)
#if (arch(i386) || arch(x86_64)) && SWIFT_STDLIB_ENABLE_VECTOR_TYPES
return pmovmskb(block.0 | block.1) == 0
#elseif (arch(arm64) || arch(arm64_32)) && SWIFT_STDLIB_ENABLE_VECTOR_TYPES
return umaxv(block.0 | block.1) < 0x80
#else
return (block.0 | block.1 | block.2 | block.3) & mask == 0
#endif
}
//----------------------- Implementation proper ----------------------------//
guard input.count >= MemoryLayout<Word>.size else {
// They gave us a region of memory
// whose size is as modest as it can be.
// We'll check every byte
// for the bit of most height
// and return if we happen on any
//
// I'm sorry, I'm sorry, I'm trying to delete it. (This chunk of code, not
// the Limerick. I would wager that--at least for Strings--we could
// unconditionally load 16B here,¹ because of the small string encoding,
// and check them all at once, which would be much more efficient. That
// probably has to happen by lifting this check into the SmallString
// initializer directly, though.)
//
// ¹ well, most of the time, which makes it a rather conditional
// "unconditionally".
return unsafe input.allSatisfy { $0 < 0x80 }
}
// bytes.count is non-zero, so we can unconditionally unwrap baseAddress.
let base = unsafe input.baseAddress._unsafelyUnwrappedUnchecked
let n = input.count
var i = 0
guard n >= MemoryLayout<Block>.size else {
// The size isn't yet to a block
// word-by-word we are forced to walk.
// So as to not leave a gap
// the last word may lap
// the word that we already chalked.
//
// 0 k 2k 3k ?k n-k n-1
// | | | | | | |
// +------+------+------+ +------+ |
// | word | word | word | ... | word | |
// +------+------+------+ +------+ v
// +------+
// possibly overlapping final word > | word |
// +------+
//
// This means that we check any bytes in the overlap region twice, but
// that's much preferrable to using smaller accesses to avoid rechecking,
// because the entire last word is about as expensive as checking just
// one byte would be, and on average there's more than one byte remaining.
//
// Note that we don't bother trying to align any of these accesses, because
// there is minimal benefit to doing so on "modern" OoO cores, which can
// handle cacheline-crossing loads at full speed. If the string happens to
// be aligned, they'll be aligned, if not, they won't be. It will likely
// make sense to add a path that does align everything for more limited
// embedded CPUs, though.
let k = MemoryLayout<Word>.size
let last = n &- k
while i < last {
guard unsafe allASCII(wordAt: base + i) else { return false }
i &+= k
}
return unsafe allASCII(wordAt: base + last)
}
// check block-by-block, with a possibly overlapping last block to avoid
// sub-block cleanup. We should be able to avoid manual index arithmetic
// and write this loop and the one above something like the following:
//
// return stride(from: 0, to: last, by: k).allSatisfy {
// allASCII(blockAt: base + $0)
// } && allASCII(blockAt: base + last)
//
// but LLVM leaves one unnecessary conditional operation in the loop
// when we do that, so we write them out as while loops instead for now.
let k = MemoryLayout<Block>.size
let last = n &- k
while i < last {
guard unsafe allASCII(blockAt: base + i) else { return false }
i &+= k
}
return unsafe allASCII(blockAt: base + last)
}
extension String {
internal static func _uncheckedFromASCII(
_ input: UnsafeBufferPointer<UInt8>
) -> String {
if let smol = unsafe _SmallString(input) {
return String(_StringGuts(smol))
}
let storage = unsafe __StringStorage.create(initializingFrom: input, isASCII: true)
return storage.asString
}
@usableFromInline
internal static func _fromASCII(
_ input: UnsafeBufferPointer<UInt8>
) -> String {
unsafe _internalInvariant(_allASCII(input), "not actually ASCII")
return unsafe _uncheckedFromASCII(input)
}
internal static func _fromASCIIValidating(
_ input: UnsafeBufferPointer<UInt8>
) -> String? {
if unsafe _fastPath(_allASCII(input)) {
return unsafe _uncheckedFromASCII(input)
}
return nil
}
public // SPI(Foundation)
static func _tryFromUTF8(_ input: UnsafeBufferPointer<UInt8>) -> String? {
guard case .success(let extraInfo) = unsafe validateUTF8(input) else {
return nil
}
return unsafe String._uncheckedFromUTF8(input, isASCII: extraInfo.isASCII)
}
@usableFromInline
internal static func _fromUTF8Repairing(
_ input: UnsafeBufferPointer<UInt8>
) -> (result: String, repairsMade: Bool) {
switch unsafe validateUTF8(input) {
case .success(let extraInfo):
return unsafe (String._uncheckedFromUTF8(
input, asciiPreScanResult: extraInfo.isASCII
), false)
case .error(_, let initialRange):
return unsafe (repairUTF8(input, firstKnownBrokenRange: initialRange), true)
}
}
internal static func _fromLargeUTF8Repairing(
uninitializedCapacity capacity: Int,
initializingWith initializer: (
_ buffer: UnsafeMutableBufferPointer<UInt8>
) throws -> Int
) rethrows -> String {
let result = try unsafe __StringStorage.create(
uninitializedCodeUnitCapacity: capacity,
initializingUncheckedUTF8With: initializer)
switch unsafe validateUTF8(result.codeUnits) {
case .success(let info):
result._updateCountAndFlags(
newCount: result.count,
newIsASCII: info.isASCII
)
return result.asString
case .error(_, let initialRange):
defer { _fixLifetime(result) }
//This could be optimized to use excess tail capacity
return unsafe repairUTF8(result.codeUnits, firstKnownBrokenRange: initialRange)
}
}
@usableFromInline
internal static func _uncheckedFromUTF8(
_ input: UnsafeBufferPointer<UInt8>
) -> String {
return unsafe _uncheckedFromUTF8(input, isASCII: _allASCII(input))
}
@usableFromInline
internal static func _uncheckedFromUTF8(
_ input: UnsafeBufferPointer<UInt8>,
isASCII: Bool
) -> String {
if let smol = unsafe _SmallString(input) {
return String(_StringGuts(smol))
}
let storage = unsafe __StringStorage.create(
initializingFrom: input, isASCII: isASCII)
return storage.asString
}
// If we've already pre-scanned for ASCII, just supply the result
@usableFromInline
internal static func _uncheckedFromUTF8(
_ input: UnsafeBufferPointer<UInt8>, asciiPreScanResult: Bool
) -> String {
if let smol = unsafe _SmallString(input) {
return String(_StringGuts(smol))
}
let isASCII = asciiPreScanResult
let storage = unsafe __StringStorage.create(
initializingFrom: input, isASCII: isASCII)
return storage.asString
}
@usableFromInline
internal static func _uncheckedFromUTF16(
_ input: UnsafeBufferPointer<UInt16>
) -> String {
// TODO(String Performance): Attempt to form smol strings
// TODO(String performance): Skip intermediary array, transcode directly
// into a StringStorage space.
var contents: [UInt8] = []
contents.reserveCapacity(input.count)
let repaired = unsafe transcode(
input.makeIterator(),
from: UTF16.self,
to: UTF8.self,
stoppingOnError: false,
into: { contents.append($0) })
_internalInvariant(!repaired, "Error present")
return unsafe contents.withUnsafeBufferPointer { unsafe String._uncheckedFromUTF8($0) }
}
@inline(never) // slow path
private static func _slowFromCodeUnits<
Input: Collection,
Encoding: Unicode.Encoding
>(
_ input: Input,
encoding: Encoding.Type,
repair: Bool
) -> (String, repairsMade: Bool)?
where Input.Element == Encoding.CodeUnit {
// TODO(String Performance): Attempt to form smol strings
// TODO(String performance): Skip intermediary array, transcode directly
// into a StringStorage space.
var contents: [UInt8] = []
contents.reserveCapacity(input.underestimatedCount)
let repaired = transcode(
input.makeIterator(),
from: Encoding.self,
to: UTF8.self,
stoppingOnError: false,
into: { contents.append($0) })
guard repair || !repaired else { return nil }
let str = unsafe contents.withUnsafeBufferPointer { unsafe String._uncheckedFromUTF8($0) }
return (str, repaired)
}
@usableFromInline @inline(never) // can't be inlined w/out breaking ABI
@_specialize(
where Input == UnsafeBufferPointer<UInt8>, Encoding == Unicode.ASCII)
@_specialize(
where Input == Array<UInt8>, Encoding == Unicode.ASCII)
internal static func _fromCodeUnits<
Input: Collection,
Encoding: Unicode.Encoding
>(
_ input: Input,
encoding: Encoding.Type,
repair: Bool
) -> (String, repairsMade: Bool)?
where Input.Element == Encoding.CodeUnit {
guard _fastPath(encoding == Unicode.ASCII.self) else {
return _slowFromCodeUnits(input, encoding: encoding, repair: repair)
}
// Helper to simplify early returns
func resultOrSlow(_ resultOpt: String?) -> (String, repairsMade: Bool)? {
guard let result = resultOpt else {
return _slowFromCodeUnits(input, encoding: encoding, repair: repair)
}
return (result, repairsMade: false)
}
#if !$Embedded
// Fast path for untyped raw storage and known stdlib types
if let contigBytes = input as? _HasContiguousBytes,
contigBytes._providesContiguousBytesNoCopy {
return resultOrSlow(contigBytes.withUnsafeBytes { rawBufPtr in
let buffer = unsafe UnsafeBufferPointer(
start: rawBufPtr.baseAddress?.assumingMemoryBound(to: UInt8.self),
count: rawBufPtr.count)
return unsafe String._fromASCIIValidating(buffer)
})
}
#endif
// Fast path for user-defined Collections
if let strOpt = input.withContiguousStorageIfAvailable({
(buffer: UnsafeBufferPointer<Input.Element>) -> String? in
return unsafe String._fromASCIIValidating(
UnsafeRawBufferPointer(buffer).bindMemory(to: UInt8.self))
}) {
return resultOrSlow(strOpt)
}
return unsafe resultOrSlow(Array(input).withUnsafeBufferPointer {
let buffer = unsafe UnsafeRawBufferPointer($0).bindMemory(to: UInt8.self)
return unsafe String._fromASCIIValidating(buffer)
})
}
public // @testable
static func _fromInvalidUTF16(
_ utf16: UnsafeBufferPointer<UInt16>
) -> String {
return unsafe String._fromCodeUnits(utf16, encoding: UTF16.self, repair: true)!.0
}
@usableFromInline
internal static func _fromSubstring(
_ substring: __shared Substring
) -> String {
if substring._offsetRange == substring.base._offsetRange {
return substring.base
}
return String._copying(substring)
}
@_alwaysEmitIntoClient
@inline(never) // slow-path
internal static func _copying(_ str: String) -> String {
return String._copying(str[...])
}
@_alwaysEmitIntoClient
@inline(never) // slow-path
internal static func _copying(_ str: Substring) -> String {
if _fastPath(str._wholeGuts.isFastUTF8) {
var new = unsafe str._wholeGuts.withFastUTF8(range: str._offsetRange) {
unsafe String._uncheckedFromUTF8($0)
}
#if os(watchOS) && _pointerBitWidth(_32)
// Required for compatibility with some small strings that
// may be encoded in the 32-bit slice of watchOS binaries.
if str._wholeGuts.isSmall,
str._wholeGuts.count > _SmallString.contiguousCapacity() {
new.reserveCapacity(_SmallString.capacity + 1)
return new
}
#endif
return new
}
return unsafe Array(str.utf8).withUnsafeBufferPointer {
unsafe String._uncheckedFromUTF8($0)
}
}
@usableFromInline
@available(SwiftStdlib 6.0, *)
internal static func _validate<Encoding: Unicode.Encoding>(
_ input: UnsafeBufferPointer<Encoding.CodeUnit>,
as encoding: Encoding.Type
) -> String? {
if encoding.CodeUnit.self == UInt8.self {
let bytes = unsafe _identityCast(input, to: UnsafeBufferPointer<UInt8>.self)
if encoding.self == UTF8.self {
guard case .success(let info) = unsafe validateUTF8(bytes) else { return nil }
return unsafe String._uncheckedFromUTF8(bytes, asciiPreScanResult: info.isASCII)
} else if encoding.self == Unicode.ASCII.self {
guard unsafe _allASCII(bytes) else { return nil }
return unsafe String._uncheckedFromASCII(bytes)
}
}
// slow-path
var isASCII = true
var buffer: UnsafeMutableBufferPointer<UInt8>
unsafe buffer = UnsafeMutableBufferPointer.allocate(capacity: input.count*3)
var written = buffer.startIndex
var parser = Encoding.ForwardParser()
var input = unsafe input.makeIterator()
transcodingLoop:
while true {
switch unsafe parser.parseScalar(from: &input) {
case .valid(let s):
let scalar = Encoding.decode(s)
guard let utf8 = Unicode.UTF8.encode(scalar) else {
// transcoding error: clean up and return nil
fallthrough
}
if buffer.count < written + utf8.count {
let newCapacity = buffer.count + (buffer.count >> 1)
let copy: UnsafeMutableBufferPointer<UInt8>
unsafe copy = UnsafeMutableBufferPointer.allocate(capacity: newCapacity)
let copied = unsafe copy.moveInitialize(
fromContentsOf: buffer.prefix(upTo: written)
)
unsafe buffer.deallocate()
unsafe buffer = unsafe copy
written = copied
}
if isASCII && utf8.count > 1 {
isASCII = false
}
written = unsafe buffer.suffix(from: written).initialize(fromContentsOf: utf8)
break
case .error:
// validation error: clean up and return nil
unsafe buffer.prefix(upTo: written).deinitialize()
unsafe buffer.deallocate()
return nil
case .emptyInput:
break transcodingLoop
}
}
let storage = unsafe buffer.baseAddress.map {
unsafe __SharedStringStorage(
_mortal: $0,
countAndFlags: _StringObject.CountAndFlags(
count: buffer.startIndex.distance(to: written),
isASCII: isASCII,
isNFC: isASCII,
isNativelyStored: false,
isTailAllocated: false
)
)
}
return storage?.asString
}
}