mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
Add inlinability annotations to restore performance parity with 4.2 String. Take advantage of known NFC as a fast-path for comparison, and overhaul comparison dispatch. RRC improvements and optimizations.
188 lines
5.5 KiB
Swift
188 lines
5.5 KiB
Swift
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
// String Creation Helpers
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Returns whether every code unit in `input` is ASCII (that is, `<= 0x7F`).
///
/// An empty buffer is trivially all-ASCII, including an empty buffer whose
/// `baseAddress` is `nil`.
///
/// - Parameter input: The code units to scan.
/// - Returns: `false` as soon as a code unit above `0x7F` is found, `true`
///   otherwise.
internal func _allASCII(_ input: UnsafeBufferPointer<UInt8>) -> Bool {
  // A buffer pointer may only have a nil base address when it is empty, so
  // in that case there is nothing to scan and nothing that is safe to unwrap.
  guard let ptr = input.baseAddress else { return true }

  // NOTE: Avoiding for-in syntax to avoid bounds checks
  //
  // TODO(UTF8 perf): Vectorize and/or incorporate into validity checking,
  // perhaps both.
  //
  let count = input.count
  var i = 0
  while i < count {
    guard ptr[i] <= 0x7F else { return false }
    // `i < count` holds here, so the increment cannot overflow.
    i &+= 1
  }
  return true
}
|
|
|
|
extension String {
  /// Creates a string from `input`, which the caller asserts is all ASCII.
  ///
  /// The ASCII claim is checked only through `_sanityCheck`.
  ///
  /// - Parameter input: The ASCII code units to copy into the new string.
  /// - Returns: A small string when `_SmallString` accepts the input,
  ///   otherwise a string backed by freshly created `_StringStorage`.
  @usableFromInline
  internal static func _fromASCII(
    _ input: UnsafeBufferPointer<UInt8>
  ) -> String {
    _sanityCheck(_allASCII(input), "not actually ASCII")

    // Prefer the small-string form whenever `_SmallString` can hold the input.
    if let smol = _SmallString(input) {
      return String(_StringGuts(smol))
    }

    let storage = _StringStorage.create(initializingFrom: input, isASCII: true)
    return storage.asString
  }

  /// Creates a string from `input` if `transcode` finds no encoding errors
  /// in it.
  ///
  /// - Parameter input: Code units that are expected to be UTF-8.
  /// - Returns: The new string, or `nil` if an encoding error was detected.
  @usableFromInline
  internal static func _tryFromUTF8(
    _ input: UnsafeBufferPointer<UInt8>
  ) -> String? {
    // TODO(UTF8 perf): More efficient validation

    // TODO(UTF8 perf): Skip intermediary array
    var contents: [UInt8] = []
    contents.reserveCapacity(input.count)
    // UTF-8 to UTF-8 transcoding is used purely as a validator here; stop at
    // the first error because the result is discarded in that case.
    let repaired = transcode(
      input.makeIterator(),
      from: UTF8.self,
      to: UTF8.self,
      stoppingOnError: true,
      into: { contents.append($0) })
    guard !repaired else { return nil }

    return contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) }
  }

  /// Creates a string from `input`, transcoding UTF-8 to UTF-8 without
  /// stopping on encoding errors.
  ///
  /// - Parameter input: Code units that are expected to be UTF-8.
  /// - Returns: The new string, and whether `transcode` reported any encoding
  ///   errors. `repairsMade` is always `false` on the all-ASCII fast path.
  @usableFromInline
  internal static func _fromUTF8Repairing(
    _ input: UnsafeBufferPointer<UInt8>
  ) -> (result: String, repairsMade: Bool) {
    // Fast path: all-ASCII input needs no transcoding, and the scan result is
    // forwarded so it is not repeated.
    if _allASCII(input) {
      return (String._uncheckedFromUTF8(input, asciiPreScanResult: true), false)
    }

    // TODO(UTF8 perf): More efficient validation

    // TODO(UTF8 perf): Skip intermediary array
    var contents: [UInt8] = []
    contents.reserveCapacity(input.count)
    let repaired = transcode(
      input.makeIterator(),
      from: UTF8.self,
      to: UTF8.self,
      stoppingOnError: false,
      into: { contents.append($0) })
    let str = contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) }
    return (str, repaired)
  }

  /// Creates a string from `input` without performing any validation here.
  ///
  /// The input is scanned with `_allASCII` only when it does not fit the
  /// small-string form.
  ///
  /// - Parameter input: The code units to copy into the new string.
  /// - Returns: A small string when `_SmallString` accepts the input,
  ///   otherwise a string backed by freshly created `_StringStorage`.
  @usableFromInline
  internal static func _uncheckedFromUTF8(
    _ input: UnsafeBufferPointer<UInt8>
  ) -> String {
    if let smol = _SmallString(input) {
      return String(_StringGuts(smol))
    }

    let isASCII = _allASCII(input)
    let storage = _StringStorage.create(
      initializingFrom: input, isASCII: isASCII)
    return storage.asString
  }

  // If we've already pre-scanned for ASCII, just supply the result
  /// Creates a string from `input` without performing any validation here,
  /// using an ASCII scan result computed by the caller.
  ///
  /// - Parameters:
  ///   - input: The code units to copy into the new string.
  ///   - asciiPreScanResult: The caller's all-ASCII determination for
  ///     `input`; it is passed to `_StringStorage.create` as `isASCII`.
  /// - Returns: A small string when `_SmallString` accepts the input,
  ///   otherwise a string backed by freshly created `_StringStorage`.
  @usableFromInline
  internal static func _uncheckedFromUTF8(
    _ input: UnsafeBufferPointer<UInt8>, asciiPreScanResult: Bool
  ) -> String {
    if let smol = _SmallString(input) {
      return String(_StringGuts(smol))
    }

    let isASCII = asciiPreScanResult
    let storage = _StringStorage.create(
      initializingFrom: input, isASCII: isASCII)
    return storage.asString
  }

  /// Creates a string by transcoding the UTF-16 code units in `input` to
  /// UTF-8.
  ///
  /// The input is expected to be free of encoding errors; this is checked
  /// only through `_sanityCheck`.
  ///
  /// - Parameter input: The UTF-16 code units to transcode.
  /// - Returns: A string built from the transcoded UTF-8 code units.
  @usableFromInline
  internal static func _uncheckedFromUTF16(
    _ input: UnsafeBufferPointer<UInt16>
  ) -> String {
    // TODO(UTF8): smol strings

    // TODO(UTF8): Faster transcoding...

    // TODO(UTF8): Skip intermediary array
    var contents: [UInt8] = []
    contents.reserveCapacity(input.count)
    let repaired = transcode(
      input.makeIterator(),
      from: UTF16.self,
      to: UTF8.self,
      stoppingOnError: false,
      into: { contents.append($0) })
    _sanityCheck(!repaired, "Error present")

    return contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) }
  }

  /// Calls `body` with the bytes provided by `self.withUnsafeBytes`, viewed
  /// as a buffer of UTF-8 code units.
  ///
  /// - Parameter body: A closure that receives the code unit buffer.
  /// - Returns: The value returned by `body`.
  /// - Throws: Rethrows any error thrown by `body`.
  internal func _withUnsafeBufferPointerToUTF8<R>(
    _ body: (UnsafeBufferPointer<UTF8.CodeUnit>) throws -> R
  ) rethrows -> R {
    return try self.withUnsafeBytes { rawBufPtr in
      // NOTE(review): assumes `withUnsafeBytes` never hands over a nil base
      // address, including for an empty string - confirm.
      let rawPtr = rawBufPtr.baseAddress._unsafelyUnwrappedUnchecked
      return try body(UnsafeBufferPointer(
        start: rawPtr.assumingMemoryBound(to: UInt8.self),
        count: rawBufPtr.count))
    }
  }

  /// Creates a string by transcoding `input` from `encoding` to UTF-8.
  ///
  /// - Parameters:
  ///   - input: The code units to transcode.
  ///   - encoding: The encoding in which `input` is expressed.
  ///   - repair: Whether input containing encoding errors should still
  ///     produce a string (with the repairs applied by `transcode`).
  /// - Returns: The new string and whether `transcode` reported any encoding
  ///   errors, or `nil` when errors were found and `repair` is `false`.
  @usableFromInline @inline(never) // slow-path
  internal static func _fromCodeUnits<
    Input: Collection,
    Encoding: Unicode.Encoding
  >(
    _ input: Input,
    encoding: Encoding.Type,
    repair: Bool
  ) -> (String, repairsMade: Bool)?
  where Input.Element == Encoding.CodeUnit {
    // TODO(SSO): small check

    // TODO(UTF8): Skip intermediary array
    var contents: [UInt8] = []
    contents.reserveCapacity(input.underestimatedCount)
    // When repairs are not wanted, the first error already forces a `nil`
    // result, so stop there instead of transcoding the rest of the input.
    let repaired = transcode(
      input.makeIterator(),
      from: Encoding.self,
      to: UTF8.self,
      stoppingOnError: !repair,
      into: { contents.append($0) })
    guard repair || !repaired else { return nil }

    let str = contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) }
    return (str, repaired)
  }

  /// Creates a string from possibly ill-formed UTF-16, repairing any
  /// encoding errors.
  ///
  /// - Parameter utf16: The UTF-16 code units to transcode.
  /// - Returns: The repaired string.
  public // @testable
  static func _fromInvalidUTF16(
    _ utf16: UnsafeBufferPointer<UInt16>
  ) -> String {
    // TODO(UTF8 test): How much does ahead-of-time fix defeat the purpose of
    // validation-test/stdlib/StringViews.swift ?

    // The force unwrap cannot fail: with `repair: true`, `_fromCodeUnits`
    // never takes its `nil` return path.
    return String._fromCodeUnits(utf16, encoding: UTF16.self, repair: true)!.0
  }
}
|
|
|