mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
String.Index has an encodedOffset-based initializer and computed property that exists for serialization purposes. It was documented as UTF-16 in the SE proposal introducing it, which was String's underlying encoding at the time, but the dream of String even then was to abstract away whatever encoding happend to be used. Serialization needs an explicit encoding for serialized indices to make sense: the offsets need to align with the view. With String utilizing UTF-8 encoding for native contents in Swift 5, serialization isn't necessarily the most efficient in UTF-16. Furthermore, the majority of usage of encodedOffset in the wild is buggy and operates under the assumption that a UTF-16 code unit was a Swift Character, which isn't even valid if the String is known to be all-ASCII (because CR-LF). This change introduces a pair of semantics-preserving alternatives to encodedOffset that explicitly call out the UTF-16 assumption. These serve as a gentle off-ramp for current mis-uses of encodedOffset.
324 lines
8.6 KiB
Swift
324 lines
8.6 KiB
Swift
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
import SwiftShims
|
|
|
|
//
|
|
// StringGuts is a parameterization over String's representations. It provides
|
|
// functionality and guidance for efficiently working with Strings.
|
|
//
|
|
@_fixed_layout
|
|
public // SPI(corelibs-foundation)
|
|
struct _StringGuts {
|
|
@usableFromInline
|
|
internal var _object: _StringObject
|
|
|
|
@inlinable @inline(__always)
|
|
internal init(_ object: _StringObject) {
|
|
self._object = object
|
|
_invariantCheck()
|
|
}
|
|
|
|
// Empty string
|
|
@inlinable @inline(__always)
|
|
init() {
|
|
self.init(_StringObject(empty: ()))
|
|
}
|
|
}
|
|
|
|
// Raw
|
|
extension _StringGuts {
|
|
@inlinable
|
|
internal var rawBits: _StringObject.RawBitPattern {
|
|
@inline(__always) get { return _object.rawBits }
|
|
}
|
|
}
|
|
|
|
// Creation
|
|
extension _StringGuts {
|
|
@inlinable @inline(__always)
|
|
internal init(_ smol: _SmallString) {
|
|
self.init(_StringObject(smol))
|
|
}
|
|
|
|
@inlinable @inline(__always)
|
|
internal init(_ bufPtr: UnsafeBufferPointer<UInt8>, isASCII: Bool) {
|
|
self.init(_StringObject(immortal: bufPtr, isASCII: isASCII))
|
|
}
|
|
|
|
@inline(__always)
|
|
internal init(_ storage: __StringStorage) {
|
|
self.init(_StringObject(storage))
|
|
}
|
|
|
|
internal init(_ storage: __SharedStringStorage) {
|
|
self.init(_StringObject(storage))
|
|
}
|
|
|
|
internal init(
|
|
cocoa: AnyObject, providesFastUTF8: Bool, isASCII: Bool, length: Int
|
|
) {
|
|
self.init(_StringObject(
|
|
cocoa: cocoa,
|
|
providesFastUTF8: providesFastUTF8,
|
|
isASCII: isASCII,
|
|
length: length))
|
|
}
|
|
}
|
|
|
|
// Queries
|
|
extension _StringGuts {
|
|
// The number of code units
|
|
@inlinable
|
|
internal var count: Int { @inline(__always) get { return _object.count } }
|
|
|
|
@inlinable
|
|
internal var isEmpty: Bool { @inline(__always) get { return count == 0 } }
|
|
|
|
@inlinable
|
|
internal var isSmall: Bool {
|
|
@inline(__always) get { return _object.isSmall }
|
|
}
|
|
|
|
internal var isSmallASCII: Bool {
|
|
@inline(__always) get { return _object.isSmall && _object.smallIsASCII }
|
|
}
|
|
|
|
@inlinable
|
|
internal var asSmall: _SmallString {
|
|
@inline(__always) get { return _SmallString(_object) }
|
|
}
|
|
|
|
@inlinable
|
|
internal var isASCII: Bool {
|
|
@inline(__always) get { return _object.isASCII }
|
|
}
|
|
|
|
@inlinable
|
|
internal var isFastASCII: Bool {
|
|
@inline(__always) get { return isFastUTF8 && _object.isASCII }
|
|
}
|
|
|
|
@inline(__always)
|
|
internal var isNFC: Bool { return _object.isNFC }
|
|
|
|
@inline(__always)
|
|
internal var isNFCFastUTF8: Bool {
|
|
// TODO(String micro-performance): Consider a dedicated bit for this
|
|
return _object.isNFC && isFastUTF8
|
|
}
|
|
|
|
internal var hasNativeStorage: Bool { return _object.hasNativeStorage }
|
|
|
|
internal var hasSharedStorage: Bool { return _object.hasSharedStorage }
|
|
|
|
internal var hasBreadcrumbs: Bool {
|
|
return hasNativeStorage || hasSharedStorage
|
|
}
|
|
}
|
|
|
|
//
|
|
extension _StringGuts {
|
|
// Whether we can provide fast access to contiguous UTF-8 code units
|
|
@_transparent
|
|
@inlinable
|
|
internal var isFastUTF8: Bool { return _fastPath(_object.providesFastUTF8) }
|
|
|
|
// A String which does not provide fast access to contiguous UTF-8 code units
|
|
@inlinable
|
|
internal var isForeign: Bool {
|
|
@inline(__always) get { return _slowPath(_object.isForeign) }
|
|
}
|
|
|
|
@inlinable @inline(__always)
|
|
internal func withFastUTF8<R>(
|
|
_ f: (UnsafeBufferPointer<UInt8>) throws -> R
|
|
) rethrows -> R {
|
|
_internalInvariant(isFastUTF8)
|
|
|
|
if self.isSmall { return try _SmallString(_object).withUTF8(f) }
|
|
|
|
defer { _fixLifetime(self) }
|
|
return try f(_object.fastUTF8)
|
|
}
|
|
|
|
@inlinable @inline(__always)
|
|
internal func withFastUTF8<R>(
|
|
range: Range<Int>,
|
|
_ f: (UnsafeBufferPointer<UInt8>) throws -> R
|
|
) rethrows -> R {
|
|
return try self.withFastUTF8 { wholeUTF8 in
|
|
return try f(UnsafeBufferPointer(rebasing: wholeUTF8[range]))
|
|
}
|
|
}
|
|
|
|
@inlinable @inline(__always)
|
|
internal func withFastCChar<R>(
|
|
_ f: (UnsafeBufferPointer<CChar>) throws -> R
|
|
) rethrows -> R {
|
|
return try self.withFastUTF8 { utf8 in
|
|
let ptr = utf8.baseAddress._unsafelyUnwrappedUnchecked._asCChar
|
|
return try f(UnsafeBufferPointer(start: ptr, count: utf8.count))
|
|
}
|
|
}
|
|
}
|
|
|
|
// Internal invariants
|
|
extension _StringGuts {
|
|
#if !INTERNAL_CHECKS_ENABLED
|
|
@inlinable @inline(__always) internal func _invariantCheck() {}
|
|
#else
|
|
@usableFromInline @inline(never) @_effects(releasenone)
|
|
internal func _invariantCheck() {
|
|
#if arch(i386) || arch(arm)
|
|
_internalInvariant(MemoryLayout<String>.size == 12, """
|
|
the runtime is depending on this, update Reflection.mm and \
|
|
this if you change it
|
|
""")
|
|
#else
|
|
_internalInvariant(MemoryLayout<String>.size == 16, """
|
|
the runtime is depending on this, update Reflection.mm and \
|
|
this if you change it
|
|
""")
|
|
#endif
|
|
}
|
|
#endif // INTERNAL_CHECKS_ENABLED
|
|
|
|
internal func _dump() { _object._dump() }
|
|
}
|
|
|
|
// C String interop
|
|
extension _StringGuts {
|
|
@inlinable @inline(__always) // fast-path: already C-string compatible
|
|
internal func withCString<Result>(
|
|
_ body: (UnsafePointer<Int8>) throws -> Result
|
|
) rethrows -> Result {
|
|
if _slowPath(!_object.isFastZeroTerminated) {
|
|
return try _slowWithCString(body)
|
|
}
|
|
|
|
return try self.withFastCChar {
|
|
return try body($0.baseAddress._unsafelyUnwrappedUnchecked)
|
|
}
|
|
}
|
|
|
|
@inline(never) // slow-path
|
|
@usableFromInline
|
|
internal func _slowWithCString<Result>(
|
|
_ body: (UnsafePointer<Int8>) throws -> Result
|
|
) rethrows -> Result {
|
|
_internalInvariant(!_object.isFastZeroTerminated)
|
|
return try String(self).utf8CString.withUnsafeBufferPointer {
|
|
let ptr = $0.baseAddress._unsafelyUnwrappedUnchecked
|
|
return try body(ptr)
|
|
}
|
|
}
|
|
}
|
|
|
|
extension _StringGuts {
|
|
// Copy UTF-8 contents. Returns number written or nil if not enough space.
|
|
// Contents of the buffer are unspecified if nil is returned.
|
|
@inlinable
|
|
internal func copyUTF8(into mbp: UnsafeMutableBufferPointer<UInt8>) -> Int? {
|
|
let ptr = mbp.baseAddress._unsafelyUnwrappedUnchecked
|
|
if _fastPath(self.isFastUTF8) {
|
|
return self.withFastUTF8 { utf8 in
|
|
guard utf8.count <= mbp.count else { return nil }
|
|
|
|
let utf8Start = utf8.baseAddress._unsafelyUnwrappedUnchecked
|
|
ptr.initialize(from: utf8Start, count: utf8.count)
|
|
return utf8.count
|
|
}
|
|
}
|
|
|
|
return _foreignCopyUTF8(into: mbp)
|
|
}
|
|
@_effects(releasenone)
|
|
@usableFromInline @inline(never) // slow-path
|
|
internal func _foreignCopyUTF8(
|
|
into mbp: UnsafeMutableBufferPointer<UInt8>
|
|
) -> Int? {
|
|
var ptr = mbp.baseAddress._unsafelyUnwrappedUnchecked
|
|
var numWritten = 0
|
|
for cu in String(self).utf8 {
|
|
guard numWritten < mbp.count else { return nil }
|
|
ptr.initialize(to: cu)
|
|
ptr += 1
|
|
numWritten += 1
|
|
}
|
|
|
|
return numWritten
|
|
}
|
|
|
|
internal var utf8Count: Int {
|
|
@inline(__always) get {
|
|
if _fastPath(self.isFastUTF8) { return count }
|
|
return String(self).utf8.count
|
|
}
|
|
}
|
|
}
|
|
|
|
// Index
|
|
extension _StringGuts {
|
|
@usableFromInline
|
|
internal typealias Index = String.Index
|
|
|
|
@inlinable
|
|
internal var startIndex: String.Index {
|
|
@inline(__always) get { return Index(_encodedOffset: 0) }
|
|
}
|
|
@inlinable
|
|
internal var endIndex: String.Index {
|
|
@inline(__always) get { return Index(_encodedOffset: self.count) }
|
|
}
|
|
}
|
|
|
|
// Old SPI(corelibs-foundation)
|
|
extension _StringGuts {
|
|
@available(*, deprecated)
|
|
public // SPI(corelibs-foundation)
|
|
var _isContiguousASCII: Bool {
|
|
return !isSmall && isFastUTF8 && isASCII
|
|
}
|
|
|
|
@available(*, deprecated)
|
|
public // SPI(corelibs-foundation)
|
|
var _isContiguousUTF16: Bool {
|
|
return false
|
|
}
|
|
|
|
// FIXME: Remove. Still used by swift-corelibs-foundation
|
|
@available(*, deprecated)
|
|
public var startASCII: UnsafeMutablePointer<UInt8> {
|
|
return UnsafeMutablePointer(mutating: _object.fastUTF8.baseAddress!)
|
|
}
|
|
|
|
// FIXME: Remove. Still used by swift-corelibs-foundation
|
|
@available(*, deprecated)
|
|
public var startUTF16: UnsafeMutablePointer<UTF16.CodeUnit> {
|
|
fatalError("Not contiguous UTF-16")
|
|
}
|
|
}
|
|
|
|
@available(*, deprecated)
|
|
public // SPI(corelibs-foundation)
|
|
func _persistCString(_ p: UnsafePointer<CChar>?) -> [CChar]? {
|
|
guard let s = p else { return nil }
|
|
let count = Int(_swift_stdlib_strlen(s))
|
|
var result = [CChar](repeating: 0, count: count + 1)
|
|
for i in 0..<count {
|
|
result[i] = s[i]
|
|
}
|
|
return result
|
|
}
|
|
|