mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
String.Index has an encodedOffset-based initializer and computed property that exists for serialization purposes. It was documented as UTF-16 in the SE proposal introducing it, which was String's underlying encoding at the time, but the dream of String even then was to abstract away whatever encoding happened to be used. Serialization needs an explicit encoding for serialized indices to make sense: the offsets need to align with the view. With String utilizing UTF-8 encoding for native contents in Swift 5, serialization isn't necessarily the most efficient in UTF-16. Furthermore, the majority of usage of encodedOffset in the wild is buggy and operates under the assumption that a UTF-16 code unit was a Swift Character, which isn't even valid if the String is known to be all-ASCII (because CR-LF). This change introduces a pair of semantics-preserving alternatives to encodedOffset that explicitly call out the UTF-16 assumption. These serve as a gentle off-ramp for current mis-uses of encodedOffset.
319 lines
9.8 KiB
Swift
319 lines
9.8 KiB
Swift
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// COW helpers
|
|
extension _StringGuts {
  // Total capacity reported by the native storage, or nil when this string
  // does not have native storage.
  internal var nativeCapacity: Int? {
    return hasNativeStorage ? _object.nativeStorage.capacity : nil
  }

  // Unused capacity reported by the native storage, or nil when this string
  // does not have native storage.
  internal var nativeUnusedCapacity: Int? {
    return hasNativeStorage ? _object.nativeStorage.unusedCapacity : nil
  }

  // Total capacity of the storage, but only when the string is natively
  // stored and that storage is uniquely referenced. Otherwise, nil.
  internal var uniqueNativeCapacity: Int? {
    @inline(__always) mutating get {
      if isUniqueNative {
        return _object.nativeStorage.capacity
      }
      return nil
    }
  }

  // Spare capacity of the storage, but only when the string is natively
  // stored and that storage is uniquely referenced. Otherwise, nil.
  internal var uniqueNativeUnusedCapacity: Int? {
    @inline(__always) mutating get {
      if isUniqueNative {
        return _object.nativeStorage.unusedCapacity
      }
      return nil
    }
  }

  // Whether this string has native storage for which `_isUnique_native`
  // reports uniqueness. Always false without native storage.
  @usableFromInline // @testable
  internal var isUniqueNative: Bool {
    @inline(__always) mutating get {
      // Note: mutating so that self is `inout`.
      if !hasNativeStorage { return false }
      // Keep self alive until the uniqueness query below has completed.
      defer { _fixLifetime(self) }
      var addressBits: UInt = _object.largeAddressBits
      return _isUnique_native(&addressBits)
    }
  }
}
|
|
|
|
// Range-replaceable operation support
|
|
extension _StringGuts {
|
|
// Creates empty guts and, when the requested capacity exceeds
// `_SmallString.capacity`, immediately grows to that capacity.
@inlinable
internal init(_initialCapacity capacity: Int) {
  self.init()
  guard _slowPath(capacity > _SmallString.capacity) else { return }
  self.grow(capacity)
}
|
|
|
|
// Ensures capacity for at least `n` code units, growing only when needed.
internal mutating func reserveCapacity(_ n: Int) {
  // Nothing to do for requests no larger than `_SmallString.capacity`.
  guard n > _SmallString.capacity else { return }

  // Nothing to do when unique native storage is already big enough.
  if let existing = self.uniqueNativeCapacity, existing >= n { return }

  // Otherwise, grow.
  self.grow(n)
}
|
|
|
|
// Grow to accommodate at least `n` code units.
//
// The requested capacity is the larger of `n` and twice the current unique
// native capacity (treated as 0 when there is none). Callers must only
// invoke this when growth is actually required, as the invariant below
// checks.
@usableFromInline
internal mutating func grow(_ n: Int) {
  defer { self._invariantCheck() }

  _internalInvariant(
    self.uniqueNativeCapacity == nil || self.uniqueNativeCapacity! < n)

  let growthTarget = Swift.max(n, (self.uniqueNativeCapacity ?? 0) * 2)

  // Strings without fast UTF-8 access take the out-of-line slow path.
  guard _fastPath(isFastUTF8) else {
    _foreignGrow(growthTarget)
    return
  }

  // Fast path: build new storage straight from the current UTF-8 buffer.
  let isASCII = self.isASCII
  let newStorage = self.withFastUTF8 { utf8 in
    __StringStorage.create(
      initializingFrom: utf8, capacity: growthTarget, isASCII: isASCII)
  }
  self = _StringGuts(newStorage)
}
|
|
|
|
// Slow-path growth for strings without fast UTF-8 access: the contents are
// first copied into an intermediary array of UTF-8 code units, from which
// new storage with capacity `n` is created.
@inline(never) // slow-path
private mutating func _foreignGrow(_ n: Int) {
  // TODO(String performance): Skip intermediary array, transcode directly
  // into a StringStorage space.
  let isASCII = self.isASCII
  let codeUnits = Array(String(self).utf8)
  let newStorage = codeUnits.withUnsafeBufferPointer { utf8 in
    __StringStorage.create(
      initializingFrom: utf8, capacity: n, isASCII: isASCII)
  }
  self = _StringGuts(newStorage)
}
|
|
|
|
// Ensure unique native storage with sufficient capacity for the following
// append of `otherCount` UTF-8 code units.
private mutating func prepareForAppendInPlace(
  otherUTF8Count otherCount: Int
) {
  defer {
    _internalInvariant(self.uniqueNativeUnusedCapacity != nil,
      "growth should produce uniqueness")
    _internalInvariant(self.uniqueNativeUnusedCapacity! >= otherCount,
      "growth should produce enough capacity")
  }

  // See if we can accommodate without growing or copying. If we have
  // sufficient capacity, we do not need to grow, and we can skip the copy if
  // unique. Otherwise, growth is required.
  let hasRoom: Bool
  if let spare = self.nativeUnusedCapacity {
    hasRoom = spare >= otherCount
  } else {
    hasRoom = false
  }
  if self.isUniqueNative && hasRoom {
    return
  }

  let requiredCount = self.utf8Count + otherCount

  // Non-unique storage: just make a copy of the appropriate size, otherwise
  // grow like an array.
  let growthTarget = hasRoom
    ? requiredCount
    : Swift.max(requiredCount, _growArrayCapacity(nativeCapacity ?? 0))
  self.grow(growthTarget)
}
|
|
|
|
// Appends `other` to this string. When both sides are small and the
// combined small-string initializer succeeds, the result stays small;
// otherwise the work is forwarded to the slice-based append.
internal mutating func append(_ other: _StringGuts) {
  if self.isSmall, other.isSmall,
     let combined = _SmallString(self.asSmall, appending: other.asSmall) {
    self = _StringGuts(combined)
    return
  }

  append(_StringGutsSlice(other))
}
|
|
|
|
// Appends the contents of `slicedOther` to this string.
//
// Tries a small-string result first when both underlying strings are small.
// Otherwise prepares unique native storage with enough room and appends in
// place, using the fast UTF-8 path when the slice offers it.
internal mutating func append(_ slicedOther: _StringGutsSlice) {
  defer { self._invariantCheck() }

  if self.isSmall && slicedOther._guts.isSmall {
    // TODO: In-register slicing
    let smolSelf = self.asSmall
    let merged = slicedOther.withFastUTF8 { otherUTF8 in
      _SmallString(smolSelf, appending: _SmallString(otherUTF8)!)
    }
    if let smol = merged {
      self = _StringGuts(smol)
      return
    }
  }

  prepareForAppendInPlace(otherUTF8Count: slicedOther.utf8Count)

  // Slices without fast UTF-8 access go through the out-of-line slow path.
  guard slicedOther.isFastUTF8 else {
    _foreignAppendInPlace(slicedOther)
    return
  }

  let otherIsASCII = slicedOther.isASCII
  slicedOther.withFastUTF8 { otherUTF8 in
    self.appendInPlace(otherUTF8, isASCII: otherIsASCII)
  }
}
|
|
|
|
// Appends the UTF-8 code units in `other` directly to the native storage.
// `isASCII` is forwarded to the storage's own in-place append.
internal mutating func appendInPlace(
  _ other: UnsafeBufferPointer<UInt8>,
  isASCII: Bool
) {
  _object.nativeStorage.appendInPlace(other, isASCII: isASCII)

  // Rebuild the guts from the mutated storage so that the new count, flags,
  // etc. are picked up.
  self = _StringGuts(_object.nativeStorage)
}
|
|
|
|
// Slow-path in-place append for a slice without fast UTF-8 access: the
// slice's UTF-8 view is fed to the native storage through an iterator.
// Expects unique native storage to already be in place.
@inline(never) // slow-path
private mutating func _foreignAppendInPlace(_ other: _StringGutsSlice) {
  _internalInvariant(!other.isFastUTF8)
  _internalInvariant(self.uniqueNativeUnusedCapacity != nil)

  let otherIsASCII = other.isASCII
  var utf8Iterator = Substring(other).utf8.makeIterator()
  _object.nativeStorage.appendInPlace(&utf8Iterator, isASCII: otherIsASCII)

  // Rebuild the guts from the mutated storage so that the new count, flags,
  // etc. are picked up.
  self = _StringGuts(_object.nativeStorage)
}
|
|
|
|
// Empties the string. Unique native storage is cleared and kept; in every
// other case the guts are replaced with freshly initialized empty guts.
internal mutating func clear() {
  if isUniqueNative {
    // Reset the count
    _object.nativeStorage.clear()
    self = _StringGuts(_object.nativeStorage)
  } else {
    self = _StringGuts()
  }
}
|
|
|
|
// Removes the code units between the encoded offsets of `lower` and `upper`.
//
// Unique native storage is edited in place. Otherwise a new string is
// assembled from the pieces before `lower` and from `upper` onwards.
internal mutating func remove(from lower: Index, to upper: Index) {
  let lowerOffset = lower._encodedOffset
  let upperOffset = upper._encodedOffset
  _internalInvariant(lower.transcodedOffset == 0 && upper.transcodedOffset == 0)
  _internalInvariant(lowerOffset <= upperOffset && upperOffset <= self.count)

  guard isUniqueNative else {
    // TODO(cleanup): Add append on guts taking range, use that
    var result = String()
    // FIXME: It should be okay to get rid of excess capacity
    // here. rdar://problem/45635432
    result.reserveCapacity(
      nativeCapacity ?? (count &- (upperOffset &- lowerOffset)))
    let selfStr = String(self)
    result.append(contentsOf: selfStr[..<lower])
    result.append(contentsOf: selfStr[upper...])
    self = result._guts
    return
  }

  _object.nativeStorage.remove(from: lowerOffset, to: upperOffset)
  // Rebuild the guts from the mutated storage so that the new count, flags,
  // etc. are picked up.
  self = _StringGuts(_object.nativeStorage)
}
|
|
|
|
// Replaces the characters in `bounds` with `newElements`.
//
// With unique native storage the replacement happens in place, handing over
// a contiguous UTF-8 buffer when `newElements` is a String that offers one.
// Otherwise a new string is assembled from prefix, replacement and suffix.
@inline(__always) // Always-specialize
internal mutating func replaceSubrange<C>(
  _ bounds: Range<Index>,
  with newElements: C
) where C : Collection, C.Iterator.Element == Character {
  guard isUniqueNative else {
    var result = String()
    // FIXME: It should be okay to get rid of excess capacity
    // here. rdar://problem/45635432
    if let capacity = self.nativeCapacity {
      result.reserveCapacity(capacity)
    }
    let selfStr = String(self)
    result.append(contentsOf: selfStr[..<bounds.lowerBound])
    result.append(contentsOf: newElements)
    result.append(contentsOf: selfStr[bounds.upperBound...])
    self = result._guts
    return
  }

  if let replStr = newElements as? String, replStr._guts.isFastUTF8 {
    replStr._guts.withFastUTF8 { replUTF8 in
      uniqueNativeReplaceSubrange(
        bounds, with: replUTF8, isASCII: replStr._guts.isASCII)
    }
    return
  }

  // General case: lazily flatten the characters into UTF-8 code units.
  uniqueNativeReplaceSubrange(
    bounds, with: newElements.lazy.flatMap { $0.utf8 })
}
|
|
|
|
// Replaces `bounds` with the UTF-8 code units in `codeUnits`, editing the
// native storage in place after reserving room for the resulting length.
// NOTE(review): `isASCII` is accepted but never read in this body.
internal mutating func uniqueNativeReplaceSubrange(
  _ bounds: Range<Index>,
  with codeUnits: UnsafeBufferPointer<UInt8>,
  isASCII: Bool
) {
  let lowerOffset = bounds.lowerBound._encodedOffset
  let upperOffset = bounds.upperBound._encodedOffset

  // Prefix length + replacement length + suffix length.
  let neededCapacity =
    lowerOffset + codeUnits.count + (self.count - upperOffset)
  reserveCapacity(neededCapacity)

  _internalInvariant(bounds.lowerBound.transcodedOffset == 0)
  _internalInvariant(bounds.upperBound.transcodedOffset == 0)

  _object.nativeStorage.replace(
    from: lowerOffset, to: upperOffset, with: codeUnits)
  // Rebuild the guts from the mutated storage.
  self = _StringGuts(_object.nativeStorage)
}
|
|
|
|
// Replaces `bounds` with the UTF-8 code units of an arbitrary collection,
// editing the native storage in place after reserving room for the
// resulting length. The collection's count is computed once up front and
// passed along to the storage as `replacementCount`.
internal mutating func uniqueNativeReplaceSubrange<C: Collection>(
  _ bounds: Range<Index>,
  with codeUnits: C
) where C.Element == UInt8 {
  let replCount = codeUnits.count
  let lowerOffset = bounds.lowerBound._encodedOffset
  let upperOffset = bounds.upperBound._encodedOffset

  // Prefix length + replacement length + suffix length.
  let neededCapacity = lowerOffset + replCount + (self.count - upperOffset)
  reserveCapacity(neededCapacity)

  _internalInvariant(bounds.lowerBound.transcodedOffset == 0)
  _internalInvariant(bounds.upperBound.transcodedOffset == 0)

  _object.nativeStorage.replace(
    from: lowerOffset,
    to: upperOffset,
    with: codeUnits,
    replacementCount: replCount)
  // Rebuild the guts from the mutated storage.
  self = _StringGuts(_object.nativeStorage)
}
|
|
}
|
|
|