mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
String.Index has an encodedOffset-based initializer and computed property that exists for serialization purposes. It was documented as UTF-16 in the SE proposal introducing it, which was String's underlying encoding at the time, but the dream of String even then was to abstract away whatever encoding happened to be used. Serialization needs an explicit encoding for serialized indices to make sense: the offsets need to align with the view. With String utilizing UTF-8 encoding for native contents in Swift 5, serialization isn't necessarily the most efficient in UTF-16. Furthermore, the majority of usage of encodedOffset in the wild is buggy and operates under the assumption that a UTF-16 code unit was a Swift Character, which isn't even valid if the String is known to be all-ASCII (because CR-LF). This change introduces a pair of semantics-preserving alternatives to encodedOffset that explicitly call out the UTF-16 assumption. These serve as a gentle off-ramp for current mis-uses of encodedOffset.
108 lines
3.0 KiB
Swift
108 lines
3.0 KiB
Swift
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// TODO(String performance): Unfortunately, this slice struct seems to add
|
|
// overhead. We may want to wean ourselves off of this and have all users just
|
|
// also store a range.
|
|
|
|
// A `_StringGuts` value paired with a range of encoded offsets into it.
//
// Gives String and Substring a single shape to work with when comparing,
// hashing, and performing range-replaceable (RRC) operations.
internal struct _StringGutsSlice {
  // The underlying guts that this slice views.
  internal var _guts: _StringGuts

  // The encoded-offset range of `_guts` covered by this slice.
  internal var _offsetRange: Range<Int>

  // Creates a slice covering all of `guts`, i.e. offsets `0..<guts.count`.
  @inline(__always)
  internal init(_ guts: _StringGuts) {
    _guts = guts
    _offsetRange = 0..<guts.count
  }

  // Creates a slice of `guts` restricted to `offsetRange`. The range is stored
  // as given; no validation against `guts` is performed here.
  @inline(__always)
  internal init(_ guts: _StringGuts, _ offsetRange: Range<Int>) {
    _guts = guts
    _offsetRange = offsetRange
  }

  // The encoded offset at which the slice begins.
  @inlinable
  internal var start: Int {
    @inline(__always) get { return _offsetRange.lowerBound }
  }

  // The encoded offset just past the end of the slice.
  @inlinable
  internal var end: Int {
    @inline(__always) get { return _offsetRange.upperBound }
  }

  // The number of encoded offsets spanned by the slice.
  @inlinable
  internal var count: Int {
    @inline(__always) get { return _offsetRange.count }
  }

  // Forwards to the underlying guts; the answer is whatever `_guts` reports
  // and is not narrowed to the sliced range.
  @inlinable
  internal var isNFCFastUTF8: Bool {
    @inline(__always) get { return _guts.isNFCFastUTF8 }
  }

  // Forwards to the underlying guts; the answer is whatever `_guts` reports
  // and is not narrowed to the sliced range.
  @inlinable
  internal var isASCII: Bool {
    @inline(__always) get { return _guts.isASCII }
  }

  // Forwards to the underlying guts.
  @inlinable
  internal var isFastUTF8: Bool {
    @inline(__always) get { return _guts.isFastUTF8 }
  }

  // The number of UTF-8 code units in the slice.
  //
  // When the guts are fast UTF-8 the length of the offset range is used
  // directly; otherwise the count is obtained from a Substring's UTF-8 view.
  internal var utf8Count: Int {
    @inline(__always) get {
      guard _fastPath(self.isFastUTF8) else {
        return Substring(self).utf8.count
      }
      return _offsetRange.count
    }
  }

  // The slice's extent expressed as a range of `String.Index`, built from the
  // lower and upper encoded offsets.
  @inlinable
  internal var range: Range<String.Index> {
    @inline(__always) get {
      let lowerIndex = String.Index(_encodedOffset: _offsetRange.lowerBound)
      let upperIndex = String.Index(_encodedOffset: _offsetRange.upperBound)
      return lowerIndex ..< upperIndex
    }
  }

  // Invokes `f` with the UTF-8 buffer for the sliced range of the guts and
  // returns its result, rethrowing anything `f` throws.
  @inline(__always)
  internal func withFastUTF8<R>(
    _ f: (UnsafeBufferPointer<UInt8>) throws -> R
  ) rethrows -> R {
    return try _guts.withFastUTF8(range: _offsetRange, f)
  }

  // Decodes the scalar starting at `idx` via the underlying guts.
  //
  // If the decoded scalar would extend past the slice's upper bound, the
  // replacement character with a length of 1 is returned instead.
  @_effects(releasenone)
  internal func foreignErrorCorrectedScalar(
    startingAt idx: String.Index
  ) -> (Unicode.Scalar, scalarLength: Int) {
    let (decodedScalar, codeUnitCount) =
      _guts.foreignErrorCorrectedScalar(startingAt: idx)
    let scalarEnd = idx.encoded(offsetBy: codeUnitCount)
    if _slowPath(scalarEnd > range.upperBound) {
      return (Unicode.Scalar._replacementCharacter, 1)
    }
    return (decodedScalar, codeUnitCount)
  }

  // Reports whether there is a normalization boundary before `index`.
  //
  // Either edge of the slice always counts as a boundary; any other index is
  // answered by the underlying guts.
  internal func foreignHasNormalizationBoundary(
    before index: String.Index
  ) -> Bool {
    let bounds = range
    guard index != bounds.lowerBound && index != bounds.upperBound else {
      return true
    }
    return _guts.foreignHasNormalizationBoundary(before: index)
  }
}