mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
Assign some previously reserved bits in String.Index and _StringObject to keep track of their associated storage encoding (either UTF-8 or UTF-16). None of these bits will be reliably set in processes that load binaries compiled with older stdlib releases, but when they do end up getting set, we can use them opportunistically to more reliably detect cases where an index is applied on a string with a mismatching encoding. As more and more code gets recompiled with 5.7+, the stdlib will gradually become able to detect such issues with complete accuracy. Code that misuses indices this way was always considered broken; however, String wasn’t able to reliably detect these runtime errors before. I therefore expect there is a large amount of broken code out there that keeps using bridged Cocoa String indices (UTF-16) after a mutation turns the underlying strings into native UTF-8 strings. For that reason, instead of trapping, this commit silently corrects the issue, transcoding the offsets into the correct encoding. It would probably be a good idea to also emit a runtime warning in addition to recovering from the error. This would generate some noise that would gently nudge folks to fix their code. rdar://89369680
106 lines
3.0 KiB
Swift
106 lines
3.0 KiB
Swift
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// TODO(String performance): Unfortunately, this slice struct seems to add
|
|
// overhead. We may want to wean ourselves off of this and have all users just
|
|
// also store a range.
|
|
|
|
// A sliced _StringGuts, convenient for unifying String/Substring comparison,
|
|
// hashing, and RRC.
|
|
/// A `_StringGuts` value paired with the range of encoded offsets that
/// delimits the portion of it being viewed.
internal struct _StringGutsSlice {
  /// The storage this slice looks into.
  internal var _guts: _StringGuts

  /// Encoded offsets into `_guts` that bound the slice.
  internal var _offsetRange: Range<Int>

  /// Creates a slice spanning all of `guts`, i.e. offsets `0..<guts.count`.
  @inline(__always)
  internal init(_ guts: _StringGuts) {
    _guts = guts
    _offsetRange = 0..<guts.count
  }

  /// Creates a slice of `guts` restricted to `offsetRange`.
  ///
  /// Both bounds of `offsetRange` are checked with `_internalInvariant` to
  /// lie on Unicode scalar boundaries of `guts`.
  @inline(__always)
  internal init(_ guts: _StringGuts, _ offsetRange: Range<Int>) {
    _internalInvariant(
      guts.isOnUnicodeScalarBoundary(offsetRange.lowerBound)
        && guts.isOnUnicodeScalarBoundary(offsetRange.upperBound))
    _offsetRange = offsetRange
    _guts = guts
  }

  /// The lower bound of the slice's offset range.
  @inlinable
  internal var start: Int {
    @inline(__always) get { _offsetRange.lowerBound }
  }

  /// The upper bound of the slice's offset range.
  @inlinable
  internal var end: Int {
    @inline(__always) get { _offsetRange.upperBound }
  }

  /// The number of offsets covered by the slice's offset range.
  @inlinable
  internal var count: Int {
    @inline(__always) get { _offsetRange.count }
  }

  /// Forwards `isNFCFastUTF8` from the underlying guts.
  @inlinable
  internal var isNFCFastUTF8: Bool {
    @inline(__always) get { _guts.isNFCFastUTF8 }
  }

  /// Forwards `isASCII` from the underlying guts.
  @inlinable
  internal var isASCII: Bool {
    @inline(__always) get { _guts.isASCII }
  }

  /// Forwards `isFastUTF8` from the underlying guts.
  @inlinable
  internal var isFastUTF8: Bool {
    @inline(__always) get { _guts.isFastUTF8 }
  }

  /// The slice's length in UTF-8 code units.
  ///
  /// When the guts report fast UTF-8 this is the length of the offset range;
  /// otherwise the value is obtained by counting the UTF-8 view of a
  /// `Substring` built from this slice.
  internal var utf8Count: Int {
    @inline(__always) get {
      guard _fastPath(isFastUTF8) else {
        return Substring(self).utf8.count
      }
      return _offsetRange.count
    }
  }

  /// The slice's bounds expressed as `String.Index` values.
  ///
  /// Each index is created from the corresponding encoded offset and marked
  /// `_scalarAligned`; the resulting range is formed without bounds checking.
  @inlinable
  internal var range: Range<String.Index> {
    @inline(__always) get {
      let startIndex = String.Index(
        _encodedOffset: _offsetRange.lowerBound
      )._scalarAligned
      let endIndex = String.Index(
        _encodedOffset: _offsetRange.upperBound
      )._scalarAligned
      return Range(_uncheckedBounds: (startIndex, endIndex))
    }
  }

  /// Invokes `body` through the guts' `withFastUTF8(range:_:)`, passing this
  /// slice's offset range, and returns (or rethrows) whatever `body` produces.
  @inline(__always)
  internal func withFastUTF8<R>(
    _ body: (UnsafeBufferPointer<UInt8>) throws -> R
  ) rethrows -> R {
    try _guts.withFastUTF8(range: _offsetRange, body)
  }

  /// Asks the underlying guts for the error-corrected scalar starting at
  /// `idx`, then clamps the answer to this slice.
  ///
  /// If the scalar reported by the guts would end past the slice's upper
  /// bound, the replacement character with a length of 1 is returned in its
  /// place.
  @_effects(releasenone)
  internal func foreignErrorCorrectedScalar(
    startingAt idx: String.Index
  ) -> (Unicode.Scalar, scalarLength: Int) {
    let (decoded, length) = _guts.foreignErrorCorrectedScalar(startingAt: idx)
    let scalarEnd = idx.encoded(offsetBy: length)
    // A scalar that straddles the end of the slice is not fully contained in
    // it, so it is reported as a single replacement unit instead.
    if _slowPath(scalarEnd > range.upperBound) {
      return (Unicode.Scalar._replacementCharacter, 1)
    }
    return (decoded, length)
  }
}
|