mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
String.Index has an encodedOffset-based initializer and computed property that exists for serialization purposes. It was documented as UTF-16 in the SE proposal introducing it, which was String's underlying encoding at the time, but the dream of String even then was to abstract away whatever encoding happend to be used. Serialization needs an explicit encoding for serialized indices to make sense: the offsets need to align with the view. With String utilizing UTF-8 encoding for native contents in Swift 5, serialization isn't necessarily the most efficient in UTF-16. Furthermore, the majority of usage of encodedOffset in the wild is buggy and operates under the assumption that a UTF-16 code unit was a Swift Character, which isn't even valid if the String is known to be all-ASCII (because CR-LF). This change introduces a pair of semantics-preserving alternatives to encodedOffset that explicitly call out the UTF-16 assumption. These serve as a gentle off-ramp for current mis-uses of encodedOffset.
145 lines
4.5 KiB
Swift
145 lines
4.5 KiB
Swift
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
// @opaque
|
|
internal final class _StringBreadcrumbs {
|
|
static var breadcrumbStride: Int { return 32 }
|
|
|
|
var utf16Length: Int
|
|
|
|
// TODO: does this need to be a pair?.... Can we be smaller than Int?
|
|
var crumbs: [String.Index]
|
|
|
|
// TODO: Does this need to be inout, unique, or how will we be enforcing
|
|
// atomicity?
|
|
init(_ str: String) {
|
|
let stride = _StringBreadcrumbs.breadcrumbStride
|
|
|
|
self.crumbs = []
|
|
|
|
if str.isEmpty {
|
|
self.utf16Length = 0
|
|
return
|
|
}
|
|
|
|
self.crumbs.reserveCapacity(
|
|
(str._guts.count / 3) / stride)
|
|
|
|
// TODO(String performance): More efficient implementation of initial scan.
|
|
// We'll also want to benchmark this initial scan in order to track changes.
|
|
|
|
let utf16 = str.utf16
|
|
var i = 0
|
|
var curIdx = utf16.startIndex
|
|
while curIdx != utf16.endIndex {
|
|
if i % stride == 0 { //i.isMultiple(of: stride) {
|
|
self.crumbs.append(curIdx)
|
|
}
|
|
i = i &+ 1
|
|
curIdx = utf16.index(after: curIdx)
|
|
}
|
|
|
|
// Corner case: index(_:offsetBy:) can produce the endIndex
|
|
if i % stride == 0 {
|
|
self.crumbs.append(utf16.endIndex)
|
|
}
|
|
|
|
self.utf16Length = i
|
|
_internalInvariant(self.crumbs.count == 1 + (self.utf16Length / stride))
|
|
|
|
_invariantCheck(for: str)
|
|
}
|
|
}
|
|
|
|
extension _StringBreadcrumbs {
|
|
var stride: Int {
|
|
@inline(__always) get { return _StringBreadcrumbs.breadcrumbStride }
|
|
}
|
|
|
|
// Fetch the lower-bound index corresponding to the given offset, returning
|
|
// the index and the remaining offset to adjust
|
|
internal func getBreadcrumb(
|
|
forOffset offset: Int
|
|
) -> (lowerBound: String.Index, remaining: Int) {
|
|
return (crumbs[offset / stride], offset % stride)
|
|
}
|
|
|
|
// Fetch the lower-bound offset corresponding to the given index, returning
|
|
// the lower-bound and its offset
|
|
internal func getBreadcrumb(
|
|
forIndex idx: String.Index
|
|
) -> (lowerBound: String.Index, offset: Int) {
|
|
var lowerBound = idx._encodedOffset / 3 / stride
|
|
var upperBound = Swift.min(1 + (idx._encodedOffset / stride), crumbs.count)
|
|
_internalInvariant(crumbs[lowerBound] <= idx)
|
|
_internalInvariant(upperBound == crumbs.count || crumbs[upperBound] >= idx)
|
|
|
|
while (upperBound &- lowerBound) > 1 {
|
|
let mid = lowerBound + ((upperBound &- lowerBound) / 2)
|
|
if crumbs[mid] <= idx { lowerBound = mid } else { upperBound = mid }
|
|
}
|
|
|
|
let crumb = crumbs[lowerBound]
|
|
_internalInvariant(crumb <= idx)
|
|
_internalInvariant(lowerBound == crumbs.count-1 || crumbs[lowerBound+1] > idx)
|
|
|
|
return (crumb, lowerBound &* stride)
|
|
}
|
|
|
|
#if !INTERNAL_CHECKS_ENABLED
|
|
@nonobjc @inline(__always) internal func _invariantCheck(for str: String) {}
|
|
#else
|
|
@nonobjc @inline(never) @_effects(releasenone)
|
|
internal func _invariantCheck(for str: String) {
|
|
_internalInvariant(self.utf16Length == str.utf16._distance(
|
|
from: str.startIndex, to: str.endIndex),
|
|
"Stale breadcrumbs")
|
|
}
|
|
#endif // INTERNAL_CHECKS_ENABLED
|
|
}
|
|
|
|
extension _StringGuts {
|
|
@_effects(releasenone)
|
|
internal func getBreadcrumbsPtr() -> UnsafePointer<_StringBreadcrumbs> {
|
|
_internalInvariant(hasBreadcrumbs)
|
|
|
|
let mutPtr: UnsafeMutablePointer<_StringBreadcrumbs?>
|
|
if hasNativeStorage {
|
|
mutPtr = _object.nativeStorage._breadcrumbsAddress
|
|
} else {
|
|
mutPtr = UnsafeMutablePointer(
|
|
Builtin.addressof(&_object.sharedStorage._breadcrumbs))
|
|
}
|
|
|
|
if _slowPath(mutPtr.pointee == nil) {
|
|
populateBreadcrumbs(mutPtr)
|
|
}
|
|
|
|
_internalInvariant(mutPtr.pointee != nil)
|
|
// assuming optional class reference and class reference can alias
|
|
return UnsafeRawPointer(mutPtr).assumingMemoryBound(to: _StringBreadcrumbs.self)
|
|
}
|
|
|
|
@inline(never) // slow-path
|
|
@_effects(releasenone)
|
|
internal func populateBreadcrumbs(
|
|
_ mutPtr: UnsafeMutablePointer<_StringBreadcrumbs?>
|
|
) {
|
|
// Thread-safe compare-and-swap
|
|
let crumbs = _StringBreadcrumbs(String(self))
|
|
_stdlib_atomicInitializeARCRef(
|
|
object: UnsafeMutableRawPointer(mutPtr).assumingMemoryBound(to: Optional<AnyObject>.self),
|
|
desired: crumbs)
|
|
}
|
|
}
|