// Mirror of https://github.com/apple/swift.git (synced 2025-12-14)
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
import SwiftShims
|
|
|
|
// HACK: Reconstructing the guts from raw bit patterns here avoids some
// retain/release traffic that was slowing down the memcmp fast path for
// comparing ascii strings. rdar://problem/37473470
@inline(never) // @outlined
@effects(readonly)
@_versioned // @opaque
internal
func _compareUnicode(
  _ lhs: _StringGuts._RawBitPattern, _ rhs: _StringGuts._RawBitPattern
) -> Int {
  let lhsGuts = _StringGuts(rawBits: lhs)
  let rhsGuts = _StringGuts(rawBits: rhs)

  // Fast path: both sides contiguous, compare directly.
  if _fastPath(lhsGuts._isContiguous && rhsGuts._isContiguous) {
    return lhsGuts._compareContiguous(rhsGuts)
  }
  // Slow path: at least one side is opaque. Compare through the opaque
  // view, flipping the result when the opaque side is on the right.
  if !lhsGuts._isContiguous {
    return lhsGuts._asOpaque()._compareOpaque(rhsGuts).rawValue
  }
  return rhsGuts._asOpaque()._compareOpaque(lhsGuts).flipped.rawValue
}
|
|
|
|
// Range-restricted variant of `_compareUnicode(_:_:)` above; same
// retain/release-avoiding raw-bits trick. rdar://problem/37473470
@inline(never) // @outlined
@effects(readonly)
@_versioned // @opaque
internal
func _compareUnicode(
  _ lhs: _StringGuts._RawBitPattern, _ leftRange: Range<Int>,
  _ rhs: _StringGuts._RawBitPattern, _ rightRange: Range<Int>
) -> Int {
  let lhsGuts = _StringGuts(rawBits: lhs)
  let rhsGuts = _StringGuts(rawBits: rhs)

  // Fast path: both sides contiguous, compare directly.
  if _fastPath(lhsGuts._isContiguous && rhsGuts._isContiguous) {
    return lhsGuts._compareContiguous(leftRange, rhsGuts, rightRange)
  }
  // Slow path: at least one side is opaque; flip when it is the right side.
  if !lhsGuts._isContiguous {
    return lhsGuts._asOpaque()[leftRange]._compareOpaque(
      rhsGuts, rightRange
    ).rawValue
  }
  return rhsGuts._asOpaque()[rightRange]._compareOpaque(
    lhsGuts, leftRange
  ).flipped.rawValue
}
|
|
|
|
//
|
|
// Pointer casting helpers
|
|
//
|
|
/// Reinterprets `count` elements starting at `ptr` as a mutable buffer of
/// `U`. The caller is responsible for layout compatibility.
@inline(__always)
private func _unsafeMutableBufferPointerCast<T, U>(
  _ ptr: UnsafeMutablePointer<T>,
  _ count: Int,
  to: U.Type = U.self
) -> UnsafeMutableBufferPointer<U> {
  let rawPtr = UnsafeMutableRawPointer(ptr)
  let rebound = rawPtr.assumingMemoryBound(to: U.self)
  return UnsafeMutableBufferPointer(start: rebound, count: count)
}
|
|
/// Reinterprets `count` elements starting at `ptr` as an immutable buffer
/// of `U`. The caller is responsible for layout compatibility.
@inline(__always)
private func _unsafeBufferPointerCast<T, U>(
  _ ptr: UnsafePointer<T>,
  _ count: Int,
  to: U.Type = U.self
) -> UnsafeBufferPointer<U> {
  let rawPtr = UnsafeRawPointer(ptr)
  let rebound = rawPtr.assumingMemoryBound(to: U.self)
  return UnsafeBufferPointer(start: rebound, count: count)
}
|
|
|
|
// UTF-16 surrogate constants: leading (high) surrogates occupy
// 0xD800...0xDBFF, trailing (low) surrogates 0xDC00...0xDFFF.
internal let _leadingSurrogateBias: UInt16 = 0xd800
internal let _trailingSurrogateBias: UInt16 = 0xdc00
// Masks off the 10 payload bits, leaving the 6-bit surrogate tag.
internal let _surrogateMask: UInt16 = 0xfc00
|
|
|
|
/// Whether `cu` is any UTF-16 surrogate code unit, leading or trailing.
@inline(__always)
internal func _isSurrogate(_ cu: UInt16) -> Bool {
  if _isLeadingSurrogate(cu) { return true }
  return _isTrailingSurrogate(cu)
}
|
|
|
|
/// Whether `cu` is a UTF-16 leading (high) surrogate, i.e. in
/// 0xD800...0xDBFF.
@inline(__always)
internal func _isLeadingSurrogate(_ cu: UInt16) -> Bool {
  // Tag test: the xor clears the leading-surrogate tag bits, so the masked
  // result is zero exactly for 0xD800...0xDBFF.
  return (cu ^ _leadingSurrogateBias) & _surrogateMask == 0
}
|
|
|
|
/// Whether `cu` is a UTF-16 trailing (low) surrogate, i.e. in
/// 0xDC00...0xDFFF.
@inline(__always)
internal func _isTrailingSurrogate(_ cu: UInt16) -> Bool {
  // Tag test: masked result is zero exactly for 0xDC00...0xDFFF.
  return (cu ^ _trailingSurrogateBias) & _surrogateMask == 0
}
|
|
/// Combines a valid UTF-16 surrogate pair into the Unicode scalar value it
/// encodes.
@inline(__always)
internal func _decodeSurrogatePair(
  leading high: UInt16, trailing low: UInt16
) -> UInt32 {
  _sanityCheck(_isLeadingSurrogate(high) && _isTrailingSurrogate(low))
  // Subtracting the biases strips the surrogate tags, leaving the high and
  // low 10-bit payloads.
  let highBits: UInt32 = UInt32(high) &- UInt32(_leadingSurrogateBias)
  _sanityCheck(highBits < 1<<10, "I said high 10. Not high, like, 20 or something")
  let lowBits: UInt32 = UInt32(low) &- UInt32(_trailingSurrogateBias)
  _sanityCheck(lowBits < 1<<10, "I said low 10. Not low, like, 20 or something")

  // Scalars encoded as surrogate pairs start at U+10000.
  return 0x1_00_00 &+ ((highBits &<< 10) | lowBits)
}
|
|
|
|
/// Whether a normalization-segment boundary definitely precedes `cu`.
/// Surrogate code units conservatively report no boundary.
internal func _hasNormalizationBoundary(before cu: UInt16) -> Bool {
  if _isSurrogate(cu) { return false }
  return UnicodeScalar(_unchecked: UInt32(cu))._hasNormalizationBoundaryBefore
}
|
|
|
|
//
// Output-buffer casting helpers
//
|
|
/// Views a normalization segment output buffer as a mutable UInt16 buffer,
/// trimmed to its first `endIdx` elements.
internal func _castOutputBuffer(
  _ ptr: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>,
  endingAt endIdx: Int = _Normalization._SegmentOutputBuffer.capacity
) -> UnsafeMutableBufferPointer<UInt16> {
  let capacity = _Normalization._SegmentOutputBuffer.capacity
  let fullBuffer: UnsafeMutableBufferPointer<UInt16> =
    _unsafeMutableBufferPointerCast(ptr, capacity)
  return UnsafeMutableBufferPointer<UInt16>(rebasing: fullBuffer[..<endIdx])
}
|
|
/// Views a normalization segment output buffer as an immutable UInt16
/// buffer, trimmed to its first `endIdx` elements.
internal func _castOutputBuffer(
  _ ptr: UnsafePointer<_Normalization._SegmentOutputBuffer>,
  endingAt endIdx: Int = _Normalization._SegmentOutputBuffer.capacity
) -> UnsafeBufferPointer<UInt16> {
  let capacity = _Normalization._SegmentOutputBuffer.capacity
  let fullBuffer: UnsafeBufferPointer<UInt16> =
    _unsafeBufferPointerCast(ptr, capacity)
  return UnsafeBufferPointer<UInt16>(rebasing: fullBuffer[..<endIdx])
}
|
|
|
|
extension _FixedArray16 where T == UInt16 {
  /// Copies the contents of `other` into the front of this fixed array.
  /// `other` must fit within the fixed capacity.
  mutating func fill(from other: _UnmanagedString<T>) {
    // FIX: was `other.count < capacity`, which spuriously rejected a legal
    // fill of exactly `capacity` elements — indices 0..<count are in bounds
    // whenever count <= capacity.
    _sanityCheck(other.count <= _FixedArray16<T>.capacity,
      "out of bounds fill")
    for i in 0..<other.count {
      self[i] = other[i]
    }
  }
}
|
|
|
|
/// Three-way comparison result; raw values follow the memcmp convention
/// (-1 less, 0 equal, 1 greater).
@_versioned internal
enum _Ordering: Int, Equatable {
  case less = -1
  case equal = 0
  case greater = 1

  /// The ordering with the roles of the two compared values exchanged.
  @_versioned internal
  var flipped: _Ordering {
    switch self {
    case .equal: return .equal
    case .less: return .greater
    case .greater: return .less
    }
  }

  /// Builds an ordering from a signed comparison result (negative, zero, or
  /// positive).
  @inline(__always)
  @_versioned internal
  init(signedNotation int: Int) {
    if int < 0 {
      self = .less
    } else {
      self = int == 0 ? .equal : .greater
    }
  }
}
|
|
|
|
extension _UnmanagedString where CodeUnit == UInt8 {
  // TODO: These should be SIMD-ized
  /// Index of the first position where this ASCII string and `other`
  /// disagree; returns the shorter count when one is a binary prefix of the
  /// other.
  internal func _findDiffIdx(_ other: _UnmanagedString<UInt16>) -> Int {
    let limit = Swift.min(self.count, other.count)
    var idx = 0
    while idx < limit {
      if UInt16(self[idx]) != other[idx] { return idx }
      idx += 1
    }
    return limit
  }
}
|
|
|
|
/// Index of the first mismatch between an ASCII buffer and a UTF-16 buffer;
/// returns the shorter count when one is a prefix of the other.
internal func _findDiffIdx(
  _ left: UnsafeBufferPointer<UInt8>,
  _ right: UnsafeBufferPointer<UInt16>
) -> Int {
  let limit = Swift.min(left.count, right.count)
  var idx = 0
  while idx < limit {
    if UInt16(left[idx]) != right[idx] { return idx }
    idx += 1
  }
  return limit
}
|
|
|
|
/// Index of the first mismatching code unit between two buffers of the same
/// unsigned code-unit type; returns the shorter count when one is a prefix
/// of the other.
internal func _findDiffIdx<CodeUnit>(
  _ left: UnsafeBufferPointer<CodeUnit>,
  _ right: UnsafeBufferPointer<CodeUnit>
) -> Int where CodeUnit : FixedWidthInteger & UnsignedInteger {
  let limit = Swift.min(left.count, right.count)
  var idx = 0
  while idx < limit {
    if left[idx] != right[idx] { return idx }
    idx += 1
  }
  return limit
}
|
|
|
|
extension _UnmanagedString where CodeUnit : FixedWidthInteger & UnsignedInteger {
  /// Index of the first mismatching code unit between `self` and `other`;
  /// returns the shorter count when one is a prefix of the other.
  ///
  /// NOTE(review): the method-level generic `CodeUnit` shadows the
  /// extension's `CodeUnit`, so the two strings may have different code-unit
  /// widths; the comparison relies on heterogeneous integer `==`. Confirm
  /// the shadowing is intentional.
  internal func _findDiffIdx<CodeUnit>(
    _ other: _UnmanagedString<CodeUnit>
  ) -> Int {
    let limit = Swift.min(self.count, other.count)
    var idx = 0
    while idx < limit {
      if self[idx] != other[idx] { return idx }
      idx += 1
    }
    return limit
  }
}
|
|
|
|
extension _UnmanagedOpaqueString {
  /// Index of the first mismatch between `self` and `other` restricted to
  /// `otherRange`; returns the shorter count when one side is a prefix.
  internal func _findDiffIdx(_ other: _StringGuts, _ otherRange: Range<Int>
  ) -> Int {
    let limit = Swift.min(self.count, otherRange.count)
    var idx = 0
    while idx < limit {
      // `other` is indexed relative to the start of `otherRange`.
      if self[idx] != other[idx + otherRange.lowerBound] { return idx }
      idx += 1
    }
    return limit
  }
}
|
|
|
|
/// Three-way comparison of two lengths.
internal func _lexicographicalCompare(_ lhs: Int, _ rhs: Int) -> _Ordering {
  // TODO: inspect code quality
  if lhs < rhs { return .less }
  return lhs > rhs ? .greater : .equal
}
|
|
|
|
/// Three-way comparison of two UTF-16 code units.
internal func _lexicographicalCompare(
  _ lhs: UInt16, _ rhs: UInt16
) -> _Ordering {
  if lhs < rhs { return .less }
  return lhs > rhs ? .greater : .equal
}
|
|
|
|
/// Lexicographic three-way comparison of two UTF-16 buffers.
internal func _lexicographicalCompare(
  _ leftHS: UnsafeBufferPointer<UInt16>,
  _ rightHS: UnsafeBufferPointer<UInt16>
) -> _Ordering {
  let limit = Swift.min(leftHS.count, rightHS.count)

  let diffIdx = _findDiffIdx(leftHS, rightHS)
  if diffIdx == limit {
    // One buffer is a prefix of the other; the shorter orders first.
    return _lexicographicalCompare(leftHS.count, rightHS.count)
  }
  let leftBase = leftHS.baseAddress._unsafelyUnwrappedUnchecked
  let rightBase = rightHS.baseAddress._unsafelyUnwrappedUnchecked
  return _lexicographicalCompare(leftBase[diffIdx], rightBase[diffIdx])
}
|
|
|
|
/// Lexicographic three-way comparison of an ASCII buffer against a UTF-16
/// buffer; ASCII code units are widened before comparing.
internal func _lexicographicalCompare(
  _ leftHS: UnsafeBufferPointer<UInt8>,
  _ rightHS: UnsafeBufferPointer<UInt16>
) -> _Ordering {
  let limit = Swift.min(leftHS.count, rightHS.count)

  let diffIdx = _findDiffIdx(leftHS, rightHS)
  if diffIdx == limit {
    // One buffer is a prefix of the other; the shorter orders first.
    return _lexicographicalCompare(leftHS.count, rightHS.count)
  }
  let leftBase = leftHS.baseAddress._unsafelyUnwrappedUnchecked
  let rightBase = rightHS.baseAddress._unsafelyUnwrappedUnchecked
  return _lexicographicalCompare(UInt16(leftBase[diffIdx]), rightBase[diffIdx])
}
|
|
/// Lexicographic comparison of two normalization segment buffers, each
/// limited to its first `count` code units.
@inline(__always)
internal func _lexicographicalCompare(
  _ leftHS: UnsafePointer<_Normalization._SegmentOutputBuffer>,
  leftCount: Int,
  _ rightHS: UnsafePointer<_Normalization._SegmentOutputBuffer>,
  rightCount: Int
) -> _Ordering {
  let leftBuffer = _castOutputBuffer(leftHS, endingAt: leftCount)
  let rightBuffer = _castOutputBuffer(rightHS, endingAt: rightCount)
  return _lexicographicalCompare(leftBuffer, rightBuffer)
}
|
|
/// Lexicographic three-way comparison of two UTF-16 arrays.
@inline(__always)
internal func _lexicographicalCompare(
  _ leftHS: Array<UInt16>,
  _ rightHS: Array<UInt16>
) -> _Ordering {
  return leftHS.withUnsafeBufferPointer { leftBuf -> _Ordering in
    rightHS.withUnsafeBufferPointer { rightBuf -> _Ordering in
      _lexicographicalCompare(leftBuf, rightBuf)
    }
  }
}
|
|
|
|
/// Parses the scalar beginning at `idx` inside a segment output buffer.
/// Returns the scalar and the index one past its last code unit.
internal func _parseRawScalar(
  _ buf: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>,
  startingFrom idx: Int = 0
) -> (UnicodeScalar, scalarEndIndex: Int) {
  return Swift._parseRawScalar(buffer: _castOutputBuffer(buf), startingFrom: idx)
}
|
|
|
|
/// Parses the UTF-16 scalar starting at `idx` in `buf`.
///
/// Unpaired surrogates are returned as-is (as "invalid" scalars) rather
/// than rejected. Returns the scalar and the index one past its encoding
/// (idx+1 for a single unit, idx+2 for a surrogate pair).
internal func _parseRawScalar(
  buffer buf: UnsafeBufferPointer<UInt16>,
  startingFrom idx: Int = 0
) -> (UnicodeScalar, scalarEndIndex: Int) {
  let ptr = buf.baseAddress._unsafelyUnwrappedUnchecked
  _sanityCheck(idx >= 0 && idx < buf.count, "out of bounds index")
  let cu: UInt16 = ptr[idx]
  // Last code unit in the buffer: cannot start a pair.
  if _slowPath(idx+1 == buf.count) {
    return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
  }
  // Not a leading surrogate: a BMP scalar (or an unpaired trailing
  // surrogate, returned as-is).
  guard _isLeadingSurrogate(cu) else {
    return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
  }
  let nextCu: UInt16 = ptr[idx+1]
  guard _isTrailingSurrogate(nextCu) else {
    // Invalid surrogate pair: just return the invalid value
    return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
  }

  // Decode
  let value: UInt32 = _decodeSurrogatePair(leading: cu, trailing: nextCu)
  _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set")
  return (UnicodeScalar(_unchecked: value), idx+2)
}
|
|
|
|
extension _UnmanagedOpaqueString {
  /// Parses the scalar starting at `idx`, reading at most two code units.
  ///
  /// Copies up to two code units into a small stack buffer and defers to
  /// the buffer-based parser (so unpaired surrogates behave identically).
  /// As before, the returned end index is relative to the copied buffer
  /// (1 or 2), not to `idx`.
  internal func _parseRawScalar(
    startingFrom idx: Int = 0
  ) -> (UnicodeScalar, scalarEndIndex: Int) {
    var buffer = _FixedArray2<UInt16>(allZeros:())
    buffer[0] = self[idx]
    var codeUnitCount = 1
    if idx+1 < self.count {
      buffer[1] = self[idx+1]
      codeUnitCount = 2
    }
    // FIX: the previous version let the pointer produced by `&buffer`
    // escape the call it was created for (undefined lifetime). Constrain
    // its lifetime with withUnsafePointer, and merge the two duplicated
    // branches.
    return withUnsafePointer(to: &buffer) { bufferPtr in
      let codeUnits = _unsafeBufferPointerCast(
        bufferPtr, codeUnitCount, to: UInt16.self)
      return Swift._parseRawScalar(buffer: codeUnits, startingFrom: 0)
    }
  }
}
|
|
|
|
extension _UnmanagedString where CodeUnit == UInt16 {
  /// Parses the scalar starting at `idx`; unpaired surrogates are returned
  /// as-is rather than rejected. Returns the scalar and the index one past
  /// its encoding.
  internal func _parseRawScalar(
    startingFrom idx: Int = 0
  ) -> (UnicodeScalar, scalarEndIndex: Int) {
    _sanityCheck(idx >= 0 && idx < self.count, "out of bounds index")
    let cu = self[idx]
    // Last code unit: cannot start a surrogate pair.
    if _slowPath(idx+1 == self.count) {
      return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
    }
    guard _isLeadingSurrogate(cu) else {
      return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
    }
    let nextCu = self[idx+1]
    guard _isTrailingSurrogate(nextCu) else {
      // Invalid surrogate pair: just return the invalid value
      return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
    }

    // Decode
    let value: UInt32 = _decodeSurrogatePair(leading: cu, trailing: nextCu)
    _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set")
    return (UnicodeScalar(_unchecked: value), idx+2)
  }

  /// Parses the scalar that ends just before `idx`, scanning backwards.
  /// Returns the scalar and the index of its first code unit.
  internal func _reverseParseRawScalar(
    endingAt idx: Int // one-past-the-end
  ) -> (UnicodeScalar, scalarStartIndex: Int) {
    _sanityCheck(idx > 0 && idx <= self.count, "out of bounds end index")

    // Corner case: leading un-paired surrogate
    if _slowPath(idx == 1) {
      return (UnicodeScalar(_unchecked: UInt32(self[0])), 0)
    }

    let cu = self[idx-1]
    // Not a trailing surrogate: a single-unit scalar (or an unpaired
    // leading surrogate, returned as-is).
    guard _isTrailingSurrogate(cu) else {
      return (UnicodeScalar(_unchecked: UInt32(cu)), idx-1)
    }
    let priorCU = self[idx-2]
    guard _isLeadingSurrogate(priorCU) else {
      return (UnicodeScalar(_unchecked: UInt32(cu)), idx-1)
    }

    // Decode
    let value: UInt32 = _decodeSurrogatePair(leading: priorCU, trailing: cu)
    _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set")
    return (UnicodeScalar(_unchecked: value), idx-2)
  }

  /// NFC-normalizes into a fixed segment buffer; nil when it doesn't fit.
  internal func _tryNormalize(
    into outputBuffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
  ) -> Int? {
    return self._tryNormalize(into: _castOutputBuffer(outputBuffer))
  }

  /// NFC-normalizes into `outputBuffer` via ICU. Returns the normalized
  /// length, or nil on any ICU error (treated as "buffer needs to grow").
  internal func _tryNormalize(
    into outputBuffer: UnsafeMutableBufferPointer<UInt16>
  ) -> Int? {
    var err = __swift_stdlib_U_ZERO_ERROR
    let count = __swift_stdlib_unorm2_normalize(
      _Normalization._nfcNormalizer,
      self.start,
      numericCast(self.count),
      outputBuffer.baseAddress._unsafelyUnwrappedUnchecked,
      numericCast(outputBuffer.count),
      &err
    )
    guard err.isSuccess else {
      // The output buffer needs to grow
      return nil
    }
    return numericCast(count)
  }

  /// NFC-normalizes by repeatedly doubling a heap buffer until the result
  /// fits; traps if growth exceeds the documented max expansion factor.
  internal func _slowNormalize() -> [UInt16] {
    _sanityCheck(self.count > 0, "called on empty string")

    // Upper bound on how large the buffer should ever need to get.
    let canary = self.count * _Normalization._maxNFCExpansionFactor
    var count = self.count
    while true {
      var result = Array<UInt16>(repeating: 0, count: count)
      if let length = result.withUnsafeMutableBufferPointer({ (bufPtr) -> Int? in
        return self._tryNormalize(into: bufPtr)
      }) {
        // Trim the unused tail before returning.
        result.removeLast(count - length)
        return result
      }
      // Otherwise, we need to grow
      guard count <= canary else {
        fatalError("Invariant broken: Max decomposition factor insufficient")
      }
      count *= 2
    }
  }
}
|
|
|
|
/// NFC-normalizes `input` into a fixed segment buffer; nil when it doesn't
/// fit.
internal func _tryNormalize(
  _ input: UnsafeBufferPointer<UInt16>,
  into outputBuffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
) -> Int? {
  let typedBuffer = _castOutputBuffer(outputBuffer)
  return _tryNormalize(input, into: typedBuffer)
}
|
|
/// NFC-normalizes `input` into `outputBuffer` via ICU.
/// Returns the normalized length, or nil on any ICU error (treated as
/// "output buffer needs to grow").
internal func _tryNormalize(
  _ input: UnsafeBufferPointer<UInt16>,
  into outputBuffer: UnsafeMutableBufferPointer<UInt16>
) -> Int? {
  var err = __swift_stdlib_U_ZERO_ERROR
  let count = __swift_stdlib_unorm2_normalize(
    _Normalization._nfcNormalizer,
    input.baseAddress._unsafelyUnwrappedUnchecked,
    numericCast(input.count),
    outputBuffer.baseAddress._unsafelyUnwrappedUnchecked,
    numericCast(outputBuffer.count),
    &err
  )
  guard err.isSuccess else {
    // The output buffer needs to grow
    return nil
  }
  return numericCast(count)
}
|
|
|
|
extension _UnmanagedString where CodeUnit == UInt8 {
  /// memcmp-style three-way comparison (-1/0/1) of two ASCII strings; ties
  /// on the common prefix are broken by length.
  @_inlineable // FIXME(sil-serialize-all)
  @_versioned
  internal func compareASCII(to other: _UnmanagedString<UInt8>) -> Int {
    // FIXME Results should be the same across all platforms.
    // Identical storage can only differ in length.
    if self.start == other.start {
      return (self.count &- other.count).signum()
    }
    let commonCount = Swift.min(self.count, other.count)
    let result = Int(truncatingIfNeeded:
      _stdlib_memcmp(self.rawStart, other.rawStart, commonCount))
    if result != 0 {
      return result.signum()
    }
    // Equal prefixes: the shorter string orders first.
    return (self.count &- other.count).signum()
  }
}
|
|
|
|
public extension _StringGuts {
  /// Three-way compare (-1/0/1) of two contiguous string guts, dispatching
  /// on the ASCII-ness of each side. ASCII-vs-ASCII is handled before this
  /// point and must not reach here.
  @inline(__always)
  public
  func _compareContiguous(_ other: _StringGuts) -> Int {
    _sanityCheck(self._isContiguous && other._isContiguous)
    switch (self.isASCII, other.isASCII) {
    case (true, true):
      fatalError("Should have hit the ascii comp in StringComparable.compare()")
    case (true, false):
      return self._unmanagedASCIIView._compareStringsPreLoop(
        other: other._unmanagedUTF16View
      ).rawValue
    case (false, true):
      // Same compare, just invert result
      return other._unmanagedASCIIView._compareStringsPreLoop(
        other: self._unmanagedUTF16View
      ).flipped.rawValue
    case (false, false):
      return self._unmanagedUTF16View._compareStringsPreLoop(
        other: other._unmanagedUTF16View
      ).rawValue
    }
  }

  /// Range-restricted variant of `_compareContiguous(_:)`: compares
  /// `self[selfRange]` against `other[otherRange]`.
  @inline(__always)
  public
  func _compareContiguous(
    _ selfRange: Range<Int>,
    _ other: _StringGuts,
    _ otherRange: Range<Int>
  ) -> Int {
    _sanityCheck(self._isContiguous && other._isContiguous)
    switch (self.isASCII, other.isASCII) {
    case (true, true):
      fatalError("Should have hit the ascii comp in StringComparable.compare()")
    case (true, false):
      return self._unmanagedASCIIView[selfRange]._compareStringsPreLoop(
        other: other._unmanagedUTF16View[otherRange]
      ).rawValue
    case (false, true):
      // Same compare, just invert result
      return other._unmanagedASCIIView[otherRange]._compareStringsPreLoop(
        other: self._unmanagedUTF16View[selfRange]
      ).flipped.rawValue
    case (false, false):
      return self._unmanagedUTF16View[selfRange]._compareStringsPreLoop(
        other: other._unmanagedUTF16View[otherRange]
      ).rawValue
    }
  }
}
|
|
|
|
extension _UnmanagedOpaqueString {
  /// Three-way compare of this opaque string against all of `other`.
  @inline(never) // @outlined
  @_versioned
  internal
  func _compareOpaque(_ other: _StringGuts) -> _Ordering {
    return self._compareOpaque(other, 0..<other.count)
  }

  /// Three-way compare against `other` restricted to `otherRange`.
  @inline(never) // @outlined
  @_versioned
  internal
  func _compareOpaque(
    _ other: _StringGuts, _ otherRange: Range<Int>
  ) -> _Ordering {
    //
    // Do a fast Latiny comparison loop; bail if that proves insufficient.
    //
    // The vast majority of the time, seemingly-non-contiguous Strings are
    // really ASCII strings that were bridged improperly. E.g., unknown nul-
    // termination of an all-ASCII file loaded by String.init(contentsOfFile:).
    //

    let selfCount = self.count
    let otherCount = otherRange.count
    let count = Swift.min(selfCount, otherCount)
    let idx = self._findDiffIdx(other, otherRange)
    if idx == count {
      // One side is a binary prefix of the other; order by length.
      return _lexicographicalCompare(selfCount, otherCount)
    }

    let selfCU = self[idx]
    let otherCU = other[idx + otherRange.lowerBound]

    //
    // Fast path: if one is ASCII, we can often compare the code units directly.
    //
    let selfIsASCII = selfCU <= 0x7F
    let otherIsASCII = otherCU <= 0x7F

    let selfIsSingleSegmentScalar =
      self.hasNormalizationBoundary(after: idx)
      && _hasNormalizationBoundary(before: selfCU)
    // NOTE(review): `other` is indexed as `idx + otherRange.lowerBound`
    // just above, but this boundary query uses bare `idx` — confirm that is
    // intended when otherRange.lowerBound != 0.
    let otherIsSingleSegmentScalar =
      other.hasNormalizationBoundary(after: idx)
      && _hasNormalizationBoundary(before: otherCU)

    if _fastPath(selfIsASCII || otherIsASCII) {
      _sanityCheck(idx < selfCount && idx < otherCount,
        "Should be caught by check against min-count")
      // Check if next CU is <0x300, or if we're in a
      // "_isNormalizedSuperASCII" case. 99.9% of the time, we're here because
      // the non-contig string is ASCII. We never want to hit the pathological
      // path for those.

      if selfIsASCII && otherIsASCII {
        if selfIsSingleSegmentScalar && otherIsSingleSegmentScalar {
          return _lexicographicalCompare(selfCU, otherCU)
        }

        return self._compareOpaquePathological(
          other, otherRange, startingFrom: Swift.max(0, idx-1))
      }

      // NOTE(review): when selfIsASCII holds, self's scalar at idx is ASCII
      // and `_isNormalizedSuperASCII` is then always false, so this first
      // branch looks unreachable — should it examine `other`'s scalar
      // instead? As written it merely falls through to the (correct but
      // slow) pathological path, so correctness is unaffected.
      if selfIsASCII && selfIsSingleSegmentScalar
      && self._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII {
        return .less
      } else if otherIsASCII && otherIsSingleSegmentScalar
      && self._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII {
        return .greater
      }
    }

    return self._compareOpaquePathological(
      other, otherRange, startingFrom: Swift.max(0, idx-1)
    )
  }

  /// Slow path: normalize one segment at a time on both sides and compare
  /// the resulting code units.
  @inline(never)
  func _compareOpaquePathological(
    _ other: _StringGuts, _ otherRange: Range<Int>,
    startingFrom: Int
  ) -> _Ordering {
    // Compare by pulling in a segment at a time, normalizing then comparing
    // individual code units
    var selfIterator = _NormalizedCodeUnitIterator(self, startIndex: startingFrom)
    return selfIterator.compare(with:
      _NormalizedCodeUnitIterator(other, otherRange, startIndex: startingFrom)
    )
  }
}
|
|
|
|
extension UnicodeScalar {
  /// NFC-normalizes this single scalar into `outputBuffer`, returning the
  /// number of code units written. A single scalar is assumed to always fit.
  internal func _normalize(
    into outputBuffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
  ) -> Int {
    // Implementation: Perform the normalization on an input buffer and output
    // buffer.
    func impl(
      _ input: UnsafeMutablePointer<_FixedArray2<UInt16>>,
      count: Int,
      into output: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
    ) -> Int {
      let inputBuffer = _unsafeBufferPointerCast(
        input, count, to: UInt16.self
      )
      // NOTE(review): capacity is taken from _FixedArray8 while the output
      // parameter's type is _Normalization._SegmentOutputBuffer — confirm
      // these are the same type (or at least the same capacity).
      let outputBuffer = _unsafeMutableBufferPointerCast(
        output, _FixedArray8<UInt16>.capacity, to: UInt16.self
      )
      // Single-scalar input is assumed to always fit, hence the unchecked
      // unwrap of the optional result.
      return _tryNormalize(
        inputBuffer, into: outputBuffer
      )._unsafelyUnwrappedUnchecked
    }

    // Encode the scalar as one or two UTF-16 code units.
    var inBuffer = _FixedArray2<UInt16>(allZeros:())
    var inLength = 0
    for cu in self.utf16 {
      inBuffer[inLength] = cu
      inLength += 1
    }

    return impl(&inBuffer, count: inLength, into: outputBuffer)
  }

  // Largest valid Unicode code point (U+10FFFF).
  static internal let maxValue = 0x0010_FFFF
}
|
|
|
|
/// Runtime-built tables of exceptional scalars that break segment-by-segment
/// processing: multi-segment expanders, and scalars that normalize to an
/// ASCII starter. Built once by scanning every assigned code point.
private struct _UnicodeScalarExceptions {
  // Scalars whose NFC form spans multiple normalization segments.
  fileprivate let _multiSegmentExpanders: Set<UInt32>
  // Scalars whose NFC form begins with an ASCII code unit.
  fileprivate let _normalizedASCIIStarter: Array<UInt32>

  @inline(__always)
  init() {
    var msExpanders = Set<UInt32>()
    msExpanders.reserveCapacity(16)
    var normalizedASCIIStarter = Array<UInt32>()
    normalizedASCIIStarter.reserveCapacity(8)

    for rawValue in 0..<UnicodeScalar.maxValue {
      guard let scalar = UnicodeScalar(rawValue) else { continue }

      // Fast path: skip unassigned code points
      guard scalar._isDefined else { continue }

      // Fast path: skip unless QC_FCD=no
      if _fastPath(!scalar._hasFullCompExclusion) {
        continue
      }

      var outBuffer = _Normalization._SegmentOutputBuffer(allZeros:())
      let length = scalar._normalize(into: &outBuffer)

      // See if this normalized to have an ASCII starter
      if _slowPath(outBuffer[0] <= 0x7F) {
        normalizedASCIIStarter.append(scalar.value)
      }

      // See if this normalizes to multiple segments
      var i = 0
      while i < length {
        let (innerScalar, nextI) = _parseRawScalar(&outBuffer, startingFrom: i)
        // FIX: the original guarded on `innerScalar._hasNormalizationBoundaryBefore`
        // immediately inside an `if` that had just tested the very same
        // expression, so the guard always passed and its fatalError was
        // unreachable dead code; the redundant guard is removed.
        if _slowPath(i != 0 && innerScalar._hasNormalizationBoundaryBefore) {
          msExpanders.insert(scalar.value)
          break
        }
        i = nextI
      }
    }

    self._multiSegmentExpanders = msExpanders
    self._normalizedASCIIStarter = normalizedASCIIStarter
  }
}
|
|
// Built lazily and thread-safely on first use (global `let` semantics).
private let _unicodeScalarExceptions = _UnicodeScalarExceptions()
|
|
|
|
extension UnicodeScalar {
  // Multi-Segment Expanders - Unicode defines "expanding canonical
  // decompositions", where even in NFC a single scalar expands to multiple
  // scalars. A small subset (currently 12 scalars circa Unicode 10) of these
  // will expand into multiple normalization segments, breaking any kind of
  // segment-by-segment logic or processing even under NFC. These are a subset
  // of what is identified by the UCD as "composition exclusion" scalars. Since
  // we don't have access to a UCD (available only at runtime), we go through
  // ICU which lumps those and even more as "Full Composition Exclusions". Of
  // the many full composition exclusions, this set (created once at runtime as
  // this can change with Unicode version) tracks just those that can expand
  // into multiple normalization segments.
  internal var _isMultiSegmentExpander: Bool {
    let expanders = _unicodeScalarExceptions._multiSegmentExpanders
    return expanders.contains(self.value)
  }

  // Whether, post-normalization, this scalar definitely compares greater than
  // any ASCII scalar. This is true for all super-ASCII scalars that are not
  // ASCII Normalized Starters.
  //
  // ASCII Normalized Starters - A handful of scalars normalize to have ASCII
  // starters, e.g. Greek question mark ";". As of Unicode 10 there are 3 (all
  // from Unicode 1.1 originally) and more are unlikely. But, there could be
  // more in future versions, so determine at runtime.
  internal var _isNormalizedSuperASCII: Bool {
    let asciiStarters = _unicodeScalarExceptions._normalizedASCIIStarter
    if _slowPath(asciiStarters.contains(self.value)) {
      return false
    }
    return self.value > 0x7F
  }
}
|
|
|
|
extension _UnmanagedString where CodeUnit == UInt8 {
  /// Three-way compare of an ASCII string against a UTF-16 string: scan for
  /// the first difference, answer directly when safe, and fall back to
  /// segment normalization otherwise.
  @_versioned
  internal func _compareStringsPreLoop(
    other: _UnmanagedString<UInt16>
  ) -> _Ordering {
    let count = Swift.min(self.count, other.count)

    //
    // Fast scan until we find a difference
    //
    let idx = self._findDiffIdx(other)
    guard idx < count else {
      // Binary prefix: order by length.
      return _lexicographicalCompare(self.count, other.count)
    }
    let otherCU = other[idx]

    //
    // Fast path: if other is super-ASCII post-normalization, we must be less. If
    // other is ASCII and a single-scalar segment, we have our answer.
    //
    if otherCU > 0x7F {
      if _fastPath(
        other._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII
      ) {
        return .less
      }
    } else {
      let selfASCIIChar = UInt16(self[idx])
      _sanityCheck(selfASCIIChar != otherCU, "should be different")
      // At the very end of `other`, no following combining scalars can
      // affect the comparison.
      if idx+1 == other.count {
        return _lexicographicalCompare(selfASCIIChar, otherCU)
      }
      // A boundary after idx means otherCU's segment is complete.
      if _fastPath(other.hasNormalizationBoundary(after: idx)) {
        return _lexicographicalCompare(selfASCIIChar, otherCU)
      }
    }

    //
    // Otherwise, need to normalize the segment and then compare
    //
    let selfASCIIChar = UInt16(self[idx])
    return _compareStringsPostSuffix(
      selfASCIIChar: selfASCIIChar, otherUTF16: other[idx...]
    )
  }
}
|
|
|
|
extension _StringGuts {
  /// Whether a normalization-segment boundary follows position `index`
  /// (trivially true at the end of the string).
  func hasNormalizationBoundary(after index: Int) -> Bool {
    let followingIndex = index + 1
    guard followingIndex < self.count else { return true }
    return _hasNormalizationBoundary(before: self[followingIndex])
  }
}
|
|
|
|
extension _UnmanagedOpaqueString {
  /// Whether a normalization-segment boundary follows position `index`
  /// (trivially true at the end of the string).
  func hasNormalizationBoundary(after index: Int) -> Bool {
    let followingIndex = index + 1
    guard followingIndex < self.count else { return true }
    return _hasNormalizationBoundary(before: self[followingIndex])
  }
}
|
|
|
|
extension _UnmanagedString where CodeUnit == UInt16 {
  /// Whether a normalization-segment boundary follows position `index`
  /// (trivially true at the end of the string).
  func hasNormalizationBoundary(after index: Int) -> Bool {
    let followingIndex = index + 1
    guard followingIndex < self.count else { return true }
    return _hasNormalizationBoundary(before: self[followingIndex])
  }
}
|
|
|
|
/// Compares a differing ASCII code unit from the left string against the
/// right string's current normalization segment, normalizing that segment
/// first when it is not already prenormal.
private func _compareStringsPostSuffix(
  selfASCIIChar: UInt16,
  otherUTF16: _UnmanagedString<UInt16>
) -> _Ordering {
  let otherCU = otherUTF16[0]
  _sanityCheck(otherCU <= 0x7F, "should be ASCII, otherwise no need to call")

  let segmentEndIdx = otherUTF16._findNormalizationSegmentEnd(startingFrom: 0)
  let segment = otherUTF16[..<segmentEndIdx]

  // Fast path: If prenormal, we're done.
  if _Normalization._prenormalQuickCheckYes(segment) {
    return _lexicographicalCompare(selfASCIIChar, otherCU)
  }

  // Normalize segment, and then compare first code unit
  var outputBuffer = _Normalization._SegmentOutputBuffer(allZeros:())
  if _fastPath(
    segment._tryNormalize(into: &outputBuffer) != nil
  ) {
    return _lexicographicalCompare(selfASCIIChar, outputBuffer[0])
  }
  // Fixed buffer was too small: fall back to heap normalization.
  return _lexicographicalCompare(selfASCIIChar, segment._slowNormalize()[0])
}
|
|
|
|
extension _UnmanagedString where CodeUnit == UInt16 {
|
|
//
|
|
// Find the end of the normalization segment
|
|
//
|
|
/// Returns the index one past the end of the normalization segment that
/// begins at `idx`.
internal func _findNormalizationSegmentEnd(startingFrom idx: Int) -> Int {
  let count = self.count
  _sanityCheck(idx < count, "out of bounds")

  // Normalization boundaries are best queried before known starters. Advance
  // past one scalar first.
  var (_, segmentEndIdx) = self._parseRawScalar(startingFrom: idx)
  while segmentEndIdx < count {
    let (scalar, nextIdx) = self._parseRawScalar(startingFrom: segmentEndIdx)
    // A scalar with a boundary before it starts the next segment.
    if scalar._hasNormalizationBoundaryBefore {
      break
    }
    segmentEndIdx = nextIdx
  }
  return segmentEndIdx
}
|
|
|
|
/// Returns the start index of the normalization segment that ends just
/// before `idx`, by reverse-parsing scalars until one with a boundary
/// before it is found.
internal func _findNormalizationSegmentStart(
  endingAt idx: Int // one-past-the-end
) -> Int {
  var idx = idx
  let count = self.count
  _sanityCheck(idx > 0 && idx <= count, "out of bounds")

  while idx > 0 {
    let (scalar, priorIdx) = _reverseParseRawScalar(endingAt: idx)
    idx = priorIdx
    if scalar._hasNormalizationBoundaryBefore {
      break
    }
  }
  return idx
}
|
|
|
|
/// Returns the (start, end) bounds of the normalization segment containing
/// position `idx`.
internal func _findNormalizationSegment(spanning idx: Int) -> (Int, Int) {
  var idx = idx

  // Corner case: if we're sub-surrogate, back up
  if _slowPath(
    idx > 0
    && _isTrailingSurrogate(self[idx])
    && _isLeadingSurrogate(self[idx-1])
  ) {
    idx -= 1
  }
  let segmentEnd = self._findNormalizationSegmentEnd(startingFrom: idx)

  // Find the start
  if _slowPath(idx == 0) {
    return (0, segmentEnd)
  }

  // Check current scalar
  if self._parseRawScalar(startingFrom: idx).0._hasNormalizationBoundaryBefore {
    return (idx, segmentEnd)
  }

  // Reverse parse until we found the segment start
  let segmentStart = self._findNormalizationSegmentStart(endingAt: idx)

  return (segmentStart, segmentEnd)
}
|
|
|
|
// Whether the segment identified by `idx` is prenormal.
|
|
//
|
|
// Scalar values below 0x300 are special: normalization segments containing only
|
|
// one such scalar are trivially prenormal under NFC. Most Latin-derived scripts
|
|
// can be represented entirely by <0x300 scalar values, meaning that many user
|
|
// strings satisfy this prenormal check. We call sub-0x300 scalars "Latiny" (not
|
|
// official terminology).
|
|
//
|
|
// The check is effectively:
|
|
// 1) Whether the current scalar <0x300, AND
|
|
// 2) Whether the current scalar comprises the entire segment
|
|
//
|
|
/// Whether the segment starting at `idx` is trivially prenormal: a single
/// sub-0x300 ("Latiny") scalar that comprises the entire segment.
internal func _isLatinyPrenormal(idx: Int) -> Bool {
  _sanityCheck(idx < self.count, "out of bounds")

  let currentCU = self[idx]
  guard currentCU < 0x300 else { return false }
  // A final code unit trivially ends its segment.
  guard idx+1 < self.count else { return true }

  let followingCU = self[idx+1]
  return followingCU < 0x300 || _hasNormalizationBoundary(before: followingCU)
}
|
|
|
|
/// Three-way compare of two UTF-16 strings: scan for the first difference,
/// answer directly when both sides are prenormal Latiny segments, and
/// otherwise fall back to segment-based comparison.
@_versioned
internal
func _compareStringsPreLoop(
  other: _UnmanagedString<UInt16>
) -> _Ordering {
  let count = Swift.min(self.count, other.count)

  //
  // Fast scan until we find a diff
  //
  let idx = _findDiffIdx(other)
  guard idx < count else {
    // Binary prefix: order by length.
    return _lexicographicalCompare(self.count, other.count)
  }
  let selfCU = self[idx]
  let otherCU = other[idx]

  //
  // Fast path: sub-0x300 single-scalar segments can be compared directly
  //
  if _fastPath(
    _isLatinyPrenormal(idx: idx)
    && other._isLatinyPrenormal(idx: idx)
  ) {
    return _lexicographicalCompare(selfCU, otherCU)
  }

  return self._compareStringsSuffix(other: other, randomIndex: idx)
}
|
|
|
|
// Is the shorter of the two parameters a prefix of the other parameter?
|
|
// Is the shorter of `self` and `other` a prefix of the other? Operands of
// equal length are never considered prefixes of one another.
private func shorterPrefixesOther(
  _ other: _UnmanagedString<UInt16>
) -> Bool {
  // Equal lengths: neither operand is "the shorter".
  guard self.count != other.count else {
    return false
  }

  // The shorter is a prefix iff every code unit of the common prefix agrees.
  let sharedCount = Swift.min(self.count, other.count)
  var i = 0
  while i < sharedCount {
    guard self[i] == other[i] else {
      return false
    }
    i += 1
  }
  return true
}
|
|
|
|
// Slow-path comparison, entered at `randomIndex`: the first binary point of
// difference between `self` and `other`. Backs up to the enclosing
// normalization segments, normalizes them into stack buffers, and compares;
// falls back to `_compareStringsPathological` whenever segment-at-a-time
// processing cannot decide the answer.
private func _compareStringsSuffix(
  other: _UnmanagedString<UInt16>,
  randomIndex: Int
) -> _Ordering {
  let count = Swift.min(self.count, other.count)
  let selfCU = self[randomIndex]
  let otherCU = other[randomIndex]
  _sanityCheck(randomIndex >= 0 && randomIndex < count, "out of bounds")
  _sanityCheck(selfCU != otherCU, "should be called at a point of difference")

  //
  // Find the segment surrounding the random index passed in. This may involve
  // some back tracking to the nearest normalization boundary. Once we've
  // identified the segment, we can normalize and continue comparison.
  //
  // NOTE: We need to back-track for both self and other. Even though prefixes
  // are binary equal, the point of difference might be at the start of a new
  // segment for one and in the middle of the prior segment for the other. In
  // which case, we will want to effectively compare the two consecutive
  // segments together.
  //
  let (selfSegmentStartIdx, selfSegmentEndIdx) =
    self._findNormalizationSegment(spanning: randomIndex)
  let (otherSegmentStartIdx, otherSegmentEndIdx) =
    other._findNormalizationSegment(spanning: randomIndex)
  // Compare from the earlier of the two segment starts so both sides cover
  // the same leading code units (which are binary equal up to randomIndex).
  let comparisonStartIdx = Swift.min(selfSegmentStartIdx, otherSegmentStartIdx)

  //
  // Fast path: if both are prenormal, we have our answer
  //
  let selfSegment = self[comparisonStartIdx..<selfSegmentEndIdx]
  let otherSegment = other[comparisonStartIdx..<otherSegmentEndIdx]
  let selfSegmentPrenormal = _Normalization._prenormalQuickCheckYes(selfSegment)
  let otherSegmentPrenormal = _Normalization._prenormalQuickCheckYes(
    otherSegment)
  if selfSegmentPrenormal && otherSegmentPrenormal {
    return _lexicographicalCompare(selfCU, otherCU)
  }

  //
  // Pathological case: multi-segment expanders ruin segment-by-segment
  // processing.
  //
  // NOTE: Multi-segment expanders are (at least up til Unicode 10) always the
  // beginning of a normalization segment (i.e. they are starters). This is very
  // unlikely to change in the future, as new non-starter scalars that normalize
  // to pre-existing scalars would have to produce a starter. We validate this
  // fact on constructing our MultiSegmentExpander set, so we can rely on it
  // here.
  //
  if _slowPath(
    selfSegment._parseRawScalar().0._isMultiSegmentExpander
    || otherSegment._parseRawScalar().0._isMultiSegmentExpander
  ) {
    return self[comparisonStartIdx...]._compareStringsPathological(
      other: other[comparisonStartIdx...]
    )
  }

  //
  // Normalize segments and compare. If they still differ, we have our answer.
  //
  var selfOutputBuffer = _Normalization._SegmentOutputBuffer(allZeros:())
  var otherOutputBuffer = _Normalization._SegmentOutputBuffer(allZeros:())
  let selfSegmentLengthOpt: Int?
  let otherSegmentLengthOpt: Int?
  if selfSegmentPrenormal {
    // Already prenormal: copy the segment into the buffer unchanged.
    selfOutputBuffer.fill(from: selfSegment)
    selfSegmentLengthOpt = selfSegment.count
  } else {
    // `nil` result means the segment would not fit in the output buffer.
    selfSegmentLengthOpt = selfSegment._tryNormalize(into: &selfOutputBuffer)
  }
  if otherSegmentPrenormal {
    otherOutputBuffer.fill(from: otherSegment)
    otherSegmentLengthOpt = otherSegment.count
  } else {
    otherSegmentLengthOpt = otherSegment._tryNormalize(into: &otherOutputBuffer)
  }

  if _slowPath(selfSegmentLengthOpt == nil || otherSegmentLengthOpt == nil) {
    // If we couldn't normalize a segment into a generously large stack buffer,
    // we have a pathological String.
    return self[comparisonStartIdx...]._compareStringsPathological(
      other: other[comparisonStartIdx...]
    )
  }
  let selfLength = selfSegmentLengthOpt._unsafelyUnwrappedUnchecked
  let otherLength = otherSegmentLengthOpt._unsafelyUnwrappedUnchecked

  // One normalized segment is a proper prefix of the other: the segments
  // alone cannot decide the order, so continue comparing after them.
  if Swift.shorterPrefixesOther(
    &selfOutputBuffer, selfLength,
    &otherOutputBuffer, otherLength)
  {
    let selfSlice = self[selfSegmentEndIdx...]
    let otherSlice = other[otherSegmentEndIdx...]
    return selfSlice._compareStringsPathological(other: otherSlice)
  }

  let comp = _lexicographicalCompare(
    &selfOutputBuffer, leftCount: selfLength,
    &otherOutputBuffer, rightCount: otherLength)
  if _fastPath(comp != .equal) {
    return comp
  }

  //
  // If they compare equal after normalization, we may have equal strings that
  // differ in form, e.g. NFC vs NFD strings. Or, we may have strings that
  // differ in form that also will differ later on. Either way, segment-by-
  // segment processing incurs significant overhead. We'd rather do larger
  // chunks of work at a time (e.g. ~1KB of text at a time). For now, we eagerly
  // process the entire strings, as chunking properly without guarantees of
  // normality is tricky (and expensive at times as well).
  //
  // NOTE: We could add a chunking path. It is hard to do correctly, because
  // Unicode. It's especially hard to test, because Unicode. It's hard to ensure
  // lasting correctness, because Unicode. (Also, sometimes it's impossible, but
  // that's what _compareStringsPathological is for.) However, it helps for very
  // long strings that differ in the middle. We might want this one day... but
  // not today.
  //
  // TODO: An additional (or even repeated) reapplying of the algorithm,
  // including the binary diff scan, could greatly benefit strings that only
  // sparsely differ in normality (penalizing strings that densely differ in
  // normality). This would add complexity, but with compelling data could be an
  // alternative to chunking.
  //
  return self[selfSegmentEndIdx...]._compareStringsPathological(
    other: other[otherSegmentEndIdx...]
  )
}
|
|
|
|
// Last-resort comparison: normalize both strings lazily, code unit by code
// unit, and compare the resulting normalized streams.
private func _compareStringsPathological(
  other: _UnmanagedString<UInt16>
) -> _Ordering {
  let rhsIterator = _NormalizedCodeUnitIterator(other)
  var lhsIterator = _NormalizedCodeUnitIterator(self)
  return lhsIterator.compare(with: rhsIterator)
}
|
|
}
|
|
|
|
// Pointer-based overload: views each fixed-size segment output buffer as a
// UInt16 buffer of the given length, then defers to the buffer overload.
private func shorterPrefixesOther(
  _ selfBuffer: UnsafePointer<_Normalization._SegmentOutputBuffer>,
  _ selfLength: Int,
  _ otherBuffer: UnsafePointer<_Normalization._SegmentOutputBuffer>,
  _ otherLength: Int
) -> Bool {
  let selfUnits = _castOutputBuffer(selfBuffer, endingAt: selfLength)
  let otherUnits = _castOutputBuffer(otherBuffer, endingAt: otherLength)
  return shorterPrefixesOther(selfUnits, otherUnits)
}
|
|
|
|
// Is the shorter of the two parameters a prefix of the other parameter?
|
|
// Is the shorter of the two buffers a prefix of the other? Buffers of equal
// length are never considered prefixes of one another.
private func shorterPrefixesOther(
  _ selfBuffer: UnsafeBufferPointer<UInt16>,
  _ otherBuffer: UnsafeBufferPointer<UInt16>
) -> Bool {
  // Equal lengths: neither buffer is "the shorter".
  guard selfBuffer.count != otherBuffer.count else {
    return false
  }

  // `zip` stops at the end of the shorter buffer, i.e. after exactly
  // min(selfBuffer.count, otherBuffer.count) element pairs.
  for (selfCU, otherCU) in zip(selfBuffer, otherBuffer) {
    if selfCU != otherCU {
      return false
    }
  }
  return true
}
|
|
|