//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

import SwiftShims

/// Coarse storage classification of a `_StringGuts`, used to pick a
/// comparison strategy.
// TODO: pick values that give us the best branching pattern
internal enum _GutsClassification: UInt {
  case smallUTF8 = 0
  case irregular = 1
  case regularASCII = 2
  case regularUTF16 = 4
}

extension _StringGuts {
  /// The classification of this value: small strings first, then anything
  /// that is not contiguous (`.irregular`), then contiguous storage split by
  /// whether it is ASCII.
  @inlinable
  var classification: _GutsClassification {
    guard !_isSmall else { return .smallUTF8 }
    guard _isContiguous else { return .irregular }
    if isASCII { return .regularASCII }
    return .regularUTF16
  }
}

// HACK: This gets rid of some retains/releases that were slowing down the
// memcmp fast path for comparing ascii strings.
// rdar://problem/37473470
//
// NOTE(review): this copy of the file appears to have lost text. Generic
// argument lists (e.g. on `Range`, `_UnmanagedString`, `UnsafeBufferPointer`,
// `Set`, `Array`) are missing, and several `..<` expressions are cut off
// together with whatever followed them up to the next `>`. Those spots are
// marked below and kept as found; restore them from the upstream file before
// building.

/// Compares two strings given as raw `_StringGuts` bit patterns.
///
/// Dispatches on the classification of each side and returns the `rawValue`
/// of the resulting `_Ordering`.
@inline(never) // @outlined
@effects(readonly)
@usableFromInline // @opaque
internal func _compareUnicode(
  _ lhs: _StringGuts._RawBitPattern, _ rhs: _StringGuts._RawBitPattern
) -> Int {
  let left = _StringGuts(rawBits: lhs)
  let right = _StringGuts(rawBits: rhs)

  switch (left.classification, right.classification) {
  // Both small: fast in-register comparison
  case (.smallUTF8, .smallUTF8):
    return left._smallUTF8String._compare(right._smallUTF8String).rawValue
  // Either irregular: branch to opaque code
  case (.irregular, _):
    return left._asOpaque()._compare(right).rawValue
  case (_, .irregular):
    return right._asOpaque()._compare(left).flipped.rawValue
  // One small, other contiguous in memory
  case (.smallUTF8, _):
    return left._smallUTF8String._compare(_contiguous: right).rawValue
  case (_, .smallUTF8):
    return right._smallUTF8String._compare(
      _contiguous: left
    ).flipped.rawValue
  // Both contiguous
  case (.regularASCII, _):
    return left._unmanagedASCIIView._compare(_contiguous: right).rawValue
  case (.regularUTF16, _):
    return left._unmanagedUTF16View._compare(_contiguous: right).rawValue
  }
}

/// Ranged variant of `_compareUnicode`: compares `leftRange` of `lhs` with
/// `rightRange` of `rhs`, using the same dispatch as the unranged overload.
@inline(never) // @outlined
@effects(readonly)
@usableFromInline // @opaque
internal func _compareUnicode(
  _ lhs: _StringGuts._RawBitPattern, _ leftRange: Range,
  _ rhs: _StringGuts._RawBitPattern, _ rightRange: Range
) -> Int {
  let left = _StringGuts(rawBits: lhs)
  let right = _StringGuts(rawBits: rhs)

  switch (left.classification, right.classification) {
  // Both small: fast in-register comparison
  case (.smallUTF8, .smallUTF8):
    return left._smallUTF8String[leftRange]._compare(
      right._smallUTF8String[rightRange]
    ).rawValue
  // Either irregular: branch to opaque code
  case (.irregular, _):
    return left._asOpaque()[leftRange]._compare(right, rightRange).rawValue
  case (_, .irregular):
    return right._asOpaque()[rightRange]._compare(
      left, leftRange
    ).flipped.rawValue
  // One small, other contiguous in memory
  case (.smallUTF8, _):
    return left._smallUTF8String[leftRange]._compare(
      _contiguous: right, rightRange
    ).rawValue
  case (_, .smallUTF8):
    return right._smallUTF8String[rightRange]._compare(
      _contiguous: left, leftRange
    ).flipped.rawValue
  // Both contiguous
  case (.regularASCII, _):
    return left._unmanagedASCIIView[leftRange]._compare(
      _contiguous: right, rightRange
    ).rawValue
  case (.regularUTF16, _):
    return left._unmanagedUTF16View[leftRange]._compare(
      _contiguous: right, rightRange
    ).rawValue
  }
}

// TODO: coalesce many of these into a protocol to simplify the code

extension _SmallUTF8String {
  /// Compares two small strings. Takes the ASCII path when both sides are
  /// ASCII, otherwise compares through UTF-16 views.
  func _compare(_ other: _SmallUTF8String) -> _Ordering {
#if arch(i386) || arch(arm)
    _conditionallyUnreachable()
#else
    if _fastPath(self.isASCII && other.isASCII) {
      // TODO: fast in-register comparison
      return self.withUnmanagedASCII { selfView in
        return other.withUnmanagedASCII { otherView in
          return _Ordering(signedNotation: selfView.compareASCII(to: otherView))
        }
      }
    }

    // TODO: fast in-register comparison
    return self.withUnmanagedUTF16 { selfView in
      return other.withUnmanagedUTF16 { otherView in
        return selfView._compare(otherView)
      }
    }
#endif // 64-bit
  }

  /// Compares against contiguous guts, choosing the ASCII or UTF-16 view of
  /// `other` based on `other.isASCII`.
  func _compare(_contiguous other: _StringGuts) -> _Ordering {
#if arch(i386) || arch(arm)
    unsupportedOn32bit()
#else
    _sanityCheck(other._isContiguous)
    if other.isASCII {
      // TODO: fast in-register comparison
      return self._compare(other._unmanagedASCIIView)
    }
    return self._compare(other._unmanagedUTF16View)
#endif // 64-bit
  }

  /// Ranged variant: compares against `otherRange` of contiguous guts.
  func _compare(
    _contiguous other: _StringGuts, _ otherRange: Range
  ) -> _Ordering {
#if arch(i386) || arch(arm)
    unsupportedOn32bit()
#else
    _sanityCheck(other._isContiguous)
    if other.isASCII {
      return self._compare(other._unmanagedASCIIView[otherRange])
    }
    return self._compare(other._unmanagedUTF16View[otherRange])
#endif // 64-bit
  }

  // NOTE(review): the two overloads below differ only in the (missing)
  // generic argument of `_UnmanagedString`. Judging by the bodies, the first
  // takes the ASCII view (it calls `compareASCII(to:)`) -- confirm.
  func _compare(_ other: _UnmanagedString) -> _Ordering {
#if arch(i386) || arch(arm)
    unsupportedOn32bit()
#else
    if _fastPath(self.isASCII) {
      return self.withUnmanagedASCII { selfView in
        return _Ordering(
          signedNotation: selfView.compareASCII(to: other))
      }
    }
    return self.withUnmanagedUTF16 { $0._compare(other) }
#endif // 64-bit
  }

  func _compare(_ other: _UnmanagedString) -> _Ordering {
#if arch(i386) || arch(arm)
    unsupportedOn32bit()
#else
    if _fastPath(self.isASCII) {
      return self.withUnmanagedASCII { $0._compare(other) }
    }
    return self.withUnmanagedUTF16 { $0._compare(other) }
#endif // 64-bit
  }
}

extension _UnmanagedString where CodeUnit == UInt8 {
  /// Compares against contiguous guts via its ASCII or UTF-16 view.
  func _compare(_contiguous other: _StringGuts) -> _Ordering {
    _sanityCheck(other._isContiguous)
    if other.isASCII {
      return self._compare(other._unmanagedASCIIView)
    }
    return self._compare(other._unmanagedUTF16View)
  }

  /// Ranged variant: compares against `otherRange` of contiguous guts.
  func _compare(
    _contiguous other: _StringGuts, _ otherRange: Range
  ) -> _Ordering {
    _sanityCheck(other._isContiguous)
    if other.isASCII {
      return self._compare(other._unmanagedASCIIView[otherRange])
    }
    return self._compare(other._unmanagedUTF16View[otherRange])
  }

  // This overload traps unconditionally; per its message, callers are
  // expected to have taken the ASCII comparison path before reaching it.
  func _compare(_ other: _UnmanagedString) -> _Ordering {
    fatalError("Should have hit the ascii comp in StringComparable.compare")
  }

  func _compare(_ other: _UnmanagedString) -> _Ordering {
    return self._compareStringsPreLoop(other)
  }
}

extension _UnmanagedString where CodeUnit == UInt16 {
  /// Compares against contiguous guts via its ASCII or UTF-16 view.
  func _compare(_contiguous other: _StringGuts) -> _Ordering {
    _sanityCheck(other._isContiguous)
    if other.isASCII {
      return self._compare(other._unmanagedASCIIView)
    }
    return self._compare(other._unmanagedUTF16View)
  }

  /// Ranged variant: compares against `otherRange` of contiguous guts.
  func _compare(
    _contiguous other: _StringGuts, _ otherRange: Range
  ) -> _Ordering {
    _sanityCheck(other._isContiguous)
    if other.isASCII {
      return self._compare(other._unmanagedASCIIView[otherRange])
    }
    return self._compare(other._unmanagedUTF16View[otherRange])
  }

  // Delegates to the other side's implementation and flips the result.
  func _compare(_ other: _UnmanagedString) -> _Ordering {
    return other._compare(self).flipped
  }

  func _compare(_ other: _UnmanagedString) -> _Ordering {
    return self._compareStringsPreLoop(other)
  }
}

extension _UnmanagedOpaqueString {
  /// Compares this opaque string against `other`.
  func _compare(_ other: _StringGuts) -> _Ordering {
    return self._compareOpaque(other)
  }

  /// Compares this opaque string against `otherRange` of `other`.
  func _compare(_ other: _StringGuts, _ otherRange: Range) -> _Ordering {
    return self._compareOpaque(other, otherRange)
  }
}

//
// Pointer casting helpers
//
/// Reinterprets `ptr` as a mutable buffer of `count` elements of type `U`.
@inline(__always)
private func _unsafeMutableBufferPointerCast(
  _ ptr: UnsafeMutablePointer,
  _ count: Int,
  to: U.Type = U.self
) -> UnsafeMutableBufferPointer {
  return UnsafeMutableBufferPointer(
    start: UnsafeMutableRawPointer(ptr).assumingMemoryBound(to: U.self),
    count: count
  )
}

/// Reinterprets `ptr` as a read-only buffer of `count` elements of type `U`.
@inline(__always)
private func _unsafeBufferPointerCast(
  _ ptr: UnsafePointer,
  _ count: Int,
  to: U.Type = U.self
) -> UnsafeBufferPointer {
  return UnsafeBufferPointer(
    start: UnsafeRawPointer(ptr).assumingMemoryBound(to: U.self),
    count: count
  )
}

internal let _leadingSurrogateBias: UInt16 = 0xd800
internal let _trailingSurrogateBias: UInt16 = 0xdc00
internal let _surrogateMask: UInt16 = 0xfc00

/// Whether `cu` is a leading or a trailing surrogate code unit.
@inline(__always)
internal func _isSurrogate(_ cu: UInt16) -> Bool {
  return _isLeadingSurrogate(cu) || _isTrailingSurrogate(cu)
}

/// Whether `cu`, masked with `_surrogateMask`, equals the leading bias.
@inline(__always)
internal func _isLeadingSurrogate(_ cu: UInt16) -> Bool {
  // NOTE: Specifically match against the trailing surrogate mask, as it matches
  // more cases.
  return cu & _surrogateMask == _leadingSurrogateBias
}

/// Whether `cu`, masked with `_surrogateMask`, equals the trailing bias.
@inline(__always)
internal func _isTrailingSurrogate(_ cu: UInt16) -> Bool {
  return cu & _surrogateMask == _trailingSurrogateBias
}

/// Combines a leading/trailing surrogate pair into a scalar value: ten bits
/// from each code unit, offset by 0x10000.
@inline(__always)
internal func _decodeSurrogatePair(
  leading high: UInt16, trailing low: UInt16
) -> UInt32 {
  _sanityCheck(_isLeadingSurrogate(high) && _isTrailingSurrogate(low))
  let hi10: UInt32 = UInt32(high) &- UInt32(_leadingSurrogateBias)
  _sanityCheck(hi10 < 1<<10, "I said high 10. Not high, like, 20 or something")
  let lo10: UInt32 = UInt32(low) &- UInt32(_trailingSurrogateBias)
  _sanityCheck(lo10 < 1<<10, "I said low 10. Not low, like, 20 or something")

  return ((hi10 &<< 10) | lo10) &+ 0x1_00_00
}

/// Whether there is a normalization boundary before `cu`. Surrogate code
/// units always answer `false`.
internal func _hasNormalizationBoundary(before cu: UInt16) -> Bool {
  guard !_isSurrogate(cu) else { return false }
  return UnicodeScalar(_unchecked: UInt32(cu))._hasNormalizationBoundaryBefore
}

//
// Pointer casting helpers
//
internal func _castOutputBuffer(
  _ ptr: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>,
  endingAt endIdx: Int = _Normalization._SegmentOutputBuffer.capacity
) -> UnsafeMutableBufferPointer {
  let bufPtr: UnsafeMutableBufferPointer =
    _unsafeMutableBufferPointerCast(
      ptr, _Normalization._SegmentOutputBuffer.capacity)
  // NOTE(review): text is missing from here on in this copy of the file. The
  // slice expressions, loop bounds and several declaration headers (what
  // looks like a second `_castOutputBuffer`, a `fill(_:)` method, a family of
  // `_findDiffIdx` helpers and the first `_lexicographicalCompare`) are cut
  // off. The tokens below are kept exactly as found.
  return UnsafeMutableBufferPointer(rebasing: bufPtr[.., endingAt endIdx: Int = _Normalization._SegmentOutputBuffer.capacity
) -> UnsafeBufferPointer {
  let bufPtr: UnsafeBufferPointer =
    _unsafeBufferPointerCast(
      ptr, _Normalization._SegmentOutputBuffer.capacity)
  return UnsafeBufferPointer(rebasing: bufPtr[..) {
    _sanityCheck(other.count < _FixedArray16.capacity,
      "out of bounds fill")
    for i in 0..) -> Int {
    let count = Swift.min(self.count, other.count)
    for idx in 0.., _ right: UnsafeBufferPointer
) -> Int {
  let count = Swift.min(left.count, right.count)
  for idx in 0..(
  _ left: UnsafeBufferPointer, _ right: UnsafeBufferPointer
) -> Int where CodeUnit : FixedWidthInteger & UnsignedInteger {
  let count = Swift.min(left.count, right.count)
  for idx in 0..(
    _ other: _UnmanagedString
  ) -> Int {
    let count = Swift.min(self.count, other.count)
    for idx in 0..
  ) -> Int {
    let count = Swift.min(self.count, otherRange.count)
    for idx in 0.. _Ordering {
  // TODO: inspect code quality
  return lhs < rhs ? .less : (lhs > rhs ? .greater : .equal)
}

/// Orders two UTF-16 code units by numeric value.
internal func _lexicographicalCompare(
  _ lhs: UInt16, _ rhs: UInt16
) -> _Ordering {
  return lhs < rhs ? .less : (lhs > rhs ? .greater : .equal)
}

/// Orders two buffers: by the first differing element if there is one within
/// the shorter length, otherwise by length.
internal func _lexicographicalCompare(
  _ leftHS: UnsafeBufferPointer,
  _ rightHS: UnsafeBufferPointer
) -> _Ordering {
  let count = Swift.min(leftHS.count, rightHS.count)

  let idx = _findDiffIdx(leftHS, rightHS)
  guard idx < count else {
    return _lexicographicalCompare(leftHS.count, rightHS.count)
  }
  let leftHSPtr = leftHS.baseAddress._unsafelyUnwrappedUnchecked
  let rightHSPtr = rightHS.baseAddress._unsafelyUnwrappedUnchecked
  return _lexicographicalCompare(leftHSPtr[idx], rightHSPtr[idx])
}

/// Same as above, but the left element is widened to `UInt16` before the
/// element comparison.
internal func _lexicographicalCompare(
  _ leftHS: UnsafeBufferPointer,
  _ rightHS: UnsafeBufferPointer
) -> _Ordering {
  let count = Swift.min(leftHS.count, rightHS.count)

  let idx = _findDiffIdx(leftHS, rightHS)
  guard idx < count else {
    return _lexicographicalCompare(leftHS.count, rightHS.count)
  }
  let leftHSPtr = leftHS.baseAddress._unsafelyUnwrappedUnchecked
  let rightHSPtr = rightHS.baseAddress._unsafelyUnwrappedUnchecked
  return _lexicographicalCompare(UInt16(leftHSPtr[idx]), rightHSPtr[idx])
}

/// Orders the first `leftCount` / `rightCount` elements of two segment
/// output buffers.
@inline(__always)
internal func _lexicographicalCompare(
  _ leftHS: UnsafePointer<_Normalization._SegmentOutputBuffer>,
  leftCount: Int,
  _ rightHS: UnsafePointer<_Normalization._SegmentOutputBuffer>,
  rightCount: Int
) -> _Ordering {
  return _lexicographicalCompare(
    _castOutputBuffer(leftHS, endingAt: leftCount),
    _castOutputBuffer(rightHS, endingAt: rightCount))
}

/// Orders two arrays by comparing their underlying buffers.
@inline(__always)
internal func _lexicographicalCompare(
  _ leftHS: Array,
  _ rightHS: Array
) -> _Ordering {
  return leftHS.withUnsafeBufferPointer { leftPtr in
    return rightHS.withUnsafeBufferPointer { rightPtr in
      return _lexicographicalCompare(leftPtr, rightPtr)
    }
  }
}

/// Parses one scalar out of a segment output buffer, starting at `idx`.
internal func _parseRawScalar(
  _ buf: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>,
  startingFrom idx: Int = 0
) -> (UnicodeScalar, scalarEndIndex: Int) {
  return Swift._parseRawScalar(buffer: _castOutputBuffer(buf), startingFrom: idx)
}

/// Parses one scalar from `buf` starting at `idx`.
///
/// Returns the scalar and the index just past it. A well-formed surrogate
/// pair consumes two code units; anything else, including an unpaired
/// surrogate, consumes one and is returned as-is.
internal func _parseRawScalar(
  buffer buf: UnsafeBufferPointer,
  startingFrom idx: Int = 0
) -> (UnicodeScalar, scalarEndIndex: Int) {
  let ptr = buf.baseAddress._unsafelyUnwrappedUnchecked
  _sanityCheck(idx >= 0 && idx < buf.count, "out of bounds index")
  let cu: UInt16 = ptr[idx]
  if _slowPath(idx+1 == buf.count) {
    return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
  }
  guard _isLeadingSurrogate(cu) else {
    return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
  }
  let nextCu: UInt16 = ptr[idx+1]
  guard _isTrailingSurrogate(nextCu) else {
    // Invalid surrogate pair: just return the invalid value
    return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
  }

  // Decode
  let value: UInt32 = _decodeSurrogatePair(leading: cu, trailing: nextCu)
  _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set")
  return (UnicodeScalar(_unchecked: value), idx+2)
}

extension _UnmanagedOpaqueString {
  /// Parses one scalar starting at `idx` by copying up to two code units
  /// into a local buffer and parsing that buffer from offset 0.
  ///
  /// NOTE(review): because the local buffer is parsed from 0, the returned
  /// `scalarEndIndex` is 1 or 2 regardless of `idx`, unlike the
  /// `_UnmanagedString` version, which returns `idx+1` / `idx+2`. Callers
  /// visible in this file only use the scalar; confirm whether any caller
  /// relies on the index before changing it.
  internal func _parseRawScalar(
    startingFrom idx: Int = 0
  ) -> (UnicodeScalar, scalarEndIndex: Int) {
    var buffer = _FixedArray2(allZeros:())
    if idx+1 < self.count {
      buffer[0] = self[idx]
      buffer[1] = self[idx+1]

      let bufferPointer = _unsafeBufferPointerCast(
        &buffer, 2, to: UInt16.self
      )
      return Swift._parseRawScalar(buffer: bufferPointer, startingFrom: 0)
    } else {
      buffer[0] = self[idx]

      let bufferPointer = _unsafeBufferPointerCast(
        &buffer, 1, to: UInt16.self
      )
      return Swift._parseRawScalar(buffer: bufferPointer, startingFrom: 0)
    }
  }
}

extension _UnmanagedString where CodeUnit == UInt16 {
  /// Parses one scalar starting at `idx`; returns it with the index just
  /// past it. Unpaired surrogates are returned as single-unit scalars.
  internal func _parseRawScalar(
    startingFrom idx: Int = 0
  ) -> (UnicodeScalar, scalarEndIndex: Int) {
    _sanityCheck(idx >= 0 && idx < self.count, "out of bounds index")
    let cu = self[idx]
    if _slowPath(idx+1 == self.count) {
      return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
    }
    guard _isLeadingSurrogate(cu) else {
      return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
    }
    let nextCu = self[idx+1]
    guard _isTrailingSurrogate(nextCu) else {
      // Invalid surrogate pair: just return the invalid value
      return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
    }

    // Decode
    let value: UInt32 = _decodeSurrogatePair(leading: cu, trailing: nextCu)
    _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set")
    return (UnicodeScalar(_unchecked: value), idx+2)
  }

  /// Parses the scalar that ends just before `idx`; returns it with the
  /// index at which it starts.
  internal func _reverseParseRawScalar(
    endingAt idx: Int // one-past-the-end
  ) -> (UnicodeScalar, scalarStartIndex: Int) {
    _sanityCheck(idx > 0 && idx <= self.count, "out of bounds end index")

    // Corner case: leading un-paired surrogate
    if _slowPath(idx == 1) {
      return (UnicodeScalar(_unchecked: UInt32(self[0])), 0)
    }

    let cu = self[idx-1]
    guard _isTrailingSurrogate(cu) else {
      return (UnicodeScalar(_unchecked: UInt32(cu)), idx-1)
    }
    let priorCU = self[idx-2]
    guard _isLeadingSurrogate(priorCU) else {
      return (UnicodeScalar(_unchecked: UInt32(cu)), idx-1)
    }

    // Decode
    let value: UInt32 = _decodeSurrogatePair(leading: priorCU, trailing: cu)
    _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set")
    return (UnicodeScalar(_unchecked: value), idx-2)
  }

  /// Normalizes into a segment output buffer; see the buffer-pointer
  /// overload below.
  internal func _tryNormalize(
    into outputBuffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
  ) -> Int? {
    return self._tryNormalize(into: _castOutputBuffer(outputBuffer))
  }

  /// Normalizes this string into `outputBuffer` using the NFC normalizer.
  ///
  /// Returns the count reported by the normalizer, or `nil` when the call
  /// does not report success.
  internal func _tryNormalize(
    into outputBuffer: UnsafeMutableBufferPointer
  ) -> Int? {
    var err = __swift_stdlib_U_ZERO_ERROR
    let count = __swift_stdlib_unorm2_normalize(
      _Normalization._nfcNormalizer,
      self.start,
      numericCast(self.count),
      outputBuffer.baseAddress._unsafelyUnwrappedUnchecked,
      numericCast(outputBuffer.count),
      &err
    )
    guard err.isSuccess else {
      // The output buffer needs to grow
      return nil
    }
    return numericCast(count)
  }

  /// Normalizes into a freshly allocated array, doubling the array size
  /// until normalization succeeds. Traps if the size has already exceeded
  /// `self.count * _Normalization._maxNFCExpansionFactor` when a retry is
  /// needed.
  internal func _slowNormalize() -> [UInt16] {
    _sanityCheck(self.count > 0, "called on empty string")

    let canary = self.count * _Normalization._maxNFCExpansionFactor
    var count = self.count
    while true {
      var result = Array(repeating: 0, count: count)
      if let length = result.withUnsafeMutableBufferPointer({ (bufPtr) -> Int? in
        return self._tryNormalize(into: bufPtr)
      }) {
        // Trim the unused tail of the over-allocated array.
        result.removeLast(count - length)
        return result
      }
      // Otherwise, we need to grow
      guard count <= canary else {
        fatalError("Invariant broken: Max decomposition factor insufficient")
      }
      count *= 2
    }
  }
}

/// Normalizes `input` into a segment output buffer; see the buffer-pointer
/// overload below.
internal func _tryNormalize(
  _ input: UnsafeBufferPointer,
  into outputBuffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
) -> Int? {
  return _tryNormalize(input, into: _castOutputBuffer(outputBuffer))
}

/// Normalizes `input` into `outputBuffer` using the NFC normalizer. Returns
/// the reported count, or `nil` when the call does not report success.
internal func _tryNormalize(
  _ input: UnsafeBufferPointer,
  into outputBuffer: UnsafeMutableBufferPointer
) -> Int? {
  var err = __swift_stdlib_U_ZERO_ERROR
  let count = __swift_stdlib_unorm2_normalize(
    _Normalization._nfcNormalizer,
    input.baseAddress._unsafelyUnwrappedUnchecked,
    numericCast(input.count),
    outputBuffer.baseAddress._unsafelyUnwrappedUnchecked,
    numericCast(outputBuffer.count),
    &err
  )
  guard err.isSuccess else {
    // The output buffer needs to grow
    return nil
  }
  return numericCast(count)
}

extension _UnmanagedString where CodeUnit == UInt8 {
  /// Compares two ASCII buffers with `memcmp` over the shorter length, then
  /// by length. Returns -1, 0 or 1.
  @inlinable // FIXME(sil-serialize-all)
  internal func compareASCII(to other: _UnmanagedString) -> Int {
    // FIXME Results should be the same across all platforms.
    // Same start address: only the lengths can differ.
    if self.start == other.start {
      return (self.count &- other.count).signum()
    }
    var cmp = Int(truncatingIfNeeded:
      _stdlib_memcmp(
        self.rawStart, other.rawStart,
        Swift.min(self.count, other.count)))
    if cmp == 0 {
      cmp = self.count &- other.count
    }
    return cmp.signum()
  }
}

extension _UnmanagedOpaqueString {
  // NOTE(review): text is missing in this copy between `0..` and
  // `) -> _Ordering {`: the end of the unranged `_compareOpaque` and the
  // header of the ranged overload, whose body follows and uses `other` and
  // `otherRange`. Tokens are kept as found.
  @inline(never) // @outlined
  @usableFromInline
  internal func _compareOpaque(_ other: _StringGuts) -> _Ordering {
    return self._compareOpaque(other, 0.. ) -> _Ordering {
    //
    // Do a fast Latiny comparison loop; bail if that proves insufficient.
    //
    // The vast majority of the time, seemingly-non-contiguous Strings are
    // really ASCII strings that were bridged improperly. E.g., unknown nul-
    // termination of an all-ASCII file loaded by String.init(contentsOfFile:).
    //
    let selfCount = self.count
    let otherCount = otherRange.count
    let count = Swift.min(selfCount, otherCount)
    let idx = self._findDiffIdx(other, otherRange)
    if idx == count {
      return _lexicographicalCompare(selfCount, otherCount)
    }

    let selfCU = self[idx]
    // `idx` is relative to the start of `otherRange`; translate it once into
    // an absolute position in `other` and use that for every access.
    let otherIdx = idx + otherRange.lowerBound
    let otherCU = other[otherIdx]

    //
    // Fast path: if one is ASCII, we can often compare the code units directly.
    //
    let selfIsASCII = selfCU <= 0x7F
    let otherIsASCII = otherCU <= 0x7F

    let selfIsSingleSegmentScalar =
        self.hasNormalizationBoundary(after: idx)
        && _hasNormalizationBoundary(before: selfCU)
    // The code unit after `otherCU` has to be read at its absolute position,
    // and the end of `otherRange` (not the end of `other`) terminates the
    // compared text. Previously the range-relative `idx` was passed to
    // `other.hasNormalizationBoundary(after:)`, which inspected the wrong
    // code unit whenever `otherRange` did not start at 0 or stopped short of
    // `other.count`. For a full-range comparison this is unchanged.
    let otherNextIdx = otherIdx + 1
    let otherHasBoundaryAfter =
        otherNextIdx >= otherRange.upperBound
        || _hasNormalizationBoundary(before: other[otherNextIdx])
    let otherIsSingleSegmentScalar =
        otherHasBoundaryAfter
        && _hasNormalizationBoundary(before: otherCU)

    if _fastPath(selfIsASCII || otherIsASCII) {
      _sanityCheck(idx < selfCount && idx < otherCount,
        "Should be caught by check against min-count")
      // Check if next CU is <0x300, or if we're in a
      // "_isNormalizedSuperASCII" case. 99.9% of the time, we're here because
      // the non-contig string is ASCII. We never want to hit the pathological
      // path for those.

      if selfIsASCII && otherIsASCII {
        if selfIsSingleSegmentScalar && otherIsSingleSegmentScalar {
          return _lexicographicalCompare(selfCU, otherCU)
        }

        return self._compareOpaquePathological(
          other, otherRange, startingFrom: Swift.max(0, idx-1))
      }

      // NOTE(review): both branches below parse the scalar from `self`. In
      // the first branch `self` is the ASCII side, so it looks like the
      // scalar of `other` was intended there -- confirm. Left unchanged; the
      // fall-through to the pathological path still produces an answer.
      if selfIsASCII && selfIsSingleSegmentScalar
      && self._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII {
        return .less
      } else if otherIsASCII && otherIsSingleSegmentScalar
      && self._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII {
        return .greater
      }
    }

    return self._compareOpaquePathological(
      other, otherRange, startingFrom: Swift.max(0, idx-1)
    )
  }

  /// Slow path: compares via normalized code unit iterators over `self` from
  /// `startingFrom` and over `otherRange` of `other`.
  @inline(never)
  func _compareOpaquePathological(
    _ other: _StringGuts, _ otherRange: Range,
    startingFrom: Int
  ) -> _Ordering {
    // Compare by pulling in a segment at a time, normalizing then comparing
    // individual code units
    var selfIterator = _NormalizedCodeUnitIterator(self[startingFrom...])
    return selfIterator.compare(with:
      _NormalizedCodeUnitIterator(other, otherRange, startIndex: startingFrom)
    )
  }
}

extension UnicodeScalar {
  /// Normalizes this scalar's UTF-16 code units into `outputBuffer` and
  /// returns the count reported by `_tryNormalize`.
  internal func _normalize(
    into outputBuffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
  ) -> Int {
    // Implementation: Perform the normalization on an input buffer and output
    // buffer.
    func impl(
      _ input: UnsafeMutablePointer<_FixedArray2>,
      count: Int,
      into output: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
    ) -> Int {
      let inputBuffer = _unsafeBufferPointerCast(
        input, count, to: UInt16.self
      )
      let outputBuffer = _unsafeMutableBufferPointerCast(
        output, _FixedArray8.capacity, to: UInt16.self
      )
      // The result is unwrapped without a check.
      return _tryNormalize(
        inputBuffer, into: outputBuffer
      )._unsafelyUnwrappedUnchecked
    }

    var inBuffer = _FixedArray2(allZeros:())
    var inLength = 0
    for cu in self.utf16 {
      inBuffer[inLength] = cu
      inLength += 1
    }

    return impl(&inBuffer, count: inLength, into: outputBuffer)
  }

  internal static let maxValue = 0x0010_FFFF
}

private struct _UnicodeScalarExceptions {
  fileprivate let _multiSegmentExpanders: Set
  fileprivate let _normalizedASCIIStarter: Array

  // NOTE(review): text is missing in this copy after `0..` below: the rest
  // of this initializer and whatever followed it up to the tail of a
  // property ending in `0x7F`. Tokens are kept as found.
  @inline(__always)
  init() {
    var msExpanders = Set()
    msExpanders.reserveCapacity(16)
    var normalizedASCIIStarter = Array()
    normalizedASCIIStarter.reserveCapacity(8)
    for rawValue in 0.. 0x7F
  }
}

extension _UnmanagedString where CodeUnit == UInt8 {
  /// Compares this ASCII string against `other`: scans to the first
  /// difference, answers directly where the fast paths allow, and otherwise
  /// hands off to `_compareStringsPostSuffix`.
  @usableFromInline
  internal func _compareStringsPreLoop(
    _ other: _UnmanagedString
  ) -> _Ordering {
    let count = Swift.min(self.count, other.count)

    //
    // Fast scan until we find a difference
    //
    let idx = self._findDiffIdx(other)
    guard idx < count else {
      return _lexicographicalCompare(self.count, other.count)
    }
    let otherCU = other[idx]

    //
    // Fast path: if other is super-ASCII post-normalization, we must be less.
    // If other is ASCII and a single-scalar segment, we have our answer.
    //
    if otherCU > 0x7F {
      if _fastPath(
        other._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII
      ) {
        return .less
      }
    } else {
      let selfASCIIChar = UInt16(self[idx])
      _sanityCheck(selfASCIIChar != otherCU, "should be different")
      if idx+1 == other.count {
        return _lexicographicalCompare(selfASCIIChar, otherCU)
      }
      if _fastPath(other.hasNormalizationBoundary(after: idx)) {
        return _lexicographicalCompare(selfASCIIChar, otherCU)
      }
    }

    //
    // Otherwise, need to normalize the segment and then compare
    //
    let selfASCIIChar = UInt16(self[idx])
    return _compareStringsPostSuffix(
      selfASCIIChar: selfASCIIChar, otherUTF16: other[idx...]
    )
  }
}

extension _StringGuts {
  /// Whether there is a normalization boundary after the code unit at
  /// `index`; `true` when `index` is the last code unit.
  internal func hasNormalizationBoundary(after index: Int) -> Bool {
    let nextIndex = index + 1
    if nextIndex >= self.count {
      return true
    }

    let nextCU = self[nextIndex]
    return _hasNormalizationBoundary(before: nextCU)
  }
}

extension _UnmanagedOpaqueString {
  /// Whether there is a normalization boundary after the code unit at
  /// `index`; `true` when `index` is the last code unit.
  internal func hasNormalizationBoundary(after index: Int) -> Bool {
    let nextIndex = index + 1
    if nextIndex >= self.count {
      return true
    }

    let nextCU = self[nextIndex]
    return _hasNormalizationBoundary(before: nextCU)
  }
}

extension _UnmanagedString where CodeUnit == UInt16 {
  /// Whether there is a normalization boundary after the code unit at
  /// `index`; `true` when `index` is the last code unit.
  internal func hasNormalizationBoundary(after index: Int) -> Bool {
    let nextIndex = index + 1
    if nextIndex >= self.count {
      return true
    }

    let nextCU = self[nextIndex]
    return _hasNormalizationBoundary(before: nextCU)
  }
}

extension BidirectionalCollection where Element == UInt16, SubSequence == Self {
  /// Whether there is a normalization boundary after the element at `index`;
  /// `true` when `index` is the last element.
  internal func hasNormalizationBoundary(after index: Index) -> Bool {
    let nextIndex = self.index(after: index)
    if nextIndex == self.endIndex {
      return true
    }

    let nextCU = self[nextIndex]
    return _hasNormalizationBoundary(before: nextCU)
  }
}

// NOTE(review): text is missing in this copy after `otherUTF16[..` below:
// the rest of this function and the header of the extension and method that
// follow (apparently `_findNormalizationSegmentEnd(startingFrom:)`, given
// the calls to it elsewhere in the file). Tokens are kept as found.
private func _compareStringsPostSuffix(
  selfASCIIChar: UInt16,
  otherUTF16: _UnmanagedString
) -> _Ordering {
  let otherCU = otherUTF16[0]
  _sanityCheck(otherCU <= 0x7F, "should be ASCII, otherwise no need to call")

  let segmentEndIdx = otherUTF16._findNormalizationSegmentEnd(startingFrom: 0)
  let segment = otherUTF16[.. Int {
    let count = self.count
    _sanityCheck(idx < count, "out of bounds")

    // Normalization boundaries are best queried before known starters. Advance
    // past one scalar first.
    var (_, segmentEndIdx) = self._parseRawScalar(startingFrom: idx)
    while segmentEndIdx < count {
      let (scalar, nextIdx) = self._parseRawScalar(startingFrom: segmentEndIdx)
      if scalar._hasNormalizationBoundaryBefore {
        break
      }
      segmentEndIdx = nextIdx
    }
    return segmentEndIdx
  }

  /// Walks backwards from `idx` one scalar at a time and returns the start
  /// index of the first scalar found that has a normalization boundary
  /// before it, or 0 if none is found.
  internal func _findNormalizationSegmentStart(
    endingAt idx: Int // one-past-the-end
  ) -> Int {
    var idx = idx
    let count = self.count
    _sanityCheck(idx > 0 && idx <= count, "out of bounds")

    while idx > 0 {
      let (scalar, priorIdx) = _reverseParseRawScalar(endingAt: idx)
      idx = priorIdx
      if scalar._hasNormalizationBoundaryBefore {
        break
      }
    }
    return idx
  }

  /// Returns the `(start, end)` indices of the normalization segment that
  /// contains `idx`.
  internal func _findNormalizationSegment(spanning idx: Int) -> (Int, Int) {
    var idx = idx

    // Corner case: if we're sub-surrogate, back up
    if _slowPath(
      idx > 0 && _isTrailingSurrogate(self[idx])
      && _isLeadingSurrogate(self[idx-1])
    ) {
      idx -= 1
    }
    let segmentEnd = self._findNormalizationSegmentEnd(startingFrom: idx)

    // Find the start
    if _slowPath(idx == 0) {
      return (0, segmentEnd)
    }

    // Check current scalar
    if self._parseRawScalar(startingFrom: idx).0._hasNormalizationBoundaryBefore {
      return (idx, segmentEnd)
    }

    // Reverse parse until we found the segment start
    let segmentStart = self._findNormalizationSegmentStart(endingAt: idx)

    return (segmentStart, segmentEnd)
  }

  // Whether the segment identified by `idx` is prenormal.
  //
  // Scalar values below 0x300 are special: normalization segments containing only
  // one such scalar are trivially prenormal under NFC. Most Latin-derived scripts
  // can be represented entirely by <0x300 scalar values, meaning that many user
  // strings satisfy this prenormal check. We call sub-0x300 scalars "Latiny" (not
  // official terminology).
//
  // The check is effectively:
  //   1) Whether the current scalar <0x300, AND
  //   2) Whether the current scalar comprises the entire segment
  //
  // NOTE(review): this copy of the file appears to have lost text. Generic
  // argument lists (e.g. on `_UnmanagedString`, `UnsafeBufferPointer`) are
  // missing and several `..<` expressions are cut off together with whatever
  // followed them up to the next `>`. Those spots are marked below and kept
  // as found; restore them from the upstream file before building.
  //
  // Returns false when the code unit at `idx` is >= 0x300; true when it is
  // the last code unit; otherwise true when the next code unit is < 0x300 or
  // has a normalization boundary before it.
  internal func _isLatinyPrenormal(idx: Int
  ) -> Bool {
    _sanityCheck(idx < self.count, "out of bounds")

    let cu = self[idx]
    if _slowPath(cu >= 0x300) { return false }
    if _slowPath(idx+1 == self.count) { return true }

    let nextCU = self[idx+1]
    return nextCU < 0x300 || _hasNormalizationBoundary(before: nextCU)
  }

  // Compares against `other`: scans to the first differing code unit,
  // compares those two units directly when both sides pass
  // `_isLatinyPrenormal` at that index, and otherwise defers to
  // `_compareStringsSuffix`.
  @usableFromInline
  internal func _compareStringsPreLoop(
    _ other: _UnmanagedString
  ) -> _Ordering {
    let count = Swift.min(self.count, other.count)

    //
    // Fast scan until we find a diff
    //
    let idx = _findDiffIdx(other)
    guard idx < count else {
      // No difference within the shorter length: order by length.
      return _lexicographicalCompare(self.count, other.count)
    }
    let selfCU = self[idx]
    let otherCU = other[idx]

    //
    // Fast path: sub-0x300 single-scalar segments can be compared directly
    //
    if _fastPath(
      _isLatinyPrenormal(idx: idx)
      && other._isLatinyPrenormal(idx: idx)
    ) {
      return _lexicographicalCompare(selfCU, otherCU)
    }

    return self._compareStringsSuffix(other: other, randomIndex: idx)
  }

  // Is the shorter of the two parameters a prefix of the other parameter?
  //
  // NOTE(review): text is missing in this copy after `0..` below: the rest
  // of this loop and the header of the next method (apparently
  // `_compareStringsSuffix(other:randomIndex:)`, given the call above).
  // Tokens are kept as found.
  private func shorterPrefixesOther(
    _ other: _UnmanagedString
  ) -> Bool {
    if self.count == other.count {
      return false
    }

    let minimumLength = Swift.min(self.count, other.count)
    for i in 0.., randomIndex: Int
  ) -> _Ordering {
    let count = Swift.min(self.count, other.count)
    let selfCU = self[randomIndex]
    let otherCU = other[randomIndex]
    // The bounds check below runs after the two subscripts above.
    _sanityCheck(randomIndex >= 0 && randomIndex < count, "out of bounds")
    _sanityCheck(selfCU != otherCU, "should be called at a point of difference")

    //
    // Find the segment surrounding the random index passed in. This may involve
    // some back tracking to the nearest normalization boundary. Once we've
    // identified the segment, we can normalize and continue comparison.
    //
    // NOTE: We need to back-track for both self and other. Even though prefixes
    // are binary equal, the point of difference might be at the start of a new
    // segment for one and in the middle of the prior segment for the other. In
    // which case, we will want to effectively compare the two consecutive
    // segments together.
    //
    let (selfSegmentStartIdx, selfSegmentEndIdx) =
      self._findNormalizationSegment(spanning: randomIndex)
    let (otherSegmentStartIdx, otherSegmentEndIdx) =
      other._findNormalizationSegment(spanning: randomIndex)
    let comparisonStartIdx = Swift.min(selfSegmentStartIdx, otherSegmentStartIdx)

    //
    // Fast path: if both are prenormal, we have our answer
    //
    // NOTE(review): text is missing in this copy after
    // `self[comparisonStartIdx..` below: the rest of this method and the
    // header of the next one, whose body builds a `_NormalizedCodeUnitIterator`
    // for each side and compares them. Tokens are kept as found.
    let selfSegment = self[comparisonStartIdx..
  ) -> _Ordering {
    var selfIterator = _NormalizedCodeUnitIterator(self)
    return selfIterator.compare(with:
      _NormalizedCodeUnitIterator(other)
    )
  }
}

// Forwards to the buffer-pointer overload below, using the first
// `selfLength` / `otherLength` elements of each segment output buffer.
private func shorterPrefixesOther(
  _ selfBuffer: UnsafePointer<_Normalization._SegmentOutputBuffer>,
  _ selfLength: Int,
  _ otherBuffer: UnsafePointer<_Normalization._SegmentOutputBuffer>,
  _ otherLength: Int
) -> Bool {
  return shorterPrefixesOther(
    _castOutputBuffer(selfBuffer, endingAt: selfLength),
    _castOutputBuffer(otherBuffer, endingAt: otherLength)
  )
}

// Is the shorter of the two parameters a prefix of the other parameter?
//
// NOTE(review): this function is cut off after `0..` in this copy; the rest
// of its body is not visible. Tokens are kept as found.
private func shorterPrefixesOther(
  _ selfBuffer: UnsafeBufferPointer,
  _ otherBuffer: UnsafeBufferPointer
) -> Bool {
  if selfBuffer.count == otherBuffer.count {
    return false
  }

  let minimumLength = Swift.min(selfBuffer.count, otherBuffer.count)
  for i in 0..