// Mirror of https://github.com/apple/swift.git (synced 2025-12-14)
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
import SwiftShims
|
|
|
|
// HACK: Reconstructing the guts from raw bit patterns here avoids some
// retain/release traffic that was slowing down the memcmp fast path for
// comparing ascii strings. rdar://problem/37473470
@inline(never) // @outlined
@effects(readonly)
@_versioned // @opaque
internal
func _compareUnicode(
  _ lhs: _StringGuts._RawBitPattern, _ rhs: _StringGuts._RawBitPattern
) -> Int {
  let lhsGuts = _StringGuts(rawBits: lhs)
  let rhsGuts = _StringGuts(rawBits: rhs)

  // Fast path: both sides contiguous, compare directly.
  if _fastPath(lhsGuts._isContiguous && rhsGuts._isContiguous) {
    return lhsGuts._compareContiguous(rhsGuts)
  }
  // Slow path: at least one side is opaque. Compare through the opaque
  // view, flipping the result when the opaque side is on the right.
  if !lhsGuts._isContiguous {
    return lhsGuts._asOpaque()._compareOpaque(rhsGuts).rawValue
  }
  return rhsGuts._asOpaque()._compareOpaque(lhsGuts).flipped.rawValue
}
|
|
|
|
// Range-restricted variant of `_compareUnicode(_:_:)` above; same
// retain/release-avoiding raw-bits trick. rdar://problem/37473470
@inline(never) // @outlined
@effects(readonly)
@_versioned // @opaque
internal
func _compareUnicode(
  _ lhs: _StringGuts._RawBitPattern, _ leftRange: Range<Int>,
  _ rhs: _StringGuts._RawBitPattern, _ rightRange: Range<Int>
) -> Int {
  let lhsGuts = _StringGuts(rawBits: lhs)
  let rhsGuts = _StringGuts(rawBits: rhs)

  // Fast path: both sides contiguous, compare directly.
  if _fastPath(lhsGuts._isContiguous && rhsGuts._isContiguous) {
    return lhsGuts._compareContiguous(leftRange, rhsGuts, rightRange)
  }
  // Slow path: at least one side is opaque; flip when it is the right side.
  if !lhsGuts._isContiguous {
    return lhsGuts._asOpaque()[leftRange]._compareOpaque(
      rhsGuts, rightRange
    ).rawValue
  }
  return rhsGuts._asOpaque()[rightRange]._compareOpaque(
    lhsGuts, leftRange
  ).flipped.rawValue
}
|
|
|
|
//
|
|
// Pointer casting helpers
|
|
//
|
|
/// Reinterprets `count` elements starting at `ptr` as a mutable buffer of
/// `U`. The caller is responsible for layout compatibility.
@inline(__always)
private func _unsafeMutableBufferPointerCast<T, U>(
  _ ptr: UnsafeMutablePointer<T>,
  _ count: Int,
  to: U.Type = U.self
) -> UnsafeMutableBufferPointer<U> {
  let rawPtr = UnsafeMutableRawPointer(ptr)
  let rebound = rawPtr.assumingMemoryBound(to: U.self)
  return UnsafeMutableBufferPointer(start: rebound, count: count)
}
|
|
/// Reinterprets `count` elements starting at `ptr` as an immutable buffer
/// of `U`. The caller is responsible for layout compatibility.
@inline(__always)
private func _unsafeBufferPointerCast<T, U>(
  _ ptr: UnsafePointer<T>,
  _ count: Int,
  to: U.Type = U.self
) -> UnsafeBufferPointer<U> {
  let rawPtr = UnsafeRawPointer(ptr)
  let rebound = rawPtr.assumingMemoryBound(to: U.self)
  return UnsafeBufferPointer(start: rebound, count: count)
}
|
|
|
|
// UTF-16 surrogate constants: leading (high) surrogates occupy
// 0xD800...0xDBFF, trailing (low) surrogates 0xDC00...0xDFFF.
internal let _leadingSurrogateBias: UInt16 = 0xd800
internal let _trailingSurrogateBias: UInt16 = 0xdc00
// Masks off the 10 payload bits, leaving the 6-bit surrogate tag.
internal let _surrogateMask: UInt16 = 0xfc00
|
|
|
|
/// Whether `cu` is any UTF-16 surrogate code unit, leading or trailing.
@inline(__always)
internal func _isSurrogate(_ cu: UInt16) -> Bool {
  if _isLeadingSurrogate(cu) { return true }
  return _isTrailingSurrogate(cu)
}
|
|
|
|
/// Whether `cu` is a UTF-16 leading (high) surrogate, i.e. in
/// 0xD800...0xDBFF.
@inline(__always)
internal func _isLeadingSurrogate(_ cu: UInt16) -> Bool {
  // Tag test: the xor clears the leading-surrogate tag bits, so the masked
  // result is zero exactly for 0xD800...0xDBFF.
  return (cu ^ _leadingSurrogateBias) & _surrogateMask == 0
}
|
|
|
|
/// Whether `cu` is a UTF-16 trailing (low) surrogate, i.e. in
/// 0xDC00...0xDFFF.
@inline(__always)
internal func _isTrailingSurrogate(_ cu: UInt16) -> Bool {
  // Tag test: masked result is zero exactly for 0xDC00...0xDFFF.
  return (cu ^ _trailingSurrogateBias) & _surrogateMask == 0
}
|
|
/// Combines a valid UTF-16 surrogate pair into the Unicode scalar value it
/// encodes.
@inline(__always)
internal func _decodeSurrogatePair(
  leading high: UInt16, trailing low: UInt16
) -> UInt32 {
  _sanityCheck(_isLeadingSurrogate(high) && _isTrailingSurrogate(low))
  // Subtracting the biases strips the surrogate tags, leaving the high and
  // low 10-bit payloads.
  let highBits: UInt32 = UInt32(high) &- UInt32(_leadingSurrogateBias)
  _sanityCheck(highBits < 1<<10, "I said high 10. Not high, like, 20 or something")
  let lowBits: UInt32 = UInt32(low) &- UInt32(_trailingSurrogateBias)
  _sanityCheck(lowBits < 1<<10, "I said low 10. Not low, like, 20 or something")

  // Scalars encoded as surrogate pairs start at U+10000.
  return 0x1_00_00 &+ ((highBits &<< 10) | lowBits)
}
|
|
|
|
/// Whether a normalization-segment boundary definitely precedes `cu`.
/// Surrogate code units conservatively report no boundary.
internal func _hasNormalizationBoundary(before cu: UInt16) -> Bool {
  if _isSurrogate(cu) { return false }
  return UnicodeScalar(_unchecked: UInt32(cu))._hasNormalizationBoundaryBefore
}
|
|
|
|
//
// Output-buffer casting helpers
//
|
|
/// Views a normalization segment output buffer as a mutable UInt16 buffer,
/// trimmed to its first `endIdx` elements.
internal func _castOutputBuffer(
  _ ptr: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>,
  endingAt endIdx: Int = _Normalization._SegmentOutputBuffer.capacity
) -> UnsafeMutableBufferPointer<UInt16> {
  let capacity = _Normalization._SegmentOutputBuffer.capacity
  let fullBuffer: UnsafeMutableBufferPointer<UInt16> =
    _unsafeMutableBufferPointerCast(ptr, capacity)
  return UnsafeMutableBufferPointer<UInt16>(rebasing: fullBuffer[..<endIdx])
}
|
|
/// Views a normalization segment output buffer as an immutable UInt16
/// buffer, trimmed to its first `endIdx` elements.
internal func _castOutputBuffer(
  _ ptr: UnsafePointer<_Normalization._SegmentOutputBuffer>,
  endingAt endIdx: Int = _Normalization._SegmentOutputBuffer.capacity
) -> UnsafeBufferPointer<UInt16> {
  let capacity = _Normalization._SegmentOutputBuffer.capacity
  let fullBuffer: UnsafeBufferPointer<UInt16> =
    _unsafeBufferPointerCast(ptr, capacity)
  return UnsafeBufferPointer<UInt16>(rebasing: fullBuffer[..<endIdx])
}
|
|
|
|
extension _FixedArray16 where T == UInt16 {
  /// Copies the contents of `other` into the front of this fixed array.
  /// `other` must fit within the fixed capacity.
  mutating func fill(from other: _UnmanagedString<T>) {
    // FIX: was `other.count < capacity`, which spuriously rejected a legal
    // fill of exactly `capacity` elements — indices 0..<count are in bounds
    // whenever count <= capacity.
    _sanityCheck(other.count <= _FixedArray16<T>.capacity,
      "out of bounds fill")
    for i in 0..<other.count {
      self[i] = other[i]
    }
  }
}
|
|
|
|
/// Three-way comparison result; raw values follow the memcmp convention
/// (-1 less, 0 equal, 1 greater).
@_versioned internal
enum _Ordering: Int, Equatable {
  case less = -1
  case equal = 0
  case greater = 1

  /// The ordering with the roles of the two compared values exchanged.
  @_versioned internal
  var flipped: _Ordering {
    switch self {
    case .equal: return .equal
    case .less: return .greater
    case .greater: return .less
    }
  }

  /// Builds an ordering from a signed comparison result (negative, zero, or
  /// positive).
  @inline(__always)
  @_versioned internal
  init(signedNotation int: Int) {
    if int < 0 {
      self = .less
    } else {
      self = int == 0 ? .equal : .greater
    }
  }
}
|
|
|
|
extension _UnmanagedString where CodeUnit == UInt8 {
  // TODO: These should be SIMD-ized
  /// Index of the first position where this ASCII string and `other`
  /// disagree; returns the shorter count when one is a binary prefix of the
  /// other.
  internal func _findDiffIdx(_ other: _UnmanagedString<UInt16>) -> Int {
    let limit = Swift.min(self.count, other.count)
    var idx = 0
    while idx < limit {
      if UInt16(self[idx]) != other[idx] { return idx }
      idx += 1
    }
    return limit
  }
}
|
|
|
|
/// Index of the first mismatch between an ASCII buffer and a UTF-16 buffer;
/// returns the shorter count when one is a prefix of the other.
internal func _findDiffIdx(
  _ left: UnsafeBufferPointer<UInt8>,
  _ right: UnsafeBufferPointer<UInt16>
) -> Int {
  let limit = Swift.min(left.count, right.count)
  var idx = 0
  while idx < limit {
    if UInt16(left[idx]) != right[idx] { return idx }
    idx += 1
  }
  return limit
}
|
|
|
|
/// Index of the first mismatching code unit between two buffers of the same
/// unsigned code-unit type; returns the shorter count when one is a prefix
/// of the other.
internal func _findDiffIdx<CodeUnit>(
  _ left: UnsafeBufferPointer<CodeUnit>,
  _ right: UnsafeBufferPointer<CodeUnit>
) -> Int where CodeUnit : FixedWidthInteger & UnsignedInteger {
  let limit = Swift.min(left.count, right.count)
  var idx = 0
  while idx < limit {
    if left[idx] != right[idx] { return idx }
    idx += 1
  }
  return limit
}
|
|
|
|
extension _UnmanagedString where CodeUnit : FixedWidthInteger & UnsignedInteger {
  /// Index of the first mismatching code unit between `self` and `other`;
  /// returns the shorter count when one is a prefix of the other.
  ///
  /// NOTE(review): the method-level generic `CodeUnit` shadows the
  /// extension's `CodeUnit`, so the two strings may have different code-unit
  /// widths; the comparison relies on heterogeneous integer `==`. Confirm
  /// the shadowing is intentional.
  internal func _findDiffIdx<CodeUnit>(
    _ other: _UnmanagedString<CodeUnit>
  ) -> Int {
    let limit = Swift.min(self.count, other.count)
    var idx = 0
    while idx < limit {
      if self[idx] != other[idx] { return idx }
      idx += 1
    }
    return limit
  }
}
|
|
|
|
extension _UnmanagedOpaqueString {
  /// Index of the first mismatch between `self` and `other` restricted to
  /// `otherRange`; returns the shorter count when one side is a prefix.
  internal func _findDiffIdx(_ other: _StringGuts, _ otherRange: Range<Int>
  ) -> Int {
    let limit = Swift.min(self.count, otherRange.count)
    var idx = 0
    while idx < limit {
      // `other` is indexed relative to the start of `otherRange`.
      if self[idx] != other[idx + otherRange.lowerBound] { return idx }
      idx += 1
    }
    return limit
  }
}
|
|
|
|
/// Three-way comparison of two lengths.
internal func _lexicographicalCompare(_ lhs: Int, _ rhs: Int) -> _Ordering {
  // TODO: inspect code quality
  if lhs < rhs { return .less }
  return lhs > rhs ? .greater : .equal
}
|
|
|
|
/// Three-way comparison of two UTF-16 code units.
internal func _lexicographicalCompare(
  _ lhs: UInt16, _ rhs: UInt16
) -> _Ordering {
  if lhs < rhs { return .less }
  return lhs > rhs ? .greater : .equal
}
|
|
|
|
/// Lexicographic three-way comparison of two UTF-16 buffers.
internal func _lexicographicalCompare(
  _ leftHS: UnsafeBufferPointer<UInt16>,
  _ rightHS: UnsafeBufferPointer<UInt16>
) -> _Ordering {
  let limit = Swift.min(leftHS.count, rightHS.count)

  let diffIdx = _findDiffIdx(leftHS, rightHS)
  if diffIdx == limit {
    // One buffer is a prefix of the other; the shorter orders first.
    return _lexicographicalCompare(leftHS.count, rightHS.count)
  }
  let leftBase = leftHS.baseAddress._unsafelyUnwrappedUnchecked
  let rightBase = rightHS.baseAddress._unsafelyUnwrappedUnchecked
  return _lexicographicalCompare(leftBase[diffIdx], rightBase[diffIdx])
}
|
|
|
|
/// Lexicographic three-way comparison of an ASCII buffer against a UTF-16
/// buffer; ASCII code units are widened before comparing.
internal func _lexicographicalCompare(
  _ leftHS: UnsafeBufferPointer<UInt8>,
  _ rightHS: UnsafeBufferPointer<UInt16>
) -> _Ordering {
  let limit = Swift.min(leftHS.count, rightHS.count)

  let diffIdx = _findDiffIdx(leftHS, rightHS)
  if diffIdx == limit {
    // One buffer is a prefix of the other; the shorter orders first.
    return _lexicographicalCompare(leftHS.count, rightHS.count)
  }
  let leftBase = leftHS.baseAddress._unsafelyUnwrappedUnchecked
  let rightBase = rightHS.baseAddress._unsafelyUnwrappedUnchecked
  return _lexicographicalCompare(UInt16(leftBase[diffIdx]), rightBase[diffIdx])
}
|
|
/// Lexicographic comparison of two normalization segment buffers, each
/// limited to its first `count` code units.
@inline(__always)
internal func _lexicographicalCompare(
  _ leftHS: UnsafePointer<_Normalization._SegmentOutputBuffer>,
  leftCount: Int,
  _ rightHS: UnsafePointer<_Normalization._SegmentOutputBuffer>,
  rightCount: Int
) -> _Ordering {
  let leftBuffer = _castOutputBuffer(leftHS, endingAt: leftCount)
  let rightBuffer = _castOutputBuffer(rightHS, endingAt: rightCount)
  return _lexicographicalCompare(leftBuffer, rightBuffer)
}
|
|
/// Lexicographic three-way comparison of two UTF-16 arrays.
@inline(__always)
internal func _lexicographicalCompare(
  _ leftHS: Array<UInt16>,
  _ rightHS: Array<UInt16>
) -> _Ordering {
  return leftHS.withUnsafeBufferPointer { leftBuf -> _Ordering in
    rightHS.withUnsafeBufferPointer { rightBuf -> _Ordering in
      _lexicographicalCompare(leftBuf, rightBuf)
    }
  }
}
|
|
|
|
/// Parses the scalar beginning at `idx` inside a segment output buffer.
/// Returns the scalar and the index one past its last code unit.
internal func _parseRawScalar(
  _ buf: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>,
  startingFrom idx: Int = 0
) -> (UnicodeScalar, scalarEndIndex: Int) {
  return Swift._parseRawScalar(buffer: _castOutputBuffer(buf), startingFrom: idx)
}
|
|
|
|
/// Parses the UTF-16 scalar starting at `idx` in `buf`.
///
/// Unpaired surrogates are returned as-is (as "invalid" scalars) rather
/// than rejected. Returns the scalar and the index one past its encoding
/// (idx+1 for a single unit, idx+2 for a surrogate pair).
internal func _parseRawScalar(
  buffer buf: UnsafeBufferPointer<UInt16>,
  startingFrom idx: Int = 0
) -> (UnicodeScalar, scalarEndIndex: Int) {
  let ptr = buf.baseAddress._unsafelyUnwrappedUnchecked
  _sanityCheck(idx >= 0 && idx < buf.count, "out of bounds index")
  let cu: UInt16 = ptr[idx]
  // Last code unit in the buffer: cannot start a pair.
  if _slowPath(idx+1 == buf.count) {
    return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
  }
  // Not a leading surrogate: a BMP scalar (or an unpaired trailing
  // surrogate, returned as-is).
  guard _isLeadingSurrogate(cu) else {
    return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
  }
  let nextCu: UInt16 = ptr[idx+1]
  guard _isTrailingSurrogate(nextCu) else {
    // Invalid surrogate pair: just return the invalid value
    return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
  }

  // Decode
  let value: UInt32 = _decodeSurrogatePair(leading: cu, trailing: nextCu)
  _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set")
  return (UnicodeScalar(_unchecked: value), idx+2)
}
|
|
|
|
extension _UnmanagedOpaqueString {
  /// Parses the scalar starting at `idx`, reading at most two code units.
  ///
  /// Copies up to two code units into a small stack buffer and defers to
  /// the buffer-based parser (so unpaired surrogates behave identically).
  /// As before, the returned end index is relative to the copied buffer
  /// (1 or 2), not to `idx`.
  internal func _parseRawScalar(
    startingFrom idx: Int = 0
  ) -> (UnicodeScalar, scalarEndIndex: Int) {
    var buffer = _FixedArray2<UInt16>(allZeros:())
    buffer[0] = self[idx]
    var codeUnitCount = 1
    if idx+1 < self.count {
      buffer[1] = self[idx+1]
      codeUnitCount = 2
    }
    // FIX: the previous version let the pointer produced by `&buffer`
    // escape the call it was created for (undefined lifetime). Constrain
    // its lifetime with withUnsafePointer, and merge the two duplicated
    // branches.
    return withUnsafePointer(to: &buffer) { bufferPtr in
      let codeUnits = _unsafeBufferPointerCast(
        bufferPtr, codeUnitCount, to: UInt16.self)
      return Swift._parseRawScalar(buffer: codeUnits, startingFrom: 0)
    }
  }
}
|
|
|
|
extension _UnmanagedString where CodeUnit == UInt16 {
  /// Parses the scalar starting at `idx`; unpaired surrogates are returned
  /// as-is rather than rejected. Returns the scalar and the index one past
  /// its encoding.
  internal func _parseRawScalar(
    startingFrom idx: Int = 0
  ) -> (UnicodeScalar, scalarEndIndex: Int) {
    _sanityCheck(idx >= 0 && idx < self.count, "out of bounds index")
    let cu = self[idx]
    // Last code unit: cannot start a surrogate pair.
    if _slowPath(idx+1 == self.count) {
      return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
    }
    guard _isLeadingSurrogate(cu) else {
      return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
    }
    let nextCu = self[idx+1]
    guard _isTrailingSurrogate(nextCu) else {
      // Invalid surrogate pair: just return the invalid value
      return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
    }

    // Decode
    let value: UInt32 = _decodeSurrogatePair(leading: cu, trailing: nextCu)
    _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set")
    return (UnicodeScalar(_unchecked: value), idx+2)
  }

  /// Parses the scalar that ends just before `idx`, scanning backwards.
  /// Returns the scalar and the index of its first code unit.
  internal func _reverseParseRawScalar(
    endingAt idx: Int // one-past-the-end
  ) -> (UnicodeScalar, scalarStartIndex: Int) {
    _sanityCheck(idx > 0 && idx <= self.count, "out of bounds end index")

    // Corner case: leading un-paired surrogate
    if _slowPath(idx == 1) {
      return (UnicodeScalar(_unchecked: UInt32(self[0])), 0)
    }

    let cu = self[idx-1]
    // Not a trailing surrogate: a single-unit scalar (or an unpaired
    // leading surrogate, returned as-is).
    guard _isTrailingSurrogate(cu) else {
      return (UnicodeScalar(_unchecked: UInt32(cu)), idx-1)
    }
    let priorCU = self[idx-2]
    guard _isLeadingSurrogate(priorCU) else {
      return (UnicodeScalar(_unchecked: UInt32(cu)), idx-1)
    }

    // Decode
    let value: UInt32 = _decodeSurrogatePair(leading: priorCU, trailing: cu)
    _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set")
    return (UnicodeScalar(_unchecked: value), idx-2)
  }

  /// NFC-normalizes into a fixed segment buffer; nil when it doesn't fit.
  internal func _tryNormalize(
    into outputBuffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
  ) -> Int? {
    return self._tryNormalize(into: _castOutputBuffer(outputBuffer))
  }

  /// NFC-normalizes into `outputBuffer` via ICU. Returns the normalized
  /// length, or nil on any ICU error (treated as "buffer needs to grow").
  internal func _tryNormalize(
    into outputBuffer: UnsafeMutableBufferPointer<UInt16>
  ) -> Int? {
    var err = __swift_stdlib_U_ZERO_ERROR
    let count = __swift_stdlib_unorm2_normalize(
      _Normalization._nfcNormalizer,
      self.start,
      numericCast(self.count),
      outputBuffer.baseAddress._unsafelyUnwrappedUnchecked,
      numericCast(outputBuffer.count),
      &err
    )
    guard err.isSuccess else {
      // The output buffer needs to grow
      return nil
    }
    return numericCast(count)
  }

  /// NFC-normalizes by repeatedly doubling a heap buffer until the result
  /// fits; traps if growth exceeds the documented max expansion factor.
  internal func _slowNormalize() -> [UInt16] {
    _sanityCheck(self.count > 0, "called on empty string")

    // Upper bound on how large the buffer should ever need to get.
    let canary = self.count * _Normalization._maxNFCExpansionFactor
    var count = self.count
    while true {
      var result = Array<UInt16>(repeating: 0, count: count)
      if let length = result.withUnsafeMutableBufferPointer({ (bufPtr) -> Int? in
        return self._tryNormalize(into: bufPtr)
      }) {
        // Trim the unused tail before returning.
        result.removeLast(count - length)
        return result
      }
      // Otherwise, we need to grow
      guard count <= canary else {
        fatalError("Invariant broken: Max decomposition factor insufficient")
      }
      count *= 2
    }
  }
}
|
|
|
|
/// NFC-normalizes `input` into a fixed segment buffer; nil when it doesn't
/// fit.
internal func _tryNormalize(
  _ input: UnsafeBufferPointer<UInt16>,
  into outputBuffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
) -> Int? {
  let typedBuffer = _castOutputBuffer(outputBuffer)
  return _tryNormalize(input, into: typedBuffer)
}
|
|
/// NFC-normalizes `input` into `outputBuffer` via ICU.
/// Returns the normalized length, or nil on any ICU error (treated as
/// "output buffer needs to grow").
internal func _tryNormalize(
  _ input: UnsafeBufferPointer<UInt16>,
  into outputBuffer: UnsafeMutableBufferPointer<UInt16>
) -> Int? {
  var err = __swift_stdlib_U_ZERO_ERROR
  let count = __swift_stdlib_unorm2_normalize(
    _Normalization._nfcNormalizer,
    input.baseAddress._unsafelyUnwrappedUnchecked,
    numericCast(input.count),
    outputBuffer.baseAddress._unsafelyUnwrappedUnchecked,
    numericCast(outputBuffer.count),
    &err
  )
  guard err.isSuccess else {
    // The output buffer needs to grow
    return nil
  }
  return numericCast(count)
}
|
|
|
|
extension _UnmanagedString where CodeUnit == UInt8 {
  /// memcmp-style three-way comparison (-1/0/1) of two ASCII strings; ties
  /// on the common prefix are broken by length.
  @_inlineable // FIXME(sil-serialize-all)
  @_versioned
  internal func compareASCII(to other: _UnmanagedString<UInt8>) -> Int {
    // FIXME Results should be the same across all platforms.
    // Identical storage can only differ in length.
    if self.start == other.start {
      return (self.count &- other.count).signum()
    }
    let commonCount = Swift.min(self.count, other.count)
    let result = Int(truncatingIfNeeded:
      _stdlib_memcmp(self.rawStart, other.rawStart, commonCount))
    if result != 0 {
      return result.signum()
    }
    // Equal prefixes: the shorter string orders first.
    return (self.count &- other.count).signum()
  }
}
|
|
|
|
public extension _StringGuts {
  /// Three-way compare (-1/0/1) of two contiguous string guts, dispatching
  /// on the ASCII-ness of each side. ASCII-vs-ASCII is handled before this
  /// point and must not reach here.
  @inline(__always)
  public
  func _compareContiguous(_ other: _StringGuts) -> Int {
    _sanityCheck(self._isContiguous && other._isContiguous)
    switch (self.isASCII, other.isASCII) {
    case (true, true):
      fatalError("Should have hit the ascii comp in StringComparable.compare()")
    case (true, false):
      return self._unmanagedASCIIView._compareStringsPreLoop(
        other: other._unmanagedUTF16View
      ).rawValue
    case (false, true):
      // Same compare, just invert result
      return other._unmanagedASCIIView._compareStringsPreLoop(
        other: self._unmanagedUTF16View
      ).flipped.rawValue
    case (false, false):
      return self._unmanagedUTF16View._compareStringsPreLoop(
        other: other._unmanagedUTF16View
      ).rawValue
    }
  }

  /// Range-restricted variant of `_compareContiguous(_:)`: compares
  /// `self[selfRange]` against `other[otherRange]`.
  @inline(__always)
  public
  func _compareContiguous(
    _ selfRange: Range<Int>,
    _ other: _StringGuts,
    _ otherRange: Range<Int>
  ) -> Int {
    _sanityCheck(self._isContiguous && other._isContiguous)
    switch (self.isASCII, other.isASCII) {
    case (true, true):
      fatalError("Should have hit the ascii comp in StringComparable.compare()")
    case (true, false):
      return self._unmanagedASCIIView[selfRange]._compareStringsPreLoop(
        other: other._unmanagedUTF16View[otherRange]
      ).rawValue
    case (false, true):
      // Same compare, just invert result
      return other._unmanagedASCIIView[otherRange]._compareStringsPreLoop(
        other: self._unmanagedUTF16View[selfRange]
      ).flipped.rawValue
    case (false, false):
      return self._unmanagedUTF16View[selfRange]._compareStringsPreLoop(
        other: other._unmanagedUTF16View[otherRange]
      ).rawValue
    }
  }
}
|
|
|
|
extension _UnmanagedOpaqueString {
  /// Three-way compare of this opaque string against all of `other`.
  @inline(never) // @outlined
  @_versioned
  internal
  func _compareOpaque(_ other: _StringGuts) -> _Ordering {
    return self._compareOpaque(other, 0..<other.count)
  }

  /// Three-way compare against `other` restricted to `otherRange`.
  @inline(never) // @outlined
  @_versioned
  internal
  func _compareOpaque(
    _ other: _StringGuts, _ otherRange: Range<Int>
  ) -> _Ordering {
    //
    // Do a fast Latiny comparison loop; bail if that proves insufficient.
    //
    // The vast majority of the time, seemingly-non-contiguous Strings are
    // really ASCII strings that were bridged improperly. E.g., unknown nul-
    // termination of an all-ASCII file loaded by String.init(contentsOfFile:).
    //

    let selfCount = self.count
    let otherCount = otherRange.count
    let count = Swift.min(selfCount, otherCount)
    let idx = self._findDiffIdx(other, otherRange)
    if idx == count {
      // One side is a binary prefix of the other; order by length.
      return _lexicographicalCompare(selfCount, otherCount)
    }

    let selfCU = self[idx]
    let otherCU = other[idx + otherRange.lowerBound]

    //
    // Fast path: if one is ASCII, we can often compare the code units directly.
    //
    let selfIsASCII = selfCU <= 0x7F
    let otherIsASCII = otherCU <= 0x7F

    let selfIsSingleSegmentScalar =
      self.hasNormalizationBoundary(after: idx)
      && _hasNormalizationBoundary(before: selfCU)
    // NOTE(review): `other` is indexed as `idx + otherRange.lowerBound`
    // just above, but this boundary query uses bare `idx` — confirm that is
    // intended when otherRange.lowerBound != 0.
    let otherIsSingleSegmentScalar =
      other.hasNormalizationBoundary(after: idx)
      && _hasNormalizationBoundary(before: otherCU)

    if _fastPath(selfIsASCII || otherIsASCII) {
      _sanityCheck(idx < selfCount && idx < otherCount,
        "Should be caught by check against min-count")
      // Check if next CU is <0x300, or if we're in a
      // "_isNormalizedSuperASCII" case. 99.9% of the time, we're here because
      // the non-contig string is ASCII. We never want to hit the pathological
      // path for those.

      if selfIsASCII && otherIsASCII {
        if selfIsSingleSegmentScalar && otherIsSingleSegmentScalar {
          return _lexicographicalCompare(selfCU, otherCU)
        }

        return self._compareOpaquePathological(
          other, otherRange, startingFrom: Swift.max(0, idx-1))
      }

      // NOTE(review): when selfIsASCII holds, self's scalar at idx is ASCII
      // and `_isNormalizedSuperASCII` is then always false, so this first
      // branch looks unreachable — should it examine `other`'s scalar
      // instead? As written it merely falls through to the (correct but
      // slow) pathological path, so correctness is unaffected.
      if selfIsASCII && selfIsSingleSegmentScalar
      && self._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII {
        return .less
      } else if otherIsASCII && otherIsSingleSegmentScalar
      && self._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII {
        return .greater
      }
    }

    return self._compareOpaquePathological(
      other, otherRange, startingFrom: Swift.max(0, idx-1)
    )
  }

  /// Slow path: normalize one segment at a time on both sides and compare
  /// the resulting code units.
  @inline(never)
  func _compareOpaquePathological(
    _ other: _StringGuts, _ otherRange: Range<Int>,
    startingFrom: Int
  ) -> _Ordering {
    // Compare by pulling in a segment at a time, normalizing then comparing
    // individual code units
    var selfIterator = _NormalizedCodeUnitIterator(self, startIndex: startingFrom)
    return selfIterator.compare(with:
      _NormalizedCodeUnitIterator(other, otherRange, startIndex: startingFrom)
    )
  }
}
|
|
|
|
extension UnicodeScalar {
  /// NFC-normalizes this single scalar into `outputBuffer`, returning the
  /// number of code units written. A single scalar is assumed to always fit.
  internal func _normalize(
    into outputBuffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
  ) -> Int {
    // Implementation: Perform the normalization on an input buffer and output
    // buffer.
    func impl(
      _ input: UnsafeMutablePointer<_FixedArray2<UInt16>>,
      count: Int,
      into output: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
    ) -> Int {
      let inputBuffer = _unsafeBufferPointerCast(
        input, count, to: UInt16.self
      )
      // NOTE(review): capacity is taken from _FixedArray8 while the output
      // parameter's type is _Normalization._SegmentOutputBuffer — confirm
      // these are the same type (or at least the same capacity).
      let outputBuffer = _unsafeMutableBufferPointerCast(
        output, _FixedArray8<UInt16>.capacity, to: UInt16.self
      )
      // Single-scalar input is assumed to always fit, hence the unchecked
      // unwrap of the optional result.
      return _tryNormalize(
        inputBuffer, into: outputBuffer
      )._unsafelyUnwrappedUnchecked
    }

    // Encode the scalar as one or two UTF-16 code units.
    var inBuffer = _FixedArray2<UInt16>(allZeros:())
    var inLength = 0
    for cu in self.utf16 {
      inBuffer[inLength] = cu
      inLength += 1
    }

    return impl(&inBuffer, count: inLength, into: outputBuffer)
  }

  // Largest valid Unicode code point (U+10FFFF).
  static internal let maxValue = 0x0010_FFFF
}
|
|
|
|
/// Runtime-built tables of exceptional scalars that break segment-by-segment
/// processing: multi-segment expanders, and scalars that normalize to an
/// ASCII starter. Built once by scanning every assigned code point.
private struct _UnicodeScalarExceptions {
  // Scalars whose NFC form spans multiple normalization segments.
  fileprivate let _multiSegmentExpanders: Set<UInt32>
  // Scalars whose NFC form begins with an ASCII code unit.
  fileprivate let _normalizedASCIIStarter: Array<UInt32>

  @inline(__always)
  init() {
    var msExpanders = Set<UInt32>()
    msExpanders.reserveCapacity(16)
    var normalizedASCIIStarter = Array<UInt32>()
    normalizedASCIIStarter.reserveCapacity(8)

    for rawValue in 0..<UnicodeScalar.maxValue {
      guard let scalar = UnicodeScalar(rawValue) else { continue }

      // Fast path: skip unassigned code points
      guard scalar._isDefined else { continue }

      // Fast path: skip unless QC_FCD=no
      if _fastPath(!scalar._hasFullCompExclusion) {
        continue
      }

      var outBuffer = _Normalization._SegmentOutputBuffer(allZeros:())
      let length = scalar._normalize(into: &outBuffer)

      // See if this normalized to have an ASCII starter
      if _slowPath(outBuffer[0] <= 0x7F) {
        normalizedASCIIStarter.append(scalar.value)
      }

      // See if this normalizes to multiple segments
      var i = 0
      while i < length {
        let (innerScalar, nextI) = _parseRawScalar(&outBuffer, startingFrom: i)
        // FIX: the original guarded on `innerScalar._hasNormalizationBoundaryBefore`
        // immediately inside an `if` that had just tested the very same
        // expression, so the guard always passed and its fatalError was
        // unreachable dead code; the redundant guard is removed.
        if _slowPath(i != 0 && innerScalar._hasNormalizationBoundaryBefore) {
          msExpanders.insert(scalar.value)
          break
        }
        i = nextI
      }
    }

    self._multiSegmentExpanders = msExpanders
    self._normalizedASCIIStarter = normalizedASCIIStarter
  }
}
|
|
// Built lazily and thread-safely on first use (global `let` semantics).
private let _unicodeScalarExceptions = _UnicodeScalarExceptions()
|
|
|
|
extension UnicodeScalar {
  // Multi-Segment Expanders - Unicode defines "expanding canonical
  // decompositions", where even in NFC a single scalar expands to multiple
  // scalars. A small subset (currently 12 scalars circa Unicode 10) of these
  // will expand into multiple normalization segments, breaking any kind of
  // segment-by-segment logic or processing even under NFC. These are a subset
  // of what is identified by the UCD as "composition exclusion" scalars. Since
  // we don't have access to a UCD (available only at runtime), we go through
  // ICU which lumps those and even more as "Full Composition Exclusions". Of
  // the many full composition exclusions, this set (created once at runtime as
  // this can change with Unicode version) tracks just those that can expand
  // into multiple normalization segments.
  internal var _isMultiSegmentExpander: Bool {
    let expanders = _unicodeScalarExceptions._multiSegmentExpanders
    return expanders.contains(self.value)
  }

  // Whether, post-normalization, this scalar definitely compares greater than
  // any ASCII scalar. This is true for all super-ASCII scalars that are not
  // ASCII Normalized Starters.
  //
  // ASCII Normalized Starters - A handful of scalars normalize to have ASCII
  // starters, e.g. Greek question mark ";". As of Unicode 10 there are 3 (all
  // from Unicode 1.1 originally) and more are unlikely. But, there could be
  // more in future versions, so determine at runtime.
  internal var _isNormalizedSuperASCII: Bool {
    let asciiStarters = _unicodeScalarExceptions._normalizedASCIIStarter
    if _slowPath(asciiStarters.contains(self.value)) {
      return false
    }
    return self.value > 0x7F
  }
}
|
|
|
|
extension _UnmanagedString where CodeUnit == UInt8 {
  /// Three-way compare of an ASCII string against a UTF-16 string: scan for
  /// the first difference, answer directly when safe, and fall back to
  /// segment normalization otherwise.
  @_versioned
  internal func _compareStringsPreLoop(
    other: _UnmanagedString<UInt16>
  ) -> _Ordering {
    let count = Swift.min(self.count, other.count)

    //
    // Fast scan until we find a difference
    //
    let idx = self._findDiffIdx(other)
    guard idx < count else {
      // Binary prefix: order by length.
      return _lexicographicalCompare(self.count, other.count)
    }
    let otherCU = other[idx]

    //
    // Fast path: if other is super-ASCII post-normalization, we must be less. If
    // other is ASCII and a single-scalar segment, we have our answer.
    //
    if otherCU > 0x7F {
      if _fastPath(
        other._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII
      ) {
        return .less
      }
    } else {
      let selfASCIIChar = UInt16(self[idx])
      _sanityCheck(selfASCIIChar != otherCU, "should be different")
      // At the very end of `other`, no following combining scalars can
      // affect the comparison.
      if idx+1 == other.count {
        return _lexicographicalCompare(selfASCIIChar, otherCU)
      }
      // A boundary after idx means otherCU's segment is complete.
      if _fastPath(other.hasNormalizationBoundary(after: idx)) {
        return _lexicographicalCompare(selfASCIIChar, otherCU)
      }
    }

    //
    // Otherwise, need to normalize the segment and then compare
    //
    let selfASCIIChar = UInt16(self[idx])
    return _compareStringsPostSuffix(
      selfASCIIChar: selfASCIIChar, otherUTF16: other[idx...]
    )
  }
}
|
|
|
|
extension _StringGuts {
  /// Whether a normalization-segment boundary follows position `index`
  /// (trivially true at the end of the string).
  func hasNormalizationBoundary(after index: Int) -> Bool {
    let followingIndex = index + 1
    guard followingIndex < self.count else { return true }
    return _hasNormalizationBoundary(before: self[followingIndex])
  }
}
|
|
|
|
extension _UnmanagedOpaqueString {
  /// Whether a normalization-segment boundary follows position `index`
  /// (trivially true at the end of the string).
  func hasNormalizationBoundary(after index: Int) -> Bool {
    let followingIndex = index + 1
    guard followingIndex < self.count else { return true }
    return _hasNormalizationBoundary(before: self[followingIndex])
  }
}
|
|
|
|
extension _UnmanagedString where CodeUnit == UInt16 {
  /// Whether a normalization-segment boundary follows position `index`
  /// (trivially true at the end of the string).
  func hasNormalizationBoundary(after index: Int) -> Bool {
    let followingIndex = index + 1
    guard followingIndex < self.count else { return true }
    return _hasNormalizationBoundary(before: self[followingIndex])
  }
}
|
|
|
|
/// Compares a differing ASCII code unit from the left string against the
/// right string's current normalization segment, normalizing that segment
/// first when it is not already prenormal.
private func _compareStringsPostSuffix(
  selfASCIIChar: UInt16,
  otherUTF16: _UnmanagedString<UInt16>
) -> _Ordering {
  let otherCU = otherUTF16[0]
  _sanityCheck(otherCU <= 0x7F, "should be ASCII, otherwise no need to call")

  let segmentEndIdx = otherUTF16._findNormalizationSegmentEnd(startingFrom: 0)
  let segment = otherUTF16[..<segmentEndIdx]

  // Fast path: If prenormal, we're done.
  if _Normalization._prenormalQuickCheckYes(segment) {
    return _lexicographicalCompare(selfASCIIChar, otherCU)
  }

  // Normalize segment, and then compare first code unit
  var outputBuffer = _Normalization._SegmentOutputBuffer(allZeros:())
  if _fastPath(
    segment._tryNormalize(into: &outputBuffer) != nil
  ) {
    return _lexicographicalCompare(selfASCIIChar, outputBuffer[0])
  }
  // Fixed buffer was too small: fall back to heap normalization.
  return _lexicographicalCompare(selfASCIIChar, segment._slowNormalize()[0])
}
|
|
|
|
extension _UnmanagedString where CodeUnit == UInt16 {
|
|
//
|
|
// Find the end of the normalization segment
|
|
//
|
|
/// Returns the index one past the end of the normalization segment that
/// begins at `idx`.
internal func _findNormalizationSegmentEnd(startingFrom idx: Int) -> Int {
  let count = self.count
  _sanityCheck(idx < count, "out of bounds")

  // Normalization boundaries are best queried before known starters. Advance
  // past one scalar first.
  var (_, segmentEndIdx) = self._parseRawScalar(startingFrom: idx)
  while segmentEndIdx < count {
    let (scalar, nextIdx) = self._parseRawScalar(startingFrom: segmentEndIdx)
    // A scalar with a boundary before it starts the next segment.
    if scalar._hasNormalizationBoundaryBefore {
      break
    }
    segmentEndIdx = nextIdx
  }
  return segmentEndIdx
}
|
|
|
|
/// Returns the start index of the normalization segment that ends just
/// before `idx`, by reverse-parsing scalars until one with a boundary
/// before it is found.
internal func _findNormalizationSegmentStart(
  endingAt idx: Int // one-past-the-end
) -> Int {
  var idx = idx
  let count = self.count
  _sanityCheck(idx > 0 && idx <= count, "out of bounds")

  while idx > 0 {
    let (scalar, priorIdx) = _reverseParseRawScalar(endingAt: idx)
    idx = priorIdx
    if scalar._hasNormalizationBoundaryBefore {
      break
    }
  }
  return idx
}
|
|
|
|
/// Returns the (start, end) bounds of the normalization segment containing
/// position `idx`.
internal func _findNormalizationSegment(spanning idx: Int) -> (Int, Int) {
  var idx = idx

  // Corner case: if we're sub-surrogate, back up
  if _slowPath(
    idx > 0
    && _isTrailingSurrogate(self[idx])
    && _isLeadingSurrogate(self[idx-1])
  ) {
    idx -= 1
  }
  let segmentEnd = self._findNormalizationSegmentEnd(startingFrom: idx)

  // Find the start
  if _slowPath(idx == 0) {
    return (0, segmentEnd)
  }

  // Check current scalar
  if self._parseRawScalar(startingFrom: idx).0._hasNormalizationBoundaryBefore {
    return (idx, segmentEnd)
  }

  // Reverse parse until we found the segment start
  let segmentStart = self._findNormalizationSegmentStart(endingAt: idx)

  return (segmentStart, segmentEnd)
}
|
|
|
|
// Whether the segment identified by `idx` is prenormal.
|
|
//
|
|
// Scalar values below 0x300 are special: normalization segments containing only
|
|
// one such scalar are trivially prenormal under NFC. Most Latin-derived scripts
|
|
// can be represented entirely by <0x300 scalar values, meaning that many user
|
|
// strings satisfy this prenormal check. We call sub-0x300 scalars "Latiny" (not
|
|
// official terminology).
|
|
//
|
|
// The check is effectively:
|
|
// 1) Whether the current scalar <0x300, AND
|
|
// 2) Whether the current scalar comprises the entire segment
|
|
//
|
|
/// Whether the segment starting at `idx` is trivially prenormal: a single
/// sub-0x300 ("Latiny") scalar that comprises the entire segment.
internal func _isLatinyPrenormal(idx: Int) -> Bool {
  _sanityCheck(idx < self.count, "out of bounds")

  let currentCU = self[idx]
  guard currentCU < 0x300 else { return false }
  // A final code unit trivially ends its segment.
  guard idx+1 < self.count else { return true }

  let followingCU = self[idx+1]
  return followingCU < 0x300 || _hasNormalizationBoundary(before: followingCU)
}
|
|
|
|
/// Three-way compare of two UTF-16 strings: scan for the first difference,
/// answer directly when both sides are prenormal Latiny segments, and
/// otherwise fall back to segment-based comparison.
@_versioned
internal
func _compareStringsPreLoop(
  other: _UnmanagedString<UInt16>
) -> _Ordering {
  let count = Swift.min(self.count, other.count)

  //
  // Fast scan until we find a diff
  //
  let idx = _findDiffIdx(other)
  guard idx < count else {
    // Binary prefix: order by length.
    return _lexicographicalCompare(self.count, other.count)
  }
  let selfCU = self[idx]
  let otherCU = other[idx]

  //
  // Fast path: sub-0x300 single-scalar segments can be compared directly
  //
  if _fastPath(
    _isLatinyPrenormal(idx: idx)
    && other._isLatinyPrenormal(idx: idx)
  ) {
    return _lexicographicalCompare(selfCU, otherCU)
  }

  return self._compareStringsSuffix(other: other, randomIndex: idx)
}
|
|
|
|
// Is the shorter of the two parameters a prefix of the other parameter?
|
|
// Is the shorter of `self` and `other` a prefix of the other? Operands of
// equal length are never considered prefixes of one another.
private func shorterPrefixesOther(
  _ other: _UnmanagedString<UInt16>
) -> Bool {
  // Equal lengths: neither operand is "the shorter".
  guard self.count != other.count else {
    return false
  }

  // The shorter is a prefix iff every code unit of the common prefix agrees.
  let sharedCount = Swift.min(self.count, other.count)
  var i = 0
  while i < sharedCount {
    guard self[i] == other[i] else {
      return false
    }
    i += 1
  }
  return true
}
|
|
|
|
// Slow-path comparison, entered at `randomIndex`: the first binary point of
// difference between `self` and `other`. Backs up to the enclosing
// normalization segments, normalizes them into stack buffers, and compares;
// falls back to `_compareStringsPathological` whenever segment-at-a-time
// processing cannot decide the answer.
private func _compareStringsSuffix(
  other: _UnmanagedString<UInt16>,
  randomIndex: Int
) -> _Ordering {
  let count = Swift.min(self.count, other.count)
  let selfCU = self[randomIndex]
  let otherCU = other[randomIndex]
  _sanityCheck(randomIndex >= 0 && randomIndex < count, "out of bounds")
  _sanityCheck(selfCU != otherCU, "should be called at a point of difference")

  //
  // Find the segment surrounding the random index passed in. This may involve
  // some back tracking to the nearest normalization boundary. Once we've
  // identified the segment, we can normalize and continue comparison.
  //
  // NOTE: We need to back-track for both self and other. Even though prefixes
  // are binary equal, the point of difference might be at the start of a new
  // segment for one and in the middle of the prior segment for the other. In
  // which case, we will want to effectively compare the two consecutive
  // segments together.
  //
  let (selfSegmentStartIdx, selfSegmentEndIdx) =
    self._findNormalizationSegment(spanning: randomIndex)
  let (otherSegmentStartIdx, otherSegmentEndIdx) =
    other._findNormalizationSegment(spanning: randomIndex)
  // Compare from the earlier of the two segment starts so both sides cover
  // the same leading code units (which are binary equal up to randomIndex).
  let comparisonStartIdx = Swift.min(selfSegmentStartIdx, otherSegmentStartIdx)

  //
  // Fast path: if both are prenormal, we have our answer
  //
  let selfSegment = self[comparisonStartIdx..<selfSegmentEndIdx]
  let otherSegment = other[comparisonStartIdx..<otherSegmentEndIdx]
  let selfSegmentPrenormal = _Normalization._prenormalQuickCheckYes(selfSegment)
  let otherSegmentPrenormal = _Normalization._prenormalQuickCheckYes(
    otherSegment)
  if selfSegmentPrenormal && otherSegmentPrenormal {
    return _lexicographicalCompare(selfCU, otherCU)
  }

  //
  // Pathological case: multi-segment expanders ruin segment-by-segment
  // processing.
  //
  // NOTE: Multi-segment expanders are (at least up til Unicode 10) always the
  // beginning of a normalization segment (i.e. they are starters). This is very
  // unlikely to change in the future, as new non-starter scalars that normalize
  // to pre-existing scalars would have to produce a starter. We validate this
  // fact on constructing our MultiSegmentExpander set, so we can rely on it
  // here.
  //
  if _slowPath(
    selfSegment._parseRawScalar().0._isMultiSegmentExpander
    || otherSegment._parseRawScalar().0._isMultiSegmentExpander
  ) {
    return self[comparisonStartIdx...]._compareStringsPathological(
      other: other[comparisonStartIdx...]
    )
  }

  //
  // Normalize segments and compare. If they still differ, we have our answer.
  //
  var selfOutputBuffer = _Normalization._SegmentOutputBuffer(allZeros:())
  var otherOutputBuffer = _Normalization._SegmentOutputBuffer(allZeros:())
  let selfSegmentLengthOpt: Int?
  let otherSegmentLengthOpt: Int?
  if selfSegmentPrenormal {
    // Already prenormal: copy the segment into the buffer unchanged.
    selfOutputBuffer.fill(from: selfSegment)
    selfSegmentLengthOpt = selfSegment.count
  } else {
    // `nil` result means the segment would not fit in the output buffer.
    selfSegmentLengthOpt = selfSegment._tryNormalize(into: &selfOutputBuffer)
  }
  if otherSegmentPrenormal {
    otherOutputBuffer.fill(from: otherSegment)
    otherSegmentLengthOpt = otherSegment.count
  } else {
    otherSegmentLengthOpt = otherSegment._tryNormalize(into: &otherOutputBuffer)
  }

  if _slowPath(selfSegmentLengthOpt == nil || otherSegmentLengthOpt == nil) {
    // If we couldn't normalize a segment into a generously large stack buffer,
    // we have a pathological String.
    return self[comparisonStartIdx...]._compareStringsPathological(
      other: other[comparisonStartIdx...]
    )
  }
  let selfLength = selfSegmentLengthOpt._unsafelyUnwrappedUnchecked
  let otherLength = otherSegmentLengthOpt._unsafelyUnwrappedUnchecked

  // One normalized segment is a proper prefix of the other: the segments
  // alone cannot decide the order, so continue comparing after them.
  if Swift.shorterPrefixesOther(
    &selfOutputBuffer, selfLength,
    &otherOutputBuffer, otherLength)
  {
    let selfSlice = self[selfSegmentEndIdx...]
    let otherSlice = other[otherSegmentEndIdx...]
    return selfSlice._compareStringsPathological(other: otherSlice)
  }

  let comp = _lexicographicalCompare(
    &selfOutputBuffer, leftCount: selfLength,
    &otherOutputBuffer, rightCount: otherLength)
  if _fastPath(comp != .equal) {
    return comp
  }

  //
  // If they compare equal after normalization, we may have equal strings that
  // differ in form, e.g. NFC vs NFD strings. Or, we may have strings that
  // differ in form that also will differ later on. Either way, segment-by-
  // segment processing incurs significant overhead. We'd rather do larger
  // chunks of work at a time (e.g. ~1KB of text at a time). For now, we eagerly
  // process the entire strings, as chunking properly without guarantees of
  // normality is tricky (and expensive at times as well).
  //
  // NOTE: We could add a chunking path. It is hard to do correctly, because
  // Unicode. It's especially hard to test, because Unicode. It's hard to ensure
  // lasting correctness, because Unicode. (Also, sometimes it's impossible, but
  // that's what _compareStringsPathological is for.) However, it helps for very
  // long strings that differ in the middle. We might want this one day... but
  // not today.
  //
  // TODO: An additional (or even repeated) reapplying of the algorithm,
  // including the binary diff scan, could greatly benefit strings that only
  // sparsely differ in normality (penalizing strings that densely differ in
  // normality). This would add complexity, but with compelling data could be an
  // alternative to chunking.
  //
  return self[selfSegmentEndIdx...]._compareStringsPathological(
    other: other[otherSegmentEndIdx...]
  )
}
|
|
|
|
// Last-resort comparison: normalize both strings lazily, code unit by code
// unit, and compare the resulting normalized streams.
private func _compareStringsPathological(
  other: _UnmanagedString<UInt16>
) -> _Ordering {
  let rhsIterator = _NormalizedCodeUnitIterator(other)
  var lhsIterator = _NormalizedCodeUnitIterator(self)
  return lhsIterator.compare(with: rhsIterator)
}
|
|
}
|
|
|
|
// Pointer-based overload: views each fixed-size segment output buffer as a
// UInt16 buffer of the given length, then defers to the buffer overload.
private func shorterPrefixesOther(
  _ selfBuffer: UnsafePointer<_Normalization._SegmentOutputBuffer>,
  _ selfLength: Int,
  _ otherBuffer: UnsafePointer<_Normalization._SegmentOutputBuffer>,
  _ otherLength: Int
) -> Bool {
  let selfUnits = _castOutputBuffer(selfBuffer, endingAt: selfLength)
  let otherUnits = _castOutputBuffer(otherBuffer, endingAt: otherLength)
  return shorterPrefixesOther(selfUnits, otherUnits)
}
|
|
|
|
// Is the shorter of the two parameters a prefix of the other parameter?
|
|
// Is the shorter of the two buffers a prefix of the other? Buffers of equal
// length are never considered prefixes of one another.
private func shorterPrefixesOther(
  _ selfBuffer: UnsafeBufferPointer<UInt16>,
  _ otherBuffer: UnsafeBufferPointer<UInt16>
) -> Bool {
  // Equal lengths: neither buffer is "the shorter".
  guard selfBuffer.count != otherBuffer.count else {
    return false
  }

  // `zip` stops at the end of the shorter buffer, i.e. after exactly
  // min(selfBuffer.count, otherBuffer.count) element pairs.
  for (selfCU, otherCU) in zip(selfBuffer, otherBuffer) {
    if selfCU != otherCU {
      return false
    }
  }
  return true
}
|
|
|