swift-mirror/stdlib/public/core/StringUTF16View.swift

//===--- StringUTF16.swift ------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

// FIXME(ABI)#71 : The UTF-16 string view should have a custom iterator type to
// allow performance optimizations of linear traversals.

extension String {
  /// A view of a string's contents as a collection of UTF-16 code units.
  ///
  /// You can access a string's view of UTF-16 code units by using its `utf16`
  /// property. A string's UTF-16 view encodes the string's Unicode scalar
  /// values as 16-bit integers.
  ///
  ///     let flowers = "Flowers 💐"
  ///     for v in flowers.utf16 {
  ///         print(v)
  ///     }
  ///     // 70
  ///     // 108
  ///     // 111
  ///     // 119
  ///     // 101
  ///     // 114
  ///     // 115
  ///     // 32
  ///     // 55357
  ///     // 56464
  ///
  /// Unicode scalar values that make up a string's contents can be up to 21
  /// bits long. The longer scalar values may need two `UInt16` values for
  /// storage. Those "pairs" of code units are called *surrogate pairs*.
  ///
  ///     let flowermoji = "💐"
  ///     for v in flowermoji.unicodeScalars {
  ///         print(v, v.value)
  ///     }
  ///     // 💐 128144
  ///
  ///     for v in flowermoji.utf16 {
  ///         print(v)
  ///     }
  ///     // 55357
  ///     // 56464
  ///
  /// To convert a `String.UTF16View` instance back into a string, use the
  /// `String` type's `init(_:)` initializer.
  ///
  ///     let favemoji = "My favorite emoji is 🎉"
  ///     if let i = favemoji.utf16.firstIndex(where: { $0 >= 128 }) {
  ///         let asciiPrefix = String(favemoji.utf16[..<i])!
  ///         print(asciiPrefix)
  ///     }
  ///     // Prints "My favorite emoji is "
  ///
  /// UTF16View Elements Match NSString Characters
  /// ============================================
  ///
  /// The UTF-16 code units of a string's `utf16` view match the elements
  /// accessed through indexed `NSString` APIs.
  ///
  ///     print(flowers.utf16.count)
  ///     // Prints "10"
  ///
  ///     let nsflowers = flowers as NSString
  ///     print(nsflowers.length)
  ///     // Prints "10"
  ///
  /// Unlike `NSString`, however, `String.UTF16View` does not use integer
  /// indices. If you need to access a specific position in a UTF-16 view, use
  /// Swift's index manipulation methods. The following example accesses the
  /// fourth code unit in both the `flowers` and `nsflowers` strings:
  ///
  ///     print(nsflowers.character(at: 3))
  ///     // Prints "119"
  ///
  ///     let i = flowers.utf16.index(flowers.utf16.startIndex, offsetBy: 3)
  ///     print(flowers.utf16[i])
  ///     // Prints "119"
  ///
  /// Although the Swift overlay updates many Objective-C methods to return
  /// native Swift indices and index ranges, some still return instances of
  /// `NSRange`. To convert an `NSRange` instance to a range of
  /// `String.Index`, use the `Range(_:in:)` initializer, which takes an
  /// `NSRange` and a string as arguments.
  ///
  ///     let snowy = "❄️ Let it snow! ☃️"
  ///     let nsrange = NSRange(location: 3, length: 12)
  ///     if let range = Range(nsrange, in: snowy) {
  ///         print(snowy[range])
  ///     }
  ///     // Prints "Let it snow!"
  @frozen
  public struct UTF16View: Sendable {
    @usableFromInline
    internal var _guts: _StringGuts

    @inlinable
    internal init(_ guts: _StringGuts) {
      self._guts = guts
      _invariantCheck()
    }
  }
}

extension String.UTF16View {
  #if !INTERNAL_CHECKS_ENABLED
  @inlinable @inline(__always) internal func _invariantCheck() {}
  #else
  @usableFromInline @inline(never) @_effects(releasenone)
  internal func _invariantCheck() {
    _internalInvariant(
      startIndex.transcodedOffset == 0 && endIndex.transcodedOffset == 0)
  }
  #endif // INTERNAL_CHECKS_ENABLED
}

extension String.UTF16View: BidirectionalCollection {
  public typealias Index = String.Index

  /// The position of the first code unit if the `String` is
  /// nonempty; identical to `endIndex` otherwise.
  @inlinable @inline(__always)
  public var startIndex: Index { return _guts.startIndex }

  /// The "past the end" position---that is, the position one greater than
  /// the last valid subscript argument.
  ///
  /// In an empty UTF-16 view, `endIndex` is equal to `startIndex`.
  @inlinable @inline(__always)
  public var endIndex: Index { return _guts.endIndex }

  @inline(__always)
  internal var _breadcrumbStride: Int { _StringBreadcrumbs.breadcrumbStride }

  @inlinable @inline(__always)
  public func index(after idx: Index) -> Index {
    var idx = _guts.ensureMatchingEncoding(idx)
    _precondition(idx._encodedOffset < _guts.count,
      "String index is out of bounds")
    if _slowPath(_guts.isForeign) { return _foreignIndex(after: idx) }
    if _guts.isASCII {
      return idx.nextEncoded._scalarAligned._encodingIndependent
    }

    // For a BMP scalar (1-3 UTF-8 code units), advance past it. For a non-BMP
    // scalar, use a transcoded offset first.

    // TODO: If transcoded is 1, can we just skip ahead 4?

    idx = _utf16AlignNativeIndex(idx)

    let len = _guts.fastUTF8ScalarLength(startingAt: idx._encodedOffset)
    if len == 4 && idx.transcodedOffset == 0 {
      return idx.nextTranscoded._knownUTF8
    }
    return idx
      .strippingTranscoding
      .encoded(offsetBy: len)
      ._scalarAligned
      ._knownUTF8
  }

  @inlinable @inline(__always)
  public func index(before idx: Index) -> Index {
    var idx = _guts.ensureMatchingEncoding(idx)
    _precondition(!idx.isZeroPosition && idx <= endIndex,
      "String index is out of bounds")
    if _slowPath(_guts.isForeign) { return _foreignIndex(before: idx) }
    if _guts.isASCII {
      return idx.priorEncoded._scalarAligned._encodingIndependent
    }

    if idx.transcodedOffset != 0 {
      _internalInvariant(idx.transcodedOffset == 1)
      return idx.strippingTranscoding._scalarAligned._knownUTF8
    }

    idx = _utf16AlignNativeIndex(idx)
    let len = _guts.fastUTF8ScalarLength(endingAt: idx._encodedOffset)
    if len == 4 {
      // 2 UTF-16 code units comprise this scalar; advance to the beginning and
      // start mid-scalar transcoding
      return idx.encoded(offsetBy: -len).nextTranscoded._knownUTF8
    }

    // Single UTF-16 code unit
    _internalInvariant((1...3) ~= len)
    return idx.encoded(offsetBy: -len)._scalarAligned._knownUTF8
  }

  @_effects(releasenone)
  public func index(_ i: Index, offsetBy n: Int) -> Index {
    var i = _guts.ensureMatchingEncoding(i)
    _precondition(i <= endIndex, "String index is out of bounds")

    if _slowPath(_guts.isForeign) {
      return _foreignIndex(i, offsetBy: n)
    }

    if _guts.isASCII {
      return Index(
        _encodedOffset: i._encodedOffset + n
      )._scalarAligned._encodingIndependent
    }

    i = _utf16AlignNativeIndex(i)
    let threshold = (
      i == startIndex ? _breadcrumbStride / 2 : _breadcrumbStride)
    if n.magnitude < threshold {
      // Do not use breadcrumbs if directly computing the result is expected
      // to be cheaper.
      return _index(i, offsetBy: n)._knownUTF8
    }

    let lowerOffset = _nativeGetOffset(for: i)
    let result = _nativeGetIndex(for: lowerOffset + n)
    return result
  }

  @_effects(releasenone)
  public func index(
    _ i: Index, offsetBy n: Int, limitedBy limit: Index
  ) -> Index? {
    var limit = _guts.ensureMatchingEncoding(limit)
    guard _fastPath(limit <= endIndex) else { return index(i, offsetBy: n) }

    var i = _guts.ensureMatchingEncoding(i)
    _precondition(i <= endIndex, "String index is out of bounds")

    if _slowPath(_guts.isForeign) {
      return _foreignIndex(i, offsetBy: n, limitedBy: limit)
    }

    if !_guts.isASCII { // We have ASCII fast paths below
      limit = _utf16AlignNativeIndex(limit)
      i = _utf16AlignNativeIndex(i)
      let threshold = (
        _breadcrumbStride + (i == startIndex ? 0 : _breadcrumbStride / 2))
      if n.magnitude < threshold {
        // Do not use breadcrumbs if directly computing the result is expected
        // to be cheaper.
        return _index(i, offsetBy: n, limitedBy: limit)?._knownUTF8
      }
    }

    let iOffset = _nativeGetOffset(for: i)
    let limitOffset = _nativeGetOffset(for: limit)

    // If distance < 0, limit has no effect if it is greater than i.
    if _slowPath(n < 0 && limit <= i && limitOffset > iOffset + n) {
      return nil
    }
    // If distance > 0, limit has no effect if it is less than i.
    if _slowPath(n >= 0 && limit >= i && limitOffset < iOffset + n) {
      return nil
    }

    let result = _nativeGetIndex(for: iOffset + n)
    return result
  }

  @_effects(releasenone)
  public func distance(from start: Index, to end: Index) -> Int {
    var start = _guts.ensureMatchingEncoding(start)
    var end = _guts.ensureMatchingEncoding(end)

    // FIXME: This method used to not properly validate indices before 5.7;
    // temporarily allow older binaries to keep invoking undefined behavior as
    // before.
    _precondition(
      ifLinkedOnOrAfter: .v5_7_0,
      start._encodedOffset <= _guts.count,
      "String index is out of bounds")
    _precondition(
      ifLinkedOnOrAfter: .v5_7_0,
      end._encodedOffset <= _guts.count,
      "String index is out of bounds")

    if _slowPath(_guts.isForeign) {
      return _foreignDistance(from: start, to: end)
    }

    let utf8Distance = end._encodedOffset - start._encodedOffset

    if _guts.isASCII {
      return utf8Distance
    }

    let threshold = (start == startIndex || end == startIndex
      ? _breadcrumbStride / 2
      : _breadcrumbStride)
    if utf8Distance.magnitude < threshold {
      // Do not use breadcrumbs if directly computing the result is expected to
      // be cheaper. The conservative threshold above assumes that each UTF-16
      // code unit will map to a single UTF-8 code unit, i.e., the worst
      // possible (a.k.a. most compact) case with all ASCII scalars.
      // FIXME: Figure out if a more optimistic threshold would work better.
      start = _utf16AlignNativeIndex(start)
      end = _utf16AlignNativeIndex(end)
      guard start <= end else {
        return -_utf16Distance(from: end, to: start)
      }
      return _utf16Distance(from: start, to: end)
    }

    let lower = _nativeGetOffset(for: start)
    let upper = _nativeGetOffset(for: end)
    return upper &- lower
  }

  @inlinable
  public var count: Int {
    if _slowPath(_guts.isForeign) {
      return _foreignCount()
    }
    return _nativeGetOffset(for: endIndex)
  }

  internal func _indexRange(
    for offsets: Range<Int>,
    from start: Index
  ) -> Range<Index> {
    _internalInvariant(_guts.hasMatchingEncoding(start))
    if _slowPath(_guts.isForeign) {
      let lower = self.index(start, offsetBy: offsets.lowerBound)
      let upper = _foreignIndex(lower, offsetBy: offsets.count)
      return unsafe Range(uncheckedBounds: (lower, upper))
    }

    if _guts.isASCII {
      let lower = self.index(start, offsetBy: offsets.lowerBound)
      let upper = self.index(lower, offsetBy: offsets.count)
      return unsafe Range(uncheckedBounds: (lower, upper))
    }

    if offsets.count < _breadcrumbStride / 2 {
      let lower = self.index(start, offsetBy: offsets.lowerBound)
      let upper = _index(lower, offsetBy: offsets.count)._knownUTF8
      return unsafe Range(uncheckedBounds: (lower, upper))
    }

    let bias = _nativeGetOffset(for: start)
    let lower = (
      offsets.lowerBound - bias <= _breadcrumbStride / 2
      ? _index(start, offsetBy: offsets.lowerBound)
      : _nativeGetIndex(for: bias + offsets.lowerBound))
    let upper = _nativeGetIndex(for: bias + offsets.upperBound)
    return unsafe Range(uncheckedBounds: (lower, upper))
  }

  internal func _offsetRange(
    for range: Range<Index>,
    from start: Index
  ) -> Range<Int> {
    var lower = _guts.ensureMatchingEncoding(range.lowerBound)
    var upper = _guts.ensureMatchingEncoding(range.upperBound)
    _internalInvariant(_guts.hasMatchingEncoding(start))

    _precondition(
      ifLinkedOnOrAfter: .v5_7_0,
      lower._encodedOffset <= _guts.count,
      "String index is out of bounds")
    _precondition(
      ifLinkedOnOrAfter: .v5_7_0,
      upper._encodedOffset <= _guts.count,
      "String index is out of bounds")

    if _slowPath(_guts.isForeign) {
      let lowerOffset = _foreignDistance(from: start, to: lower)
      let distance = _foreignDistance(from: lower, to: upper)
      return unsafe Range(uncheckedBounds: (lowerOffset, lowerOffset + distance))
    }

    let utf8Distance = upper._encodedOffset - lower._encodedOffset

    if _guts.isASCII {
      let lowerOffset = lower._encodedOffset - start._encodedOffset
      return unsafe Range(uncheckedBounds: (lowerOffset, lowerOffset + utf8Distance))
    }

    if utf8Distance.magnitude <= _breadcrumbStride / 2 {
      lower = _utf16AlignNativeIndex(lower)
      upper = _utf16AlignNativeIndex(upper)
      let lowerOffset = distance(from: start, to: lower)
      let distance = _utf16Distance(from: lower, to: upper)
      return unsafe Range(uncheckedBounds: (lowerOffset, lowerOffset + distance))
    }

    let bias = _nativeGetOffset(for: start)
    let utf8StartOffset = lower._encodedOffset - start._encodedOffset
    let lowerOffset = (
      utf8StartOffset <= _breadcrumbStride / 2
      ? _utf16Distance(from: start, to: lower)
      : _nativeGetOffset(for: lower) - bias)
    let upperOffset = _nativeGetOffset(for: upper) - bias
    return unsafe Range(uncheckedBounds: (lowerOffset, upperOffset))
  }

  /// Accesses the code unit at the given position.
  ///
  /// The following example uses the subscript to print the value of a
  /// string's first UTF-16 code unit.
  ///
  ///     let greeting = "Hello, friend!"
  ///     let i = greeting.utf16.startIndex
  ///     print("First character's UTF-16 code unit: \(greeting.utf16[i])")
  ///     // Prints "First character's UTF-16 code unit: 72"
  ///
  /// - Parameter position: A valid index of the view. `position` must be
  ///   less than the view's end index.
  @inlinable @inline(__always)
  public subscript(idx: Index) -> UTF16.CodeUnit {
    let idx = _guts.ensureMatchingEncoding(idx)
    _precondition(idx._encodedOffset < _guts.count,
      "String index is out of bounds")
    return self[_unchecked: idx]
  }

  @_alwaysEmitIntoClient @inline(__always)
  internal subscript(_unchecked idx: Index) -> UTF16.CodeUnit {
    if _fastPath(_guts.isFastUTF8) {
      let scalar = _guts.fastUTF8Scalar(
        startingAt: _guts.scalarAlign(idx)._encodedOffset)
      return scalar.utf16[idx.transcodedOffset]
    }

    return _foreignSubscript(position: idx)
  }
}

extension String.UTF16View {
  @frozen
  public struct Iterator: IteratorProtocol, Sendable {
    @usableFromInline
    internal var _guts: _StringGuts

    @usableFromInline
    internal var _position: Int = 0

    @usableFromInline
    internal var _end: Int

    // If non-nil, return this value for `next()` (and set it to nil).
    //
    // This is set when visiting a non-BMP scalar: the leading surrogate is
    // returned, this field is set with the value of the trailing surrogate, and
    // `_position` is advanced to the start of the next scalar.
    @usableFromInline
    internal var _nextIsTrailingSurrogate: UInt16? = nil

    @inlinable
    internal init(_ guts: _StringGuts) {
      self._end = guts.count
      self._guts = guts
    }

    @inlinable
    public mutating func next() -> UInt16? {
      if _slowPath(_nextIsTrailingSurrogate != nil) {
        let trailing = self._nextIsTrailingSurrogate._unsafelyUnwrappedUnchecked
        self._nextIsTrailingSurrogate = nil
        return trailing
      }
      guard _fastPath(_position < _end) else { return nil }

      let (scalar, len) = _guts.errorCorrectedScalar(startingAt: _position)
      _position &+= len

      if _slowPath(scalar.value > UInt16.max) {
        self._nextIsTrailingSurrogate = scalar.utf16[1]
        return scalar.utf16[0]
      }
      return UInt16(truncatingIfNeeded: scalar.value)
    }
  }
  @inlinable
  public __consuming func makeIterator() -> Iterator {
    return Iterator(_guts)
  }
}


extension String.UTF16View: CustomStringConvertible {
  @inlinable @inline(__always)
  public var description: String { return String(_guts) }
}

extension String.UTF16View: CustomDebugStringConvertible {
  public var debugDescription: String {
    return "StringUTF16(\(self.description.debugDescription))"
  }
}

extension String {
  /// A UTF-16 encoding of `self`.
  @inlinable
  public var utf16: UTF16View {
    @inline(__always) get { return UTF16View(_guts) }
    @inline(__always) set { self = String(newValue._guts) }
  }

  /// Creates a string corresponding to the given sequence of UTF-16 code units.
  @inlinable @inline(__always)
  @available(swift, introduced: 4.0)
  public init(_ utf16: UTF16View) {
    self.init(utf16._guts)
  }
}

// Index conversions
extension String.UTF16View.Index {
  /// Creates an index in the given UTF-16 view that corresponds exactly to the
  /// specified string position.
  ///
  /// If the index passed as `sourcePosition` represents either the start of a
  /// Unicode scalar value or the position of a UTF-16 trailing surrogate,
  /// then the initializer succeeds. If `sourcePosition` does not have an
  /// exact corresponding position in `target`, then the result is `nil`. For
  /// example, an attempt to convert the position of a UTF-8 continuation byte
  /// results in `nil`.
  ///
  /// The following example finds the position of a space in a string and then
  /// converts that position to an index in the string's `utf16` view.
  ///
  ///     let cafe = "Café 🍵"
  ///
  ///     let stringIndex = cafe.firstIndex(of: "é")!
  ///     let utf16Index = String.Index(stringIndex, within: cafe.utf16)!
  ///
  ///     print(String(cafe.utf16[...utf16Index])!)
  ///     // Prints "Café"
  ///
  /// - Parameters:
  ///   - idx: A position in at least one of the views of the string
  ///     shared by `target`.
  ///   - target: The `UTF16View` in which to find the new position.
  public init?(
    _ idx: String.Index, within target: String.UTF16View
  ) {
    // As a special exception, we allow `idx` to be an UTF-16 index when `self`
    // is a UTF-8 string (or vice versa), to preserve compatibility with
    // (broken) code that keeps using indices from a bridged string after
    // converting the string to a native representation. Such indices are
    // invalid, but returning nil here can break code that appeared to work fine
    // for ASCII strings in Swift releases prior to 5.7.
    let idx = target._guts.ensureMatchingEncoding(idx)
    guard idx._encodedOffset <= target._guts.count else { return nil }

    if _slowPath(target._guts.isForeign) {
      guard idx._foreignIsWithin(target) else { return nil }
    } else { // fast UTF-8
      guard (
        // If the transcoded offset is non-zero, then `idx` addresses a trailing
        // surrogate, so its encoding offset is on a scalar boundary, and it's a
        // valid UTF-16 index.
        idx.transcodedOffset != 0
        /// Otherwise we need to reject indices that aren't scalar aligned.
        || target._guts.isOnUnicodeScalarBoundary(idx)
      ) else { return nil }
    }

    self = idx
  }

  /// Returns the position in the given view of Unicode scalars that
  /// corresponds exactly to this index.
  ///
  /// This index must be a valid index of `String(unicodeScalars).utf16`.
  ///
  /// This example first finds the position of a space (UTF-16 code point `32`)
  /// in a string's `utf16` view and then uses this method to find the same
  /// position in the string's `unicodeScalars` view.
  ///
  ///     let cafe = "Café 🍵"
  ///     let i = cafe.utf16.firstIndex(of: 32)!
  ///     let j = i.samePosition(in: cafe.unicodeScalars)!
  ///     print(String(cafe.unicodeScalars[..<j]))
  ///     // Prints "Café"
  ///
  /// - Parameter unicodeScalars: The view to use for the index conversion.
  ///   This index must be a valid index of at least one view of the string
  ///   shared by `unicodeScalars`.
  /// - Returns: The position in `unicodeScalars` that corresponds exactly to
  ///   this index. If this index does not have an exact corresponding
  ///   position in `unicodeScalars`, this method returns `nil`. For example,
  ///   an attempt to convert the position of a UTF-16 trailing surrogate
  ///   returns `nil`.
  public func samePosition(
    in unicodeScalars: String.UnicodeScalarView
  ) -> String.UnicodeScalarIndex? {
    return String.UnicodeScalarIndex(self, within: unicodeScalars)
  }
}

#if SWIFT_ENABLE_REFLECTION
// Reflection
extension String.UTF16View: CustomReflectable {
  /// Returns a mirror that reflects the UTF-16 view of a string.
  public var customMirror: Mirror {
    return Mirror(self, unlabeledChildren: self)
  }
}
#endif

// Slicing
extension String.UTF16View {
  public typealias SubSequence = Substring.UTF16View

  public subscript(r: Range<Index>) -> Substring.UTF16View {
    let r = _guts.validateSubscalarRange(r)
    return Substring.UTF16View(self, _bounds: r)
  }
}

// Foreign string support
extension String.UTF16View {
  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(after i: Index) -> Index {
    _internalInvariant(_guts.isForeign)
    return i.strippingTranscoding.nextEncoded._knownUTF16
  }

  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(before i: Index) -> Index {
    _internalInvariant(_guts.isForeign)
    return i.strippingTranscoding.priorEncoded._knownUTF16
  }

  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignSubscript(position i: Index) -> UTF16.CodeUnit {
    _internalInvariant(_guts.isForeign)
    return _guts.foreignErrorCorrectedUTF16CodeUnit(at: i.strippingTranscoding)
  }

  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignDistance(from start: Index, to end: Index) -> Int {
    _internalInvariant(_guts.isForeign)

    // Ignore transcoded offsets, i.e. scalar align if-and-only-if from a
    // transcoded view
    return end._encodedOffset - start._encodedOffset
  }

  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(
    _ i: Index, offsetBy n: Int, limitedBy limit: Index
  ) -> Index? {
    _internalInvariant(_guts.isForeign)
    let l = limit._encodedOffset - i._encodedOffset
    if n > 0 ? l >= 0 && l < n : l <= 0 && n < l {
      return nil
    }
    let offset = i._encodedOffset &+ n
    _precondition(offset >= 0 && offset <= _guts.count,
      "String index is out of bounds")
    return Index(_encodedOffset: offset)._knownUTF16
  }

  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignIndex(_ i: Index, offsetBy n: Int) -> Index {
    _internalInvariant(_guts.isForeign)
    let offset = i._encodedOffset &+ n
    _precondition(offset >= 0 && offset <= _guts.count,
      "String index is out of bounds")
    return Index(_encodedOffset: offset)._knownUTF16
  }

  @usableFromInline @inline(never)
  @_effects(releasenone)
  internal func _foreignCount() -> Int {
    _internalInvariant(_guts.isForeign)
    return endIndex._encodedOffset - startIndex._encodedOffset
  }

  // Align a native UTF-8 index to a valid UTF-16 position. If there is a
  // transcoded offset already, this is already a valid UTF-16 position
  // (referring to the second surrogate) and returns `idx`. Otherwise, this will
  // scalar-align the index. This is needed because we may be passed a
  // non-scalar-aligned index from the UTF8View.
  @_alwaysEmitIntoClient // Swift 5.1
  @inline(__always)
  internal func _utf16AlignNativeIndex(_ idx: String.Index) -> String.Index {
    _internalInvariant(!_guts.isForeign)
    guard idx.transcodedOffset == 0 else { return idx }
    return _guts.scalarAlign(idx)
  }
}

extension String.Index {
  @usableFromInline @inline(never) // opaque slow-path
  @_effects(releasenone)
  internal func _foreignIsWithin(_ target: String.UTF16View) -> Bool {
    _internalInvariant(target._guts.isForeign)

    // If we're transcoding, we're a UTF-8 view index, not UTF-16.
    return self.transcodedOffset == 0
  }
}

// Breadcrumb-aware acceleration
extension _StringGuts {
  @inline(__always)
  fileprivate func _useBreadcrumbs(forEncodedOffset offset: Int) -> Bool {
    return hasBreadcrumbs && offset >= _StringBreadcrumbs.breadcrumbStride
  }
}

extension String.UTF16View {

#if SWIFT_STDLIB_ENABLE_VECTOR_TYPES
  @inline(__always)
  internal func _utf16Length<U: SIMD, S: SIMD>(
    readPtr: inout UnsafeRawPointer,
    endPtr: UnsafeRawPointer,
    unsignedSIMDType: U.Type,
    signedSIMDType: S.Type
  ) -> Int where U.Scalar == UInt8, S.Scalar == Int8 {
    var utf16Count = 0

    while unsafe readPtr + MemoryLayout<U>.stride < endPtr {
      //Find the number of continuations (0b10xxxxxx)
      let sValue = unsafe readPtr.loadUnaligned(as: S.self)
      let continuations = S.zero.replacing(with: S.one, where: sValue .< -65 + 1)

      //Find the number of 4 byte code points (0b11110xxx)
      let uValue = unsafe readPtr.loadUnaligned(as: U.self)
      let fourBytes = unsafe S.zero.replacing(
        with: S.one,
        where: unsafeBitCast(
          uValue .>= 0b11110000,
          to: SIMDMask<S.MaskStorage>.self
        )
      )

      utf16Count &+= U.scalarCount + Int((fourBytes &- continuations).wrappedSum())

      unsafe readPtr += MemoryLayout<U>.stride
    }

    return utf16Count
  }
#endif

  internal func _utf16Distance(from start: Index, to end: Index) -> Int {
    _internalInvariant(end.transcodedOffset == 0 || end.transcodedOffset == 1)

    return unsafe (end.transcodedOffset - start.transcodedOffset) + _guts.withFastUTF8(
      range: start._encodedOffset ..< end._encodedOffset
    ) { utf8 in
      let rawBuffer = UnsafeRawBufferPointer(utf8)
      guard rawBuffer.count > 0 else { return 0 }

      var utf16Count = 0
      var readPtr = unsafe rawBuffer.baseAddress.unsafelyUnwrapped
      let initialReadPtr = unsafe readPtr
      let endPtr = unsafe readPtr + rawBuffer.count

      //eat leading continuations
      while unsafe readPtr < endPtr {
        let byte = unsafe readPtr.load(as: UInt8.self)
        if !UTF8.isContinuation(byte) {
          break
        }
        unsafe readPtr += 1
      }

#if SWIFT_STDLIB_ENABLE_VECTOR_TYPES
      // TODO: Currently, using SIMD sizes above SIMD8 is slower
      // Once that's fixed we should go up to SIMD64 here

      unsafe utf16Count &+= _utf16Length(
        readPtr: &readPtr,
        endPtr: endPtr,
        unsignedSIMDType: SIMD8<UInt8>.self,
        signedSIMDType: SIMD8<Int8>.self
      )

      //TO CONSIDER: SIMD widths <8 here

      //back up to the start of the current scalar if we may have a trailing
      //incomplete scalar
      if unsafe utf16Count > 0 && UTF8.isContinuation(readPtr.load(as: UInt8.self)) {
        while unsafe readPtr > initialReadPtr && UTF8.isContinuation(readPtr.load(as: UInt8.self)) {
          unsafe readPtr -= 1
        }

        //The trailing scalar may be incomplete, subtract it out and check below
        let byte = unsafe readPtr.load(as: UInt8.self)
        let len = _utf8ScalarLength(byte)
        utf16Count &-= len == 4 ? 2 : 1
        if unsafe readPtr == initialReadPtr {
          //if we backed up all the way and didn't hit a non-continuation, then
          //we don't have any complete scalars, and we should bail.
          return 0
        }
      }
#endif

      //trailing bytes
      while unsafe readPtr < endPtr {
        let byte = unsafe readPtr.load(as: UInt8.self)
        let len = _utf8ScalarLength(byte)
        // if we don't have enough bytes left, we don't have a complete scalar,
        // so don't add it to the count.
        if unsafe readPtr + len <= endPtr {
          utf16Count &+= len == 4 ? 2 : 1
        }
        unsafe readPtr += len
      }

      return utf16Count
    }
  }

  /// Return the UTF-16 offset corresponding to `idx`, measured from the
  /// start of this string, which must be a native UTF-8 string.
  ///
  /// - Complexity: This measures the UTF-16 distance of `idx` from its nearest
  ///    breadcrumb index (rounding down), so on average it needs to look at
  ///    `breadcrumbStride / 2` UTF-16 code units. (In addition to the O(log(n))
  ///    cost of looking up the nearest breadcrumb, and the amortizable O(n)
  ///    cost of generating the breadcrumbs in the first place.)
  @usableFromInline
  @_effects(releasenone)
  internal func _nativeGetOffset(for idx: Index) -> Int {
    _internalInvariant(idx._encodedOffset <= _guts.count)
    if _guts.isASCII {
      _internalInvariant(idx.transcodedOffset == 0)
      return idx._encodedOffset
    }
    // Trivial and common: start
    if idx == startIndex { return 0 }

    let idx = _utf16AlignNativeIndex(idx)

    guard _guts._useBreadcrumbs(forEncodedOffset: idx._encodedOffset) else {
      return _utf16Distance(from: startIndex, to: idx)
    }

    let breadcrumbs = _guts.loadUnmanagedBreadcrumbs()

    // Simple and common: endIndex aka `length`.
    if idx == endIndex {
      return unsafe breadcrumbs._withUnsafeGuaranteedRef { $0.utf16Length }
    }

    return unsafe breadcrumbs._withUnsafeGuaranteedRef { crumbs in
      // Otherwise, find the nearest lower-bound breadcrumb and count from there
      // FIXME: Starting from the upper-bound crumb when that is closer would
      // cut the average cost of the subsequent iteration by 50%.
      let (crumb, crumbOffset) = crumbs.getBreadcrumb(forIndex: idx)
      return crumbOffset + _utf16Distance(from: crumb, to: idx)
    }
  }

  /// Return the index at the given UTF-16 offset, measured from the
  /// start of this string, which must be a native UTF-8 string.
  ///
  /// - Complexity: This iterates UTF-16 code units starting from the
  ///    nearest breadcrumb to `offset` (rounding down), so on
  ///    average it needs to look at `breadcrumbStride / 2` UTF-16 code
  ///    units. (In addition to the O(1) cost of looking up the nearest
  ///    breadcrumb, and the amortizable O(n) cost of generating the
  ///    breadcrumbs in the first place.)
  @usableFromInline
  @_effects(releasenone)
  internal func _nativeGetIndex(for offset: Int) -> Index {
    _precondition(offset >= 0, "String index is out of bounds")

    // Trivial and common: start
    if offset == 0 { return startIndex }

    if _guts.isASCII {
      return Index(
        _encodedOffset: offset
      )._scalarAligned._encodingIndependent
    }

    guard _guts._useBreadcrumbs(forEncodedOffset: offset) else {
      return _index(startIndex, offsetBy: offset)._knownUTF8
    }

    // Simple and common: endIndex aka `length`.
    let breadcrumbs = _guts.loadUnmanagedBreadcrumbs()
    let utf16Count = unsafe breadcrumbs._withUnsafeGuaranteedRef { $0.utf16Length }
    if offset == utf16Count { return endIndex }

    // Otherwise, find the nearest lower-bound breadcrumb and advance that
    // FIXME: Starting from the upper-bound crumb when that is closer would cut
    // the average cost of the subsequent iteration by 50%.
    let (crumb, remaining) = unsafe breadcrumbs._withUnsafeGuaranteedRef {
      $0.getBreadcrumb(forOffset: offset)
    }
    _internalInvariant(crumb._canBeUTF8 && crumb._encodedOffset <= _guts.count)
    if remaining == 0 { return crumb }

    return unsafe _guts.withFastUTF8 { utf8 in
      var readIdx = crumb._encodedOffset
      let readEnd = utf8.count
      _internalInvariant(readIdx < readEnd)

      var utf16I = 0
      let utf16End: Int = remaining

      // Adjust for sub-scalar initial transcoding: If we're starting the scan
      // at a trailing surrogate, then we set our starting count to be -1 so as
      // offset counting the leading surrogate.
      if crumb.transcodedOffset != 0 {
        utf16I = -1
      }

      while true {
        _precondition(readIdx < readEnd, "String index is out of bounds")
        let len = unsafe _utf8ScalarLength(utf8[_unchecked: readIdx])
        let utf16Len = len == 4 ? 2 : 1
        utf16I &+= utf16Len

        if utf16I >= utf16End {
          // Uncommon: final sub-scalar transcoded offset
          if _slowPath(utf16I > utf16End) {
            _internalInvariant(utf16Len == 2)
            return Index(
              encodedOffset: readIdx, transcodedOffset: 1
            )._knownUTF8
          }
          return Index(
            _encodedOffset: readIdx &+ len
          )._scalarAligned._knownUTF8
        }

        readIdx &+= len
      }
      fatalError()
    }
  }

  // Copy (i.e. transcode to UTF-16) our contents into a buffer. `alignedRange`
  // means that the indices are part of the UTF16View.indices -- they are either
  // scalar-aligned or transcoded (e.g. derived from the UTF-16 view). They do
  // not need to go through an alignment check.
  internal func _nativeCopy(
    into buffer: UnsafeMutableBufferPointer<UInt16>,
    alignedRange range: Range<String.Index>
  ) {
    _internalInvariant(_guts.isFastUTF8)
    _internalInvariant(
      range.lowerBound == _utf16AlignNativeIndex(range.lowerBound))
    _internalInvariant(
      range.upperBound == _utf16AlignNativeIndex(range.upperBound))

    if _slowPath(range.isEmpty) { return }

    let isASCII = _guts.isASCII
    return unsafe _guts.withFastUTF8 { utf8 in
      var writeIdx = 0
      let writeEnd = buffer.count
      var readIdx = range.lowerBound._encodedOffset
      let readEnd = range.upperBound._encodedOffset

      if isASCII {
        _internalInvariant(range.lowerBound.transcodedOffset == 0)
        _internalInvariant(range.upperBound.transcodedOffset == 0)
        while readIdx < readEnd {
          unsafe _internalInvariant(utf8[readIdx] < 0x80)
          unsafe buffer[_unchecked: writeIdx] = unsafe UInt16(
            truncatingIfNeeded: utf8[_unchecked: readIdx])
          readIdx &+= 1
          writeIdx &+= 1
        }
        return
      }

      // Handle mid-transcoded-scalar initial index
      if _slowPath(range.lowerBound.transcodedOffset != 0) {
        _internalInvariant(range.lowerBound.transcodedOffset == 1)
        let (scalar, len) = unsafe _decodeScalar(utf8, startingAt: readIdx)
        // Note: this is intentionally not using the _unchecked subscript.
        // (We rely on debug assertions to catch out of bounds access.)
        unsafe buffer[writeIdx] = scalar.utf16[1]
        readIdx &+= len
        writeIdx &+= 1
      }

      // Transcode middle
      while readIdx < readEnd {
        let (scalar, len) = unsafe _decodeScalar(utf8, startingAt: readIdx)
        unsafe buffer[writeIdx] = scalar.utf16[0]
        readIdx &+= len
        writeIdx &+= 1
        if _slowPath(scalar.utf16.count == 2) {
          // Note: this is intentionally not using the _unchecked subscript.
          // (We rely on debug assertions to catch out of bounds access.)
          unsafe buffer[writeIdx] = scalar.utf16[1]
          writeIdx &+= 1
        }
      }

      // Handle mid-transcoded-scalar final index
      if _slowPath(range.upperBound.transcodedOffset == 1) {
        _internalInvariant(writeIdx < writeEnd)
        let (scalar, _) = unsafe _decodeScalar(utf8, startingAt: readIdx)
        _internalInvariant(scalar.utf16.count == 2)

        // Note: this is intentionally not using the _unchecked subscript.
        // (We rely on debug assertions to catch out of bounds access.)
        unsafe buffer[writeIdx] = scalar.utf16[0]
        writeIdx &+= 1
      }
      _internalInvariant(writeIdx <= writeEnd)
    }
  }
}