// swift-mirror/stdlib/public/core/StringUnicodeScalarView.swift

//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

extension String {
  /// A view of a string's contents as a collection of Unicode scalar values.
  ///
  /// You can access a string's view of Unicode scalar values by using its
  /// `unicodeScalars` property. Unicode scalar values are the 21-bit codes
  /// that are the basic unit of Unicode. Each scalar value is represented by
  /// a `UnicodeScalar` instance and is equivalent to a UTF-32 code unit.
  ///
  ///     let flowers = "Flowers 💐"
  ///     for v in flowers.unicodeScalars {
  ///         print(v.value)
  ///     }
  ///     // 70
  ///     // 108
  ///     // 111
  ///     // 119
  ///     // 101
  ///     // 114
  ///     // 115
  ///     // 32
  ///     // 128144
  ///
  /// Some characters that are visible in a string are made up of more than one
  /// Unicode scalar value. In that case, a string's `unicodeScalars` view
  /// contains more values than its `characters` view.
  ///
  ///     let flag = "🇵🇷"
  ///     for c in flag.characters {
  ///         print(c)
  ///     }
  ///     // 🇵🇷
  ///
  ///     for v in flag.unicodeScalars {
  ///         print(v.value)
  ///     }
  ///     // 127477
  ///     // 127479
  ///
  /// You can convert a `String.UnicodeScalarView` instance back into a string
  /// using the `String` type's `init(_:)` initializer.
  ///
  ///     let favemoji = "My favorite emoji is 🎉"
  ///     if let i = favemoji.unicodeScalars.index(where: { $0.value >= 128 }) {
  ///         let asciiPrefix = String(favemoji.unicodeScalars.prefix(upTo: i))
  ///         print(asciiPrefix)
  ///     }
  ///     // Prints "My favorite emoji is "
  public struct UnicodeScalarView :
    BidirectionalCollection,
    CustomStringConvertible,
    CustomDebugStringConvertible
  {
    /// Creates a view over the given string storage.
    ///
    /// - Parameters:
    ///   - _core: The storage this view reads from.
    ///   - coreOffset: The value stored in `_coreOffset`; it is added to
    ///     `_core` indices when forming `Index` positions. Defaults to `0`.
    internal init(_ _core: _StringCore, coreOffset: Int = 0) {
      self._coreOffset = coreOffset
      self._core = _core
    }

    /// An iterator that yields the UTF-16 code units of a `_StringCore`,
    /// beginning at a given `_core` offset and stopping at `core.endIndex`.
    internal struct _ScratchIterator : IteratorProtocol {
      var core: _StringCore
      var idx: Int

      /// Creates an iterator over `core` whose first element is `core[pos]`.
      @_versioned
      init(_ core: _StringCore, _ pos: Int) {
        self.core = core
        self.idx = pos
      }

      /// Returns the code unit at the current offset and then advances by
      /// one, or returns `nil` once the offset equals `core.endIndex`.
      @inline(__always)
      mutating func next() -> UTF16.CodeUnit? {
        guard idx != core.endIndex else { return nil }
        let codeUnit = core[idx]
        idx += 1
        return codeUnit
      }
    }

    /// A position in a string's `UnicodeScalars` view.
    ///
    /// Indices of the different string views can be converted to one another
    /// with the conversion initializers and the `samePosition(in:)` method
    /// overloads. The example below locates the solid heart pictograph in
    /// the string's character view and then converts that index to the same
    /// position in the Unicode scalars view:
    ///
    ///     let hearts = "Hearts <3 ♥︎ 💘"
    ///     let i = hearts.characters.index(of: "♥︎")!
    ///
    ///     let j = i.samePosition(in: hearts.unicodeScalars)
    ///     print(hearts.unicodeScalars.suffix(from: j))
    ///     // Prints "♥︎ 💘"
    ///     print(hearts.unicodeScalars[j].value)
    ///     // Prints "9829"
    public struct Index {
      /// The raw integer position. Views build it as a `_core` index plus
      /// their `_coreOffset`.
      @_versioned internal var _position: Int

      public // SPI(Foundation)
      init(_position: Int) {
        self._position = _position
      }
    }

    /// Converts the `_core` index `i` into a `UnicodeScalarIndex` by adding
    /// this view's `_coreOffset`.
    internal func _fromCoreIndex(_ i: Int) -> Index {
      let position = i + _coreOffset
      return Index(_position: position)
    }

    /// Converts the `UnicodeScalarIndex` `i` into a `_core` index by
    /// subtracting this view's `_coreOffset`.
    internal func _toCoreIndex(_ i: Index) -> Int {
      let position = i._position
      return position - _coreOffset
    }

    /// The position of the first Unicode scalar value in a nonempty string.
    ///
    /// For an empty string, `startIndex` and `endIndex` are equal.
    public var startIndex: Index {
      let coreStart = _core.startIndex
      return _fromCoreIndex(coreStart)
    }

    /// The "past the end" position---that is, the position one greater than
    /// the last valid subscript argument.
    ///
    /// For an empty Unicode scalars view, `endIndex` and `startIndex` are
    /// equal.
    public var endIndex: Index {
      let coreEnd = _core.endIndex
      return _fromCoreIndex(coreEnd)
    }

    /// Returns the position immediately after `i`.
    ///
    /// - Precondition: The next location exists.
    public func index(after i: Index) -> Index {
      let coreIndex = _toCoreIndex(i)
      var codeUnits = _ScratchIterator(_core, coreIndex)
      var utf16Decoder = UTF16()
      // Only the length reported by the decoder is needed; the decoded
      // result itself is discarded.
      let width = utf16Decoder._decodeOne(&codeUnits).1
      return _fromCoreIndex(coreIndex + width)
    }

    /// Returns the position immediately before `i`.
    ///
    /// - Precondition: The previous location exists.
    public func index(before i: Index) -> Index {
      var coreIndex = _toCoreIndex(i) - 1
      // `0b1101_11` is the top six bits of a UTF-16 trail surrogate and
      // `0b1101_10` those of a lead surrogate. Step back one more code unit
      // only when the trail surrogate is preceded by its lead, so the result
      // addresses the start of the pair.
      if _slowPath((_core[coreIndex] >> 10) == 0b1101_11),
        coreIndex != 0,
        (_core[coreIndex - 1] >> 10) == 0b1101_10 {
        coreIndex -= 1
      }
      return _fromCoreIndex(coreIndex)
    }

    /// Accesses the Unicode scalar value at the given position.
    ///
    /// The example below looks for a capital letter in a string's Unicode
    /// scalars view and then prints the character and Unicode scalar value
    /// found at that index:
    ///
    ///     let greeting = "Hello, friend!"
    ///     if let i = greeting.unicodeScalars.index(where: { "A"..."Z" ~= $0 }) {
    ///         print("First capital letter: \(greeting.unicodeScalars[i])")
    ///         print("Unicode scalar value: \(greeting.unicodeScalars[i].value)")
    ///     }
    ///     // Prints "First capital letter: H"
    ///     // Prints "Unicode scalar value: 72"
    ///
    /// - Parameter position: A valid index of the Unicode scalars view.
    ///   `position` must be less than the view's end index.
    public subscript(position: Index) -> UnicodeScalar {
      var utf16Decoder = UTF16()
      var codeUnits = _ScratchIterator(_core, _toCoreIndex(position))
      switch utf16Decoder.decode(&codeUnits) {
      case .emptyInput:
        _sanityCheckFailure("cannot subscript using an endIndex")
      case .error:
        // A decoding error is reported as U+FFFD.
        return UnicodeScalar(0xfffd)!
      case .scalarValue(let scalar):
        return scalar
      }
    }

    /// Accesses the Unicode scalar values in the given range.
    ///
    /// The example below uses this subscript to read the scalar values up
    /// to, but not including, the first comma (`","`) in the string.
    ///
    ///     let str = "All this happened, more or less."
    ///     let i = str.unicodeScalars.index(of: ",")!
    ///     let substring = str.unicodeScalars[str.unicodeScalars.startIndex ..< i]
    ///     print(String(substring))
    ///     // Prints "All this happened"
    ///
    /// - Complexity: O(*n*) if the underlying string is bridged from
    ///   Objective-C, where *n* is the length of the string; otherwise, O(1).
    public subscript(r: Range<Index>) -> UnicodeScalarView {
      let lower = _toCoreIndex(r.lowerBound)
      let upper = _toCoreIndex(r.upperBound)
      let slicedCore = _core[lower..<upper]
      // The slice's offset is the lower bound's position, so positions taken
      // from this view map onto the sliced core.
      return UnicodeScalarView(slicedCore, coreOffset: r.lowerBound._position)
    }

    /// An iterator over the Unicode scalars that make up a `UnicodeScalarView`
    /// collection.
    public struct Iterator : IteratorProtocol {
      /// Creates an iterator over the contents of `_base`.
      ///
      /// Exactly one of `_asciiBase`, `_base`, or `_iterator` is assigned,
      /// chosen by whether `_base` has contiguous storage and, if so, whether
      /// that storage is ASCII. `_baseSet` and `_ascii` record the choice.
      init(_ _base: _StringCore) {
        if _base.hasContiguousStorage {
            self._baseSet = true
          if _base.isASCII {
            // Contiguous ASCII storage: iterate it as a buffer of
            // `UTF8.CodeUnit` values.
            self._ascii = true
            self._asciiBase = UnsafeBufferPointer(
              start: _base._baseAddress?.assumingMemoryBound(
                to: UTF8.CodeUnit.self),
              count: _base.count).makeIterator()
          } else {
            // Contiguous non-ASCII storage: iterate it as a buffer of
            // `UTF16.CodeUnit` values.
            self._ascii = false
            self._base = UnsafeBufferPointer<UInt16>(
              start: _base._baseAddress?.assumingMemoryBound(
                to: UTF16.CodeUnit.self),
              count: _base.count).makeIterator()
          }
        } else {
          // No contiguous storage: fall back to the core's own iterator.
          self._ascii = false
          self._baseSet = false
          self._iterator = _base.makeIterator()
        }
      }

      /// Advances to the next element and returns it, or `nil` if no next
      /// element exists.
      ///
      /// Once `nil` has been returned, all subsequent calls return `nil`.
      ///
      /// - Precondition: `next()` has not been applied to a copy of `self`
      ///   since the copy was made.
      public mutating func next() -> UnicodeScalar? {
        var result: UnicodeDecodingResult
        if _baseSet {
          if _ascii {
            // ASCII path: each element becomes a scalar directly, without
            // going through the UTF-16 decoder.
            switch self._asciiBase.next() {
            case let x?:
              result = .scalarValue(UnicodeScalar(x))
            case nil:
              result = .emptyInput
            }
          } else {
            result = _decoder.decode(&(self._base!))
          }
        } else {
          result = _decoder.decode(&(self._iterator!))
        }
        switch result {
        case .scalarValue(let us):
          return us
        case .emptyInput:
          return nil
        case .error:
          // A decoding error is reported as U+FFFD.
          return UnicodeScalar(0xfffd)
        }
      }
      /// The UTF-16 decoder used on the two non-ASCII paths.
      internal var _decoder: UTF16 = UTF16()
      /// `true` when the core had contiguous storage, so `_asciiBase` or
      /// `_base` is in use rather than `_iterator`.
      internal let _baseSet: Bool
      /// `true` when the core had contiguous ASCII storage, so `_asciiBase`
      /// is in use.
      internal let _ascii: Bool
      /// The source iterator for contiguous ASCII storage.
      internal var _asciiBase: UnsafeBufferPointerIterator<UInt8>!
      /// The source iterator for contiguous non-ASCII storage.
      internal var _base: UnsafeBufferPointerIterator<UInt16>!
      /// The source iterator for storage that is not contiguous.
      internal var _iterator: IndexingIterator<_StringCore>!
    }

    /// Creates an iterator over the Unicode scalars in this view.
    ///
    /// - Returns: An iterator over this collection's `UnicodeScalar` elements.
    public func makeIterator() -> Iterator {
      let scalars = Iterator(_core)
      return scalars
    }

    /// A string built from this view's underlying storage.
    public var description: String {
      let text = String(_core)
      return text
    }

    /// The debug description of `description`, wrapped in
    /// `StringUnicodeScalarView(...)`.
    public var debugDescription: String {
      let quoted = description.debugDescription
      return "StringUnicodeScalarView(\(quoted))"
    }

    /// The string storage whose UTF-16 code units this view decodes into
    /// Unicode scalars.
    internal var _core: _StringCore

    /// The offset of this view's `_core` from an original core. This works
    /// around the fact that `_StringCore` is always zero-indexed.
    /// `_coreOffset` should be subtracted from `UnicodeScalarIndex._position`
    /// before that value is used as a `_core` index.
    internal var _coreOffset: Int
  }

  /// Creates a string from the given collection of Unicode scalars.
  ///
  /// Use this initializer to build a new string from a slice of another
  /// string's `unicodeScalars` view.
  ///
  ///     let picnicGuest = "Deserving porcupine"
  ///     if let i = picnicGuest.unicodeScalars.index(of: " ") {
  ///         let adjective = String(picnicGuest.unicodeScalars.prefix(upTo: i))
  ///         print(adjective)
  ///     }
  ///     // Prints "Deserving"
  ///
  /// Here `adjective` is produced by passing a slice of the
  /// `picnicGuest.unicodeScalars` view to this initializer.
  ///
  /// - Parameter unicodeScalars: A collection of Unicode scalar values.
  public init(_ unicodeScalars: UnicodeScalarView) {
    let core = unicodeScalars._core
    self.init(core)
  }

  /// The index type for a string's `unicodeScalars` view.
  ///
  /// This is an alias for `String.UnicodeScalarView.Index`.
  public typealias UnicodeScalarIndex = UnicodeScalarView.Index
}

extension String {
  /// The string's value represented as a collection of Unicode scalar values.
  ///
  /// Reading this property wraps the string's storage in a view; assigning
  /// to it replaces the string's storage with the storage of the new view.
  public var unicodeScalars: UnicodeScalarView {
    get {
      let view = UnicodeScalarView(_core)
      return view
    }
    set(newView) {
      _core = newView._core
    }
  }
}

extension String.UnicodeScalarView.Index : Comparable {
  /// Returns `true` when both indices hold the same `_position`.
  public static func == (
    lhs: String.UnicodeScalarView.Index,
    rhs: String.UnicodeScalarView.Index
  ) -> Bool {
    let (left, right) = (lhs._position, rhs._position)
    return left == right
  }

  /// Returns `true` when the `_position` of `lhs` is less than that of `rhs`.
  public static func < (
    lhs: String.UnicodeScalarView.Index,
    rhs: String.UnicodeScalarView.Index
  ) -> Bool {
    let (left, right) = (lhs._position, rhs._position)
    return left < right
  }
}

extension String.UnicodeScalarView : RangeReplaceableCollection {
  /// Creates an empty view backed by a fresh, empty `_StringCore`.
  public init() {
    self.init(_StringCore())
  }

  /// Reserves enough space in the view's underlying storage to hold the
  /// specified number of ASCII characters.
  ///
  /// A Unicode scalar value can need more storage than a single ASCII
  /// character, so appending to a Unicode scalar view after calling
  /// `reserveCapacity(_:)` may still trigger additional allocation.
  ///
  /// - Parameter n: The minimum number of ASCII characters' worth of storage
  ///   to allocate.
  ///
  /// - Complexity: O(*n*), where *n* is the capacity being reserved.
  public mutating func reserveCapacity(_ n: Int) {
    let minimumCapacity = n
    _core.reserveCapacity(minimumCapacity)
  }

  /// Appends the given Unicode scalar to the view.
  ///
  /// - Parameter x: The Unicode scalar to append to the view.
  public mutating func append(_ x: UnicodeScalar) {
    let scalar = x
    _core.append(scalar)
  }

  /// Appends the Unicode scalar values in the given sequence to the view.
  ///
  /// - Parameter newElements: A sequence of Unicode scalar values.
  ///
  /// - Complexity: O(*n*), where *n* is the length of the resulting view.
  public mutating func append<S : Sequence>(contentsOf newElements: S)
    where S.Iterator.Element == UnicodeScalar {
    // Each scalar is flattened lazily into its UTF-16 code units, which is
    // the form the underlying core is given.
    let lazyUTF16 = newElements.lazy.flatMap { $0.utf16 }
    _core.append(contentsOf: lazyUTF16)
  }

  /// Replaces the elements within the specified bounds with the given Unicode
  /// scalar values.
  ///
  /// Calling this method invalidates any existing indices for use with this
  /// string.
  ///
  /// - Parameters:
  ///   - bounds: The range of elements to replace. Both bounds must be valid
  ///     indices of the view.
  ///   - newElements: The new Unicode scalar values to add to the string.
  ///
  /// - Complexity: O(*m*), where *m* is the combined length of the view and
  ///   `newElements`. If the call to `replaceSubrange(_:with:)` simply
  ///   removes elements at the end of the string, the complexity is O(*n*),
  ///   where *n* is equal to `bounds.count`.
  public mutating func replaceSubrange<C>(
    _ bounds: Range<Index>,
    with newElements: C
  ) where C : Collection, C.Iterator.Element == UnicodeScalar {
    let lower = _toCoreIndex(bounds.lowerBound)
    let upper = _toCoreIndex(bounds.upperBound)
    let coreBounds: Range<Int> = lower ..< upper
    // The replacement is handed to the core as lazily flattened UTF-16.
    _core.replaceSubrange(
      coreBounds, with: newElements.lazy.flatMap { $0.utf16 })
  }
}

// Index conversions
extension String.UnicodeScalarIndex {
  /// Creates an index in the given Unicode scalars view that corresponds
  /// exactly to the specified `UTF16View` position.
  ///
  /// The example below finds the position of a space in a string's `utf16`
  /// view and then converts that position to an index in the string's
  /// `unicodeScalars` view:
  ///
  ///     let cafe = "Café 🍵"
  ///
  ///     let utf16Index = cafe.utf16.index(of: 32)!
  ///     let scalarIndex = String.UnicodeScalarView.Index(utf16Index, within: cafe.unicodeScalars)!
  ///
  ///     print(String(cafe.unicodeScalars.prefix(upTo: scalarIndex)))
  ///     // Prints "Café"
  ///
  /// If the position passed in `utf16Index` has no exact corresponding
  /// position in `unicodeScalars`, the result of the initializer is `nil`.
  /// For example, an attempt to convert the position of the trailing
  /// surrogate of a UTF-16 surrogate pair fails.
  ///
  /// - Parameters:
  ///   - utf16Index: A position in the `utf16` view of the string that the
  ///     `unicodeScalars` parameter represents.
  ///   - unicodeScalars: The `UnicodeScalarView` instance referenced by both
  ///     `utf16Index` and the resulting index.
  public init?(
    _ utf16Index: String.UTF16Index,
    within unicodeScalars: String.UnicodeScalarView
  ) {
    let utf16 = String.UTF16View(unicodeScalars._core)

    // The two ends of the view always convert; only interior positions need
    // validating.
    let isInterior =
      utf16Index != utf16.startIndex && utf16Index != utf16.endIndex
    if isInterior {
      _precondition(
        utf16Index >= utf16.startIndex
        && utf16Index <= utf16.endIndex,
        "Invalid String.UTF16Index for this UnicodeScalar view")

      // Detect positions that have no corresponding index.  Note that
      // we have to check before and after, because an unpaired
      // surrogate will be decoded as a single replacement character,
      // thus making the corresponding position valid.
      let splitsSurrogatePair =
        UTF16.isTrailSurrogate(utf16[utf16Index])
        && UTF16.isLeadSurrogate(utf16[utf16.index(before: utf16Index)])
      if splitsSurrogatePair {
        return nil
      }
    }
    // NOTE(review): the position is taken directly from `utf16Index._offset`
    // and `unicodeScalars._coreOffset` is not applied here -- confirm this is
    // intended for sliced views.
    self.init(_position: utf16Index._offset)
  }

  /// Creates an index in the given Unicode scalars view that corresponds
  /// exactly to the specified `UTF8View` position.
  ///
  /// If the position passed as `utf8Index` has no exact corresponding
  /// position in `unicodeScalars`, the result of the initializer is `nil`.
  /// For example, an attempt to convert the position of a UTF-8 continuation
  /// byte returns `nil`.
  ///
  /// - Parameters:
  ///   - utf8Index: A position in the `utf8` view of the string that the
  ///     `unicodeScalars` parameter represents.
  ///   - unicodeScalars: The `UnicodeScalarView` instance referenced by both
  ///     `utf8Index` and the resulting index.
  public init?(
    _ utf8Index: String.UTF8Index,
    within unicodeScalars: String.UnicodeScalarView
  ) {
    let core = unicodeScalars._core
    let coreIndex = utf8Index._coreIndex

    _precondition(
      coreIndex >= 0 && coreIndex <= core.endIndex,
      "Invalid String.UTF8Index for this UnicodeScalar view")

    // Positions that are not on a scalar boundary have no corresponding
    // index.
    guard utf8Index._isOnUnicodeScalarBoundary(in: core) else {
      return nil
    }
    self.init(_position: coreIndex)
  }

  /// Creates an index in the given Unicode scalars view that corresponds
  /// exactly to the specified string position.
  ///
  /// The example below converts the position of the teacup emoji (`"🍵"`)
  /// into its corresponding position in the string's `unicodeScalars` view.
  ///
  ///     let cafe = "Café 🍵"
  ///     let characterIndex = cafe.characters.index(of: "🍵")!
  ///     let scalarIndex = String.UnicodeScalarView.Index(characterIndex, within: cafe.unicodeScalars)
  ///
  ///     print(cafe.unicodeScalars.suffix(from: scalarIndex))
  ///     // Prints "🍵"
  ///
  /// - Parameters:
  ///   - characterIndex: A position in a `CharacterView` instance.
  ///     `characterIndex` must be an element of
  ///     `String(unicodeScalars).characters.indices`.
  ///   - unicodeScalars: The `UnicodeScalarView` in which to find the new
  ///     position.
  public init(
    _ characterIndex: String.Index,
    within unicodeScalars: String.UnicodeScalarView
  ) {
    // The new index reuses the position of the character index's `_base`.
    let base = characterIndex._base
    self.init(_position: base._position)
  }

  /// Returns the position in the given UTF-8 view that corresponds exactly to
  /// this index.
  ///
  /// The index must be a valid index of `String(utf8).unicodeScalars`.
  ///
  /// This example first finds the position of the character `"é"` and then
  /// uses this method to find the same position in the string's `utf8` view.
  ///
  ///     let cafe = "Café"
  ///     if let i = cafe.unicodeScalars.index(of: "é") {
  ///         let j = i.samePosition(in: cafe.utf8)
  ///         print(Array(cafe.utf8.suffix(from: j)))
  ///     }
  ///     // Prints "[195, 169]"
  ///
  /// - Parameter utf8: The view to use for the index conversion.
  /// - Returns: The position in `utf8` that corresponds exactly to this index.
  public func samePosition(in utf8: String.UTF8View) -> String.UTF8View.Index {
    let converted = String.UTF8View.Index(self, within: utf8)
    return converted
  }

  /// Returns the position in the given UTF-16 view that corresponds exactly to
  /// this index.
  ///
  /// The index must be a valid index of `String(utf16).unicodeScalars`.
  ///
  /// This example first finds the position of the scalar `"é"` and then uses
  /// this method to find the same position in the string's `utf16` view.
  ///
  ///     let cafe = "Café"
  ///     if let i = cafe.unicodeScalars.index(of: "é") {
  ///         let j = i.samePosition(in: cafe.utf16)
  ///         print(cafe.utf16[j])
  ///     }
  ///     // Prints "233"
  ///
  /// - Parameter utf16: The view to use for the index conversion.
  /// - Returns: The position in `utf16` that corresponds exactly to this index.
  public func samePosition(
    in utf16: String.UTF16View
  ) -> String.UTF16View.Index {
    let converted = String.UTF16View.Index(self, within: utf16)
    return converted
  }

  /// Returns the position in the given string that corresponds exactly to this
  /// index.
  ///
  /// This index must be a valid index of `characters.unicodeScalars`.
  ///
  /// This example first finds the position of the teacup emoji (`"🍵"`) in a
  /// string's `unicodeScalars` view and then uses this method to find the
  /// same position in the string.
  ///
  ///     let cafe = "Café 🍵"
  ///     let i = cafe.unicodeScalars.index(of: "🍵")!
  ///     let j = i.samePosition(in: cafe)!
  ///     print(String(cafe.characters.suffix(from: j)))
  ///     // Prints "🍵"
  ///
  /// - Parameter characters: The string to use for the index conversion.
  /// - Returns: The position in `characters` that corresponds exactly to
  ///   this index. If this index does not have an exact corresponding
  ///   position in `characters`, this method returns `nil`. For example,
  ///   a position that falls between the Unicode scalars of a single
  ///   extended grapheme cluster has no corresponding `String.Index`.
  public func samePosition(in characters: String) -> String.Index? {
    let converted = String.Index(self, within: characters)
    return converted
  }
}

extension String.UnicodeScalarView {
  // NOTE: Don't make this function inlineable.  Grapheme cluster
  // segmentation uses a completely different algorithm in Unicode 9.0.

  /// Returns `true` if `i` lies on an extended grapheme cluster boundary.
  ///
  /// `startIndex` and `endIndex` are always boundaries. For any other
  /// position, the grapheme-cluster-break properties of the scalar before
  /// `i` and the scalar at `i` are passed to the segmenter to decide.
  internal func _isOnGraphemeClusterBoundary(_ i: Index) -> Bool {
    guard i != startIndex && i != endIndex else {
      return true
    }

    let breakPropertyTrie = _UnicodeGraphemeClusterBreakPropertyTrie()
    let segmenter = _UnicodeExtendedGraphemeClusterSegmenter()

    let scalarBefore = self[index(before: i)]
    let propertyBefore = breakPropertyTrie.getPropertyRawValue(
      scalarBefore.value)

    // The preceding scalar's property can be enough to decide on its own,
    // in which case the scalar at `i` is never read.
    if segmenter.isBoundaryAfter(propertyBefore) {
      return true
    }

    let propertyAt = breakPropertyTrie.getPropertyRawValue(self[i].value)
    return segmenter.isBoundary(propertyBefore, propertyAt)
  }
}

// Reflection
extension String.UnicodeScalarView : CustomReflectable {
  /// A mirror that reflects the Unicode scalars view of a string, exposing
  /// the view's elements as unlabeled children.
  public var customMirror: Mirror {
    let mirror = Mirror(self, unlabeledChildren: self)
    return mirror
  }
}

extension String.UnicodeScalarView : CustomPlaygroundQuickLookable {
  /// A playground Quick Look that shows this view's `description` as text.
  public var customPlaygroundQuickLook: PlaygroundQuickLook {
    let text = description
    return PlaygroundQuickLook.text(text)
  }
}