swift-mirror/stdlib/public/core/String.swift

//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

import SwiftShims

@inlinable @_transparent
internal func unimplemented_utf8(
  _ message: String = "",
  file: StaticString = #file, line: UInt = #line
) -> Never {
  fatalError("Unimplemented for UTF-8 support", file: file, line: line)
}
@inlinable @_transparent
internal func unimplemented_utf8_32bit(
  _ message: String = "",
  file: StaticString = #file, line: UInt = #line
) -> Never {
  fatalError("32-bit: Unimplemented for UTF-8 support", file: file, line: line)
}

// TODO(UTF8): Find a better place to stick these...
extension UnsafePointer where Pointee == UInt8 {
  @inlinable
  internal var _asCChar: UnsafePointer<CChar> {
    @inline(__always) get {
      return UnsafeRawPointer(self).assumingMemoryBound(to: CChar.self)
    }
  }
}
extension UnsafeBufferPointer where Element == UInt8 {
  @inlinable
  internal var _asCChar: UnsafeBufferPointer<CChar> {
    @inline(__always) get {
      return UnsafeBufferPointer<CChar>(
        start: self.baseAddress._unsafelyUnwrappedUnchecked._asCChar,
        count: self.count)
    }
  }
}
extension UnsafeRawPointer {
  @inlinable
  internal var _asCChar: UnsafePointer<CChar> {
    @inline(__always) get {
      return self.assumingMemoryBound(to: CChar.self)
    }
  }
}
extension UnsafeRawBufferPointer {
  @inlinable
  internal var _asCChar: UnsafeBufferPointer<CChar> {
    @inline(__always) get {
      return UnsafeBufferPointer<CChar>(
        start: self.baseAddress._unsafelyUnwrappedUnchecked._asCChar,
        count: self.count)
    }
  }
}
extension UnsafePointer where Pointee == CChar {
  @inlinable
  internal var _asUInt8: UnsafePointer<UInt8> {
    @inline(__always) get {
      return UnsafeRawPointer(self).assumingMemoryBound(to: UInt8.self)
    }
  }
}
extension UnsafeBufferPointer where Element == CChar {
  @inlinable
  internal var _asUInt8: UnsafeBufferPointer<UInt8> {
    @inline(__always) get {
      return UnsafeBufferPointer<UInt8>(
        start: self.baseAddress._unsafelyUnwrappedUnchecked._asUInt8,
        count: self.count)
    }
  }
}
extension UnsafeRawPointer {
  @inlinable
  internal var _asUInt8: UnsafePointer<UInt8> {
    @inline(__always) get {
      return self.assumingMemoryBound(to: UInt8.self)
    }
  }
}
extension UnsafeRawBufferPointer {
  @inlinable
  internal var _asUInt8: UnsafeBufferPointer<UInt8> {
    @inline(__always) get {
      return UnsafeBufferPointer<UInt8>(
        start: self.baseAddress._unsafelyUnwrappedUnchecked._asUInt8,
        count: self.count)
    }
  }
}

extension Slice where Base == UnsafeBufferPointer<UInt8> {
  @inlinable
  internal var _rebased: UnsafeBufferPointer<UInt8> {
    @inline(__always) get {
      return UnsafeBufferPointer<UInt8>(rebasing: self)
    }
  }
}
extension Slice where Base == UnsafeBufferPointer<CChar> {
  @inlinable
  internal var _rebased: UnsafeBufferPointer<CChar> {
    @inline(__always) get {
      return UnsafeBufferPointer<CChar>(rebasing: self)
    }
  }
}

/// A Unicode string value that is a collection of characters.
///
/// A string is a series of characters, such as `"Swift"`, that forms a
/// collection. Strings in Swift are Unicode correct and locale insensitive,
/// and are designed to be efficient. The `String` type bridges with the
/// Objective-C class `NSString` and offers interoperability with C functions
/// that works with strings.
///
/// You can create new strings using string literals or string interpolations.
/// A *string literal* is a series of characters enclosed in quotes.
///
///     let greeting = "Welcome!"
///
/// *String interpolations* are string literals that evaluate any included
/// expressions and convert the results to string form. String interpolations
/// give you an easy way to build a string from multiple pieces. Wrap each
/// expression in a string interpolation in parentheses, prefixed by a
/// backslash.
///
///     let name = "Rosa"
///     let personalizedGreeting = "Welcome, \(name)!"
///     // personalizedGreeting == "Welcome, Rosa!"
///
///     let price = 2
///     let number = 3
///     let cookiePrice = "\(number) cookies: $\(price * number)."
///     // cookiePrice == "3 cookies: $6."
///
/// Combine strings using the concatenation operator (`+`).
///
///     let longerGreeting = greeting + " We're glad you're here!"
///     // longerGreeting == "Welcome! We're glad you're here!"
///
/// Multiline string literals are enclosed in three double quotation marks
/// (`"""`), with each delimiter on its own line. Indentation is stripped from
/// each line of a multiline string literal to match the indentation of the
/// closing delimiter.
///
///     let banner = """
///               __,
///              (           o  /) _/_
///               `.  , , , ,  //  /
///             (___)(_(_/_(_ //_ (__
///                          /)
///                         (/
///             """
///
/// Modifying and Comparing Strings
/// ===============================
///
/// Strings always have value semantics. Modifying a copy of a string leaves
/// the original unaffected.
///
///     var otherGreeting = greeting
///     otherGreeting += " Have a nice time!"
///     // otherGreeting == "Welcome! Have a nice time!"
///
///     print(greeting)
///     // Prints "Welcome!"
///
/// Comparing strings for equality using the equal-to operator (`==`) or a
/// relational operator (like `<` or `>=`) is always performed using Unicode
/// canonical representation. As a result, different representations of a
/// string compare as being equal.
///
///     let cafe1 = "Cafe\u{301}"
///     let cafe2 = "Café"
///     print(cafe1 == cafe2)
///     // Prints "true"
///
/// The Unicode scalar value `"\u{301}"` modifies the preceding character to
/// include an accent, so `"e\u{301}"` has the same canonical representation
/// as the single Unicode scalar value `"é"`.
///
/// Basic string operations are not sensitive to locale settings, ensuring that
/// string comparisons and other operations always have a single, stable
/// result, allowing strings to be used as keys in `Dictionary` instances and
/// for other purposes.
///
/// Accessing String Elements
/// =========================
///
/// A string is a collection of *extended grapheme clusters*, which approximate
/// human-readable characters. Many individual characters, such as "é", "김",
/// and "🇮🇳", can be made up of multiple Unicode scalar values. These scalar
/// values are combined by Unicode's boundary algorithms into extended
/// grapheme clusters, represented by the Swift `Character` type. Each element
/// of a string is represented by a `Character` instance.
///
/// For example, to retrieve the first word of a longer string, you can search
/// for a space and then create a substring from a prefix of the string up to
/// that point:
///
///     let name = "Marie Curie"
///     let firstSpace = name.firstIndex(of: " ") ?? name.endIndex
///     let firstName = name[..<firstSpace]
///     // firstName == "Marie"
///
/// The `firstName` constant is an instance of the `Substring` type---a type
/// that represents substrings of a string while sharing the original string's
/// storage. Substrings present the same interface as strings.
///
///     print("\(name)'s first name has \(firstName.count) letters.")
///     // Prints "Marie Curie's first name has 5 letters."
///
/// Accessing a String's Unicode Representation
/// ===========================================
///
/// If you need to access the contents of a string as encoded in different
/// Unicode encodings, use one of the string's `unicodeScalars`, `utf16`, or
/// `utf8` properties. Each property provides access to a view of the string
/// as a series of code units, each encoded in a different Unicode encoding.
///
/// To demonstrate the different views available for every string, the
/// following examples use this `String` instance:
///
///     let cafe = "Cafe\u{301} du 🌍"
///     print(cafe)
///     // Prints "Café du 🌍"
///
/// The `cafe` string is a collection of the nine characters that are visible
/// when the string is displayed.
///
///     print(cafe.count)
///     // Prints "9"
///     print(Array(cafe))
///     // Prints "["C", "a", "f", "é", " ", "d", "u", " ", "🌍"]"
///
/// Unicode Scalar View
/// -------------------
///
/// A string's `unicodeScalars` property is a collection of Unicode scalar
/// values, the 21-bit codes that are the basic unit of Unicode. Each scalar
/// value is represented by a `Unicode.Scalar` instance and is equivalent to a
/// UTF-32 code unit.
///
///     print(cafe.unicodeScalars.count)
///     // Prints "10"
///     print(Array(cafe.unicodeScalars))
///     // Prints "["C", "a", "f", "e", "\u{0301}", " ", "d", "u", " ", "\u{0001F30D}"]"
///     print(cafe.unicodeScalars.map { $0.value })
///     // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 127757]"
///
/// The `unicodeScalars` view's elements comprise each Unicode scalar value in
/// the `cafe` string. In particular, because `cafe` was declared using the
/// decomposed form of the `"é"` character, `unicodeScalars` contains the
/// scalar values for both the letter `"e"` (101) and the accent character
/// `"´"` (769).
///
/// UTF-16 View
/// -----------
///
/// A string's `utf16` property is a collection of UTF-16 code units, the
/// 16-bit encoding form of the string's Unicode scalar values. Each code unit
/// is stored as a `UInt16` instance.
///
///     print(cafe.utf16.count)
///     // Prints "11"
///     print(Array(cafe.utf16))
///     // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 55356, 57101]"
///
/// The elements of the `utf16` view are the code units for the string when
/// encoded in UTF-16. These elements match those accessed through indexed
/// `NSString` APIs.
///
///     let nscafe = cafe as NSString
///     print(nscafe.length)
///     // Prints "11"
///     print(nscafe.character(at: 3))
///     // Prints "101"
///
/// UTF-8 View
/// ----------
///
/// A string's `utf8` property is a collection of UTF-8 code units, the 8-bit
/// encoding form of the string's Unicode scalar values. Each code unit is
/// stored as a `UInt8` instance.
///
///     print(cafe.utf8.count)
///     // Prints "14"
///     print(Array(cafe.utf8))
///     // Prints "[67, 97, 102, 101, 204, 129, 32, 100, 117, 32, 240, 159, 140, 141]"
///
/// The elements of the `utf8` view are the code units for the string when
/// encoded in UTF-8. This representation matches the one used when `String`
/// instances are passed to C APIs.
///
///     let cLength = strlen(cafe)
///     print(cLength)
///     // Prints "14"
///
/// Measuring the Length of a String
/// ================================
///
/// When you need to know the length of a string, you must first consider what
/// you'll use the length for. Are you measuring the number of characters that
/// will be displayed on the screen, or are you measuring the amount of
/// storage needed for the string in a particular encoding? A single string
/// can have greatly differing lengths when measured by its different views.
///
/// For example, an ASCII character like the capital letter *A* is represented
/// by a single element in each of its four views. The Unicode scalar value of
/// *A* is `65`, which is small enough to fit in a single code unit in both
/// UTF-16 and UTF-8.
///
///     let capitalA = "A"
///     print(capitalA.count)
///     // Prints "1"
///     print(capitalA.unicodeScalars.count)
///     // Prints "1"
///     print(capitalA.utf16.count)
///     // Prints "1"
///     print(capitalA.utf8.count)
///     // Prints "1"
///
/// On the other hand, an emoji flag character is constructed from a pair of
/// Unicode scalar values, like `"\u{1F1F5}"` and `"\u{1F1F7}"`. Each of these
/// scalar values, in turn, is too large to fit into a single UTF-16 or UTF-8
/// code unit. As a result, each view of the string `"🇵🇷"` reports a different
/// length.
///
///     let flag = "🇵🇷"
///     print(flag.count)
///     // Prints "1"
///     print(flag.unicodeScalars.count)
///     // Prints "2"
///     print(flag.utf16.count)
///     // Prints "4"
///     print(flag.utf8.count)
///     // Prints "8"
///
/// To check whether a string is empty, use its `isEmpty` property instead of
/// comparing the length of one of the views to `0`. Unlike with `isEmpty`,
/// calculating a view's `count` property requires iterating through the
/// elements of the string.
///
/// Accessing String View Elements
/// ==============================
///
/// To find individual elements of a string, use the appropriate view for your
/// task. For example, to retrieve the first word of a longer string, you can
/// search the string for a space and then create a new string from a prefix
/// of the string up to that point.
///
///     let name = "Marie Curie"
///     let firstSpace = name.firstIndex(of: " ") ?? name.endIndex
///     let firstName = name[..<firstSpace]
///     print(firstName)
///     // Prints "Marie"
///
/// Strings and their views share indices, so you can access the UTF-8 view of
/// the `name` string using the same `firstSpace` index.
///
///     print(Array(name.utf8[..<firstSpace]))
///     // Prints "[77, 97, 114, 105, 101]"
///
/// Note that an index into one view may not have an exact corresponding
/// position in another view. For example, the `flag` string declared above
/// comprises a single character, but is composed of eight code units when
/// encoded as UTF-8. The following code creates constants for the first and
/// second positions in the `flag.utf8` view. Accessing the `utf8` view with
/// these indices yields the first and second code UTF-8 units.
///
///     let firstCodeUnit = flag.startIndex
///     let secondCodeUnit = flag.utf8.index(after: firstCodeUnit)
///     // flag.utf8[firstCodeUnit] == 240
///     // flag.utf8[secondCodeUnit] == 159
///
/// When used to access the elements of the `flag` string itself, however, the
/// `secondCodeUnit` index does not correspond to the position of a specific
/// character. Instead of only accessing the specific UTF-8 code unit, that
/// index is treated as the position of the character at the index's encoded
/// offset. In the case of `secondCodeUnit`, that character is still the flag
/// itself.
///
///     // flag[firstCodeUnit] == "🇵🇷"
///     // flag[secondCodeUnit] == "🇵🇷"
///
/// If you need to validate that an index from one string's view corresponds
/// with an exact position in another view, use the index's
/// `samePosition(in:)` method or the `init(_:within:)` initializer.
///
///     if let exactIndex = secondCodeUnit.samePosition(in: flag) {
///         print(flag[exactIndex])
///     } else {
///         print("No exact match for this position.")
///     }
///     // Prints "No exact match for this position."
///
/// Performance Optimizations
/// =========================
///
/// Although strings in Swift have value semantics, strings use a copy-on-write
/// strategy to store their data in a buffer. This buffer can then be shared
/// by different copies of a string. A string's data is only copied lazily,
/// upon mutation, when more than one string instance is using the same
/// buffer. Therefore, the first in any sequence of mutating operations may
/// cost O(*n*) time and space.
///
/// When a string's contiguous storage fills up, a new buffer must be allocated
/// and data must be moved to the new storage. String buffers use an
/// exponential growth strategy that makes appending to a string a constant
/// time operation when averaged over many append operations.
///
/// Bridging Between String and NSString
/// ====================================
///
/// Any `String` instance can be bridged to `NSString` using the type-cast
/// operator (`as`), and any `String` instance that originates in Objective-C
/// may use an `NSString` instance as its storage. Because any arbitrary
/// subclass of `NSString` can become a `String` instance, there are no
/// guarantees about representation or efficiency when a `String` instance is
/// backed by `NSString` storage. Because `NSString` is immutable, it is just
/// as though the storage was shared by a copy. The first in any sequence of
/// mutating operations causes elements to be copied into unique, contiguous
/// storage which may cost O(*n*) time and space, where *n* is the length of
/// the string's encoded representation (or more, if the underlying `NSString`
/// has unusual performance characteristics).
///
/// For more information about the Unicode terms used in this discussion, see
/// the [Unicode.org glossary][glossary]. In particular, this discussion
/// mentions [extended grapheme clusters][clusters], [Unicode scalar
/// values][scalars], and [canonical equivalence][equivalence].
///
/// [glossary]: http://www.unicode.org/glossary/
/// [clusters]: http://www.unicode.org/glossary/#extended_grapheme_cluster
/// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value
/// [equivalence]: http://www.unicode.org/glossary/#canonical_equivalent
@_fixed_layout
public struct String {
  @usableFromInline
  internal var _guts: _StringGuts

  @inlinable @inline(__always)
  internal init(_ _guts: _StringGuts) {
    self._guts = _guts
    _invariantCheck()
  }

  /// Creates an empty string.
  ///
  /// Using this initializer is equivalent to initializing a string with an
  /// empty string literal.
  ///
  ///     let empty = ""
  ///     let alsoEmpty = String()
  @inlinable @inline(__always)
  public init() { self.init(_StringGuts()) }

  // TODO(UTF8 merge): Move these declarations back to their appropriate files.
  // Currently, they can crash the compiler in some build configurations.

  /// A view of a string's contents as a collection of Unicode scalar values.
  ///
  /// You can access a string's view of Unicode scalar values by using its
  /// `unicodeScalars` property. Unicode scalar values are the 21-bit codes
  /// that are the basic unit of Unicode. Each scalar value is represented by
  /// a `Unicode.Scalar` instance and is equivalent to a UTF-32 code unit.
  ///
  ///     let flowers = "Flowers 💐"
  ///     for v in flowers.unicodeScalars {
  ///         print(v.value)
  ///     }
  ///     // 70
  ///     // 108
  ///     // 111
  ///     // 119
  ///     // 101
  ///     // 114
  ///     // 115
  ///     // 32
  ///     // 128144
  ///
  /// Some characters that are visible in a string are made up of more than one
  /// Unicode scalar value. In that case, a string's `unicodeScalars` view
  /// contains more elements than the string itself.
  ///
  ///     let flag = "🇵🇷"
  ///     for c in flag {
  ///         print(c)
  ///     }
  ///     // 🇵🇷
  ///
  ///     for v in flag.unicodeScalars {
  ///         print(v.value)
  ///     }
  ///     // 127477
  ///     // 127479
  ///
  /// You can convert a `String.UnicodeScalarView` instance back into a string
  /// using the `String` type's `init(_:)` initializer.
  ///
  ///     let favemoji = "My favorite emoji is 🎉"
  ///     if let i = favemoji.unicodeScalars.firstIndex(where: { $0.value >= 128 }) {
  ///         let asciiPrefix = String(favemoji.unicodeScalars[..<i])
  ///         print(asciiPrefix)
  ///     }
  ///     // Prints "My favorite emoji is "
  @_fixed_layout
  public struct UnicodeScalarView {
    @usableFromInline
    internal var _guts: _StringGuts

    @inlinable @inline(__always)
    internal init(_ _guts: _StringGuts) {
      self._guts = _guts
      _invariantCheck()
    }
  }

  /// A view of a string's contents as a collection of UTF-16 code units.
  ///
  /// You can access a string's view of UTF-16 code units by using its `utf16`
  /// property. A string's UTF-16 view encodes the string's Unicode scalar
  /// values as 16-bit integers.
  ///
  ///     let flowers = "Flowers 💐"
  ///     for v in flowers.utf16 {
  ///         print(v)
  ///     }
  ///     // 70
  ///     // 108
  ///     // 111
  ///     // 119
  ///     // 101
  ///     // 114
  ///     // 115
  ///     // 32
  ///     // 55357
  ///     // 56464
  ///
  /// Unicode scalar values that make up a string's contents can be up to 21
  /// bits long. The longer scalar values may need two `UInt16` values for
  /// storage. Those "pairs" of code units are called *surrogate pairs*.
  ///
  ///     let flowermoji = "💐"
  ///     for v in flowermoji.unicodeScalars {
  ///         print(v, v.value)
  ///     }
  ///     // 💐 128144
  ///
  ///     for v in flowermoji.utf16 {
  ///         print(v)
  ///     }
  ///     // 55357
  ///     // 56464
  ///
  /// To convert a `String.UTF16View` instance back into a string, use the
  /// `String` type's `init(_:)` initializer.
  ///
  ///     let favemoji = "My favorite emoji is 🎉"
  ///     if let i = favemoji.utf16.firstIndex(where: { $0 >= 128 }) {
  ///         let asciiPrefix = String(favemoji.utf16[..<i])
  ///         print(asciiPrefix)
  ///     }
  ///     // Prints "My favorite emoji is "
  ///
  /// UTF16View Elements Match NSString Characters
  /// ============================================
  ///
  /// The UTF-16 code units of a string's `utf16` view match the elements
  /// accessed through indexed `NSString` APIs.
  ///
  ///     print(flowers.utf16.count)
  ///     // Prints "10"
  ///
  ///     let nsflowers = flowers as NSString
  ///     print(nsflowers.length)
  ///     // Prints "10"
  ///
  /// Unlike `NSString`, however, `String.UTF16View` does not use integer
  /// indices. If you need to access a specific position in a UTF-16 view, use
  /// Swift's index manipulation methods. The following example accesses the
  /// fourth code unit in both the `flowers` and `nsflowers` strings:
  ///
  ///     print(nsflowers.character(at: 3))
  ///     // Prints "119"
  ///
  ///     let i = flowers.utf16.index(flowers.utf16.startIndex, offsetBy: 3)
  ///     print(flowers.utf16[i])
  ///     // Prints "119"
  ///
  /// Although the Swift overlay updates many Objective-C methods to return
  /// native Swift indices and index ranges, some still return instances of
  /// `NSRange`. To convert an `NSRange` instance to a range of
  /// `String.Index`, use the `Range(_:in:)` initializer, which takes an
  /// `NSRange` and a string as arguments.
  ///
  ///     let snowy = "❄️ Let it snow! ☃️"
  ///     let nsrange = NSRange(location: 3, length: 12)
  ///     if let range = Range(nsrange, in: snowy) {
  ///         print(snowy[range])
  ///     }
  ///     // Prints "Let it snow!"
  @_fixed_layout // FIXME(sil-serialize-all)
  public struct UTF16View {
    @usableFromInline
    internal var _guts: _StringGuts

    @inlinable // FIXME(sil-serialize-all)
    internal init(_ guts: _StringGuts) {
      self._guts = guts
      _invariantCheck()
    }
  }

  /// A view of a string's contents as a collection of UTF-8 code units.
  ///
  /// You can access a string's view of UTF-8 code units by using its `utf8`
  /// property. A string's UTF-8 view encodes the string's Unicode scalar
  /// values as 8-bit integers.
  ///
  ///     let flowers = "Flowers 💐"
  ///     for v in flowers.utf8 {
  ///         print(v)
  ///     }
  ///     // 70
  ///     // 108
  ///     // 111
  ///     // 119
  ///     // 101
  ///     // 114
  ///     // 115
  ///     // 32
  ///     // 240
  ///     // 159
  ///     // 146
  ///     // 144
  ///
  /// A string's Unicode scalar values can be up to 21 bits in length. To
  /// represent those scalar values using 8-bit integers, more than one UTF-8
  /// code unit is often required.
  ///
  ///     let flowermoji = "💐"
  ///     for v in flowermoji.unicodeScalars {
  ///         print(v, v.value)
  ///     }
  ///     // 💐 128144
  ///
  ///     for v in flowermoji.utf8 {
  ///         print(v)
  ///     }
  ///     // 240
  ///     // 159
  ///     // 146
  ///     // 144
  ///
  /// In the encoded representation of a Unicode scalar value, each UTF-8 code
  /// unit after the first is called a *continuation byte*.
  ///
  /// UTF8View Elements Match Encoded C Strings
  /// =========================================
  ///
  /// Swift streamlines interoperation with C string APIs by letting you pass a
  /// `String` instance to a function as an `Int8` or `UInt8` pointer. When you
  /// call a C function using a `String`, Swift automatically creates a buffer
  /// of UTF-8 code units and passes a pointer to that buffer. The code units
  /// of that buffer match the code units in the string's `utf8` view.
  ///
  /// The following example uses the C `strncmp` function to compare the
  /// beginning of two Swift strings. The `strncmp` function takes two
  /// `const char*` pointers and an integer specifying the number of characters
  /// to compare. Because the strings are identical up to the 14th character,
  /// comparing only those characters results in a return value of `0`.
  ///
  ///     let s1 = "They call me 'Bell'"
  ///     let s2 = "They call me 'Stacey'"
  ///
  ///     print(strncmp(s1, s2, 14))
  ///     // Prints "0"
  ///     print(String(s1.utf8.prefix(14)))
  ///     // Prints "They call me '"
  ///
  /// Extending the compared character count to 15 includes the differing
  /// characters, so a nonzero result is returned.
  ///
  ///     print(strncmp(s1, s2, 15))
  ///     // Prints "-17"
  ///     print(String(s1.utf8.prefix(15)))
  ///     // Prints "They call me 'B"
  @_fixed_layout
  public struct UTF8View {
    @usableFromInline
    internal var _guts: _StringGuts

    @inlinable @inline(__always)
    internal init(_ guts: _StringGuts) {
      self._guts = guts
      _invariantCheck()
    }
  }
}

extension String {
  #if !INTERNAL_CHECKS_ENABLED
  @inlinable @inline(__always) internal func _invariantCheck() {}
  #else
  @usableFromInline @inline(never) @_effects(releasenone)
  internal func _invariantCheck() {
    _guts._invariantCheck()
  }
  #endif // INTERNAL_CHECKS_ENABLED

  public func _dump() {
    #if INTERNAL_CHECKS_ENABLED
    _guts._dump()
    #endif // INTERNAL_CHECKS_ENABLED
  }
}

extension String {
  /// Creates a string from the given Unicode code units in the specified
  /// encoding.
  ///
  /// - Parameters:
  ///   - codeUnits: A collection of code units encoded in the encoding
  ///     specified in `sourceEncoding`.
  ///   - sourceEncoding: The encoding in which `codeUnits` should be
  ///     interpreted.
  @inlinable
  @inline(__always) // Eliminate dynamic type check when possible
  public init<C: Collection, Encoding: Unicode.Encoding>(
    decoding codeUnits: C, as sourceEncoding: Encoding.Type
  ) where C.Iterator.Element == Encoding.CodeUnit {
    if let contigBytes = codeUnits as? _HasContiguousBytes,
       sourceEncoding == UTF8.self,
       contigBytes._providesContiguousBytesNoCopy
    {
      self = contigBytes.withUnsafeBytes { rawBufPtr in
        let ptr = rawBufPtr.baseAddress._unsafelyUnwrappedUnchecked
        return String._fromUTF8Repairing(
          UnsafeBufferPointer(
            start: ptr.assumingMemoryBound(to: UInt8.self),
            count: rawBufPtr.count)).0
      }
      return
    }

    self = String._fromCodeUnits(
      codeUnits, encoding: sourceEncoding, repair: true)!.0
  }

  /// Calls the given closure with a pointer to the contents of the string,
  /// represented as a null-terminated sequence of code units.
  ///
  /// The pointer passed as an argument to `body` is valid only during the
  /// execution of `withCString(encodedAs:_:)`. Do not store or return the
  /// pointer for later use.
  ///
  /// - Parameters:
  ///   - body: A closure with a pointer parameter that points to a
  ///     null-terminated sequence of code units. If `body` has a return
  ///     value, that value is also used as the return value for the
  ///     `withCString(encodedAs:_:)` method. The pointer argument is valid
  ///     only for the duration of the method's execution.
  ///   - targetEncoding: The encoding in which the code units should be
  ///     interpreted.
  /// - Returns: The return value, if any, of the `body` closure parameter.
  @inlinable
  @inline(__always) // Eliminate dynamic type check when possible
  public func withCString<Result, TargetEncoding: Unicode.Encoding>(
    encodedAs targetEncoding: TargetEncoding.Type,
    _ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
  ) rethrows -> Result {
    if targetEncoding == UTF8.self {
      return try self.withCString {
        (cPtr: UnsafePointer<CChar>) -> Result  in
        _sanityCheck(UInt8.self == TargetEncoding.CodeUnit.self)
        let ptr = UnsafeRawPointer(cPtr).assumingMemoryBound(
          to: TargetEncoding.CodeUnit.self)
        return try body(ptr)
      }
    }
    return try _slowWithCString(encodedAs: targetEncoding, body)
  }

  @usableFromInline @inline(never) // slow-path
  @_effects(releasenone)
  internal func _slowWithCString<Result, TargetEncoding: Unicode.Encoding>(
    encodedAs targetEncoding: TargetEncoding.Type,
    _ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
  ) rethrows -> Result {
    // TODO(UTF8 perf): Transcode from guts directly
    let codeUnits = Array(self.utf8)
    var arg = Array<TargetEncoding.CodeUnit>()
    arg.reserveCapacity(1 &+ self._guts.count / 4)
    let repaired = transcode(
      codeUnits.makeIterator(),
      from: UTF8.self,
      to: targetEncoding,
      stoppingOnError: false,
      into: { arg.append($0) })
    arg.append(TargetEncoding.CodeUnit(0))
    _sanityCheck(!repaired)
    return try body(arg)
  }
}

extension String: _ExpressibleByBuiltinUnicodeScalarLiteral {
  @_effects(readonly)
  @inlinable @inline(__always)
  public init(_builtinUnicodeScalarLiteral value: Builtin.Int32) {
    self.init(Unicode.Scalar(_unchecked: UInt32(value)))
  }

  @inlinable @inline(__always)
  public init(_ scalar: Unicode.Scalar) {
    self = scalar.withUTF8CodeUnits { String._uncheckedFromUTF8($0) }
  }
}

extension String: _ExpressibleByBuiltinExtendedGraphemeClusterLiteral {
  @inlinable @inline(__always)
  @_effects(readonly) @_semantics("string.makeUTF8")
  public init(
    _builtinExtendedGraphemeClusterLiteral start: Builtin.RawPointer,
    utf8CodeUnitCount: Builtin.Word,
    isASCII: Builtin.Int1
  ) {
    self.init(
      _builtinStringLiteral: start,
      utf8CodeUnitCount: utf8CodeUnitCount,
      isASCII: isASCII)
  }
}

extension String: _ExpressibleByBuiltinStringLiteral {
  @inlinable @inline(__always)
  @_effects(readonly) @_semantics("string.makeUTF8")
  public init(
    _builtinStringLiteral start: Builtin.RawPointer,
    utf8CodeUnitCount: Builtin.Word,
    isASCII: Builtin.Int1
    ) {
    let bufPtr = UnsafeBufferPointer(
      start: UnsafeRawPointer(start).assumingMemoryBound(to: UInt8.self),
      count: Int(utf8CodeUnitCount))
    if let smol = _SmallString(bufPtr) {
      self = String(_StringGuts(smol))
      return
    }
    self.init(_StringGuts(bufPtr, isASCII: Bool(isASCII)))
  }
}

extension String: ExpressibleByStringLiteral {
  // TODO(UTF8 merge): drop all of the below
  public typealias StringLiteralType = String
  public typealias UnicodeScalarLiteralType = String
  public typealias ExtendedGraphemeClusterLiteralType = String

  /// Creates an instance initialized to the given string value.
  ///
  /// Do not call this initializer directly. It is used by the compiler when you
  /// initialize a string using a string literal. For example:
  ///
  ///     let nextStop = "Clark & Lake"
  ///
  /// This assignment to the `nextStop` constant calls this string literal
  /// initializer behind the scenes.
  @inlinable @inline(__always)
  public init(stringLiteral value: String) {
    self = value
  }
}

extension String: CustomDebugStringConvertible {
  /// A representation of the string that is suitable for debugging.
  public var debugDescription: String {
    // TODO(UTF8): Drop some explicig `String` calls; needed for the SPM build
    var result: String = "\""
    for us in self.unicodeScalars {
      result += String(us.escaped(asASCII: false))
    }
    result += "\""
    return result
  }
}

extension String {
  @inlinable // Forward inlinability to append
  @_effects(readonly) @_semantics("string.concat")
  public static func + (lhs: String, rhs: String) -> String {
    var result = lhs
    result.append(rhs)
    return result
  }

  // String append
  @inlinable // Forward inlinability to append
  public static func += (lhs: inout String, rhs: String) {
    lhs.append(rhs)
  }
}

extension String {
  /// Constructs a `String` in `resultStorage` containing the given UTF-8.
  ///
  /// Low-level construction interface used by introspection
  /// implementation in the runtime library.
  @inlinable @inline(__always)
  @_silgen_name("swift_stringFromUTF8InRawMemory")
  public // COMPILER_INTRINSIC
  static func _fromUTF8InRawMemory(
    _ resultStorage: UnsafeMutablePointer<String>,
    start: UnsafeMutablePointer<UTF8.CodeUnit>,
    utf8CodeUnitCount: Int
    ) {
    unimplemented_utf8()
  }
}

extension Sequence where Element: StringProtocol {
  /// Returns a new string by concatenating the elements of the sequence,
  /// adding the given separator between each element.
  ///
  /// The following example shows how an array of strings can be joined to a
  /// single, comma-separated string:
  ///
  ///     let cast = ["Vivien", "Marlon", "Kim", "Karl"]
  ///     let list = cast.joined(separator: ", ")
  ///     print(list)
  ///     // Prints "Vivien, Marlon, Kim, Karl"
  ///
  /// - Parameter separator: A string to insert between each of the elements
  ///   in this sequence. The default separator is an empty string.
  /// - Returns: A single, concatenated string.
  @_specialize(where Self == Array<Substring>)
  @_specialize(where Self == Array<String>)
  public func joined(separator: String = "") -> String {
    return _joined(separator: separator)
  }

  @inline(__always) // Pick up @_specialize and devirtualize from two callers
  internal func _joined(separator: String) -> String {
    // A likely-under-estimate, but lets us skip some of the growth curve
    // for large Sequences.
    let understimatedCap =
      (1 &+ separator._guts.count) &* self.underestimatedCount
    var result = String() // TODO(UTF8 merge): replace String() with ""
    result.reserveCapacity(understimatedCap)
    if separator.isEmpty {
      for x in self {
        result.append(x._ephemeralString)
      }
      return result
    }

    var iter = makeIterator()
    if let first = iter.next() {
      result.append(first._ephemeralString)
      while let next = iter.next() {
        result.append(separator)
        result.append(next._ephemeralString)
      }
    }
    return result
  }
}

// This overload is necessary because String now conforms to
// BidirectionalCollection, and there are other `joined` overloads that are
// considered more specific. See Flatten.swift.gyb.
extension BidirectionalCollection where Element == String {
  /// Returns a new string by concatenating the elements of the sequence,
  /// adding the given separator between each element.
  ///
  /// The following example shows how an array of strings can be joined to a
  /// single, comma-separated string:
  ///
  ///     let cast = ["Vivien", "Marlon", "Kim", "Karl"]
  ///     let list = cast.joined(separator: ", ")
  ///     print(list)
  ///     // Prints "Vivien, Marlon, Kim, Karl"
  ///
  /// - Parameter separator: A string to insert between each of the elements
  ///   in this sequence. The default separator is an empty string.
  /// - Returns: A single, concatenated string.
  @_specialize(where Self == Array<String>)
  public func joined(separator: String = "") -> String {
    return _joined(separator: separator)
  }
}

// TODO(UTF8): Can we change the test and remove this? This is only here for
// test/RuntimeObjC.swift
#if _runtime(_ObjC)
@usableFromInline // FIXME(sil-serialize-all)
@_silgen_name("swift_stdlib_NSStringLowercaseString")
internal func _stdlib_NSStringLowercaseString(_ str: AnyObject) -> _CocoaString

@usableFromInline // FIXME(sil-serialize-all)
@_silgen_name("swift_stdlib_NSStringUppercaseString")
internal func _stdlib_NSStringUppercaseString(_ str: AnyObject) -> _CocoaString
#endif

// Unicode algorithms
extension String {
  @inline(__always)
  internal func _uppercaseASCII(_ x: UInt8) -> UInt8 {
    /// A "table" for which ASCII characters need to be upper cased.
    /// To determine which bit corresponds to which ASCII character, subtract 1
    /// from the ASCII value of that character and divide by 2. The bit is set iff
    /// that character is a lower case character.
    let _lowercaseTable: UInt64 =
      0b0001_1111_1111_1111_0000_0000_0000_0000 &<< 32

    // Lookup if it should be shifted in our ascii table, then we subtract 0x20 if
    // it should, 0x0 if not.
    // This code is equivalent to:
    // This code is equivalent to:
    // switch sourcex {
    // case let x where (x >= 0x41 && x <= 0x5a):
    //   return x &- 0x20
    // case let x:
    //   return x
    // }
    let isLower = _lowercaseTable &>> UInt64(((x &- 1) & 0b0111_1111) &>> 1)
    let toSubtract = (isLower & 0x1) &<< 5
    return x &- UInt8(truncatingIfNeeded: toSubtract)
  }

  @inline(__always)
  internal func _lowercaseASCII(_ x: UInt8) -> UInt8 {
    /// A "table" for which ASCII characters need to be lower cased.
    /// To determine which bit corresponds to which ASCII character, subtract 1
    /// from the ASCII value of that character and divide by 2. The bit is set iff
    /// that character is a upper case character.
    let _uppercaseTable: UInt64 =
      0b0000_0000_0000_0000_0001_1111_1111_1111 &<< 32

    // Lookup if it should be shifted in our ascii table, then we add 0x20 if
    // it should, 0x0 if not.
    // This code is equivalent to:
    // This code is equivalent to:
    // switch sourcex {
    // case let x where (x >= 0x41 && x <= 0x5a):
    //   return x &- 0x20
    // case let x:
    //   return x
    // }
    let isUpper = _uppercaseTable &>> UInt64(((x &- 1) & 0b0111_1111) &>> 1)
    let toAdd = (isUpper & 0x1) &<< 5
    return x &+ UInt8(truncatingIfNeeded: toAdd)
  }


  /// Returns a lowercase version of the string.
  ///
  /// Here's an example of transforming a string to all lowercase letters.
  ///
  ///     let cafe = "BBQ Café 🍵"
  ///     print(cafe.lowercased())
  ///     // Prints "bbq café 🍵"
  ///
  /// - Returns: A lowercase copy of the string.
  ///
  /// - Complexity: O(*n*)
  @_effects(releasenone)
  public func lowercased() -> String {
    if _fastPath(_guts.isFastASCII) {
      return _guts.withFastUTF8 { utf8 in
        // TODO(UTF8 perf): code-unit appendInPlace on guts
        var result = String()
        result.reserveCapacity(utf8.count)
        for u8 in utf8 {
          result._guts.append(String(Unicode.Scalar(_lowercaseASCII(u8)))._guts)
        }
        return result
      }
    }

    // TODO(UTF8 perf): This is a horribly slow means...
    let codeUnits = Array(self.utf16).withUnsafeBufferPointer {
      (uChars: UnsafeBufferPointer<UInt16>) -> Array<UInt16> in
      var result = Array<UInt16>(repeating: 0, count: uChars.count)
      let len = result.withUnsafeMutableBufferPointer {
        (output) -> Int in
        var err = __swift_stdlib_U_ZERO_ERROR
        return Int(truncatingIfNeeded:
          __swift_stdlib_u_strToLower(
            output.baseAddress._unsafelyUnwrappedUnchecked,
            Int32(output.count),
            uChars.baseAddress._unsafelyUnwrappedUnchecked,
            Int32(uChars.count),
            "", // TODO(UTF8): with new root, nil
            &err))
      }
      if len > uChars.count {
        var err = __swift_stdlib_U_ZERO_ERROR
        result = Array<UInt16>(repeating: 0, count: len)
        result.withUnsafeMutableBufferPointer {
          output -> Void in
          __swift_stdlib_u_strToLower(
            output.baseAddress._unsafelyUnwrappedUnchecked,
            Int32(output.count),
            uChars.baseAddress._unsafelyUnwrappedUnchecked,
            Int32(uChars.count),
            "", // TODO(UTF8): with new root, nil
            &err)
        }
      }
      return result
    }
    return codeUnits.withUnsafeBufferPointer { String._uncheckedFromUTF16($0) }
  }

  /// Returns an uppercase version of the string.
  ///
  /// The following example transforms a string to uppercase letters:
  ///
  ///     let cafe = "Café 🍵"
  ///     print(cafe.uppercased())
  ///     // Prints "CAFÉ 🍵"
  ///
  /// - Returns: An uppercase copy of the string.
  ///
  /// - Complexity: O(*n*)
  @_effects(releasenone)
  public func uppercased() -> String {
    if _fastPath(_guts.isFastASCII) {
      return _guts.withFastUTF8 { utf8 in
        // TODO(UTF8 perf): code-unit appendInPlace on guts
        var result = String()
        result.reserveCapacity(utf8.count)
        for u8 in utf8 {
          result._guts.append(String(Unicode.Scalar(_uppercaseASCII(u8)))._guts)
        }
        return result
      }
    }

    // TODO(UTF8 perf): This is a horribly slow means...
    let codeUnits = Array(self.utf16).withUnsafeBufferPointer {
      (uChars: UnsafeBufferPointer<UInt16>) -> Array<UInt16> in
      var result = Array<UInt16>(repeating: 0, count: uChars.count)
      let len = result.withUnsafeMutableBufferPointer {
        (output) -> Int in
        var err = __swift_stdlib_U_ZERO_ERROR
        return Int(truncatingIfNeeded:
          __swift_stdlib_u_strToUpper(
            output.baseAddress._unsafelyUnwrappedUnchecked,
            Int32(output.count),
            uChars.baseAddress._unsafelyUnwrappedUnchecked,
            Int32(uChars.count),
            "", // TODO(UTF8): with new root, nil
            &err))
      }
      if len > uChars.count {
        var err = __swift_stdlib_U_ZERO_ERROR
        result = Array<UInt16>(repeating: 0, count: len)
        result.withUnsafeMutableBufferPointer {
          output -> Void in
          __swift_stdlib_u_strToUpper(
            output.baseAddress._unsafelyUnwrappedUnchecked,
            Int32(output.count),
            uChars.baseAddress._unsafelyUnwrappedUnchecked,
            Int32(uChars.count),
            "", // TODO(UTF8): with new root, nil
            &err)
        }
      }
      return result
    }
    return codeUnits.withUnsafeBufferPointer { String._uncheckedFromUTF16($0) }
  }

  /// Creates an instance from the description of a given
  /// `LosslessStringConvertible` instance.
  @inlinable @inline(__always)
  public init<T : LosslessStringConvertible>(_ value: T) {
    self = value.description
  }
}

extension String: CustomStringConvertible {
  /// The value of this string.
  ///
  /// Using this property directly is discouraged. Instead, use simple
  /// assignment to create a new constant or variable equal to this string.
  @inlinable
  public var description: String { return self }
}

extension String {
  /// Calls the given closure with a pointer to the contents of the string,
  /// represented as a null-terminated sequence of UTF-8 code units.
  ///
  /// The pointer passed as an argument to `body` is valid only during the
  /// execution of `withCString(_:)`. Do not store or return the pointer for
  /// later use.
  ///
  /// - Parameter body: A closure with a pointer parameter that points to a
  ///   null-terminated sequence of UTF-8 code units. If `body` has a return
  ///   value, that value is also used as the return value for the
  ///   `withCString(_:)` method. The pointer argument is valid only for the
  ///   duration of the method's execution.
  /// - Returns: The return value, if any, of the `body` closure parameter.
  @inlinable // fast-path: already C-string compatible
  public func withCString<Result>(
    _ body: (UnsafePointer<Int8>) throws -> Result
  ) rethrows -> Result {
    return try _guts.withCString(body)
  }
}

// TODO(UTF8): Move this decl back to StringIndex.swift
extension String {
  /// A position of a character or code unit in a string.
  @_fixed_layout // FIXME(sil-serialize-all)
  public struct Index {
    @usableFromInline
    internal var _rawBits: UInt64

    @inlinable @inline(__always)
    init(_ raw: UInt64) {
      self._rawBits = raw
      self._invariantCheck()
    }
  }
}

extension String : LosslessStringConvertible {
  @inlinable // FIXME(sil-serialize-all)
  public init(_ content: String) {
    self = content
  }
}

extension String {
  public // @testable
  var _nfcCodeUnits: [UInt8] {
    return _slicedGuts.withNFCCodeUnitsIterator { Array($0) }
  }

  public // @testable
  func _withNFCCodeUnits(_ f: (UInt8) throws -> Void) rethrows {
    try _gutsSlice.withNFCCodeUnitsIterator_2 {
      for cu in $0 {
        try f(cu)
      }
    }
  }
}