mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
890 lines
30 KiB
Swift
890 lines
30 KiB
Swift
//===----------------------------------------------------------------------===//
|
||
//
|
||
// This source file is part of the Swift.org open source project
|
||
//
|
||
// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
|
||
// Licensed under Apache License v2.0 with Runtime Library Exception
|
||
//
|
||
// See https://swift.org/LICENSE.txt for license information
|
||
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
||
//
|
||
//===----------------------------------------------------------------------===//
|
||
|
||
import SwiftShims
|
||
|
||
// FIXME: complexity documentation for most of methods on String is ought to be
|
||
// qualified with "amortized" at least, as Characters are variable-length.
|
||
|
||
/// A Unicode string value.
|
||
///
|
||
/// A string is a series of characters, such as `"Swift"`. Strings in Swift are
|
||
/// Unicode correct, locale insensitive, and designed to be efficient. The
|
||
/// `String` type bridges with the Objective-C class `NSString` and offers
|
||
/// interoperability with C functions that works with strings.
|
||
///
|
||
/// You can create new strings using string literals or string interpolations.
|
||
/// A string literal is a series of characters enclosed in quotes.
|
||
///
|
||
/// let greeting = "Welcome!"
|
||
///
|
||
/// String interpolations are string literals that evaluate any included
|
||
/// expressions and convert the results to string form. String interpolations
|
||
/// are an easy way to build a string from multiple pieces. Wrap each
|
||
/// expression in a string interpolation in parentheses, prefixed by a
|
||
/// backslash.
|
||
///
|
||
/// let name = "Rosa"
|
||
/// let personalizedGreeting = "Welcome, \(name)!"
|
||
///
|
||
/// let price = 2
|
||
/// let number = 3
|
||
/// let cookiePrice = "\(number) cookies: $\(price * number)."
|
||
///
|
||
/// Combine strings using the concatenation operator (`+`).
|
||
///
|
||
/// let longerGreeting = greeting + " We're glad you're here!"
|
||
/// print(longerGreeting)
|
||
/// // Prints "Welcome! We're glad you're here!"
|
||
///
|
||
/// Modifying and Comparing Strings
|
||
/// ===============================
|
||
///
|
||
/// Strings always have value semantics. Modifying a copy of a string leaves
|
||
/// the original unaffected.
|
||
///
|
||
/// var otherGreeting = greeting
|
||
/// otherGreeting += " Have a nice time!"
|
||
/// print(otherGreeting)
|
||
/// // Prints "Welcome! Have a nice time!"
|
||
///
|
||
/// print(greeting)
|
||
/// // Prints "Welcome!"
|
||
///
|
||
/// Comparing strings for equality using the equal-to operator (`==`) or a
|
||
/// relational operator (like `<` and `>=`) is always performed using the
|
||
/// Unicode canonical representation. This means that different
|
||
/// representations of a string compare as being equal.
|
||
///
|
||
/// let cafe1 = "Cafe\u{301}"
|
||
/// let cafe2 = "Café"
|
||
/// print(cafe1 == cafe2)
|
||
/// // Prints "true"
|
||
///
|
||
/// The Unicode code point `"\u{301}"` modifies the preceding character to
|
||
/// include an accent, so `"e\u{301}"` has the same canonical representation
|
||
/// as the single Unicode code point `"é"`.
|
||
///
|
||
/// Basic string operations are not sensitive to locale settings. This ensures
|
||
/// that string comparisons and other operations always have a single, stable
|
||
/// result, allowing strings to be used as keys in `Dictionary` instances and
|
||
/// for other purposes.
|
||
///
|
||
/// Representing Strings: Views
|
||
/// ===========================
|
||
///
|
||
/// A string is not itself a collection. Instead, it has properties that
|
||
/// present its contents as meaningful collections. Each of these collections
|
||
/// is a particular type of *view* of the string's visible and data
|
||
/// representation.
|
||
///
|
||
/// To demonstrate the different views available for every string, the
|
||
/// following examples use this `String` instance:
|
||
///
|
||
/// let cafe = "Cafe\u{301} du 🌍"
|
||
/// print(cafe)
|
||
/// // Prints "Café du 🌍"
|
||
///
|
||
/// Character View
|
||
/// --------------
|
||
///
|
||
/// A string's `characters` property is a collection of *extended grapheme
|
||
/// clusters*, which approximate human-readable characters. Many individual
|
||
/// characters, such as "é", "김", and "🇮🇳", can be made up of multiple Unicode
|
||
/// code points. These code points are combined by Unicode's boundary
|
||
/// algorithms into extended grapheme clusters, represented by Swift's
|
||
/// `Character` type. Each element of the `characters` view is represented by
|
||
/// a `Character` instance.
|
||
///
|
||
/// print(cafe.characters.count)
|
||
/// // Prints "9"
|
||
/// print(Array(cafe.characters))
|
||
/// // Prints "["C", "a", "f", "é", " ", "d", "u", " ", "🌍"]"
|
||
///
|
||
/// Each visible character in the `cafe` string is a separate element of the
|
||
/// `characters` view.
|
||
///
|
||
/// Unicode Scalar View
|
||
/// -------------------
|
||
///
|
||
/// A string's `unicodeScalars` property is a collection of Unicode scalar
|
||
/// values, the 21-bit codes that are the basic unit of Unicode. Each scalar
|
||
/// value is represented by a `UnicodeScalar` instance and is equivalent to a
|
||
/// UTF-32 code unit.
|
||
///
|
||
/// print(cafe.unicodeScalars.count)
|
||
/// // Prints "10"
|
||
/// print(Array(cafe.unicodeScalars))
|
||
/// // Prints "["C", "a", "f", "e", "\u{0301}", " ", "d", "u", " ", "\u{0001F30D}"]"
|
||
/// print(cafe.unicodeScalars.map { $0.value })
|
||
/// // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 127757]"
|
||
///
|
||
/// The `unicodeScalars` view's elements comprise each Unicode scalar value in
|
||
/// the `cafe` string. In particular, because `cafe` was declared using the
|
||
/// decomposed form of the `"é"` character, `unicodeScalars` contains the code
|
||
/// points for both the letter `"e"` (101) and the accent character `"´"`
|
||
/// (769).
|
||
///
|
||
/// UTF-16 View
|
||
/// -----------
|
||
///
|
||
/// A string's `utf16` property is a collection of UTF-16 code units, the
|
||
/// 16-bit encoding form of the string's Unicode scalar values. Each code unit
|
||
/// is stored as a `UInt16` instance.
|
||
///
|
||
/// print(cafe.utf16.count)
|
||
/// // Prints "11"
|
||
/// print(Array(cafe.utf16))
|
||
/// // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 55356, 57101]"
|
||
///
|
||
/// The elements of the `utf16` view are the code units for the string when
|
||
/// encoded in UTF-16.
|
||
///
|
||
/// The elements of this collection match those accessed through indexed
|
||
/// `NSString` APIs.
|
||
///
|
||
/// let nscafe = cafe as NSString
|
||
/// print(nscafe.length)
|
||
/// // Prints "11"
|
||
/// print(nscafe.character(at: 3))
|
||
/// // Prints "101"
|
||
///
|
||
/// UTF-8 View
|
||
/// ----------
|
||
///
|
||
/// A string's `utf8` property is a collection of UTF-8 code units, the 8-bit
|
||
/// encoding form of the string's Unicode scalar values. Each code unit is
|
||
/// stored as a `UInt8` instance.
|
||
///
|
||
/// print(cafe.utf8.count)
|
||
/// // Prints "14"
|
||
/// print(Array(cafe.utf8))
|
||
/// // Prints "[67, 97, 102, 101, 204, 129, 32, 100, 117, 32, 240, 159, 140, 141]"
|
||
///
|
||
/// The elements of the `utf8` view are the code units for the string when
|
||
/// encoded in UTF-8. This representation matches the one used when `String`
|
||
/// instances are passed to C APIs.
|
||
///
|
||
/// let cLength = strlen(cafe)
|
||
/// print(cLength)
|
||
/// // Prints "14"
|
||
///
|
||
/// Counting the Length of a String
|
||
/// ===============================
|
||
///
|
||
/// When you need to know the length of a string, you must first consider what
|
||
/// you'll use the length for. Are you measuring the number of characters that
|
||
/// will be displayed on the screen, or are you measuring the amount of
|
||
/// storage needed for the string in a particular encoding? A single string
|
||
/// can have greatly differing lengths when measured by its different views.
|
||
///
|
||
/// For example, an ASCII character like the capital letter *A* is represented
|
||
/// by a single element in each of its four views. The Unicode scalar value of
|
||
/// *A* is `65`, which is small enough to fit in a single code unit in both
|
||
/// UTF-16 and UTF-8.
|
||
///
|
||
/// let capitalA = "A"
|
||
/// print(capitalA.characters.count)
|
||
/// // Prints "1"
|
||
/// print(capitalA.unicodeScalars.count)
|
||
/// // Prints "1"
|
||
/// print(capitalA.utf16.count)
|
||
/// // Prints "1"
|
||
/// print(capitalA.utf8.count)
|
||
/// // Prints "1"
|
||
///
|
||
/// On the other hand, an emoji flag character is constructed from a pair of
|
||
/// Unicode scalars values, like `"\u{1F1F5}"` and `"\u{1F1F7}"`. Each of
|
||
/// these scalar values, in turn, is too large to fit into a single UTF-16 or
|
||
/// UTF-8 code unit. As a result, each view of the string `"🇵🇷"` reports a
|
||
/// different length.
|
||
///
|
||
/// let flag = "🇵🇷"
|
||
/// print(flag.characters.count)
|
||
/// // Prints "1"
|
||
/// print(flag.unicodeScalars.count)
|
||
/// // Prints "2"
|
||
/// print(flag.utf16.count)
|
||
/// // Prints "4"
|
||
/// print(flag.utf8.count)
|
||
/// // Prints "8"
|
||
///
|
||
/// To check whether a string is empty, use its `isEmpty` property instead
|
||
/// of comparing the length of one of the views to `0`. Unlike `isEmpty`,
|
||
/// calculating a view's `count` property requires iterating through the
|
||
/// elements of the string.
|
||
///
|
||
/// Accessing String View Elements
|
||
/// ==============================
|
||
///
|
||
/// To find individual elements of a string, use the appropriate view for your
|
||
/// task. For example, to retrieve the first word of a longer string, you can
|
||
/// search the `characters` view for a space and then create a new string from
|
||
/// a prefix of the `characters` view up to that point.
|
||
///
|
||
/// let name = "Marie Curie"
|
||
/// let firstSpace = name.characters.index(of: " ")!
|
||
/// let firstName = String(name.characters.prefix(upTo: firstSpace))
|
||
/// print(firstName)
|
||
/// // Prints "Marie"
|
||
///
|
||
/// You can convert an index into one of a string's views to an index into
|
||
/// another view.
|
||
///
|
||
/// let firstSpaceUTF8 = firstSpace.samePosition(in: name.utf8)
|
||
/// print(Array(name.utf8.prefix(upTo: firstSpaceUTF8)))
|
||
/// // Prints "[77, 97, 114, 105, 101]"
|
||
///
|
||
/// Performance Optimizations
|
||
/// =========================
|
||
///
|
||
/// Although strings in Swift have value semantics, strings use a copy-on-write
|
||
/// strategy to store their data in a buffer. This buffer can then be shared
|
||
/// by different copies of a string. A string's data is only copied lazily,
|
||
/// upon mutation, when more than one string instance is using the same
|
||
/// buffer. Therefore, the first in any sequence of mutating operations may
|
||
/// cost O(*n*) time and space.
|
||
///
|
||
/// When a string's contiguous storage fills up, a new buffer must be allocated
|
||
/// and data must be moved to the new storage. String buffers use an
|
||
/// exponential growth strategy that makes appending to a string a constant
|
||
/// time operation when averaged over many append operations.
|
||
///
|
||
/// Bridging between String and NSString
|
||
/// ====================================
|
||
///
|
||
/// Any `String` instance can be bridged to `NSString` using the type-cast
|
||
/// operator (`as`), and any `String` instance that originates in Objective-C
|
||
/// may use an `NSString` instance as its storage. Because any arbitrary
|
||
/// subclass of `NSString` can become a `String` instance, there are no
|
||
/// guarantees about representation or efficiency when a `String` instance is
|
||
/// backed by `NSString` storage. Because `NSString` is immutable, it is just
|
||
/// as though the storage was shared by a copy: The first in any sequence of
|
||
/// mutating operations causes elements to be copied into unique, contiguous
|
||
/// storage which may cost O(*n*) time and space, where *n* is the length of
|
||
/// the string's encoded representation (or more, if the underlying `NSString`
|
||
/// has unusual performance characteristics).
|
||
///
|
||
/// For more information about the Unicode terms used in this discussion, see
|
||
/// the [Unicode.org glossary][glossary]. In particular, this discussion
|
||
/// mentions [extended grapheme clusters][clusters],
|
||
/// [Unicode scalar values][scalars], and [canonical equivalence][equivalence].
|
||
///
|
||
/// [glossary]: http://www.unicode.org/glossary/
|
||
/// [clusters]: http://www.unicode.org/glossary/#extended_grapheme_cluster
|
||
/// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value
|
||
/// [equivalence]: http://www.unicode.org/glossary/#canonical_equivalent
|
||
///
|
||
/// - SeeAlso: `String.CharacterView`, `String.UnicodeScalarView`,
|
||
/// `String.UTF16View`, `String.UTF8View`
|
||
@_fixed_layout
|
||
public struct String {
|
||
/// Creates an empty string.
|
||
public init() {
|
||
_core = _StringCore()
|
||
}
|
||
|
||
public // @testable
|
||
init(_ _core: _StringCore) {
|
||
self._core = _core
|
||
}
|
||
|
||
public // @testable
|
||
var _core: _StringCore
|
||
}
|
||
|
||
extension String {
|
||
public // @testable
|
||
static func _fromWellFormedCodeUnitSequence<Encoding, Input>(
|
||
_ encoding: Encoding.Type, input: Input
|
||
) -> String
|
||
where
|
||
Encoding: UnicodeCodec,
|
||
Input: Collection,
|
||
Input.Iterator.Element == Encoding.CodeUnit {
|
||
return String._fromCodeUnitSequence(encoding, input: input)!
|
||
}
|
||
|
||
public // @testable
|
||
static func _fromCodeUnitSequence<Encoding, Input>(
|
||
_ encoding: Encoding.Type, input: Input
|
||
) -> String?
|
||
where
|
||
Encoding: UnicodeCodec,
|
||
Input: Collection,
|
||
Input.Iterator.Element == Encoding.CodeUnit {
|
||
let (stringBufferOptional, _) =
|
||
_StringBuffer.fromCodeUnits(input, encoding: encoding,
|
||
repairIllFormedSequences: false)
|
||
return stringBufferOptional.map { String(_storage: $0) }
|
||
}
|
||
|
||
public // @testable
|
||
static func _fromCodeUnitSequenceWithRepair<Encoding, Input>(
|
||
_ encoding: Encoding.Type, input: Input
|
||
) -> (String, hadError: Bool)
|
||
where
|
||
Encoding: UnicodeCodec,
|
||
Input: Collection,
|
||
Input.Iterator.Element == Encoding.CodeUnit {
|
||
|
||
let (stringBuffer, hadError) =
|
||
_StringBuffer.fromCodeUnits(input, encoding: encoding,
|
||
repairIllFormedSequences: true)
|
||
return (String(_storage: stringBuffer!), hadError)
|
||
}
|
||
}
|
||
|
||
extension String : _ExpressibleByBuiltinUnicodeScalarLiteral {
|
||
@effects(readonly)
|
||
public // @testable
|
||
init(_builtinUnicodeScalarLiteral value: Builtin.Int32) {
|
||
self = String._fromWellFormedCodeUnitSequence(
|
||
UTF32.self, input: CollectionOfOne(UInt32(value)))
|
||
}
|
||
}
|
||
|
||
extension String : ExpressibleByUnicodeScalarLiteral {
|
||
/// Creates an instance initialized to the given Unicode scalar value.
|
||
///
|
||
/// Do not call this initializer directly. It may be used by the compiler when
|
||
/// you initialize a string using a string literal that contains a single
|
||
/// Unicode scalar value.
|
||
public init(unicodeScalarLiteral value: String) {
|
||
self = value
|
||
}
|
||
}
|
||
|
||
extension String : _ExpressibleByBuiltinExtendedGraphemeClusterLiteral {
|
||
@effects(readonly)
|
||
@_semantics("string.makeUTF8")
|
||
public init(
|
||
_builtinExtendedGraphemeClusterLiteral start: Builtin.RawPointer,
|
||
utf8CodeUnitCount: Builtin.Word,
|
||
isASCII: Builtin.Int1) {
|
||
self = String._fromWellFormedCodeUnitSequence(
|
||
UTF8.self,
|
||
input: UnsafeBufferPointer(
|
||
start: UnsafeMutablePointer<UTF8.CodeUnit>(start),
|
||
count: Int(utf8CodeUnitCount)))
|
||
}
|
||
}
|
||
|
||
extension String : ExpressibleByExtendedGraphemeClusterLiteral {
|
||
/// Creates an instance initialized to the given extended grapheme cluster
|
||
/// literal.
|
||
///
|
||
/// Do not call this initializer directly. It may be used by the compiler when
|
||
/// you initialize a string using a string literal containing a single
|
||
/// extended grapheme cluster.
|
||
public init(extendedGraphemeClusterLiteral value: String) {
|
||
self = value
|
||
}
|
||
}
|
||
|
||
extension String : _ExpressibleByBuiltinUTF16StringLiteral {
|
||
@effects(readonly)
|
||
@_semantics("string.makeUTF16")
|
||
public init(
|
||
_builtinUTF16StringLiteral start: Builtin.RawPointer,
|
||
utf16CodeUnitCount: Builtin.Word
|
||
) {
|
||
self = String(
|
||
_StringCore(
|
||
baseAddress: UnsafeMutableRawPointer(start),
|
||
count: Int(utf16CodeUnitCount),
|
||
elementShift: 1,
|
||
hasCocoaBuffer: false,
|
||
owner: nil))
|
||
}
|
||
}
|
||
|
||
extension String : _ExpressibleByBuiltinStringLiteral {
|
||
@effects(readonly)
|
||
@_semantics("string.makeUTF8")
|
||
public init(
|
||
_builtinStringLiteral start: Builtin.RawPointer,
|
||
utf8CodeUnitCount: Builtin.Word,
|
||
isASCII: Builtin.Int1) {
|
||
if Bool(isASCII) {
|
||
self = String(
|
||
_StringCore(
|
||
baseAddress: UnsafeMutableRawPointer(start),
|
||
count: Int(utf8CodeUnitCount),
|
||
elementShift: 0,
|
||
hasCocoaBuffer: false,
|
||
owner: nil))
|
||
}
|
||
else {
|
||
self = String._fromWellFormedCodeUnitSequence(
|
||
UTF8.self,
|
||
input: UnsafeBufferPointer(
|
||
start: UnsafeMutablePointer<UTF8.CodeUnit>(start),
|
||
count: Int(utf8CodeUnitCount)))
|
||
}
|
||
}
|
||
}
|
||
|
||
extension String : ExpressibleByStringLiteral {
|
||
/// Creates an instance initialized to the given string value.
|
||
///
|
||
/// Do not call this initializer directly. It is used by the compiler when you
|
||
/// initialize a string using a string literal. For example:
|
||
///
|
||
/// let nextStop = "Clark & Lake"
|
||
///
|
||
/// This assignment to the `nextStop` constant calls this string literal
|
||
/// initializer behind the scenes.
|
||
public init(stringLiteral value: String) {
|
||
self = value
|
||
}
|
||
}
|
||
|
||
extension String : CustomDebugStringConvertible {
|
||
/// A representation of the string that is suitable for debugging.
|
||
public var debugDescription: String {
|
||
var result = "\""
|
||
for us in self.unicodeScalars {
|
||
result += us.escaped(asASCII: false)
|
||
}
|
||
result += "\""
|
||
return result
|
||
}
|
||
}
|
||
|
||
extension String {
|
||
/// Returns the number of code units occupied by this string
|
||
/// in the given encoding.
|
||
func _encodedLength<
|
||
Encoding: UnicodeCodec
|
||
>(_ encoding: Encoding.Type) -> Int {
|
||
var codeUnitCount = 0
|
||
self._encode(encoding, into: { _ in codeUnitCount += 1 })
|
||
return codeUnitCount
|
||
}
|
||
|
||
// FIXME: this function does not handle the case when a wrapped NSString
|
||
// contains unpaired surrogates. Fix this before exposing this function as a
|
||
// public API. But it is unclear if it is valid to have such an NSString in
|
||
// the first place. If it is not, we should not be crashing in an obscure
|
||
// way -- add a test for that.
|
||
// Related: <rdar://problem/17340917> Please document how NSString interacts
|
||
// with unpaired surrogates
|
||
func _encode<
|
||
Encoding: UnicodeCodec
|
||
>(
|
||
_ encoding: Encoding.Type,
|
||
into processCodeUnit: (Encoding.CodeUnit) -> Void
|
||
) {
|
||
return _core.encode(encoding, into: processCodeUnit)
|
||
}
|
||
}
|
||
|
||
// Support for copy-on-write
|
||
extension String {
|
||
|
||
/// Appends the given string to this string.
|
||
///
|
||
/// The following example builds a customized greeting by using the
|
||
/// `append(_:)` method:
|
||
///
|
||
/// var greeting = "Hello, "
|
||
/// if let name = getUserName() {
|
||
/// greeting.append(name)
|
||
/// } else {
|
||
/// greeting.append("friend")
|
||
/// }
|
||
/// print(greeting)
|
||
/// // Prints "Hello, friend"
|
||
///
|
||
/// - Parameter other: Another string.
|
||
public mutating func append(_ other: String) {
|
||
_core.append(other._core)
|
||
}
|
||
|
||
/// Appends the given Unicode scalar to the string.
|
||
///
|
||
/// - Parameter x: A Unicode scalar value.
|
||
///
|
||
/// - Complexity: Appending a Unicode scalar to a string averages to O(1)
|
||
/// over many additions.
|
||
@available(*, unavailable, message: "Replaced by append(_: String)")
|
||
public mutating func append(_ x: UnicodeScalar) {
|
||
Builtin.unreachable()
|
||
}
|
||
|
||
public // SPI(Foundation)
|
||
init(_storage: _StringBuffer) {
|
||
_core = _StringCore(_storage)
|
||
}
|
||
}
|
||
|
||
extension String {
|
||
@effects(readonly)
|
||
@_semantics("string.concat")
|
||
public static func + (lhs: String, rhs: String) -> String {
|
||
if lhs.isEmpty {
|
||
return rhs
|
||
}
|
||
var lhs = lhs
|
||
lhs._core.append(rhs._core)
|
||
return lhs
|
||
}
|
||
|
||
// String append
|
||
public static func += (lhs: inout String, rhs: String) {
|
||
if lhs.isEmpty {
|
||
lhs = rhs
|
||
}
|
||
else {
|
||
lhs._core.append(rhs._core)
|
||
}
|
||
}
|
||
|
||
/// Constructs a `String` in `resultStorage` containing the given UTF-8.
|
||
///
|
||
/// Low-level construction interface used by introspection
|
||
/// implementation in the runtime library.
|
||
@_silgen_name("swift_stringFromUTF8InRawMemory")
|
||
public // COMPILER_INTRINSIC
|
||
static func _fromUTF8InRawMemory(
|
||
_ resultStorage: UnsafeMutablePointer<String>,
|
||
start: UnsafeMutablePointer<UTF8.CodeUnit>,
|
||
utf8CodeUnitCount: Int
|
||
) {
|
||
resultStorage.initialize(to:
|
||
String._fromWellFormedCodeUnitSequence(
|
||
UTF8.self,
|
||
input: UnsafeBufferPointer(start: start, count: utf8CodeUnitCount)))
|
||
}
|
||
}
|
||
|
||
extension Sequence where Iterator.Element == String {
|
||
|
||
/// Returns a new string by concatenating the elements of the sequence,
|
||
/// adding the given separator between each element.
|
||
///
|
||
/// The following example shows how an array of strings can be joined to a
|
||
/// single, comma-separated string:
|
||
///
|
||
/// let cast = ["Vivien", "Marlon", "Kim", "Karl"]
|
||
/// let list = cast.joined(separator: ", ")
|
||
/// print(list)
|
||
/// // Prints "Vivien, Marlon, Kim, Karl"
|
||
///
|
||
/// - Parameter separator: A string to insert between each of the elements
|
||
/// in this sequence. The default separator is an empty string.
|
||
/// - Returns: A single, concatenated string.
|
||
public func joined(separator: String = "") -> String {
|
||
var result = ""
|
||
|
||
// FIXME(performance): this code assumes UTF-16 in-memory representation.
|
||
// It should be switched to low-level APIs.
|
||
let separatorSize = separator.utf16.count
|
||
|
||
let reservation = self._preprocessingPass {
|
||
() -> Int in
|
||
var r = 0
|
||
for chunk in self {
|
||
// FIXME(performance): this code assumes UTF-16 in-memory representation.
|
||
// It should be switched to low-level APIs.
|
||
r += separatorSize + chunk.utf16.count
|
||
}
|
||
return r - separatorSize
|
||
}
|
||
|
||
if let n = reservation {
|
||
result.reserveCapacity(n)
|
||
}
|
||
|
||
if separatorSize == 0 {
|
||
for x in self {
|
||
result.append(x)
|
||
}
|
||
return result
|
||
}
|
||
|
||
var iter = makeIterator()
|
||
if let first = iter.next() {
|
||
result.append(first)
|
||
while let next = iter.next() {
|
||
result.append(separator)
|
||
result.append(next)
|
||
}
|
||
}
|
||
|
||
return result
|
||
}
|
||
}
|
||
|
||
#if _runtime(_ObjC)
|
||
@_silgen_name("swift_stdlib_NSStringLowercaseString")
|
||
func _stdlib_NSStringLowercaseString(_ str: AnyObject) -> _CocoaString
|
||
|
||
@_silgen_name("swift_stdlib_NSStringUppercaseString")
|
||
func _stdlib_NSStringUppercaseString(_ str: AnyObject) -> _CocoaString
|
||
#else
|
||
internal func _nativeUnicodeLowercaseString(_ str: String) -> String {
|
||
var buffer = _StringBuffer(
|
||
capacity: str._core.count, initialSize: str._core.count, elementWidth: 2)
|
||
|
||
// Allocation of a StringBuffer requires binding the memory to the correct
|
||
// encoding type.
|
||
let dest = buffer.start.bindMemory(
|
||
to: UTF16.CodeUnit.self, capacity: str._core.count)
|
||
|
||
// Try to write it out to the same length.
|
||
let z = _swift_stdlib_unicode_strToLower(
|
||
dest, Int32(str._core.count),
|
||
str._core.startUTF16, Int32(str._core.count))
|
||
let correctSize = Int(z)
|
||
|
||
// If more space is needed, do it again with the correct buffer size.
|
||
if correctSize != str._core.count {
|
||
buffer = _StringBuffer(
|
||
capacity: correctSize, initialSize: correctSize, elementWidth: 2)
|
||
let dest = buffer.start.bindMemory(
|
||
to: UTF16.CodeUnit.self, capacity: str._core.count)
|
||
_swift_stdlib_unicode_strToLower(
|
||
dest, Int32(correctSize), str._core.startUTF16, Int32(str._core.count))
|
||
}
|
||
|
||
return String(_storage: buffer)
|
||
}
|
||
|
||
internal func _nativeUnicodeUppercaseString(_ str: String) -> String {
|
||
var buffer = _StringBuffer(
|
||
capacity: str._core.count, initialSize: str._core.count, elementWidth: 2)
|
||
|
||
// Allocation of a StringBuffer requires binding the memory to the correct
|
||
// encoding type.
|
||
let dest = buffer.start.bindMemory(
|
||
to: UTF16.CodeUnit.self, capacity: str._core.count)
|
||
|
||
// Try to write it out to the same length.
|
||
let z = _swift_stdlib_unicode_strToUpper(
|
||
dest, Int32(str._core.count),
|
||
str._core.startUTF16, Int32(str._core.count))
|
||
let correctSize = Int(z)
|
||
|
||
// If more space is needed, do it again with the correct buffer size.
|
||
if correctSize != str._core.count {
|
||
buffer = _StringBuffer(
|
||
capacity: correctSize, initialSize: correctSize, elementWidth: 2)
|
||
let dest = buffer.start.bindMemory(
|
||
to: UTF16.CodeUnit.self, capacity: str._core.count)
|
||
_swift_stdlib_unicode_strToUpper(
|
||
dest, Int32(correctSize), str._core.startUTF16, Int32(str._core.count))
|
||
}
|
||
|
||
return String(_storage: buffer)
|
||
}
|
||
#endif
|
||
|
||
// Unicode algorithms
|
||
extension String {
|
||
// FIXME: implement case folding without relying on Foundation.
|
||
// <rdar://problem/17550602> [unicode] Implement case folding
|
||
|
||
/// A "table" for which ASCII characters need to be upper cased.
|
||
/// To determine which bit corresponds to which ASCII character, subtract 1
|
||
/// from the ASCII value of that character and divide by 2. The bit is set iff
|
||
/// that character is a lower case character.
|
||
internal var _asciiLowerCaseTable: UInt64 {
|
||
@inline(__always)
|
||
get {
|
||
return 0b0001_1111_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000
|
||
}
|
||
}
|
||
|
||
/// The same table for upper case characters.
|
||
internal var _asciiUpperCaseTable: UInt64 {
|
||
@inline(__always)
|
||
get {
|
||
return 0b0000_0000_0000_0000_0001_1111_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000
|
||
}
|
||
}
|
||
|
||
/// Returns a lowercase version of the string.
|
||
///
|
||
/// Here's an example of transforming a string to all lowercase letters.
|
||
///
|
||
/// let cafe = "Café 🍵"
|
||
/// print(cafe.lowercased())
|
||
/// // Prints "café 🍵"
|
||
///
|
||
/// - Returns: A lowercase copy of the string.
|
||
///
|
||
/// - Complexity: O(*n*)
|
||
public func lowercased() -> String {
|
||
if let asciiBuffer = self._core.asciiBuffer {
|
||
let count = asciiBuffer.count
|
||
let source = asciiBuffer.baseAddress!
|
||
let buffer = _StringBuffer(
|
||
capacity: count, initialSize: count, elementWidth: 1)
|
||
let dest = buffer.start
|
||
for i in 0..<count {
|
||
// For each character in the string, we lookup if it should be shifted
|
||
// in our ascii table, then we return 0x20 if it should, 0x0 if not.
|
||
// This code is equivalent to:
|
||
// switch source[i] {
|
||
// case let x where (x >= 0x41 && x <= 0x5a):
|
||
// dest[i] = x &+ 0x20
|
||
// case let x:
|
||
// dest[i] = x
|
||
// }
|
||
let value = source[i]
|
||
let isUpper =
|
||
_asciiUpperCaseTable >>
|
||
UInt64(((value &- 1) & 0b0111_1111) >> 1)
|
||
let add = (isUpper & 0x1) << 5
|
||
// Since we are left with either 0x0 or 0x20, we can safely truncate to
|
||
// a UInt8 and add to our ASCII value (this will not overflow numbers in
|
||
// the ASCII range).
|
||
dest.storeBytes(of: value &+ UInt8(truncatingBitPattern: add),
|
||
toByteOffset: i, as: UInt8.self)
|
||
}
|
||
return String(_storage: buffer)
|
||
}
|
||
|
||
#if _runtime(_ObjC)
|
||
return _cocoaStringToSwiftString_NonASCII(
|
||
_stdlib_NSStringLowercaseString(self._bridgeToObjectiveCImpl()))
|
||
#else
|
||
return _nativeUnicodeLowercaseString(self)
|
||
#endif
|
||
}
|
||
|
||
/// Returns an uppercase version of the string.
|
||
///
|
||
/// The following example transforms a string to uppercase letters:
|
||
///
|
||
/// let cafe = "Café 🍵"
|
||
/// print(cafe.uppercased())
|
||
/// // Prints "CAFÉ 🍵"
|
||
///
|
||
/// - Returns: An uppercase copy of the string.
|
||
///
|
||
/// - Complexity: O(*n*)
|
||
public func uppercased() -> String {
|
||
if let asciiBuffer = self._core.asciiBuffer {
|
||
let count = asciiBuffer.count
|
||
let source = asciiBuffer.baseAddress!
|
||
let buffer = _StringBuffer(
|
||
capacity: count, initialSize: count, elementWidth: 1)
|
||
let dest = buffer.start
|
||
for i in 0..<count {
|
||
// See the comment above in lowercaseString.
|
||
let value = source[i]
|
||
let isLower =
|
||
_asciiLowerCaseTable >>
|
||
UInt64(((value &- 1) & 0b0111_1111) >> 1)
|
||
let add = (isLower & 0x1) << 5
|
||
dest.storeBytes(of: value &- UInt8(truncatingBitPattern: add),
|
||
toByteOffset: i, as: UInt8.self)
|
||
}
|
||
return String(_storage: buffer)
|
||
}
|
||
|
||
#if _runtime(_ObjC)
|
||
return _cocoaStringToSwiftString_NonASCII(
|
||
_stdlib_NSStringUppercaseString(self._bridgeToObjectiveCImpl()))
|
||
#else
|
||
return _nativeUnicodeUppercaseString(self)
|
||
#endif
|
||
}
|
||
|
||
/// Creates an instance from the description of a given
|
||
/// `LosslessStringConvertible` instance.
|
||
public init<T : LosslessStringConvertible>(_ value: T) {
|
||
self = value.description
|
||
}
|
||
}
|
||
|
||
extension String : CustomStringConvertible {
|
||
public var description: String {
|
||
return self
|
||
}
|
||
}
|
||
|
||
extension String : LosslessStringConvertible {
|
||
public init?(_ description: String) {
|
||
self = description
|
||
}
|
||
}
|
||
|
||
extension String {
|
||
@available(*, unavailable, renamed: "append(_:)")
|
||
public mutating func appendContentsOf(_ other: String) {
|
||
Builtin.unreachable()
|
||
}
|
||
|
||
@available(*, unavailable, renamed: "append(contentsOf:)")
|
||
public mutating func appendContentsOf<S : Sequence>(_ newElements: S)
|
||
where S.Iterator.Element == Character {
|
||
Builtin.unreachable()
|
||
}
|
||
|
||
@available(*, unavailable, renamed: "insert(contentsOf:at:)")
|
||
public mutating func insertContentsOf<S : Collection>(
|
||
_ newElements: S, at i: Index
|
||
) where S.Iterator.Element == Character {
|
||
Builtin.unreachable()
|
||
}
|
||
|
||
@available(*, unavailable, renamed: "replaceSubrange")
|
||
public mutating func replaceRange<C : Collection>(
|
||
_ subRange: Range<Index>, with newElements: C
|
||
) where C.Iterator.Element == Character {
|
||
Builtin.unreachable()
|
||
}
|
||
|
||
@available(*, unavailable, renamed: "replaceSubrange")
|
||
public mutating func replaceRange(
|
||
_ subRange: Range<Index>, with newElements: String
|
||
) {
|
||
Builtin.unreachable()
|
||
}
|
||
|
||
@available(*, unavailable, renamed: "remove(at:)")
|
||
public mutating func removeAtIndex(_ i: Index) -> Character {
|
||
Builtin.unreachable()
|
||
}
|
||
|
||
@available(*, unavailable, renamed: "removeSubrange")
|
||
public mutating func removeRange(_ subRange: Range<Index>) {
|
||
Builtin.unreachable()
|
||
}
|
||
|
||
@available(*, unavailable, renamed: "lowercased()")
|
||
public var lowercaseString: String {
|
||
Builtin.unreachable()
|
||
}
|
||
|
||
@available(*, unavailable, renamed: "uppercased()")
|
||
public var uppercaseString: String {
|
||
Builtin.unreachable()
|
||
}
|
||
|
||
@available(*, unavailable, renamed: "init(describing:)")
|
||
public init<T>(_: T) {
|
||
Builtin.unreachable()
|
||
}
|
||
}
|
||
|
||
extension Sequence where Iterator.Element == String {
|
||
@available(*, unavailable, renamed: "joined(separator:)")
|
||
public func joinWithSeparator(_ separator: String) -> String {
|
||
Builtin.unreachable()
|
||
}
|
||
}
|