mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
271 lines
10 KiB
Swift
271 lines
10 KiB
Swift
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// A type that can represent a string as a collection of characters.
///
/// Do not declare new conformances to `StringProtocol`. Only the `String` and
/// `Substring` types in the standard library are valid conforming types.
public protocol StringProtocol
  : BidirectionalCollection,
  TextOutputStream, TextOutputStreamable,
  LosslessStringConvertible, ExpressibleByStringInterpolation,
  Hashable, Comparable
  where Iterator.Element == Character,
        Index == String.Index,
        SubSequence : StringProtocol,
        StringInterpolation == DefaultStringInterpolation
{
  /// The type of the view returned by `utf8`: a collection of `UInt8` code
  /// units that shares its index type with the string.
  associatedtype UTF8View : /*Bidirectional*/Collection
  where UTF8View.Element == UInt8, // Unicode.UTF8.CodeUnit
        UTF8View.Index == Index

  /// The type of the view returned by `utf16`: a bidirectional collection of
  /// `UInt16` code units that shares its index type with the string.
  associatedtype UTF16View : BidirectionalCollection
  where UTF16View.Element == UInt16, // Unicode.UTF16.CodeUnit
        UTF16View.Index == Index

  /// The type of the view returned by `unicodeScalars`: a bidirectional
  /// collection of `Unicode.Scalar` values that shares its index type with
  /// the string.
  associatedtype UnicodeScalarView : BidirectionalCollection
  where UnicodeScalarView.Element == Unicode.Scalar,
        UnicodeScalarView.Index == Index

  /// The type of a slice of the string; defaults to `Substring`.
  associatedtype SubSequence = Substring

  /// A view of the string's contents as a collection of UTF-8 code units.
  var utf8: UTF8View { get }

  /// A view of the string's contents as a collection of UTF-16 code units.
  var utf16: UTF16View { get }

  /// A view of the string's contents as a collection of Unicode scalar
  /// values.
  var unicodeScalars: UnicodeScalarView { get }

  /// Returns a Boolean value indicating whether the string begins with the
  /// specified prefix.
  ///
  /// - Parameter prefix: A possible prefix to test against this string.
  /// - Returns: `true` if the string begins with `prefix`; otherwise,
  ///   `false`.
  func hasPrefix(_ prefix: String) -> Bool

  /// Returns a Boolean value indicating whether the string ends with the
  /// specified suffix.
  ///
  /// - Parameter suffix: A possible suffix to test against this string.
  /// - Returns: `true` if the string ends with `suffix`; otherwise, `false`.
  func hasSuffix(_ suffix: String) -> Bool

  /// Returns a lowercase version of the string.
  func lowercased() -> String

  /// Returns an uppercase version of the string.
  func uppercased() -> String

  /// Creates a string from the given Unicode code units in the specified
  /// encoding.
  ///
  /// - Parameters:
  ///   - codeUnits: A collection of code units encoded in the encoding
  ///     specified in `sourceEncoding`.
  ///   - sourceEncoding: The encoding in which `codeUnits` should be
  ///     interpreted.
  init<C: Collection, Encoding: Unicode.Encoding>(
    decoding codeUnits: C, as sourceEncoding: Encoding.Type
  )
  where C.Iterator.Element == Encoding.CodeUnit

  /// Creates a string from the null-terminated, UTF-8 encoded sequence of
  /// bytes at the given pointer.
  ///
  /// - Parameter nullTerminatedUTF8: A pointer to a sequence of contiguous,
  ///   UTF-8 encoded bytes ending just before the first zero byte.
  init(cString nullTerminatedUTF8: UnsafePointer<CChar>)

  /// Creates a string from the null-terminated sequence of bytes at the given
  /// pointer.
  ///
  /// - Parameters:
  ///   - nullTerminatedCodeUnits: A pointer to a sequence of contiguous code
  ///     units in the encoding specified in `sourceEncoding`, ending just
  ///     before the first zero code unit.
  ///   - sourceEncoding: The encoding in which the code units should be
  ///     interpreted.
  init<Encoding: Unicode.Encoding>(
    decodingCString nullTerminatedCodeUnits: UnsafePointer<Encoding.CodeUnit>,
    as sourceEncoding: Encoding.Type)

  /// Calls the given closure with a pointer to the contents of the string,
  /// represented as a null-terminated sequence of UTF-8 code units.
  ///
  /// The pointer passed as an argument to `body` is valid only during the
  /// execution of `withCString(_:)`. Do not store or return the pointer for
  /// later use.
  ///
  /// - Parameter body: A closure with a pointer parameter that points to a
  ///   null-terminated sequence of UTF-8 code units. If `body` has a return
  ///   value, that value is also used as the return value for the
  ///   `withCString(_:)` method. The pointer argument is valid only for the
  ///   duration of the method's execution.
  /// - Returns: The return value, if any, of the `body` closure parameter.
  func withCString<Result>(
    _ body: (UnsafePointer<CChar>) throws -> Result) rethrows -> Result

  /// Calls the given closure with a pointer to the contents of the string,
  /// represented as a null-terminated sequence of code units.
  ///
  /// The pointer passed as an argument to `body` is valid only during the
  /// execution of `withCString(encodedAs:_:)`. Do not store or return the
  /// pointer for later use.
  ///
  /// - Parameters:
  ///   - targetEncoding: The encoding in which the code units should be
  ///     interpreted.
  ///   - body: A closure with a pointer parameter that points to a
  ///     null-terminated sequence of code units. If `body` has a return
  ///     value, that value is also used as the return value for the
  ///     `withCString(encodedAs:_:)` method. The pointer argument is valid
  ///     only for the duration of the method's execution.
  /// - Returns: The return value, if any, of the `body` closure parameter.
  func withCString<Result, Encoding: Unicode.Encoding>(
    encodedAs targetEncoding: Encoding.Type,
    _ body: (UnsafePointer<Encoding.CodeUnit>) throws -> Result
  ) rethrows -> Result
}
|
|
|
|
extension StringProtocol {
  // TODO(String performance): Make a _SharedString for non-smol Substrings
  //
  // TODO(String performance): Provide a closure-based call with stack-allocated
  // _SharedString for non-smol Substrings
  //
  /// A `String` with the same contents as this value, built with
  /// `String(self)`.
  public // @SPI(NSStringAPI.swift)
  var _ephemeralString: String {
    @_specialize(where Self == String)
    @_specialize(where Self == Substring)
    get {
      let converted = String(self)
      return converted
    }
  }

  /// A `_StringGutsSlice` describing this value's contents.
  ///
  /// A `String` yields a slice built from its guts, a `Substring` yields a
  /// slice built from its whole guts together with its offset range, and any
  /// other conforming type is first converted with `String(self)`.
  internal var _gutsSlice: _StringGutsSlice {
    @_specialize(where Self == String)
    @_specialize(where Self == Substring)
    @inline(__always) get {
      switch self {
      case let string as String:
        return _StringGutsSlice(string._guts)
      case let substring as Substring:
        return _StringGutsSlice(substring._wholeGuts, substring._offsetRange)
      default:
        return _StringGutsSlice(String(self)._guts)
      }
    }
  }

  /// The encoded offsets of `startIndex` and `endIndex`, as a half-open
  /// range.
  ///
  /// Both indices are expected to have a transcoded offset of zero; this is
  /// checked with `_internalInvariant`.
  @inlinable
  internal var _offsetRange: Range<Int> {
    @inline(__always) get {
      let lowerIndex = startIndex
      let upperIndex = endIndex
      _internalInvariant(
        lowerIndex.transcodedOffset == 0 && upperIndex.transcodedOffset == 0)
      let bounds = (lowerIndex._encodedOffset, upperIndex._encodedOffset)
      return Range(uncheckedBounds: bounds)
    }
  }

  /// The `_StringGuts` backing this value.
  ///
  /// A `String` returns its own guts, a `Substring` returns its whole guts,
  /// and any other conforming type returns the guts of `String(self)`.
  @inlinable
  internal var _wholeGuts: _StringGuts {
    @_specialize(where Self == String)
    @_specialize(where Self == Substring)
    @inline(__always) get {
      switch self {
      case let string as String:
        return string._guts
      case let substring as Substring:
        return substring._wholeGuts
      default:
        return String(self)._guts
      }
    }
  }
}
|
|
|
|
// Contiguous UTF-8 strings
|
|
extension String {
  /// A Boolean value indicating whether this string is capable of providing
  /// access to validly-encoded UTF-8 contents in contiguous memory in O(1)
  /// time.
  ///
  /// Contiguous strings always operate in O(1) time for `withUTF8` and always
  /// give a result for `String.UTF8View.withContiguousStorageIfAvailable`.
  /// Contiguous strings also benefit from fast-paths and better
  /// optimizations.
  @_alwaysEmitIntoClient
  public var isContiguousUTF8: Bool {
    return _guts.isFastUTF8
  }

  /// Makes this string contiguous if it is not already.
  ///
  /// If this mutates the string, it will invalidate any pre-existing indices.
  ///
  /// - Complexity: O(*n*) if non-contiguous, O(1) if already contiguous.
  @_alwaysEmitIntoClient
  public mutating func makeContiguousUTF8() {
    // Already-contiguous strings are the expected case and are left as-is.
    if _slowPath(!isContiguousUTF8) {
      self = String._copying(self)
    }
  }

  /// Runs `body` over the content of this string in contiguous memory.
  ///
  /// If this string is not contiguous, this will first make it contiguous,
  /// which will also speed up subsequent access. If this mutates the string,
  /// it will invalidate any pre-existing indices.
  ///
  /// Note that it is unsafe to escape the pointer provided to `body`. For
  /// example, strings of up to 15 UTF-8 code units in length may be
  /// represented in a small-string representation, and thus will be spilled
  /// into temporary stack space which is invalid after `withUTF8` finishes
  /// execution.
  ///
  /// - Parameter body: A closure that receives a buffer pointer to the
  ///   string's UTF-8 code units.
  /// - Returns: The value returned by `body`.
  /// - Complexity: O(*n*) if non-contiguous, O(1) if already contiguous.
  @_alwaysEmitIntoClient
  public mutating func withUTF8<R>(
    _ body: (UnsafeBufferPointer<UInt8>) throws -> R
  ) rethrows -> R {
    self.makeContiguousUTF8()
    let result = try _guts.withFastUTF8(body)
    return result
  }
}
|
|
|
|
// Contiguous UTF-8 strings
|
|
extension Substring {
  /// A Boolean value indicating whether this substring is capable of
  /// providing access to validly-encoded UTF-8 contents in contiguous memory
  /// in O(1) time.
  ///
  /// The answer is taken from the substring's `base` string. Contiguous
  /// strings always operate in O(1) time for `withUTF8` and always give a
  /// result for `String.UTF8View.withContiguousStorageIfAvailable`.
  /// Contiguous strings also benefit from fast-paths and better
  /// optimizations.
  @_alwaysEmitIntoClient
  public var isContiguousUTF8: Bool {
    return base.isContiguousUTF8
  }

  /// Makes this substring contiguous if it is not already.
  ///
  /// If this mutates the substring, it will invalidate any pre-existing
  /// indices.
  ///
  /// - Complexity: O(*n*) if non-contiguous, O(1) if already contiguous.
  @_alwaysEmitIntoClient
  public mutating func makeContiguousUTF8() {
    // Already-contiguous substrings are the expected case and are left as-is.
    if _slowPath(!isContiguousUTF8) {
      // Copy the contents into a new string and re-slice it in full.
      self = String._copying(self)[...]
    }
  }

  /// Runs `body` over the content of this substring in contiguous memory.
  ///
  /// If this substring is not contiguous, this will first make it
  /// contiguous, which will also speed up subsequent access. If this mutates
  /// the substring, it will invalidate any pre-existing indices.
  ///
  /// Note that it is unsafe to escape the pointer provided to `body`. For
  /// example, strings of up to 15 UTF-8 code units in length may be
  /// represented in a small-string representation, and thus will be spilled
  /// into temporary stack space which is invalid after `withUTF8` finishes
  /// execution.
  ///
  /// - Parameter body: A closure that receives a buffer pointer to the
  ///   substring's UTF-8 code units.
  /// - Returns: The value returned by `body`.
  /// - Complexity: O(*n*) if non-contiguous, O(1) if already contiguous.
  @_alwaysEmitIntoClient
  public mutating func withUTF8<R>(
    _ body: (UnsafeBufferPointer<UInt8>) throws -> R
  ) rethrows -> R {
    guard _fastPath(isContiguousUTF8) else {
      // Slow path: replace `self` with a contiguous copy, then hand `body`
      // the whole of the new storage.
      makeContiguousUTF8()
      return try _wholeGuts.withFastUTF8(body)
    }

    // Fast path: expose only the part of the shared storage that this
    // substring covers.
    return try _wholeGuts.withFastUTF8(range: _offsetRange) { utf8Buffer in
      return try body(utf8Buffer)
    }
  }
}
|