Files
swift-mirror/stdlib/core/String.swift

492 lines
14 KiB
Swift

//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
public struct String {
public init() {
core = _StringCore()
}
public init(_ _core: _StringCore) {
self.core = _core
}
public var core: _StringCore
}
extension String {
public static func _fromWellFormedCodeUnitSequence<
Encoding: UnicodeCodec, Input: Collection
where Input.GeneratorType.Element == Encoding.CodeUnit
>(
encoding: Encoding.Type, input: Input
) -> String {
return String._fromCodeUnitSequence(encoding, input: input)!
}
public static func _fromCodeUnitSequence<
Encoding: UnicodeCodec, Input: Collection
where Input.GeneratorType.Element == Encoding.CodeUnit
>(
encoding: Encoding.Type, input: Input
) -> String? {
let (stringBufferOptional, _) =
_StringBuffer.fromCodeUnits(encoding, input: input,
repairIllFormedSequences: false)
if let stringBuffer = stringBufferOptional {
return String(stringBuffer)
} else {
return .None
}
}
public static func _fromCodeUnitSequenceWithRepair<
Encoding: UnicodeCodec, Input: Collection
where Input.GeneratorType.Element == Encoding.CodeUnit
>(
encoding: Encoding.Type, input: Input
) -> (String, hadError: Bool) {
let (stringBuffer, hadError) =
_StringBuffer.fromCodeUnits(encoding, input: input,
repairIllFormedSequences: true)
return (String(stringBuffer!), hadError)
}
}
extension String : _BuiltinExtendedGraphemeClusterLiteralConvertible {
public
static func _convertFromBuiltinExtendedGraphemeClusterLiteral(
start: Builtin.RawPointer,
byteSize: Builtin.Word,
isASCII: Builtin.Int1) -> String {
return String._fromWellFormedCodeUnitSequence(
UTF8.self,
input: UnsafeArray(
start: UnsafePointer<UTF8.CodeUnit>(start),
length: Int(byteSize)))
}
}
extension String : ExtendedGraphemeClusterLiteralConvertible {
public static func convertFromExtendedGraphemeClusterLiteral(
value: String
) -> String {
return value
}
}
extension String : _BuiltinUTF16StringLiteralConvertible {
@semantics("readonly")
public
static func _convertFromBuiltinUTF16StringLiteral(
start: Builtin.RawPointer, numberOfCodeUnits: Builtin.Word
) -> String {
return String(
_StringCore(
baseAddress: COpaquePointer(start),
count: Int(numberOfCodeUnits),
elementShift: 1,
hasCocoaBuffer: false,
owner: nil))
}
}
extension String : _BuiltinStringLiteralConvertible {
public
static func _convertFromBuiltinStringLiteral(
start: Builtin.RawPointer,
byteSize: Builtin.Word,
isASCII: Builtin.Int1) -> String {
if isASCII {
return String(
_StringCore(
baseAddress: COpaquePointer(start),
count: Int(byteSize),
elementShift: 0,
hasCocoaBuffer: false,
owner: nil))
}
else {
return String._fromWellFormedCodeUnitSequence(
UTF8.self,
input: UnsafeArray(
start: UnsafePointer<UTF8.CodeUnit>(start),
length: Int(byteSize)))
}
}
}
extension String : StringLiteralConvertible {
public static func convertFromStringLiteral(value: String) -> String {
return value
}
}
extension String : DebugPrintable {
public var debugDescription: String {
var result = "\""
for us in self.unicodeScalars {
result += us.escape(asASCII: false)
}
result += "\""
return result
}
}
extension String {
/// Return the number of code units occupied by this string
/// in the given encoding.
func _encodedLength<Encoding: UnicodeCodec>(encoding: Encoding.Type) -> Int {
var codeUnitCount = 0
self._encode(
encoding, output: SinkOf<Encoding.CodeUnit>({ _ in ++codeUnitCount;() }))
return codeUnitCount
}
// FIXME: this function does not handle the case when a wrapped NSString
// contains unpaired surrogates. Fix this before exposing this function as a
// public API. But it is unclear if it is valid to have such an NSString in
// the first place. If it is not, we should not be crashing in an obscure
// way -- add a test for that.
// Related: <rdar://problem/17340917> Please document how NSString interacts
// with unpaired surrogates
func _encode<
Encoding: UnicodeCodec,
Output: Sink
where Encoding.CodeUnit == Output.Element
>(encoding: Encoding.Type, output: Output)
{
return core.encode(encoding, output: output)
}
}
extension String: Equatable {
}
public func ==(lhs: String, rhs: String) -> Bool {
// FIXME: Compares UnicodeScalars, but should eventually do proper
// Unicode string comparison. This is above the level of the
// standard equal algorithm because even the largest units
// (Characters/a.k.a. grapheme clusters) don't have a 1-for-1
// correspondence. For example, "SS" == "ß" should be true.
//
// NOTE: if this algorithm is changed, consider updating equality comparison
// of Character.
return Swift.equal(lhs.unicodeScalars, rhs.unicodeScalars)
}
public func <(lhs: String, rhs: String) -> Bool {
// FIXME: Does lexicographical ordering on component UnicodeScalars,
// but should eventually do a proper unicode String collation. See
// the comment on == for more information.
return lexicographicalCompare(lhs.unicodeScalars, rhs.unicodeScalars)
}
// Support for copy-on-write
extension String {
mutating func _append(rhs: String) {
core.append(rhs.core)
}
mutating func _append(x: UnicodeScalar) {
core.append(x)
}
var _utf16Count: Int {
return core.count
}
init(_ storage: _StringBuffer) {
core = _StringCore(storage)
}
}
extension String : Hashable {
public var hashValue: Int {
var r : Int = 5381
_encode(
UTF8.self,
output: SinkOf<UTF8.CodeUnit> ({
r = ((r << 5) &+ r) &+ Int($0)
}))
return r
}
}
extension String : StringInterpolationConvertible {
public
static func convertFromStringInterpolation(strings: String...) -> String {
var result = String()
for str in strings {
result += str
}
return result
}
public
static func convertFromStringInterpolationSegment<T>(expr: T) -> String {
return toString(expr)
}
}
public func +(var lhs: String, rhs: String) -> String {
if (lhs.isEmpty) {
return rhs
}
lhs._append(rhs)
return lhs
}
public func +(var lhs: String, rhs: Character) -> String {
lhs._append(String(rhs))
return lhs
}
public func +(lhs: Character, rhs: String) -> String {
var result = String(lhs)
result._append(rhs)
return result
}
public func +(lhs: Character, rhs: Character) -> String {
var result = String(lhs)
result += String(rhs)
return result
}
// String append
@assignment public func += (inout lhs: String, rhs: String) {
if (lhs.isEmpty) {
lhs = rhs
}
else {
lhs._append(rhs)
}
}
@assignment public func += (inout lhs: String, rhs: Character) {
lhs += String(rhs)
}
// Comparison operators
// FIXME: Compare Characters, not code units
extension String : Comparable {
}
extension String {
/// Low-level construction interface used by introspection
/// implementation in the runtime library. Constructs a String in
/// resultStorage containing the given UTF-8.
@asmname("swift_stringFromUTF8InRawMemory")
static func _fromUTF8InRawMemory(
resultStorage: UnsafePointer<String>,
start: UnsafePointer<UTF8.CodeUnit>, utf8Count: Int
) {
resultStorage.initialize(
String._fromWellFormedCodeUnitSequence(UTF8.self,
input: UnsafeArray(start: start, length: utf8Count)))
}
}
/// String is a Collection of Character
extension String : Collection {
// An adapter over UnicodeScalarView that advances by whole Character
public struct Index : BidirectionalIndex, Reflectable {
public init(_ _base: UnicodeScalarView.IndexType) {
self._base = _base
self._lengthUTF16 = Index._measureExtendedGraphemeClusterForward(_base)
}
init(_ _base: UnicodeScalarView.IndexType, _ _lengthUTF16: Int) {
self._base = _base
self._lengthUTF16 = _lengthUTF16
}
public func successor() -> Index {
_precondition(_base != _base._viewEndIndex, "can not increment endIndex")
return Index(_endBase)
}
public func predecessor() -> Index {
_precondition(_base != _base._viewStartIndex,
"can not decrement startIndex")
let predecessorLengthUTF16 =
Index._measureExtendedGraphemeClusterBackward(_base)
return Index(UnicodeScalarView.IndexType(
_utf16Index - predecessorLengthUTF16, _base._core))
}
let _base: UnicodeScalarView.IndexType
/// The length of this extended grapheme cluster in UTF-16 code units.
let _lengthUTF16: Int
/// The integer offset of this index in UTF-16 code units.
public var _utf16Index: Int {
return _base._position
}
/// The one past end index for this extended grapheme cluster in Unicode
/// scalars.
var _endBase: UnicodeScalarView.IndexType {
return UnicodeScalarView.IndexType(
_utf16Index + _lengthUTF16, _base._core)
}
/// Returns the length of the first extended grapheme cluster in UTF-16
/// code units.
static func _measureExtendedGraphemeClusterForward(
var start: UnicodeScalarView.IndexType
) -> Int {
let end = start._viewEndIndex
if start == end {
return 0
}
let startIndexUTF16 = start._position
let unicodeScalars = UnicodeScalarView(start._core)
let graphemeClusterBreakProperty =
_UnicodeGraphemeClusterBreakPropertyTrie()
let segmenter = _UnicodeExtendedGraphemeClusterSegmenter()
var gcb0 = graphemeClusterBreakProperty.getPropertyRawValue(
unicodeScalars[start].value)
++start
for ; start != end; ++start {
// FIXME(performance): consider removing this "fast path". A branch
// that is hard to predict could be worse for performance than a few
// loads from cache to fetch the property 'gcb1'.
if segmenter.isBoundaryAfter(gcb0) {
break
}
let gcb1 = graphemeClusterBreakProperty.getPropertyRawValue(
unicodeScalars[start].value)
if segmenter.isBoundary(gcb0, gcb1) {
break
}
gcb0 = gcb1
}
return start._position - startIndexUTF16
}
/// Returns the length of the previous extended grapheme cluster in UTF-16
/// code units.
static func _measureExtendedGraphemeClusterBackward(
end: UnicodeScalarView.IndexType
) -> Int {
var start = end._viewStartIndex
if start == end {
return 0
}
let endIndexUTF16 = end._position
let unicodeScalars = UnicodeScalarView(start._core)
let graphemeClusterBreakProperty =
_UnicodeGraphemeClusterBreakPropertyTrie()
let segmenter = _UnicodeExtendedGraphemeClusterSegmenter()
var graphemeClusterStart = end
--graphemeClusterStart
var gcb0 = graphemeClusterBreakProperty.getPropertyRawValue(
unicodeScalars[graphemeClusterStart].value)
var graphemeClusterStartUTF16 = graphemeClusterStart._position
while graphemeClusterStart != start {
--graphemeClusterStart
let gcb1 = graphemeClusterBreakProperty.getPropertyRawValue(
unicodeScalars[graphemeClusterStart].value)
if segmenter.isBoundary(gcb1, gcb0) {
break
}
gcb0 = gcb1
graphemeClusterStartUTF16 = graphemeClusterStart._position
}
return endIndexUTF16 - graphemeClusterStartUTF16
}
public func getMirror() -> Mirror {
return _IndexMirror(self)
}
}
public var startIndex: Index {
return Index(unicodeScalars.startIndex)
}
public var endIndex: Index {
return Index(unicodeScalars.endIndex)
}
public subscript(i: Index) -> Character {
return Character(unicodeScalars[i._base..<i._endBase].string)
}
public func generate() -> IndexingGenerator<String> {
return IndexingGenerator(self)
}
internal struct _IndexMirror : Mirror {
var _value: Index
init(_ x: Index) {
_value = x
}
public var value: Any { return _value }
public var valueType: Any.Type { return (_value as Any).dynamicType }
public var objectIdentifier: ObjectIdentifier? { return .None }
public var disposition: MirrorDisposition { return .Aggregate }
public var count: Int { return 0 }
public subscript(i: Int) -> (String,Mirror) {
_fatalError("Mirror access out of bounds")
}
public var summary: String { return "\(_value._utf16Index)" }
public var quickLookObject: QuickLookObject? { return .Some(.Int(Int64(_value._utf16Index))) }
}
}
public func == (lhs: String.Index, rhs: String.Index) -> Bool {
return lhs._base == rhs._base
}
extension String : Sliceable {
public subscript(subRange: Range<Index>) -> String {
return String(
unicodeScalars[subRange.startIndex._base..<subRange.endIndex._base]._core)
}
}
// Algorithms
extension String {
public func join<
S : Sequence where S.GeneratorType.Element == String
>(elements: S) -> String{
return Swift.join(self, elements)
}
}