mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
Also, un-constrain it so that the type of the sequences passed as an argument does not have to match the type of the method target. Swift SVN r29467
393 lines
12 KiB
Swift
393 lines
12 KiB
Swift
//===--- StringCharacterView.swift - String's Collection of Characters ----===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See http://swift.org/LICENSE.txt for license information
|
|
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// String is-not-a SequenceType or CollectionType, but it exposes a
|
|
// collection of characters.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
extension String {
  /// A view of a `String`'s contents as a collection of `Character`s
  /// ([extended grapheme
  /// clusters](http://www.unicode.org/glossary/#extended_grapheme_cluster)).
  public struct CharacterView {
    /// The string core whose contents this view presents.
    internal var _core: _StringCore

    /// Create a view of the `Character`s in `text`.
    ///
    /// The view takes its core from `text`.
    public init(_ text: String) {
      _core = text._core
    }

    /// Create a view directly over the given string core.
    public // @testable
    init(_ _core: _StringCore) {
      self._core = _core
    }
  }

  /// The `String`'s contents viewed as a collection of `Character`s
  /// ([extended grapheme
  /// clusters](http://www.unicode.org/glossary/#extended_grapheme_cluster)).
  public var characters: CharacterView {
    return CharacterView(_core)
  }

  /// Efficiently mutate `self` by applying `body` to its `characters`,
  /// returning whatever `body` returns.
  ///
  /// - Warning: Do not rely on anything about `self` (the `String`
  ///   that is the target of this method) during the execution of
  ///   `body`: it may not appear to have its correct value.  Instead,
  ///   use only the `String.CharacterView` argument to `body`.
  public mutating func withMutableCharacters<R>(body: (inout CharacterView)->R) -> R {
    // Mutating `self.characters` directly would force multiple
    // references to exist at the point of mutation.  Instead, the core
    // is moved into a scratch view for the duration of `body` (leaving
    // the scratch view's original core in `self`) and moved back
    // afterwards.
    var scratchView = CharacterView("")
    swap(&_core, &scratchView._core)
    let result = body(&scratchView)
    swap(&_core, &scratchView._core)
    return result
  }

  /// Construct the `String` corresponding to the given view of
  /// `Character`s.
  public init(_ characters: CharacterView) {
    self.init(characters._core)
  }
}
|
|
|
|
/// Makes `String.CharacterView` a collection whose elements are
/// `Character`s.
extension String.CharacterView : CollectionType {
  internal typealias UnicodeScalarView = String.UnicodeScalarView

  /// A Unicode scalar view constructed over this view's core.
  internal var unicodeScalars: UnicodeScalarView {
    return UnicodeScalarView(_core)
  }

  /// A character position.
  public struct Index : BidirectionalIndexType, Comparable, _Reflectable {
    /// The Unicode scalar position at which this extended grapheme
    /// cluster starts.
    internal let _base: UnicodeScalarView.Index

    /// The length of this extended grapheme cluster in UTF-16 code units.
    internal let _lengthUTF16: Int

    /// Create an index positioned at `_base`, measuring the length of
    /// the extended grapheme cluster that starts there.
    public // SPI(Foundation)
    init(_base: String.UnicodeScalarView.Index) {
      self.init(
        _base: _base,
        _lengthUTF16: Index._measureExtendedGraphemeClusterForward(_base))
    }

    /// Create an index positioned at `_base` whose cluster length is
    /// already known to be `_lengthUTF16` UTF-16 code units.
    internal init(_base: UnicodeScalarView.Index, _lengthUTF16: Int) {
      self._base = _base
      self._lengthUTF16 = _lengthUTF16
    }

    /// Returns the next consecutive value after `self`.
    ///
    /// - Requires: The next value is representable.
    public func successor() -> Index {
      _precondition(_base != _base._viewEndIndex, "can not increment endIndex")
      // The next cluster begins exactly where this one ends.
      let nextBase = _endBase
      return Index(_base: nextBase)
    }

    /// Returns the previous consecutive value before `self`.
    ///
    /// - Requires: The previous value is representable.
    public func predecessor() -> Index {
      _precondition(_base != _base._viewStartIndex,
        "can not decrement startIndex")
      // Measure the cluster that ends at `_base` and step back over it.
      let precedingLengthUTF16 =
        Index._measureExtendedGraphemeClusterBackward(_base)
      let precedingBase = UnicodeScalarView.Index(
        _utf16Index - precedingLengthUTF16, _base._core)
      return Index(_base: precedingBase)
    }

    /// The integer offset of this index in UTF-16 code units.
    public // SPI(Foundation)
    var _utf16Index: Int {
      return _base._position
    }

    /// The one past end index for this extended grapheme cluster in Unicode
    /// scalars.
    internal var _endBase: UnicodeScalarView.Index {
      let endOffsetUTF16 = _utf16Index + _lengthUTF16
      return UnicodeScalarView.Index(endOffsetUTF16, _base._core)
    }

    /// Returns the length, in UTF-16 code units, of the extended
    /// grapheme cluster that begins at `start`.
    ///
    /// Returns 0 when `start` is the end index of its view.
    internal static func _measureExtendedGraphemeClusterForward(
      start: UnicodeScalarView.Index
    ) -> Int {
      let limit = start._viewEndIndex
      if start == limit {
        return 0
      }

      let scalars = UnicodeScalarView(start._core)
      let breakProperties = _UnicodeGraphemeClusterBreakPropertyTrie()
      let segmenter = _UnicodeExtendedGraphemeClusterSegmenter()

      // Break property of the most recently consumed scalar.
      var previousProperty = breakProperties.getPropertyRawValue(
        scalars[start].value)
      var cursor = start.successor()

      while cursor != limit {
        // FIXME(performance): consider removing this "fast path".  A branch
        // that is hard to predict could be worse for performance than a few
        // loads from cache to fetch the property of the next scalar.
        if segmenter.isBoundaryAfter(previousProperty) {
          break
        }
        let currentProperty = breakProperties.getPropertyRawValue(
          scalars[cursor].value)
        if segmenter.isBoundary(previousProperty, currentProperty) {
          break
        }
        previousProperty = currentProperty
        cursor = cursor.successor()
      }

      // `cursor` now sits on the first scalar past the cluster (or at
      // the end of the view).
      return cursor._position - start._position
    }

    /// Returns the length, in UTF-16 code units, of the extended
    /// grapheme cluster that ends just before `end`.
    ///
    /// Returns 0 when `end` is the start index of its view.
    internal static func _measureExtendedGraphemeClusterBackward(
      end: UnicodeScalarView.Index
    ) -> Int {
      let limit = end._viewStartIndex
      if limit == end {
        return 0
      }

      let scalars = UnicodeScalarView(limit._core)
      let breakProperties = _UnicodeGraphemeClusterBreakPropertyTrie()
      let segmenter = _UnicodeExtendedGraphemeClusterSegmenter()

      var cursor = end.predecessor()
      // Break property of the earliest scalar accepted into the cluster.
      var followingProperty = breakProperties.getPropertyRawValue(
        scalars[cursor].value)
      // UTF-16 offset of the earliest scalar accepted into the cluster.
      var clusterStartUTF16 = cursor._position

      while cursor != limit {
        cursor = cursor.predecessor()
        let candidateProperty = breakProperties.getPropertyRawValue(
          scalars[cursor].value)
        if segmenter.isBoundary(candidateProperty, followingProperty) {
          break
        }
        followingProperty = candidateProperty
        clusterStartUTF16 = cursor._position
      }

      return end._position - clusterStartUTF16
    }

    /// Returns a mirror that reflects `self`.
    public func getMirror() -> MirrorType {
      return _IndexMirror(self)
    }
  }

  /// The position of the first `Character` if `self` is
  /// non-empty; identical to `endIndex` otherwise.
  public var startIndex: Index {
    let firstScalar = unicodeScalars.startIndex
    return Index(_base: firstScalar)
  }

  /// The "past the end" position.
  ///
  /// `endIndex` is not a valid argument to `subscript`, and is always
  /// reachable from `startIndex` by zero or more applications of
  /// `successor()`.
  public var endIndex: Index {
    let pastLastScalar = unicodeScalars.endIndex
    return Index(_base: pastLastScalar)
  }

  /// Access the `Character` at `position`.
  ///
  /// - Requires: `position` is a valid position in `self` and
  ///   `position != endIndex`.
  public subscript(i: Index) -> Character {
    // The character is built from the scalars spanning this index's
    // extended grapheme cluster.
    let clusterScalars = unicodeScalars[i._base..<i._endBase]
    return Character(String(clusterScalars))
  }

  /// Return a *generator* over the `Character`s.
  ///
  /// - Complexity: O(1).
  public func generate() -> IndexingGenerator<String.CharacterView> {
    return IndexingGenerator(self)
  }

  /// A childless mirror for `Index` whose summary is the index's
  /// UTF-16 offset.
  internal struct _IndexMirror : MirrorType {
    /// The index being reflected.
    var _value: Index

    init(_ x: Index) {
      _value = x
    }

    var value: Any {
      return _value
    }

    var valueType: Any.Type {
      return (_value as Any).dynamicType
    }

    var objectIdentifier: ObjectIdentifier? {
      return nil
    }

    var disposition: MirrorDisposition {
      return .Aggregate
    }

    /// This mirror exposes no children.
    var count: Int {
      return 0
    }

    /// Always traps, because `count` is 0.
    subscript(i: Int) -> (String, MirrorType) {
      _preconditionFailure("MirrorType access out of bounds")
    }

    var summary: String {
      return "\(_value._utf16Index)"
    }

    var quickLookObject: QuickLookObject? {
      let utf16Offset = Int64(_value._utf16Index)
      return .Some(.Int(utf16Offset))
    }
  }
}
|
|
|
|
extension String.CharacterView : ExtensibleCollectionType {
  /// Create an empty instance.
  public init() {
    self.init("")
  }

  /// Reserve enough space to store `n` ASCII characters.
  ///
  /// The request is forwarded to the underlying core.
  ///
  /// - Complexity: O(`n`).
  public mutating func reserveCapacity(n: Int) {
    _core.reserveCapacity(n)
  }

  /// Append `c` to `self`.
  ///
  /// - Complexity: Amortized O(1).
  public mutating func append(c: Character) {
    switch c._representation {
    case .Large:
      // Go through a `String` to obtain a core that can be appended.
      _core.append(String(c)._core)
    case .Small(let payload):
      // Small characters are appended as their UTF-16 code units.
      let packedBytes = Character._smallValue(payload)
      _core.extend(Character._SmallUTF16(packedBytes))
    }
  }

  /// Append the elements of `newElements` to `self`.
  public mutating func extend<
    S : SequenceType
    where S.Generator.Element == Character
  >(newElements: S) {
    // Reserve up front, using the sequence's own lower bound on its
    // length, before appending one character at a time.
    let minimumCapacity = _core.count + newElements.underestimateCount()
    reserveCapacity(minimumCapacity)
    for character in newElements {
      append(character)
    }
  }

  /// Create an instance containing `characters`.
  public init<
    S : SequenceType
    where S.Generator.Element == Character
  >(_ characters: S) {
    self.init()
    extend(characters)
  }
}
|
|
|
|
extension String.CharacterView : RangeReplaceableCollectionType {
  /// Replace the given `subRange` of elements with `newElements`.
  ///
  /// Invalidates all indices with respect to `self`.
  ///
  /// - Complexity: O(`subRange.count`) if `subRange.endIndex
  ///   == self.endIndex` and `isEmpty(newElements)`, O(N) otherwise.
  public mutating func replaceRange<
    C: CollectionType where C.Generator.Element == Character
  >(
    subRange: Range<Index>, with newElements: C
  ) {
    // Translate the character range into UTF-16 offsets, then splice in
    // the concatenated UTF-16 of the replacement characters.
    let utf16Start = subRange.startIndex._base._position
    let utf16End = subRange.endIndex._base._position
    let replacementUTF16 = _lazyConcatenate(
      lazy(newElements).map { character in character.utf16 })
    _core.replaceRange(utf16Start..<utf16End, with: replacementUTF16)
  }

  /// Insert `newElement` at index `i`.
  ///
  /// Invalidates all indices with respect to `self`.
  ///
  /// - Complexity: O(`self.count`).
  public mutating func insert(newElement: Character, atIndex i: Index) {
    // Forwards to the free function of the same name.
    Swift.insert(&self, newElement, atIndex: i)
  }

  /// Insert `newElements` at index `i`.
  ///
  /// Invalidates all indices with respect to `self`.
  ///
  /// - Complexity: O(`self.count + newElements.count`).
  public mutating func splice<
    S : CollectionType where S.Generator.Element == Character
  >(newElements: S, atIndex i: Index) {
    // Forwards to the free function of the same name.
    Swift.splice(&self, newElements, atIndex: i)
  }

  /// Remove and return the element at index `i`.
  ///
  /// Invalidates all indices with respect to `self`.
  ///
  /// - Complexity: O(`self.count`).
  public mutating func removeAtIndex(i: Index) -> Character {
    // Forwards to the free function of the same name.
    return Swift.removeAtIndex(&self, i)
  }

  /// Remove the indicated `subRange` of characters.
  ///
  /// Invalidates all indices with respect to `self`.
  ///
  /// - Complexity: O(`self.count`).
  public mutating func removeRange(subRange: Range<Index>) {
    // Forwards to the free function of the same name.
    Swift.removeRange(&self, subRange)
  }

  /// Remove all characters.
  ///
  /// Invalidates all indices with respect to `self`.
  ///
  /// - parameter keepCapacity: If `true`, prevents the release of
  ///   allocated storage, which can be a useful optimization
  ///   when `self` is going to be grown again.
  public mutating func removeAll(keepCapacity keepCapacity: Bool = false) {
    // Forwards to the free function of the same name.
    Swift.removeAll(&self, keepCapacity: keepCapacity)
  }
}
|
|
|
|
extension String.CharacterView : Sliceable {
  /// Access the characters in the given `subRange`.
  ///
  /// - Complexity: O(1) unless bridging from Objective-C requires an
  ///   O(N) conversion.
  public subscript(subRange: Range<Index>) -> String.CharacterView {
    // Slice at the Unicode scalar level, then wrap the resulting core
    // in a new character view.
    let lowerScalar = subRange.startIndex._base
    let upperScalar = subRange.endIndex._base
    let slicedScalars =
      String(_core).unicodeScalars[lowerScalar..<upperScalar]
    return String.CharacterView(slicedScalars._core)
  }
}
|
|
|