//===--- StringUTF8.swift - A UTF8 view of _StringCore ---------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
//  _StringCore currently has three representations: Native ASCII,
//  Native UTF-16, and Opaque Cocoa.  Expose each of these as UTF-8 in a
//  way that will hopefully be efficient to traverse.
//
//===----------------------------------------------------------------------===//

extension _StringCore {
  /// An integral type that holds a sequence of UTF-8 code units, starting in
  /// its low byte.
  public typealias UTF8Chunk = UInt64

  /// Encode text starting at `i` as UTF-8.  Returns a pair whose first
  /// element is the index of the text following whatever got encoded,
  /// and the second element contains the encoded UTF-8 starting in its
  /// low byte.  Any unused high bytes in the result will be set to
  /// 0xFF.
  ///
  /// Dispatches on the storage representation: single-byte elements are
  /// copied directly, contiguous two-byte storage and storage without a
  /// base address are each handed to a dedicated helper.
  func _encodeSomeUTF8(i: Int) -> (Int, UTF8Chunk) {
    _sanityCheck(i <= count)

    if _fastPath(elementWidth == 1) {
      // How many code units might we use before we've filled up
      // our UTF8Chunk with UTF-8 code units?  With one-byte elements each
      // stored element contributes exactly one byte to the chunk, so this is
      // bounded by the chunk size and by what is left in the string.
      let utf16Count = min(sizeof(UTF8Chunk.self), count - i)

      // Start with all bits set so that every byte the copy below does not
      // overwrite reads back as 0xFF (the "unused" marker).
      var result: UTF8Chunk = ~0

      _memcpy(
        dest: UnsafeMutablePointer(Builtin.addressof(&result)),
        src: UnsafeMutablePointer(startASCII + i),
        size: numericCast(utf16Count))

      return (i + utf16Count, result)
    } else if _fastPath(!_baseAddress._isNull) {
      return _encodeSomeContiguousUTF16AsUTF8(i)
    } else {
      return _encodeSomeNonContiguousUTF16AsUTF8(i)
    }
  }

  /// Helper for `_encodeSomeUTF8`, above.  Handles the case where the
  /// storage is contiguous UTF-16.
  ///
  /// Wraps the storage starting at `startUTF16` in a buffer pointer of
  /// `count` elements and transcodes from position `i`.
  func _encodeSomeContiguousUTF16AsUTF8(i: Int) -> (Int, UTF8Chunk) {
    _sanityCheck(elementWidth == 2)
    _sanityCheck(!_baseAddress._isNull)

    let storage = UnsafeBufferPointer(start: startUTF16, count: self.count)
    return _transcodeSomeUTF16AsUTF8(storage, i)
  }

  /// Helper for `_encodeSomeUTF8`, above.  Handles the case where the
  /// storage is non-contiguous UTF-16.
  ///
  /// There is no base address to read from, so each UTF-16 code unit is
  /// fetched on demand through `_cocoaStringSubscript`.
  func _encodeSomeNonContiguousUTF16AsUTF8(i: Int) -> (Int, UTF8Chunk) {
    _sanityCheck(elementWidth == 2)
    _sanityCheck(_baseAddress._isNull)

    let storage = _CollectionOf(
      startIndex: 0, endIndex: self.count) {
      (i: Int) -> UInt16 in
      return _cocoaStringSubscript(target: self, position: i)
    }
    return _transcodeSomeUTF16AsUTF8(storage, i)
  }
}

extension String {
  /// A collection of UTF-8 code units that encodes a `String` value.
  public struct UTF8View : CollectionType, Reflectable {
    let _core: _StringCore

    init(_ _core: _StringCore) {
      self._core = _core
    }

    /// A position in a `String.UTF8View`
    ///
    /// An index carries a chunk of already-encoded UTF-8 (`_buffer`) whose
    /// low byte is the code unit at this position, plus the position in
    /// `_core` of the text that follows the buffered chunk (`_coreIndex`).
    /// The end index is marked by a buffer of `~1`, i.e. a low byte of 0xFE.
    public struct Index : ForwardIndexType {
      init(
        _ _core: _StringCore, _ _coreIndex: Int,
        _ _buffer: _StringCore.UTF8Chunk
      ) {
        self._core = _core
        self._coreIndex = _coreIndex
        self._buffer = _buffer
        _sanityCheck(_coreIndex >= 0)
        _sanityCheck(_coreIndex <= _core.count)
      }

      /// Returns the next consecutive value after `self`.
      ///
      /// Requires: the next value is representable.
      public func successor() -> Index {
        // Drop the current (low) code unit and refill the vacated high byte
        // with 0xFF, the "unused" marker.
        let newBuffer0 = (_buffer >> 8) | (
          0xFF << numericCast((sizeofValue(_buffer) &- 1) &* 8)
        )

        // Anything other than all-ones means code units remain buffered.
        if _fastPath(newBuffer0 != ~0) {
          return Index(_core, _coreIndex, newBuffer0)
        }

        // The buffer is exhausted; encode the next stretch of text if there
        // is any left.
        if _fastPath(_coreIndex != _core.endIndex) {
          let (newCoreIndex, newBuffer1) = _core._encodeSomeUTF8(_coreIndex)
          _sanityCheck(newCoreIndex > _coreIndex)
          return Index(_core, newCoreIndex, newBuffer1)
        }

        // No text left.  A low byte of 0xFE means `self` is already the end
        // index, which can not be advanced; otherwise produce the end index.
        _precondition(_buffer & 0xFF != 0xFE, "can not increment endIndex")
        return Index(_core, _coreIndex, ~1)
      }

      /// The string storage this index refers into.
      let _core: _StringCore
      /// Position in `_core` of the text following what `_buffer` encodes.
      let _coreIndex: Int
      /// Encoded UTF-8 code units, current one in the low byte.
      let _buffer: _StringCore.UTF8Chunk
    }

    /// The position of the first code unit if the `String` is
    /// non-empty; identical to `endIndex` otherwise.
    public var startIndex: Index {
      if _fastPath(_core.count != 0) {
        let (coreIndex, buffer) = _core._encodeSomeUTF8(0)
        return Index(_core, coreIndex, buffer)
      }
      return endIndex
    }

    /// The "past the end" position.
    ///
    /// `endIndex` is not a valid argument to `subscript`, and is always
    /// reachable from `startIndex` by zero or more applications of
    /// `successor()`.
    public var endIndex: Index {
      // `~1` has a low byte of 0xFE, the end-of-view sentinel checked by
      // `successor()` and `subscript`.
      return Index(_core, _core.endIndex, ~1)
    }

    /// Access the element at `position`.
    ///
    /// Requires: `position` is a valid position in `self` and
    /// `position != endIndex`.
    public subscript(position: Index) -> UTF8.CodeUnit {
      // The current code unit always lives in the low byte of the buffer.
      let result: UTF8.CodeUnit = numericCast(position._buffer & 0xFF)
      _precondition(result != 0xFE, "can not subscript using endIndex")
      return result
    }

    /// Return a *generator* over the UTF-8 code units that comprise this
    /// *sequence*.
    ///
    /// Complexity: O(1)
    public func generate() -> IndexingGenerator<UTF8View> {
      return IndexingGenerator(self)
    }

    /// Returns a mirror that reflects `self`.
    public func getMirror() -> MirrorType {
      return _UTF8ViewMirror(self)
    }
  }

  /// A UTF-8 encoding of `self`.
  public var utf8: UTF8View {
    return UTF8View(self._core)
  }

  /// A pointer to the string's storage when it is stored with one-byte
  /// elements (`elementWidth == 1`); `nil` otherwise.
  public var _contiguousUTF8: UnsafeMutablePointer<UTF8.CodeUnit> {
    return _core.elementWidth == 1 ? _core.startASCII : nil
  }

  /// A contiguously-stored nul-terminated UTF-8 representation of
  /// `self`.
  ///
  /// To access the underlying memory, invoke
  /// `withUnsafeBufferPointer` on the `ContiguousArray`.
  public var nulTerminatedUTF8: ContiguousArray<UTF8.CodeUnit> {
    var result = ContiguousArray<UTF8.CodeUnit>()
    // One extra slot for the trailing nul.
    result.reserveCapacity(count(utf8) + 1)
    result += utf8
    result.append(0)
    return result
  }
}

/// Returns `true` iff `lhs` and `rhs` have the same core position and the
/// same buffered chunk.  The underlying `_core` values are not compared.
public func == (
  lhs: String.UTF8View.Index,
  rhs: String.UTF8View.Index
) -> Bool {
  return lhs._coreIndex == rhs._coreIndex && lhs._buffer == rhs._buffer
}