Files
swift-mirror/stdlib/core/StringCore.swift
Ted Kremenek fad874708e Adjust test cases.
Swift SVN r17964
2014-05-12 22:01:52 +00:00

484 lines
16 KiB
Swift

//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
/// The core implementation of a highly-optimizable String that
/// can store both ASCII and UTF-16, and can wrap native Swift
/// _StringBuffer or NSString instances.
///
/// Usage note: when elements are 8 bits wide, this code may
/// dereference one past the end of the byte array that it owns, so
/// make sure that storage is allocated! You want a null terminator
/// anyway, so it shouldn't be a burden.
//
// Implementation note: We try hard to avoid branches in this code, so
// for example we use integer math to avoid switching on the element
// size with the ternary operator. This is also the cause of the
// extra element requirement for 8 bit elements. See the
// implementation of subscript(Int) -> UTF16.CodeUnit below for details.
struct _StringCore {
//===--------------------------------------------------------------------===//
// Internals
// Address of the first stored element. A null value is only legal for
// opaque Cocoa strings (see _invariantCheck); the empty-string
// initializer stores _emptyStringBase here instead of null.
var _baseAddress: COpaquePointer
// Packed word: the most significant bit holds the element shift, the
// next bit is the "has Cocoa buffer" flag, and the remaining low bits
// hold the element count (see _countMask / _flagMask).
var _countAndFlags: UWord
// Object that owns the storage, if any. nativeBuffer and cocoaBuffer
// reinterpret it as a _StringBuffer or a _CocoaString respectively.
var _owner: AnyObject?
/// (private) create the implementation of a string from its component parts.
init(
  baseAddress: COpaquePointer,
  _countAndFlags: UWord,
  owner: AnyObject?
) {
  // The packed word is stored verbatim; count and flags are not re-encoded.
  self._owner = owner
  self._countAndFlags = _countAndFlags
  self._baseAddress = baseAddress
  _invariantCheck()
}
/// Assert the relationships that must hold between the base address,
/// the packed count/flags word, and the owner.
func _invariantCheck() {
  assert(count >= 0)

  // Case 1: no contiguous storage; only opaque Cocoa strings qualify.
  if _baseAddress == .null() {
    assert(cocoaBuffer,
      "Only opaque cocoa strings may have a null base pointer")
    assert(elementWidth == 2,
      "Opaque cocoa strings should have an elementWidth of 2")
    return
  }

  // Case 2: the empty-string storage, which has no owner and no elements.
  if _baseAddress == _emptyStringBase {
    assert(count == 0, "Empty string storage with non-zero length")
    assert(!_owner, "String pointing at empty storage has owner")
    return
  }

  // Case 3: a view into a native buffer must lie inside its used region
  // and agree with it on element width.
  if let buffer = nativeBuffer {
    assert(elementWidth == buffer.elementWidth,
      "_StringCore elementWidth doesn't match its buffer's")
    assert(UnsafePointer(_baseAddress) >= buffer.start)
    assert(UnsafePointer(_baseAddress) <= buffer.usedEnd)
    assert(UnsafePointer(_pointerToNth(count)) <= buffer.usedEnd)
  }
}
/// Bitmask for the count part of _countAndFlags
var _countMask: UWord {
  // Clear the two most significant bits, which are used for flags.
  let allBits = UWord.max
  return allBits >> 2
}
/// Bitmask for the flags part of _countAndFlags
var _flagMask: UWord {
  // Every bit that is not a count bit is a flag bit.
  let countBits = _countMask
  return ~countBits
}
/// Value by which to multiply a 2nd byte fetched in order to
/// assemble a UTF16 code unit from our contiguous storage. If we
/// store ASCII, this will be zero. Otherwise, it will be 0x100
var _highByteMultiplier: UTF16.CodeUnit {
  // Shifting the element shift left by 8 maps 0 -> 0 and 1 -> 0x100.
  let widthBit = UTF16.CodeUnit(elementShift)
  return widthBit << 8
}
/// Return a pointer to the Nth element of contiguous
/// storage. Caveats: The string must have contiguous storage; the
/// element may be 1 or 2 bytes wide, depending on elementWidth; the
/// result may be null if the string is empty.
func _pointerToNth(n: Int) -> COpaquePointer {
  assert(hasContiguousStorage && n >= 0 && n <= count)
  // Turn the element index into a byte offset from the base address.
  let byteOffset = n << elementShift
  let firstByte = UnsafePointer<RawByte>(_baseAddress)
  return COpaquePointer(firstByte + byteOffset)
}
/// Copy `count` elements from `srcStart` to `dstStart`, converting
/// between 1-byte and 2-byte elements when the two widths differ.
static func _copyElements(
  srcStart: COpaquePointer, srcElementWidth: Int,
  dstStart: COpaquePointer, dstElementWidth: Int,
  count: Int
) {
  if _fastPath(srcElementWidth == dstElementWidth) {
    // Same width on both sides: a raw byte copy is sufficient.
    let byteCount = UInt(count << (srcElementWidth - 1))
    c_memcpy(
      dest: UnsafePointer(dstStart),
      src: UnsafePointer(srcStart),
      size: byteCount)
    return
  }

  if srcElementWidth < dstElementWidth {
    // Widening: every 8-bit unit becomes one 16-bit unit.
    var wide = UnsafePointer<UTF16.CodeUnit>(dstStart)
    var narrow = UnsafePointer<UTF8.CodeUnit>(srcStart)
    let narrowEnd = narrow + count
    while narrow != narrowEnd {
      wide.set(UTF16.CodeUnit(narrow.get()))
      wide = wide + 1
      narrow = narrow + 1
    }
    return
  }

  // Narrowing: every 16-bit unit is converted to one 8-bit unit.
  var narrow = UnsafePointer<UTF8.CodeUnit>(dstStart)
  var wide = UnsafePointer<UTF16.CodeUnit>(srcStart)
  let wideEnd = wide + count
  while wide != wideEnd {
    narrow.set(UTF8.CodeUnit(wide.get()))
    narrow = narrow + 1
    wide = wide + 1
  }
}
//===--------------------------------------------------------------------===//
// Initialization
init(
  baseAddress: COpaquePointer,
  count: Int,
  elementShift: Int,
  hasCocoaBuffer: Bool,
  owner: AnyObject?
) {
  assert(elementShift == 0 || elementShift == 1)

  // Bit positions of the two flags at the top of the packed word.
  let widthBit = UWord(sizeof(UWord.self) * 8 - 1)
  let cocoaBit = UWord(sizeof(UWord.self) * 8 - 2)

  let widthFlag = UWord(elementShift) << widthBit
  let cocoaFlag: UWord = (hasCocoaBuffer ? 1 : 0) << cocoaBit

  self._baseAddress = baseAddress
  self._countAndFlags = widthFlag | cocoaFlag | UWord(count)
  self._owner = owner

  // The count must not spill into the bits reserved for flags.
  assert(UWord(count) & _flagMask == 0, "String too long to represent")
  _invariantCheck()
}
/// Create a _StringCore that covers the entire length of the _StringBuffer.
init(_ buffer: _StringBuffer) {
  // View the whole used portion of the buffer; the buffer is the owner.
  let start = COpaquePointer(buffer.start)
  self = _StringCore(
    baseAddress: start,
    count: buffer.usedCount,
    elementShift: buffer.elementShift,
    hasCocoaBuffer: false,
    owner: buffer
  )
}
/// Create the implementation of an empty string.
/// NOTE: there is no null terminator in an empty string!
init() {
  // No owner, zero count, all flags clear; the base address is the
  // dedicated empty-string storage rather than null.
  self._owner = .None
  self._countAndFlags = 0
  self._baseAddress = _emptyStringBase
  _invariantCheck()
}
//===--------------------------------------------------------------------===//
// Properties
/// The number of elements stored
var count: Int {
  get {
    let countBits = _countAndFlags & _countMask
    return Int(countBits)
  }
  set(newValue) {
    let newBits = UWord(newValue)
    assert(newBits & _flagMask == 0)
    // Keep the flag bits as they are and replace only the count bits.
    let flagBits = _countAndFlags & _flagMask
    _countAndFlags = flagBits | newBits
  }
}
/// left shift amount to apply to an offset N so that when
/// added to a UnsafePointer<RawByte>, it traverses N elements
var elementShift: Int {
  // The shift is stored in the most significant bit of the packed word.
  let topBit = UWord(sizeof(UWord.self) * 8 - 1)
  return Int(_countAndFlags >> topBit)
}
/// the number of bytes per element
var elementWidth: Int {
  // Shift 0 means 1-byte elements, shift 1 means 2-byte elements.
  return 1 + elementShift
}
/// True iff the base address is non-null, i.e. the elements can be
/// reached directly through _baseAddress.
var hasContiguousStorage: Bool {
  let hasBase: Bool = _baseAddress != .null()
  return _fastPath(hasBase)
}
/// are we using an NSString for storage?
var hasCocoaBuffer: Bool {
  // Move the Cocoa flag (second-highest bit) into the sign position and
  // test it by reinterpreting the word as signed.
  let shifted = _countAndFlags << 1
  return Word(shifted.value) < 0
}
/// The base address viewed as a pointer to 8-bit code units.
var startASCII: UnsafePointer<UTF8.CodeUnit> {
  assert(elementWidth == 1, "String does not contain contiguous ASCII")
  let start: UnsafePointer<UTF8.CodeUnit> = UnsafePointer(_baseAddress)
  return start
}
/// The base address viewed as a pointer to 16-bit code units. An empty
/// string passes the check regardless of its element width.
var startUTF16: UnsafePointer<UTF16.CodeUnit> {
  assert(
    count == 0 || elementWidth == 2,
    "String does not contain contiguous UTF16")
  let start: UnsafePointer<UTF16.CodeUnit> = UnsafePointer(_baseAddress)
  return start
}
/// the native _StringBuffer, if any, or .None.
var nativeBuffer: _StringBuffer? {
  // A Cocoa-backed string never exposes a native buffer.
  if hasCocoaBuffer {
    return nil
  }
  // Otherwise the owner, when present, is reinterpreted as the buffer.
  return _owner.map {
    reinterpretCast($0) as _StringBuffer
  }
}
/// the Cocoa String buffer, if any, or .None.
var cocoaBuffer: _CocoaString? {
  // Only strings flagged as Cocoa-backed have a Cocoa buffer.
  if !hasCocoaBuffer {
    return nil
  }
  // The owner, when present, is reinterpreted as the Cocoa string.
  return _owner.map {
    reinterpretCast($0) as _CocoaString
  }
}
//===--------------------------------------------------------------------===//
// slicing
/// Return the given sub-_StringCore
subscript(subRange: Range<Int>) -> _StringCore {
  let lower = subRange.startIndex
  let upper = subRange.endIndex
  assert(lower >= 0)
  assert(upper <= count)
  let newCount = upper - lower
  assert(UWord(newCount) & _flagMask == 0)

  // Strings without contiguous storage are sliced by the Cocoa helper.
  if !hasContiguousStorage {
    return _cocoaStringSlice(target: self, subRange: subRange)
  }

  // Contiguous case: same flags and owner, advanced base, new count.
  let slicedCountAndFlags = (_countAndFlags & _flagMask) | UWord(newCount)
  return _StringCore(
    baseAddress: _pointerToNth(lower),
    _countAndFlags: slicedCountAndFlags,
    owner: _owner)
}
/// Get the Nth UTF16 Code Unit stored
subscript(position: Int) -> UTF16.CodeUnit {
  assert(position >= 0)
  assert(position <= count)

  // Without a base address the lookup is delegated to the Cocoa helper.
  if _baseAddress == .null() {
    return _cocoaStringSubscript(target: self, position: position)
  }

  // Branch-free read: always load two bytes; for 8-bit elements the
  // second byte is multiplied by zero and so contributes nothing.
  let p = UnsafePointer<UInt8>(_pointerToNth(position).value)
  let lowByte = UTF16.CodeUnit(p.get())
  let highByte = UTF16.CodeUnit((p + 1).get())
  return lowByte + highByte * _highByteMultiplier
}
/// Write the string, in the given encoding, to output.
func encode<
  Encoding: UnicodeCodec,
  Output: Sink
  where Encoding.CodeUnit == Output.Element
>(encoding: Encoding.Type, output: Output)
{
  if _fastPath(_baseAddress != .null()) {
    if _fastPath(elementWidth == 1) {
      // 8-bit storage: each element is encoded as its own scalar.
      let narrowUnits = UnsafeArray(
        start: UnsafePointer<UTF8.CodeUnit>(_baseAddress), length: count
      )
      var sink = output
      for unit in narrowUnits {
        Encoding.encode(UnicodeScalar(UInt32(unit)), output: &sink)
      }
      return
    }

    // 16-bit storage: transcode from UTF-16 into the requested encoding.
    let wideUnits = UnsafeArray(
      start: UnsafePointer<UTF16.CodeUnit>(_baseAddress),
      length: count
    )
    transcode(UTF16.self, encoding, wideUnits, output)
    return
  }

  if (hasCocoaBuffer) {
    // Opaque Cocoa string: make a contiguous copy, then encode that.
    // NOTE(review): this relies on `0...count` covering exactly `count`
    // code units -- confirm the range operator's semantics for this
    // toolchain.
    let contiguous = _StringCore(
      _cocoaStringToContiguous(source: cocoaBuffer!, range: 0...count,
        minimumCapacity: 0)
    )
    contiguous.encode(encoding, output: output)
  }
}
/// Attempt to claim unused capacity in the String's existing
/// native buffer, if any. Return zero and a pointer to the claimed
/// storage if successful. Otherwise, returns a suggested new
/// capacity and a null pointer.
///
/// Note: If successful, effectively appends garbage to the String
/// until it has newSize UTF16 code units; you must immediately copy
/// valid UTF16 into that storage.
///
/// Note: if unsuccessful because of insufficient space in an
/// existing buffer, the suggested new capacity will at least double
/// the existing buffer's storage
mutating func _claimCapacity(newSize: Int,
  minElementWidth: Int) -> (Int, COpaquePointer) {
  if _fastPath(nativeBuffer && elementWidth >= minElementWidth) {
    var buffer = nativeBuffer!

    // Growth is only allowed when the buffer's "used" end coincides with
    // the end of this string; otherwise some other String is using the
    // bytes beyond our last element.
    let matchUsed = _pointerToNth(count)

    // Try to extend the used region in place.
    let claimed = buffer.grow(
      UnsafePointer<RawByte>(matchUsed.value), newUsedCount: newSize)
    if _fastPath(claimed) {
      count = newSize
      return (0, matchUsed)
    }

    if newSize > buffer.capacity {
      // Not enough room: suggest at least twice the current capacity.
      let doubled = buffer.capacity * 2
      return (max(doubled, newSize), .null())
    }
  }
  // No usable native buffer, or growth was refused for another reason.
  return (newSize, .null())
}
/// Ensure that this String references a _StringBuffer having
/// a capacity of at least newSize elements of at least the given width.
/// Effectively appends garbage to the String until it has newSize
/// UTF16 code units. Returns a pointer to the garbage code units;
/// you must immediately copy valid data into that storage.
mutating func _growBuffer(
  newSize: Int, minElementWidth: Int
) -> COpaquePointer {
  // First try to extend the existing native buffer in place.
  let (suggestedCapacity, claimedStorage) =
    _claimCapacity(newSize, minElementWidth: minElementWidth)
  if _fastPath(!claimedStorage.isNull()) {
    return claimedStorage
  }

  // Pick the element width of the replacement buffer. When the caller
  // would accept narrower elements than we currently have, narrow only
  // if the existing contents are pure ASCII.
  var newElementWidth = minElementWidth
  if minElementWidth < elementWidth {
    newElementWidth = representableAsASCII() ? 1 : 2
  }

  let preGrowthCount = count
  var newStorage = _StringBuffer(
    capacity: suggestedCapacity, initialSize: newSize,
    elementWidth: newElementWidth)

  if !hasContiguousStorage {
    // Opaque cocoa buffers might not store ASCII, so assert that
    // we've allocated for 2-byte elements.
    // FIXME: can we get Cocoa to tell us quickly that an opaque
    // string is ASCII? Do we care much about that edge case?
    assert(newStorage.elementShift == 1)
    _cocoaStringReadAll(source: cocoaBuffer!,
      destination: UnsafePointer(newStorage.start))
  } else {
    // Carry the existing elements over, converting width if necessary.
    _StringCore._copyElements(
      _baseAddress, srcElementWidth: elementWidth,
      dstStart: COpaquePointer(newStorage.start),
      dstElementWidth: newElementWidth, count: preGrowthCount)
  }

  self = _StringCore(newStorage)
  // The uninitialized tail starts right after the old contents.
  return _pointerToNth(preGrowthCount)
}
/// Append the scalar `c`, stored as a single 8-bit element, a single
/// 16-bit code unit, or a surrogate pair, depending on its value and on
/// the element width of the storage after growth.
mutating func append(c: UnicodeScalar) {
  _invariantCheck()

  // Bytes needed per code unit: 1 if c fits in ASCII storage, else 2.
  var minBytesPerCodeUnit = 2
  if c.value <= 0x7f {
    minBytesPerCodeUnit = 1
  }

  // UTF16 code units needed: 1 up to 0xFFFF, otherwise 2.
  var utf16Width = 2
  if c.value <= 0xFFFF {
    utf16Width = 1
  }

  let destination = _growBuffer(count + utf16Width,
    minElementWidth: minBytesPerCodeUnit)

  if _fastPath(elementWidth == 1) {
    assert(
      _pointerToNth(count)
      == COpaquePointer(UnsafePointer<RawByte>(destination) + 1))
    UnsafePointer<UTF8.CodeUnit>(destination).set(UTF8.CodeUnit(c.value))
    _invariantCheck()
    return
  }

  let destination16 = UnsafePointer<UTF16.CodeUnit>(destination.value)
  if _fastPath(utf16Width == 1) {
    assert(_pointerToNth(count) == COpaquePointer(destination16 + 1))
    destination16.set(UTF16.CodeUnit(c.value))
  } else {
    // Two code units: write the lead surrogate, then the trail surrogate.
    assert(_pointerToNth(count) == COpaquePointer(destination16 + 2))
    destination16.set(UTF16.leadSurrogate(c))
    (destination16 + 1).set(UTF16.trailSurrogate(c))
  }
  _invariantCheck()
}
/// Append all elements of `rhs`, growing (and if needed widening) this
/// string's storage first.
mutating func append(rhs: _StringCore) {
  _invariantCheck()

  // Keep our own width unless rhs is wider; in that case widen only if
  // rhs actually contains something that is not ASCII.
  var minElementWidth = elementWidth
  if elementWidth < rhs.elementWidth {
    minElementWidth = rhs.representableAsASCII() ? 1 : 2
  }

  let destination = _growBuffer(count + rhs.count,
    minElementWidth: minElementWidth)

  if _fastPath(rhs.hasContiguousStorage) {
    _StringCore._copyElements(
      rhs._baseAddress, srcElementWidth: rhs.elementWidth,
      dstStart: destination, dstElementWidth: elementWidth,
      count: rhs.count)
  } else {
    // rhs has no contiguous storage; its contents are read out through
    // the Cocoa helper into 2-byte elements.
    assert(elementWidth == 2)
    _cocoaStringReadAll(source: rhs.cocoaBuffer!,
      destination: UnsafePointer(destination))
  }
  _invariantCheck()
}
/// Return true iff the contents of this string can be
/// represented as pure ASCII. O(N) in the worst case
func representableAsASCII() -> Bool {
  // Without contiguous storage the contents are not inspected here.
  if _slowPath(!hasContiguousStorage) {
    return false
  }
  // 8-bit storage is treated as ASCII without scanning.
  if _fastPath(elementWidth == 1) {
    return true
  }
  // 16-bit storage: scan for any code unit above 0x7f.
  let units = UnsafeArray(
    start: UnsafePointer<UTF16.CodeUnit>(_baseAddress),
    length: count)
  let hasNonASCII = contains(units) { $0 > 0x7f }
  return !hasNonASCII
}
}
// Collection conformance: elements are addressed by Int offsets in
// 0..count and produced through an indexing generator.
extension _StringCore : Collection {
  var startIndex: Int { return 0 }

  var endIndex: Int { return count }

  func generate() -> IndexingGenerator<_StringCore> {
    let generator = IndexingGenerator(self)
    return generator
  }
}
// Declares Sliceable conformance; no members are added here.
// NOTE(review): presumably satisfied by the Range<Int> subscript declared
// in the main struct body -- confirm against the Sliceable protocol.
extension _StringCore : Sliceable {}
// Used to support a tighter invariant: all strings with contiguous
// storage have a non-NULL base address.
// Four zero bytes backing every empty string.
// NOTE(review): presumably sized so that the two-byte read performed by
// the Int subscript at offset 0 stays in bounds -- confirm.
var _emptyStringStorage: UInt32 = 0
// Address of _emptyStringStorage as an opaque pointer; used as the base
// address of the empty string so that it is never null.
var _emptyStringBase: COpaquePointer {
return COpaquePointer(
UnsafePointer<UInt16>(Builtin.addressof(&_emptyStringStorage)))
}