Files
swift-mirror/stdlib/core/Character.swift
Arnold Schwaighofer da6d9152b6 Differentiate between user assertion and preconditions and the like
assert() and fatalError()
These functions are meant to be used in user code. They are enabled in debug
mode and disabled in release or fast mode.

_precondition() and _preconditionFailure()
These functions are meant to be used in library code to check preconditions at
the API boundary. They are enabled in debug mode (with a verbose message) and
release mode (trap). In fast mode they are disabled.

_debugPrecondition() and _debugPreconditionFailure()
These functions are meant to be used in library code to check preconditions that
are not necessarily comprehensive for safety (UnsafePointer can be null or an
invalid pointer but we can't check both). They are enabled only in debug mode.

_sanityCheck() and _fatalError()
These are meant to be used for internal consistency checks. They are only
enabled when the library is built with -DSWIFT_STDLIB_INTERNAL_CHECKS=ON.

I modified the code in the standard library to the best of my judgement.

rdar://16477198

Swift SVN r18212
2014-05-16 20:49:54 +00:00

134 lines
4.4 KiB
Swift

//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
/// \brief A Sink that packs UTF-8 code units into a single UInt64,
/// placing the first code unit in the least-significant byte and each
/// later one in the next higher byte.
struct IntEncoder : Sink {
  /// The code units received so far, packed low byte first.
  var asInt: UInt64 = 0

  /// The bit offset at which the next code unit will be stored; it
  /// grows by 8 for every code unit received.
  var shift: UInt64 = 0

  /// \brief Store `x` in the next free byte of `asInt`.
  mutating func put(x: UTF8.CodeUnit) {
    let positioned = UInt64(x) << shift
    asInt = asInt | positioned
    shift = shift + 8
  }
}
/// \brief A Character holds one Unicode grapheme cluster, as defined
/// by a canonical, localized, or otherwise tailored segmentation
/// algorithm.
enum Character :
  _BuiltinExtendedGraphemeClusterLiteralConvertible,
  ExtendedGraphemeClusterLiteralConvertible, Equatable {
  // Conceptually a Character is just a String.  Storage is optimized
  // for the common case in which the UTF-8 representation fits in 63
  // bits; the one remaining bit discriminates between the small and
  // the large representation.  In the small representation every
  // unused byte is filled with 0xFF.
  //
  // Any grapheme cluster that can be stored in SmallRepresentation
  // should be stored that way.
  case LargeRepresentation(OnHeap<String>)
  case SmallRepresentation(Builtin.Int63)

  /// \brief Create a Character from a single Unicode scalar, stored in
  /// the small representation.
  init(_ scalar: UnicodeScalar) {
    var encoder = IntEncoder()
    UTF8.encode(scalar, output: &encoder)
    // Fill every byte above the encoded code units with 0xFF.
    let padding: UInt64 = (~0) << encoder.shift
    encoder.asInt = encoder.asInt | padding
    self = SmallRepresentation(Builtin.trunc_Int64_Int63(encoder.asInt.value))
  }

  /// \brief Build a Character from a builtin extended grapheme cluster
  /// literal by first forming the equivalent String.
  static func _convertFromBuiltinExtendedGraphemeClusterLiteral(
    start: Builtin.RawPointer,
    byteSize: Builtin.Word,
    isASCII: Builtin.Int1) -> Character {
    let literal = String._convertFromBuiltinExtendedGraphemeClusterLiteral(
      start, byteSize: byteSize, isASCII: isASCII)
    return Character(literal)
  }

  /// \brief Return `value` unchanged; the literal is already a
  /// Character.
  static func convertFromExtendedGraphemeClusterLiteral(
    value: Character) -> Character {
    return value
  }

  /// \brief Create a Character from a non-empty String, using the
  /// small representation when the String's UTF-8 fits and the large
  /// representation otherwise.
  init(_ s: String) {
    // Up to 8 code units fit in the small representation, provided the
    // last one is a continuation byte.  The high bit of the last byte
    // is taken by the enum's discriminator, so it has to be
    // reconstructed when decoding.  That rules out 0x7f in the final
    // byte, since it could not be told apart from an unused 0xFF
    // byte.  Instead of trying to squeeze other one-byte code points
    // in there, decoding is kept simple: no code point may start in
    // the last byte, and its high bit is assumed to be 1.
    _precondition(
      s.core.count != 0, "Can't form a Character from an empty String")

    // NOTE(review): _encodeSomeUTF8 is defined elsewhere; presumably it
    // returns how far into the core it got together with the packed
    // UTF-8 bytes -- confirm against its definition.
    let (consumed, packedUTF8) = s.core._encodeSomeUTF8(0)

    // Index of the most significant bit of the packed value.
    let topBit = sizeofValue(packedUTF8) * 8 - 1

    // Small storage is usable only when the whole string was encoded
    // and the top bit of the packed value is set.
    let fitsInSmall: Bool =
      consumed == s.core.count
      && (packedUTF8 & (1 << numericCast(topBit))) != 0

    if _fastPath(fitsInSmall) {
      self = SmallRepresentation(Builtin.trunc_Int64_Int63(packedUTF8.value))
    }
    else {
      self = LargeRepresentation(OnHeap(s))
    }
  }

  /// \brief Return the index of the lowest-order byte of `value` that
  /// is 0xFF, or 8 if no byte of `value` is 0xFF.
  static func _smallSize(value: UInt64) -> Int {
    var mask: UInt64 = 0xFF
    var index = 0
    while index < 8 {
      if (value & mask) == mask {
        return index
      }
      // Move the mask up to the next byte.
      mask <<= 8
      index += 1
    }
    return 8
  }

  /// \brief Zero-extend the 63-bit payload `value` to 64 bits and set
  /// bit 63, restoring the high bit that the small representation
  /// does not store.
  static func _smallValue(value: Builtin.Int63) -> UInt64 {
    let widened = UInt64(Builtin.zext_Int63_Int64(value))
    return widened | (1<<63)
  }
}
extension String {
  /// \brief Create a String holding the grapheme cluster stored in `c`.
  init(_ c: Character) {
    switch c {
    case .LargeRepresentation(var boxed):
      // The large representation already carries the String.
      self = boxed
    case .SmallRepresentation(let payload):
      // Rebuild the full 64-bit word, then decode its leading bytes as
      // UTF-8.  `word` must be a `var` because its address is taken.
      // NOTE(review): reading the code units starting at the address of
      // `word` relies on the lowest-order byte being first in memory.
      var word = Character._smallValue(payload)
      let codeUnitCount = Character._smallSize(word)
      let codeUnits = UnsafeArray(
        start: UnsafePointer<UTF8.CodeUnit>(Builtin.addressof(&word)),
        length: codeUnitCount)
      self = String(UTF8.self, input: codeUnits)
    }
  }
}
/// \brief Return true when `lhs` and `rhs` use the same representation
/// and their payloads compare equal.
func ==(lhs: Character, rhs: Character) -> Bool {
  switch (lhs, rhs) {
  case (.SmallRepresentation(let lhsBits), .SmallRepresentation(let rhsBits)):
    let lhsWord = Character._smallValue(lhsBits)
    let rhsWord = Character._smallValue(rhsBits)
    return lhsWord == rhsWord
  case (.LargeRepresentation(let lhsBox), .LargeRepresentation(let rhsBox)):
    return lhsBox == rhsBox
  default:
    // One side is small and the other large: never equal.
    return false
  }
}