Files
swift-mirror/stdlib/core/UnicodeScalar.swift
Erik Eckstein d0697f2ac1 Make internal stdlib functions public, which are called from the stdlib tests.
And make sure that all those public identifiers are preceded with underscores.

I marked these public-modifiers with "// @testable" to document why they are public.
If some day we have a @testable attribute it should be used instead of those public-modifiers.

Again, this is needed for enabling dead internal function elimination in the stdlib.



Swift SVN r22657
2014-10-10 09:45:10 +00:00

325 lines
9.2 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
// UnicodeScalar Type
//===----------------------------------------------------------------------===//
/// A `Unicode scalar value
/// <http://www.unicode.org/glossary/#unicode_scalar_value>`_.
///
/// The numeric value is stored as a `Builtin.Int32` and exposed through
/// `value` as a `UInt32`.
public struct UnicodeScalar :
_BuiltinUnicodeScalarLiteralConvertible,
UnicodeScalarLiteralConvertible {
// Underlying storage for the scalar's numeric value.
var _value: Builtin.Int32
/// A numeric representation of `self`.
public var value: UInt32 {
get {
return UInt32(_value)
}
}
/// Create an instance from the builtin literal `value`.
///
/// The value is stored as-is; no range validation is performed here.
@transparent
public init(_builtinUnicodeScalarLiteral value: Builtin.Int32) {
self._value = value
}
/// Create an instance initialized to `value`.
@transparent
public init(unicodeScalarLiteral value: UnicodeScalar) {
self = value
}
/// Creates an instance of the NUL scalar value.
public init() {
self._value = Int32(0).value
}
// Internal: wrap a builtin value directly, without any validation.
init(_ value : Builtin.Int32) {
self._value = value
}
/// Create an instance with numeric value `v`.
///
/// Requires: `v` is a valid Unicode scalar value.  Both the surrogate
/// exclusion and the upper bound are checked with `_precondition`.
public init(_ v : UInt32) {
// Unicode 6.3.0:
//
// D9. Unicode codespace: A range of integers from 0 to 10FFFF.
//
// D76. Unicode scalar value: Any Unicode code point except
// high-surrogate and low-surrogate code points.
//
// * As a result of this definition, the set of Unicode scalar values
// consists of the ranges 0 to D7FF and E000 to 10FFFF, inclusive.
_precondition(v < 0xD800 || v > 0xDFFF,
"high- and low-surrogate code points are not valid Unicode scalar values")
_precondition(v <= 0x10FFFF, "value is outside of Unicode codespace")
self._value = v.value
}
/// Create an instance with numeric value `v`.
///
/// Requires: `v` is a valid Unicode scalar value.
public init(_ v : UInt16) {
// Delegates to the `UInt32` initializer, which performs the validation.
self = UnicodeScalar(UInt32(v))
}
/// Create an instance with numeric value `v`.
public init(_ v : UInt8) {
// Delegates to the `UInt32` initializer.
self = UnicodeScalar(UInt32(v))
}
/// Create a duplicate of `v`.
public init(_ v: UnicodeScalar) {
// This constructor allows one to provide necessary type context to
// disambiguate between function overloads on 'String' and 'UnicodeScalar'.
self = v
}
/// Return a String representation of `self`.
///
/// The result is chosen by the first matching rule:
///
/// * backslash, single quote and double quote are prefixed with a
///   backslash;
/// * printable ASCII (see `_isPrintableASCII`) is returned unchanged;
/// * NUL, line feed, carriage return and tab become `\0`, `\n`, `\r`
///   and `\t`;
/// * any other value below 128 becomes `\u{XX}` with two hex digits;
/// * values of 128 and above are returned unchanged when `asASCII` is
///   `false`; otherwise they become `\u{XXXX}` (four hex digits) up to
///   0xFFFF and `\u{XXXXXXXX}` (eight hex digits) above that.
///
/// Hex digits above 9 are written as uppercase letters.
///
/// :param: `asASCII`, if `true`, forces most values into a numeric
/// representation.
public func escape(#asASCII: Bool) -> String {
// Returns the hexadecimal digit for the low four bits of `v` as a
// one-character String; higher bits of `v` are ignored.
func lowNibbleAsHex(v: UInt32) -> String {
var nibble = v & 15
if nibble < 10 {
return String(UnicodeScalar(nibble+48)) // 48 = '0'
} else {
// FIXME: was UnicodeScalar(nibble-10+65), which is now
// ambiguous. <rdar://problem/18506025>
return String(UnicodeScalar(nibble+65-10)) // 65 = 'A'
}
}
// Characters that need a backslash are tested before the printable-ASCII
// check, because they are themselves printable ASCII.
if self == "\\" {
return "\\\\"
} else if self == "\'" {
return "\\\'"
} else if self == "\"" {
return "\\\""
} else if _isPrintableASCII() {
return String(self)
} else if self == "\0" {
return "\\0"
} else if self == "\n" {
return "\\n"
} else if self == "\r" {
return "\\r"
} else if self == "\t" {
return "\\t"
} else if UInt32(self) < 128 {
// Remaining values below 128: two hex digits.
return "\\u{"
+ lowNibbleAsHex(UInt32(self) >> 4)
+ lowNibbleAsHex(UInt32(self)) + "}"
} else if !asASCII {
// Values of 128 and above pass through untouched unless ASCII output
// was requested.
return String(self)
} else if UInt32(self) <= 0xFFFF {
// Four hex digits, most significant nibble first.
var result = "\\u{"
result += lowNibbleAsHex(UInt32(self) >> 12)
result += lowNibbleAsHex(UInt32(self) >> 8)
result += lowNibbleAsHex(UInt32(self) >> 4)
result += lowNibbleAsHex(UInt32(self))
result += "}"
return result
} else {
// Eight hex digits, most significant nibble first.
// FIXME: Type checker performance prohibits this from being a
// single chained "+".
var result = "\\u{"
result += lowNibbleAsHex(UInt32(self) >> 28)
result += lowNibbleAsHex(UInt32(self) >> 24)
result += lowNibbleAsHex(UInt32(self) >> 20)
result += lowNibbleAsHex(UInt32(self) >> 16)
result += lowNibbleAsHex(UInt32(self) >> 12)
result += lowNibbleAsHex(UInt32(self) >> 8)
result += lowNibbleAsHex(UInt32(self) >> 4)
result += lowNibbleAsHex(UInt32(self))
result += "}"
return result
}
}
/// Returns true if this is an ASCII character (code point 0 to 127
/// inclusive).
public func isASCII() -> Bool {
return value <= 127
}
// True only for the ASCII letters "A"..."Z" and "a"..."z".
// FIXME: Locales make this interesting
func _isAlpha() -> Bool {
return (self >= "A" && self <= "Z") || (self >= "a" && self <= "z")
}
// True only for the ASCII digits "0"..."9".
// FIXME: Is there a similar term of art in Unicode?
public func _isASCIIDigit() -> Bool {
return self >= "0" && self <= "9"
}
// Currently identical to `_isASCIIDigit()`.
// FIXME: Unicode makes this interesting
func _isDigit() -> Bool {
return _isASCIIDigit()
}
// Maps ASCII "a"..."z", and U+00E0...U+00FE except U+00F7 (the division
// sign), to the scalar 32 code points lower.  Every other scalar is
// returned unchanged.
// FIXME: Unicode and locales make this interesting
var _uppercase : UnicodeScalar {
if self >= "a" && self <= "z" {
return UnicodeScalar(UInt32(self) - 32)
} else if self >= "à" && self <= "þ" && self != "÷" {
return UnicodeScalar(UInt32(self) - 32)
}
return self
}
// Maps ASCII "A"..."Z", and U+00C0...U+00DE except U+00D7 (the
// multiplication sign), to the scalar 32 code points higher.  Every other
// scalar is returned unchanged.
// FIXME: Unicode and locales make this interesting
var _lowercase : UnicodeScalar {
if self >= "A" && self <= "Z" {
return UnicodeScalar(UInt32(self) + 32)
} else if self >= "À" && self <= "Þ" && self != "×" {
return UnicodeScalar(UInt32(self) + 32)
}
return self
}
// True for space, tab, line feed, carriage return, U+000B and U+000C.
// FIXME: Unicode makes this interesting.
public // @testable
func _isSpace() -> Bool {
// FIXME: The constraint-based type checker goes painfully exponential
// when we turn this into one large expression. Break it up for now,
// until we can optimize the constraint solver better.
if self == " " || self == "\t" { return true }
if self == "\n" || self == "\r" { return true }
return self == "\u{0B}" || self == "\u{0C}"
}
// True for values in the inclusive range 0o040...0o176 (32...126 decimal).
// FIXME: Unicode makes this interesting.
func _isPrintableASCII() -> Bool {
return (self >= UnicodeScalar(0o040) && self <= UnicodeScalar(0o176))
}
}
extension UnicodeScalar : Printable, DebugPrintable {
  /// A textual representation of `self`: the result of
  /// `escape(asASCII: false)` wrapped in double quotes.
  public var description: String {
    let escaped = escape(asASCII: false)
    return "\"" + escaped + "\""
  }
  /// A textual representation of `self`, suitable for debugging: the
  /// result of `escape(asASCII: true)` wrapped in double quotes.
  public var debugDescription: String {
    let escaped = escape(asASCII: true)
    return "\"" + escaped + "\""
  }
}
extension UnicodeScalar : Hashable {
  /// The hash value, which is the scalar's numeric `value` converted to
  /// `Int`.
  ///
  /// **Axiom:** `x == y` implies `x.hashValue == y.hashValue`
  ///
  /// **Note:** the hash value is not guaranteed to be stable across
  /// different invocations of the same program.  Do not persist the
  /// hash value across program runs.
  public var hashValue: Int {
    let numeric = value
    return Int(numeric)
  }
}
extension UnicodeScalar {
  /// Construct with value `v`.
  ///
  /// `v` is first converted to `UInt32` and then handed to the `UInt32`
  /// initializer.
  ///
  /// Requires: `v` is a valid unicode scalar value.
  public init(_ v : Int) {
    let codePoint = UInt32(v)
    self = UnicodeScalar(codePoint)
  }
}
extension UInt8 {
  /// Construct with value `v.value`.
  ///
  /// Requires: `v.value` can be represented as UInt8; this is checked
  /// with `_precondition`.
  public init(_ v : UnicodeScalar) {
    let numeric = v.value
    let limit = UInt32(UInt8.max)
    _precondition(numeric <= limit,
      "Code point value does not fit into UInt8")
    self = UInt8(numeric)
  }
}
extension UInt32 {
  /// Construct with value `v.value`.
  ///
  /// `v.value` is already a `UInt32`, so it is copied unchanged.
  public init(_ v : UnicodeScalar) {
    let numeric: UInt32 = v.value
    self = numeric
  }
}
extension UInt64 {
  /// Construct with value `v.value`.
  ///
  /// The `UInt32` numeric value of `v` is converted to `UInt64`.
  public init(_ v : UnicodeScalar) {
    let numeric: UInt32 = v.value
    self = UInt64(numeric)
  }
}
/// Returns `true` if `lhs` and `rhs` have the same numeric `value`.
public func ==(lhs: UnicodeScalar, rhs: UnicodeScalar) -> Bool {
  let left = lhs.value
  let right = rhs.value
  return left == right
}
// Ordering is defined by the `<` operator below.
extension UnicodeScalar : Comparable {
}
/// Returns `true` if the numeric `value` of `lhs` is less than that of
/// `rhs`.
public func <(lhs: UnicodeScalar, rhs: UnicodeScalar) -> Bool {
  let left = lhs.value
  let right = rhs.value
  return right > left
}
extension UnicodeScalar {
  /// A wrapper around a single scalar, returned by `utf16`.
  struct UTF16View {
    /// The wrapped scalar.
    var value: UnicodeScalar
  }
  /// A `UTF16View` wrapping `self`.
  var utf16: UTF16View {
    let view = UTF16View(value: self)
    return view
  }
}
extension UnicodeScalar.UTF16View : CollectionType {
  /// The position of the first code unit.
  var startIndex: Int {
    return 0
  }
  /// The "past the end" position.
  ///
  /// `endIndex` is not a valid argument to `subscript`, and is always
  /// reachable from `startIndex` by zero or more applications of
  /// `successor()`.
  var endIndex: Int {
    return 0 + UTF16.width(value)
  }
  /// Access the code unit at `position`.
  ///
  /// Requires: `position` is a valid position in `self` and
  /// `position != endIndex`.  The requirement is checked with
  /// `_precondition`.
  subscript(position: Int) -> UTF16.CodeUnit {
    // Enforce the documented requirement.  Without this check every
    // position other than 0 (including negative ones, and position 1 of a
    // scalar whose `endIndex` is 1) would silently yield
    // `UTF16.trailSurrogate(value)`.
    _precondition(position >= startIndex && position < endIndex,
      "UnicodeScalar.UTF16View index is out of bounds")
    if position != 0 {
      return UTF16.trailSurrogate(value)
    }
    // Position 0: either the only code unit, or the lead surrogate.
    if endIndex == 1 {
      return UTF16.CodeUnit(value.value)
    }
    return UTF16.leadSurrogate(value)
  }
  /// Return a *generator* over the code units that comprise this
  /// *sequence*.
  ///
  /// Complexity: O(1)
  func generate() -> IndexingGenerator<UnicodeScalar.UTF16View> {
    return IndexingGenerator(self)
  }
}