swift-mirror/stdlib/core/Unicode.swift
Dave Abrahams 38b9e6808f [stdlib] CharacterEncoding.swift => Unicode.swift
Also enshrine the tests in the test suite

Swift SVN r10057
2013-11-08 22:17:50 +00:00

// Conversions between different Unicode encodings. Note that these
// are *not* currently resilient to erroneous data.
import swift
typealias UnicodeScalar = Char
protocol UnicodeCodec {
  typealias CodeUnit

  static def decode(next: ()->CodeUnit?) -> UnicodeScalar?
  static def encode(input: UnicodeScalar, output: (CodeUnit)->())
}
struct UTF8 : UnicodeCodec {
  typealias CodeUnit = UInt8

  static def decode(next: ()->CodeUnit?) -> UnicodeScalar? {
    var c = next()
    if !c {
      return .None
    }
    var c0 = c!

    // one octet (7 bits)
    if c0 < 0x80 {
      return Char(UInt32(c0))
    }

    var c1 = next()!
    // start with octet 1 (we'll mask off high bits later)
    var result = UInt32(c0)
    result = (result << 6) | UInt32(c1 & 0x3F)    // merge octet 2
    if c0 < 0xE0 {
      return UnicodeScalar(result & 0x000007FF)   // 11 bits
    }

    c1 = next()!                                  // prefetch octet 3
    result = (result << 6) | UInt32(c1 & 0x3F)    // merge octet 3
    if c0 < 0xF0 {
      return UnicodeScalar(result & 0x0000FFFF)   // 16 bits
    }

    c1 = next()!                                  // prefetch octet 4
    result = (result << 6) | UInt32(c1 & 0x3F)    // merge octet 4
    return UnicodeScalar(result & 0x001FFFFF)     // 21 bits
  }

  static def encode(input: UnicodeScalar, output: (CodeUnit)->()) {
    var c = UInt32(input)
    var buf3 = UInt8(c)
    if c >= UInt32(1<<7) {
      c >>= 6
      buf3 = (buf3 & 0x3F) | 0x80       // 10xxxxxx
      var buf2 = UInt8(c)
      if c < UInt32(1<<5) {
        buf2 |= 0xC0                    // 110xxxxx
      }
      else {
        c >>= 6
        buf2 = (buf2 & 0x3F) | 0x80     // 10xxxxxx
        var buf1 = UInt8(c)
        if c < UInt32(1<<4) {
          buf1 |= 0xE0                  // 1110xxxx
        }
        else {
          c >>= 6
          buf1 = (buf1 & 0x3F) | 0x80   // 10xxxxxx
          output(UInt8(c | 0xF0))       // 11110xxx
        }
        output(buf1)
      }
      output(buf2)
    }
    output(buf3)
  }

  var _value: UInt8
}
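
// Example (not part of the original file): a sketch in present-day Swift of
// the pull-based decode pattern UTF8.decode uses above -- the caller hands in
// a `next` closure and the decoder pulls one code unit at a time. The helper
// name `pullDecodeUTF8` is hypothetical; its body mirrors UTF8.decode.
func pullDecodeUTF8(_ next: () -> UInt8?) -> Unicode.Scalar? {
  guard let c0 = next() else { return nil }
  if c0 < 0x80 { return Unicode.Scalar(c0) }               // one octet, 7 bits
  var result = UInt32(c0)
  result = (result << 6) | UInt32(next()! & 0x3F)          // merge octet 2
  if c0 < 0xE0 { return Unicode.Scalar(result & 0x7FF) }   // 11 bits
  result = (result << 6) | UInt32(next()! & 0x3F)          // merge octet 3
  if c0 < 0xF0 { return Unicode.Scalar(result & 0xFFFF) }  // 16 bits
  result = (result << 6) | UInt32(next()! & 0x3F)          // merge octet 4
  return Unicode.Scalar(result & 0x1FFFFF)                 // 21 bits
}

let bytes: [UInt8] = [0xC3, 0xA9]                          // "é", U+00E9
var iterator = bytes.makeIterator()
let scalar = pullDecodeUTF8 { iterator.next() }            // Optional(U+00E9)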
struct UTF16 : UnicodeCodec {
  typealias CodeUnit = UInt16

  static def decode(next: ()->CodeUnit?) -> UnicodeScalar? {
    var first = next()
    if !first {
      return .None
    }
    var unit0 = UInt32(first!)
    // Anything outside the surrogate range D800-DFFF is a scalar on its own
    // (0xD800 >> 11 == 0x1B).
    if (unit0 >> 11) != 0x1B {
      return UnicodeScalar(unit0)
    }
    // Otherwise combine the lead and trail surrogates into one scalar.
    var unit1 = UInt32(next()!)
    return UnicodeScalar(
      0x10000
      + ((unit0 - 0xD800) << 10)
      + (unit1 - 0xDC00))
  }

  static def encode(input: UnicodeScalar, output: (UInt16)->()) {
    var scalarValue: UInt32 = UInt32(input)
    if UInt32(UInt16(scalarValue)) == scalarValue {
      // The scalar fits in a single 16-bit code unit.
      output(UInt16(scalarValue))
    }
    else {
      // Split the scalar into a lead/trail surrogate pair.
      var lead_offset = UInt32(0xD800) - (0x10000 >> 10)
      output(UInt16(lead_offset + (scalarValue >> 10)))
      output(UInt16(0xDC00 + (scalarValue & 0x3FF)))
    }
  }

  var _value: UInt16
}
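
// Example (not part of the original file): the surrogate-pair arithmetic from
// UTF16.encode above, worked through in present-day Swift for U+1F600. The
// constants are the ones the encoder computes; the names are ad hoc.
let scalarValue: UInt32 = 0x1F600                      // outside the BMP
let leadOffset: UInt32 = 0xD800 - (0x10000 >> 10)      // 0xD7C0
let lead = UInt16(leadOffset + (scalarValue >> 10))    // 0xD83D
let trail = UInt16(0xDC00 + (scalarValue & 0x3FF))     // 0xDE00
// UTF16.decode reverses the split:
let restored = 0x10000 + ((UInt32(lead) - 0xD800) << 10) + (UInt32(trail) - 0xDC00)
assert(restored == scalarValue)                        // back to U+1F600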
struct UTF32 : UnicodeCodec {
  typealias CodeUnit = UInt32

  static def create(value: CodeUnit) -> UTF32 {
    return UTF32(value)
  }

  def value() -> CodeUnit {
    return self._value
  }

  static def decode(next: ()->CodeUnit?) -> UnicodeScalar? {
    var x = next()
    if x {
      return UnicodeScalar(x!)
    }
    return .None
  }

  static def encode(input: UnicodeScalar, output: (UInt32)->()) {
    output(UInt32(input))
  }

  var _value: UInt32
}
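
// Example (not part of the original file): the decode-then-encode chaining
// these codecs are shaped for, shown in present-day Swift using the current
// stdlib's transcode(_:from:to:stoppingOnError:into:) entry point, which
// plays the role this file's decode/encode pairs are designed for.
let utf8Input: [UInt8] = Array("héllo".utf8)
var utf16Output: [UInt16] = []
_ = transcode(utf8Input.makeIterator(),
              from: UTF8.self,
              to: UTF16.self,
              stoppingOnError: true) { utf16Output.append($0) }
// utf16Output now holds the UTF-16 code units for "héllo".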