Files
swift-mirror/test/stdlib/Unicode.swift
Dave Abrahams 6d1095f44e Protocol names end in "Type," "ible," or "able"
Mechanically add "Type" to the end of any protocol names that don't end
in "Type," "ible," or "able."  Also, drop "Type" from the end of any
associated type names, except for those of the *LiteralConvertible
protocols.

There are obvious improvements to make in some of these names, which can
be handled with separate commits.

Fixes <rdar://problem/17165920> Protocols `Integer` etc should get
uglier names.

Swift SVN r19883
2014-07-12 17:29:57 +00:00

2528 lines
74 KiB
Swift

// RUN: %target-run-stdlib-swift | FileCheck %s
// REQUIRES: long_tests
import Foundation
import Swift
protocol TestableUnicodeCodec : UnicodeCodecType {
typealias CodeUnit : IntegerType
class func encodingId() -> NSStringEncoding
class func name() -> NSString
}
extension UTF8 : TestableUnicodeCodec {
static func encodingId() -> NSStringEncoding {
return NSUTF8StringEncoding
}
static func name() -> NSString {
return "UTF8"
}
}
extension UTF16 : TestableUnicodeCodec {
static func encodingId() -> NSStringEncoding {
return NSUTF16LittleEndianStringEncoding
}
static func name() -> NSString {
return "UTF16"
}
}
extension UTF32 : TestableUnicodeCodec {
static func encodingId() -> NSStringEncoding {
return NSUTF32LittleEndianStringEncoding
}
static func name() -> NSString {
return "UTF32"
}
}
// Backing store for computed var unicodeScalarRanges
var _unicodeScalarRanges = Array<Range<UInt32>>()
// The valid ranges of Unicode scalar values
var unicodeScalarRanges : [Range<UInt32>] {
if _unicodeScalarRanges.count == 0 {
for r in [UInt32(0)..<0xD800, 0xE000..<0xFDD0, 0xFDF0..<0xFFFE] {
_unicodeScalarRanges.append(r)
}
for base in UInt32(0x1)..<0x11 {
_unicodeScalarRanges.append((base << 16)..<((base << 16)+0xFFFE))
}
}
return _unicodeScalarRanges
}
var unicodeScalarCount : Int {
var count = 0
for r in unicodeScalarRanges {
count += Int(r.endIndex - r.startIndex)
}
return count
}
func nthUnicodeScalar(n: UInt32) -> UnicodeScalar {
var count: UInt32 = 0
for r in unicodeScalarRanges {
count += r.endIndex - r.startIndex
if count > n {
return UnicodeScalar(r.endIndex - (count - n))
}
}
_preconditionFailure("Index out of range")
}
// buffer should have a length >= 4
func nsEncode<CodeUnit>(
var c: UInt32,
encoding: NSStringEncoding,
inout buffer: [CodeUnit],
inout used: Int
) {
var s = NSString(
bytes: &c,
length: 4,
encoding: NSUTF32LittleEndianStringEncoding)
s.getBytes(
&buffer,
maxLength: buffer.count,
usedLength: &used,
encoding: encoding,
options: NSStringEncodingConversionOptions(0),
range: NSRange(location: 0, length: s.length),
remainingRange: nil)
}
// Convert the given numeric value to a hexidecimal string
func hex<T : IntegerType>(x: T) -> String {
return "0x" + String(x.toIntMax(), radix: 16)
}
// Convert the given sequence of numeric values to a string
// representing their hexidecimal values
func hex<
S: SequenceType
where
S.Generator.Element : IntegerType
>(x: S) -> String {
var r = "["
var prefix = ""
for unit in x {
r += prefix + hex(unit)
prefix = ", "
}
r += "]"
return r
}
// A SinkType that stores the elements written into an Array that can be
// inspected later.
class ArraySink<T: IntegerLiteralConvertible> : SinkType {
init(capacity: Int) {
storage = Array(count: capacity, repeatedValue: 0)
}
func put(x: T) {
storage[count++] = x
}
func clear() {
count = 0
}
var elements : Slice<T> {
return storage[0..<count]
}
var count = 0
var storage: [T] = Array()
}
@asmname("random") func random() -> UInt32
@asmname("srandomdev") func srandomdev()
// To avoid swamping the buildbot, by default, test only one out of
// testGroupCount cases, selected at random. You can manually pass
// "--all" on the command line to test everything.
var testGroupCount = 128
srandomdev()
var testGroup = random() % testGroupCount
var testAll = Process.arguments.count > 0 && Process.arguments[0] == "--all"
var minScalarOrd : Int
var maxScalarOrd : Int
if testAll {
println("Testing all Unicode scalars")
minScalarOrd = 0
maxScalarOrd = unicodeScalarCount
}
else {
println("Testing Unicode scalar group \(testGroup) of \(testGroupCount)")
minScalarOrd = unicodeScalarCount * testGroup / testGroupCount
maxScalarOrd = unicodeScalarCount * (testGroup+1) / testGroupCount
}
class CodecTest<Codec: TestableUnicodeCodec> {
var used = 0
typealias CodeUnit = Codec.CodeUnit
var nsEncodeBuffer: [CodeUnit] = Array(count: 4, repeatedValue: 0)
var encodeBuffer = ArraySink<CodeUnit>(capacity: 4)
func testOne(scalar: UnicodeScalar)
{
/* Progress reporter
if (scalar.value % 0x1000) == 0 {
println("\(hex(scalar.value))")
}
*/
// Use Cocoa to encode the scalar
nsEncode(scalar.value, Codec.encodingId(), &nsEncodeBuffer, &used)
let nsEncoded = nsEncodeBuffer[0..<(used/sizeof(CodeUnit.self))]
var g = nsEncoded.generate()
var decoded: UnicodeScalar
var decoder = Codec()
switch decoder.decode(&g) {
case .Result(let us):
decoded = us
default:
fatalError("decoding failed")
}
if decoded != scalar {
println("Decoding failed: \(hex(scalar.value)) => \(hex(nsEncoded)) => \(hex(decoded.value))")
}
encodeBuffer.clear()
Codec.encode(scalar, output: &self.encodeBuffer)
if !equal(nsEncoded, encodeBuffer.elements) {
println("Decoding failed: \(hex(nsEncoded)) => \(hex(scalar.value)) => \(hex(encodeBuffer.storage[0]))")
}
}
func run() {
println("testing \(Codec.name())")
for i in minScalarOrd..<maxScalarOrd {
testOne(nthUnicodeScalar(UInt32(i)))
}
println("done.")
}
}
srandomdev()
// CHECK: testing UTF8
// CHECK-NEXT: done.
CodecTest<UTF8>().run()
// CHECK: testing UTF16
// CHECK-NEXT: done.
CodecTest<UTF16>().run()
// CHECK: testing UTF32
// CHECK-NEXT: done.
CodecTest<UTF32>().run()
func println(a: [UTF8.CodeUnit]) {
print("[ ")
var prefix = ""
for x in a {
print("\(prefix)\(x)")
prefix = ", "
}
println(" ]")
}
func println(a: [UTF16.CodeUnit]) {
print("[ ")
var prefix = ""
for x in a {
print("\(prefix)\(x)")
prefix = ", "
}
println(" ]")
}
func additionalUtf16Tests() {
// CHECK: additionalUtf16Tests
println("additionalUtf16Tests")
// CHECK-NEXT: 1
println(UTF16.width("x"))
// CHECK-NEXT: 2
println(UTF16.width("\u{101010}"))
// CHECK-NEXT: 2
println(UTF16.width("𝄞"))
// CHECK-NEXT: true
println(UTF16.leadSurrogate("𝄞") == 0xD834)
// CHECK-NEXT: true
println(UTF16.trailSurrogate("𝄞") == 0xDD1E)
var u8: [UTF8.CodeUnit] = [ 0, 1, 2, 3, 4, 5 ]
var u16: [UTF16.CodeUnit] = [ 6, 7, 8, 9, 10, 11 ]
u16.withUnsafePointerToElements {
(p16)->() in
u8.withUnsafePointerToElements {
(p8)->() in
// CHECK-NEXT: [ 0, 1, 2, 9, 10, 11 ]
UTF16.copy(p8, destination: p16, count: 3)
println(u16)
// CHECK-NEXT: [ 9, 10, 11, 3, 4, 5 ]
UTF16.copy(p16 + 3, destination: p8, count: 3)
println(u8)
// CHECK-NEXT: [ 0, 1, 2, 0, 1, 2 ]
UTF16.copy(p16, destination: p16 + 3, count: 3)
println(u16)
// CHECK-NEXT: [ 9, 10, 11, 9, 10, 11 ]
UTF16.copy(p8, destination: p8 + 3, count: 3)
println(u8)
}
}
let (count0, isASCII0) = UTF16.measure(UTF8.self, input: u8.generate(),
repairIllFormedSequences: false)!
// CHECK-NEXT: 6 / true
println("\(count0) / \(isASCII0)")
let (count1, isASCII1) = UTF16.measure(UTF16.self, input: u16.generate(),
repairIllFormedSequences: false)!
// CHECK-NEXT: 6 / true
println("\(count1) / \(isASCII1)")
// "" == U+20AC.
u8 = [0xF0, 0xA4, 0xAD, 0xA2]
let (count2, isASCII2) = UTF16.measure(UTF8.self, input: u8.generate(),
repairIllFormedSequences: false)!
// CHECK-NEXT: 2 / false
println("\(count2) / \(isASCII2)")
}
additionalUtf16Tests()
import StdlibUnittest
class EOFCountingGenerator<T> : GeneratorType {
var array: [T]
var index: Int = 0
var numTimesReturnedEOF: Int = 0
init(_ array: [T]) {
self.array = array
}
func next() -> T? {
if index == array.count {
++numTimesReturnedEOF
return .None
}
return array[index++]
}
}
func checkDecodeUTF<Codec : UnicodeCodecType>(
codec: Codec.Type, expectedHead: [UInt32],
expectedRepairedTail: [UInt32], utfStr: [Codec.CodeUnit]
) -> AssertionResult {
if true {
var decoded: [UInt32] = []
var g = EOFCountingGenerator(utfStr)
transcode(codec, UTF32.self, g,
SinkOf {
decoded += $0
},
stopOnError: true)
expectGE(1, g.numTimesReturnedEOF)
if expectedHead != decoded {
return assertionFailure()
.withDescription("\n")
.withDescription("expectedHead: \(asHex(expectedHead))\n")
.withDescription("actual: \(asHex(decoded))")
}
}
if true {
var expected = expectedHead
expected += expectedRepairedTail
var decoded: [UInt32] = []
var g = EOFCountingGenerator(utfStr)
transcode(codec, UTF32.self, g,
SinkOf {
decoded += $0
},
stopOnError: false)
expectEqual(1, g.numTimesReturnedEOF)
if expected != decoded {
return assertionFailure()
.withDescription("\n")
.withDescription("expected: \(asHex(expected))\n")
.withDescription("actual: \(asHex(decoded))")
}
}
return assertionSuccess()
}
func checkDecodeUTF8(
expectedHead: [UInt32],
expectedRepairedTail: [UInt32], utf8Str: [UInt8]
) -> AssertionResult {
return checkDecodeUTF(UTF8.self, expectedHead, expectedRepairedTail, utf8Str)
}
func checkDecodeUTF16(
expectedHead: [UInt32],
expectedRepairedTail: [UInt32], utf16Str: [UInt16]
) -> AssertionResult {
return checkDecodeUTF(UTF16.self, expectedHead, expectedRepairedTail,
utf16Str)
}
func checkDecodeUTF32(
expectedHead: [UInt32],
expectedRepairedTail: [UInt32], utf32Str: [UInt32]
) -> AssertionResult {
return checkDecodeUTF(UTF32.self, expectedHead, expectedRepairedTail,
utf32Str)
}
func checkEncodeUTF8(expected: [UInt8], scalars: [UInt32]) -> AssertionResult {
var encoded: [UInt8] = []
var g = EOFCountingGenerator(scalars)
let hadError = transcode(UTF32.self, UTF8.self, g,
SinkOf {
encoded += $0
},
stopOnError: true)
expectFalse(hadError)
expectGE(1, g.numTimesReturnedEOF)
if expected != encoded {
return assertionFailure()
.withDescription("\n")
.withDescription("expected: \(asHex(expected))\n")
.withDescription("actual: \(asHex(encoded))")
}
return assertionSuccess()
}
struct UTF8Test {
let scalars: [UInt32]
let encoded: [UInt8]
let loc: SourceLoc
init(_ scalars: [UInt32], _ encoded: [UInt8],
file: String = __FILE__, line: UWord = __LINE__) {
self.scalars = scalars
self.encoded = encoded
self.loc = SourceLoc(file, line, comment: "test data")
}
}
let UTF8TestsSmokeTest = [
//
// 1-byte sequences
//
// U+0041 LATIN CAPITAL LETTER A
UTF8Test([ 0x0041 ], [ 0x41 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+0042 LATIN CAPITAL LETTER B
UTF8Test([ 0x0041, 0x0042 ], [ 0x41, 0x42 ]),
// U+0061 LATIN SMALL LETTER A
// U+0062 LATIN SMALL LETTER B
// U+0063 LATIN SMALL LETTER C
UTF8Test([ 0x0061, 0x0062, 0x0063 ], [ 0x61, 0x62, 0x63 ]),
// U+0000 NULL
// U+0041 LATIN CAPITAL LETTER A
// U+0042 LATIN CAPITAL LETTER B
// U+0000 NULL
UTF8Test(
[ 0x0000, 0x0041, 0x0042, 0x0000 ],
[ 0x00, 0x41, 0x42, 0x00 ]),
//
// 2-byte sequences
//
// U+0283 LATIN SMALL LETTER ESH
UTF8Test([ 0x0283 ], [ 0xca, 0x83 ]),
// U+03BA GREEK SMALL LETTER KAPPA
// U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
// U+03C3 GREEK SMALL LETTER SIGMA
// U+03BC GREEK SMALL LETTER MU
// U+03B5 GREEK SMALL LETTER EPSILON
UTF8Test(
[ 0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5 ],
[ 0xce, 0xba, 0xe1, 0xbd, 0xb9, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5 ]),
// U+0430 CYRILLIC SMALL LETTER A
// U+0431 CYRILLIC SMALL LETTER BE
// U+0432 CYRILLIC SMALL LETTER VE
UTF8Test([ 0x0430, 0x0431, 0x0432 ], [ 0xd0, 0xb0, 0xd0, 0xb1, 0xd0, 0xb2 ]),
//
// 3-byte sequences
//
// U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
// U+6587 CJK UNIFIED IDEOGRAPH-6587
UTF8Test(
[ 0x4f8b, 0x6587 ],
[ 0xe4, 0xbe, 0x8b, 0xe6, 0x96, 0x87 ]),
// U+D55C HANGUL SYLLABLE HAN
// U+AE00 HANGUL SYLLABLE GEUL
UTF8Test(
[ 0xd55c, 0xae00 ],
[ 0xed, 0x95, 0x9c, 0xea, 0xb8, 0x80 ]),
// U+1112 HANGUL CHOSEONG HIEUH
// U+1161 HANGUL JUNGSEONG A
// U+11AB HANGUL JONGSEONG NIEUN
// U+1100 HANGUL CHOSEONG KIYEOK
// U+1173 HANGUL JUNGSEONG EU
// U+11AF HANGUL JONGSEONG RIEUL
UTF8Test(
[ 0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af ],
[ 0xe1, 0x84, 0x92, 0xe1, 0x85, 0xa1, 0xe1, 0x86, 0xab,
0xe1, 0x84, 0x80, 0xe1, 0x85, 0xb3, 0xe1, 0x86, 0xaf ]),
// U+3042 HIRAGANA LETTER A
// U+3044 HIRAGANA LETTER I
// U+3046 HIRAGANA LETTER U
// U+3048 HIRAGANA LETTER E
// U+304A HIRAGANA LETTER O
UTF8Test(
[ 0x3042, 0x3044, 0x3046, 0x3048, 0x304a ],
[ 0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86,
0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a ]),
//
// 4-byte sequences
//
// U+1F425 FRONT-FACING BABY CHICK
UTF8Test(
[ 0x0001F425 ],
[ 0xf0, 0x9f, 0x90, 0xa5 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+1F425 FRONT-FACING BABY CHICK
UTF8Test(
[ 0x0041, 0x0001F425 ],
[ 0x41, 0xf0, 0x9f, 0x90, 0xa5 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+0042 LATIN CAPITAL LETTER B
// U+1F425 FRONT-FACING BABY CHICK
UTF8Test(
[ 0x0041, 0x0042, 0x0001F425 ],
[ 0x41, 0x42, 0xf0, 0x9f, 0x90, 0xa5 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+0042 LATIN CAPITAL LETTER B
// U+0043 LATIN CAPITAL LETTER C
// U+1F425 FRONT-FACING BABY CHICK
UTF8Test(
[ 0x0041, 0x0042, 0x0043, 0x0001F425 ],
[ 0x41, 0x42, 0x43, 0xf0, 0x9f, 0x90, 0xa5 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+0042 LATIN CAPITAL LETTER B
// U+0043 LATIN CAPITAL LETTER C
// U+0044 LATIN CAPITAL LETTER D
// U+1F425 FRONT-FACING BABY CHICK
UTF8Test(
[ 0x0041, 0x0042, 0x0043, 0x0044, 0x0001F425 ],
[ 0x41, 0x42, 0x43, 0x44, 0xf0, 0x9f, 0x90, 0xa5 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+0042 LATIN CAPITAL LETTER B
// U+0043 LATIN CAPITAL LETTER C
// U+0044 LATIN CAPITAL LETTER D
// U+0045 LATIN CAPITAL LETTER E
// U+1F425 FRONT-FACING BABY CHICK
UTF8Test(
[ 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0001F425 ],
[ 0x41, 0x42, 0x43, 0x44, 0x45, 0xf0, 0x9f, 0x90, 0xa5 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+0042 LATIN CAPITAL LETTER B
// U+0043 LATIN CAPITAL LETTER C
// U+0044 LATIN CAPITAL LETTER D
// U+0045 LATIN CAPITAL LETTER E
// U+0046 LATIN CAPITAL LETTER F
// U+1F425 FRONT-FACING BABY CHICK
UTF8Test(
[ 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0001F425 ],
[ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0xf0, 0x9f, 0x90, 0xa5 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+0042 LATIN CAPITAL LETTER B
// U+0043 LATIN CAPITAL LETTER C
// U+0044 LATIN CAPITAL LETTER D
// U+0045 LATIN CAPITAL LETTER E
// U+0046 LATIN CAPITAL LETTER F
// U+0047 LATIN CAPITAL LETTER G
// U+1F425 FRONT-FACING BABY CHICK
UTF8Test(
[ 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0001F425 ],
[ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0xf0, 0x9f, 0x90, 0xa5 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+0042 LATIN CAPITAL LETTER B
// U+0043 LATIN CAPITAL LETTER C
// U+0044 LATIN CAPITAL LETTER D
// U+0045 LATIN CAPITAL LETTER E
// U+0046 LATIN CAPITAL LETTER F
// U+0047 LATIN CAPITAL LETTER G
// U+0048 LATIN CAPITAL LETTER H
// U+1F425 FRONT-FACING BABY CHICK
UTF8Test(
[ 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048,
0x0001F425 ],
[ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
0xf0, 0x9f, 0x90, 0xa5 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+0042 LATIN CAPITAL LETTER B
// U+0043 LATIN CAPITAL LETTER C
// U+0044 LATIN CAPITAL LETTER D
// U+0045 LATIN CAPITAL LETTER E
// U+0046 LATIN CAPITAL LETTER F
// U+0047 LATIN CAPITAL LETTER G
// U+0048 LATIN CAPITAL LETTER H
// U+0049 LATIN CAPITAL LETTER I
// U+1F425 FRONT-FACING BABY CHICK
UTF8Test(
[ 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049,
0x0001F425 ],
[ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
0xf0, 0x9f, 0x90, 0xa5 ]),
// U+E0100 VARIATION SELECTOR-17
UTF8Test(
[ 0x000E0100 ],
[ 0xf3, 0xa0, 0x84, 0x80 ]),
]
struct UTF16Test {
let scalarsHead: [UInt32]
let scalarsRepairedTail: [UInt32]
let encoded: [UInt16]
let loc: SourceLoc
init(_ scalarsHead: [UInt32], _ scalarsRepairedTail: [UInt32],
_ encoded: [UInt16],
file: String = __FILE__, line: UWord = __LINE__) {
self.scalarsHead = scalarsHead
self.scalarsRepairedTail = scalarsRepairedTail
self.encoded = encoded
self.loc = SourceLoc(file, line, comment: "test data")
}
}
let UTF16Tests = [
"Empty": [
UTF16Test([], [], []),
],
"SmokeTest": [
//
// 1-word sequences
//
// U+0041 LATIN CAPITAL LETTER A
UTF16Test([ 0x0041 ], [], [ 0x0041 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+0042 LATIN CAPITAL LETTER B
UTF16Test([ 0x0041, 0x0042 ], [], [ 0x0041, 0x0042 ]),
// U+0000 NULL
// U+0041 LATIN CAPITAL LETTER A
// U+0042 LATIN CAPITAL LETTER B
// U+0000 NULL
UTF16Test(
[ 0x0000, 0x0041, 0x0042, 0x0000 ], [],
[ 0x0000, 0x0041, 0x0042, 0x0000 ]),
// U+0283 LATIN SMALL LETTER ESH
UTF16Test([ 0x0283 ], [], [ 0x0283 ]),
// U+03BA GREEK SMALL LETTER KAPPA
// U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
// U+03C3 GREEK SMALL LETTER SIGMA
// U+03BC GREEK SMALL LETTER MU
// U+03B5 GREEK SMALL LETTER EPSILON
UTF16Test(
[ 0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5 ], [],
[ 0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5 ]),
// U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
// U+6587 CJK UNIFIED IDEOGRAPH-6587
UTF16Test(
[ 0x4f8b, 0x6587 ], [],
[ 0x4f8b, 0x6587 ]),
// U+D55C HANGUL SYLLABLE HAN
// U+AE00 HANGUL SYLLABLE GEUL
UTF16Test(
[ 0xd55c, 0xae00 ], [],
[ 0xd55c, 0xae00 ]),
// U+1112 HANGUL CHOSEONG HIEUH
// U+1161 HANGUL JUNGSEONG A
// U+11AB HANGUL JONGSEONG NIEUN
// U+1100 HANGUL CHOSEONG KIYEOK
// U+1173 HANGUL JUNGSEONG EU
// U+11AF HANGUL JONGSEONG RIEUL
UTF16Test(
[ 0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af ], [],
[ 0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af ]),
// U+D7FF (unassigned)
UTF16Test([ 0xd7ff ], [], [ 0xd7ff ]),
// U+E000 (private use)
UTF16Test([ 0xe000 ], [], [ 0xe000 ]),
// U+FFFD REPLACEMENT CHARACTER
UTF16Test([ 0xfffd ], [], [ 0xfffd ]),
// U+FFFF (noncharacter)
UTF16Test([ 0xffff ], [], [ 0xffff ]),
//
// 2-word sequences
//
// U+10000 LINEAR B SYLLABLE B008 A
UTF16Test([ 0x00010000 ], [], [ 0xd800, 0xdc00 ]),
// U+10100 AEGEAN WORD SEPARATOR LINE
UTF16Test([ 0x00010100 ], [], [ 0xd800, 0xdd00 ]),
// U+103FF (unassigned)
UTF16Test([ 0x000103ff ], [], [ 0xd800, 0xdfff ]),
// U+E0000 (unassigned)
UTF16Test([ 0x000e0000 ], [], [ 0xdb40, 0xdc00 ]),
// U+E0100 VARIATION SELECTOR-17
UTF16Test([ 0x000e0100 ], [], [ 0xdb40, 0xdd00 ]),
// U+E03FF (unassigned)
UTF16Test([ 0x000e03ff ], [], [ 0xdb40, 0xdfff ]),
// U+10FC00 (private use)
UTF16Test([ 0x0010fc00 ], [], [ 0xdbff, 0xdc00 ]),
// U+10FD00 (private use)
UTF16Test([ 0x0010fd00 ], [], [ 0xdbff, 0xdd00 ]),
// U+10FFFF (private use, noncharacter)
UTF16Test([ 0x0010ffff ], [], [ 0xdbff, 0xdfff ]),
],
"Incomplete": [
//
// Incomplete sequences that end right before EOF.
//
// U+D800 (high-surrogate)
UTF16Test([], [ 0xfffd ], [ 0xd800 ]),
// U+D800 (high-surrogate)
// U+D800 (high-surrogate)
UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xd800, 0xd800 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+D800 (high-surrogate)
UTF16Test([ 0x0041 ], [ 0xfffd ], [ 0x0041, 0xd800 ]),
// U+10000 LINEAR B SYLLABLE B008 A
// U+D800 (high-surrogate)
UTF16Test(
[ 0x00010000 ], [ 0xfffd ],
[ 0xd800, 0xdc00, 0xd800 ]),
//
// Incomplete sequences with more code units following them.
//
// U+D800 (high-surrogate)
// U+0041 LATIN CAPITAL LETTER A
UTF16Test([], [ 0xfffd, 0x0041 ], [ 0xd800, 0x0041 ]),
// U+D800 (high-surrogate)
// U+10000 LINEAR B SYLLABLE B008 A
UTF16Test(
[], [ 0xfffd, 0x00010000 ],
[ 0xd800, 0xd800, 0xdc00 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+D800 (high-surrogate)
// U+0041 LATIN CAPITAL LETTER A
UTF16Test(
[ 0x0041 ], [ 0xfffd, 0x0041 ],
[ 0x0041, 0xd800, 0x0041 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+D800 (high-surrogate)
// U+10000 LINEAR B SYLLABLE B008 A
UTF16Test(
[ 0x0041 ], [ 0xfffd, 0x00010000 ],
[ 0x0041, 0xd800, 0xd800, 0xdc00 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+D800 (high-surrogate)
// U+DB40 (high-surrogate)
// U+0041 LATIN CAPITAL LETTER A
UTF16Test(
[ 0x0041 ], [ 0xfffd, 0xfffd, 0x0041 ],
[ 0x0041, 0xd800, 0xdb40, 0x0041 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+D800 (high-surrogate)
// U+DB40 (high-surrogate)
// U+10000 LINEAR B SYLLABLE B008 A
UTF16Test(
[ 0x0041 ], [ 0xfffd, 0xfffd, 0x00010000 ],
[ 0x0041, 0xd800, 0xdb40, 0xd800, 0xdc00 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+D800 (high-surrogate)
// U+DB40 (high-surrogate)
// U+DBFF (high-surrogate)
// U+0041 LATIN CAPITAL LETTER A
UTF16Test(
[ 0x0041 ], [ 0xfffd, 0xfffd, 0xfffd, 0x0041 ],
[ 0x0041, 0xd800, 0xdb40, 0xdbff, 0x0041 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+D800 (high-surrogate)
// U+DB40 (high-surrogate)
// U+DBFF (high-surrogate)
// U+10000 LINEAR B SYLLABLE B008 A
UTF16Test(
[ 0x0041 ], [ 0xfffd, 0xfffd, 0xfffd, 0x00010000 ],
[ 0x0041, 0xd800, 0xdb40, 0xdbff, 0xd800, 0xdc00 ]),
],
"IllFormed": [
//
// Low-surrogate right before EOF.
//
// U+DC00 (low-surrogate)
UTF16Test([], [ 0xfffd ], [ 0xdc00 ]),
// U+DC00 (low-surrogate)
// U+DC00 (low-surrogate)
UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdc00, 0xdc00 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+DC00 (low-surrogate)
UTF16Test([ 0x0041 ], [ 0xfffd ], [ 0x0041, 0xdc00 ]),
// U+10000 LINEAR B SYLLABLE B008 A
// U+DC00 (low-surrogate)
UTF16Test(
[ 0x00010000 ], [ 0xfffd ],
[ 0xd800, 0xdc00, 0xdc00 ]),
//
// Low-surrogate with more code units following it.
//
// U+DC00 (low-surrogate)
// U+0041 LATIN CAPITAL LETTER A
UTF16Test([], [ 0xfffd, 0x0041 ], [ 0xdc00, 0x0041 ]),
// U+DC00 (low-surrogate)
// U+10000 LINEAR B SYLLABLE B008 A
UTF16Test(
[], [ 0xfffd, 0x00010000 ],
[ 0xdc00, 0xd800, 0xdc00 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+DC00 (low-surrogate)
// U+0041 LATIN CAPITAL LETTER A
UTF16Test(
[ 0x0041 ], [ 0xfffd, 0x0041 ],
[ 0x0041, 0xdc00, 0x0041 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+DC00 (low-surrogate)
// U+10000 LINEAR B SYLLABLE B008 A
UTF16Test(
[ 0x0041 ], [ 0xfffd, 0x00010000 ],
[ 0x0041, 0xdc00, 0xd800, 0xdc00 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+DC00 (low-surrogate)
// U+DD00 (low-surrogate)
// U+0041 LATIN CAPITAL LETTER A
UTF16Test(
[ 0x0041 ], [ 0xfffd, 0xfffd, 0x0041 ],
[ 0x0041, 0xdc00, 0xdd00, 0x0041 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+DC00 (low-surrogate)
// U+DD00 (low-surrogate)
// U+10000 LINEAR B SYLLABLE B008 A
UTF16Test(
[ 0x0041 ], [ 0xfffd, 0xfffd, 0x00010000 ],
[ 0x0041, 0xdc00, 0xdd00, 0xd800, 0xdc00 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+DC00 (low-surrogate)
// U+DD00 (low-surrogate)
// U+DFFF (low-surrogate)
// U+0041 LATIN CAPITAL LETTER A
UTF16Test(
[ 0x0041 ], [ 0xfffd, 0xfffd, 0xfffd, 0x0041 ],
[ 0x0041, 0xdc00, 0xdd00, 0xdfff, 0x0041 ]),
// U+0041 LATIN CAPITAL LETTER A
// U+DC00 (low-surrogate)
// U+DD00 (low-surrogate)
// U+DFFF (low-surrogate)
// U+10000 LINEAR B SYLLABLE B008 A
UTF16Test(
[ 0x0041 ], [ 0xfffd, 0xfffd, 0xfffd, 0x00010000 ],
[ 0x0041, 0xdc00, 0xdd00, 0xdfff, 0xd800, 0xdc00 ]),
//
// Low-surrogate followed by high-surrogate.
//
// U+DC00 (low-surrogate)
// U+D800 (high-surrogate)
UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdc00, 0xd800 ]),
// U+DC00 (low-surrogate)
// U+DB40 (high-surrogate)
UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdc00, 0xdb40 ]),
// U+DC00 (low-surrogate)
// U+DBFF (high-surrogate)
UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdc00, 0xdbff ]),
// U+DD00 (low-surrogate)
// U+D800 (high-surrogate)
UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdd00, 0xd800 ]),
// U+DD00 (low-surrogate)
// U+DB40 (high-surrogate)
UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdd00, 0xdb40 ]),
// U+DD00 (low-surrogate)
// U+DBFF (high-surrogate)
UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdd00, 0xdbff ]),
// U+DFFF (low-surrogate)
// U+D800 (high-surrogate)
UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdfff, 0xd800 ]),
// U+DFFF (low-surrogate)
// U+DB40 (high-surrogate)
UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdfff, 0xdb40 ]),
// U+DFFF (low-surrogate)
// U+DBFF (high-surrogate)
UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdfff, 0xdbff ]),
// U+DC00 (low-surrogate)
// U+D800 (high-surrogate)
// U+0041 LATIN CAPITAL LETTER A
UTF16Test(
[], [ 0xfffd, 0xfffd, 0x0041 ],
[ 0xdc00, 0xd800, 0x0041 ]),
// U+DC00 (low-surrogate)
// U+D800 (high-surrogate)
// U+10000 LINEAR B SYLLABLE B008 A
UTF16Test(
[], [ 0xfffd, 0xfffd, 0x10000 ],
[ 0xdc00, 0xd800, 0xd800, 0xdc00 ]),
],
]
var UTF8Decoder = TestCase("UTF8Decoder")
UTF8Decoder.test("Internal/_numTrailingBytes") {
for i in UInt8(0x00)...UInt8(0x7f) {
expectEqual(0, UTF8._numTrailingBytes(i)) { "i=\(i)" }
}
for i in UInt8(0x80)...UInt8(0xc1) {
expectEqual(4, UTF8._numTrailingBytes(i)) { "i=\(i)" }
}
for i in UInt8(0xc2)...UInt8(0xdf) {
expectEqual(1, UTF8._numTrailingBytes(i)) { "i=\(i)" }
}
for i in UInt8(0xe0)...UInt8(0xef) {
expectEqual(2, UTF8._numTrailingBytes(i)) { "i=\(i)" }
}
for i in UInt8(0xf0)...UInt8(0xf4) {
expectEqual(3, UTF8._numTrailingBytes(i)) { "i=\(i)" }
}
for i in UInt8(0xf5)...UInt8(0xfe) {
expectEqual(4, UTF8._numTrailingBytes(i)) { "i=\(i)" }
}
// Separate test for 0xff because of:
// <rdar://problem/17376512> Range UInt8(0x00)...UInt8(0xff) invokes a
// runtime trap
var i = UInt8(0xff)
expectEqual(4, UTF8._numTrailingBytes(i)) { "i=\(i)" }
}
UTF8Decoder.test("Empty") {
expectTrue(checkDecodeUTF8([], [], []))
}
UTF8Decoder.test("SmokeTest") {
for test in UTF8TestsSmokeTest {
expectTrue(checkDecodeUTF8(test.scalars, [], test.encoded),
stackTrace: test.loc.withCurrentLoc())
}
}
UTF8Decoder.test("FirstPossibleSequence") {
//
// First possible sequence of a certain length
//
// U+0000 NULL
expectTrue(checkDecodeUTF8([ 0x0000 ], [], [ 0x00 ]))
// U+0080 PADDING CHARACTER
expectTrue(checkDecodeUTF8([ 0x0080 ], [], [ 0xc2, 0x80 ]))
// U+0800 SAMARITAN LETTER ALAF
expectTrue(checkDecodeUTF8(
[ 0x0800 ], [],
[ 0xe0, 0xa0, 0x80 ]))
// U+10000 LINEAR B SYLLABLE B008 A
expectTrue(checkDecodeUTF8(
[ 0x10000 ], [],
[ 0xf0, 0x90, 0x80, 0x80 ]))
// U+200000 (invalid)
expectTrue(checkDecodeUTF8(
[], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf8, 0x88, 0x80, 0x80, 0x80 ]))
// U+4000000 (invalid)
expectTrue(checkDecodeUTF8(
[], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80 ]))
}
UTF8Decoder.test("LastPossibleSequence") {
//
// Last possible sequence of a certain length
//
// U+007F DELETE
expectTrue(checkDecodeUTF8([ 0x007f ], [], [ 0x7f ]))
// U+07FF (unassigned)
expectTrue(checkDecodeUTF8([ 0x07ff ], [], [ 0xdf, 0xbf ]))
// U+FFFF (noncharacter)
expectTrue(checkDecodeUTF8(
[ 0xffff ], [],
[ 0xef, 0xbf, 0xbf ]))
// U+1FFFFF (invalid)
expectTrue(checkDecodeUTF8(
[], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf7, 0xbf, 0xbf, 0xbf ]))
// U+3FFFFFF (invalid)
expectTrue(checkDecodeUTF8(
[], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfb, 0xbf, 0xbf, 0xbf, 0xbf ]))
// U+7FFFFFFF (invalid)
expectTrue(checkDecodeUTF8(
[], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf ]))
}
UTF8Decoder.test("CodeSpaceBoundaryConditions") {
//
// Other boundary conditions
//
// U+D7FF (unassigned)
expectTrue(checkDecodeUTF8([ 0xd7ff ], [], [ 0xed, 0x9f, 0xbf ]))
// U+E000 (private use)
expectTrue(checkDecodeUTF8([ 0xe000 ], [], [ 0xee, 0x80, 0x80 ]))
// U+FFFD REPLACEMENT CHARACTER
expectTrue(checkDecodeUTF8([ 0xfffd ], [], [ 0xef, 0xbf, 0xbd ]))
// U+10FFFF (noncharacter)
expectTrue(checkDecodeUTF8([ 0x10ffff ], [], [ 0xf4, 0x8f, 0xbf, 0xbf ]))
// U+110000 (invalid)
expectTrue(checkDecodeUTF8(
[], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf4, 0x90, 0x80, 0x80 ]))
}
UTF8Decoder.test("UnexpectedContinuationBytes") {
//
// Unexpected continuation bytes
//
// A sequence of unexpected continuation bytes that don't follow a first
// byte, every byte is a maximal subpart.
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xbf ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0x80, 0xbf ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xbf, 0x80 ]))
expectTrue(checkDecodeUTF8(
[], [ 0xfffd, 0xfffd, 0xfffd ],
[ 0x80, 0xbf, 0x80 ]))
expectTrue(checkDecodeUTF8(
[], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0x80, 0xbf, 0x80, 0xbf ]))
expectTrue(checkDecodeUTF8(
[], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0x80, 0xbf, 0x82, 0xbf, 0xaa ]))
expectTrue(checkDecodeUTF8(
[], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xaa, 0xb0, 0xbb, 0xbf, 0xaa, 0xa0 ]))
expectTrue(checkDecodeUTF8(
[], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xaa, 0xb0, 0xbb, 0xbf, 0xaa, 0xa0, 0x8f ]))
// All continuation bytes (0x80--0xbf).
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf ]))
}
UTF8Decoder.test("LonelyStartBytes") {
//
// Lonely start bytes
//
// Start bytes of 2-byte sequences (0xc0--0xdf).
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ],
[ 0xc0, 0x20, 0xc1, 0x20, 0xc2, 0x20, 0xc3, 0x20,
0xc4, 0x20, 0xc5, 0x20, 0xc6, 0x20, 0xc7, 0x20,
0xc8, 0x20, 0xc9, 0x20, 0xca, 0x20, 0xcb, 0x20,
0xcc, 0x20, 0xcd, 0x20, 0xce, 0x20, 0xcf, 0x20,
0xd0, 0x20, 0xd1, 0x20, 0xd2, 0x20, 0xd3, 0x20,
0xd4, 0x20, 0xd5, 0x20, 0xd6, 0x20, 0xd7, 0x20,
0xd8, 0x20, 0xd9, 0x20, 0xda, 0x20, 0xdb, 0x20,
0xdc, 0x20, 0xdd, 0x20, 0xde, 0x20, 0xdf, 0x20 ]))
// Start bytes of 3-byte sequences (0xe0--0xef).
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ],
[ 0xe0, 0x20, 0xe1, 0x20, 0xe2, 0x20, 0xe3, 0x20,
0xe4, 0x20, 0xe5, 0x20, 0xe6, 0x20, 0xe7, 0x20,
0xe8, 0x20, 0xe9, 0x20, 0xea, 0x20, 0xeb, 0x20,
0xec, 0x20, 0xed, 0x20, 0xee, 0x20, 0xef, 0x20 ]))
// Start bytes of 4-byte sequences (0xf0--0xf7).
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ],
[ 0xf0, 0x20, 0xf1, 0x20, 0xf2, 0x20, 0xf3, 0x20,
0xf4, 0x20, 0xf5, 0x20, 0xf6, 0x20, 0xf7, 0x20 ]))
// Start bytes of 5-byte sequences (0xf8--0xfb).
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf8, 0xf9, 0xfa, 0xfb ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ],
[ 0xf8, 0x20, 0xf9, 0x20, 0xfa, 0x20, 0xfb, 0x20 ]))
// Start bytes of 6-byte sequences (0xfc--0xfd).
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0xfd ]))
expectTrue(checkDecodeUTF8(
[], [ 0xfffd, 0x0020, 0xfffd, 0x0020 ],
[ 0xfc, 0x20, 0xfd, 0x20 ]))
}
UTF8Decoder.test("InvalidStartBytes") {
//
// Other bytes (0xc0--0xc1, 0xfe--0xff).
//
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc0 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc1 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfe ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xff ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xc0, 0xc1, 0xfe, 0xff ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfe, 0xfe, 0xff, 0xff ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfe, 0x80, 0x80, 0x80, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xff, 0x80, 0x80, 0x80, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ],
[ 0xc0, 0x20, 0xc1, 0x20, 0xfe, 0x20, 0xff, 0x20 ]))
}
UTF8Decoder.test("MissingContinuationBytes") {
//
// Sequences with one continuation byte missing
//
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc2 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xdf ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xc2, 0x41 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xdf, 0x41 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe0, 0xa0 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe0, 0xbf ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xe0, 0xa0, 0x41 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xe0, 0xbf, 0x41 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe1, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xec, 0xbf ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xe1, 0x80, 0x41 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xec, 0xbf, 0x41 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xed, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xed, 0x9f ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xed, 0x80, 0x41 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xed, 0x9f, 0x41 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xee, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xef, 0xbf ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xee, 0x80, 0x41 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xef, 0xbf, 0x41 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0x90, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0xbf, 0xbf ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf0, 0x90, 0x80, 0x41 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf0, 0xbf, 0xbf, 0x41 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf1, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf3, 0xbf, 0xbf ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf1, 0x80, 0x80, 0x41 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf3, 0xbf, 0xbf, 0x41 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x8f, 0xbf ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf4, 0x80, 0x80, 0x41 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf4, 0x8f, 0xbf, 0x41 ]))
// Overlong sequences with one trailing byte missing.
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc0 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc1 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xe0, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xe0, 0x9f ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xf0, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xf0, 0x8f, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf8, 0x80, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfc, 0x80, 0x80, 0x80, 0x80 ]))
// Sequences that represent surrogates with one trailing byte missing.
// High-surrogates
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xa0 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xac ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xaf ]))
// Low-surrogates
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xb0 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xb4 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xbf ]))
// Ill-formed 4-byte sequences.
// 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
// U+1100xx (invalid)
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xf4, 0x90, 0x80 ]))
// U+13FBxx (invalid)
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xf4, 0xbf, 0xbf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xf5, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xf6, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xf7, 0x80, 0x80 ]))
// U+1FFBxx (invalid)
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xf7, 0xbf, 0xbf ]))
// Ill-formed 5-byte sequences.
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
// U+2000xx (invalid)
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf8, 0x88, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf8, 0xbf, 0xbf, 0xbf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf9, 0x80, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfa, 0x80, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfb, 0x80, 0x80, 0x80 ]))
// U+3FFFFxx (invalid)
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfb, 0xbf, 0xbf, 0xbf ]))
// Ill-formed 6-byte sequences.
// 1111110u 10uuuuuu 10uzzzzz 10zzzyyyy 10yyyyxx 10xxxxxx
// U+40000xx (invalid)
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfc, 0x84, 0x80, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfc, 0xbf, 0xbf, 0xbf, 0xbf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfd, 0x80, 0x80, 0x80, 0x80 ]))
// U+7FFFFFxx (invalid)
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfd, 0xbf, 0xbf, 0xbf, 0xbf ]))
//
// Sequences with two continuation bytes missing
//
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0x90 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0xbf ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf1, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf3, 0xbf ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x8f ]))
// Overlong sequences with two trailing byte missing.
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe0 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf0, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf0, 0x8f ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xf8, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfc, 0x80, 0x80, 0x80 ]))
// Sequences that represent surrogates with two trailing bytes missing.
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xed ]))
// Ill-formed 4-byte sequences.
// 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
// U+110yxx (invalid)
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf4, 0x90 ]))
// U+13Fyxx (invalid)
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf4, 0xbf ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf5, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf6, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf7, 0x80 ]))
// U+1FFyxx (invalid)
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf7, 0xbf ]))
// Ill-formed 5-byte sequences.
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
// U+200yxx (invalid)
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xf8, 0x88, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xf8, 0xbf, 0xbf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xf9, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xfa, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xfb, 0x80, 0x80 ]))
// U+3FFFyxx (invalid)
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xfb, 0xbf, 0xbf ]))
// Ill-formed 6-byte sequences.
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
// U+4000yxx (invalid)
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfc, 0x84, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfc, 0xbf, 0xbf, 0xbf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfd, 0x80, 0x80, 0x80 ]))
// U+7FFFFyxx (invalid)
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfd, 0xbf, 0xbf, 0xbf ]))
//
// Sequences with three continuation bytes missing
//
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf1 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf2 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf3 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4 ]))
// Broken overlong sequences.
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf8, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xfc, 0x80, 0x80 ]))
// Ill-formed 4-byte sequences.
// 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
// U+14yyxx (invalid)
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf5 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf6 ]))
// U+1Cyyxx (invalid)
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf7 ]))
// Ill-formed 5-byte sequences.
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
// U+20yyxx (invalid)
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf8, 0x88 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf8, 0xbf ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf9, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfa, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfb, 0x80 ]))
// U+3FCyyxx (invalid)
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfb, 0xbf ]))
// Ill-formed 6-byte sequences.
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
// U+400yyxx (invalid)
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xfc, 0x84, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xfc, 0xbf, 0xbf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xfd, 0x80, 0x80 ]))
// U+7FFCyyxx (invalid)
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xfd, 0xbf, 0xbf ]))
//
// Sequences with four continuation bytes missing
//
// Ill-formed 5-byte sequences.
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
// U+uzyyxx (invalid)
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf8 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf9 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfa ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfb ]))
// U+3zyyxx (invalid)
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfb ]))
// Broken overlong sequences.
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf8 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0x80 ]))
// Ill-formed 6-byte sequences.
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
// U+uzzyyxx (invalid)
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0x84 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0xbf ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfd, 0x80 ]))
// U+7Fzzyyxx (invalid)
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfd, 0xbf ]))
//
// Sequences with five continuation bytes missing
//
// Ill-formed 6-byte sequences.
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
// U+uzzyyxx (invalid)
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfc ]))
// U+uuzzyyxx (invalid)
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfd ]))
//
// Consecutive sequences with trailing bytes missing
//
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd,
0xfffd, 0xfffd, 0xfffd, 0xfffd,
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd,
0xfffd, 0xfffd, 0xfffd, 0xfffd,
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xc0, /**/ 0xe0, 0x80, /**/ 0xf0, 0x80, 0x80,
0xf8, 0x80, 0x80, 0x80,
0xfc, 0x80, 0x80, 0x80, 0x80,
0xdf, /**/ 0xef, 0xbf, /**/ 0xf7, 0xbf, 0xbf,
0xfb, 0xbf, 0xbf, 0xbf,
0xfd, 0xbf, 0xbf, 0xbf, 0xbf ]))
}
UTF8Decoder.test("OverlongSequences") {
//
// Overlong UTF-8 sequences
//
// U+002F SOLIDUS
expectTrue(checkDecodeUTF8([ 0x002f ], [], [ 0x2f ]))
// Overlong sequences of the above.
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc0, 0xaf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xe0, 0x80, 0xaf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf0, 0x80, 0x80, 0xaf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf8, 0x80, 0x80, 0x80, 0xaf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf ]))
// U+0000 NULL
expectTrue(checkDecodeUTF8([ 0x0000 ], [], [ 0x00 ]))
// Overlong sequences of the above.
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc0, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xe0, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf0, 0x80, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf8, 0x80, 0x80, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfc, 0x80, 0x80, 0x80, 0x80, 0x80 ]))
// Other overlong and ill-formed sequences.
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc0, 0xbf ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc1, 0x80 ]))
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc1, 0xbf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xe0, 0x9f, 0xbf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xa0, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xbf, 0xbf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf0, 0x8f, 0x80, 0x80 ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf0, 0x8f, 0xbf, 0xbf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xf8, 0x87, 0xbf, 0xbf, 0xbf ]))
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf ]))
}
UTF8Decoder.test("IsolatedSurrogates") {
// Unicode 6.3.0:
//
// D71. High-surrogate code point: A Unicode code point in the range
// U+D800 to U+DBFF.
//
// D73. Low-surrogate code point: A Unicode code point in the range
// U+DC00 to U+DFFF.
// Note: U+E0100 is <DB40 DD00> in UTF-16.
// High-surrogates
// U+D800
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xa0, 0x80 ]))
expectTrue(checkDecodeUTF8(
[ 0x0041 ],
[ 0xfffd, 0xfffd, 0xfffd, 0x0041 ],
[ 0x41, 0xed, 0xa0, 0x80, 0x41 ]))
// U+DB40
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xac, 0xa0 ]))
// U+DBFF
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xaf, 0xbf ]))
// Low-surrogates
// U+DC00
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xb0, 0x80 ]))
// U+DD00
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xb4, 0x80 ]))
// U+DFFF
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xbf, 0xbf ]))
}
UTF8Decoder.test("SurrogatePairs") {
// Surrogate pairs
// U+D800 U+DC00
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80 ]))
// U+D800 U+DD00
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xa0, 0x80, 0xed, 0xb4, 0x80 ]))
// U+D800 U+DFFF
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf ]))
// U+DB40 U+DC00
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xac, 0xa0, 0xed, 0xb0, 0x80 ]))
// U+DB40 U+DD00
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xac, 0xa0, 0xed, 0xb4, 0x80 ]))
// U+DB40 U+DFFF
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xac, 0xa0, 0xed, 0xbf, 0xbf ]))
// U+DBFF U+DC00
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80 ]))
// U+DBFF U+DD00
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xaf, 0xbf, 0xed, 0xb4, 0x80 ]))
// U+DBFF U+DFFF
expectTrue(checkDecodeUTF8(
[],
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
[ 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf ]))
}
UTF8Decoder.test("Noncharacters") {
//
// Noncharacters
//
// Unicode 6.3.0:
//
// D14. Noncharacter: A code point that is permanently reserved for
// internal use and that should never be interchanged. Noncharacters
// consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 1016)
// and the values U+FDD0..U+FDEF.
// U+FFFE
expectTrue(checkDecodeUTF8([ 0xfffe ], [], [ 0xef, 0xbf, 0xbe ]))
// U+FFFF
expectTrue(checkDecodeUTF8([ 0xffff ], [], [ 0xef, 0xbf, 0xbf ]))
// U+1FFFE
expectTrue(checkDecodeUTF8([ 0x1fffe ], [], [ 0xf0, 0x9f, 0xbf, 0xbe ]))
// U+1FFFF
expectTrue(checkDecodeUTF8([ 0x1ffff ], [], [ 0xf0, 0x9f, 0xbf, 0xbf ]))
// U+2FFFE
expectTrue(checkDecodeUTF8([ 0x2fffe ], [], [ 0xf0, 0xaf, 0xbf, 0xbe ]))
// U+2FFFF
expectTrue(checkDecodeUTF8([ 0x2ffff ], [], [ 0xf0, 0xaf, 0xbf, 0xbf ]))
// U+3FFFE
expectTrue(checkDecodeUTF8([ 0x3fffe ], [], [ 0xf0, 0xbf, 0xbf, 0xbe ]))
// U+3FFFF
expectTrue(checkDecodeUTF8([ 0x3ffff ], [], [ 0xf0, 0xbf, 0xbf, 0xbf ]))
// U+4FFFE
expectTrue(checkDecodeUTF8([ 0x4fffe ], [], [ 0xf1, 0x8f, 0xbf, 0xbe ]))
// U+4FFFF
expectTrue(checkDecodeUTF8([ 0x4ffff ], [], [ 0xf1, 0x8f, 0xbf, 0xbf ]))
// U+5FFFE
expectTrue(checkDecodeUTF8([ 0x5fffe ], [], [ 0xf1, 0x9f, 0xbf, 0xbe ]))
// U+5FFFF
expectTrue(checkDecodeUTF8([ 0x5ffff ], [], [ 0xf1, 0x9f, 0xbf, 0xbf ]))
// U+6FFFE
expectTrue(checkDecodeUTF8([ 0x6fffe ], [], [ 0xf1, 0xaf, 0xbf, 0xbe ]))
// U+6FFFF
expectTrue(checkDecodeUTF8([ 0x6ffff ], [], [ 0xf1, 0xaf, 0xbf, 0xbf ]))
// U+7FFFE
expectTrue(checkDecodeUTF8([ 0x7fffe ], [], [ 0xf1, 0xbf, 0xbf, 0xbe ]))
// U+7FFFF
expectTrue(checkDecodeUTF8([ 0x7ffff ], [], [ 0xf1, 0xbf, 0xbf, 0xbf ]))
// U+8FFFE
expectTrue(checkDecodeUTF8([ 0x8fffe ], [], [ 0xf2, 0x8f, 0xbf, 0xbe ]))
// U+8FFFF
expectTrue(checkDecodeUTF8([ 0x8ffff ], [], [ 0xf2, 0x8f, 0xbf, 0xbf ]))
// U+9FFFE
expectTrue(checkDecodeUTF8([ 0x9fffe ], [], [ 0xf2, 0x9f, 0xbf, 0xbe ]))
// U+9FFFF
expectTrue(checkDecodeUTF8([ 0x9ffff ], [], [ 0xf2, 0x9f, 0xbf, 0xbf ]))
// U+AFFFE
expectTrue(checkDecodeUTF8([ 0xafffe ], [], [ 0xf2, 0xaf, 0xbf, 0xbe ]))
// U+AFFFF
expectTrue(checkDecodeUTF8([ 0xaffff ], [], [ 0xf2, 0xaf, 0xbf, 0xbf ]))
// U+BFFFE
expectTrue(checkDecodeUTF8([ 0xbfffe ], [], [ 0xf2, 0xbf, 0xbf, 0xbe ]))
// U+BFFFF
expectTrue(checkDecodeUTF8([ 0xbffff ], [], [ 0xf2, 0xbf, 0xbf, 0xbf ]))
// U+CFFFE
expectTrue(checkDecodeUTF8([ 0xcfffe ], [], [ 0xf3, 0x8f, 0xbf, 0xbe ]))
// U+CFFFF
expectTrue(checkDecodeUTF8([ 0xcfffF ], [], [ 0xf3, 0x8f, 0xbf, 0xbf ]))
// U+DFFFE
expectTrue(checkDecodeUTF8([ 0xdfffe ], [], [ 0xf3, 0x9f, 0xbf, 0xbe ]))
// U+DFFFF
expectTrue(checkDecodeUTF8([ 0xdffff ], [], [ 0xf3, 0x9f, 0xbf, 0xbf ]))
// U+EFFFE
expectTrue(checkDecodeUTF8([ 0xefffe ], [], [ 0xf3, 0xaf, 0xbf, 0xbe ]))
// U+EFFFF
expectTrue(checkDecodeUTF8([ 0xeffff ], [], [ 0xf3, 0xaf, 0xbf, 0xbf ]))
// U+FFFFE
expectTrue(checkDecodeUTF8([ 0xffffe ], [], [ 0xf3, 0xbf, 0xbf, 0xbe ]))
// U+FFFFF
expectTrue(checkDecodeUTF8([ 0xfffff ], [], [ 0xf3, 0xbf, 0xbf, 0xbf ]))
// U+10FFFE
expectTrue(checkDecodeUTF8([ 0x10fffe ], [], [ 0xf4, 0x8f, 0xbf, 0xbe ]))
// U+10FFFF
expectTrue(checkDecodeUTF8([ 0x10ffff ], [], [ 0xf4, 0x8f, 0xbf, 0xbf ]))
// U+FDD0
expectTrue(checkDecodeUTF8([ 0xfdd0 ], [], [ 0xef, 0xb7, 0x90 ]))
// U+FDD1
expectTrue(checkDecodeUTF8([ 0xfdd1 ], [], [ 0xef, 0xb7, 0x91 ]))
// U+FDD2
expectTrue(checkDecodeUTF8([ 0xfdd2 ], [], [ 0xef, 0xb7, 0x92 ]))
// U+FDD3
expectTrue(checkDecodeUTF8([ 0xfdd3 ], [], [ 0xef, 0xb7, 0x93 ]))
// U+FDD4
expectTrue(checkDecodeUTF8([ 0xfdd4 ], [], [ 0xef, 0xb7, 0x94 ]))
// U+FDD5
expectTrue(checkDecodeUTF8([ 0xfdd5 ], [], [ 0xef, 0xb7, 0x95 ]))
// U+FDD6
expectTrue(checkDecodeUTF8([ 0xfdd6 ], [], [ 0xef, 0xb7, 0x96 ]))
// U+FDD7
expectTrue(checkDecodeUTF8([ 0xfdd7 ], [], [ 0xef, 0xb7, 0x97 ]))
// U+FDD8
expectTrue(checkDecodeUTF8([ 0xfdd8 ], [], [ 0xef, 0xb7, 0x98 ]))
// U+FDD9
expectTrue(checkDecodeUTF8([ 0xfdd9 ], [], [ 0xef, 0xb7, 0x99 ]))
// U+FDDA
expectTrue(checkDecodeUTF8([ 0xfdda ], [], [ 0xef, 0xb7, 0x9a ]))
// U+FDDB
expectTrue(checkDecodeUTF8([ 0xfddb ], [], [ 0xef, 0xb7, 0x9b ]))
// U+FDDC
expectTrue(checkDecodeUTF8([ 0xfddc ], [], [ 0xef, 0xb7, 0x9c ]))
// U+FDDD
expectTrue(checkDecodeUTF8([ 0xfddd ], [], [ 0xef, 0xb7, 0x9d ]))
// U+FDDE
expectTrue(checkDecodeUTF8([ 0xfdde ], [], [ 0xef, 0xb7, 0x9e ]))
// U+FDDF
expectTrue(checkDecodeUTF8([ 0xfddf ], [], [ 0xef, 0xb7, 0x9f ]))
// U+FDE0
expectTrue(checkDecodeUTF8([ 0xfde0 ], [], [ 0xef, 0xb7, 0xa0 ]))
// U+FDE1
expectTrue(checkDecodeUTF8([ 0xfde1 ], [], [ 0xef, 0xb7, 0xa1 ]))
// U+FDE2
expectTrue(checkDecodeUTF8([ 0xfde2 ], [], [ 0xef, 0xb7, 0xa2 ]))
// U+FDE3
expectTrue(checkDecodeUTF8([ 0xfde3 ], [], [ 0xef, 0xb7, 0xa3 ]))
// U+FDE4
expectTrue(checkDecodeUTF8([ 0xfde4 ], [], [ 0xef, 0xb7, 0xa4 ]))
// U+FDE5
expectTrue(checkDecodeUTF8([ 0xfde5 ], [], [ 0xef, 0xb7, 0xa5 ]))
// U+FDE6
expectTrue(checkDecodeUTF8([ 0xfde6 ], [], [ 0xef, 0xb7, 0xa6 ]))
// U+FDE7
expectTrue(checkDecodeUTF8([ 0xfde7 ], [], [ 0xef, 0xb7, 0xa7 ]))
// U+FDE8
expectTrue(checkDecodeUTF8([ 0xfde8 ], [], [ 0xef, 0xb7, 0xa8 ]))
// U+FDE9
expectTrue(checkDecodeUTF8([ 0xfde9 ], [], [ 0xef, 0xb7, 0xa9 ]))
// U+FDEA
expectTrue(checkDecodeUTF8([ 0xfdea ], [], [ 0xef, 0xb7, 0xaa ]))
// U+FDEB
expectTrue(checkDecodeUTF8([ 0xfdeb ], [], [ 0xef, 0xb7, 0xab ]))
// U+FDEC
expectTrue(checkDecodeUTF8([ 0xfdec ], [], [ 0xef, 0xb7, 0xac ]))
// U+FDED
expectTrue(checkDecodeUTF8([ 0xfded ], [], [ 0xef, 0xb7, 0xad ]))
// U+FDEE
expectTrue(checkDecodeUTF8([ 0xfdee ], [], [ 0xef, 0xb7, 0xae ]))
// U+FDEF
expectTrue(checkDecodeUTF8([ 0xfdef ], [], [ 0xef, 0xb7, 0xaf ]))
// U+FDF0
expectTrue(checkDecodeUTF8([ 0xfdf0 ], [], [ 0xef, 0xb7, 0xb0 ]))
// U+FDF1
expectTrue(checkDecodeUTF8([ 0xfdf1 ], [], [ 0xef, 0xb7, 0xb1 ]))
// U+FDF2
expectTrue(checkDecodeUTF8([ 0xfdf2 ], [], [ 0xef, 0xb7, 0xb2 ]))
// U+FDF3
expectTrue(checkDecodeUTF8([ 0xfdf3 ], [], [ 0xef, 0xb7, 0xb3 ]))
// U+FDF4
expectTrue(checkDecodeUTF8([ 0xfdf4 ], [], [ 0xef, 0xb7, 0xb4 ]))
// U+FDF5
expectTrue(checkDecodeUTF8([ 0xfdf5 ], [], [ 0xef, 0xb7, 0xb5 ]))
// U+FDF6
expectTrue(checkDecodeUTF8([ 0xfdf6 ], [], [ 0xef, 0xb7, 0xb6 ]))
// U+FDF7
expectTrue(checkDecodeUTF8([ 0xfdf7 ], [], [ 0xef, 0xb7, 0xb7 ]))
// U+FDF8
expectTrue(checkDecodeUTF8([ 0xfdf8 ], [], [ 0xef, 0xb7, 0xb8 ]))
// U+FDF9
expectTrue(checkDecodeUTF8([ 0xfdf9 ], [], [ 0xef, 0xb7, 0xb9 ]))
// U+FDFA
expectTrue(checkDecodeUTF8([ 0xfdfa ], [], [ 0xef, 0xb7, 0xba ]))
// U+FDFB
expectTrue(checkDecodeUTF8([ 0xfdfb ], [], [ 0xef, 0xb7, 0xbb ]))
// U+FDFC
expectTrue(checkDecodeUTF8([ 0xfdfc ], [], [ 0xef, 0xb7, 0xbc ]))
// U+FDFD
expectTrue(checkDecodeUTF8([ 0xfdfd ], [], [ 0xef, 0xb7, 0xbd ]))
// U+FDFE
expectTrue(checkDecodeUTF8([ 0xfdfe ], [], [ 0xef, 0xb7, 0xbe ]))
// U+FDFF
expectTrue(checkDecodeUTF8([ 0xfdff ], [], [ 0xef, 0xb7, 0xbf ]))
}
UTF8Decoder.run()
// CHECK: {{^}}UTF8Decoder: All tests passed
var UTF16Decoder = TestCase("UTF16Decoder")
UTF16Decoder.test("Test") {
for (name, batch) in UTF16Tests {
println("Batch: \(name)")
for test in batch {
expectTrue(checkDecodeUTF16(test.scalarsHead, test.scalarsRepairedTail,
test.encoded), stackTrace: test.loc.withCurrentLoc())
}
}
}
UTF16Decoder.run()
// CHECK: {{^}}UTF16Decoder: All tests passed
var UTF32Decoder = TestCase("UTF32Decoder")
UTF32Decoder.test("Empty") {
expectTrue(checkDecodeUTF32([], [], []))
}
UTF32Decoder.test("SmokeTest") {
// U+0041 LATIN CAPITAL LETTER A
expectTrue(checkDecodeUTF32([ 0x0041 ], [], [ 0x0000_0041 ]))
// U+0041 LATIN CAPITAL LETTER A
// U+0042 LATIN CAPITAL LETTER B
expectTrue(checkDecodeUTF32(
[ 0x0041, 0x0042 ], [],
[ 0x0000_0041, 0x0000_0042 ]))
// U+0000 NULL
// U+0041 LATIN CAPITAL LETTER A
// U+0042 LATIN CAPITAL LETTER B
// U+0000 NULL
expectTrue(checkDecodeUTF32(
[ 0x0000, 0x0041, 0x0042, 0x0000 ], [],
[ 0x0000_0000, 0x0000_0041, 0x0000_0042, 0x0000_0000 ]))
// U+0283 LATIN SMALL LETTER ESH
expectTrue(checkDecodeUTF32([ 0x0283 ], [], [ 0x0000_0283 ]))
// U+03BA GREEK SMALL LETTER KAPPA
// U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
// U+03C3 GREEK SMALL LETTER SIGMA
// U+03BC GREEK SMALL LETTER MU
// U+03B5 GREEK SMALL LETTER EPSILON
expectTrue(checkDecodeUTF32(
[ 0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5 ], [],
[ 0x0000_03ba, 0x0000_1f79, 0x0000_03c3, 0x0000_03bc, 0x0000_03b5 ]))
// U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
// U+6587 CJK UNIFIED IDEOGRAPH-6587
expectTrue(checkDecodeUTF32(
[ 0x4f8b, 0x6587 ], [],
[ 0x0000_4f8b, 0x0000_6587 ]))
// U+D55C HANGUL SYLLABLE HAN
// U+AE00 HANGUL SYLLABLE GEUL
expectTrue(checkDecodeUTF32(
[ 0xd55c, 0xae00 ], [],
[ 0x0000_d55c, 0x0000_ae00 ]))
// U+1112 HANGUL CHOSEONG HIEUH
// U+1161 HANGUL JUNGSEONG A
// U+11AB HANGUL JONGSEONG NIEUN
// U+1100 HANGUL CHOSEONG KIYEOK
// U+1173 HANGUL JUNGSEONG EU
// U+11AF HANGUL JONGSEONG RIEUL
expectTrue(checkDecodeUTF32(
[ 0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af ], [],
[ 0x0000_1112, 0x0000_1161, 0x0000_11ab, 0x0000_1100, 0x0000_1173,
0x0000_11af ]))
// U+D7FF (unassigned)
expectTrue(checkDecodeUTF16([ 0xd7ff ], [], [ 0x0000_d7ff ]))
// U+E000 (private use)
expectTrue(checkDecodeUTF16([ 0xe000 ], [], [ 0x0000_e000 ]))
// U+FFFD REPLACEMENT CHARACTER
expectTrue(checkDecodeUTF16([ 0xfffd ], [], [ 0x0000_fffd ]))
// U+FFFF (noncharacter)
expectTrue(checkDecodeUTF16([ 0xffff ], [], [ 0x0000_ffff ]))
// U+10000 LINEAR B SYLLABLE B008 A
expectTrue(checkDecodeUTF32([ 0x00010000 ], [], [ 0x0001_0000 ]))
// U+10100 AEGEAN WORD SEPARATOR LINE
expectTrue(checkDecodeUTF32([ 0x00010100 ], [], [ 0x0001_0100 ]))
// U+103FF (unassigned)
expectTrue(checkDecodeUTF32([ 0x000103ff ], [], [ 0x0001_03ff ]))
// U+1D800 (unassigned)
expectTrue(checkDecodeUTF32([ 0x0001d800 ], [], [ 0x0001_d800 ]))
// U+E0000 (unassigned)
expectTrue(checkDecodeUTF32([ 0x000e0000 ], [], [ 0x000e_0000 ]))
// U+E0100 VARIATION SELECTOR-17
expectTrue(checkDecodeUTF32([ 0x000e0100 ], [], [ 0x000e_0100 ]))
// U+E03FF (unassigned)
expectTrue(checkDecodeUTF32([ 0x000e03ff ], [], [ 0x000e_03ff ]))
// U+10FC00 (private use)
expectTrue(checkDecodeUTF32([ 0x0010fc00 ], [], [ 0x0010_fc00 ]))
// U+10FD00 (private use)
expectTrue(checkDecodeUTF32([ 0x0010fd00 ], [], [ 0x0010_fd00 ]))
// U+10FFFF (private use, noncharacter)
expectTrue(checkDecodeUTF32([ 0x0010ffff ], [], [ 0x0010_ffff ]))
}
UTF32Decoder.test("IllFormed") {
// U+D800 (high-surrogate)
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_d800 ]))
// U+DB40 (high-surrogate)
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_db40 ]))
// U+DBFF (high-surrogate)
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_dbff ]))
// U+DC00 (low-surrogate)
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_dc00 ]))
// U+DD00 (low-surrogate)
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_dd00 ]))
// U+DFFF (low-surrogate)
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_dfff ]))
// U+110000 (invalid)
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0011_0000 ]))
// U+1000000 (invalid)
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0100_0000 ]))
// U+80000000 (invalid)
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x8000_0000 ]))
// U+FFFF0000 (invalid)
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0xffff_0000 ]))
// U+FFFFFFFF (invalid)
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0xffff_ffff ]))
}
UTF32Decoder.run()
// CHECK: {{^}}UTF32Decoder: All tests passed
var UTF8Encoder = TestCase("UTF8Encoder")
UTF8Encoder.test("SmokeTest") {
for test in UTF8TestsSmokeTest {
expectTrue(checkEncodeUTF8(test.encoded, test.scalars),
stackTrace: test.loc.withCurrentLoc())
}
}
UTF8Encoder.run()
// CHECK: {{^}}UTF8Encoder: All tests passed
import Foundation
// The most simple subclass of NSString that CoreFoundation does not know
// about.
class NonContiguousNSString : NSString {
convenience init(_ utf8: [UInt8]) {
var encoded: [UInt16] = []
var g = utf8.generate()
let hadError = transcode(UTF8.self, UTF16.self, g,
SinkOf {
encoded += $0
},
stopOnError: true)
expectFalse(hadError)
self.init(encoded)
}
init(_ value: [UInt16]) {
_value = value
super.init()
}
convenience init(_ scalars: [UInt32]) {
var encoded: [UInt16] = []
var g = scalars.generate()
let hadError = transcode(UTF32.self, UTF16.self, g,
SinkOf {
encoded += $0
},
stopOnError: true)
expectFalse(hadError)
self.init(encoded)
}
@objc override func copyWithZone(zone: NSZone) -> AnyObject {
// Ensure that copying this string produces a class that CoreFoundation
// does not know about.
return self
}
@objc override var length: Int {
return _value.count
}
@objc override func characterAtIndex(index: Int) -> unichar {
return _value[index]
}
var _value: [UInt16]
}
func checkUTF8View(expected: [UInt8], subject: String,
stackTrace: SourceLocStack) {
checkCollection(expected, subject.utf8, stackTrace.withCurrentLoc())
}
func checkUTF16View(expected: [UInt16], subject: String,
stackTrace: SourceLocStack) {
checkSliceableWithBidirectionalIndex(expected, subject.utf16,
stackTrace.withCurrentLoc())
}
func forStringsWithUnpairedSurrogates(checkClosure: (UTF16Test, String) -> ()) {
for (name, batch) in UTF16Tests {
println("Batch: \(name)")
for test in batch {
let subject: String = NonContiguousNSString(test.encoded)
checkClosure(test, subject)
}
}
}
var StringCookedViews = TestCase("StringCookedViews")
StringCookedViews.test("UTF8ForContiguousUTF16") {
for test in UTF8TestsSmokeTest {
// Add a non-ASCII character at the beginning to force Swift String and
// CoreFoundation off the ASCII fast path.
//
// U+0283 LATIN SMALL LETTER ESH
var backingStorage: [UInt16] = [ 0x0283 ]
let expected: [UInt8] = [ 0xca, 0x83 ] + test.encoded
var g = test.scalars.generate()
transcode(UTF32.self, UTF16.self, g,
SinkOf {
backingStorage += $0
},
stopOnError: false)
backingStorage.withUnsafePointerToElements {
(ptr) -> () in
let cfstring = CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
ptr, backingStorage.count, kCFAllocatorNull)
expectFalse(CFStringGetCStringPtr(cfstring,
CFStringBuiltInEncodings.ASCII.toRaw()).getLogicValue())
expectTrue(CFStringGetCharactersPtr(cfstring).getLogicValue())
checkUTF8View(expected, String(cfstring), test.loc.withCurrentLoc())
return ()
}
}
forStringsWithUnpairedSurrogates {
(test: UTF16Test, subject: String) -> () in
var expected: [UInt8] = []
var expectedScalars = test.scalarsHead + test.scalarsRepairedTail
var g = expectedScalars.generate()
transcode(UTF32.self, UTF8.self, g,
SinkOf {
expected += $0
},
stopOnError: false)
checkUTF8View(expected, subject, test.loc.withCurrentLoc())
}
}
func verifyThatStringIsOpaqueForCoreFoundation(nss: NSString) {
// Sanity checks to make sure we are testing the code path that does UTF-8
// encoding itself, instead of dispatching to CF. Both the original string
// itself and its copies should be resilient to CF's fast path functions,
// because Swift bridging may copy the string to ensure that it is not
// mutated.
let cfstring: CFString = reinterpretCast(nss)
assert(!CFStringGetCStringPtr(cfstring,
CFStringBuiltInEncodings.ASCII.toRaw()))
assert(!CFStringGetCStringPtr(cfstring,
CFStringBuiltInEncodings.UTF8.toRaw()))
assert(!CFStringGetCharactersPtr(cfstring))
let copy = CFStringCreateCopy(nil, cfstring)
assert(!CFStringGetCStringPtr(copy,
CFStringBuiltInEncodings.ASCII.toRaw()))
assert(!CFStringGetCStringPtr(copy,
CFStringBuiltInEncodings.UTF8.toRaw()))
assert(!CFStringGetCharactersPtr(copy))
}
StringCookedViews.test("UTF8ForNonContiguousUTF16") {
for test in UTF8TestsSmokeTest {
var nss = NonContiguousNSString(test.scalars)
verifyThatStringIsOpaqueForCoreFoundation(nss)
checkUTF8View(test.encoded, nss, test.loc.withCurrentLoc())
}
for (name, batch) in UTF16Tests {
println("Batch: \(name)")
for test in batch {
var expected: [UInt8] = []
var expectedScalars = test.scalarsHead + test.scalarsRepairedTail
var g = expectedScalars.generate()
transcode(UTF32.self, UTF8.self, g,
SinkOf {
expected += $0
},
stopOnError: false)
var nss = NonContiguousNSString(test.encoded)
verifyThatStringIsOpaqueForCoreFoundation(nss)
checkUTF8View(expected, nss, test.loc.withCurrentLoc())
}
}
}
StringCookedViews.test("UTF8ForNonContiguousUTF16Extra") {
// These tests don't add much additional value as long as tests above
// actually test the code path we care about.
if true {
var bytes: [UInt8] = [ 97, 98, 99 ]
var cfstring: CFString = CFStringCreateWithBytesNoCopy(kCFAllocatorDefault,
bytes, bytes.count, CFStringBuiltInEncodings.MacRoman.toRaw(), 0, kCFAllocatorNull)
// Sanity checks to make sure we are testing the code path that does UTF-8
// encoding itself, instead of dispatching to CF.
// GetCStringPtr fails because our un-copied bytes aren't zero-terminated.
// GetCharactersPtr fails because our un-copied bytes aren't UTF-16.
assert(!CFStringGetCStringPtr(cfstring,
CFStringBuiltInEncodings.ASCII.toRaw()))
assert(!CFStringGetCStringPtr(cfstring,
CFStringBuiltInEncodings.UTF8.toRaw()))
assert(!CFStringGetCharactersPtr(cfstring))
checkUTF8View(bytes, String(cfstring), SourceLocStack().withCurrentLoc())
_fixLifetime(bytes)
}
if true {
var bytes: [UInt8] = [ 97, 98, 99 ]
var cfstring: CFString = CFStringCreateWithBytes(kCFAllocatorDefault,
bytes, bytes.count, CFStringBuiltInEncodings.MacRoman.toRaw(), 0)
// Sanity checks to make sure we are testing the code path that does UTF-8
// encoding itself, instead of dispatching to CF.
// CFStringCreateWithBytes() usually allocates zero-terminated ASCII
// or UTF-16, in which case one of the fast paths will succeed.
// This test operates only when CF creates a tagged pointer string object.
if (object_getClassName(cfstring) == "NSTaggedPointerString") {
assert(!CFStringGetCStringPtr(cfstring,
CFStringBuiltInEncodings.ASCII.toRaw()))
assert(!CFStringGetCStringPtr(cfstring,
CFStringBuiltInEncodings.UTF8.toRaw()))
assert(!CFStringGetCharactersPtr(cfstring))
checkUTF8View(bytes, String(cfstring), SourceLocStack().withCurrentLoc())
}
}
}
StringCookedViews.test("UTF16") {
for test in UTF8TestsSmokeTest {
var expected: [UInt16] = []
var expectedScalars = test.scalars
var g = expectedScalars.generate()
transcode(UTF32.self, UTF16.self, g,
SinkOf {
expected += $0
},
stopOnError: false)
var nss = NonContiguousNSString(test.scalars)
checkUTF16View(expected, nss, test.loc.withCurrentLoc())
}
forStringsWithUnpairedSurrogates {
(test: UTF16Test, subject: String) -> () in
var expected: [UInt16] = []
var expectedScalars = test.scalarsHead + test.scalarsRepairedTail
var g = expectedScalars.generate()
transcode(UTF32.self, UTF16.self, g,
SinkOf {
expected += $0
},
stopOnError: false)
checkUTF16View(expected, subject, test.loc.withCurrentLoc())
}
}
StringCookedViews.test("UnicodeScalars") {
for test in UTF8TestsSmokeTest {
let expectedScalars = map(test.scalars) { UnicodeScalar($0) }
let subject: String = NonContiguousNSString(test.scalars)
checkSliceableWithBidirectionalIndex(expectedScalars,
subject.unicodeScalars, test.loc.withCurrentLoc())
}
forStringsWithUnpairedSurrogates {
(test: UTF16Test, subject: String) -> () in
let expectedScalars = map(test.scalarsHead + test.scalarsRepairedTail) {
UnicodeScalar($0)
}
checkSliceableWithBidirectionalIndex(expectedScalars,
subject.unicodeScalars, test.loc.withCurrentLoc())
}
}
StringCookedViews.run()
// CHECK: {{^}}StringCookedViews: All tests passed
var StringTests = TestCase("StringTests")
StringTests.test("StreamableConformance") {
forStringsWithUnpairedSurrogates {
(test: UTF16Test, subject: String) -> () in
let expected = test.scalarsHead + test.scalarsRepairedTail
let printedSubject = toString(subject)
let actual = map(printedSubject.unicodeScalars) { $0.value }
expectEqual(expected, actual)
}
}
StringTests.run()
// CHECK: {{^}}StringTests: All tests passed