mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
2541 lines
75 KiB
Swift
2541 lines
75 KiB
Swift
// RUN: %target-run-stdlib-swift | FileCheck %s
|
|
|
|
// REQUIRES: long_tests
|
|
|
|
import Foundation
|
|
import Swift
|
|
|
|
// A Unicode codec that can also identify the matching Foundation encoding, so
// the test can cross-check the codec against NSString.
protocol TestableUnicodeCodec : UnicodeCodecType {
  typealias CodeUnit : IntegerType

  // Name printed in the "testing ..." log line.
  class func name() -> NSString

  // The NSStringEncoding passed to nsEncode() for this codec.
  class func encodingId() -> NSStringEncoding
}
|
|
|
|
// UTF8 is cross-checked against NSUTF8StringEncoding.
extension UTF8 : TestableUnicodeCodec {
  static func name() -> NSString {
    return "UTF8"
  }

  static func encodingId() -> NSStringEncoding {
    return NSUTF8StringEncoding
  }
}
|
|
|
|
// UTF16 is cross-checked against NSUTF16LittleEndianStringEncoding.
extension UTF16 : TestableUnicodeCodec {
  static func name() -> NSString {
    return "UTF16"
  }

  static func encodingId() -> NSStringEncoding {
    return NSUTF16LittleEndianStringEncoding
  }
}
|
|
|
|
// UTF32 is cross-checked against NSUTF32LittleEndianStringEncoding.
extension UTF32 : TestableUnicodeCodec {
  static func name() -> NSString {
    return "UTF32"
  }

  static func encodingId() -> NSStringEncoding {
    return NSUTF32LittleEndianStringEncoding
  }
}
|
|
|
|
// Backing store for the computed var unicodeScalarRanges; it stays empty
// until that property is first read.
var _unicodeScalarRanges: [Range<UInt32>] = []

// The ranges of Unicode scalar values covered by the exhaustive codec test.
//
// The list skips 0xD800..<0xE000, 0xFDD0..<0xFDF0, and the last two values
// (0x..FFFE and 0x..FFFF) of the BMP and of each of the planes 0x1 through
// 0x10.
var unicodeScalarRanges : [Range<UInt32>] {
  if _unicodeScalarRanges.isEmpty {
    // Basic Multilingual Plane.
    let bmpRanges: [Range<UInt32>] =
      [UInt32(0)..<0xD800, 0xE000..<0xFDD0, 0xFDF0..<0xFFFE]
    for range in bmpRanges {
      _unicodeScalarRanges.append(range)
    }

    // Planes 0x1 through 0x10.
    for plane in UInt32(0x1)..<0x11 {
      let planeStart = plane << 16
      _unicodeScalarRanges.append(planeStart..<(planeStart + 0xFFFE))
    }
  }
  return _unicodeScalarRanges
}
|
|
|
|
// The total number of scalar values contained in unicodeScalarRanges.
var unicodeScalarCount : Int {
  var total = 0
  for range in unicodeScalarRanges {
    let length = range.endIndex - range.startIndex
    total += Int(length)
  }
  return total
}
|
|
|
|
// Returns the scalar at zero-based position `n` when the ranges in
// unicodeScalarRanges are laid end to end.
//
// Traps with "Index out of range" when `n` is not smaller than the combined
// length of all ranges.
func nthUnicodeScalar(n: UInt32) -> UnicodeScalar {
  // Offset of the requested scalar relative to the start of the range
  // currently being examined.
  var offset = n
  for range in unicodeScalarRanges {
    let length = range.endIndex - range.startIndex
    if offset < length {
      return UnicodeScalar(range.startIndex + offset)
    }
    offset -= length
  }
  _preconditionFailure("Index out of range")
}
|
|
|
|
// Encodes the scalar value `c` using Foundation.
//
// The four bytes of `c` are wrapped in an NSString as UTF-32LE, then
// re-encoded with `encoding` into `buffer`.  `used` receives the usedLength
// reported by NSString.getBytes.
//
// buffer should have a length >= 4
func nsEncode<CodeUnit>(
  var c: UInt32,
  encoding: NSStringEncoding,
  inout buffer: [CodeUnit],
  inout used: Int
) {
  let scalarString = NSString(
    bytes: &c,
    length: 4,
    encoding: NSUTF32LittleEndianStringEncoding)

  // Convert the whole (one-scalar) string.
  let wholeString = NSRange(location: 0, length: scalarString.length)

  scalarString.getBytes(
    &buffer,
    maxLength: buffer.count,
    usedLength: &used,
    encoding: encoding,
    options: NSStringEncodingConversionOptions(0),
    range: wholeString,
    remainingRange: nil)
}
|
|
|
|
// Returns the hexadecimal representation of the given numeric value,
// prefixed with "0x".
func hex<T : IntegerType>(x: T) -> String {
  let digits = String(x.toIntMax(), radix: 16)
  return "0x" + digits
}
|
|
|
|
// Returns a string of the form "[0x.., 0x.., ...]" listing the hexadecimal
// representation of every value in the given sequence.
func hex<
  S: SequenceType
where
  S.Generator.Element : IntegerType
>(x: S) -> String {
  var result = "["
  // Empty before the first element, ", " before every later one.
  var separator = ""
  for element in x {
    result += separator
    result += hex(element)
    separator = ", "
  }
  return result + "]"
}
|
|
|
|
// A SinkType that records every element written to it in an Array, so the
// elements can be inspected afterwards.
class ArraySink<T: IntegerLiteralConvertible> : SinkType {
  // Number of elements written since creation or the last clear().
  var count = 0

  // Backing array; init(capacity:) sizes it once and put() never grows it.
  var storage: [T] = Array()

  init(capacity: Int) {
    storage = Array(count: capacity, repeatedValue: 0)
  }

  // Stores `x` in the next free slot.
  func put(x: T) {
    storage[count] = x
    count += 1
  }

  // Forgets the recorded elements; the storage itself is left as is.
  func clear() {
    count = 0
  }

  // The elements written so far.
  var elements : Slice<T> {
    return storage[0..<count]
  }
}
|
|
|
|
// C library entry points used to choose a random test group.
@asmname("random") func random() -> UInt32
@asmname("srandomdev") func srandomdev()

// To avoid swamping the buildbot, by default, test only one out of
// testGroupCount cases, selected at random. You can manually pass
// "--all" on the command line to test everything.
var testGroupCount = 128
srandomdev()
var testGroup = random() % testGroupCount
// NOTE(review): "--all" is only recognized as element 0 of Process.arguments;
// confirm that element 0 is the first user argument and not the executable
// path.
var testAll = Process.arguments.count > 0 && Process.arguments[0] == "--all"

// Half-open range [minScalarOrd, maxScalarOrd) of positions handed to
// nthUnicodeScalar() by CodecTest.run().
var minScalarOrd : Int
var maxScalarOrd : Int

if testAll {
  println("Testing all Unicode scalars")
  minScalarOrd = 0
  maxScalarOrd = unicodeScalarCount
} else {
  println("Testing Unicode scalar group \(testGroup) of \(testGroupCount)")
  minScalarOrd = unicodeScalarCount * testGroup / testGroupCount
  maxScalarOrd = unicodeScalarCount * (testGroup+1) / testGroupCount
}
|
|
|
|
// Cross-checks one Swift codec against Foundation for every scalar in the
// selected test range.
//
// Mismatches are reported with println(); since the FileCheck directives
// expect "done." directly after the "testing ..." line, any such output makes
// the test fail.
class CodecTest<Codec: TestableUnicodeCodec> {
  typealias CodeUnit = Codec.CodeUnit

  // usedLength reported by the most recent nsEncode() call.
  var used = 0

  // Receives the Foundation encoding of the scalar under test.
  var nsEncodeBuffer: [CodeUnit] = Array(count: 4, repeatedValue: 0)

  // Receives the Swift codec's encoding of the scalar under test.
  var encodeBuffer = ArraySink<CodeUnit>(capacity: 4)

  // Checks that `scalar` decodes from, and encodes to, the same code units
  // that Foundation produces.
  func testOne(scalar: UnicodeScalar)
  {
    /* Progress reporter
    if (scalar.value % 0x1000) == 0 {
      println("\(hex(scalar.value))")
    }
    */

    // Use Cocoa to encode the scalar
    nsEncode(scalar.value, Codec.encodingId(), &nsEncodeBuffer, &used)
    // `used` is divided by the code unit size to get a code unit count.
    let nsEncoded = nsEncodeBuffer[0..<(used/sizeof(CodeUnit.self))]

    // Decode Foundation's output with the Swift codec.
    var g = nsEncoded.generate()
    var decoded: UnicodeScalar
    var decoder = Codec()
    switch decoder.decode(&g) {
    case .Result(let us):
      decoded = us
    default:
      fatalError("decoding failed")
    }
    if decoded != scalar {
      println("Decoding failed: \(hex(scalar.value)) => \(hex(nsEncoded)) => \(hex(decoded.value))")
    }

    // Encode with the Swift codec and compare against Foundation's output.
    encodeBuffer.clear()
    Codec.encode(scalar, output: &self.encodeBuffer)

    if !equal(nsEncoded, encodeBuffer.elements) {
      // Report the whole encoded sequence, not just its first code unit, and
      // label the failure as an encoding failure.
      println("Encoding failed: \(hex(scalar.value)) => \(hex(encodeBuffer.elements)), expected \(hex(nsEncoded))")
    }
  }

  // Runs testOne() for every position in [minScalarOrd, maxScalarOrd).
  func run() {
    println("testing \(Codec.name())")
    for i in minScalarOrd..<maxScalarOrd {
      testOne(nthUnicodeScalar(UInt32(i)))
    }
    println("done.")
  }
}
|
|
|
|
srandomdev()

// Run the exhaustive cross-check once per codec.  A codec that agrees with
// Foundation prints nothing between its "testing" and "done." lines.

// CHECK: testing UTF8
// CHECK-NEXT: done.
CodecTest<UTF8>().run()

// CHECK: testing UTF16
// CHECK-NEXT: done.
CodecTest<UTF16>().run()

// CHECK: testing UTF32
// CHECK-NEXT: done.
CodecTest<UTF32>().run()
|
|
|
|
// Prints UTF-8 code units as "[ a, b, c ]" followed by a newline.
func println(a: [UTF8.CodeUnit]) {
  print("[ ")
  // Empty before the first code unit, ", " before every later one.
  var separator = ""
  for unit in a {
    print("\(separator)\(unit)")
    separator = ", "
  }
  println(" ]")
}
|
|
|
|
// Prints UTF-16 code units as "[ a, b, c ]" followed by a newline.
func println(a: [UTF16.CodeUnit]) {
  print("[ ")
  // Empty before the first code unit, ", " before every later one.
  var separator = ""
  for unit in a {
    print("\(separator)\(unit)")
    separator = ", "
  }
  println(" ]")
}
|
|
|
|
// FileCheck-verified tests of UTF16.width, UTF16.leadSurrogate,
// UTF16.trailSurrogate, UTF16.copy and UTF16.measure.  The CHECK lines must
// stay in the same order as the println() calls they describe.
func additionalUtf16Tests() {
  // CHECK: additionalUtf16Tests
  println("additionalUtf16Tests")
  // CHECK-NEXT: 1
  println(UTF16.width("x"))
  // CHECK-NEXT: 2
  println(UTF16.width("\u{101010}"))

  // CHECK-NEXT: 2
  println(UTF16.width("𝄞"))
  // CHECK-NEXT: true
  println(UTF16.leadSurrogate("𝄞") == 0xD834)
  // CHECK-NEXT: true
  println(UTF16.trailSurrogate("𝄞") == 0xDD1E)

  var u8: [UTF8.CodeUnit] = [ 0, 1, 2, 3, 4, 5 ]
  var u16: [UTF16.CodeUnit] = [ 6, 7, 8, 9, 10, 11 ]

  // Inside the closures only the buffer pointers are used, never the arrays
  // they were obtained from.
  u16.withUnsafeMutableBufferPointer {
    (wide)->() in
    let wideBase = wide.baseAddress

    u8.withUnsafeMutableBufferPointer {
      (narrow)->() in
      let narrowBase = narrow.baseAddress

      // CHECK-NEXT: [ 0, 1, 2, 9, 10, 11 ]
      UTF16.copy(narrowBase, destination: wideBase, count: 3)
      println(Array(wide))

      // CHECK-NEXT: [ 9, 10, 11, 3, 4, 5 ]
      UTF16.copy(wideBase + 3, destination: narrowBase, count: 3)
      println(Array(narrow))

      // CHECK-NEXT: [ 0, 1, 2, 0, 1, 2 ]
      UTF16.copy(wideBase, destination: wideBase + 3, count: 3)
      println(Array(wide))

      // CHECK-NEXT: [ 9, 10, 11, 9, 10, 11 ]
      UTF16.copy(narrowBase, destination: narrowBase + 3, count: 3)
      println(Array(narrow))
    }
  }

  let (narrowCount, narrowIsASCII) = UTF16.measure(
    UTF8.self, input: u8.generate(), repairIllFormedSequences: false)!
  // CHECK-NEXT: 6 / true
  println("\(narrowCount) / \(narrowIsASCII)")

  let (wideCount, wideIsASCII) = UTF16.measure(
    UTF16.self, input: u16.generate(), repairIllFormedSequences: false)!
  // CHECK-NEXT: 6 / true
  println("\(wideCount) / \(wideIsASCII)")

  // The UTF-8 bytes F0 A4 AD A2 encode "𤭢" == U+24B62, which takes two
  // UTF-16 code units.
  u8 = [0xF0, 0xA4, 0xAD, 0xA2]
  let (pairCount, pairIsASCII) = UTF16.measure(
    UTF8.self, input: u8.generate(), repairIllFormedSequences: false)!
  // CHECK-NEXT: 2 / false
  println("\(pairCount) / \(pairIsASCII)")
}
|
|
additionalUtf16Tests()
|
|
|
|
import StdlibUnittest
|
|
|
|
// A generator over an array that counts how many times it has reported
// end-of-sequence, so tests can check how often a consumer reads past the
// end of its input.
class EOFCountingGenerator<T> : GeneratorType {
  var array: [T]
  // Position of the element that next() returns next.
  var index: Int = 0
  // Number of next() calls that returned nil.
  var numTimesReturnedEOF: Int = 0

  init(_ array: [T]) {
    self.array = array
  }

  // Returns the next element, or nil (and bumps numTimesReturnedEOF) once
  // every element has been returned.
  func next() -> T? {
    if index != array.count {
      let element = array[index]
      index += 1
      return element
    }
    numTimesReturnedEOF += 1
    return nil
  }
}
|
|
|
|
// Decodes `utfStr` with `codec` by transcoding it to UTF-32, twice:
//
// 1. with stopOnError: true, expecting exactly `expectedHead`;
// 2. with stopOnError: false, expecting `expectedHead` followed by
//    `expectedRepairedTail`.
//
// Returns an assertion failure describing the first mismatch, or assertion
// success.  Both passes also check how often the input generator reported
// EOF.
func checkDecodeUTF<Codec : UnicodeCodecType>(
  codec: Codec.Type, expectedHead: [UInt32],
  expectedRepairedTail: [UInt32], utfStr: [Codec.CodeUnit]
) -> AssertionResult {
  // Pass 1: stop at the first error.
  if true {
    var scalars: [UInt32] = []
    var input = EOFCountingGenerator(utfStr)
    transcode(codec, UTF32.self, input,
      SinkOf { scalars.append($0) },
      stopOnError: true)
    // EOF is reported at most once.
    expectGE(1, input.numTimesReturnedEOF)
    if expectedHead != scalars {
      return assertionFailure()
        .withDescription("\n")
        .withDescription("expectedHead: \(asHex(expectedHead))\n")
        .withDescription("actual: \(asHex(scalars))")
    }
  }

  // Pass 2: keep going after errors.
  if true {
    var expected = expectedHead
    expected += expectedRepairedTail

    var scalars: [UInt32] = []
    var input = EOFCountingGenerator(utfStr)
    transcode(codec, UTF32.self, input,
      SinkOf { scalars.append($0) },
      stopOnError: false)
    // The whole input is consumed and EOF is reported exactly once.
    expectEqual(1, input.numTimesReturnedEOF)
    if expected != scalars {
      return assertionFailure()
        .withDescription("\n")
        .withDescription("expected: \(asHex(expected))\n")
        .withDescription("actual: \(asHex(scalars))")
    }
  }

  return assertionSuccess()
}
|
|
|
|
// checkDecodeUTF() specialized for UTF-8 input.
func checkDecodeUTF8(
  expectedHead: [UInt32],
  expectedRepairedTail: [UInt32], utf8Str: [UInt8]
) -> AssertionResult {
  let result = checkDecodeUTF(
    UTF8.self, expectedHead, expectedRepairedTail, utf8Str)
  return result
}
|
|
|
|
// checkDecodeUTF() specialized for UTF-16 input.
func checkDecodeUTF16(
  expectedHead: [UInt32],
  expectedRepairedTail: [UInt32], utf16Str: [UInt16]
) -> AssertionResult {
  let result = checkDecodeUTF(
    UTF16.self, expectedHead, expectedRepairedTail, utf16Str)
  return result
}
|
|
|
|
// checkDecodeUTF() specialized for UTF-32 input.
func checkDecodeUTF32(
  expectedHead: [UInt32],
  expectedRepairedTail: [UInt32], utf32Str: [UInt32]
) -> AssertionResult {
  let result = checkDecodeUTF(
    UTF32.self, expectedHead, expectedRepairedTail, utf32Str)
  return result
}
|
|
|
|
// Transcodes `scalars` from UTF-32 to UTF-8 with stopOnError: true and
// compares the bytes produced with `expected`.
//
// Also expects transcode() to report no error and the input generator to
// report EOF at most once.  Returns an assertion failure showing both byte
// sequences on mismatch, assertion success otherwise.
func checkEncodeUTF8(expected: [UInt8], scalars: [UInt32]) -> AssertionResult {
  var bytes: [UInt8] = []
  var input = EOFCountingGenerator(scalars)
  let hadError = transcode(UTF32.self, UTF8.self, input,
    SinkOf { bytes.append($0) },
    stopOnError: true)
  expectFalse(hadError)
  expectGE(1, input.numTimesReturnedEOF)

  if expected == bytes {
    return assertionSuccess()
  }
  return assertionFailure()
    .withDescription("\n")
    .withDescription("expected: \(asHex(expected))\n")
    .withDescription("actual: \(asHex(bytes))")
}
|
|
|
|
// One well-formed UTF-8 test vector: a sequence of scalar values and its
// UTF-8 encoding.
struct UTF8Test {
  let scalars: [UInt32]
  let encoded: [UInt8]
  // Where the vector was written down, for failure reports.
  let loc: SourceLoc

  // `file` and `line` default to the location of the call that creates the
  // vector.
  init(
    _ scalars: [UInt32], _ encoded: [UInt8],
    file: String = __FILE__, line: UWord = __LINE__
  ) {
    self.scalars = scalars
    self.encoded = encoded
    self.loc = SourceLoc(file, line, comment: "test data")
  }
}
|
|
|
|
// Well-formed UTF-8 test vectors, grouped by the length of the encoded
// sequences they exercise.
let UTF8TestsSmokeTest = [
  //
  // 1-byte sequences
  //

  // U+0041 LATIN CAPITAL LETTER A
  UTF8Test([ 0x0041 ], [ 0x41 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  UTF8Test([ 0x0041, 0x0042 ], [ 0x41, 0x42 ]),

  // U+0061 LATIN SMALL LETTER A
  // U+0062 LATIN SMALL LETTER B
  // U+0063 LATIN SMALL LETTER C
  UTF8Test([ 0x0061, 0x0062, 0x0063 ], [ 0x61, 0x62, 0x63 ]),

  // U+0000 NULL
  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0000 NULL
  UTF8Test([ 0x0000, 0x0041, 0x0042, 0x0000 ], [ 0x00, 0x41, 0x42, 0x00 ]),

  //
  // 2-byte sequences
  //

  // U+0283 LATIN SMALL LETTER ESH
  UTF8Test([ 0x0283 ], [ 0xca, 0x83 ]),

  // U+03BA GREEK SMALL LETTER KAPPA
  // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
  // U+03C3 GREEK SMALL LETTER SIGMA
  // U+03BC GREEK SMALL LETTER MU
  // U+03B5 GREEK SMALL LETTER EPSILON
  UTF8Test(
    [ 0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5 ],
    [ 0xce, 0xba,
      0xe1, 0xbd, 0xb9,
      0xcf, 0x83,
      0xce, 0xbc,
      0xce, 0xb5 ]),

  // U+0430 CYRILLIC SMALL LETTER A
  // U+0431 CYRILLIC SMALL LETTER BE
  // U+0432 CYRILLIC SMALL LETTER VE
  UTF8Test(
    [ 0x0430, 0x0431, 0x0432 ],
    [ 0xd0, 0xb0, 0xd0, 0xb1, 0xd0, 0xb2 ]),

  //
  // 3-byte sequences
  //

  // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
  // U+6587 CJK UNIFIED IDEOGRAPH-6587
  UTF8Test([ 0x4f8b, 0x6587 ], [ 0xe4, 0xbe, 0x8b, 0xe6, 0x96, 0x87 ]),

  // U+D55C HANGUL SYLLABLE HAN
  // U+AE00 HANGUL SYLLABLE GEUL
  UTF8Test([ 0xd55c, 0xae00 ], [ 0xed, 0x95, 0x9c, 0xea, 0xb8, 0x80 ]),

  // U+1112 HANGUL CHOSEONG HIEUH
  // U+1161 HANGUL JUNGSEONG A
  // U+11AB HANGUL JONGSEONG NIEUN
  // U+1100 HANGUL CHOSEONG KIYEOK
  // U+1173 HANGUL JUNGSEONG EU
  // U+11AF HANGUL JONGSEONG RIEUL
  UTF8Test(
    [ 0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af ],
    [ 0xe1, 0x84, 0x92,
      0xe1, 0x85, 0xa1,
      0xe1, 0x86, 0xab,
      0xe1, 0x84, 0x80,
      0xe1, 0x85, 0xb3,
      0xe1, 0x86, 0xaf ]),

  // U+3042 HIRAGANA LETTER A
  // U+3044 HIRAGANA LETTER I
  // U+3046 HIRAGANA LETTER U
  // U+3048 HIRAGANA LETTER E
  // U+304A HIRAGANA LETTER O
  UTF8Test(
    [ 0x3042, 0x3044, 0x3046, 0x3048, 0x304a ],
    [ 0xe3, 0x81, 0x82,
      0xe3, 0x81, 0x84,
      0xe3, 0x81, 0x86,
      0xe3, 0x81, 0x88,
      0xe3, 0x81, 0x8a ]),

  //
  // 4-byte sequences
  //

  // U+1F425 FRONT-FACING BABY CHICK
  UTF8Test([ 0x0001F425 ], [ 0xf0, 0x9f, 0x90, 0xa5 ]),

  // The next nine vectors put U+1F425 FRONT-FACING BABY CHICK after one to
  // nine LATIN CAPITAL LETTERs (U+0041 A through U+0049 I), so the 4-byte
  // sequence starts at byte offsets 1 through 9.

  // A, chick
  UTF8Test([ 0x0041, 0x0001F425 ], [ 0x41, 0xf0, 0x9f, 0x90, 0xa5 ]),

  // A, B, chick
  UTF8Test(
    [ 0x0041, 0x0042, 0x0001F425 ],
    [ 0x41, 0x42, 0xf0, 0x9f, 0x90, 0xa5 ]),

  // A ... C, chick
  UTF8Test(
    [ 0x0041, 0x0042, 0x0043, 0x0001F425 ],
    [ 0x41, 0x42, 0x43, 0xf0, 0x9f, 0x90, 0xa5 ]),

  // A ... D, chick
  UTF8Test(
    [ 0x0041, 0x0042, 0x0043, 0x0044, 0x0001F425 ],
    [ 0x41, 0x42, 0x43, 0x44, 0xf0, 0x9f, 0x90, 0xa5 ]),

  // A ... E, chick
  UTF8Test(
    [ 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0001F425 ],
    [ 0x41, 0x42, 0x43, 0x44, 0x45, 0xf0, 0x9f, 0x90, 0xa5 ]),

  // A ... F, chick
  UTF8Test(
    [ 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0001F425 ],
    [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0xf0, 0x9f, 0x90, 0xa5 ]),

  // A ... G, chick
  UTF8Test(
    [ 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0001F425 ],
    [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0xf0, 0x9f, 0x90, 0xa5 ]),

  // A ... H, chick
  UTF8Test(
    [ 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048,
      0x0001F425 ],
    [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
      0xf0, 0x9f, 0x90, 0xa5 ]),

  // A ... I, chick
  UTF8Test(
    [ 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049,
      0x0001F425 ],
    [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
      0xf0, 0x9f, 0x90, 0xa5 ]),

  // U+E0100 VARIATION SELECTOR-17
  UTF8Test([ 0x000E0100 ], [ 0xf3, 0xa0, 0x84, 0x80 ]),
]
|
|
|
|
// One UTF-16 test vector.
//
// `scalarsHead` and `scalarsRepairedTail` have the same roles as the
// `expectedHead` and `expectedRepairedTail` parameters of checkDecodeUTF16():
// the scalars decoded up to the first error, and the additional scalars
// decoded when errors are repaired.
struct UTF16Test {
  let scalarsHead: [UInt32]
  let scalarsRepairedTail: [UInt32]
  let encoded: [UInt16]
  // Where the vector was written down, for failure reports.
  let loc: SourceLoc

  // `file` and `line` default to the location of the call that creates the
  // vector.
  init(
    _ scalarsHead: [UInt32], _ scalarsRepairedTail: [UInt32],
    _ encoded: [UInt16],
    file: String = __FILE__, line: UWord = __LINE__
  ) {
    self.scalarsHead = scalarsHead
    self.scalarsRepairedTail = scalarsRepairedTail
    self.encoded = encoded
    self.loc = SourceLoc(file, line, comment: "test data")
  }
}
|
|
|
|
// UTF-16 test vectors, keyed by category.  Each vector lists the scalars
// decoded before the first error, the scalars added when errors are
// repaired, and the UTF-16 code units.
let UTF16Tests = [
  "Empty": [
    UTF16Test([], [], []),
  ],

  "SmokeTest": [
    //
    // 1-word sequences
    //

    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test([ 0x0041 ], [], [ 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+0042 LATIN CAPITAL LETTER B
    UTF16Test([ 0x0041, 0x0042 ], [], [ 0x0041, 0x0042 ]),

    // U+0000 NULL
    // U+0041 LATIN CAPITAL LETTER A
    // U+0042 LATIN CAPITAL LETTER B
    // U+0000 NULL
    UTF16Test(
      [ 0x0000, 0x0041, 0x0042, 0x0000 ],
      [],
      [ 0x0000, 0x0041, 0x0042, 0x0000 ]),

    // U+0283 LATIN SMALL LETTER ESH
    UTF16Test([ 0x0283 ], [], [ 0x0283 ]),

    // U+03BA GREEK SMALL LETTER KAPPA
    // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
    // U+03C3 GREEK SMALL LETTER SIGMA
    // U+03BC GREEK SMALL LETTER MU
    // U+03B5 GREEK SMALL LETTER EPSILON
    UTF16Test(
      [ 0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5 ],
      [],
      [ 0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5 ]),

    // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
    // U+6587 CJK UNIFIED IDEOGRAPH-6587
    UTF16Test([ 0x4f8b, 0x6587 ], [], [ 0x4f8b, 0x6587 ]),

    // U+D55C HANGUL SYLLABLE HAN
    // U+AE00 HANGUL SYLLABLE GEUL
    UTF16Test([ 0xd55c, 0xae00 ], [], [ 0xd55c, 0xae00 ]),

    // U+1112 HANGUL CHOSEONG HIEUH
    // U+1161 HANGUL JUNGSEONG A
    // U+11AB HANGUL JONGSEONG NIEUN
    // U+1100 HANGUL CHOSEONG KIYEOK
    // U+1173 HANGUL JUNGSEONG EU
    // U+11AF HANGUL JONGSEONG RIEUL
    UTF16Test(
      [ 0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af ],
      [],
      [ 0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af ]),

    // U+D7FF (unassigned)
    UTF16Test([ 0xd7ff ], [], [ 0xd7ff ]),

    // U+E000 (private use)
    UTF16Test([ 0xe000 ], [], [ 0xe000 ]),

    // U+FFFD REPLACEMENT CHARACTER
    UTF16Test([ 0xfffd ], [], [ 0xfffd ]),

    // U+FFFF (noncharacter)
    UTF16Test([ 0xffff ], [], [ 0xffff ]),

    //
    // 2-word sequences
    //

    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test([ 0x00010000 ], [], [ 0xd800, 0xdc00 ]),

    // U+10100 AEGEAN WORD SEPARATOR LINE
    UTF16Test([ 0x00010100 ], [], [ 0xd800, 0xdd00 ]),

    // U+103FF (unassigned)
    UTF16Test([ 0x000103ff ], [], [ 0xd800, 0xdfff ]),


    // U+E0000 (unassigned)
    UTF16Test([ 0x000e0000 ], [], [ 0xdb40, 0xdc00 ]),

    // U+E0100 VARIATION SELECTOR-17
    UTF16Test([ 0x000e0100 ], [], [ 0xdb40, 0xdd00 ]),

    // U+E03FF (unassigned)
    UTF16Test([ 0x000e03ff ], [], [ 0xdb40, 0xdfff ]),


    // U+10FC00 (private use)
    UTF16Test([ 0x0010fc00 ], [], [ 0xdbff, 0xdc00 ]),

    // U+10FD00 (private use)
    UTF16Test([ 0x0010fd00 ], [], [ 0xdbff, 0xdd00 ]),

    // U+10FFFF (private use, noncharacter)
    UTF16Test([ 0x0010ffff ], [], [ 0xdbff, 0xdfff ]),
  ],

  "Incomplete": [
    //
    // Incomplete sequences that end right before EOF.
    //

    // U+D800 (high-surrogate)
    UTF16Test([], [ 0xfffd ], [ 0xd800 ]),

    // U+D800 (high-surrogate)
    // U+D800 (high-surrogate)
    UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xd800, 0xd800 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    UTF16Test([ 0x0041 ], [ 0xfffd ], [ 0x0041, 0xd800 ]),

    // U+10000 LINEAR B SYLLABLE B008 A
    // U+D800 (high-surrogate)
    UTF16Test([ 0x00010000 ], [ 0xfffd ], [ 0xd800, 0xdc00, 0xd800 ]),

    //
    // Incomplete sequences with more code units following them.
    //

    // U+D800 (high-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test([], [ 0xfffd, 0x0041 ], [ 0xd800, 0x0041 ]),

    // U+D800 (high-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test([], [ 0xfffd, 0x00010000 ], [ 0xd800, 0xd800, 0xdc00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test([ 0x0041 ], [ 0xfffd, 0x0041 ], [ 0x0041, 0xd800, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ],
      [ 0xfffd, 0x00010000 ],
      [ 0x0041, 0xd800, 0xd800, 0xdc00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+DB40 (high-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ],
      [ 0xfffd, 0xfffd, 0x0041 ],
      [ 0x0041, 0xd800, 0xdb40, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+DB40 (high-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ],
      [ 0xfffd, 0xfffd, 0x00010000 ],
      [ 0x0041, 0xd800, 0xdb40, 0xd800, 0xdc00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+DB40 (high-surrogate)
    // U+DBFF (high-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ],
      [ 0xfffd, 0xfffd, 0xfffd, 0x0041 ],
      [ 0x0041, 0xd800, 0xdb40, 0xdbff, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+DB40 (high-surrogate)
    // U+DBFF (high-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ],
      [ 0xfffd, 0xfffd, 0xfffd, 0x00010000 ],
      [ 0x0041, 0xd800, 0xdb40, 0xdbff, 0xd800, 0xdc00 ]),
  ],

  "IllFormed": [
    //
    // Low-surrogate right before EOF.
    //

    // U+DC00 (low-surrogate)
    UTF16Test([], [ 0xfffd ], [ 0xdc00 ]),

    // U+DC00 (low-surrogate)
    // U+DC00 (low-surrogate)
    UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdc00, 0xdc00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    UTF16Test([ 0x0041 ], [ 0xfffd ], [ 0x0041, 0xdc00 ]),

    // U+10000 LINEAR B SYLLABLE B008 A
    // U+DC00 (low-surrogate)
    UTF16Test([ 0x00010000 ], [ 0xfffd ], [ 0xd800, 0xdc00, 0xdc00 ]),

    //
    // Low-surrogate with more code units following it.
    //

    // U+DC00 (low-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test([], [ 0xfffd, 0x0041 ], [ 0xdc00, 0x0041 ]),

    // U+DC00 (low-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test([], [ 0xfffd, 0x00010000 ], [ 0xdc00, 0xd800, 0xdc00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test([ 0x0041 ], [ 0xfffd, 0x0041 ], [ 0x0041, 0xdc00, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ],
      [ 0xfffd, 0x00010000 ],
      [ 0x0041, 0xdc00, 0xd800, 0xdc00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+DD00 (low-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ],
      [ 0xfffd, 0xfffd, 0x0041 ],
      [ 0x0041, 0xdc00, 0xdd00, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+DD00 (low-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ],
      [ 0xfffd, 0xfffd, 0x00010000 ],
      [ 0x0041, 0xdc00, 0xdd00, 0xd800, 0xdc00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+DD00 (low-surrogate)
    // U+DFFF (low-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ],
      [ 0xfffd, 0xfffd, 0xfffd, 0x0041 ],
      [ 0x0041, 0xdc00, 0xdd00, 0xdfff, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+DD00 (low-surrogate)
    // U+DFFF (low-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ],
      [ 0xfffd, 0xfffd, 0xfffd, 0x00010000 ],
      [ 0x0041, 0xdc00, 0xdd00, 0xdfff, 0xd800, 0xdc00 ]),

    //
    // Low-surrogate followed by high-surrogate.
    //

    // U+DC00 (low-surrogate)
    // U+D800 (high-surrogate)
    UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdc00, 0xd800 ]),

    // U+DC00 (low-surrogate)
    // U+DB40 (high-surrogate)
    UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdc00, 0xdb40 ]),

    // U+DC00 (low-surrogate)
    // U+DBFF (high-surrogate)
    UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdc00, 0xdbff ]),


    // U+DD00 (low-surrogate)
    // U+D800 (high-surrogate)
    UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdd00, 0xd800 ]),

    // U+DD00 (low-surrogate)
    // U+DB40 (high-surrogate)
    UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdd00, 0xdb40 ]),

    // U+DD00 (low-surrogate)
    // U+DBFF (high-surrogate)
    UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdd00, 0xdbff ]),


    // U+DFFF (low-surrogate)
    // U+D800 (high-surrogate)
    UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdfff, 0xd800 ]),

    // U+DFFF (low-surrogate)
    // U+DB40 (high-surrogate)
    UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdfff, 0xdb40 ]),

    // U+DFFF (low-surrogate)
    // U+DBFF (high-surrogate)
    UTF16Test([], [ 0xfffd, 0xfffd ], [ 0xdfff, 0xdbff ]),


    // U+DC00 (low-surrogate)
    // U+D800 (high-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test([], [ 0xfffd, 0xfffd, 0x0041 ], [ 0xdc00, 0xd800, 0x0041 ]),

    // U+DC00 (low-surrogate)
    // U+D800 (high-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [],
      [ 0xfffd, 0xfffd, 0x00010000 ],
      [ 0xdc00, 0xd800, 0xd800, 0xdc00 ]),
  ],
]
|
|
|
|
var UTF8Decoder = TestCase("UTF8Decoder")

// Checks UTF8._numTrailingBytes() for every possible first byte.  The test
// expects 4 for every byte in 0x80...0xc1 and 0xf5...0xff.
UTF8Decoder.test("Internal/_numTrailingBytes") {
  for byte in UInt8(0x00)...UInt8(0x7f) {
    expectEqual(0, UTF8._numTrailingBytes(byte)) { "i=\(byte)" }
  }
  for byte in UInt8(0x80)...UInt8(0xc1) {
    expectEqual(4, UTF8._numTrailingBytes(byte)) { "i=\(byte)" }
  }
  for byte in UInt8(0xc2)...UInt8(0xdf) {
    expectEqual(1, UTF8._numTrailingBytes(byte)) { "i=\(byte)" }
  }
  for byte in UInt8(0xe0)...UInt8(0xef) {
    expectEqual(2, UTF8._numTrailingBytes(byte)) { "i=\(byte)" }
  }
  for byte in UInt8(0xf0)...UInt8(0xf4) {
    expectEqual(3, UTF8._numTrailingBytes(byte)) { "i=\(byte)" }
  }
  for byte in UInt8(0xf5)...UInt8(0xfe) {
    expectEqual(4, UTF8._numTrailingBytes(byte)) { "i=\(byte)" }
  }
  // Separate test for 0xff because of:
  // <rdar://problem/17376512> Range UInt8(0x00)...UInt8(0xff) invokes a
  // runtime trap
  var lastByte = UInt8(0xff)
  expectEqual(4, UTF8._numTrailingBytes(lastByte)) { "i=\(lastByte)" }
}
|
|
|
|
// Empty input decodes to no scalars, with or without error repair.
UTF8Decoder.test("Empty") {
  let result = checkDecodeUTF8([], [], [])
  expectTrue(result)
}
|
|
|
|
// Every vector in UTF8TestsSmokeTest must decode to its scalars, with
// nothing left to repair.
UTF8Decoder.test("SmokeTest") {
  for vector in UTF8TestsSmokeTest {
    expectTrue(
      checkDecodeUTF8(vector.scalars, [], vector.encoded),
      stackTrace: vector.loc.withCurrentLoc())
  }
}
|
|
|
|
UTF8Decoder.test("FirstPossibleSequence") {
  //
  // First possible sequence of a certain length
  //

  // U+0000 NULL
  expectTrue(checkDecodeUTF8([ 0x0000 ], [], [ 0x00 ]))

  // U+0080 PADDING CHARACTER
  expectTrue(checkDecodeUTF8([ 0x0080 ], [], [ 0xc2, 0x80 ]))

  // U+0800 SAMARITAN LETTER ALAF
  expectTrue(checkDecodeUTF8([ 0x0800 ], [], [ 0xe0, 0xa0, 0x80 ]))

  // U+10000 LINEAR B SYLLABLE B008 A
  expectTrue(checkDecodeUTF8([ 0x10000 ], [], [ 0xf0, 0x90, 0x80, 0x80 ]))

  // U+200000 (invalid): one replacement character is expected per byte.
  expectTrue(checkDecodeUTF8(
    [],
    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
    [ 0xf8, 0x88, 0x80, 0x80, 0x80 ]))

  // U+4000000 (invalid): one replacement character is expected per byte.
  expectTrue(checkDecodeUTF8(
    [],
    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
    [ 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80 ]))
}
|
|
|
|
UTF8Decoder.test("LastPossibleSequence") {
  //
  // Last possible sequence of a certain length
  //

  // U+007F DELETE
  expectTrue(checkDecodeUTF8([ 0x007f ], [], [ 0x7f ]))

  // U+07FF (unassigned)
  expectTrue(checkDecodeUTF8([ 0x07ff ], [], [ 0xdf, 0xbf ]))

  // U+FFFF (noncharacter)
  expectTrue(checkDecodeUTF8([ 0xffff ], [], [ 0xef, 0xbf, 0xbf ]))

  // U+1FFFFF (invalid): one replacement character is expected per byte.
  expectTrue(checkDecodeUTF8(
    [],
    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
    [ 0xf7, 0xbf, 0xbf, 0xbf ]))

  // U+3FFFFFF (invalid): one replacement character is expected per byte.
  expectTrue(checkDecodeUTF8(
    [],
    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
    [ 0xfb, 0xbf, 0xbf, 0xbf, 0xbf ]))

  // U+7FFFFFFF (invalid): one replacement character is expected per byte.
  expectTrue(checkDecodeUTF8(
    [],
    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
    [ 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf ]))
}
|
|
|
|
UTF8Decoder.test("CodeSpaceBoundaryConditions") {
  //
  // Other boundary conditions
  //

  // U+D7FF (unassigned)
  expectTrue(checkDecodeUTF8(
    [ 0xd7ff ], [],
    [ 0xed, 0x9f, 0xbf ]))

  // U+E000 (private use)
  expectTrue(checkDecodeUTF8(
    [ 0xe000 ], [],
    [ 0xee, 0x80, 0x80 ]))

  // U+FFFD REPLACEMENT CHARACTER
  expectTrue(checkDecodeUTF8(
    [ 0xfffd ], [],
    [ 0xef, 0xbf, 0xbd ]))

  // U+10FFFF (noncharacter)
  expectTrue(checkDecodeUTF8(
    [ 0x10ffff ], [],
    [ 0xf4, 0x8f, 0xbf, 0xbf ]))

  // U+110000 (invalid): one replacement character is expected per byte.
  expectTrue(checkDecodeUTF8(
    [],
    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
    [ 0xf4, 0x90, 0x80, 0x80 ]))
}
|
|
|
|
UTF8Decoder.test("UnexpectedContinuationBytes") {
  //
  // Unexpected continuation bytes
  //

  // A sequence of unexpected continuation bytes that don't follow a first
  // byte, every byte is a maximal subpart.

  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0x80 ]))
  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xbf ]))
  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0x80, 0x80 ]))
  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0x80, 0xbf ]))
  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xbf, 0x80 ]))
  expectTrue(checkDecodeUTF8(
    [],
    [ 0xfffd, 0xfffd, 0xfffd ],
    [ 0x80, 0xbf, 0x80 ]))
  expectTrue(checkDecodeUTF8(
    [],
    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
    [ 0x80, 0xbf, 0x80, 0xbf ]))
  expectTrue(checkDecodeUTF8(
    [],
    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
    [ 0x80, 0xbf, 0x82, 0xbf, 0xaa ]))
  expectTrue(checkDecodeUTF8(
    [],
    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
    [ 0xaa, 0xb0, 0xbb, 0xbf, 0xaa, 0xa0 ]))
  expectTrue(checkDecodeUTF8(
    [],
    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
    [ 0xaa, 0xb0, 0xbb, 0xbf, 0xaa, 0xa0, 0x8f ]))

  // All continuation bytes (0x80--0xbf), in ascending order: 64 bytes, each
  // expected to be repaired into its own U+FFFD.
  let allContinuationBytes = Array(UInt8(0x80)...UInt8(0xbf))
  expectTrue(checkDecodeUTF8(
    [],
    Array(count: 64, repeatedValue: UInt32(0xfffd)),
    allContinuationBytes))
}
|
|
|
|
UTF8Decoder.test("LonelyStartBytes") {
  // Start bytes that are not followed by any continuation byte.  Each range
  // of start bytes is exercised twice: first as a bare run of start bytes,
  // then with a space (0x20) after every start byte.  Every start byte is
  // expected to come back as U+FFFD and every space as U+0020.

  // 2-byte sequence start bytes (0xc0--0xdf), back to back.
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
        0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
        0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
        0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
        0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
        0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
        0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf ]))

  // 2-byte sequence start bytes, each followed by a space.
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
        0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
        0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
        0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
        0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
        0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
        0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
        0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ],
      [ 0xc0, 0x20, 0xc1, 0x20, 0xc2, 0x20, 0xc3, 0x20,
        0xc4, 0x20, 0xc5, 0x20, 0xc6, 0x20, 0xc7, 0x20,
        0xc8, 0x20, 0xc9, 0x20, 0xca, 0x20, 0xcb, 0x20,
        0xcc, 0x20, 0xcd, 0x20, 0xce, 0x20, 0xcf, 0x20,
        0xd0, 0x20, 0xd1, 0x20, 0xd2, 0x20, 0xd3, 0x20,
        0xd4, 0x20, 0xd5, 0x20, 0xd6, 0x20, 0xd7, 0x20,
        0xd8, 0x20, 0xd9, 0x20, 0xda, 0x20, 0xdb, 0x20,
        0xdc, 0x20, 0xdd, 0x20, 0xde, 0x20, 0xdf, 0x20 ]))

  // 3-byte sequence start bytes (0xe0--0xef), back to back.
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
        0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
        0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef ]))

  // 3-byte sequence start bytes, each followed by a space.
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
        0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
        0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
        0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ],
      [ 0xe0, 0x20, 0xe1, 0x20, 0xe2, 0x20, 0xe3, 0x20,
        0xe4, 0x20, 0xe5, 0x20, 0xe6, 0x20, 0xe7, 0x20,
        0xe8, 0x20, 0xe9, 0x20, 0xea, 0x20, 0xeb, 0x20,
        0xec, 0x20, 0xed, 0x20, 0xee, 0x20, 0xef, 0x20 ]))

  // 4-byte sequence start bytes (0xf0--0xf7), back to back.
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 ]))

  // 4-byte sequence start bytes, each followed by a space.
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
        0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ],
      [ 0xf0, 0x20, 0xf1, 0x20, 0xf2, 0x20, 0xf3, 0x20,
        0xf4, 0x20, 0xf5, 0x20, 0xf6, 0x20, 0xf7, 0x20 ]))

  // 5-byte sequence start bytes (0xf8--0xfb), back to back.
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], [ 0xf8, 0xf9, 0xfa, 0xfb ]))

  // 5-byte sequence start bytes, each followed by a space.
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ],
      [ 0xf8, 0x20, 0xf9, 0x20, 0xfa, 0x20, 0xfb, 0x20 ]))

  // 6-byte sequence start bytes (0xfc--0xfd), back to back.
  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0xfd ]))

  // 6-byte sequence start bytes, each followed by a space.
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0x0020, 0xfffd, 0x0020 ], [ 0xfc, 0x20, 0xfd, 0x20 ]))
}
|
|
|
|
UTF8Decoder.test("InvalidStartBytes") {
  // The remaining byte values: 0xc0--0xc1 and 0xfe--0xff.  Every case below
  // expects one U+FFFD for each of these bytes and for each continuation
  // byte that trails one of them.

  // Each byte alone.
  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc0 ]))
  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc1 ]))
  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfe ]))
  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xff ]))

  // All four bytes in a row, then doubled 0xfe and 0xff.
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], [ 0xc0, 0xc1, 0xfe, 0xff ]))
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], [ 0xfe, 0xfe, 0xff, 0xff ]))

  // 0xfe and 0xff, each followed by five continuation bytes.
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xfe, 0x80, 0x80, 0x80, 0x80, 0x80 ]))
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xff, 0x80, 0x80, 0x80, 0x80, 0x80 ]))

  // Each byte followed by a space (0x20); the spaces are expected to come
  // back as U+0020.
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ],
      [ 0xc0, 0x20, 0xc1, 0x20, 0xfe, 0x20, 0xff, 0x20 ]))
}
|
|
|
|
UTF8Decoder.test("MissingContinuationBytes") {
|
|
//
|
|
// Sequences with one continuation byte missing
|
|
//
|
|
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc2 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xdf ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xc2, 0x41 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xdf, 0x41 ]))
|
|
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe0, 0xa0 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe0, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xe0, 0xa0, 0x41 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xe0, 0xbf, 0x41 ]))
|
|
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe1, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xec, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xe1, 0x80, 0x41 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xec, 0xbf, 0x41 ]))
|
|
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xed, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xed, 0x9f ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xed, 0x80, 0x41 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xed, 0x9f, 0x41 ]))
|
|
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xee, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xef, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xee, 0x80, 0x41 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xef, 0xbf, 0x41 ]))
|
|
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0x90, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0xbf, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf0, 0x90, 0x80, 0x41 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf0, 0xbf, 0xbf, 0x41 ]))
|
|
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf1, 0x80, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf3, 0xbf, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf1, 0x80, 0x80, 0x41 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf3, 0xbf, 0xbf, 0x41 ]))
|
|
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x80, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x8f, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf4, 0x80, 0x80, 0x41 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf4, 0x8f, 0xbf, 0x41 ]))
|
|
|
|
// Overlong sequences with one trailing byte missing.
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc0 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc1 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xe0, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xe0, 0x9f ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf0, 0x80, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf0, 0x8f, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf8, 0x80, 0x80, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfc, 0x80, 0x80, 0x80, 0x80 ]))
|
|
|
|
// Sequences that represent surrogates with one trailing byte missing.
|
|
// High-surrogates
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xa0 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xac ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xaf ]))
|
|
// Low-surrogates
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xb0 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xb4 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xbf ]))
|
|
|
|
// Ill-formed 4-byte sequences.
|
|
// 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+1100xx (invalid)
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf4, 0x90, 0x80 ]))
|
|
// U+13FFxx (invalid)
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf4, 0xbf, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf5, 0x80, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf6, 0x80, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf7, 0x80, 0x80 ]))
|
|
// U+1FFFxx (invalid)
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf7, 0xbf, 0xbf ]))
|
|
|
|
// Ill-formed 5-byte sequences.
|
|
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+2000xx (invalid)
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf8, 0x88, 0x80, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf8, 0xbf, 0xbf, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf9, 0x80, 0x80, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfa, 0x80, 0x80, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfb, 0x80, 0x80, 0x80 ]))
|
|
// U+3FFFFxx (invalid)
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfb, 0xbf, 0xbf, 0xbf ]))
|
|
|
|
// Ill-formed 6-byte sequences.
|
|
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+40000xx (invalid)
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfc, 0x84, 0x80, 0x80, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfc, 0xbf, 0xbf, 0xbf, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfd, 0x80, 0x80, 0x80, 0x80 ]))
|
|
// U+7FFFFFxx (invalid)
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfd, 0xbf, 0xbf, 0xbf, 0xbf ]))
|
|
|
|
//
|
|
// Sequences with two continuation bytes missing
|
|
//
|
|
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0x90 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf1, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf3, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x8f ]))
|
|
|
|
// Overlong sequences with two trailing bytes missing.
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe0 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf0, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf0, 0x8f ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf8, 0x80, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfc, 0x80, 0x80, 0x80 ]))
|
|
|
|
// Sequences that represent surrogates with two trailing bytes missing.
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xed ]))
|
|
|
|
// Ill-formed 4-byte sequences.
|
|
// 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+110yxx (invalid)
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf4, 0x90 ]))
|
|
// U+13Fyxx (invalid)
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf4, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf5, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf6, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf7, 0x80 ]))
|
|
// U+1FFyxx (invalid)
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf7, 0xbf ]))
|
|
|
|
// Ill-formed 5-byte sequences.
|
|
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+200yxx (invalid)
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf8, 0x88, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf8, 0xbf, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xf9, 0x80, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfa, 0x80, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfb, 0x80, 0x80 ]))
|
|
// U+3FFFyxx (invalid)
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfb, 0xbf, 0xbf ]))
|
|
|
|
// Ill-formed 6-byte sequences.
|
|
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+4000yxx (invalid)
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfc, 0x84, 0x80, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfc, 0xbf, 0xbf, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfd, 0x80, 0x80, 0x80 ]))
|
|
// U+7FFFFyxx (invalid)
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfd, 0xbf, 0xbf, 0xbf ]))
|
|
|
|
//
|
|
// Sequences with three continuation bytes missing
|
|
//
|
|
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf1 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf2 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf3 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4 ]))
|
|
|
|
// Broken overlong sequences.
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf8, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfc, 0x80, 0x80 ]))
|
|
|
|
// Ill-formed 4-byte sequences.
|
|
// 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+14yyxx (invalid)
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf5 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf6 ]))
|
|
// U+1Cyyxx (invalid)
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf7 ]))
|
|
|
|
// Ill-formed 5-byte sequences.
|
|
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+20yyxx (invalid)
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf8, 0x88 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf8, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf9, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfa, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfb, 0x80 ]))
|
|
// U+3FCyyxx (invalid)
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfb, 0xbf ]))
|
|
|
|
// Ill-formed 6-byte sequences.
|
|
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+400yyxx (invalid)
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfc, 0x84, 0x80 ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfc, 0xbf, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfd, 0x80, 0x80 ]))
|
|
// U+7FFCyyxx (invalid)
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xfd, 0xbf, 0xbf ]))
|
|
|
|
//
|
|
// Sequences with four continuation bytes missing
|
|
//
|
|
|
|
// Ill-formed 5-byte sequences.
|
|
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+uzyyxx (invalid)
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf8 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf9 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfa ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfb ]))
|
|
// U+3zyyxx (invalid)
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfb ]))
|
|
|
|
// Broken overlong sequences.
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf8 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0x80 ]))
|
|
|
|
// Ill-formed 6-byte sequences.
|
|
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+uzzyyxx (invalid)
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0x84 ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0xbf ]))
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfd, 0x80 ]))
|
|
// U+7Fzzyyxx (invalid)
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfd, 0xbf ]))
|
|
|
|
//
|
|
// Sequences with five continuation bytes missing
|
|
//
|
|
|
|
// Ill-formed 6-byte sequences.
|
|
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+uzzyyxx (invalid)
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfc ]))
|
|
// U+uuzzyyxx (invalid)
|
|
expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfd ]))
|
|
|
|
//
|
|
// Consecutive sequences with trailing bytes missing
|
|
//
|
|
|
|
expectTrue(checkDecodeUTF8(
|
|
[],
|
|
[ 0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
|
|
[ 0xc0, /**/ 0xe0, 0x80, /**/ 0xf0, 0x80, 0x80,
|
|
0xf8, 0x80, 0x80, 0x80,
|
|
0xfc, 0x80, 0x80, 0x80, 0x80,
|
|
0xdf, /**/ 0xef, 0xbf, /**/ 0xf7, 0xbf, 0xbf,
|
|
0xfb, 0xbf, 0xbf, 0xbf,
|
|
0xfd, 0xbf, 0xbf, 0xbf, 0xbf ]))
|
|
}
|
|
|
|
UTF8Decoder.test("OverlongSequences") {
  // Overlong UTF-8 sequences: encodings that spend more bytes than the
  // scalar needs.  Each overlong form below is expected to produce one
  // U+FFFD per input byte.

  // U+002F SOLIDUS in its shortest (valid) form...
  expectTrue(checkDecodeUTF8([ 0x002f ], [], [ 0x2f ]))

  // ...and stretched over 2, 3, 4, 5 and 6 bytes.
  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc0, 0xaf ]))
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd ], [ 0xe0, 0x80, 0xaf ]))
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], [ 0xf0, 0x80, 0x80, 0xaf ]))
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xf8, 0x80, 0x80, 0x80, 0xaf ]))
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf ]))

  // U+0000 NULL in its shortest (valid) form...
  expectTrue(checkDecodeUTF8([ 0x0000 ], [], [ 0x00 ]))

  // ...and stretched over 2, 3, 4, 5 and 6 bytes.
  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc0, 0x80 ]))
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd ], [ 0xe0, 0x80, 0x80 ]))
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], [ 0xf0, 0x80, 0x80, 0x80 ]))
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xf8, 0x80, 0x80, 0x80, 0x80 ]))
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xfc, 0x80, 0x80, 0x80, 0x80, 0x80 ]))

  // Further overlong and otherwise ill-formed sequences.
  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc0, 0xbf ]))
  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc1, 0x80 ]))
  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc1, 0xbf ]))
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd ], [ 0xe0, 0x9f, 0xbf ]))
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd ], [ 0xed, 0xa0, 0x80 ]))
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd ], [ 0xed, 0xbf, 0xbf ]))
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], [ 0xf0, 0x8f, 0x80, 0x80 ]))
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], [ 0xf0, 0x8f, 0xbf, 0xbf ]))
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xf8, 0x87, 0xbf, 0xbf, 0xbf ]))
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf ]))
}
|
|
|
|
UTF8Decoder.test("IsolatedSurrogates") {
  // Definitions from Unicode 6.3.0:
  //
  //   D71. High-surrogate code point: A Unicode code point in the range
  //        U+D800 to U+DBFF.
  //   D73. Low-surrogate code point: A Unicode code point in the range
  //        U+DC00 to U+DFFF.
  //
  // Note: U+E0100 is <DB40 DD00> in UTF-16.
  //
  // Every 3-byte encoding of a lone surrogate below is expected to produce
  // three U+FFFD, one per input byte.

  // --- High-surrogates ---

  // U+D800, alone and then wrapped in 'A' (U+0041) on both sides.
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd ], [ 0xed, 0xa0, 0x80 ]))
  expectTrue(checkDecodeUTF8(
      [ 0x0041 ],
      [ 0xfffd, 0xfffd, 0xfffd, 0x0041 ],
      [ 0x41, 0xed, 0xa0, 0x80, 0x41 ]))

  // U+DB40
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd ], [ 0xed, 0xac, 0xa0 ]))

  // U+DBFF
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd ], [ 0xed, 0xaf, 0xbf ]))

  // --- Low-surrogates ---

  // U+DC00
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd ], [ 0xed, 0xb0, 0x80 ]))

  // U+DD00
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd ], [ 0xed, 0xb4, 0x80 ]))

  // U+DFFF
  expectTrue(checkDecodeUTF8(
      [], [ 0xfffd, 0xfffd, 0xfffd ], [ 0xed, 0xbf, 0xbf ]))
}
|
|
|
|
UTF8Decoder.test("SurrogatePairs") {
  // A high surrogate followed by a low surrogate, each encoded as its own
  // 3-byte sequence.  All nine combinations of {U+D800, U+DB40, U+DBFF} with
  // {U+DC00, U+DD00, U+DFFF} are expected to produce six U+FFFD, one per
  // input byte.

  // High surrogate U+D800 with U+DC00, U+DD00 and U+DFFF.
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80 ]))
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xed, 0xa0, 0x80, 0xed, 0xb4, 0x80 ]))
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf ]))

  // High surrogate U+DB40 with U+DC00, U+DD00 and U+DFFF.
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xed, 0xac, 0xa0, 0xed, 0xb0, 0x80 ]))
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xed, 0xac, 0xa0, 0xed, 0xb4, 0x80 ]))
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xed, 0xac, 0xa0, 0xed, 0xbf, 0xbf ]))

  // High surrogate U+DBFF with U+DC00, U+DD00 and U+DFFF.
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80 ]))
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xed, 0xaf, 0xbf, 0xed, 0xb4, 0x80 ]))
  expectTrue(checkDecodeUTF8(
      [],
      [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
      [ 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf ]))
}
|
|
|
|
UTF8Decoder.test("Noncharacters") {
|
|
//
|
|
// Noncharacters
|
|
//
|
|
|
|
// Unicode 6.3.0:
|
|
//
|
|
// D14. Noncharacter: A code point that is permanently reserved for
|
|
// internal use and that should never be interchanged. Noncharacters
|
|
// consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10)
|
|
// and the values U+FDD0..U+FDEF.
|
|
|
|
// U+FFFE
|
|
expectTrue(checkDecodeUTF8([ 0xfffe ], [], [ 0xef, 0xbf, 0xbe ]))
|
|
|
|
// U+FFFF
|
|
expectTrue(checkDecodeUTF8([ 0xffff ], [], [ 0xef, 0xbf, 0xbf ]))
|
|
|
|
// U+1FFFE
|
|
expectTrue(checkDecodeUTF8([ 0x1fffe ], [], [ 0xf0, 0x9f, 0xbf, 0xbe ]))
|
|
|
|
// U+1FFFF
|
|
expectTrue(checkDecodeUTF8([ 0x1ffff ], [], [ 0xf0, 0x9f, 0xbf, 0xbf ]))
|
|
|
|
// U+2FFFE
|
|
expectTrue(checkDecodeUTF8([ 0x2fffe ], [], [ 0xf0, 0xaf, 0xbf, 0xbe ]))
|
|
|
|
// U+2FFFF
|
|
expectTrue(checkDecodeUTF8([ 0x2ffff ], [], [ 0xf0, 0xaf, 0xbf, 0xbf ]))
|
|
|
|
// U+3FFFE
|
|
expectTrue(checkDecodeUTF8([ 0x3fffe ], [], [ 0xf0, 0xbf, 0xbf, 0xbe ]))
|
|
|
|
// U+3FFFF
|
|
expectTrue(checkDecodeUTF8([ 0x3ffff ], [], [ 0xf0, 0xbf, 0xbf, 0xbf ]))
|
|
|
|
// U+4FFFE
|
|
expectTrue(checkDecodeUTF8([ 0x4fffe ], [], [ 0xf1, 0x8f, 0xbf, 0xbe ]))
|
|
|
|
// U+4FFFF
|
|
expectTrue(checkDecodeUTF8([ 0x4ffff ], [], [ 0xf1, 0x8f, 0xbf, 0xbf ]))
|
|
|
|
// U+5FFFE
|
|
expectTrue(checkDecodeUTF8([ 0x5fffe ], [], [ 0xf1, 0x9f, 0xbf, 0xbe ]))
|
|
|
|
// U+5FFFF
|
|
expectTrue(checkDecodeUTF8([ 0x5ffff ], [], [ 0xf1, 0x9f, 0xbf, 0xbf ]))
|
|
|
|
// U+6FFFE
|
|
expectTrue(checkDecodeUTF8([ 0x6fffe ], [], [ 0xf1, 0xaf, 0xbf, 0xbe ]))
|
|
|
|
// U+6FFFF
|
|
expectTrue(checkDecodeUTF8([ 0x6ffff ], [], [ 0xf1, 0xaf, 0xbf, 0xbf ]))
|
|
|
|
// U+7FFFE
|
|
expectTrue(checkDecodeUTF8([ 0x7fffe ], [], [ 0xf1, 0xbf, 0xbf, 0xbe ]))
|
|
|
|
// U+7FFFF
|
|
expectTrue(checkDecodeUTF8([ 0x7ffff ], [], [ 0xf1, 0xbf, 0xbf, 0xbf ]))
|
|
|
|
// U+8FFFE
|
|
expectTrue(checkDecodeUTF8([ 0x8fffe ], [], [ 0xf2, 0x8f, 0xbf, 0xbe ]))
|
|
|
|
// U+8FFFF
|
|
expectTrue(checkDecodeUTF8([ 0x8ffff ], [], [ 0xf2, 0x8f, 0xbf, 0xbf ]))
|
|
|
|
// U+9FFFE
|
|
expectTrue(checkDecodeUTF8([ 0x9fffe ], [], [ 0xf2, 0x9f, 0xbf, 0xbe ]))
|
|
|
|
// U+9FFFF
|
|
expectTrue(checkDecodeUTF8([ 0x9ffff ], [], [ 0xf2, 0x9f, 0xbf, 0xbf ]))
|
|
|
|
// U+AFFFE
|
|
expectTrue(checkDecodeUTF8([ 0xafffe ], [], [ 0xf2, 0xaf, 0xbf, 0xbe ]))
|
|
|
|
// U+AFFFF
|
|
expectTrue(checkDecodeUTF8([ 0xaffff ], [], [ 0xf2, 0xaf, 0xbf, 0xbf ]))
|
|
|
|
// U+BFFFE
|
|
expectTrue(checkDecodeUTF8([ 0xbfffe ], [], [ 0xf2, 0xbf, 0xbf, 0xbe ]))
|
|
|
|
// U+BFFFF
|
|
expectTrue(checkDecodeUTF8([ 0xbffff ], [], [ 0xf2, 0xbf, 0xbf, 0xbf ]))
|
|
|
|
// U+CFFFE
|
|
expectTrue(checkDecodeUTF8([ 0xcfffe ], [], [ 0xf3, 0x8f, 0xbf, 0xbe ]))
|
|
|
|
// U+CFFFF
|
|
expectTrue(checkDecodeUTF8([ 0xcfffF ], [], [ 0xf3, 0x8f, 0xbf, 0xbf ]))
|
|
|
|
// U+DFFFE
|
|
expectTrue(checkDecodeUTF8([ 0xdfffe ], [], [ 0xf3, 0x9f, 0xbf, 0xbe ]))
|
|
|
|
// U+DFFFF
|
|
expectTrue(checkDecodeUTF8([ 0xdffff ], [], [ 0xf3, 0x9f, 0xbf, 0xbf ]))
|
|
|
|
// U+EFFFE
|
|
expectTrue(checkDecodeUTF8([ 0xefffe ], [], [ 0xf3, 0xaf, 0xbf, 0xbe ]))
|
|
|
|
// U+EFFFF
|
|
expectTrue(checkDecodeUTF8([ 0xeffff ], [], [ 0xf3, 0xaf, 0xbf, 0xbf ]))
|
|
|
|
// U+FFFFE
|
|
expectTrue(checkDecodeUTF8([ 0xffffe ], [], [ 0xf3, 0xbf, 0xbf, 0xbe ]))
|
|
|
|
// U+FFFFF
|
|
expectTrue(checkDecodeUTF8([ 0xfffff ], [], [ 0xf3, 0xbf, 0xbf, 0xbf ]))
|
|
|
|
// U+10FFFE
|
|
expectTrue(checkDecodeUTF8([ 0x10fffe ], [], [ 0xf4, 0x8f, 0xbf, 0xbe ]))
|
|
|
|
// U+10FFFF
|
|
expectTrue(checkDecodeUTF8([ 0x10ffff ], [], [ 0xf4, 0x8f, 0xbf, 0xbf ]))
|
|
|
|
// U+FDD0
|
|
expectTrue(checkDecodeUTF8([ 0xfdd0 ], [], [ 0xef, 0xb7, 0x90 ]))
|
|
|
|
// U+FDD1
|
|
expectTrue(checkDecodeUTF8([ 0xfdd1 ], [], [ 0xef, 0xb7, 0x91 ]))
|
|
|
|
// U+FDD2
|
|
expectTrue(checkDecodeUTF8([ 0xfdd2 ], [], [ 0xef, 0xb7, 0x92 ]))
|
|
|
|
// U+FDD3
|
|
expectTrue(checkDecodeUTF8([ 0xfdd3 ], [], [ 0xef, 0xb7, 0x93 ]))
|
|
|
|
// U+FDD4
|
|
expectTrue(checkDecodeUTF8([ 0xfdd4 ], [], [ 0xef, 0xb7, 0x94 ]))
|
|
|
|
// U+FDD5
|
|
expectTrue(checkDecodeUTF8([ 0xfdd5 ], [], [ 0xef, 0xb7, 0x95 ]))
|
|
|
|
// U+FDD6
|
|
expectTrue(checkDecodeUTF8([ 0xfdd6 ], [], [ 0xef, 0xb7, 0x96 ]))
|
|
|
|
// U+FDD7
|
|
expectTrue(checkDecodeUTF8([ 0xfdd7 ], [], [ 0xef, 0xb7, 0x97 ]))
|
|
|
|
// U+FDD8
|
|
expectTrue(checkDecodeUTF8([ 0xfdd8 ], [], [ 0xef, 0xb7, 0x98 ]))
|
|
|
|
// U+FDD9
|
|
expectTrue(checkDecodeUTF8([ 0xfdd9 ], [], [ 0xef, 0xb7, 0x99 ]))
|
|
|
|
// U+FDDA
|
|
expectTrue(checkDecodeUTF8([ 0xfdda ], [], [ 0xef, 0xb7, 0x9a ]))
|
|
|
|
// U+FDDB
|
|
expectTrue(checkDecodeUTF8([ 0xfddb ], [], [ 0xef, 0xb7, 0x9b ]))
|
|
|
|
// U+FDDC
|
|
expectTrue(checkDecodeUTF8([ 0xfddc ], [], [ 0xef, 0xb7, 0x9c ]))
|
|
|
|
// U+FDDD
|
|
expectTrue(checkDecodeUTF8([ 0xfddd ], [], [ 0xef, 0xb7, 0x9d ]))
|
|
|
|
// U+FDDE
|
|
expectTrue(checkDecodeUTF8([ 0xfdde ], [], [ 0xef, 0xb7, 0x9e ]))
|
|
|
|
// U+FDDF
|
|
expectTrue(checkDecodeUTF8([ 0xfddf ], [], [ 0xef, 0xb7, 0x9f ]))
|
|
|
|
// U+FDE0
|
|
expectTrue(checkDecodeUTF8([ 0xfde0 ], [], [ 0xef, 0xb7, 0xa0 ]))
|
|
|
|
// U+FDE1
|
|
expectTrue(checkDecodeUTF8([ 0xfde1 ], [], [ 0xef, 0xb7, 0xa1 ]))
|
|
|
|
// U+FDE2
|
|
expectTrue(checkDecodeUTF8([ 0xfde2 ], [], [ 0xef, 0xb7, 0xa2 ]))
|
|
|
|
// U+FDE3
|
|
expectTrue(checkDecodeUTF8([ 0xfde3 ], [], [ 0xef, 0xb7, 0xa3 ]))
|
|
|
|
// U+FDE4
|
|
expectTrue(checkDecodeUTF8([ 0xfde4 ], [], [ 0xef, 0xb7, 0xa4 ]))
|
|
|
|
// U+FDE5
|
|
expectTrue(checkDecodeUTF8([ 0xfde5 ], [], [ 0xef, 0xb7, 0xa5 ]))
|
|
|
|
// U+FDE6
|
|
expectTrue(checkDecodeUTF8([ 0xfde6 ], [], [ 0xef, 0xb7, 0xa6 ]))
|
|
|
|
// U+FDE7
|
|
expectTrue(checkDecodeUTF8([ 0xfde7 ], [], [ 0xef, 0xb7, 0xa7 ]))
|
|
|
|
// U+FDE8
|
|
expectTrue(checkDecodeUTF8([ 0xfde8 ], [], [ 0xef, 0xb7, 0xa8 ]))
|
|
|
|
// U+FDE9
|
|
expectTrue(checkDecodeUTF8([ 0xfde9 ], [], [ 0xef, 0xb7, 0xa9 ]))
|
|
|
|
// U+FDEA
|
|
expectTrue(checkDecodeUTF8([ 0xfdea ], [], [ 0xef, 0xb7, 0xaa ]))
|
|
|
|
// U+FDEB
|
|
expectTrue(checkDecodeUTF8([ 0xfdeb ], [], [ 0xef, 0xb7, 0xab ]))
|
|
|
|
// U+FDEC
|
|
expectTrue(checkDecodeUTF8([ 0xfdec ], [], [ 0xef, 0xb7, 0xac ]))
|
|
|
|
// U+FDED
|
|
expectTrue(checkDecodeUTF8([ 0xfded ], [], [ 0xef, 0xb7, 0xad ]))
|
|
|
|
// U+FDEE
|
|
expectTrue(checkDecodeUTF8([ 0xfdee ], [], [ 0xef, 0xb7, 0xae ]))
|
|
|
|
// U+FDEF
|
|
expectTrue(checkDecodeUTF8([ 0xfdef ], [], [ 0xef, 0xb7, 0xaf ]))
|
|
|
|
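// U+FDF0 (from here through U+FDFF the scalars are not noncharacters; they
// sit just past the U+FDD0..U+FDEF range)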
|
|
expectTrue(checkDecodeUTF8([ 0xfdf0 ], [], [ 0xef, 0xb7, 0xb0 ]))
|
|
|
|
// U+FDF1
|
|
expectTrue(checkDecodeUTF8([ 0xfdf1 ], [], [ 0xef, 0xb7, 0xb1 ]))
|
|
|
|
// U+FDF2
|
|
expectTrue(checkDecodeUTF8([ 0xfdf2 ], [], [ 0xef, 0xb7, 0xb2 ]))
|
|
|
|
// U+FDF3
|
|
expectTrue(checkDecodeUTF8([ 0xfdf3 ], [], [ 0xef, 0xb7, 0xb3 ]))
|
|
|
|
// U+FDF4
|
|
expectTrue(checkDecodeUTF8([ 0xfdf4 ], [], [ 0xef, 0xb7, 0xb4 ]))
|
|
|
|
// U+FDF5
|
|
expectTrue(checkDecodeUTF8([ 0xfdf5 ], [], [ 0xef, 0xb7, 0xb5 ]))
|
|
|
|
// U+FDF6
|
|
expectTrue(checkDecodeUTF8([ 0xfdf6 ], [], [ 0xef, 0xb7, 0xb6 ]))
|
|
|
|
// U+FDF7
|
|
expectTrue(checkDecodeUTF8([ 0xfdf7 ], [], [ 0xef, 0xb7, 0xb7 ]))
|
|
|
|
// U+FDF8
|
|
expectTrue(checkDecodeUTF8([ 0xfdf8 ], [], [ 0xef, 0xb7, 0xb8 ]))
|
|
|
|
// U+FDF9
|
|
expectTrue(checkDecodeUTF8([ 0xfdf9 ], [], [ 0xef, 0xb7, 0xb9 ]))
|
|
|
|
// U+FDFA
|
|
expectTrue(checkDecodeUTF8([ 0xfdfa ], [], [ 0xef, 0xb7, 0xba ]))
|
|
|
|
// U+FDFB
|
|
expectTrue(checkDecodeUTF8([ 0xfdfb ], [], [ 0xef, 0xb7, 0xbb ]))
|
|
|
|
// U+FDFC
|
|
expectTrue(checkDecodeUTF8([ 0xfdfc ], [], [ 0xef, 0xb7, 0xbc ]))
|
|
|
|
// U+FDFD
|
|
expectTrue(checkDecodeUTF8([ 0xfdfd ], [], [ 0xef, 0xb7, 0xbd ]))
|
|
|
|
// U+FDFE
|
|
expectTrue(checkDecodeUTF8([ 0xfdfe ], [], [ 0xef, 0xb7, 0xbe ]))
|
|
|
|
// U+FDFF
|
|
expectTrue(checkDecodeUTF8([ 0xfdff ], [], [ 0xef, 0xb7, 0xbf ]))
|
|
}
|
|
|
|
// Run every test registered on the UTF8Decoder test case.  The FileCheck
// directive that follows matches the summary line expected in the output.
UTF8Decoder.run()
|
|
// CHECK: {{^}}UTF8Decoder: All tests passed
|
|
|
|
// Test case that runs every batch in UTF16Tests through checkDecodeUTF16.
var UTF16Decoder = TestCase("UTF16Decoder")

UTF16Decoder.test("Test") {
  for (batchName, testBatch) in UTF16Tests {
    // Print the batch name before running the tests that belong to it.
    println("Batch: \(batchName)")
    for utf16Test in testBatch {
      let decodedAsExpected = checkDecodeUTF16(
        utf16Test.scalarsHead,
        utf16Test.scalarsRepairedTail,
        utf16Test.encoded)
      expectTrue(decodedAsExpected,
        stackTrace: utf16Test.loc.withCurrentLoc())
    }
  }
}

UTF16Decoder.run()
// CHECK: {{^}}UTF16Decoder: All tests passed
|
|
|
|
// Test case collecting the UTF-32 decoder checks.
var UTF32Decoder = TestCase("UTF32Decoder")

UTF32Decoder.test("Empty") {
  // Empty input is checked against empty expectations.
  let emptyInputDecoded = checkDecodeUTF32([], [], [])
  expectTrue(emptyInputDecoded)
}
|
|
|
|
// Decodes well-formed UTF-32 input.  Each check passes the expected scalars,
// an empty second list, and the UTF-32 code units to decode.
//
// Every check in this test must go through checkDecodeUTF32; a check routed
// through another decoder's helper would not exercise the UTF-32 decoder.
UTF32Decoder.test("SmokeTest") {
  // U+0041 LATIN CAPITAL LETTER A
  expectTrue(checkDecodeUTF32([ 0x0041 ], [], [ 0x0000_0041 ]))

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  expectTrue(checkDecodeUTF32(
      [ 0x0041, 0x0042 ], [],
      [ 0x0000_0041, 0x0000_0042 ]))

  // U+0000 NULL
  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0000 NULL
  expectTrue(checkDecodeUTF32(
      [ 0x0000, 0x0041, 0x0042, 0x0000 ], [],
      [ 0x0000_0000, 0x0000_0041, 0x0000_0042, 0x0000_0000 ]))

  // U+0283 LATIN SMALL LETTER ESH
  expectTrue(checkDecodeUTF32([ 0x0283 ], [], [ 0x0000_0283 ]))

  // U+03BA GREEK SMALL LETTER KAPPA
  // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
  // U+03C3 GREEK SMALL LETTER SIGMA
  // U+03BC GREEK SMALL LETTER MU
  // U+03B5 GREEK SMALL LETTER EPSILON
  expectTrue(checkDecodeUTF32(
      [ 0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5 ], [],
      [ 0x0000_03ba, 0x0000_1f79, 0x0000_03c3, 0x0000_03bc, 0x0000_03b5 ]))

  // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
  // U+6587 CJK UNIFIED IDEOGRAPH-6587
  expectTrue(checkDecodeUTF32(
      [ 0x4f8b, 0x6587 ], [],
      [ 0x0000_4f8b, 0x0000_6587 ]))

  // U+D55C HANGUL SYLLABLE HAN
  // U+AE00 HANGUL SYLLABLE GEUL
  expectTrue(checkDecodeUTF32(
      [ 0xd55c, 0xae00 ], [],
      [ 0x0000_d55c, 0x0000_ae00 ]))

  // U+1112 HANGUL CHOSEONG HIEUH
  // U+1161 HANGUL JUNGSEONG A
  // U+11AB HANGUL JONGSEONG NIEUN
  // U+1100 HANGUL CHOSEONG KIYEOK
  // U+1173 HANGUL JUNGSEONG EU
  // U+11AF HANGUL JONGSEONG RIEUL
  expectTrue(checkDecodeUTF32(
      [ 0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af ], [],
      [ 0x0000_1112, 0x0000_1161, 0x0000_11ab, 0x0000_1100, 0x0000_1173,
        0x0000_11af ]))

  // The next four checks previously called checkDecodeUTF16, so these
  // boundary scalars were never decoded as UTF-32.

  // U+D7FF (unassigned)
  expectTrue(checkDecodeUTF32([ 0xd7ff ], [], [ 0x0000_d7ff ]))

  // U+E000 (private use)
  expectTrue(checkDecodeUTF32([ 0xe000 ], [], [ 0x0000_e000 ]))

  // U+FFFD REPLACEMENT CHARACTER
  expectTrue(checkDecodeUTF32([ 0xfffd ], [], [ 0x0000_fffd ]))

  // U+FFFF (noncharacter)
  expectTrue(checkDecodeUTF32([ 0xffff ], [], [ 0x0000_ffff ]))

  // U+10000 LINEAR B SYLLABLE B008 A
  expectTrue(checkDecodeUTF32([ 0x00010000 ], [], [ 0x0001_0000 ]))

  // U+10100 AEGEAN WORD SEPARATOR LINE
  expectTrue(checkDecodeUTF32([ 0x00010100 ], [], [ 0x0001_0100 ]))

  // U+103FF (unassigned)
  expectTrue(checkDecodeUTF32([ 0x000103ff ], [], [ 0x0001_03ff ]))

  // U+1D800 (unassigned)
  expectTrue(checkDecodeUTF32([ 0x0001d800 ], [], [ 0x0001_d800 ]))


  // U+E0000 (unassigned)
  expectTrue(checkDecodeUTF32([ 0x000e0000 ], [], [ 0x000e_0000 ]))

  // U+E0100 VARIATION SELECTOR-17
  expectTrue(checkDecodeUTF32([ 0x000e0100 ], [], [ 0x000e_0100 ]))

  // U+E03FF (unassigned)
  expectTrue(checkDecodeUTF32([ 0x000e03ff ], [], [ 0x000e_03ff ]))


  // U+10FC00 (private use)
  expectTrue(checkDecodeUTF32([ 0x0010fc00 ], [], [ 0x0010_fc00 ]))

  // U+10FD00 (private use)
  expectTrue(checkDecodeUTF32([ 0x0010fd00 ], [], [ 0x0010_fd00 ]))

  // U+10FFFF (private use, noncharacter)
  expectTrue(checkDecodeUTF32([ 0x0010ffff ], [], [ 0x0010_ffff ]))
}
|
|
|
|
// Ill-formed UTF-32 input: surrogate code points and values above U+10FFFF.
// Every check passes an empty first list, [ 0xfffd ] as the second list, and
// a single offending code unit as the input.
// NOTE(review): the argument order presumably mirrors checkDecodeUTF16
// (scalars head, repaired tail, encoded input) -- confirm against the helper.
UTF32Decoder.test("IllFormed") {
|
|
// U+D800 (high-surrogate)
|
|
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_d800 ]))
|
|
|
|
// U+DB40 (high-surrogate)
|
|
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_db40 ]))
|
|
|
|
// U+DBFF (high-surrogate)
|
|
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_dbff ]))
|
|
|
|
// U+DC00 (low-surrogate)
|
|
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_dc00 ]))
|
|
|
|
// U+DD00 (low-surrogate)
|
|
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_dd00 ]))
|
|
|
|
// U+DFFF (low-surrogate)
|
|
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_dfff ]))
|
|
|
|
// U+110000 (invalid)
|
|
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0011_0000 ]))
|
|
|
|
// U+1000000 (invalid)
|
|
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0100_0000 ]))
|
|
|
|
// U+80000000 (invalid)
|
|
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x8000_0000 ]))
|
|
|
|
// U+FFFF0000 (invalid)
|
|
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0xffff_0000 ]))
|
|
|
|
// U+FFFFFFFF (invalid)
|
|
expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0xffff_ffff ]))
|
|
}
|
|
|
|
// Run the UTF32Decoder tests; the FileCheck directive below matches the
// summary line expected in the output.
UTF32Decoder.run()
|
|
// CHECK: {{^}}UTF32Decoder: All tests passed
|
|
|
|
// Test case that runs the smoke-test table through checkEncodeUTF8.
var UTF8Encoder = TestCase("UTF8Encoder")

UTF8Encoder.test("SmokeTest") {
  for smokeTest in UTF8TestsSmokeTest {
    let encodedAsExpected = checkEncodeUTF8(
      smokeTest.encoded, smokeTest.scalars)
    expectTrue(encodedAsExpected,
      stackTrace: smokeTest.loc.withCurrentLoc())
  }
}

UTF8Encoder.run()
// CHECK: {{^}}UTF8Encoder: All tests passed
|
|
|
|
|
|
import Foundation
|
|
|
|
// The simplest NSString subclass that CoreFoundation does not know about.
// Its contents live in a Swift array of UTF-16 code units, and it overrides
// `length`, `characterAtIndex` and `copyWithZone`.
class NonContiguousNSString : NSString {
  // UTF-16 code units that make up the string.
  var _value: [UInt16]

  // Designated initializer: stores the given UTF-16 code units unchanged.
  init(_ value: [UInt16]) {
    _value = value
    super.init()
  }

  // Transcodes UTF-8 code units to UTF-16 (stopping on the first error),
  // expects the transcoding to report no error, and stores the result.
  convenience init(_ utf8: [UInt8]) {
    var utf16Units: [UInt16] = []
    var utf8Source = utf8.generate()
    let transcodingFailed = transcode(UTF8.self, UTF16.self, utf8Source,
      SinkOf {
        utf16Units.append($0)
      },
      stopOnError: true)
    expectFalse(transcodingFailed)
    self.init(utf16Units)
  }

  // Transcodes UTF-32 scalars to UTF-16 (stopping on the first error),
  // expects the transcoding to report no error, and stores the result.
  convenience init(_ scalars: [UInt32]) {
    var utf16Units: [UInt16] = []
    var scalarSource = scalars.generate()
    let transcodingFailed = transcode(UTF32.self, UTF16.self, scalarSource,
      SinkOf {
        utf16Units.append($0)
      },
      stopOnError: true)
    expectFalse(transcodingFailed)
    self.init(utf16Units)
  }

  // Number of UTF-16 code units held by the string.
  @objc override var length: Int {
    return _value.count
  }

  // UTF-16 code unit stored at `index`.
  @objc override func characterAtIndex(index: Int) -> unichar {
    return _value[index]
  }

  @objc override func copyWithZone(zone: NSZone) -> AnyObject {
    // Returning `self` ensures that a copy of this string is still an
    // instance of a class that CoreFoundation does not know about.
    return self
  }
}
|
|
|
|
// Passes the expected UTF-8 code units and the `utf8` view of `subject` to
// checkCollection, extending `stackTrace` with the current location.
func checkUTF8View(expected: [UInt8], subject: String,
    stackTrace: SourceLocStack) {
  let actualView = subject.utf8
  let location = stackTrace.withCurrentLoc()
  checkCollection(expected, actualView, location)
}
|
|
|
|
// Passes the expected UTF-16 code units and the `utf16` view of `subject` to
// checkSliceableWithBidirectionalIndex, extending `stackTrace` with the
// current location.
func checkUTF16View(expected: [UInt16], subject: String,
    stackTrace: SourceLocStack) {
  let actualView = subject.utf16
  let location = stackTrace.withCurrentLoc()
  checkSliceableWithBidirectionalIndex(expected, actualView, location)
}
|
|
|
|
// Calls `checkClosure` once per entry of UTF16Tests, passing the test
// together with a String made from a NonContiguousNSString that wraps the
// test's encoded UTF-16 code units.  Each batch name is printed before its
// tests are visited.
func forStringsWithUnpairedSurrogates(checkClosure: (UTF16Test, String) -> ()) {
  for (batchName, testBatch) in UTF16Tests {
    println("Batch: \(batchName)")
    for utf16Test in testBatch {
      let bridgedString: String = NonContiguousNSString(utf16Test.encoded)
      checkClosure(utf16Test, bridgedString)
    }
  }
}
|
|
|
|
// Test case for the String views built on top of UTF-16 storage.
var StringCookedViews = TestCase("StringCookedViews")

StringCookedViews.test("UTF8ForContiguousUTF16") {
  for smokeTest in UTF8TestsSmokeTest {
    // Lead with a non-ASCII character so that neither Swift String nor
    // CoreFoundation can take the ASCII fast path.
    //
    // U+0283 LATIN SMALL LETTER ESH
    let expectedUTF8: [UInt8] = [ 0xca, 0x83 ] + smokeTest.encoded
    var utf16Storage: [UInt16] = [ 0x0283 ]

    // Append the test scalars, transcoded to UTF-16, after the prefix.
    var scalarSource = smokeTest.scalars.generate()
    transcode(UTF32.self, UTF16.self, scalarSource,
      SinkOf {
        utf16Storage.append($0)
      },
      stopOnError: false)

    utf16Storage.withUnsafePointerToElements {
      (utf16Pointer) -> () in
      // Wrap the storage in a CFString without copying it.
      let cfstring = CFStringCreateWithCharactersNoCopy(
        kCFAllocatorDefault,
        utf16Pointer,
        utf16Storage.count,
        kCFAllocatorNull)

      // No ASCII C-string pointer is expected, but a UTF-16 characters
      // pointer is.
      expectFalse(
        CFStringGetCStringPtr(
          cfstring, CFStringBuiltInEncodings.ASCII.toRaw()) != nil)
      expectTrue(CFStringGetCharactersPtr(cfstring) != nil)

      checkUTF8View(
        expectedUTF8, String(cfstring), smokeTest.loc.withCurrentLoc())
      return ()
    }
  }

  forStringsWithUnpairedSurrogates {
    (utf16Test: UTF16Test, bridgedString: String) -> () in
    // The expected UTF-8 is the head and repaired-tail scalars of the test,
    // transcoded from UTF-32.
    var expectedUTF8: [UInt8] = []
    var repairedScalars = utf16Test.scalarsHead + utf16Test.scalarsRepairedTail
    var scalarSource = repairedScalars.generate()
    transcode(UTF32.self, UTF8.self, scalarSource,
      SinkOf {
        expectedUTF8.append($0)
      },
      stopOnError: false)

    checkUTF8View(expectedUTF8, bridgedString, utf16Test.loc.withCurrentLoc())
  }
}
|
|
|
|
// Sanity checks that make sure the tests exercise the code path that does
// UTF-8 encoding itself, rather than dispatching to CF.
//
// Both the string and a CF copy of it are checked: Swift bridging may copy
// the string to ensure that it is not mutated, so the copy must be just as
// resilient to CF's fast path functions as the original.
func verifyThatStringIsOpaqueForCoreFoundation(nss: NSString) {
  let asciiEncoding = CFStringBuiltInEncodings.ASCII.toRaw()
  let utf8Encoding = CFStringBuiltInEncodings.UTF8.toRaw()

  // The original string: no C-string pointer and no characters pointer.
  let cfstring: CFString = reinterpretCast(nss)
  assert(CFStringGetCStringPtr(cfstring, asciiEncoding) == nil)
  assert(CFStringGetCStringPtr(cfstring, utf8Encoding) == nil)
  assert(CFStringGetCharactersPtr(cfstring) == nil)

  // The same checks, repeated on a copy made by CoreFoundation.
  let cfCopy = CFStringCreateCopy(nil, cfstring)
  assert(CFStringGetCStringPtr(cfCopy, asciiEncoding) == nil)
  assert(CFStringGetCStringPtr(cfCopy, utf8Encoding) == nil)
  assert(CFStringGetCharactersPtr(cfCopy) == nil)
}
|
|
|
|
// Checks the UTF-8 view of strings backed by NonContiguousNSString, first
// for the smoke-test table and then for every batch in UTF16Tests.
StringCookedViews.test("UTF8ForNonContiguousUTF16") {
  for smokeTest in UTF8TestsSmokeTest {
    let opaqueString = NonContiguousNSString(smokeTest.scalars)
    verifyThatStringIsOpaqueForCoreFoundation(opaqueString)
    checkUTF8View(
      smokeTest.encoded, opaqueString, smokeTest.loc.withCurrentLoc())
  }

  for (batchName, testBatch) in UTF16Tests {
    println("Batch: \(batchName)")
    for utf16Test in testBatch {
      // The expected UTF-8 is the head and repaired-tail scalars of the
      // test, transcoded from UTF-32.
      var expectedUTF8: [UInt8] = []
      var repairedScalars =
        utf16Test.scalarsHead + utf16Test.scalarsRepairedTail
      var scalarSource = repairedScalars.generate()
      transcode(UTF32.self, UTF8.self, scalarSource,
        SinkOf {
          expectedUTF8.append($0)
        },
        stopOnError: false)

      let opaqueString = NonContiguousNSString(utf16Test.encoded)
      verifyThatStringIsOpaqueForCoreFoundation(opaqueString)
      checkUTF8View(
        expectedUTF8, opaqueString, utf16Test.loc.withCurrentLoc())
    }
  }
}
|
|
|
|
StringCookedViews.test("UTF8ForNonContiguousUTF16Extra") {
  // These tests don't add much additional value as long as tests above
  // actually test the code path we care about.
  let macRomanEncoding = CFStringBuiltInEncodings.MacRoman.toRaw()
  let asciiEncoding = CFStringBuiltInEncodings.ASCII.toRaw()
  let utf8Encoding = CFStringBuiltInEncodings.UTF8.toRaw()

  // Case 1: a CFString that refers to the Swift array's bytes without
  // copying them.
  if true {
    var bytes: [UInt8] = [ 97, 98, 99 ]
    var cfstring: CFString = CFStringCreateWithBytesNoCopy(
      kCFAllocatorDefault,
      bytes, bytes.count, macRomanEncoding, 0, kCFAllocatorNull)

    // Sanity checks to make sure we are testing the code path that does
    // UTF-8 encoding itself, instead of dispatching to CF.
    // GetCStringPtr fails because our un-copied bytes aren't
    // zero-terminated.
    // GetCharactersPtr fails because our un-copied bytes aren't UTF-16.
    assert(CFStringGetCStringPtr(cfstring, asciiEncoding) == nil)
    assert(CFStringGetCStringPtr(cfstring, utf8Encoding) == nil)
    assert(CFStringGetCharactersPtr(cfstring) == nil)

    checkUTF8View(bytes, String(cfstring), SourceLocStack().withCurrentLoc())
    _fixLifetime(bytes)
  }

  // Case 2: a CFString created with CFStringCreateWithBytes.
  if true {
    var bytes: [UInt8] = [ 97, 98, 99 ]
    var cfstring: CFString = CFStringCreateWithBytes(
      kCFAllocatorDefault,
      bytes, bytes.count, macRomanEncoding, 0)

    // Sanity checks to make sure we are testing the code path that does
    // UTF-8 encoding itself, instead of dispatching to CF.
    // CFStringCreateWithBytes() usually allocates zero-terminated ASCII
    // or UTF-16, in which case one of the fast paths will succeed.
    // This test operates only when CF creates a tagged pointer string
    // object.
    if object_getClassName(cfstring) == "NSTaggedPointerString" {
      assert(CFStringGetCStringPtr(cfstring, asciiEncoding) == nil)
      assert(CFStringGetCStringPtr(cfstring, utf8Encoding) == nil)
      assert(CFStringGetCharactersPtr(cfstring) == nil)

      checkUTF8View(
        bytes, String(cfstring), SourceLocStack().withCurrentLoc())
    }
  }
}
|
|
|
|
// Checks the UTF-16 view of strings backed by NonContiguousNSString against
// the UTF-16 obtained by transcoding the expected scalars from UTF-32.
StringCookedViews.test("UTF16") {
  for smokeTest in UTF8TestsSmokeTest {
    var expectedUTF16: [UInt16] = []
    var smokeScalars = smokeTest.scalars
    var scalarSource = smokeScalars.generate()
    transcode(UTF32.self, UTF16.self, scalarSource,
      SinkOf {
        expectedUTF16.append($0)
      },
      stopOnError: false)

    let opaqueString = NonContiguousNSString(smokeTest.scalars)
    checkUTF16View(
      expectedUTF16, opaqueString, smokeTest.loc.withCurrentLoc())
  }

  forStringsWithUnpairedSurrogates {
    (utf16Test: UTF16Test, bridgedString: String) -> () in
    // Here the expected scalars are the head plus the repaired tail.
    var expectedUTF16: [UInt16] = []
    var repairedScalars = utf16Test.scalarsHead + utf16Test.scalarsRepairedTail
    var scalarSource = repairedScalars.generate()
    transcode(UTF32.self, UTF16.self, scalarSource,
      SinkOf {
        expectedUTF16.append($0)
      },
      stopOnError: false)

    checkUTF16View(
      expectedUTF16, bridgedString, utf16Test.loc.withCurrentLoc())
  }
}
|
|
|
|
// Checks the `unicodeScalars` view of strings backed by
// NonContiguousNSString against the expected scalar values.
StringCookedViews.test("UnicodeScalars") {
  for smokeTest in UTF8TestsSmokeTest {
    let scalarValues = smokeTest.scalars
    let expectedScalars = map(scalarValues) { UnicodeScalar($0) }
    let bridgedString: String = NonContiguousNSString(smokeTest.scalars)
    checkSliceableWithBidirectionalIndex(
      expectedScalars,
      bridgedString.unicodeScalars,
      smokeTest.loc.withCurrentLoc())
  }

  forStringsWithUnpairedSurrogates {
    (utf16Test: UTF16Test, bridgedString: String) -> () in
    // Here the expected scalars are the head plus the repaired tail.
    let scalarValues = utf16Test.scalarsHead + utf16Test.scalarsRepairedTail
    let expectedScalars = map(scalarValues) {
      UnicodeScalar($0)
    }
    checkSliceableWithBidirectionalIndex(
      expectedScalars,
      bridgedString.unicodeScalars,
      utf16Test.loc.withCurrentLoc())
  }
}

StringCookedViews.run()
// CHECK: {{^}}StringCookedViews: All tests passed
|
|
|
|
// Test case for String behavior on the strings built from UTF16Tests.
var StringTests = TestCase("StringTests")

StringTests.test("StreamableConformance") {
  forStringsWithUnpairedSurrogates {
    (utf16Test: UTF16Test, bridgedString: String) -> () in
    // The scalar values of toString(subject) are expected to equal the
    // head plus the repaired tail of the test.
    let expectedValues = utf16Test.scalarsHead + utf16Test.scalarsRepairedTail
    let printedString = toString(bridgedString)
    let actualValues = map(printedString.unicodeScalars) { $0.value }
    expectEqual(expectedValues, actualValues)
  }
}

StringTests.run()
// CHECK: {{^}}StringTests: All tests passed
|
|
|