//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

import StdlibUnittest

/// One test vector: a string together with the UTF-8 code units, UTF-16 code
/// units and Unicode scalars it is expected to correspond to.
public struct UTFTest {
  /// Per-vector option bits.
  ///
  /// NOTE(review): by name these mark vectors whose `utf8` / `utf16` arrays
  /// are deliberately not well-formed; no entry in the `utfTests` table below
  /// sets either bit, so confirm the exact meaning against the consumers.
  public struct Flags : OptionSet {
    public let rawValue: Int

    public init(rawValue: Int) {
      self.rawValue = rawValue
    }

    public static let utf8IsInvalid = Flags(rawValue: 1 << 0)
    public static let utf16IsInvalid = Flags(rawValue: 1 << 1)
  }

  /// The string under test.
  public let string: String

  /// UTF-8 code units associated with `string`.
  public let utf8: [UInt8]

  /// UTF-16 code units associated with `string`.
  public let utf16: [UInt16]

  /// The scalars passed to `init` as `scalars`, converted to `Unicode.Scalar`.
  public let unicodeScalars: [Unicode.Scalar]

  /// The scalars passed to `init` as `scalarsRepairedTail`, converted to
  /// `Unicode.Scalar`. Empty unless the caller supplies a tail.
  public let unicodeScalarsRepairedTail: [Unicode.Scalar]

  /// Option bits for this vector; empty by default.
  public let flags: Flags

  /// Source location of the expression that created this vector, tagged with
  /// the comment "test data".
  public let loc: SourceLoc

  /// `unicodeScalars` as raw `UInt32` values.
  public var utf32: [UInt32] {
    return unicodeScalars.map(UInt32.init)
  }

  /// `unicodeScalarsRepairedTail` as raw `UInt32` values.
  public var utf32RepairedTail: [UInt32] {
    return unicodeScalarsRepairedTail.map(UInt32.init)
  }

  /// Creates a test vector.
  ///
  /// - Parameters:
  ///   - string: The string under test.
  ///   - utf8: UTF-8 code units for `string`.
  ///   - utf16: UTF-16 code units for `string`.
  ///   - scalars: Scalar values; each must be a valid Unicode scalar value.
  ///   - scalarsRepairedTail: Additional scalar values, subject to the same
  ///     validity requirement. Defaults to empty.
  ///   - flags: Option bits. Defaults to empty.
  ///   - file: Captured for `loc`; defaults to the caller's file.
  ///   - line: Captured for `loc`; defaults to the caller's line.
  public init(
    string: String,
    utf8: [UInt8],
    utf16: [UInt16],
    scalars: [UInt32],
    scalarsRepairedTail: [UInt32] = [],
    flags: Flags = [],
    file: String = #file, line: UInt = #line
  ) {
    self.string = string
    self.utf8 = utf8
    self.utf16 = utf16
    // `Unicode.Scalar(_: UInt32)` is failable; a surrogate or out-of-range
    // value in hand-written test data is a programmer error, so trapping on
    // it here is intentional.
    self.unicodeScalars = scalars.map { Unicode.Scalar($0)! }
    self.unicodeScalarsRepairedTail =
      scalarsRepairedTail.map { Unicode.Scalar($0)! }
    self.flags = flags
    self.loc = SourceLoc(file, line, comment: "test data")
  }
}

/// Well-formed test vectors, grouped by the length of their UTF-8 encoding.
public let utfTests: [UTFTest] = [
  //
  // Empty sequence.
  //

  UTFTest(
    string: "",
    utf8: [],
    utf16: [],
    scalars: []),

  //
  // 1-byte sequences.
  //

  // U+0000 NULL
  UTFTest(
    string: "\u{0000}",
    utf8: [ 0x00 ],
    utf16: [ 0x00 ],
    scalars: [ 0x00 ]),

  // U+0041 LATIN CAPITAL LETTER A
  UTFTest(
    string: "A",
    utf8: [ 0x41 ],
    utf16: [ 0x41 ],
    scalars: [ 0x41 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  UTFTest(
    string: "AB",
    utf8: [ 0x41, 0x42 ],
    utf16: [ 0x41, 0x42 ],
    scalars: [ 0x41, 0x42 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  UTFTest(
    string: "ABC",
    utf8: [ 0x41, 0x42, 0x43 ],
    utf16: [ 0x41, 0x42, 0x43 ],
    scalars: [ 0x41, 0x42, 0x43 ]),

  // U+0000 NULL
  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0000 NULL
  UTFTest(
    string: "\u{0000}AB\u{0000}",
    utf8: [ 0x00, 0x41, 0x42, 0x00 ],
    utf16: [ 0x00, 0x41, 0x42, 0x00 ],
    scalars: [ 0x00, 0x41, 0x42, 0x00 ]),

  // U+007F DELETE
  UTFTest(
    string: "\u{007F}",
    utf8: [ 0x7F ],
    utf16: [ 0x7F ],
    scalars: [ 0x7F ]),

  //
  // 2-byte sequences.
  //

  // U+0283 LATIN SMALL LETTER ESH
  UTFTest(
    string: "\u{0283}",
    utf8: [ 0xCA, 0x83 ],
    utf16: [ 0x0283 ],
    scalars: [ 0x0283 ]),

  // U+03BA GREEK SMALL LETTER KAPPA
  // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
  // U+03C3 GREEK SMALL LETTER SIGMA
  // U+03BC GREEK SMALL LETTER MU
  // U+03B5 GREEK SMALL LETTER EPSILON
  //
  // Note: U+1F79 is the one scalar in this vector that takes three UTF-8
  // bytes (0xE1 0xBD 0xB9); the others take two.
  UTFTest(
    string: "\u{03BA}\u{1F79}\u{03C3}\u{03BC}\u{03B5}",
    utf8: [ 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 ],
    utf16: [ 0x03BA, 0x1F79, 0x03C3, 0x03BC, 0x03B5 ],
    scalars: [ 0x03BA, 0x1F79, 0x03C3, 0x03BC, 0x03B5 ]),

  // U+0430 CYRILLIC SMALL LETTER A
  // U+0431 CYRILLIC SMALL LETTER BE
  // U+0432 CYRILLIC SMALL LETTER VE
  UTFTest(
    string: "\u{0430}\u{0431}\u{0432}",
    utf8: [ 0xD0, 0xB0, 0xD0, 0xB1, 0xD0, 0xB2 ],
    utf16: [ 0x0430, 0x0431, 0x0432 ],
    scalars: [ 0x0430, 0x0431, 0x0432 ]),

  //
  // 3-byte sequences.
  //

  // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
  // U+6587 CJK UNIFIED IDEOGRAPH-6587
  UTFTest(
    string: "\u{4F8b}\u{6587}",
    utf8: [ 0xE4, 0xBE, 0x8B, 0xE6, 0x96, 0x87 ],
    utf16: [ 0x4F8B, 0x6587 ],
    scalars: [ 0x4F8B, 0x6587 ]),

  // U+D55C HANGUL SYLLABLE HAN
  // U+AE00 HANGUL SYLLABLE GEUL
  UTFTest(
    string: "\u{d55c}\u{ae00}",
    utf8: [ 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80 ],
    utf16: [ 0xD55C, 0xAE00 ],
    scalars: [ 0xD55C, 0xAE00 ]),

  // U+1112 HANGUL CHOSEONG HIEUH
  // U+1161 HANGUL JUNGSEONG A
  // U+11AB HANGUL JONGSEONG NIEUN
  // U+1100 HANGUL CHOSEONG KIYEOK
  // U+1173 HANGUL JUNGSEONG EU
  // U+11AF HANGUL JONGSEONG RIEUL
  UTFTest(
    string: "\u{1112}\u{1161}\u{11ab}\u{1100}\u{1173}\u{11af}",
    utf8: [
      0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
      0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF
    ],
    utf16: [ 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF ],
    scalars: [ 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF ]),

  // U+3042 HIRAGANA LETTER A
  // U+3044 HIRAGANA LETTER I
  // U+3046 HIRAGANA LETTER U
  // U+3048 HIRAGANA LETTER E
  // U+304A HIRAGANA LETTER O
  UTFTest(
    string: "\u{3042}\u{3044}\u{3046}\u{3048}\u{304a}",
    utf8: [
      0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84, 0xE3, 0x81, 0x86,
      0xE3, 0x81, 0x88, 0xE3, 0x81, 0x8A
    ],
    utf16: [ 0x3042, 0x3044, 0x3046, 0x3048, 0x304A ],
    scalars: [ 0x3042, 0x3044, 0x3046, 0x3048, 0x304A ]),

  // U+D7FF (unassigned)
  UTFTest(
    string: "\u{D7FF}",
    utf8: [ 0xED, 0x9F, 0xBF ],
    utf16: [ 0xD7FF ],
    scalars: [ 0xD7FF ]),

  // U+E000 (private use)
  UTFTest(
    string: "\u{E000}",
    utf8: [ 0xEE, 0x80, 0x80 ],
    utf16: [ 0xE000 ],
    scalars: [ 0xE000 ]),

  // U+FFFD REPLACEMENT CHARACTER
  UTFTest(
    string: "\u{FFFD}",
    utf8: [ 0xEF, 0xBF, 0xBD ],
    utf16: [ 0xFFFD ],
    scalars: [ 0xFFFD ]),

  // U+FFFF (noncharacter)
  UTFTest(
    string: "\u{FFFF}",
    utf8: [ 0xEF, 0xBF, 0xBF ],
    utf16: [ 0xFFFF ],
    scalars: [ 0xFFFF ]),

  //
  // 4-byte sequences.
  //

  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "\u{1F425}",
    utf8: [ 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0xD83D, 0xDC25 ],
    scalars: [ 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "A\u{1F425}",
    utf8: [ 0x41, 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0x41, 0xD83D, 0xDC25 ],
    scalars: [ 0x41, 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "AB\u{1F425}",
    utf8: [ 0x41, 0x42, 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0x41, 0x42, 0xD83D, 0xDC25 ],
    scalars: [ 0x41, 0x42, 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "ABC\u{1F425}",
    utf8: [ 0x41, 0x42, 0x43, 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0x41, 0x42, 0x43, 0xD83D, 0xDC25 ],
    scalars: [ 0x41, 0x42, 0x43, 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  // U+0044 LATIN CAPITAL LETTER D
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "ABCD\u{1F425}",
    utf8: [ 0x41, 0x42, 0x43, 0x44, 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0x41, 0x42, 0x43, 0x44, 0xD83D, 0xDC25 ],
    scalars: [ 0x41, 0x42, 0x43, 0x44, 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  // U+0044 LATIN CAPITAL LETTER D
  // U+0045 LATIN CAPITAL LETTER E
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "ABCDE\u{1F425}",
    utf8: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0xD83D, 0xDC25 ],
    scalars: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  // U+0044 LATIN CAPITAL LETTER D
  // U+0045 LATIN CAPITAL LETTER E
  // U+0046 LATIN CAPITAL LETTER F
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "ABCDEF\u{1F425}",
    utf8: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0xD83D, 0xDC25 ],
    scalars: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  // U+0044 LATIN CAPITAL LETTER D
  // U+0045 LATIN CAPITAL LETTER E
  // U+0046 LATIN CAPITAL LETTER F
  // U+0047 LATIN CAPITAL LETTER G
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "ABCDEFG\u{1F425}",
    utf8: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0xD83D, 0xDC25 ],
    scalars: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  // U+0044 LATIN CAPITAL LETTER D
  // U+0045 LATIN CAPITAL LETTER E
  // U+0046 LATIN CAPITAL LETTER F
  // U+0047 LATIN CAPITAL LETTER G
  // U+0048 LATIN CAPITAL LETTER H
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "ABCDEFGH\u{1F425}",
    utf8: [
      0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
      0xF0, 0x9F, 0x90, 0xA5
    ],
    utf16: [
      0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
      0xD83D, 0xDC25
    ],
    scalars: [
      0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x0001_F425
    ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  // U+0044 LATIN CAPITAL LETTER D
  // U+0045 LATIN CAPITAL LETTER E
  // U+0046 LATIN CAPITAL LETTER F
  // U+0047 LATIN CAPITAL LETTER G
  // U+0048 LATIN CAPITAL LETTER H
  // U+0049 LATIN CAPITAL LETTER I
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "ABCDEFGHI\u{1F425}",
    utf8: [
      0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
      0xF0, 0x9F, 0x90, 0xA5
    ],
    utf16: [
      0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
      0xD83D, 0xDC25
    ],
    scalars: [
      0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x0001_F425
    ]),

  // U+10000 LINEAR B SYLLABLE B008 A
  UTFTest(
    string: "\u{10000}",
    utf8: [ 0xF0, 0x90, 0x80, 0x80 ],
    utf16: [ 0xD800, 0xDC00 ],
    scalars: [ 0x0001_0000 ]),

  // U+10100 AEGEAN WORD SEPARATOR LINE
  UTFTest(
    string: "\u{10100}",
    utf8: [ 0xF0, 0x90, 0x84, 0x80 ],
    utf16: [ 0xD800, 0xDD00 ],
    scalars: [ 0x0001_0100 ]),

  // U+103FF (unassigned)
  UTFTest(
    string: "\u{103FF}",
    utf8: [ 0xF0, 0x90, 0x8F, 0xBF ],
    utf16: [ 0xD800, 0xDFFF ],
    scalars: [ 0x0001_03FF ]),

  // U+E0000 (unassigned)
  UTFTest(
    string: "\u{E0000}",
    utf8: [ 0xF3, 0xA0, 0x80, 0x80 ],
    utf16: [ 0xDB40, 0xDC00 ],
    scalars: [ 0x000E_0000 ]),

  // U+E0100 VARIATION SELECTOR-17
  UTFTest(
    string: "\u{E0100}",
    utf8: [ 0xF3, 0xA0, 0x84, 0x80 ],
    utf16: [ 0xDB40, 0xDD00 ],
    scalars: [ 0x000E_0100 ]),

  // U+E03FF (unassigned)
  UTFTest(
    string: "\u{E03FF}",
    utf8: [ 0xF3, 0xA0, 0x8F, 0xBF ],
    utf16: [ 0xDB40, 0xDFFF ],
    scalars: [ 0x000E_03FF ]),

  // U+10FC00 (private use)
  UTFTest(
    string: "\u{10FC00}",
    utf8: [ 0xF4, 0x8F, 0xB0, 0x80 ],
    utf16: [ 0xDBFF, 0xDC00 ],
    scalars: [ 0x0010_FC00 ]),

  // U+10FD00 (private use)
  UTFTest(
    string: "\u{10FD00}",
    utf8: [ 0xF4, 0x8F, 0xB4, 0x80 ],
    utf16: [ 0xDBFF, 0xDD00 ],
    scalars: [ 0x0010_FD00 ]),

  // U+10FFFF (private use, noncharacter)
  UTFTest(
    string: "\u{10FFFF}",
    utf8: [ 0xF4, 0x8F, 0xBF, 0xBF ],
    utf16: [ 0xDBFF, 0xDFFF ],
    scalars: [ 0x0010_FFFF ]),
]

/// One UTF-16 test vector containing unpaired surrogates.
///
/// In the table entries below, `scalarsHead` lists the scalars encoded by the
/// code units that precede the first unpaired surrogate, and
/// `scalarsRepairedTail` lists the scalars for the remaining code units with
/// each unpaired surrogate represented by U+FFFD.
public struct UTF16Test {
  /// Scalar values for the leading well-formed part of `encoded`.
  public let scalarsHead: [UInt32]

  /// Scalar values for the rest of `encoded`, with U+FFFD standing in for
  /// each unpaired surrogate.
  public let scalarsRepairedTail: [UInt32]

  /// The UTF-16 code units under test.
  public let encoded: [UInt16]

  /// Source location of the expression that created this vector, tagged with
  /// the comment "test data".
  public let loc: SourceLoc

  /// Creates a UTF-16 test vector.
  ///
  /// - Parameters:
  ///   - scalarsHead: Scalar values for the leading well-formed code units.
  ///   - scalarsRepairedTail: Scalar values for the remaining code units.
  ///   - encoded: The UTF-16 code units under test.
  ///   - file: Captured for `loc`; defaults to the caller's file.
  ///   - line: Captured for `loc`; defaults to the caller's line.
  public init(
    _ scalarsHead: [UInt32], _ scalarsRepairedTail: [UInt32],
    _ encoded: [UInt16],
    file: String = #file, line: UInt = #line
  ) {
    self.scalarsHead = scalarsHead
    self.scalarsRepairedTail = scalarsRepairedTail
    self.encoded = encoded
    self.loc = SourceLoc(file, line, comment: "test data")
  }
}

/// UTF-16 vectors with unpaired surrogates, keyed by category:
/// "Incomplete" holds unpaired high surrogates, "IllFormed" holds unpaired
/// low surrogates (alone or followed by a high surrogate).
public let utf16Tests: [String: [UTF16Test]] = [
  "Incomplete": [
    //
    // Incomplete sequences that end right before EOF.
    //

    // U+D800 (high-surrogate)
    UTF16Test([], [ 0xFFFD ], [ 0xD800 ]),

    // U+D800 (high-surrogate)
    // U+D800 (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xD800, 0xD800 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    UTF16Test([ 0x0041 ], [ 0xFFFD ], [ 0x0041, 0xD800 ]),

    // U+10000 LINEAR B SYLLABLE B008 A
    // U+D800 (high-surrogate)
    UTF16Test(
      [ 0x0001_0000 ], [ 0xFFFD ],
      [ 0xD800, 0xDC00, 0xD800 ]),

    //
    // Incomplete sequences with more code units following them.
    //

    // U+D800 (high-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test([], [ 0xFFFD, 0x0041 ], [ 0xD800, 0x0041 ]),

    // U+D800 (high-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [], [ 0xFFFD, 0x0001_0000 ],
      [ 0xD800, 0xD800, 0xDC00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0x0041 ],
      [ 0x0041, 0xD800, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0x0001_0000 ],
      [ 0x0041, 0xD800, 0xD800, 0xDC00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+DB40 (high-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0041 ],
      [ 0x0041, 0xD800, 0xDB40, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+DB40 (high-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0001_0000 ],
      [ 0x0041, 0xD800, 0xDB40, 0xD800, 0xDC00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+DB40 (high-surrogate)
    // U+DBFF (high-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0041 ],
      [ 0x0041, 0xD800, 0xDB40, 0xDBFF, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+DB40 (high-surrogate)
    // U+DBFF (high-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0001_0000 ],
      [ 0x0041, 0xD800, 0xDB40, 0xDBFF, 0xD800, 0xDC00 ]),
  ],

  "IllFormed": [
    //
    // Low-surrogate right before EOF.
    //

    // U+DC00 (low-surrogate)
    UTF16Test([], [ 0xFFFD ], [ 0xDC00 ]),

    // U+DC00 (low-surrogate)
    // U+DC00 (low-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xDC00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    UTF16Test([ 0x0041 ], [ 0xFFFD ], [ 0x0041, 0xDC00 ]),

    // U+10000 LINEAR B SYLLABLE B008 A
    // U+DC00 (low-surrogate)
    UTF16Test(
      [ 0x0001_0000 ], [ 0xFFFD ],
      [ 0xD800, 0xDC00, 0xDC00 ]),

    //
    // Low-surrogate with more code units following it.
    //

    // U+DC00 (low-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test([], [ 0xFFFD, 0x0041 ], [ 0xDC00, 0x0041 ]),

    // U+DC00 (low-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [], [ 0xFFFD, 0x0001_0000 ],
      [ 0xDC00, 0xD800, 0xDC00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0x0041 ],
      [ 0x0041, 0xDC00, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0x0001_0000 ],
      [ 0x0041, 0xDC00, 0xD800, 0xDC00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+DD00 (low-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0041 ],
      [ 0x0041, 0xDC00, 0xDD00, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+DD00 (low-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0001_0000 ],
      [ 0x0041, 0xDC00, 0xDD00, 0xD800, 0xDC00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+DD00 (low-surrogate)
    // U+DFFF (low-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0041 ],
      [ 0x0041, 0xDC00, 0xDD00, 0xDFFF, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+DD00 (low-surrogate)
    // U+DFFF (low-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0001_0000 ],
      [ 0x0041, 0xDC00, 0xDD00, 0xDFFF, 0xD800, 0xDC00 ]),

    //
    // Low-surrogate followed by high-surrogate.
    //

    // U+DC00 (low-surrogate)
    // U+D800 (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xD800 ]),

    // U+DC00 (low-surrogate)
    // U+DB40 (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xDB40 ]),

    // U+DC00 (low-surrogate)
    // U+DBFF (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xDBFF ]),

    // U+DD00 (low-surrogate)
    // U+D800 (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDD00, 0xD800 ]),

    // U+DD00 (low-surrogate)
    // U+DB40 (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDD00, 0xDB40 ]),

    // U+DD00 (low-surrogate)
    // U+DBFF (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDD00, 0xDBFF ]),

    // U+DFFF (low-surrogate)
    // U+D800 (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDFFF, 0xD800 ]),

    // U+DFFF (low-surrogate)
    // U+DB40 (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDFFF, 0xDB40 ]),

    // U+DFFF (low-surrogate)
    // U+DBFF (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDFFF, 0xDBFF ]),

    // U+DC00 (low-surrogate)
    // U+D800 (high-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [], [ 0xFFFD, 0xFFFD, 0x0041 ],
      [ 0xDC00, 0xD800, 0x0041 ]),

    // U+DC00 (low-surrogate)
    // U+D800 (high-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [], [ 0xFFFD, 0xFFFD, 0x0001_0000 ],
      [ 0xDC00, 0xD800, 0xD800, 0xDC00 ]),
  ],
]