mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
673 lines
18 KiB
Swift
673 lines
18 KiB
Swift
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
import StdlibUnittest
|
|
|
|
/// A single Unicode test vector: a string plus the UTF-8 code units, UTF-16
/// code units and scalar values supplied alongside it by the test author.
///
/// The UTF-32 views are derived from the stored scalars rather than stored
/// separately.
public struct UTFTest {
  /// Bit flags attached to a test vector.
  ///
  /// NOTE(review): the consumers of these flags are not visible here; judging
  /// by the names they mark the UTF-8 / UTF-16 form as invalid -- confirm
  /// against the call sites.
  public struct Flags : OptionSet {
    public let rawValue: Int

    public init(rawValue: Int) {
      self.rawValue = rawValue
    }

    /// Bit 0.
    public static let utf8IsInvalid = Flags(rawValue: 0b01)
    /// Bit 1.
    public static let utf16IsInvalid = Flags(rawValue: 0b10)
  }

  public let string: String
  public let utf8: [UInt8]
  public let utf16: [UInt16]
  public let unicodeScalars: [Unicode.Scalar]
  public let unicodeScalarsRepairedTail: [Unicode.Scalar]
  public let flags: Flags

  /// Where the test vector was written (defaults to the call site of `init`).
  public let loc: SourceLoc

  /// The numeric values of `unicodeScalars`.
  public var utf32: [UInt32] {
    return unicodeScalars.map { $0.value }
  }

  /// The numeric values of `unicodeScalarsRepairedTail`.
  public var utf32RepairedTail: [UInt32] {
    return unicodeScalarsRepairedTail.map { $0.value }
  }

  /// Creates a test vector.
  ///
  /// - Parameters:
  ///   - string: The string under test.
  ///   - utf8: The UTF-8 code units paired with `string`.
  ///   - utf16: The UTF-16 code units paired with `string`.
  ///   - scalars: Scalar values; each must be a valid Unicode scalar value,
  ///     otherwise initialization traps.
  ///   - scalarsRepairedTail: Additional scalar values, validated the same
  ///     way as `scalars`. Defaults to empty.
  ///   - flags: Flags for this vector. Defaults to none.
  ///   - file: Recorded in `loc`; defaults to the caller's file.
  ///   - line: Recorded in `loc`; defaults to the caller's line.
  public init(
    string: String,
    utf8: [UInt8],
    utf16: [UInt16],
    scalars: [UInt32],
    scalarsRepairedTail: [UInt32] = [],
    flags: Flags = [],
    file: String = #file, line: UInt = #line
  ) {
    // Converts raw values to scalars. The force-unwrap is intentional: a
    // surrogate or out-of-range value in the test data is an authoring
    // mistake and should stop the run immediately.
    func validatedScalars(_ rawValues: [UInt32]) -> [Unicode.Scalar] {
      return rawValues.map { rawValue in Unicode.Scalar(rawValue)! }
    }

    self.string = string
    self.utf8 = utf8
    self.utf16 = utf16
    self.unicodeScalars = validatedScalars(scalars)
    self.unicodeScalarsRepairedTail = validatedScalars(scalarsRepairedTail)
    self.flags = flags
    self.loc = SourceLoc(file, line, comment: "test data")
  }
}
|
|
|
|
/// Well-formed test vectors, grouped by the length of the UTF-8 encoding of
/// the scalars involved.
///
/// Each entry pairs a string literal with the UTF-8 code units, UTF-16 code
/// units and scalar values written out by hand. The comment above an entry
/// names the scalars in the order they appear in the string.
public let utfTests: [UTFTest] = [
  //
  // Empty sequence.
  //

  UTFTest(
    string: "",
    utf8: [],
    utf16: [],
    scalars: []),

  //
  // 1-byte sequences.
  //

  // U+0000 NULL
  UTFTest(
    string: "\u{0000}",
    utf8: [ 0x00 ],
    utf16: [ 0x00 ],
    scalars: [ 0x00 ]),

  // U+0041 LATIN CAPITAL LETTER A
  UTFTest(
    string: "A",
    utf8: [ 0x41 ],
    utf16: [ 0x41 ],
    scalars: [ 0x41 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  UTFTest(
    string: "AB",
    utf8: [ 0x41, 0x42 ],
    utf16: [ 0x41, 0x42 ],
    scalars: [ 0x41, 0x42 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  UTFTest(
    string: "ABC",
    utf8: [ 0x41, 0x42, 0x43 ],
    utf16: [ 0x41, 0x42, 0x43 ],
    scalars: [ 0x41, 0x42, 0x43 ]),

  // U+0000 NULL
  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0000 NULL
  UTFTest(
    string: "\u{0000}AB\u{0000}",
    utf8: [ 0x00, 0x41, 0x42, 0x00 ],
    utf16: [ 0x00, 0x41, 0x42, 0x00 ],
    scalars: [ 0x00, 0x41, 0x42, 0x00 ]),

  // U+007F DELETE
  UTFTest(
    string: "\u{007F}",
    utf8: [ 0x7F ],
    utf16: [ 0x7F ],
    scalars: [ 0x7F ]),

  //
  // 2-byte sequences.
  //

  // U+0283 LATIN SMALL LETTER ESH
  UTFTest(
    string: "\u{0283}",
    utf8: [ 0xCA, 0x83 ],
    utf16: [ 0x0283 ],
    scalars: [ 0x0283 ]),

  // U+03BA GREEK SMALL LETTER KAPPA
  // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
  // U+03C3 GREEK SMALL LETTER SIGMA
  // U+03BC GREEK SMALL LETTER MU
  // U+03B5 GREEK SMALL LETTER EPSILON
  //
  // Note: U+1F79 occupies three bytes in UTF-8 (0xE1, 0xBD, 0xB9); the other
  // four scalars occupy two bytes each.
  UTFTest(
    string: "\u{03BA}\u{1F79}\u{03C3}\u{03BC}\u{03B5}",
    utf8: [ 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 ],
    utf16: [ 0x03BA, 0x1F79, 0x03C3, 0x03BC, 0x03B5 ],
    scalars: [ 0x03BA, 0x1F79, 0x03C3, 0x03BC, 0x03B5 ]),

  // U+0430 CYRILLIC SMALL LETTER A
  // U+0431 CYRILLIC SMALL LETTER BE
  // U+0432 CYRILLIC SMALL LETTER VE
  UTFTest(
    string: "\u{0430}\u{0431}\u{0432}",
    utf8: [ 0xD0, 0xB0, 0xD0, 0xB1, 0xD0, 0xB2 ],
    utf16: [ 0x0430, 0x0431, 0x0432 ],
    scalars: [ 0x0430, 0x0431, 0x0432 ]),

  //
  // 3-byte sequences.
  //

  // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
  // U+6587 CJK UNIFIED IDEOGRAPH-6587
  UTFTest(
    string: "\u{4F8b}\u{6587}",
    utf8: [ 0xE4, 0xBE, 0x8B, 0xE6, 0x96, 0x87 ],
    utf16: [ 0x4F8B, 0x6587 ],
    scalars: [ 0x4F8B, 0x6587 ]),

  // U+D55C HANGUL SYLLABLE HAN
  // U+AE00 HANGUL SYLLABLE GEUL
  UTFTest(
    string: "\u{d55c}\u{ae00}",
    utf8: [ 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80 ],
    utf16: [ 0xD55C, 0xAE00 ],
    scalars: [ 0xD55C, 0xAE00 ]),

  // U+1112 HANGUL CHOSEONG HIEUH
  // U+1161 HANGUL JUNGSEONG A
  // U+11AB HANGUL JONGSEONG NIEUN
  // U+1100 HANGUL CHOSEONG KIYEOK
  // U+1173 HANGUL JUNGSEONG EU
  // U+11AF HANGUL JONGSEONG RIEUL
  UTFTest(
    string: "\u{1112}\u{1161}\u{11ab}\u{1100}\u{1173}\u{11af}",
    utf8:
      [ 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
        0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF ],
    utf16: [ 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF ],
    scalars: [ 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF ]),

  // U+3042 HIRAGANA LETTER A
  // U+3044 HIRAGANA LETTER I
  // U+3046 HIRAGANA LETTER U
  // U+3048 HIRAGANA LETTER E
  // U+304A HIRAGANA LETTER O
  UTFTest(
    string: "\u{3042}\u{3044}\u{3046}\u{3048}\u{304a}",
    utf8:
      [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84, 0xE3, 0x81, 0x86,
        0xE3, 0x81, 0x88, 0xE3, 0x81, 0x8A ],
    utf16: [ 0x3042, 0x3044, 0x3046, 0x3048, 0x304A ],
    scalars: [ 0x3042, 0x3044, 0x3046, 0x3048, 0x304A ]),

  // U+D7FF (unassigned)
  UTFTest(
    string: "\u{D7FF}",
    utf8: [ 0xED, 0x9F, 0xBF ],
    utf16: [ 0xD7FF ],
    scalars: [ 0xD7FF ]),

  // U+E000 (private use)
  UTFTest(
    string: "\u{E000}",
    utf8: [ 0xEE, 0x80, 0x80 ],
    utf16: [ 0xE000 ],
    scalars: [ 0xE000 ]),

  // U+FFFD REPLACEMENT CHARACTER
  UTFTest(
    string: "\u{FFFD}",
    utf8: [ 0xEF, 0xBF, 0xBD ],
    utf16: [ 0xFFFD ],
    scalars: [ 0xFFFD ]),

  // U+FFFF (noncharacter)
  UTFTest(
    string: "\u{FFFF}",
    utf8: [ 0xEF, 0xBF, 0xBF ],
    utf16: [ 0xFFFF ],
    scalars: [ 0xFFFF ]),

  //
  // 4-byte sequences.
  //

  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "\u{1F425}",
    utf8: [ 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0xD83D, 0xDC25 ],
    scalars: [ 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "A\u{1F425}",
    utf8: [ 0x41, 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0x41, 0xD83D, 0xDC25 ],
    scalars: [ 0x41, 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "AB\u{1F425}",
    utf8: [ 0x41, 0x42, 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0x41, 0x42, 0xD83D, 0xDC25 ],
    scalars: [ 0x41, 0x42, 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "ABC\u{1F425}",
    utf8: [ 0x41, 0x42, 0x43, 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0x41, 0x42, 0x43, 0xD83D, 0xDC25 ],
    scalars: [ 0x41, 0x42, 0x43, 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  // U+0044 LATIN CAPITAL LETTER D
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "ABCD\u{1F425}",
    utf8: [ 0x41, 0x42, 0x43, 0x44, 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0x41, 0x42, 0x43, 0x44, 0xD83D, 0xDC25 ],
    scalars: [ 0x41, 0x42, 0x43, 0x44, 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  // U+0044 LATIN CAPITAL LETTER D
  // U+0045 LATIN CAPITAL LETTER E
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "ABCDE\u{1F425}",
    utf8: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0xD83D, 0xDC25 ],
    scalars: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  // U+0044 LATIN CAPITAL LETTER D
  // U+0045 LATIN CAPITAL LETTER E
  // U+0046 LATIN CAPITAL LETTER F
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "ABCDEF\u{1F425}",
    utf8: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0xD83D, 0xDC25 ],
    scalars: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  // U+0044 LATIN CAPITAL LETTER D
  // U+0045 LATIN CAPITAL LETTER E
  // U+0046 LATIN CAPITAL LETTER F
  // U+0047 LATIN CAPITAL LETTER G
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "ABCDEFG\u{1F425}",
    utf8: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0xF0, 0x9F, 0x90, 0xA5 ],
    utf16: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0xD83D, 0xDC25 ],
    scalars: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  // U+0044 LATIN CAPITAL LETTER D
  // U+0045 LATIN CAPITAL LETTER E
  // U+0046 LATIN CAPITAL LETTER F
  // U+0047 LATIN CAPITAL LETTER G
  // U+0048 LATIN CAPITAL LETTER H
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "ABCDEFGH\u{1F425}",
    utf8:
      [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
        0xF0, 0x9F, 0x90, 0xA5 ],
    utf16:
      [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
        0xD83D, 0xDC25 ],
    scalars:
      [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x0001_F425 ]),

  // U+0041 LATIN CAPITAL LETTER A
  // U+0042 LATIN CAPITAL LETTER B
  // U+0043 LATIN CAPITAL LETTER C
  // U+0044 LATIN CAPITAL LETTER D
  // U+0045 LATIN CAPITAL LETTER E
  // U+0046 LATIN CAPITAL LETTER F
  // U+0047 LATIN CAPITAL LETTER G
  // U+0048 LATIN CAPITAL LETTER H
  // U+0049 LATIN CAPITAL LETTER I
  // U+1F425 FRONT-FACING BABY CHICK
  UTFTest(
    string: "ABCDEFGHI\u{1F425}",
    utf8:
      [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
        0xF0, 0x9F, 0x90, 0xA5 ],
    utf16:
      [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
        0xD83D, 0xDC25 ],
    scalars:
      [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x0001_F425 ]),

  // U+10000 LINEAR B SYLLABLE B008 A
  UTFTest(
    string: "\u{10000}",
    utf8: [ 0xF0, 0x90, 0x80, 0x80 ],
    utf16: [ 0xD800, 0xDC00 ],
    scalars: [ 0x0001_0000 ]),

  // U+10100 AEGEAN WORD SEPARATOR LINE
  UTFTest(
    string: "\u{10100}",
    utf8: [ 0xF0, 0x90, 0x84, 0x80 ],
    utf16: [ 0xD800, 0xDD00 ],
    scalars: [ 0x0001_0100 ]),

  // U+103FF (unassigned)
  UTFTest(
    string: "\u{103FF}",
    utf8: [ 0xF0, 0x90, 0x8F, 0xBF ],
    utf16: [ 0xD800, 0xDFFF ],
    scalars: [ 0x0001_03FF ]),

  // U+E0000 (unassigned)
  UTFTest(
    string: "\u{E0000}",
    utf8: [ 0xF3, 0xA0, 0x80, 0x80 ],
    utf16: [ 0xDB40, 0xDC00 ],
    scalars: [ 0x000E_0000 ]),

  // U+E0100 VARIATION SELECTOR-17
  UTFTest(
    string: "\u{E0100}",
    utf8: [ 0xF3, 0xA0, 0x84, 0x80 ],
    utf16: [ 0xDB40, 0xDD00 ],
    scalars: [ 0x000E_0100 ]),

  // U+E03FF (unassigned)
  UTFTest(
    string: "\u{E03FF}",
    utf8: [ 0xF3, 0xA0, 0x8F, 0xBF ],
    utf16: [ 0xDB40, 0xDFFF ],
    scalars: [ 0x000E_03FF ]),

  // U+10FC00 (private use)
  UTFTest(
    string: "\u{10FC00}",
    utf8: [ 0xF4, 0x8F, 0xB0, 0x80 ],
    utf16: [ 0xDBFF, 0xDC00 ],
    scalars: [ 0x0010_FC00 ]),

  // U+10FD00 (private use)
  UTFTest(
    string: "\u{10FD00}",
    utf8: [ 0xF4, 0x8F, 0xB4, 0x80 ],
    utf16: [ 0xDBFF, 0xDD00 ],
    scalars: [ 0x0010_FD00 ]),

  // U+10FFFF (private use, noncharacter)
  UTFTest(
    string: "\u{10FFFF}",
    utf8: [ 0xF4, 0x8F, 0xBF, 0xBF ],
    utf16: [ 0xDBFF, 0xDFFF ],
    scalars: [ 0x0010_FFFF ]),
]
|
|
|
|
|
|
/// A UTF-16 test vector: a sequence of UTF-16 code units together with two
/// lists of scalar values supplied by the test author.
///
/// NOTE(review): the consumers are not visible here. Judging by the names and
/// the data in this file, `scalarsHead` holds the scalars preceding the first
/// ill-formed code unit and `scalarsRepairedTail` the remainder after
/// U+FFFD substitution -- confirm against the call sites.
public struct UTF16Test {
  public let scalarsHead: [UInt32]
  public let scalarsRepairedTail: [UInt32]
  public let encoded: [UInt16]

  /// Where the test vector was written (defaults to the call site of `init`).
  public let loc: SourceLoc

  /// Creates a test vector.
  ///
  /// - Parameters:
  ///   - scalarsHead: Stored as `scalarsHead`.
  ///   - scalarsRepairedTail: Stored as `scalarsRepairedTail`.
  ///   - encoded: The UTF-16 code units, stored as `encoded`.
  ///   - file: Recorded in `loc`; defaults to the caller's file.
  ///   - line: Recorded in `loc`; defaults to the caller's line.
  public init(
    _ scalarsHead: [UInt32],
    _ scalarsRepairedTail: [UInt32],
    _ encoded: [UInt16],
    file: String = #file,
    line: UInt = #line
  ) {
    self.loc = SourceLoc(file, line, comment: "test data")
    self.encoded = encoded
    self.scalarsRepairedTail = scalarsRepairedTail
    self.scalarsHead = scalarsHead
  }
}
|
|
|
|
/// Ill-formed UTF-16 test vectors, keyed by category name.
///
/// Each `UTF16Test` lists, in order: the scalars before the first unpaired
/// surrogate, the remaining scalars, and the UTF-16 code units. In every
/// entry below each unpaired surrogate in the code units corresponds to one
/// U+FFFD in the second list, and well-formed units after it appear there
/// unchanged.
///
/// The type is spelled out so the compiler does not have to infer it from
/// this large nested literal.
public let utf16Tests: [String: [UTF16Test]] = [
  "Incomplete": [
    //
    // Incomplete sequences that end right before EOF.
    //

    // U+D800 (high-surrogate)
    UTF16Test([], [ 0xFFFD ], [ 0xD800 ]),

    // U+D800 (high-surrogate)
    // U+D800 (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xD800, 0xD800 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    UTF16Test([ 0x0041 ], [ 0xFFFD ], [ 0x0041, 0xD800 ]),

    // U+10000 LINEAR B SYLLABLE B008 A
    // U+D800 (high-surrogate)
    UTF16Test(
      [ 0x0001_0000 ], [ 0xFFFD ],
      [ 0xD800, 0xDC00, 0xD800 ]),

    //
    // Incomplete sequences with more code units following them.
    //

    // U+D800 (high-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test([], [ 0xFFFD, 0x0041 ], [ 0xD800, 0x0041 ]),

    // U+D800 (high-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [], [ 0xFFFD, 0x0001_0000 ],
      [ 0xD800, 0xD800, 0xDC00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0x0041 ],
      [ 0x0041, 0xD800, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0x0001_0000 ],
      [ 0x0041, 0xD800, 0xD800, 0xDC00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+DB40 (high-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0041 ],
      [ 0x0041, 0xD800, 0xDB40, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+DB40 (high-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0001_0000 ],
      [ 0x0041, 0xD800, 0xDB40, 0xD800, 0xDC00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+DB40 (high-surrogate)
    // U+DBFF (high-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0041 ],
      [ 0x0041, 0xD800, 0xDB40, 0xDBFF, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+D800 (high-surrogate)
    // U+DB40 (high-surrogate)
    // U+DBFF (high-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0001_0000 ],
      [ 0x0041, 0xD800, 0xDB40, 0xDBFF, 0xD800, 0xDC00 ]),
  ],

  "IllFormed": [
    //
    // Low-surrogate right before EOF.
    //

    // U+DC00 (low-surrogate)
    UTF16Test([], [ 0xFFFD ], [ 0xDC00 ]),

    // U+DC00 (low-surrogate)
    // U+DC00 (low-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xDC00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    UTF16Test([ 0x0041 ], [ 0xFFFD ], [ 0x0041, 0xDC00 ]),

    // U+10000 LINEAR B SYLLABLE B008 A
    // U+DC00 (low-surrogate)
    UTF16Test(
      [ 0x0001_0000 ], [ 0xFFFD ],
      [ 0xD800, 0xDC00, 0xDC00 ]),

    //
    // Low-surrogate with more code units following it.
    //

    // U+DC00 (low-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test([], [ 0xFFFD, 0x0041 ], [ 0xDC00, 0x0041 ]),

    // U+DC00 (low-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [], [ 0xFFFD, 0x0001_0000 ],
      [ 0xDC00, 0xD800, 0xDC00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0x0041 ],
      [ 0x0041, 0xDC00, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0x0001_0000 ],
      [ 0x0041, 0xDC00, 0xD800, 0xDC00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+DD00 (low-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0041 ],
      [ 0x0041, 0xDC00, 0xDD00, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+DD00 (low-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0001_0000 ],
      [ 0x0041, 0xDC00, 0xDD00, 0xD800, 0xDC00 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+DD00 (low-surrogate)
    // U+DFFF (low-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0041 ],
      [ 0x0041, 0xDC00, 0xDD00, 0xDFFF, 0x0041 ]),

    // U+0041 LATIN CAPITAL LETTER A
    // U+DC00 (low-surrogate)
    // U+DD00 (low-surrogate)
    // U+DFFF (low-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0001_0000 ],
      [ 0x0041, 0xDC00, 0xDD00, 0xDFFF, 0xD800, 0xDC00 ]),

    //
    // Low-surrogate followed by high-surrogate.
    //

    // U+DC00 (low-surrogate)
    // U+D800 (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xD800 ]),

    // U+DC00 (low-surrogate)
    // U+DB40 (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xDB40 ]),

    // U+DC00 (low-surrogate)
    // U+DBFF (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xDBFF ]),


    // U+DD00 (low-surrogate)
    // U+D800 (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDD00, 0xD800 ]),

    // U+DD00 (low-surrogate)
    // U+DB40 (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDD00, 0xDB40 ]),

    // U+DD00 (low-surrogate)
    // U+DBFF (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDD00, 0xDBFF ]),


    // U+DFFF (low-surrogate)
    // U+D800 (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDFFF, 0xD800 ]),

    // U+DFFF (low-surrogate)
    // U+DB40 (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDFFF, 0xDB40 ]),

    // U+DFFF (low-surrogate)
    // U+DBFF (high-surrogate)
    UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDFFF, 0xDBFF ]),


    // U+DC00 (low-surrogate)
    // U+D800 (high-surrogate)
    // U+0041 LATIN CAPITAL LETTER A
    UTF16Test(
      [], [ 0xFFFD, 0xFFFD, 0x0041 ],
      [ 0xDC00, 0xD800, 0x0041 ]),

    // U+DC00 (low-surrogate)
    // U+D800 (high-surrogate)
    // U+10000 LINEAR B SYLLABLE B008 A
    UTF16Test(
      [], [ 0xFFFD, 0xFFFD, 0x0001_0000 ],
      [ 0xDC00, 0xD800, 0xD800, 0xDC00 ]),
  ],
]
|
|
|