mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
Add Script and Script Extension tests (#59194)
This commit is contained in:
@@ -35,6 +35,8 @@ func readInputFile(_ filename: String) -> String {
|
||||
}
|
||||
|
||||
func parseScalars(_ string: String) -> ClosedRange<UInt32> {
|
||||
let string = string.filter { !$0.isWhitespace }
|
||||
|
||||
// If we have . appear, it means we have a legitimate range. Otherwise,
|
||||
// it's a singular scalar.
|
||||
if string.contains(".") {
|
||||
@@ -684,4 +686,433 @@ public let caseFolding: [Unicode.Scalar: String] = {
|
||||
return result
|
||||
}()
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Script/Script Extensions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
extension Unicode {
  // Note: `Script` also carries the "meta" script value "Katakana_Or_Hiragana".
  // That value is not listed in
  // https://www.unicode.org/Public/UCD/latest/ucd/Scripts.txt, but it is
  // defined by
  // https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt.
  // It is the only case that is a union of other script types, so we may want
  // to split it out at some point.

  /// The script a character belongs to.
  ///
  /// Each case's raw value is the script's long property value name, spelled
  /// with underscores between words (for example `"Old_Italic"`).
  public enum Script: String, Hashable {
    // A
    case adlam = "Adlam",
         ahom = "Ahom",
         anatolianHieroglyphs = "Anatolian_Hieroglyphs",
         arabic = "Arabic",
         armenian = "Armenian",
         avestan = "Avestan"

    // B
    case balinese = "Balinese",
         bamum = "Bamum",
         bassaVah = "Bassa_Vah",
         batak = "Batak",
         bengali = "Bengali",
         bhaiksuki = "Bhaiksuki",
         bopomofo = "Bopomofo",
         brahmi = "Brahmi",
         braille = "Braille",
         buginese = "Buginese",
         buhid = "Buhid"

    // C
    case canadianAboriginal = "Canadian_Aboriginal",
         carian = "Carian",
         caucasianAlbanian = "Caucasian_Albanian",
         chakma = "Chakma",
         cham = "Cham",
         cherokee = "Cherokee",
         chorasmian = "Chorasmian",
         common = "Common",
         coptic = "Coptic",
         cuneiform = "Cuneiform",
         cypriot = "Cypriot",
         cyrillic = "Cyrillic",
         cyproMinoan = "Cypro_Minoan"

    // D
    case deseret = "Deseret",
         devanagari = "Devanagari",
         divesAkuru = "Dives_Akuru",
         dogra = "Dogra",
         duployan = "Duployan"

    // E
    case egyptianHieroglyphs = "Egyptian_Hieroglyphs",
         elbasan = "Elbasan",
         elymaic = "Elymaic",
         ethiopic = "Ethiopic"

    // G
    case georgian = "Georgian",
         glagolitic = "Glagolitic",
         gothic = "Gothic",
         grantha = "Grantha",
         greek = "Greek",
         gujarati = "Gujarati",
         gunjalaGondi = "Gunjala_Gondi",
         gurmukhi = "Gurmukhi"

    // H
    case han = "Han",
         hangul = "Hangul",
         hanifiRohingya = "Hanifi_Rohingya",
         hanunoo = "Hanunoo",
         hatran = "Hatran",
         hebrew = "Hebrew",
         hiragana = "Hiragana"

    // I
    case imperialAramaic = "Imperial_Aramaic",
         inherited = "Inherited",
         inscriptionalPahlavi = "Inscriptional_Pahlavi",
         inscriptionalParthian = "Inscriptional_Parthian"

    // J
    case javanese = "Javanese"

    // K
    case kaithi = "Kaithi",
         kannada = "Kannada",
         katakana = "Katakana",
         katakanaOrHiragana = "Katakana_Or_Hiragana",
         kayahLi = "Kayah_Li",
         kharoshthi = "Kharoshthi",
         khitanSmallScript = "Khitan_Small_Script",
         khmer = "Khmer",
         khojki = "Khojki",
         khudawadi = "Khudawadi"

    // L
    case lao = "Lao",
         latin = "Latin",
         lepcha = "Lepcha",
         limbu = "Limbu",
         linearA = "Linear_A",
         linearB = "Linear_B",
         lisu = "Lisu",
         lycian = "Lycian",
         lydian = "Lydian"

    // M
    case mahajani = "Mahajani",
         makasar = "Makasar",
         malayalam = "Malayalam",
         mandaic = "Mandaic",
         manichaean = "Manichaean",
         marchen = "Marchen",
         masaramGondi = "Masaram_Gondi",
         medefaidrin = "Medefaidrin",
         meeteiMayek = "Meetei_Mayek",
         mendeKikakui = "Mende_Kikakui",
         meroiticCursive = "Meroitic_Cursive",
         meroiticHieroglyphs = "Meroitic_Hieroglyphs",
         miao = "Miao",
         modi = "Modi",
         mongolian = "Mongolian",
         mro = "Mro",
         multani = "Multani",
         myanmar = "Myanmar"

    // N
    case nabataean = "Nabataean",
         nandinagari = "Nandinagari",
         newa = "Newa",
         newTaiLue = "New_Tai_Lue",
         nko = "Nko",
         nushu = "Nushu",
         nyiakengPuachueHmong = "Nyiakeng_Puachue_Hmong"

    // O
    case ogham = "Ogham",
         olChiki = "Ol_Chiki",
         oldHungarian = "Old_Hungarian",
         oldItalic = "Old_Italic",
         oldNorthArabian = "Old_North_Arabian",
         oldPermic = "Old_Permic",
         oldPersian = "Old_Persian",
         oldSogdian = "Old_Sogdian",
         oldSouthArabian = "Old_South_Arabian",
         oldTurkic = "Old_Turkic",
         oldUyghur = "Old_Uyghur",
         oriya = "Oriya",
         osage = "Osage",
         osmanya = "Osmanya"

    // P
    case pahawhHmong = "Pahawh_Hmong",
         palmyrene = "Palmyrene",
         pauCinHau = "Pau_Cin_Hau",
         phagsPa = "Phags_Pa",
         phoenician = "Phoenician",
         psalterPahlavi = "Psalter_Pahlavi"

    // R
    case rejang = "Rejang",
         runic = "Runic"

    // S
    case samaritan = "Samaritan",
         saurashtra = "Saurashtra",
         sharada = "Sharada",
         shavian = "Shavian",
         siddham = "Siddham",
         signWriting = "SignWriting",
         sinhala = "Sinhala",
         sogdian = "Sogdian",
         soraSompeng = "Sora_Sompeng",
         soyombo = "Soyombo",
         sundanese = "Sundanese",
         sylotiNagri = "Syloti_Nagri",
         syriac = "Syriac"

    // T
    case tagalog = "Tagalog",
         tagbanwa = "Tagbanwa",
         taiLe = "Tai_Le",
         taiTham = "Tai_Tham",
         taiViet = "Tai_Viet",
         takri = "Takri",
         tamil = "Tamil",
         tangsa = "Tangsa",
         tangut = "Tangut",
         telugu = "Telugu",
         thaana = "Thaana",
         thai = "Thai",
         tibetan = "Tibetan",
         tifinagh = "Tifinagh",
         tirhuta = "Tirhuta",
         toto = "Toto"

    // U
    case ugaritic = "Ugaritic",
         unknown = "Unknown"

    // V
    case vai = "Vai",
         vithkuqi = "Vithkuqi"

    // W
    case wancho = "Wancho",
         warangCiti = "Warang_Citi"

    // Y
    case yezidi = "Yezidi",
         yi = "Yi"

    // Z
    case zanabazarSquare = "Zanabazar_Square"
  }
}
|
||||
|
||||
extension Character {
  /// Whether this character counts as whitespace for the purposes of pattern
  /// parsing.
  ///
  /// Only the character's first Unicode scalar is consulted; its
  /// `isPatternWhitespace` property decides the answer.
  var isPatternWhitespace: Bool {
    let leadingScalar = unicodeScalars.first!
    return leadingScalar.properties.isPatternWhitespace
  }
}
|
||||
|
||||
/// Runs `match` against loosely-normalized spellings of `str` and returns the
/// first non-nil result.
///
/// Normalization follows UAX44-LM3: pattern whitespace, `_` and `-` are
/// removed and the remainder is lowercased.
///
/// - Parameters:
///   - str: The property name or value as written by the user.
///   - requireInPrefix: When `true`, the normalized string must start with
///     "in"; that prefix is removed and `match` is tried exactly once on the
///     rest. Without the prefix the result is `nil` and `match` is not called.
///   - match: Maps a normalized candidate to a result, or `nil` for no match.
/// - Returns: The first non-nil value produced by `match`, otherwise `nil`.
func withNormalizedForms<T>(
  _ str: String, requireInPrefix: Bool = false, match: (String) throws -> T?
) rethrows -> T? {
  let isIgnorable: (Character) -> Bool = {
    $0 == "_" || $0 == "-" || $0.isPatternWhitespace
  }
  let normalized = str.filter { !isIgnorable($0) }.lowercased()

  if requireInPrefix {
    guard normalized.hasPrefix("in") else { return nil }
    let remainder = String(normalized.dropFirst(2))
    return try match(remainder)
  }

  // Prefer the spelling exactly as normalized.
  if let direct = try match(normalized) {
    return direct
  }

  // Dropping an "is" prefix is not required by UTS#18 RL1.2, but it keeps us
  // consistent with other engines and the Unicode.Scalar.Properties names.
  guard normalized.hasPrefix("is") else { return nil }
  let remainder = String(normalized.dropFirst(2))
  return try match(remainder)
}
|
||||
|
||||
/// Lookup table from a normalized script name (lowercase, no separators) to
/// its script. Holds both the short and the long aliases defined in
/// https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt.
private let scriptAliasTable: [String: Unicode.Script] = [
  "adlm": .adlam, "adlam": .adlam,
  "aghb": .caucasianAlbanian, "caucasianalbanian": .caucasianAlbanian,
  "ahom": .ahom,
  "arab": .arabic, "arabic": .arabic,
  "armi": .imperialAramaic, "imperialaramaic": .imperialAramaic,
  "armn": .armenian, "armenian": .armenian,
  "avst": .avestan, "avestan": .avestan,
  "bali": .balinese, "balinese": .balinese,
  "bamu": .bamum, "bamum": .bamum,
  "bass": .bassaVah, "bassavah": .bassaVah,
  "batk": .batak, "batak": .batak,
  "beng": .bengali, "bengali": .bengali,
  "bhks": .bhaiksuki, "bhaiksuki": .bhaiksuki,
  "bopo": .bopomofo, "bopomofo": .bopomofo,
  "brah": .brahmi, "brahmi": .brahmi,
  "brai": .braille, "braille": .braille,
  "bugi": .buginese, "buginese": .buginese,
  "buhd": .buhid, "buhid": .buhid,
  "cakm": .chakma, "chakma": .chakma,
  "cans": .canadianAboriginal, "canadianaboriginal": .canadianAboriginal,
  "cari": .carian, "carian": .carian,
  "cham": .cham,
  "cher": .cherokee, "cherokee": .cherokee,
  "chrs": .chorasmian, "chorasmian": .chorasmian,
  "copt": .coptic, "coptic": .coptic, "qaac": .coptic,
  "cpmn": .cyproMinoan, "cyprominoan": .cyproMinoan,
  "cprt": .cypriot, "cypriot": .cypriot,
  "cyrl": .cyrillic, "cyrillic": .cyrillic,
  "deva": .devanagari, "devanagari": .devanagari,
  "diak": .divesAkuru, "divesakuru": .divesAkuru,
  "dogr": .dogra, "dogra": .dogra,
  "dsrt": .deseret, "deseret": .deseret,
  "dupl": .duployan, "duployan": .duployan,
  "egyp": .egyptianHieroglyphs, "egyptianhieroglyphs": .egyptianHieroglyphs,
  "elba": .elbasan, "elbasan": .elbasan,
  "elym": .elymaic, "elymaic": .elymaic,
  "ethi": .ethiopic, "ethiopic": .ethiopic,
  "geor": .georgian, "georgian": .georgian,
  "glag": .glagolitic, "glagolitic": .glagolitic,
  "gong": .gunjalaGondi, "gunjalagondi": .gunjalaGondi,
  "gonm": .masaramGondi, "masaramgondi": .masaramGondi,
  "goth": .gothic, "gothic": .gothic,
  "gran": .grantha, "grantha": .grantha,
  "grek": .greek, "greek": .greek,
  "gujr": .gujarati, "gujarati": .gujarati,
  "guru": .gurmukhi, "gurmukhi": .gurmukhi,
  "hang": .hangul, "hangul": .hangul,
  "hani": .han, "han": .han,
  "hano": .hanunoo, "hanunoo": .hanunoo,
  "hatr": .hatran, "hatran": .hatran,
  "hebr": .hebrew, "hebrew": .hebrew,
  "hira": .hiragana, "hiragana": .hiragana,
  "hluw": .anatolianHieroglyphs, "anatolianhieroglyphs": .anatolianHieroglyphs,
  "hmng": .pahawhHmong, "pahawhhmong": .pahawhHmong,
  "hmnp": .nyiakengPuachueHmong, "nyiakengpuachuehmong": .nyiakengPuachueHmong,
  "hrkt": .katakanaOrHiragana, "katakanaorhiragana": .katakanaOrHiragana,
  "hung": .oldHungarian, "oldhungarian": .oldHungarian,
  "ital": .oldItalic, "olditalic": .oldItalic,
  "java": .javanese, "javanese": .javanese,
  "kali": .kayahLi, "kayahli": .kayahLi,
  "kana": .katakana, "katakana": .katakana,
  "khar": .kharoshthi, "kharoshthi": .kharoshthi,
  "khmr": .khmer, "khmer": .khmer,
  "khoj": .khojki, "khojki": .khojki,
  "kits": .khitanSmallScript, "khitansmallscript": .khitanSmallScript,
  "knda": .kannada, "kannada": .kannada,
  "kthi": .kaithi, "kaithi": .kaithi,
  "lana": .taiTham, "taitham": .taiTham,
  "laoo": .lao, "lao": .lao,
  "latn": .latin, "latin": .latin,
  "lepc": .lepcha, "lepcha": .lepcha,
  "limb": .limbu, "limbu": .limbu,
  "lina": .linearA, "lineara": .linearA,
  "linb": .linearB, "linearb": .linearB,
  "lisu": .lisu,
  "lyci": .lycian, "lycian": .lycian,
  "lydi": .lydian, "lydian": .lydian,
  "mahj": .mahajani, "mahajani": .mahajani,
  "maka": .makasar, "makasar": .makasar,
  "mand": .mandaic, "mandaic": .mandaic,
  "mani": .manichaean, "manichaean": .manichaean,
  "marc": .marchen, "marchen": .marchen,
  "medf": .medefaidrin, "medefaidrin": .medefaidrin,
  "mend": .mendeKikakui, "mendekikakui": .mendeKikakui,
  "merc": .meroiticCursive, "meroiticcursive": .meroiticCursive,
  "mero": .meroiticHieroglyphs, "meroitichieroglyphs": .meroiticHieroglyphs,
  "mlym": .malayalam, "malayalam": .malayalam,
  "modi": .modi,
  "mong": .mongolian, "mongolian": .mongolian,
  "mroo": .mro, "mro": .mro,
  "mtei": .meeteiMayek, "meeteimayek": .meeteiMayek,
  "mult": .multani, "multani": .multani,
  "mymr": .myanmar, "myanmar": .myanmar,
  "nand": .nandinagari, "nandinagari": .nandinagari,
  "narb": .oldNorthArabian, "oldnortharabian": .oldNorthArabian,
  "nbat": .nabataean, "nabataean": .nabataean,
  "newa": .newa,
  "nkoo": .nko, "nko": .nko,
  "nshu": .nushu, "nushu": .nushu,
  "ogam": .ogham, "ogham": .ogham,
  "olck": .olChiki, "olchiki": .olChiki,
  "orkh": .oldTurkic, "oldturkic": .oldTurkic,
  "orya": .oriya, "oriya": .oriya,
  "osge": .osage, "osage": .osage,
  "osma": .osmanya, "osmanya": .osmanya,
  "ougr": .oldUyghur, "olduyghur": .oldUyghur,
  "palm": .palmyrene, "palmyrene": .palmyrene,
  "pauc": .pauCinHau, "paucinhau": .pauCinHau,
  "perm": .oldPermic, "oldpermic": .oldPermic,
  "phag": .phagsPa, "phagspa": .phagsPa,
  "phli": .inscriptionalPahlavi, "inscriptionalpahlavi": .inscriptionalPahlavi,
  "phlp": .psalterPahlavi, "psalterpahlavi": .psalterPahlavi,
  "phnx": .phoenician, "phoenician": .phoenician,
  "plrd": .miao, "miao": .miao,
  "prti": .inscriptionalParthian, "inscriptionalparthian": .inscriptionalParthian,
  "rjng": .rejang, "rejang": .rejang,
  "rohg": .hanifiRohingya, "hanifirohingya": .hanifiRohingya,
  "runr": .runic, "runic": .runic,
  "samr": .samaritan, "samaritan": .samaritan,
  "sarb": .oldSouthArabian, "oldsoutharabian": .oldSouthArabian,
  "saur": .saurashtra, "saurashtra": .saurashtra,
  "sgnw": .signWriting, "signwriting": .signWriting,
  "shaw": .shavian, "shavian": .shavian,
  "shrd": .sharada, "sharada": .sharada,
  "sidd": .siddham, "siddham": .siddham,
  "sind": .khudawadi, "khudawadi": .khudawadi,
  "sinh": .sinhala, "sinhala": .sinhala,
  "sogd": .sogdian, "sogdian": .sogdian,
  "sogo": .oldSogdian, "oldsogdian": .oldSogdian,
  "sora": .soraSompeng, "sorasompeng": .soraSompeng,
  "soyo": .soyombo, "soyombo": .soyombo,
  "sund": .sundanese, "sundanese": .sundanese,
  "sylo": .sylotiNagri, "sylotinagri": .sylotiNagri,
  "syrc": .syriac, "syriac": .syriac,
  "tagb": .tagbanwa, "tagbanwa": .tagbanwa,
  "takr": .takri, "takri": .takri,
  "tale": .taiLe, "taile": .taiLe,
  "talu": .newTaiLue, "newtailue": .newTaiLue,
  "taml": .tamil, "tamil": .tamil,
  "tang": .tangut, "tangut": .tangut,
  "tavt": .taiViet, "taiviet": .taiViet,
  "telu": .telugu, "telugu": .telugu,
  "tfng": .tifinagh, "tifinagh": .tifinagh,
  "tglg": .tagalog, "tagalog": .tagalog,
  "thaa": .thaana, "thaana": .thaana,
  "thai": .thai,
  "tibt": .tibetan, "tibetan": .tibetan,
  "tirh": .tirhuta, "tirhuta": .tirhuta,
  "tnsa": .tangsa, "tangsa": .tangsa,
  "toto": .toto,
  "ugar": .ugaritic, "ugaritic": .ugaritic,
  "vaii": .vai, "vai": .vai,
  "vith": .vithkuqi, "vithkuqi": .vithkuqi,
  "wara": .warangCiti, "warangciti": .warangCiti,
  "wcho": .wancho, "wancho": .wancho,
  "xpeo": .oldPersian, "oldpersian": .oldPersian,
  "xsux": .cuneiform, "cuneiform": .cuneiform,
  "yezi": .yezidi, "yezidi": .yezidi,
  "yiii": .yi, "yi": .yi,
  "zanb": .zanabazarSquare, "zanabazarsquare": .zanabazarSquare,
  "zinh": .inherited, "inherited": .inherited, "qaai": .inherited,
  "zyyy": .common, "common": .common,
  "zzzz": .unknown, "unknown": .unknown,
]

/// Resolves a script name, in either its short or long alias form, to the
/// corresponding `Unicode.Script`.
///
/// The name is loosely matched through `withNormalizedForms` before it is
/// looked up in the alias table.
///
/// - Parameter value: The script name to classify, e.g. `"Latn"` or
///   `"Old_Italic"`.
/// - Returns: The matching script, or `nil` if no alias matches.
func classifyScriptProperty(
  _ value: String
) -> Unicode.Script? {
  withNormalizedForms(value) { candidate in
    scriptAliasTable[candidate]
  }
}
|
||||
|
||||
/// Parses the contents of a script data file into a per-scalar list of
/// scripts.
///
/// Each data line is expected to have the shape
/// `<scalar or scalar range> ; <script names separated by spaces> # comment`.
/// Everything from the first `#` on a line is treated as a comment, and lines
/// with no content before the comment (including blank or whitespace-only
/// lines) are skipped.
///
/// Malformed input stops the program with a message naming the offending
/// line: a data line without a `;` separator, a script name that
/// `classifyScriptProperty` does not recognize, or a code point that is not a
/// valid Unicode scalar.
///
/// - Parameter data: The full text of the data file.
/// - Returns: A dictionary mapping every scalar covered by a data line to the
///   scripts listed on that line. If a scalar appears on several lines, the
///   last line wins.
func parseScripts(
  _ data: String
) -> [Unicode.Scalar: [Unicode.Script]] {
  var result: [Unicode.Scalar: [Unicode.Script]] = [:]

  for line in data.split(separator: "\n") {
    // Strip the trailing comment first so that comment-only lines, indented
    // comments and blank lines (e.g. a lone "\r" from CRLF input) are all
    // handled the same way instead of trapping on a missing field below.
    let content = line.prefix(while: { $0 != "#" })

    guard !content.allSatisfy({ $0.isWhitespace }) else {
      continue
    }

    let fields = content.split(separator: ";")

    guard fields.count >= 2 else {
      fatalError(
        "Malformed script data line (expected '<scalars> ; <scripts>'): \(line)")
    }

    let scripts = fields[1].split(separator: " ").map { name -> Unicode.Script in
      // An unknown name means the alias table is out of date with the data
      // file; fail loudly and say which name it was.
      guard let script = classifyScriptProperty(String(name)) else {
        fatalError("Unrecognized script name '\(name)' in line: \(line)")
      }

      return script
    }

    for value in parseScalars(String(fields[0])) {
      guard let scalar = Unicode.Scalar(value) else {
        fatalError(
          "Invalid Unicode scalar U+\(String(value, radix: 16, uppercase: true)) in line: \(line)")
      }

      result[scalar] = scripts
    }
  }

  return result
}
|
||||
|
||||
/// The single script assigned to each scalar, parsed from the "Scripts.txt"
/// input file.
public let scripts: [Unicode.Scalar: Unicode.Script] = {
  let parsed = parseScripts(readInputFile("Scripts.txt"))

  // `parseScripts` yields an array per scalar, but this file only defines a
  // single script per scalar, so keep just the first entry of each array.
  var singleScripts: [Unicode.Scalar: Unicode.Script] = [:]

  for (scalar, listed) in parsed {
    singleScripts[scalar] = listed[0]
  }

  return singleScripts
}()
|
||||
|
||||
/// The scripts listed for each scalar in the "ScriptExtensions.txt" input
/// file, in the order they appear on the scalar's data line.
public let scriptExtensions: [Unicode.Scalar: [Unicode.Script]] =
  parseScripts(readInputFile("ScriptExtensions.txt"))
|
||||
|
||||
#endif
|
||||
|
||||
@@ -115,7 +115,7 @@ __swift_intptr_t _swift_stdlib_getScalarBitArrayIdx(__swift_uint32_t scalar,
|
||||
|
||||
// If our chunk index is larger than the quick look indices, then it means
|
||||
// our scalar appears in chunks who are all 0 and trailing.
|
||||
if ((__swift_uint64_t) idx > quickLookSize) {
|
||||
if ((__swift_uint64_t) idx > quickLookSize - 1) {
|
||||
return std::numeric_limits<__swift_intptr_t>::max();
|
||||
}
|
||||
|
||||
|
||||
628
utils/gen-unicode-data/Data/ScriptExtensions.txt
Normal file
628
utils/gen-unicode-data/Data/ScriptExtensions.txt
Normal file
@@ -0,0 +1,628 @@
|
||||
# ScriptExtensions-14.0.0.txt
|
||||
# Date: 2021-06-04, 02:19:38 GMT
|
||||
# © 2021 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# The Script_Extensions property indicates which characters are commonly used
|
||||
# with more than one script, but with a limited number of scripts.
|
||||
# For each code point, there is one or more property values. Each such value is a Script property value.
|
||||
# For more information, see:
|
||||
# UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/
|
||||
# Especially the sections:
|
||||
# https://www.unicode.org/reports/tr24/#Assignment_Script_Values
|
||||
# https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
|
||||
#
|
||||
# Each Script_Extensions value in this file consists of a set
|
||||
# of one or more abbreviated Script property values. The ordering of the
|
||||
# values in that set is not material, but for stability in presentation
|
||||
# it is given here as alphabetical.
|
||||
#
|
||||
# The Script_Extensions values are presented in sorted order in the file.
|
||||
# They are sorted first by the number of Script property values in their sets,
|
||||
# and then alphabetically by first differing Script property value.
|
||||
#
|
||||
# Following each distinct Script_Extensions value is the list of code
|
||||
# points associated with that value, listed in code point order.
|
||||
#
|
||||
# All code points not explicitly listed for Script_Extensions
|
||||
# have as their value the corresponding Script property value
|
||||
#
|
||||
# @missing: 0000..10FFFF; <script>
|
||||
|
||||
# ================================================
|
||||
|
||||
# Property: Script_Extensions
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng
|
||||
|
||||
1CF7 ; Beng # Mc VEDIC SIGN ATIKRAMA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva
|
||||
|
||||
1CD1 ; Deva # Mn VEDIC TONE SHARA
|
||||
1CD4 ; Deva # Mn VEDIC SIGN YAJURVEDIC MIDLINE SVARITA
|
||||
1CDB ; Deva # Mn VEDIC TONE TRIPLE SVARITA
|
||||
1CDE..1CDF ; Deva # Mn [2] VEDIC TONE TWO DOTS BELOW..VEDIC TONE THREE DOTS BELOW
|
||||
1CE2..1CE8 ; Deva # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
|
||||
1CEB..1CEC ; Deva # Lo [2] VEDIC SIGN ANUSVARA VAMAGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
|
||||
1CEE..1CF1 ; Deva # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
|
||||
|
||||
# Total code points: 18
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Dupl
|
||||
|
||||
1BCA0..1BCA3 ; Dupl # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
|
||||
|
||||
# Total code points: 4
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Grek
|
||||
|
||||
0342 ; Grek # Mn COMBINING GREEK PERISPOMENI
|
||||
0345 ; Grek # Mn COMBINING GREEK YPOGEGRAMMENI
|
||||
1DC0..1DC1 ; Grek # Mn [2] COMBINING DOTTED GRAVE ACCENT..COMBINING DOTTED ACUTE ACCENT
|
||||
|
||||
# Total code points: 4
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Hani
|
||||
|
||||
3006 ; Hani # Lo IDEOGRAPHIC CLOSING MARK
|
||||
303E..303F ; Hani # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE
|
||||
3190..3191 ; Hani # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
|
||||
3192..3195 ; Hani # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
|
||||
3196..319F ; Hani # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
|
||||
31C0..31E3 ; Hani # So [36] CJK STROKE T..CJK STROKE Q
|
||||
3220..3229 ; Hani # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
|
||||
322A..3247 ; Hani # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO
|
||||
3280..3289 ; Hani # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
|
||||
328A..32B0 ; Hani # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
|
||||
32C0..32CB ; Hani # So [12] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER
|
||||
32FF ; Hani # So SQUARE ERA NAME REIWA
|
||||
3358..3370 ; Hani # So [25] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-FOUR
|
||||
337B..337F ; Hani # So [5] SQUARE ERA NAME HEISEI..SQUARE CORPORATION
|
||||
33E0..33FE ; Hani # So [31] IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE
|
||||
1D360..1D371 ; Hani # No [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE
|
||||
1F250..1F251 ; Hani # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
|
||||
|
||||
# Total code points: 238
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Latn
|
||||
|
||||
0363..036F ; Latn # Mn [13] COMBINING LATIN SMALL LETTER A..COMBINING LATIN SMALL LETTER X
|
||||
|
||||
# Total code points: 13
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Nand
|
||||
|
||||
1CFA ; Nand # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Syrc
|
||||
|
||||
1DFA ; Syrc # Mn COMBINING DOT BELOW LEFT
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Copt
|
||||
|
||||
102E0 ; Arab Copt # Mn COPTIC EPACT THOUSANDS MARK
|
||||
102E1..102FB ; Arab Copt # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
|
||||
|
||||
# Total code points: 28
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Rohg
|
||||
|
||||
06D4 ; Arab Rohg # Po ARABIC FULL STOP
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Nkoo
|
||||
|
||||
FD3E ; Arab Nkoo # Pe ORNATE LEFT PARENTHESIS
|
||||
FD3F ; Arab Nkoo # Ps ORNATE RIGHT PARENTHESIS
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Syrc
|
||||
|
||||
064B..0655 ; Arab Syrc # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
|
||||
0670 ; Arab Syrc # Mn ARABIC LETTER SUPERSCRIPT ALEF
|
||||
|
||||
# Total code points: 12
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Thaa
|
||||
|
||||
FDF2 ; Arab Thaa # Lo ARABIC LIGATURE ALLAH ISOLATED FORM
|
||||
FDFD ; Arab Thaa # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva
|
||||
|
||||
1CD5..1CD6 ; Beng Deva # Mn [2] VEDIC TONE YAJURVEDIC AGGRAVATED INDEPENDENT SVARITA..VEDIC TONE YAJURVEDIC INDEPENDENT SVARITA
|
||||
1CD8 ; Beng Deva # Mn VEDIC TONE CANDRA BELOW
|
||||
1CE1 ; Beng Deva # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
|
||||
1CEA ; Beng Deva # Lo VEDIC SIGN ANUSVARA BAHIRGOMUKHA
|
||||
1CED ; Beng Deva # Mn VEDIC SIGN TIRYAK
|
||||
1CF5..1CF6 ; Beng Deva # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA
|
||||
A8F1 ; Beng Deva # Mn COMBINING DEVANAGARI SIGN AVAGRAHA
|
||||
|
||||
# Total code points: 9
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Bopo Hani
|
||||
|
||||
302A..302D ; Bopo Hani # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
|
||||
|
||||
# Total code points: 4
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Bugi Java
|
||||
|
||||
A9CF ; Bugi Java # Lm JAVANESE PANGRANGKEP
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cprt Linb
|
||||
|
||||
10102 ; Cprt Linb # Po AEGEAN CHECK MARK
|
||||
10137..1013F ; Cprt Linb # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cyrl Glag
|
||||
|
||||
0484 ; Cyrl Glag # Mn COMBINING CYRILLIC PALATALIZATION
|
||||
0487 ; Cyrl Glag # Mn COMBINING CYRILLIC POKRYTIE
|
||||
2E43 ; Cyrl Glag # Po DASH WITH LEFT UPTURN
|
||||
A66F ; Cyrl Glag # Mn COMBINING CYRILLIC VZMET
|
||||
|
||||
# Total code points: 4
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cyrl Latn
|
||||
|
||||
0485..0486 ; Cyrl Latn # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cyrl Perm
|
||||
|
||||
0483 ; Cyrl Perm # Mn COMBINING CYRILLIC TITLO
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cyrl Syrc
|
||||
|
||||
1DF8 ; Cyrl Syrc # Mn COMBINING DOT ABOVE LEFT
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Gran
|
||||
|
||||
1CD3 ; Deva Gran # Po VEDIC SIGN NIHSHVASA
|
||||
1CF3 ; Deva Gran # Lo VEDIC SIGN ROTATED ARDHAVISARGA
|
||||
1CF8..1CF9 ; Deva Gran # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
|
||||
|
||||
# Total code points: 4
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Nand
|
||||
|
||||
1CE9 ; Deva Nand # Lo VEDIC SIGN ANUSVARA ANTARGOMUKHA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Shrd
|
||||
|
||||
1CD7 ; Deva Shrd # Mn VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA
|
||||
1CD9 ; Deva Shrd # Mn VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER
|
||||
1CDC..1CDD ; Deva Shrd # Mn [2] VEDIC TONE KATHAKA ANUDATTA..VEDIC TONE DOT BELOW
|
||||
1CE0 ; Deva Shrd # Mn VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
|
||||
|
||||
# Total code points: 5
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Taml
|
||||
|
||||
A8F3 ; Deva Taml # Lo DEVANAGARI SIGN CANDRABINDU VIRAMA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Geor Latn
|
||||
|
||||
10FB ; Geor Latn # Po GEORGIAN PARAGRAPH SEPARATOR
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Gran Taml
|
||||
|
||||
0BE6..0BEF ; Gran Taml # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
|
||||
0BF0..0BF2 ; Gran Taml # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
|
||||
0BF3 ; Gran Taml # So TAMIL DAY SIGN
|
||||
11301 ; Gran Taml # Mn GRANTHA SIGN CANDRABINDU
|
||||
11303 ; Gran Taml # Mc GRANTHA SIGN VISARGA
|
||||
1133B..1133C ; Gran Taml # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA
|
||||
11FD0..11FD1 ; Gran Taml # No [2] TAMIL FRACTION ONE QUARTER..TAMIL FRACTION ONE HALF-1
|
||||
11FD3 ; Gran Taml # No TAMIL FRACTION THREE QUARTERS
|
||||
|
||||
# Total code points: 21
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Gujr Khoj
|
||||
|
||||
0AE6..0AEF ; Gujr Khoj # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Guru Mult
|
||||
|
||||
0A66..0A6F ; Guru Mult # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Hani Latn
|
||||
|
||||
A700..A707 ; Hani Latn # Sk [8] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER CHINESE TONE YANG RU
|
||||
|
||||
# Total code points: 8
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Hira Kana
|
||||
|
||||
3031..3035 ; Hira Kana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
|
||||
3099..309A ; Hira Kana # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
309B..309C ; Hira Kana # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
30A0 ; Hira Kana # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
|
||||
30FC ; Hira Kana # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
FF70 ; Hira Kana # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
FF9E..FF9F ; Hira Kana # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
|
||||
# Total code points: 14
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Knda Nand
|
||||
|
||||
0CE6..0CEF ; Knda Nand # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Latn Mong
|
||||
|
||||
202F ; Latn Mong # Zs NARROW NO-BREAK SPACE
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Mani Ougr
|
||||
|
||||
10AF2 ; Mani Ougr # Po MANICHAEAN PUNCTUATION DOUBLE DOT WITHIN DOT
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Mong Phag
|
||||
|
||||
1802..1803 ; Mong Phag # Po [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP
|
||||
1805 ; Mong Phag # Po MONGOLIAN FOUR DOTS
|
||||
|
||||
# Total code points: 3
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Syrc Thaa
|
||||
|
||||
061C ; Arab Syrc Thaa # Cf ARABIC LETTER MARK
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Thaa Yezi
|
||||
|
||||
0660..0669 ; Arab Thaa Yezi # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Cakm Sylo
|
||||
|
||||
09E6..09EF ; Beng Cakm Sylo # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cakm Mymr Tale
|
||||
|
||||
1040..1049 ; Cakm Mymr Tale # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cpmn Cprt Linb
|
||||
|
||||
10100..10101 ; Cpmn Cprt Linb # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cprt Lina Linb
|
||||
|
||||
10107..10133 ; Cprt Lina Linb # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
|
||||
|
||||
# Total code points: 45
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Gran Knda
|
||||
|
||||
1CF4 ; Deva Gran Knda # Mn VEDIC TONE CANDRA ABOVE
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Gran Latn
|
||||
|
||||
20F0 ; Deva Gran Latn # Mn COMBINING ASTERISK ABOVE
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Hani Hira Kana
|
||||
|
||||
303C ; Hani Hira Kana # Lo MASU MARK
|
||||
303D ; Hani Hira Kana # Po PART ALTERNATION MARK
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Kali Latn Mymr
|
||||
|
||||
A92E ; Kali Latn Mymr # Po KAYAH LI SIGN CWI
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva Gran Knda
|
||||
|
||||
1CD0 ; Beng Deva Gran Knda # Mn VEDIC TONE KARSHANA
|
||||
1CD2 ; Beng Deva Gran Knda # Mn VEDIC TONE PRENKHA
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Buhd Hano Tagb Tglg
|
||||
|
||||
1735..1736 ; Buhd Hano Tagb Tglg # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Dogr Kthi Mahj
|
||||
|
||||
0966..096F ; Deva Dogr Kthi Mahj # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Bopo Hang Hani Hira Kana
|
||||
|
||||
3003 ; Bopo Hang Hani Hira Kana # Po DITTO MARK
|
||||
3013 ; Bopo Hang Hani Hira Kana # So GETA MARK
|
||||
301C ; Bopo Hang Hani Hira Kana # Pd WAVE DASH
|
||||
301D ; Bopo Hang Hani Hira Kana # Ps REVERSED DOUBLE PRIME QUOTATION MARK
|
||||
301E..301F ; Bopo Hang Hani Hira Kana # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
|
||||
3030 ; Bopo Hang Hani Hira Kana # Pd WAVY DASH
|
||||
3037 ; Bopo Hang Hani Hira Kana # So IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
|
||||
FE45..FE46 ; Bopo Hang Hani Hira Kana # Po [2] SESAME DOT..WHITE SESAME DOT
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Nkoo Rohg Syrc Thaa Yezi
|
||||
|
||||
060C ; Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC COMMA
|
||||
061B ; Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC SEMICOLON
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Bopo Hang Hani Hira Kana Yiii
|
||||
|
||||
3001..3002 ; Bopo Hang Hani Hira Kana Yiii # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP
|
||||
3008 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT ANGLE BRACKET
|
||||
3009 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT ANGLE BRACKET
|
||||
300A ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT DOUBLE ANGLE BRACKET
|
||||
300B ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT DOUBLE ANGLE BRACKET
|
||||
300C ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT CORNER BRACKET
|
||||
300D ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT CORNER BRACKET
|
||||
300E ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE CORNER BRACKET
|
||||
300F ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE CORNER BRACKET
|
||||
3010 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT BLACK LENTICULAR BRACKET
|
||||
3011 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT BLACK LENTICULAR BRACKET
|
||||
3014 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT TORTOISE SHELL BRACKET
|
||||
3015 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT TORTOISE SHELL BRACKET
|
||||
3016 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE LENTICULAR BRACKET
|
||||
3017 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE LENTICULAR BRACKET
|
||||
3018 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE TORTOISE SHELL BRACKET
|
||||
3019 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE TORTOISE SHELL BRACKET
|
||||
301A ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE SQUARE BRACKET
|
||||
301B ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE SQUARE BRACKET
|
||||
30FB ; Bopo Hang Hani Hira Kana Yiii # Po KATAKANA MIDDLE DOT
|
||||
FF61 ; Bopo Hang Hani Hira Kana Yiii # Po HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
FF62 ; Bopo Hang Hani Hira Kana Yiii # Ps HALFWIDTH LEFT CORNER BRACKET
|
||||
FF63 ; Bopo Hang Hani Hira Kana Yiii # Pe HALFWIDTH RIGHT CORNER BRACKET
|
||||
FF64..FF65 ; Bopo Hang Hani Hira Kana Yiii # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT
|
||||
|
||||
# Total code points: 26
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Knda Mlym Orya Taml Telu
|
||||
|
||||
1CDA ; Deva Knda Mlym Orya Taml Telu # Mn VEDIC TONE DOUBLE SVARITA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Adlm Arab Nkoo Rohg Syrc Thaa Yezi
|
||||
|
||||
061F ; Adlm Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC QUESTION MARK
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva Gran Knda Nand Orya Telu Tirh
|
||||
|
||||
1CF2 ; Beng Deva Gran Knda Nand Orya Telu Tirh # Lo VEDIC SIGN ARDHAVISARGA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc
|
||||
|
||||
0640 ; Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc # Lm ARABIC TATWEEL
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh
|
||||
|
||||
A836..A837 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
|
||||
A838 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # Sc NORTH INDIC RUPEE MARK
|
||||
A839 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So NORTH INDIC QUANTITY MARK
|
||||
|
||||
# Total code points: 4
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Taml Telu Tirh
|
||||
|
||||
0952 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Taml Telu Tirh # Mn DEVANAGARI STRESS SIGN ANUDATTA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh
|
||||
|
||||
0951 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh # Mn DEVANAGARI STRESS SIGN UDATTA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Sind Takr Tirh
|
||||
|
||||
A833..A835 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE SIXTEENTH..NORTH INDIC FRACTION THREE SIXTEENTHS
|
||||
|
||||
# Total code points: 3
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Sind Takr Tirh
|
||||
|
||||
A830..A832 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE QUARTERS
|
||||
|
||||
# Total code points: 3
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh
|
||||
|
||||
0964 ; Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh # Po DEVANAGARI DANDA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Limb Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh
|
||||
|
||||
0965 ; Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Limb Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh # Po DEVANAGARI DOUBLE DANDA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# EOF
|
||||
2991
utils/gen-unicode-data/Data/Scripts.txt
Normal file
2991
utils/gen-unicode-data/Data/Scripts.txt
Normal file
File diff suppressed because it is too large
Load Diff
@@ -216,4 +216,52 @@ if #available(SwiftStdlib 5.7, *) {
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Script/Script Extensions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
if #available(SwiftStdlib 5.7, *) {
  // Compares the `_script` property reported for each scalar with the entry
  // in the parsed `scripts` table; scalars absent from the table are expected
  // to report `.unknown`.
  UnicodeScalarPropertiesTest.test("Scalar Scripts") {
    for codePoint in 0x0 ... 0x10FFFF {
      // Skip values from which no `Unicode.Scalar` can be constructed.
      guard let scalar = Unicode.Scalar(codePoint) else {
        continue
      }

      // Reinterpret the bits of `_script` as a `Unicode.Script` so the value
      // can be compared with the parsed table.
      let runtimeScript = unsafeBitCast(
        scalar.properties._script,
        to: Unicode.Script.self
      )
      let referenceScript = scripts[scalar] ?? .unknown

      expectEqual(runtimeScript, referenceScript)
    }
  }

  // Compares the `_scriptExtensions` property reported for each scalar with
  // the entry in the parsed `scriptExtensions` table; scalars absent from the
  // table are expected to report exactly their own script.
  UnicodeScalarPropertiesTest.test("Scalar Script Extensions") {
    for codePoint in 0x0 ... 0x10FFFF {
      // Skip values from which no `Unicode.Scalar` can be constructed.
      guard let scalar = Unicode.Scalar(codePoint) else {
        continue
      }

      let properties = scalar.properties

      // Reinterpret every element of `_scriptExtensions` as a
      // `Unicode.Script`.
      let runtimeExtensions = properties._scriptExtensions.map {
        unsafeBitCast($0, to: Unicode.Script.self)
      }

      // The scalar's own script is the fallback expectation used when the
      // parsed table has no entry for it.
      let runtimeScript = unsafeBitCast(
        properties._script,
        to: Unicode.Script.self
      )
      let referenceExtensions = scriptExtensions[scalar] ?? [runtimeScript]

      expectEqual(runtimeExtensions, referenceExtensions)
    }
  }
}
|
||||
|
||||
// Entry point: invoke the test runner now that the tests above have been
// declared.
runAllTests()
|
||||
|
||||
Reference in New Issue
Block a user