mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
* Implement GraphemeWalker that does native grapheme breaking * Bridged strings use native grapheme breaking for forward strides * Implement bidirectional native grapheme breaking for native and foreign strings * Remove ICU's grapheme breaking support * Use UnicodeScalarView to implement GraphemeWalker use an Iterator approach remove Iterator conformance * Incorporate Michael's feedback more comments addressed fix crlf bug * Try bringing back some old fast paths * Parameterize nextBoundary and previousBoundary Parameterize nextBoundary and previousBoundary * Implement Michael's suggestions
167 lines
9.2 KiB
C++
167 lines
9.2 KiB
C++
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2021 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// This was auto-generated by utils/gen-unicode-data/GenGraphemeBreakProperty,
|
|
// please do not edit this file yourself!
|
|
|
|
#include "../SwiftShims/UnicodeData.h"
|
|
|
|
// Packed grapheme-break property ranges, consumed by
// _swift_stdlib_getGraphemeBreakProperty.
//
// Each 32-bit entry describes one inclusive range of Unicode scalars:
//
//   bits  0-20  first scalar of the range
//   bits 21-28  number of additional scalars in the range (last - first)
//   bits 29-31  property enum value returned to the caller
//
// When the enum value is 5 (extendedPictographic with an extended range),
// bit 29 is additionally read as a ninth bit of the range length, so those
// entries can span up to 511 additional scalars.
//
// The lookup binary-searches this table, so entries must stay sorted by
// first scalar and must not overlap.
//
// The table is only ever read, so it is declared const.
// NOTE(review): this file is generated; the generator should emit the same
// declaration so the qualifier is not lost on regeneration.
static const __swift_uint32_t _swift_stdlib_graphemeBreakProperties[610] = {
  0x3e00000, 0x400007f, 0x800000a9, 0xad, 0x800000ae, 0x2de00300, 0x20c00483,
  0x25800591, 0x200005bf, 0x202005c1, 0x202005c4, 0x200005c7, 0x40a00600,
  0x21400610, 0x61c, 0x2280064b, 0x20000670, 0x20c006d6, 0x400006dd, 0x20a006df,
  0x202006e7, 0x206006ea, 0x4000070f, 0x20000711, 0x23400730, 0x214007a6,
  0x210007eb, 0x200007fd, 0x20600816, 0x2100081b, 0x20400825, 0x20800829,
  0x20400859, 0x21c008d3, 0x400008e2, 0x23e008e3, 0x60000903, 0x2000093a,
  0x6000093b, 0x2000093c, 0x6040093e, 0x20e00941, 0x60600949, 0x2000094d,
  0x6020094e, 0x20c00951, 0x20200962, 0x20000981, 0x60200982, 0x200009bc,
  0x200009be, 0x602009bf, 0x206009c1, 0x602009c7, 0x602009cb, 0x200009cd,
  0x200009d7, 0x202009e2, 0x200009fe, 0x20200a01, 0x60000a03, 0x20000a3c,
  0x60400a3e, 0x20200a41, 0x20200a47, 0x20400a4b, 0x20000a51, 0x20200a70,
  0x20000a75, 0x20200a81, 0x60000a83, 0x20000abc, 0x60400abe, 0x20800ac1,
  0x20200ac7, 0x60000ac9, 0x60200acb, 0x20000acd, 0x20200ae2, 0x20a00afa,
  0x20000b01, 0x60200b02, 0x20000b3c, 0x20200b3e, 0x60000b40, 0x20600b41,
  0x60200b47, 0x60200b4b, 0x20000b4d, 0x20400b55, 0x20200b62, 0x20000b82,
  0x20000bbe, 0x60000bbf, 0x20000bc0, 0x60200bc1, 0x60400bc6, 0x60400bca,
  0x20000bcd, 0x20000bd7, 0x20000c00, 0x60400c01, 0x20000c04, 0x20400c3e,
  0x60600c41, 0x20400c46, 0x20600c4a, 0x20200c55, 0x20200c62, 0x20000c81,
  0x60200c82, 0x20000cbc, 0x60000cbe, 0x20000cbf, 0x60200cc0, 0x20000cc2,
  0x60200cc3, 0x20000cc6, 0x60200cc7, 0x60200cca, 0x20200ccc, 0x20200cd5,
  0x20200ce2, 0x20200d00, 0x60200d02, 0x20200d3b, 0x20000d3e, 0x60200d3f,
  0x20600d41, 0x60400d46, 0x60400d4a, 0x20000d4d, 0x40000d4e, 0x20000d57,
  0x20200d62, 0x20000d81, 0x60200d82, 0x20000dca, 0x20000dcf, 0x60200dd0,
  0x20400dd2, 0x20000dd6, 0x60c00dd8, 0x20000ddf, 0x60200df2, 0x20000e31,
  0x60000e33, 0x20c00e34, 0x20e00e47, 0x20000eb1, 0x60000eb3, 0x21000eb4,
  0x20a00ec8, 0x20200f18, 0x20000f35, 0x20000f37, 0x20000f39, 0x60200f3e,
  0x21a00f71, 0x60000f7f, 0x20800f80, 0x20200f86, 0x21400f8d, 0x24600f99,
  0x20000fc6, 0x2060102d, 0x60001031, 0x20a01032, 0x20201039, 0x6020103b,
  0x2020103d, 0x60201056, 0x20201058, 0x2040105e, 0x20601071, 0x20001082,
  0x60001084, 0x20201085, 0x2000108d, 0x2000109d, 0x2040135d, 0x20401712,
  0x20401732, 0x20201752, 0x20201772, 0x202017b4, 0x600017b6, 0x20c017b7,
  0x60e017be, 0x200017c6, 0x602017c7, 0x214017c9, 0x200017dd, 0x2040180b,
  0x180e, 0x20201885, 0x200018a9, 0x20401920, 0x60601923, 0x20201927,
  0x60401929, 0x60201930, 0x20001932, 0x60a01933, 0x20401939, 0x20201a17,
  0x60201a19, 0x20001a1b, 0x60001a55, 0x20001a56, 0x60001a57, 0x20c01a58,
  0x20001a60, 0x20001a62, 0x20e01a65, 0x60a01a6d, 0x21201a73, 0x20001a7f,
  0x22001ab0, 0x20601b00, 0x60001b04, 0x20c01b34, 0x60001b3b, 0x20001b3c,
  0x60801b3d, 0x20001b42, 0x60201b43, 0x21001b6b, 0x20201b80, 0x60001b82,
  0x60001ba1, 0x20601ba2, 0x60201ba6, 0x20201ba8, 0x60001baa, 0x20401bab,
  0x20001be6, 0x60001be7, 0x20201be8, 0x60401bea, 0x20001bed, 0x60001bee,
  0x20401bef, 0x60201bf2, 0x60e01c24, 0x20e01c2c, 0x60201c34, 0x20201c36,
  0x20401cd0, 0x21801cd4, 0x60001ce1, 0x20c01ce2, 0x20001ced, 0x20001cf4,
  0x60001cf7, 0x20201cf8, 0x27201dc0, 0x20801dfb, 0x200b, 0x2000200c, 0x20200e,
  0xc02028, 0x8000203c, 0x80002049, 0x1e02060, 0x240020d0, 0x80002122,
  0x80002139, 0x80a02194, 0x802021a9, 0x8020231a, 0x80002328, 0x80002388,
  0x800023cf, 0x814023e9, 0x804023f8, 0x800024c2, 0x802025aa, 0x800025b6,
  0x800025c0, 0x806025fb, 0x80a02600, 0x81602607, 0x8e202614, 0x8ea02690,
  0x81402708, 0x80002714, 0x80002716, 0x8000271d, 0x80002721, 0x80002728,
  0x80202733, 0x80002744, 0x80002747, 0x8000274c, 0x8000274e, 0x80402753,
  0x80002757, 0x80802763, 0x80402795, 0x800027a1, 0x800027b0, 0x800027bf,
  0x80202934, 0x80402b05, 0x80202b1b, 0x80002b50, 0x80002b55, 0x20402cef,
  0x20002d7f, 0x23e02de0, 0x20a0302a, 0x80003030, 0x8000303d, 0x20203099,
  0x80003297, 0x80003299, 0x2060a66f, 0x2120a674, 0x2020a69e, 0x2020a6f0,
  0x2000a802, 0x2000a806, 0x2000a80b, 0x6020a823, 0x2020a825, 0x6000a827,
  0x2000a82c, 0x6020a880, 0x61e0a8b4, 0x2020a8c4, 0x2220a8e0, 0x2000a8ff,
  0x20e0a926, 0x2140a947, 0x6020a952, 0x2040a980, 0x6000a983, 0x2000a9b3,
  0x6020a9b4, 0x2060a9b6, 0x6020a9ba, 0x2020a9bc, 0x6040a9be, 0x2000a9e5,
  0x20a0aa29, 0x6020aa2f, 0x2020aa31, 0x6020aa33, 0x2020aa35, 0x2000aa43,
  0x2000aa4c, 0x6000aa4d, 0x2000aa7c, 0x2000aab0, 0x2040aab2, 0x2020aab7,
  0x2020aabe, 0x2000aac1, 0x6000aaeb, 0x2020aaec, 0x6020aaee, 0x6000aaf5,
  0x2000aaf6, 0x6020abe3, 0x2000abe5, 0x6020abe6, 0x2000abe8, 0x6020abe9,
  0x6000abec, 0x2000abed, 0x2000fb1e, 0x21e0fe00, 0x21e0fe20, 0xfeff,
  0x2020ff9e, 0x160fff0, 0x200101fd, 0x200102e0, 0x20810376, 0x20410a01,
  0x20210a05, 0x20610a0c, 0x20410a38, 0x20010a3f, 0x20210ae5, 0x20610d24,
  0x20210eab, 0x21410f46, 0x60011000, 0x20011001, 0x60011002, 0x21c11038,
  0x2041107f, 0x60011082, 0x604110b0, 0x206110b3, 0x602110b7, 0x202110b9,
  0x400110bd, 0x400110cd, 0x20411100, 0x20811127, 0x6001112c, 0x20e1112d,
  0x60211145, 0x20011173, 0x20211180, 0x60011182, 0x604111b3, 0x210111b6,
  0x602111bf, 0x402111c2, 0x206111c9, 0x600111ce, 0x200111cf, 0x6041122c,
  0x2041122f, 0x60211232, 0x20011234, 0x60011235, 0x20211236, 0x2001123e,
  0x200112df, 0x604112e0, 0x20e112e3, 0x20211300, 0x60211302, 0x2021133b,
  0x2001133e, 0x6001133f, 0x20011340, 0x60611341, 0x60211347, 0x6041134b,
  0x20011357, 0x60211362, 0x20c11366, 0x20811370, 0x60411435, 0x20e11438,
  0x60211440, 0x20411442, 0x60011445, 0x20011446, 0x2001145e, 0x200114b0,
  0x602114b1, 0x20a114b3, 0x600114b9, 0x200114ba, 0x602114bb, 0x200114bd,
  0x600114be, 0x202114bf, 0x600114c1, 0x202114c2, 0x200115af, 0x602115b0,
  0x206115b2, 0x606115b8, 0x202115bc, 0x600115be, 0x202115bf, 0x202115dc,
  0x60411630, 0x20e11633, 0x6021163b, 0x2001163d, 0x6001163e, 0x2021163f,
  0x200116ab, 0x600116ac, 0x200116ad, 0x602116ae, 0x20a116b0, 0x600116b6,
  0x200116b7, 0x2041171d, 0x60211720, 0x20611722, 0x60011726, 0x20811727,
  0x6041182c, 0x2101182f, 0x60011838, 0x20211839, 0x20011930, 0x60811931,
  0x60211937, 0x2021193b, 0x6001193d, 0x2001193e, 0x4001193f, 0x60011940,
  0x40011941, 0x60011942, 0x20011943, 0x604119d1, 0x206119d4, 0x202119da,
  0x606119dc, 0x200119e0, 0x600119e4, 0x21211a01, 0x20a11a33, 0x60011a39,
  0x40011a3a, 0x20611a3b, 0x20011a47, 0x20a11a51, 0x60211a57, 0x20411a59,
  0x40a11a84, 0x21811a8a, 0x60011a97, 0x20211a98, 0x60011c2f, 0x20c11c30,
  0x20a11c38, 0x60011c3e, 0x20011c3f, 0x22a11c92, 0x60011ca9, 0x20c11caa,
  0x60011cb1, 0x20211cb2, 0x60011cb4, 0x20211cb5, 0x20a11d31, 0x20011d3a,
  0x20211d3c, 0x20c11d3f, 0x40011d46, 0x20011d47, 0x60811d8a, 0x20211d90,
  0x60211d93, 0x20011d95, 0x60011d96, 0x20011d97, 0x20211ef3, 0x60211ef5,
  0x1013430, 0x20816af0, 0x20c16b30, 0x20016f4f, 0x66c16f51, 0x20616f8f,
  0x20016fe4, 0x60216ff0, 0x2021bc9d, 0x61bca0, 0x2001d165, 0x6001d166,
  0x2041d167, 0x6001d16d, 0x2081d16e, 0xe1d173, 0x20e1d17b, 0x20c1d185,
  0x2061d1aa, 0x2041d242, 0x26c1da00, 0x2621da3b, 0x2001da75, 0x2001da84,
  0x2081da9b, 0x21c1daa1, 0x20c1e000, 0x2201e008, 0x20c1e01b, 0x2021e023,
  0x2081e026, 0x20c1e130, 0x2061e2ec, 0x20c1e8d0, 0x20c1e944, 0x9fe1f000,
  0x8041f10d, 0x8001f12f, 0x80a1f16c, 0x8021f17e, 0x8001f18e, 0x8121f191,
  0x8701f1ad, 0x81c1f201, 0x8001f21a, 0x8001f22f, 0x8101f232, 0x8061f23c,
  0xb621f249, 0x2081f3fb, 0xa7a1f400, 0xa121f546, 0x8fe1f680, 0x8161f774,
  0x8541f7d5, 0x8061f80c, 0x80e1f848, 0x80a1f85a, 0x80e1f888, 0x8a21f8ae,
  0x85c1f90c, 0x8121f93c, 0xb701f947, 0x3ee0000, 0x2bee0020, 0xfee0080,
  0x3dee0100,
};
|
|
|
|
// Returns the grapheme break property enum value for `scalar`.
//
// Binary-searches _swift_stdlib_graphemeBreakProperties, whose entries pack
// an inclusive scalar range and a 3-bit enum value into 32 bits:
//
//   bits  0-20  first scalar of the range
//   bits 21-28  number of additional scalars in the range
//   bits 29-31  enum value
//
// Returns the entry's enum value (0-7) when `scalar` falls inside a range,
// or 0xFF when no range contains it.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getGraphemeBreakProperty(__swift_uint32_t scalar) {
  // Take the entry count from the table itself so it cannot drift from the
  // array declaration when the data is regenerated.
  const int entryCount =
      (int)(sizeof(_swift_stdlib_graphemeBreakProperties) /
            sizeof(_swift_stdlib_graphemeBreakProperties[0]));

  int low = 0;
  int high = entryCount - 1;

  while (high >= low) {
    const int idx = low + (high - low) / 2;
    const __swift_uint32_t entry = _swift_stdlib_graphemeBreakProperties[idx];

    // Low 21 bits: first scalar covered by this entry.
    const __swift_uint32_t lower = entry & 0x1FFFFF;

    // Top 3 bits: the property enum value.
    const __swift_uint8_t enumValue = (__swift_uint8_t)(entry >> 29);

    // Bits 21-28 hold the range length. Special case: extendedPictographic
    // (enum value 5) uses an extra bit for the range, so bit 29 is read as a
    // ninth length bit for those entries.
    const __swift_uint32_t lengthMask = (enumValue == 5) ? 0x1FF : 0xFF;
    const __swift_uint32_t upper = lower + ((entry >> 21) & lengthMask);

    if (scalar < lower) {
      high = idx - 1;
      continue;
    }

    if (scalar > upper) {
      low = idx + 1;
      continue;
    }

    // lower <= scalar <= upper: this entry covers the scalar.
    return enumValue;
  }

  // If we made it out here, then our scalar was not found in the grapheme
  // array (this occurs when a scalar doesn't map to any grapheme break
  // property). Return the max value here to indicate .any.
  return 0xFF;
}
|