Files
swift-mirror/stdlib/public/stubs/UnicodeGrapheme.cpp
Alejandro Alonso 5a0bbb9f89 [stdlib] Implement native grapheme breaking for String (#37864)
* Implement GraphemeWalker that does native grapheme breaking

* Bridged strings use native grapheme breaking for forward strides

* Implement bidirectional native grapheme breaking for native and foreign strings

* Remove ICU's grapheme breaking support

* Use UnicodeScalarView to implement GraphemeWalker

use an Iterator approach

remove Iterator conformance

* Incorporate Michael's feedback

more comments addressed

fix crlf bug

* Try bringing back some old fast paths

* Parameterize nextBoundary and previousBoundary

Parameterize nextBoundary and previousBoundary

* Implement Michael's suggestions
2021-11-01 16:52:28 -07:00

167 lines
9.2 KiB
C++

//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
// This was auto-generated by utils/gen-unicode-data/GenGraphemeBreakProperty,
// please do not edit this file yourself!
#include "../SwiftShims/UnicodeData.h"
// Packed table of scalar ranges and their grapheme break property, consumed by
// _swift_stdlib_getGraphemeBreakProperty in this file via binary search, so
// entries are kept in ascending order of their starting scalar.
//
// Each 32-bit entry is decoded by that function as follows:
//   bits  0-20: the first scalar value of the range
//   bits 21-28: how many scalars follow the first one in the range
//   bits 29-31: the property enum value returned to the caller
// When the decoded enum value is 5, bit 29 is additionally read as the top bit
// of a 9-bit range count (bits 21-29).
static __swift_uint32_t _swift_stdlib_graphemeBreakProperties[610] = {
0x3e00000, 0x400007f, 0x800000a9, 0xad, 0x800000ae, 0x2de00300, 0x20c00483,
0x25800591, 0x200005bf, 0x202005c1, 0x202005c4, 0x200005c7, 0x40a00600,
0x21400610, 0x61c, 0x2280064b, 0x20000670, 0x20c006d6, 0x400006dd, 0x20a006df,
0x202006e7, 0x206006ea, 0x4000070f, 0x20000711, 0x23400730, 0x214007a6,
0x210007eb, 0x200007fd, 0x20600816, 0x2100081b, 0x20400825, 0x20800829,
0x20400859, 0x21c008d3, 0x400008e2, 0x23e008e3, 0x60000903, 0x2000093a,
0x6000093b, 0x2000093c, 0x6040093e, 0x20e00941, 0x60600949, 0x2000094d,
0x6020094e, 0x20c00951, 0x20200962, 0x20000981, 0x60200982, 0x200009bc,
0x200009be, 0x602009bf, 0x206009c1, 0x602009c7, 0x602009cb, 0x200009cd,
0x200009d7, 0x202009e2, 0x200009fe, 0x20200a01, 0x60000a03, 0x20000a3c,
0x60400a3e, 0x20200a41, 0x20200a47, 0x20400a4b, 0x20000a51, 0x20200a70,
0x20000a75, 0x20200a81, 0x60000a83, 0x20000abc, 0x60400abe, 0x20800ac1,
0x20200ac7, 0x60000ac9, 0x60200acb, 0x20000acd, 0x20200ae2, 0x20a00afa,
0x20000b01, 0x60200b02, 0x20000b3c, 0x20200b3e, 0x60000b40, 0x20600b41,
0x60200b47, 0x60200b4b, 0x20000b4d, 0x20400b55, 0x20200b62, 0x20000b82,
0x20000bbe, 0x60000bbf, 0x20000bc0, 0x60200bc1, 0x60400bc6, 0x60400bca,
0x20000bcd, 0x20000bd7, 0x20000c00, 0x60400c01, 0x20000c04, 0x20400c3e,
0x60600c41, 0x20400c46, 0x20600c4a, 0x20200c55, 0x20200c62, 0x20000c81,
0x60200c82, 0x20000cbc, 0x60000cbe, 0x20000cbf, 0x60200cc0, 0x20000cc2,
0x60200cc3, 0x20000cc6, 0x60200cc7, 0x60200cca, 0x20200ccc, 0x20200cd5,
0x20200ce2, 0x20200d00, 0x60200d02, 0x20200d3b, 0x20000d3e, 0x60200d3f,
0x20600d41, 0x60400d46, 0x60400d4a, 0x20000d4d, 0x40000d4e, 0x20000d57,
0x20200d62, 0x20000d81, 0x60200d82, 0x20000dca, 0x20000dcf, 0x60200dd0,
0x20400dd2, 0x20000dd6, 0x60c00dd8, 0x20000ddf, 0x60200df2, 0x20000e31,
0x60000e33, 0x20c00e34, 0x20e00e47, 0x20000eb1, 0x60000eb3, 0x21000eb4,
0x20a00ec8, 0x20200f18, 0x20000f35, 0x20000f37, 0x20000f39, 0x60200f3e,
0x21a00f71, 0x60000f7f, 0x20800f80, 0x20200f86, 0x21400f8d, 0x24600f99,
0x20000fc6, 0x2060102d, 0x60001031, 0x20a01032, 0x20201039, 0x6020103b,
0x2020103d, 0x60201056, 0x20201058, 0x2040105e, 0x20601071, 0x20001082,
0x60001084, 0x20201085, 0x2000108d, 0x2000109d, 0x2040135d, 0x20401712,
0x20401732, 0x20201752, 0x20201772, 0x202017b4, 0x600017b6, 0x20c017b7,
0x60e017be, 0x200017c6, 0x602017c7, 0x214017c9, 0x200017dd, 0x2040180b,
0x180e, 0x20201885, 0x200018a9, 0x20401920, 0x60601923, 0x20201927,
0x60401929, 0x60201930, 0x20001932, 0x60a01933, 0x20401939, 0x20201a17,
0x60201a19, 0x20001a1b, 0x60001a55, 0x20001a56, 0x60001a57, 0x20c01a58,
0x20001a60, 0x20001a62, 0x20e01a65, 0x60a01a6d, 0x21201a73, 0x20001a7f,
0x22001ab0, 0x20601b00, 0x60001b04, 0x20c01b34, 0x60001b3b, 0x20001b3c,
0x60801b3d, 0x20001b42, 0x60201b43, 0x21001b6b, 0x20201b80, 0x60001b82,
0x60001ba1, 0x20601ba2, 0x60201ba6, 0x20201ba8, 0x60001baa, 0x20401bab,
0x20001be6, 0x60001be7, 0x20201be8, 0x60401bea, 0x20001bed, 0x60001bee,
0x20401bef, 0x60201bf2, 0x60e01c24, 0x20e01c2c, 0x60201c34, 0x20201c36,
0x20401cd0, 0x21801cd4, 0x60001ce1, 0x20c01ce2, 0x20001ced, 0x20001cf4,
0x60001cf7, 0x20201cf8, 0x27201dc0, 0x20801dfb, 0x200b, 0x2000200c, 0x20200e,
0xc02028, 0x8000203c, 0x80002049, 0x1e02060, 0x240020d0, 0x80002122,
0x80002139, 0x80a02194, 0x802021a9, 0x8020231a, 0x80002328, 0x80002388,
0x800023cf, 0x814023e9, 0x804023f8, 0x800024c2, 0x802025aa, 0x800025b6,
0x800025c0, 0x806025fb, 0x80a02600, 0x81602607, 0x8e202614, 0x8ea02690,
0x81402708, 0x80002714, 0x80002716, 0x8000271d, 0x80002721, 0x80002728,
0x80202733, 0x80002744, 0x80002747, 0x8000274c, 0x8000274e, 0x80402753,
0x80002757, 0x80802763, 0x80402795, 0x800027a1, 0x800027b0, 0x800027bf,
0x80202934, 0x80402b05, 0x80202b1b, 0x80002b50, 0x80002b55, 0x20402cef,
0x20002d7f, 0x23e02de0, 0x20a0302a, 0x80003030, 0x8000303d, 0x20203099,
0x80003297, 0x80003299, 0x2060a66f, 0x2120a674, 0x2020a69e, 0x2020a6f0,
0x2000a802, 0x2000a806, 0x2000a80b, 0x6020a823, 0x2020a825, 0x6000a827,
0x2000a82c, 0x6020a880, 0x61e0a8b4, 0x2020a8c4, 0x2220a8e0, 0x2000a8ff,
0x20e0a926, 0x2140a947, 0x6020a952, 0x2040a980, 0x6000a983, 0x2000a9b3,
0x6020a9b4, 0x2060a9b6, 0x6020a9ba, 0x2020a9bc, 0x6040a9be, 0x2000a9e5,
0x20a0aa29, 0x6020aa2f, 0x2020aa31, 0x6020aa33, 0x2020aa35, 0x2000aa43,
0x2000aa4c, 0x6000aa4d, 0x2000aa7c, 0x2000aab0, 0x2040aab2, 0x2020aab7,
0x2020aabe, 0x2000aac1, 0x6000aaeb, 0x2020aaec, 0x6020aaee, 0x6000aaf5,
0x2000aaf6, 0x6020abe3, 0x2000abe5, 0x6020abe6, 0x2000abe8, 0x6020abe9,
0x6000abec, 0x2000abed, 0x2000fb1e, 0x21e0fe00, 0x21e0fe20, 0xfeff,
0x2020ff9e, 0x160fff0, 0x200101fd, 0x200102e0, 0x20810376, 0x20410a01,
0x20210a05, 0x20610a0c, 0x20410a38, 0x20010a3f, 0x20210ae5, 0x20610d24,
0x20210eab, 0x21410f46, 0x60011000, 0x20011001, 0x60011002, 0x21c11038,
0x2041107f, 0x60011082, 0x604110b0, 0x206110b3, 0x602110b7, 0x202110b9,
0x400110bd, 0x400110cd, 0x20411100, 0x20811127, 0x6001112c, 0x20e1112d,
0x60211145, 0x20011173, 0x20211180, 0x60011182, 0x604111b3, 0x210111b6,
0x602111bf, 0x402111c2, 0x206111c9, 0x600111ce, 0x200111cf, 0x6041122c,
0x2041122f, 0x60211232, 0x20011234, 0x60011235, 0x20211236, 0x2001123e,
0x200112df, 0x604112e0, 0x20e112e3, 0x20211300, 0x60211302, 0x2021133b,
0x2001133e, 0x6001133f, 0x20011340, 0x60611341, 0x60211347, 0x6041134b,
0x20011357, 0x60211362, 0x20c11366, 0x20811370, 0x60411435, 0x20e11438,
0x60211440, 0x20411442, 0x60011445, 0x20011446, 0x2001145e, 0x200114b0,
0x602114b1, 0x20a114b3, 0x600114b9, 0x200114ba, 0x602114bb, 0x200114bd,
0x600114be, 0x202114bf, 0x600114c1, 0x202114c2, 0x200115af, 0x602115b0,
0x206115b2, 0x606115b8, 0x202115bc, 0x600115be, 0x202115bf, 0x202115dc,
0x60411630, 0x20e11633, 0x6021163b, 0x2001163d, 0x6001163e, 0x2021163f,
0x200116ab, 0x600116ac, 0x200116ad, 0x602116ae, 0x20a116b0, 0x600116b6,
0x200116b7, 0x2041171d, 0x60211720, 0x20611722, 0x60011726, 0x20811727,
0x6041182c, 0x2101182f, 0x60011838, 0x20211839, 0x20011930, 0x60811931,
0x60211937, 0x2021193b, 0x6001193d, 0x2001193e, 0x4001193f, 0x60011940,
0x40011941, 0x60011942, 0x20011943, 0x604119d1, 0x206119d4, 0x202119da,
0x606119dc, 0x200119e0, 0x600119e4, 0x21211a01, 0x20a11a33, 0x60011a39,
0x40011a3a, 0x20611a3b, 0x20011a47, 0x20a11a51, 0x60211a57, 0x20411a59,
0x40a11a84, 0x21811a8a, 0x60011a97, 0x20211a98, 0x60011c2f, 0x20c11c30,
0x20a11c38, 0x60011c3e, 0x20011c3f, 0x22a11c92, 0x60011ca9, 0x20c11caa,
0x60011cb1, 0x20211cb2, 0x60011cb4, 0x20211cb5, 0x20a11d31, 0x20011d3a,
0x20211d3c, 0x20c11d3f, 0x40011d46, 0x20011d47, 0x60811d8a, 0x20211d90,
0x60211d93, 0x20011d95, 0x60011d96, 0x20011d97, 0x20211ef3, 0x60211ef5,
0x1013430, 0x20816af0, 0x20c16b30, 0x20016f4f, 0x66c16f51, 0x20616f8f,
0x20016fe4, 0x60216ff0, 0x2021bc9d, 0x61bca0, 0x2001d165, 0x6001d166,
0x2041d167, 0x6001d16d, 0x2081d16e, 0xe1d173, 0x20e1d17b, 0x20c1d185,
0x2061d1aa, 0x2041d242, 0x26c1da00, 0x2621da3b, 0x2001da75, 0x2001da84,
0x2081da9b, 0x21c1daa1, 0x20c1e000, 0x2201e008, 0x20c1e01b, 0x2021e023,
0x2081e026, 0x20c1e130, 0x2061e2ec, 0x20c1e8d0, 0x20c1e944, 0x9fe1f000,
0x8041f10d, 0x8001f12f, 0x80a1f16c, 0x8021f17e, 0x8001f18e, 0x8121f191,
0x8701f1ad, 0x81c1f201, 0x8001f21a, 0x8001f22f, 0x8101f232, 0x8061f23c,
0xb621f249, 0x2081f3fb, 0xa7a1f400, 0xa121f546, 0x8fe1f680, 0x8161f774,
0x8541f7d5, 0x8061f80c, 0x80e1f848, 0x80a1f85a, 0x80e1f888, 0x8a21f8ae,
0x85c1f90c, 0x8121f93c, 0xb701f947, 0x3ee0000, 0x2bee0020, 0xfee0080,
0x3dee0100,
};
// Finds the grapheme break property enum value for `scalar` by binary
// searching _swift_stdlib_graphemeBreakProperties.
//
// Returns the 3-bit enum value stored in the matching table entry, or 0xFF
// when no entry's range contains `scalar` (the caller treats that as .any).
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getGraphemeBreakProperty(__swift_uint32_t scalar) {
  // Inclusive search window over the table; signed so that `last` can drop
  // to -1 and end the loop when the scalar sorts before the first entry.
  int first = 0;
  int last = 610 - 1;

  while (first <= last) {
    int mid = first + (last - first) / 2;
    __swift_uint32_t packed = _swift_stdlib_graphemeBreakProperties[mid];

    // The top three bits carry the enum value.
    __swift_uint8_t property = (__swift_uint8_t)(packed >> 29);

    // The low 21 bits carry the first scalar of the range.
    __swift_uint32_t rangeStart = packed & 0x1FFFFF;

    // The range count normally occupies the 8 bits above the scalar. For
    // extendedPictographic (enum value 5) the count borrows one more bit,
    // making it 9 bits wide.
    __swift_uint32_t rangeCount = (packed >> 21) & 0xFF;
    if (property == 5) {
      rangeCount = (packed >> 21) & 0x1FF;
    }
    __swift_uint32_t rangeEnd = rangeStart + rangeCount;

    if (scalar < rangeStart) {
      last = mid - 1;
    } else if (scalar > rangeEnd) {
      first = mid + 1;
    } else {
      return property;
    }
  }

  // The scalar is not covered by any range in the table, meaning it has no
  // explicit grapheme break property. Return the max value to indicate .any.
  return 0xFF;
}