Switch grapheme break property searching to Eytzinger binary search (#71668)

This commit is contained in:
David Smith
2024-02-16 16:06:20 -08:00
committed by GitHub
parent 3fa0886206
commit ea7d07714f
6 changed files with 158 additions and 119 deletions

View File

@@ -18,48 +18,40 @@
#include "swift/shims/UnicodeData.h"
#include <limits>
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getGraphemeBreakProperty(__swift_uint32_t scalar) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
swift::swift_abortDisabledUnicodeSupport();
#else
auto low = 0;
auto high = GRAPHEME_BREAK_DATA_COUNT - 1;
while (high >= low) {
auto idx = low + (high - low) / 2;
auto entry = _swift_stdlib_graphemeBreakProperties[idx];
auto index = 1; //0th element is a dummy element
while (index < GRAPHEME_BREAK_DATA_COUNT) {
auto entry = _swift_stdlib_graphemeBreakProperties[index];
// Shift the enum and range count out of the value.
auto lower = (entry << 11) >> 11;
// Shift the enum out first, then shift out the scalar value.
auto upper = lower + ((entry << 3) >> 24);
// Shift everything except the enum value out.
auto enumValue = (__swift_uint8_t)(entry >> 29);
// Special case: extendedPictographic, which uses an extra bit for the range.
if (enumValue == 5) {
upper = lower + ((entry << 2) >> 23);
}
if (scalar >= lower && scalar <= upper) {
return enumValue;
}
if (scalar > upper) {
low = idx + 1;
continue;
}
// To descend to the left child of the current node in our virtual tree, go to
// index * 2; to descend to the right child, go to (index * 2) + 1.
if (scalar < lower) {
high = idx - 1;
continue;
index = 2 * index;
} else if (scalar <= upper) {
return enumValue;
} else {
index = 2 * index + 1;
}
}
// If we made it out here, then our scalar was not found in the grapheme
// array (this occurs when a scalar doesn't map to any grapheme break
// property). Return the max value here to indicate .any.