Eytzingerize word break data (#71731)

Switch word break property searching to Eytzinger binary search
This commit is contained in:
David Smith
2024-02-20 22:49:34 -08:00
committed by GitHub
parent 6a86bf3464
commit f1feba0e42
7 changed files with 1818 additions and 225 deletions

View File

@@ -23,35 +23,26 @@ __swift_uint8_t _swift_stdlib_getWordBreakProperty(__swift_uint32_t scalar) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
swift::swift_abortDisabledUnicodeSupport();
#else
auto low = 0;
auto high = WORD_BREAK_DATA_COUNT - 1;
while (high >= low) {
auto idx = low + (high - low) / 2;
auto entry = _swift_stdlib_words[idx];
auto index = 1; //0th element is a dummy element
while (index < WORD_BREAK_DATA_COUNT) {
auto entry = _swift_stdlib_words[index];
// Shift the range count out of the value.
auto lower = (entry << 11) >> 11;
// Shift the enum out first, then shift out the scalar value.
auto upper = lower + (entry >> 21) - 1;
if (scalar >= lower && scalar <= upper) {
return _swift_stdlib_words_data[idx];
}
if (scalar > upper) {
low = idx + 1;
continue;
}
// In our virtual tree, the left child of the current node is at index * 2,
// and the right child is at (index * 2) + 1.
if (scalar < lower) {
high = idx - 1;
continue;
index = 2 * index;
} else if (scalar <= upper) {
return _swift_stdlib_words_data[index];
} else {
index = 2 * index + 1;
}
}
// If we made it out here, then our scalar was not found in the word
// array (this occurs when a scalar doesn't map to any word break
// property). Return the max value here to indicate .any.