// Mirror of https://github.com/apple/swift.git
// (synced 2025-12-14 20:36:38 +01:00; 573 lines, 17 KiB, C++).
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2021 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#if SWIFT_STDLIB_ENABLE_UNICODE_DATA
|
|
|
|
#if defined(__APPLE__)
|
|
#include "Apple/ScalarPropsData.h"
|
|
#else
|
|
#include "Common/ScalarPropsData.h"
|
|
#endif
|
|
|
|
#include "Common/CaseData.h"
|
|
#include "Common/ScriptData.h"
|
|
|
|
#else
|
|
#include "swift/Runtime/Debug.h"
|
|
#endif
|
|
|
|
#include "SwiftShims/UnicodeData.h"
|
|
#include <limits>
|
|
|
|
// Looks up the binary properties value for `scalar`.
//
// _swift_stdlib_scalar_binProps is binary searched for the range containing
// `scalar`; the matching entry supplies an index into
// _swift_stdlib_scalar_binProps_data, whose element is returned. Returns 0 if
// no range matches. When Unicode data support is compiled out, this calls
// swift::swift_abortDisabledUnicodeSupport() instead.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint64_t _swift_stdlib_getBinaryProperties(__swift_uint32_t scalar) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
  swift::swift_abortDisabledUnicodeSupport();
#else
  auto low = 0;
  auto entryCount = BIN_PROPS_COUNT;
  auto high = entryCount - 1;

  while (low <= high) {
    auto mid = low + (high - low) / 2;
    auto entry = _swift_stdlib_scalar_binProps[mid];

    // Clearing the top 11 bits of the entry leaves the first scalar of the
    // range; the bits from bit 21 upward are the data index.
    auto rangeStart = (entry << 11) >> 11;
    auto dataIndex = entry >> 21;

    // Entries do not store a length. The final entry extends to 0x10FFFF; any
    // other entry ends right before the next entry's first scalar.
    __swift_uint32_t rangeEnd = 0x10FFFF;

    if (mid != entryCount - 1) {
      auto followingEntry = _swift_stdlib_scalar_binProps[mid + 1];
      auto followingStart = (followingEntry << 11) >> 11;

      rangeEnd = followingStart - 1;
    }

    if (scalar > rangeEnd) {
      // The scalar lies in a later range.
      low = mid + 1;
    } else if (scalar < rangeStart) {
      // The scalar lies in an earlier range.
      high = mid - 1;
    } else {
      return _swift_stdlib_scalar_binProps_data[dataIndex];
    }
  }

  // The table is expected to cover every scalar from 0x0 to 0x10FFFF, so this
  // point should be unreachable. Report "no properties" if it is ever hit.
  return 0;
#endif
}
|
|
|
|
// Looks up the numeric type stored for `scalar`.
//
// _swift_stdlib_numeric_type is binary searched for the range containing
// `scalar`. Returns the type packed into the matching entry, or the maximum
// __swift_uint8_t value when no range contains the scalar. When Unicode data
// support is compiled out, this calls
// swift::swift_abortDisabledUnicodeSupport() instead.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getNumericType(__swift_uint32_t scalar) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
  swift::swift_abortDisabledUnicodeSupport();
#else
  auto low = 0;
  auto entryCount = NUMERIC_TYPE_COUNT;
  auto high = entryCount - 1;

  while (low <= high) {
    auto mid = low + (high - low) / 2;
    auto entry = _swift_stdlib_numeric_type[mid];

    // Unpack the entry: the first scalar of the range, the distance from it
    // to the last scalar of the range (inclusive bound), and the numeric type
    // taken from bit 29 upward.
    auto rangeStart = (entry << 11) >> 11;
    auto rangeLength = (entry << 3) >> 24;
    auto rangeEnd = rangeStart + rangeLength;
    auto numericType = static_cast<__swift_uint8_t>(entry >> 29);

    if (scalar > rangeEnd) {
      low = mid + 1;
    } else if (scalar < rangeStart) {
      high = mid - 1;
    } else {
      return numericType;
    }
  }

  // No range in the numeric type table contains this scalar; the maximum
  // value signals "not found".
  return std::numeric_limits<__swift_uint8_t>::max();
#endif
}
|
|
|
|
// Looks up the numeric value stored for `scalar`.
//
// The scalar is hashed with _swift_stdlib_getMphIdx over the numeric values
// key/rank/size tables; the result selects an entry of
// _swift_stdlib_numeric_values_indices, which in turn selects the returned
// element of _swift_stdlib_numeric_values. No check is made here that the
// scalar actually has a numeric value.
// NOTE(review): presumably callers only pass scalars that have one - confirm.
// When Unicode data support is compiled out, this calls
// swift::swift_abortDisabledUnicodeSupport() instead.
SWIFT_RUNTIME_STDLIB_INTERNAL
double _swift_stdlib_getNumericValue(__swift_uint32_t scalar) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
  swift::swift_abortDisabledUnicodeSupport();
#else
  __swift_intptr_t hashedIdx =
      _swift_stdlib_getMphIdx(scalar, NUMERIC_VALUES_LEVEL_COUNT,
                              _swift_stdlib_numeric_values_keys,
                              _swift_stdlib_numeric_values_ranks,
                              _swift_stdlib_numeric_values_sizes);

  // Two-step lookup: hashed index -> value index -> value.
  return _swift_stdlib_numeric_values[
      _swift_stdlib_numeric_values_indices[hashedIdx]];
#endif
}
|
|
|
|
// Looks up the name alias stored for `scalar`.
//
// Returns the matching element of _swift_stdlib_nameAlias_data, or nullptr
// when _swift_stdlib_getScalarBitArrayIdx reports no entry for the scalar
// (signalled by the maximum __swift_intptr_t value). When Unicode data support
// is compiled out, this calls swift::swift_abortDisabledUnicodeSupport()
// instead.
SWIFT_RUNTIME_STDLIB_INTERNAL
const char *_swift_stdlib_getNameAlias(__swift_uint32_t scalar) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
  swift::swift_abortDisabledUnicodeSupport();
#else
  auto aliasIdx = _swift_stdlib_getScalarBitArrayIdx(
      scalar, _swift_stdlib_nameAlias, _swift_stdlib_nameAlias_ranks);

  if (aliasIdx != std::numeric_limits<__swift_intptr_t>::max()) {
    return _swift_stdlib_nameAlias_data[aliasIdx];
  }

  // The scalar has no entry in the name alias bit array.
  return nullptr;
#endif
}
|
|
|
|
// Looks up the simple case mapping value stored for `scalar`.
//
// `mapping` selects the kind: 0 = uppercase, 1 = lowercase, 2 = titlecase.
// Returns the selected element of _swift_stdlib_mappings_data, or 0 when the
// scalar has no entry, the mapping kind is unknown, or the per-kind index is
// the 0xFF "none" marker. When Unicode data support is compiled out, this
// calls swift::swift_abortDisabledUnicodeSupport() instead.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_int32_t _swift_stdlib_getMapping(__swift_uint32_t scalar,
                                         __swift_uint8_t mapping) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
  swift::swift_abortDisabledUnicodeSupport();
#else
  auto dataIdx = _swift_stdlib_getScalarBitArrayIdx(
      scalar, _swift_stdlib_mappings, _swift_stdlib_mappings_ranks);

  if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
    return 0;
  }

  auto packedIndices = _swift_stdlib_mappings_data_indices[dataIdx];

  // Anything other than uppercase (0), lowercase (1) or titlecase (2) is an
  // unknown mapping.
  if (mapping > 2) {
    return 0;
  }

  // Each mapping kind owns one byte of the packed value: byte 0 is uppercase,
  // byte 1 is lowercase and byte 2 is titlecase.
  __swift_uint8_t mappingIdx = (packedIndices >> (mapping * 8)) & 0xFF;

  // 0xFF marks "no mapping of this kind".
  if (mappingIdx == 0xFF) {
    return 0;
  }

  return _swift_stdlib_mappings_data[mappingIdx];
#endif
}
|
|
|
|
// Looks up the special case mapping stored for `scalar`.
//
// `mapping` selects the kind: 0 = uppercase, 1 = lowercase, 2 = titlecase.
// On success, stores the number of elements in `*length` and returns a pointer
// to the first of them inside _swift_stdlib_special_mappings_data. Returns
// nullptr, leaving `*length` untouched, when the scalar has no entry or the
// mapping kind is unknown. When Unicode data support is compiled out, this
// calls swift::swift_abortDisabledUnicodeSupport() instead.
SWIFT_RUNTIME_STDLIB_INTERNAL
const __swift_uint8_t *_swift_stdlib_getSpecialMapping(__swift_uint32_t scalar,
                                                       __swift_uint8_t mapping,
                                                       __swift_intptr_t *length) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
  swift::swift_abortDisabledUnicodeSupport();
#else
  auto dataIdx = _swift_stdlib_getScalarBitArrayIdx(
      scalar, _swift_stdlib_special_mappings,
      _swift_stdlib_special_mappings_ranks);

  if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
    return nullptr;
  }

  auto index = _swift_stdlib_special_mappings_data_indices[dataIdx];

  // Anything other than uppercase (0), lowercase (1) or titlecase (2) is an
  // unknown mapping.
  if (mapping > 2) {
    return nullptr;
  }

  // The uppercase, lowercase and titlecase records are stored back to back in
  // that order. Each record is a length element followed by that many
  // elements, so reaching record N means hopping over the N records before it.
  auto record = _swift_stdlib_special_mappings_data + index;

  for (__swift_uint8_t skipped = 0; skipped != mapping; skipped += 1) {
    record += 1 + *record;
  }

  *length = *record;
  return record + 1;
#endif
}
|
|
|
|
// Writes the name of `scalar` into `buffer` and returns the number of bytes
// that were initialized.
//
// A name is stored as a sequence of word indices; each word is copied into the
// buffer followed by a single space, and the space after the final word is not
// counted in the result. Returns 0 when the scalar has no name. If `capacity`
// is reached before the name is complete, writing stops and the number of
// bytes written so far is returned. When Unicode data support is compiled out,
// this calls swift::swift_abortDisabledUnicodeSupport() instead.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_intptr_t _swift_stdlib_getScalarName(__swift_uint32_t scalar,
                                             __swift_uint8_t *buffer,
                                             __swift_intptr_t capacity) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
  swift::swift_abortDisabledUnicodeSupport();
#else
  // Scalars are grouped into sets of 128. A set offset equal to the maximum
  // 16-bit value means nothing in that set is named.
  auto setOffset = _swift_stdlib_names_scalar_sets[scalar >> 7];

  if (setOffset == std::numeric_limits<__swift_uint16_t>::max()) {
    return 0;
  }

  auto scalarIndex = (setOffset << 7) + (scalar & 0x7F);
  auto nameStart = _swift_stdlib_names_scalars[scalarIndex];

  // U+20 is the first scalar that Unicode defines a name for, so it is the
  // only scalar for which an offset of 0 is valid. Any other 0 means the
  // scalar has no name.
  if (nameStart == 0 && scalar != 0x20) {
    return 0;
  }

  // The name ends where the next named scalar's name begins.
  __swift_uint32_t nameEnd = 0;

  if (scalarIndex == NAMES_SCALARS_MAX_INDEX) {
    // This is the last element in the array, which represents the last scalar
    // name that Unicode defines (excluding variation selectors).
    nameEnd = NAMES_LAST_SCALAR_OFFSET;
  } else {
    // Walk forward until a scalar with a non-zero offset is found.
    for (int step = 1; nameEnd == 0; step += 1) {
      nameEnd = _swift_stdlib_names_scalars[scalarIndex + step];
    }
  }

  auto nameSize = nameEnd - nameStart;

  // The total number of initialized bytes in the name string.
  int written = 0;

  for (__swift_uint32_t i = 0; i < nameSize; i += 1) {
    __swift_uint16_t wordIndex =
        (__swift_uint16_t) _swift_stdlib_names[nameStart + i];

    // 0xFF is an escape: the word index does not fit in one byte, so the next
    // two bytes hold it, low byte first.
    if (wordIndex == 0xFF) {
      i += 1;
      wordIndex = _swift_stdlib_names[nameStart + i];

      i += 1;
      wordIndex |= _swift_stdlib_names[nameStart + i] << 8;
    }

    auto word = _swift_stdlib_words + _swift_stdlib_word_indices[wordIndex];

    // Copy every character of the word except the last one. The last
    // character is the one with the high bit (0x80) set.
    for (; *word < 0x80; word += 1) {
      if (written >= capacity) {
        return written;
      }

      buffer[written] = *word;
      written += 1;
    }

    if (written >= capacity) {
      return written;
    }

    // Copy the last character with its end-of-word marker bit cleared.
    buffer[written] = *word & 0x7F;
    written += 1;

    if (written >= capacity) {
      return written;
    }

    // Separate this word from the next one.
    buffer[written] = ' ';
    written += 1;
  }

  // The space after the final word is not part of the name, so leave it out
  // of the initialized byte count.
  return written - 1;
#endif
}
|
|
|
|
// Looks up the age value stored for `scalar`.
//
// _swift_stdlib_ages is binary searched for the range containing `scalar`; the
// matching entry supplies an index into _swift_stdlib_ages_data, whose element
// is returned. Returns the maximum __swift_uint16_t value when no range
// contains the scalar. When Unicode data support is compiled out, this calls
// swift::swift_abortDisabledUnicodeSupport() instead.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint16_t _swift_stdlib_getAge(__swift_uint32_t scalar) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
  swift::swift_abortDisabledUnicodeSupport();
#else
  auto low = 0;
  auto entryCount = AGE_COUNT;
  auto high = entryCount - 1;

  while (low <= high) {
    auto mid = low + (high - low) / 2;
    auto entry = _swift_stdlib_ages[mid];

    // Unpack the entry: the first scalar of the range, the distance from it
    // to the last scalar of the range (inclusive bound, taken from bit 32
    // upward), and the age data index that sits between the two.
    auto rangeStart = (entry << 43) >> 43;
    auto rangeLength = entry >> 32;
    auto rangeEnd = rangeStart + rangeLength;
    auto ageIdx = static_cast<__swift_uint8_t>((entry << 32) >> 32 >> 21);

    if (scalar > rangeEnd) {
      low = mid + 1;
    } else if (scalar < rangeStart) {
      high = mid - 1;
    } else {
      return _swift_stdlib_ages_data[ageIdx];
    }
  }

  // No range in the age table contains this scalar; the maximum value signals
  // "not found".
  return std::numeric_limits<__swift_uint16_t>::max();
#endif
}
|
|
|
|
// Looks up the general category value stored for `scalar`.
//
// _swift_stdlib_generalCategory is binary searched for the range containing
// `scalar`. Returns the category packed into the matching entry, or the
// maximum __swift_uint8_t value when no range contains the scalar. When
// Unicode data support is compiled out, this calls
// swift::swift_abortDisabledUnicodeSupport() instead.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getGeneralCategory(__swift_uint32_t scalar) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
  swift::swift_abortDisabledUnicodeSupport();
#else
  auto low = 0;
  auto entryCount = GENERAL_CATEGORY_COUNT;
  auto high = entryCount - 1;

  while (low <= high) {
    auto mid = low + (high - low) / 2;
    auto entry = _swift_stdlib_generalCategory[mid];

    // Unpack the entry: the first scalar of the range, the distance from it
    // to the last scalar of the range (inclusive bound, taken from bit 32
    // upward), and the category value that sits between the two.
    auto rangeStart = (entry << 43) >> 43;
    auto rangeLength = entry >> 32;
    auto rangeEnd = rangeStart + rangeLength;
    auto category = static_cast<__swift_uint8_t>((entry << 32) >> 32 >> 21);

    if (scalar > rangeEnd) {
      low = mid + 1;
    } else if (scalar < rangeStart) {
      high = mid - 1;
    } else {
      return category;
    }
  }

  // No range in the general category table contains this scalar; the maximum
  // value signals "not found".
  return std::numeric_limits<__swift_uint8_t>::max();
#endif
}
|
|
|
|
// Looks up the script value stored for `scalar`.
//
// _swift_stdlib_scripts is binary searched for the range containing `scalar`.
// Returns the script value packed into the matching entry, or the maximum
// __swift_uint8_t value (255) if no range matches. When Unicode data support
// is compiled out, this calls swift::swift_abortDisabledUnicodeSupport()
// instead.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getScript(__swift_uint32_t scalar) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
  swift::swift_abortDisabledUnicodeSupport();
#else
  auto low = 0;
  auto entryCount = SCRIPTS_COUNT;
  auto high = entryCount - 1;

  while (low <= high) {
    auto mid = low + (high - low) / 2;
    auto entry = _swift_stdlib_scripts[mid];

    // Clearing the top 11 bits of the entry leaves the first scalar of the
    // range; the bits from bit 21 upward are the script enum value.
    auto rangeStart = (entry << 11) >> 11;
    auto script = entry >> 21;

    // Entries do not store a length. The final entry extends to 0x10FFFF; any
    // other entry ends right before the next entry's first scalar.
    __swift_uint32_t rangeEnd = 0x10FFFF;

    if (mid != entryCount - 1) {
      auto followingEntry = _swift_stdlib_scripts[mid + 1];
      auto followingStart = (followingEntry << 11) >> 11;

      rangeEnd = followingStart - 1;
    }

    if (scalar > rangeEnd) {
      // The scalar lies in a later range.
      low = mid + 1;
    } else if (scalar < rangeStart) {
      // The scalar lies in an earlier range.
      high = mid - 1;
    } else {
      return script;
    }
  }

  // The table is expected to cover every scalar from 0x0 to 0x10FFFF, so this
  // point should be unreachable. Report 255 as a failure if it is ever hit.
  return std::numeric_limits<__swift_uint8_t>::max();
#endif
}
|
|
|
|
// Looks up the script extensions stored for `scalar`.
//
// On success, stores the number of extensions in `*count` and returns a
// pointer to the first of them inside _swift_stdlib_script_extensions_data.
// Returns a null pointer, leaving `*count` untouched, when the scalar has no
// script extensions. When Unicode data support is compiled out, this calls
// swift::swift_abortDisabledUnicodeSupport() instead.
SWIFT_RUNTIME_STDLIB_INTERNAL
const __swift_uint8_t *_swift_stdlib_getScriptExtensions(__swift_uint32_t scalar,
                                                         __swift_uint8_t *count) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
  swift::swift_abortDisabledUnicodeSupport();
#else
  auto dataIdx = _swift_stdlib_getScalarBitArrayIdx(
      scalar, _swift_stdlib_script_extensions,
      _swift_stdlib_script_extensions_ranks);

  // Without an index into the data indices, this scalar has no script
  // extensions.
  if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
    return nullptr;
  }

  // The packed value keeps the offset into the extensions data in its low 11
  // bits and the extension count in the bits above them.
  auto packed = _swift_stdlib_script_extensions_data_indices[dataIdx];

  *count = packed >> 11;
  return _swift_stdlib_script_extensions_data + (packed & 0x7FF);
#endif
}
|
|
|
|
// Writes the scalar(s) that `scalar` maps to, according to the
// _swift_stdlib_case tables, into `buffer`.
//
// Exactly one element is written when the scalar has no mapping (it maps to
// itself) or maps to a single scalar. Otherwise one element per mapped scalar
// is written, starting at buffer[0]; the count comes from the top two bits of
// the full mapping entry and is expected to be 2 or 3. The caller must supply
// a buffer large enough for those writes. When Unicode data support is
// compiled out, this calls swift::swift_abortDisabledUnicodeSupport() instead.
SWIFT_RUNTIME_STDLIB_INTERNAL
void _swift_stdlib_getCaseMapping(__swift_uint32_t scalar,
                                  __swift_uint32_t *buffer) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
  swift::swift_abortDisabledUnicodeSupport();
#else
  auto simpleIdx = _swift_stdlib_getMphIdx(scalar, CASE_FOLD_LEVEL_COUNT,
                                           _swift_stdlib_case_keys,
                                           _swift_stdlib_case_ranks,
                                           _swift_stdlib_case_sizes);

  auto caseValue = _swift_stdlib_case[simpleIdx];

  // Every entry records the scalar it was built for. The hash yields an index
  // for any input, so a mismatch here means this scalar has no case mapping
  // and maps to itself.
  __swift_uint32_t storedScalar = (caseValue << 43) >> 43;

  if (storedScalar != scalar) {
    buffer[0] = scalar;
    return;
  }

  // The top bit tells single-scalar mappings apart from multi-scalar ones.
  const bool mapsToMany =
      (caseValue & (static_cast<__swift_uint64_t>(0x1) << 63)) != 0;

  if (!mapsToMany) {
    // The entry stores the distance from this scalar to the one it maps to.
    auto distance = static_cast<__swift_int32_t>((caseValue << 1) >> 22);

    buffer[0] = static_cast<__swift_uint32_t>(
        static_cast<__swift_int32_t>(scalar) - distance);
    return;
  }

  // This scalar maps to multiple scalars; look the mapping up in the full mph.
  auto fullIdx = _swift_stdlib_getMphIdx(scalar, CASE_FULL_FOLD_LEVEL_COUNT,
                                         _swift_stdlib_case_full_keys,
                                         _swift_stdlib_case_full_ranks,
                                         _swift_stdlib_case_full_sizes);

  auto packed = _swift_stdlib_case_full[fullIdx];

  // The top two bits hold the number of mapped scalars (either 2 or 3).
  auto count = packed >> 62;

  for (__swift_uint64_t i = 0; i != count; i += 1) {
    // Each mapped scalar takes 17 bits: a 16-bit distance magnitude followed
    // by a flag bit that marks the distance as negative.
    auto magnitude = static_cast<__swift_int32_t>(packed & 0xFFFF);
    auto distance = (packed & 0x10000) != 0 ? -magnitude : magnitude;

    // Move on to the next 17-bit group.
    packed >>= 17;

    buffer[i] = static_cast<__swift_uint32_t>(
        static_cast<__swift_int32_t>(scalar) - distance);
  }
#endif
}
|