mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
[stdlib]Unify String hashing implementation (#14921)
* Add partial range subscripts to _UnmanagedOpaqueString * Use SipHash13+_NormalizedCodeUnitIterator for String hashes on all platforms * Remove unnecessary collation algorithm shims * Pass the buffer to the SipHasher for ASCII * Hash the ASCII parts of UTF16 strings the same way we hash pure ASCII strings * De-dupe some code that can be shared between _UnmanagedOpaqueString and _UnmanagedString<UInt16> * ASCII strings now hash consistently in hashASCII() and hashUTF16() * Fix zalgo comparison regression * Use hasher * Fix crash when appending to an empty _FixedArray * Compact ASCII characters into a single UInt64 for hashing * String: Switch to _hash(into:)-based hashing This should speed up String hashing quite a bit, as doing it through hashValue involves two rounds of SipHash nested in each other. * Remove obsolete workaround for ARC traffic * Ditch _FixedArray<UInt8> in favor of _UIntBuffer<UInt64, UInt8> * Bad rebase remnants * Fix failing benchmarks * Michael's feedback * Clarify the comment about nul-terminated string hashes
This commit is contained in:
@@ -103,93 +103,6 @@ static const UCollator *GetRootCollator() {
|
||||
return SWIFT_LAZY_CONSTANT(MakeRootCollator());
|
||||
}
|
||||
|
||||
/// This class caches the collation element results for the ASCII subset of
|
||||
/// unicode.
|
||||
class ASCIICollation {
|
||||
public:
|
||||
friend class swift::Lazy<ASCIICollation>;
|
||||
|
||||
static swift::Lazy<ASCIICollation> theTable;
|
||||
static const ASCIICollation *getTable() {
|
||||
return &theTable.get();
|
||||
}
|
||||
|
||||
int32_t CollationTable[128];
|
||||
|
||||
/// Maps an ASCII character to a collation element priority as would be
|
||||
/// returned by a call to ucol_next().
|
||||
int32_t map(unsigned char c) const {
|
||||
return CollationTable[c];
|
||||
}
|
||||
|
||||
private:
|
||||
/// Construct the ASCII collation table.
|
||||
ASCIICollation() {
|
||||
const UCollator *Collator = GetRootCollator();
|
||||
for (unsigned char c = 0; c < 128; ++c) {
|
||||
UErrorCode ErrorCode = U_ZERO_ERROR;
|
||||
intptr_t NumCollationElts = 0;
|
||||
UChar Buffer[1];
|
||||
Buffer[0] = c;
|
||||
|
||||
UCollationElements *CollationIterator =
|
||||
ucol_openElements(Collator, Buffer, 1, &ErrorCode);
|
||||
|
||||
while (U_SUCCESS(ErrorCode)) {
|
||||
intptr_t Elem = ucol_next(CollationIterator, &ErrorCode);
|
||||
if (Elem != UCOL_NULLORDER) {
|
||||
CollationTable[c] = Elem;
|
||||
++NumCollationElts;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ucol_closeElements(CollationIterator);
|
||||
if (U_FAILURE(ErrorCode) || NumCollationElts != 1) {
|
||||
swift::crash("Error setting up the ASCII collation table");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ASCIICollation &operator=(const ASCIICollation &) = delete;
|
||||
ASCIICollation(const ASCIICollation &) = delete;
|
||||
};
|
||||
|
||||
void *swift::_swift_stdlib_unicodeCollationIterator_create(
|
||||
const __swift_uint16_t *Str, __swift_uint32_t Length) {
|
||||
UErrorCode ErrorCode = U_ZERO_ERROR;
|
||||
UCollationElements *CollationIterator =
|
||||
ucol_openElements(GetRootCollator(), reinterpret_cast<const UChar *>(Str),
|
||||
Length, &ErrorCode);
|
||||
if (U_FAILURE(ErrorCode)) {
|
||||
swift::crash("_swift_stdlib_unicodeCollationIterator_create: ucol_openElements() failed.");
|
||||
}
|
||||
return CollationIterator;
|
||||
}
|
||||
|
||||
/// Advances the iterator behind `CollationIterator` by one step and returns
/// the value ucol_next() produced. `*HitEnd` is set to true when that value
/// is UCOL_NULLORDER and to false otherwise. Crashes if ICU reports a
/// failure.
__swift_int32_t swift::_swift_stdlib_unicodeCollationIterator_next(
    void *CollationIterator, bool *HitEnd) {
  auto *Elements = static_cast<UCollationElements *>(CollationIterator);

  UErrorCode Status = U_ZERO_ERROR;
  auto Element = ucol_next(Elements, &Status);

  if (U_FAILURE(Status)) {
    swift::crash(
        "_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed.");
  }

  const bool AtEnd = (Element == UCOL_NULLORDER);
  *HitEnd = AtEnd;
  return Element;
}
|
||||
|
||||
void swift::_swift_stdlib_unicodeCollationIterator_delete(
|
||||
void *CollationIterator) {
|
||||
ucol_closeElements(static_cast<UCollationElements *>(CollationIterator));
|
||||
}
|
||||
|
||||
/// Returns a pointer to the first entry of the shared ASCII collation table
/// (the 128-element `CollationTable` array of the `ASCIICollation` instance).
const __swift_int32_t *swift::_swift_stdlib_unicode_getASCIICollationTable() {
  const ASCIICollation *Table = ASCIICollation::getTable();
  return Table->CollationTable;
}
|
||||
|
||||
/// Convert the unicode string to uppercase. This function will return the
|
||||
/// required buffer length as a result. If this length does not match the
|
||||
/// 'DestinationCapacity' this function must be called again with a buffer of
|
||||
@@ -231,8 +144,6 @@ swift::_swift_stdlib_unicode_strToLower(uint16_t *Destination,
|
||||
}
|
||||
return OutputLength;
|
||||
}
|
||||
|
||||
// Out-of-line definition of the static `theTable` member declared inside
// ASCIICollation; ASCIICollation::getTable() reads it via theTable.get().
swift::Lazy<ASCIICollation> ASCIICollation::theTable;
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
Reference in New Issue
Block a user