mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
stdlib: use SipHash-1-3 for string hashing on non-ObjC platforms
Part of rdar://problem/24109692
This commit is contained in:
@@ -57,7 +57,6 @@ static const UCollator *GetRootCollator() {
|
||||
/// This class caches the collation element results for the ASCII subset of
|
||||
/// Unicode.
|
||||
class ASCIICollation {
|
||||
int32_t CollationTable[128];
|
||||
public:
|
||||
friend class swift::Lazy<ASCIICollation>;
|
||||
|
||||
@@ -66,6 +65,8 @@ public:
|
||||
return &theTable.get();
|
||||
}
|
||||
|
||||
int32_t CollationTable[128];
|
||||
|
||||
/// Maps an ASCII character to a collation element priority as would be
|
||||
/// returned by a call to ucol_next().
|
||||
int32_t map(unsigned char c) const {
|
||||
@@ -189,91 +190,42 @@ swift::_swift_stdlib_unicode_compare_utf8_utf8(const unsigned char *LeftString,
|
||||
return Diff;
|
||||
}
|
||||
|
||||
// These functions use MurmurHash2 in its 32- and 64-bit forms, which are
|
||||
// differentiated by the constants defined below. This seems like a good choice
|
||||
// for now because it operates efficiently in blocks rather than bytes, and
|
||||
// the data returned from the collation iterator comes in 4-byte chunks.
|
||||
#if __arm__ || __i386__
|
||||
#define HASH_SEED 0x88ddcc21
|
||||
#define HASH_M 0x5bd1e995
|
||||
#define HASH_R 24
|
||||
#else
|
||||
#define HASH_SEED 0x429b126688ddcc21
|
||||
#define HASH_M 0xc6a4a7935bd1e995
|
||||
#define HASH_R 47
|
||||
#endif
|
||||
|
||||
static intptr_t hashChunk(const UCollator *Collator, intptr_t HashState,
|
||||
const uint16_t *Str, uint32_t Length,
|
||||
UErrorCode *ErrorCode) {
|
||||
void *swift::_swift_stdlib_unicodeCollationIterator_create(
|
||||
const __swift_uint16_t *Str, __swift_uint32_t Length) {
|
||||
UErrorCode ErrorCode = U_ZERO_ERROR;
|
||||
#if defined(__CYGWIN__) || defined(_MSC_VER)
|
||||
UCollationElements *CollationIterator = ucol_openElements(
|
||||
Collator, reinterpret_cast<const UChar *>(Str), Length, ErrorCode);
|
||||
GetRootCollator(), reinterpret_cast<const UChar *>(Str), Length,
|
||||
&ErrorCode);
|
||||
#else
|
||||
UCollationElements *CollationIterator = ucol_openElements(
|
||||
Collator, Str, Length, ErrorCode);
|
||||
GetRootCollator(), Str, Length, &ErrorCode);
|
||||
#endif
|
||||
while (U_SUCCESS(*ErrorCode)) {
|
||||
intptr_t Elem = ucol_next(CollationIterator, ErrorCode);
|
||||
// Ignore zero valued collation elements. They don't participate in the
|
||||
// ordering relation.
|
||||
if (Elem == 0)
|
||||
continue;
|
||||
if (Elem != UCOL_NULLORDER) {
|
||||
Elem *= HASH_M;
|
||||
Elem ^= Elem >> HASH_R;
|
||||
Elem *= HASH_M;
|
||||
|
||||
HashState *= HASH_M;
|
||||
HashState ^= Elem;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
ucol_closeElements(CollationIterator);
|
||||
return HashState;
|
||||
}
|
||||
|
||||
/// Applies the final mixing step to an accumulated hash state and returns the
/// mixed value.
///
/// The state is folded onto itself with a right shift by HASH_R, multiplied
/// by HASH_M, and folded once more. HASH_R and HASH_M are the width-dependent
/// constants #defined earlier in this file.
///
/// \param HashState the hash accumulated so far.
/// \returns the finalized hash value.
///
/// NOTE(review): HashState is a signed intptr_t, so the shifts and the
/// multiply operate on a signed value -- confirm this is intended.
static intptr_t hashFinish(intptr_t HashState) {
|
||||
HashState ^= HashState >> HASH_R;
|
||||
HashState *= HASH_M;
|
||||
HashState ^= HashState >> HASH_R;
|
||||
return HashState;
|
||||
}
|
||||
|
||||
intptr_t
|
||||
swift::_swift_stdlib_unicode_hash(const uint16_t *Str, int32_t Length) {
|
||||
UErrorCode ErrorCode = U_ZERO_ERROR;
|
||||
intptr_t HashState = HASH_SEED;
|
||||
HashState = hashChunk(GetRootCollator(), HashState, Str, Length, &ErrorCode);
|
||||
|
||||
if (U_FAILURE(ErrorCode)) {
|
||||
swift::crash("hashChunk: Unexpected error hashing unicode string.");
|
||||
swift::crash("_swift_stdlib_unicodeCollationIterator_create: ucol_openElements() failed.");
|
||||
}
|
||||
return hashFinish(HashState);
|
||||
return CollationIterator;
|
||||
}
|
||||
|
||||
intptr_t swift::_swift_stdlib_unicode_hash_ascii(const unsigned char *Str,
|
||||
int32_t Length) {
|
||||
const ASCIICollation *Table = ASCIICollation::getTable();
|
||||
intptr_t HashState = HASH_SEED;
|
||||
int32_t Pos = 0;
|
||||
while (Pos < Length) {
|
||||
const unsigned char c = Str[Pos++];
|
||||
assert((c & 0x80) == 0 && "This table only exists for the ASCII subset");
|
||||
intptr_t Elem = Table->map(c);
|
||||
// Ignore zero valued collation elements. They don't participate in the
|
||||
// ordering relation.
|
||||
if (Elem == 0)
|
||||
continue;
|
||||
Elem *= HASH_M;
|
||||
Elem ^= Elem >> HASH_R;
|
||||
Elem *= HASH_M;
|
||||
|
||||
HashState *= HASH_M;
|
||||
HashState ^= Elem;
|
||||
__swift_int32_t swift::_swift_stdlib_unicodeCollationIterator_next(
|
||||
void *CollationIterator, bool *HitEnd) {
|
||||
UErrorCode ErrorCode = U_ZERO_ERROR;
|
||||
auto Result = ucol_next(
|
||||
static_cast<UCollationElements *>(CollationIterator), &ErrorCode);
|
||||
if (U_FAILURE(ErrorCode)) {
|
||||
swift::crash("_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed.");
|
||||
}
|
||||
return hashFinish(HashState);
|
||||
*HitEnd = (Result == UCOL_NULLORDER);
|
||||
return Result;
|
||||
}
|
||||
|
||||
/// Closes a collation element iterator that is passed around as an opaque
/// pointer.
///
/// The pointer is cast back to UCollationElements * and handed to
/// ucol_closeElements().
///
/// \param CollationIterator opaque iterator handle; presumably one returned by
/// _swift_stdlib_unicodeCollationIterator_create() -- confirm against callers.
void swift::_swift_stdlib_unicodeCollationIterator_delete(
|
||||
void *CollationIterator) {
|
||||
ucol_closeElements(static_cast<UCollationElements *>(CollationIterator));
|
||||
}
|
||||
|
||||
/// Returns a pointer to the first element of the CollationTable array held by
/// the ASCIICollation instance that ASCIICollation::getTable() returns.
///
/// CollationTable is declared in ASCIICollation as int32_t[128], i.e. one
/// entry per ASCII code point.
const __swift_int32_t *swift::_swift_stdlib_unicode_getASCIICollationTable() {
|
||||
return ASCIICollation::getTable()->CollationTable;
|
||||
}
|
||||
|
||||
/// Convert the unicode string to uppercase. This function will return the
|
||||
|
||||
Reference in New Issue
Block a user