stdlib: use SipHash-1-3 for string hashing on non-ObjC platforms

Part of rdar://problem/24109692
This commit is contained in:
Dmitri Gribenko
2016-09-02 23:27:11 -07:00
parent daa7bfc281
commit e8e8b35610
7 changed files with 119 additions and 84 deletions

View File

@@ -57,7 +57,6 @@ static const UCollator *GetRootCollator() {
/// This class caches the collation element results for the ASCII subset of
/// unicode.
class ASCIICollation {
int32_t CollationTable[128];
public:
friend class swift::Lazy<ASCIICollation>;
@@ -66,6 +65,8 @@ public:
return &theTable.get();
}
int32_t CollationTable[128];
/// Maps an ASCII character to a collation element priority as would be
/// returned by a call to ucol_next().
int32_t map(unsigned char c) const {
@@ -189,91 +190,42 @@ swift::_swift_stdlib_unicode_compare_utf8_utf8(const unsigned char *LeftString,
return Diff;
}
// These functions use murmurhash2 in its 32-bit and 64-bit forms, which are
// differentiated by the constants defined below. This seems like a good choice
// for now because it operates efficiently in blocks rather than bytes, and
// the data returned from the collation iterator comes in 4-byte chunks.
// Hash parameters, selected per target: one set of constants when compiling
// for __arm__ or __i386__, and a wider set for every other target.
#if __arm__ || __i386__
// Initial value of the running hash state.
#define HASH_SEED 0x88ddcc21
// Multiplier applied to each element and to the running hash state.
#define HASH_M 0x5bd1e995
// Right-shift distance used in the xor-shift mixing steps.
#define HASH_R 24
#else
// Initial value of the running hash state.
#define HASH_SEED 0x429b126688ddcc21
// Multiplier applied to each element and to the running hash state.
#define HASH_M 0xc6a4a7935bd1e995
// Right-shift distance used in the xor-shift mixing steps.
#define HASH_R 47
#endif
static intptr_t hashChunk(const UCollator *Collator, intptr_t HashState,
const uint16_t *Str, uint32_t Length,
UErrorCode *ErrorCode) {
void *swift::_swift_stdlib_unicodeCollationIterator_create(
const __swift_uint16_t *Str, __swift_uint32_t Length) {
UErrorCode ErrorCode = U_ZERO_ERROR;
#if defined(__CYGWIN__) || defined(_MSC_VER)
UCollationElements *CollationIterator = ucol_openElements(
Collator, reinterpret_cast<const UChar *>(Str), Length, ErrorCode);
GetRootCollator(), reinterpret_cast<const UChar *>(Str), Length,
&ErrorCode);
#else
UCollationElements *CollationIterator = ucol_openElements(
Collator, Str, Length, ErrorCode);
GetRootCollator(), Str, Length, &ErrorCode);
#endif
while (U_SUCCESS(*ErrorCode)) {
intptr_t Elem = ucol_next(CollationIterator, ErrorCode);
// Ignore zero valued collation elements. They don't participate in the
// ordering relation.
if (Elem == 0)
continue;
if (Elem != UCOL_NULLORDER) {
Elem *= HASH_M;
Elem ^= Elem >> HASH_R;
Elem *= HASH_M;
HashState *= HASH_M;
HashState ^= Elem;
} else {
break;
}
}
ucol_closeElements(CollationIterator);
return HashState;
}
/// Applies the final mixing rounds to an accumulated hash state.
///
/// The state is xor-ed with itself shifted right by HASH_R, multiplied by
/// HASH_M, and then xor-shifted by HASH_R once more.
///
/// \param HashState the hash state accumulated over all elements.
/// \returns the finished hash value.
static intptr_t hashFinish(intptr_t HashState) {
  intptr_t Mixed = HashState;
  Mixed = Mixed ^ (Mixed >> HASH_R);
  Mixed = Mixed * HASH_M;
  return Mixed ^ (Mixed >> HASH_R);
}
intptr_t
swift::_swift_stdlib_unicode_hash(const uint16_t *Str, int32_t Length) {
UErrorCode ErrorCode = U_ZERO_ERROR;
intptr_t HashState = HASH_SEED;
HashState = hashChunk(GetRootCollator(), HashState, Str, Length, &ErrorCode);
if (U_FAILURE(ErrorCode)) {
swift::crash("hashChunk: Unexpected error hashing unicode string.");
swift::crash("_swift_stdlib_unicodeCollationIterator_create: ucol_openElements() failed.");
}
return hashFinish(HashState);
return CollationIterator;
}
intptr_t swift::_swift_stdlib_unicode_hash_ascii(const unsigned char *Str,
int32_t Length) {
const ASCIICollation *Table = ASCIICollation::getTable();
intptr_t HashState = HASH_SEED;
int32_t Pos = 0;
while (Pos < Length) {
const unsigned char c = Str[Pos++];
assert((c & 0x80) == 0 && "This table only exists for the ASCII subset");
intptr_t Elem = Table->map(c);
// Ignore zero valued collation elements. They don't participate in the
// ordering relation.
if (Elem == 0)
continue;
Elem *= HASH_M;
Elem ^= Elem >> HASH_R;
Elem *= HASH_M;
HashState *= HASH_M;
HashState ^= Elem;
__swift_int32_t swift::_swift_stdlib_unicodeCollationIterator_next(
void *CollationIterator, bool *HitEnd) {
UErrorCode ErrorCode = U_ZERO_ERROR;
auto Result = ucol_next(
static_cast<UCollationElements *>(CollationIterator), &ErrorCode);
if (U_FAILURE(ErrorCode)) {
swift::crash("_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed.");
}
return hashFinish(HashState);
*HitEnd = (Result == UCOL_NULLORDER);
return Result;
}
void swift::_swift_stdlib_unicodeCollationIterator_delete(
void *CollationIterator) {
ucol_closeElements(static_cast<UCollationElements *>(CollationIterator));
}
/// Exposes the cached ASCII collation table.
///
/// \returns a pointer to the first entry of the `CollationTable` array held by
///   the ASCIICollation instance that ASCIICollation::getTable() returns.
const __swift_int32_t *swift::_swift_stdlib_unicode_getASCIICollationTable() {
  const ASCIICollation *Table = ASCIICollation::getTable();
  return Table->CollationTable;
}
/// Convert the unicode string to uppercase. This function will return the