[stdlib]Unify String hashing implementation (#14921)

* Add partial range subscripts to _UnmanagedOpaqueString

* Use SipHash13+_NormalizedCodeUnitIterator for String hashes on all platforms

* Remove unnecessary collation algorithm shims

* Pass the buffer to the SipHasher for ASCII

* Hash the ascii parts of UTF16 strings the same way we hash pure ascii strings

* De-dupe some code that can be shared between _UnmanagedOpaqueString and _UnmanagedString<UInt16>

* ASCII strings now hash consistently in hashASCII() and hashUTF16()

* Fix zalgo comparison regression

* Use hasher

* Fix crash when appending to an empty _FixedArray

* Compact ASCII characters into a single UInt64 for hashing

* String: Switch to _hash(into:)-based hashing

This should speed up String hashing quite a bit, as doing it through hashValue involves two rounds of SipHash nested in each other.

* Remove obsolete workaround for ARC traffic

* Ditch _FixedArray<UInt8> in favor of _UIntBuffer<UInt64, UInt8>

* Bad rebase remnants

* Fix failing benchmarks

* michael's feedback

* clarify the comment about nul-terminated string hashes
This commit is contained in:
Lance Parker
2018-03-17 22:13:37 -07:00
committed by GitHub
parent f5d43e2df9
commit cbf157f924
9 changed files with 228 additions and 401 deletions

View File

@@ -103,93 +103,6 @@ static const UCollator *GetRootCollator() {
return SWIFT_LAZY_CONSTANT(MakeRootCollator());
}
/// This class caches the collation element results for the ASCII subset of
/// unicode.
class ASCIICollation {
public:
friend class swift::Lazy<ASCIICollation>;
static swift::Lazy<ASCIICollation> theTable;
static const ASCIICollation *getTable() {
return &theTable.get();
}
int32_t CollationTable[128];
/// Maps an ASCII character to a collation element priority as would be
/// returned by a call to ucol_next().
int32_t map(unsigned char c) const {
return CollationTable[c];
}
private:
/// Construct the ASCII collation table.
ASCIICollation() {
const UCollator *Collator = GetRootCollator();
for (unsigned char c = 0; c < 128; ++c) {
UErrorCode ErrorCode = U_ZERO_ERROR;
intptr_t NumCollationElts = 0;
UChar Buffer[1];
Buffer[0] = c;
UCollationElements *CollationIterator =
ucol_openElements(Collator, Buffer, 1, &ErrorCode);
while (U_SUCCESS(ErrorCode)) {
intptr_t Elem = ucol_next(CollationIterator, &ErrorCode);
if (Elem != UCOL_NULLORDER) {
CollationTable[c] = Elem;
++NumCollationElts;
} else {
break;
}
}
ucol_closeElements(CollationIterator);
if (U_FAILURE(ErrorCode) || NumCollationElts != 1) {
swift::crash("Error setting up the ASCII collation table");
}
}
}
ASCIICollation &operator=(const ASCIICollation &) = delete;
ASCIICollation(const ASCIICollation &) = delete;
};
void *swift::_swift_stdlib_unicodeCollationIterator_create(
const __swift_uint16_t *Str, __swift_uint32_t Length) {
UErrorCode ErrorCode = U_ZERO_ERROR;
UCollationElements *CollationIterator =
ucol_openElements(GetRootCollator(), reinterpret_cast<const UChar *>(Str),
Length, &ErrorCode);
if (U_FAILURE(ErrorCode)) {
swift::crash("_swift_stdlib_unicodeCollationIterator_create: ucol_openElements() failed.");
}
return CollationIterator;
}
/// Advances the collation element iterator behind `CollationIterator` and
/// returns the value produced by ucol_next().
///
/// `*HitEnd` is set to true when that value is UCOL_NULLORDER and to false
/// otherwise. Calls swift::crash() if ucol_next() reports a failure.
__swift_int32_t swift::_swift_stdlib_unicodeCollationIterator_next(
    void *CollationIterator, bool *HitEnd) {
  auto *Elements = static_cast<UCollationElements *>(CollationIterator);
  UErrorCode Status = U_ZERO_ERROR;
  auto Order = ucol_next(Elements, &Status);
  if (U_FAILURE(Status)) {
    swift::crash(
        "_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed.");
  }
  *HitEnd = (Order == UCOL_NULLORDER);
  return Order;
}
void swift::_swift_stdlib_unicodeCollationIterator_delete(
void *CollationIterator) {
ucol_closeElements(static_cast<UCollationElements *>(CollationIterator));
}
/// Returns a pointer to the first entry of the shared ASCIICollation
/// instance's 128-entry CollationTable.
const __swift_int32_t *swift::_swift_stdlib_unicode_getASCIICollationTable() {
  const ASCIICollation *Table = ASCIICollation::getTable();
  return Table->CollationTable;
}
/// Convert the unicode string to uppercase. This function will return the
/// required buffer length as a result. If this length does not match the
/// 'DestinationCapacity' this function must be called again with a buffer of
@@ -231,8 +144,6 @@ swift::_swift_stdlib_unicode_strToLower(uint16_t *Destination,
}
return OutputLength;
}
// Out-of-line definition of the static `theTable` member declared in
// ASCIICollation; ASCIICollation::getTable() reads it via Lazy::get().
swift::Lazy<ASCIICollation> ASCIICollation::theTable;
#endif
namespace {