mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
[stdlib]Un-revert string comparison (#14694)
Restore (un-revert) string comparison, with fixes. More exhaustive testing of opaque strings, which consistently reproduces the prior sporadic failure. Shims fixups. Some test tweaking.
This commit is contained in:
committed by
Michael Ilseman
parent
76af5c5b16
commit
0661de22a2
@@ -22,17 +22,34 @@
|
||||
|
||||
// Declare a few external functions to avoid a dependency on ICU headers.
|
||||
extern "C" {
|
||||
|
||||
// Types
|
||||
typedef struct UBreakIterator UBreakIterator;
|
||||
typedef struct UBreakIterator UNormalizer2;
|
||||
typedef enum UBreakIteratorType {} UBreakIteratorType;
|
||||
typedef enum UErrorCode {} UErrorCode;
|
||||
typedef uint16_t UChar;
|
||||
typedef int32_t UChar32;
|
||||
typedef int8_t UBool;
|
||||
typedef swift::__swift_stdlib_UProperty UProperty;
|
||||
|
||||
// Grapheme breaking APIs
|
||||
void ubrk_close(UBreakIterator *);
|
||||
UBreakIterator *ubrk_open(UBreakIteratorType, const char *, const UChar *,
|
||||
int32_t, UErrorCode *);
|
||||
int32_t ubrk_preceding(UBreakIterator *, int32_t);
|
||||
int32_t ubrk_following(UBreakIterator *, int32_t);
|
||||
void ubrk_setText(UBreakIterator *, const UChar *, int32_t, UErrorCode *);
|
||||
|
||||
// Comparison, normalization, and character property APIs
|
||||
int32_t unorm2_spanQuickCheckYes(const UNormalizer2 *, const UChar *, int32_t,
|
||||
UErrorCode *);
|
||||
int32_t unorm2_normalize(const UNormalizer2 *, const UChar *, int32_t, UChar *,
|
||||
int32_t, UErrorCode *);
|
||||
const UNormalizer2 *unorm2_getNFCInstance(UErrorCode *);
|
||||
UBool unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
|
||||
UBool u_hasBinaryProperty(UChar32, UProperty);
|
||||
UBool u_isdefined(UChar32);
|
||||
}
|
||||
|
||||
#else
|
||||
@@ -45,6 +62,7 @@ void ubrk_setText(UBreakIterator *, const UChar *, int32_t, UErrorCode *);
|
||||
#include <unicode/ucoleitr.h>
|
||||
#include <unicode/uiter.h>
|
||||
#include <unicode/ubrk.h>
|
||||
#include <unicode/uchar.h>
|
||||
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -137,76 +155,6 @@ private:
|
||||
ASCIICollation(const ASCIICollation &) = delete;
|
||||
};
|
||||
|
||||
/// Compares the strings via the Unicode Collation Algorithm on the root locale.
|
||||
/// Results are the usual string comparison results:
|
||||
/// <0 the left string is less than the right string.
|
||||
/// ==0 the strings are equal according to their collation.
|
||||
/// >0 the left string is greater than the right string.
|
||||
int32_t
|
||||
swift::_swift_stdlib_unicode_compare_utf16_utf16(const uint16_t *LeftString,
|
||||
int32_t LeftLength,
|
||||
const uint16_t *RightString,
|
||||
int32_t RightLength) {
|
||||
// ICU UChar type is platform dependent. In Cygwin, it is defined
|
||||
// as wchar_t which size is 2. It seems that the underlying binary
|
||||
// representation is same with swift utf16 representation.
|
||||
// On Clang 4.0 under a recent Linux, ICU uses the built-in char16_t type.
|
||||
return ucol_strcoll(GetRootCollator(),
|
||||
reinterpret_cast<const UChar *>(LeftString), LeftLength,
|
||||
reinterpret_cast<const UChar *>(RightString), RightLength);
|
||||
}
|
||||
|
||||
/// Compares the strings via the Unicode Collation Algorithm on the root locale.
|
||||
/// Results are the usual string comparison results:
|
||||
/// <0 the left string is less than the right string.
|
||||
/// ==0 the strings are equal according to their collation.
|
||||
/// >0 the left string is greater than the right string.
|
||||
int32_t
|
||||
swift::_swift_stdlib_unicode_compare_utf8_utf16(const unsigned char *LeftString,
|
||||
int32_t LeftLength,
|
||||
const uint16_t *RightString,
|
||||
int32_t RightLength) {
|
||||
UCharIterator LeftIterator;
|
||||
UCharIterator RightIterator;
|
||||
UErrorCode ErrorCode = U_ZERO_ERROR;
|
||||
|
||||
uiter_setUTF8(&LeftIterator, reinterpret_cast<const char *>(LeftString), LeftLength);
|
||||
uiter_setString(&RightIterator, reinterpret_cast<const UChar *>(RightString),
|
||||
RightLength);
|
||||
|
||||
uint32_t Diff = ucol_strcollIter(GetRootCollator(),
|
||||
&LeftIterator, &RightIterator, &ErrorCode);
|
||||
if (U_FAILURE(ErrorCode)) {
|
||||
swift::crash("ucol_strcollIter: Unexpected error doing utf8<->utf16 string comparison.");
|
||||
}
|
||||
return Diff;
|
||||
}
|
||||
|
||||
/// Compares the strings via the Unicode Collation Algorithm on the root locale.
|
||||
/// Results are the usual string comparison results:
|
||||
/// <0 the left string is less than the right string.
|
||||
/// ==0 the strings are equal according to their collation.
|
||||
/// >0 the left string is greater than the right string.
|
||||
int32_t
|
||||
swift::_swift_stdlib_unicode_compare_utf8_utf8(const unsigned char *LeftString,
|
||||
int32_t LeftLength,
|
||||
const unsigned char *RightString,
|
||||
int32_t RightLength) {
|
||||
UCharIterator LeftIterator;
|
||||
UCharIterator RightIterator;
|
||||
UErrorCode ErrorCode = U_ZERO_ERROR;
|
||||
|
||||
uiter_setUTF8(&LeftIterator, reinterpret_cast<const char *>(LeftString), LeftLength);
|
||||
uiter_setUTF8(&RightIterator, reinterpret_cast<const char *>(RightString), RightLength);
|
||||
|
||||
uint32_t Diff = ucol_strcollIter(GetRootCollator(),
|
||||
&LeftIterator, &RightIterator, &ErrorCode);
|
||||
if (U_FAILURE(ErrorCode)) {
|
||||
swift::crash("ucol_strcollIter: Unexpected error doing utf8<->utf8 string comparison.");
|
||||
}
|
||||
return Diff;
|
||||
}
|
||||
|
||||
void *swift::_swift_stdlib_unicodeCollationIterator_create(
|
||||
const __swift_uint16_t *Str, __swift_uint32_t Length) {
|
||||
UErrorCode ErrorCode = U_ZERO_ERROR;
|
||||
@@ -225,7 +173,8 @@ __swift_int32_t swift::_swift_stdlib_unicodeCollationIterator_next(
|
||||
auto Result = ucol_next(
|
||||
static_cast<UCollationElements *>(CollationIterator), &ErrorCode);
|
||||
if (U_FAILURE(ErrorCode)) {
|
||||
swift::crash("_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed.");
|
||||
swift::crash(
|
||||
"_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed.");
|
||||
}
|
||||
*HitEnd = (Result == UCOL_NULLORDER);
|
||||
return Result;
|
||||
@@ -328,6 +277,44 @@ void swift::__swift_stdlib_ubrk_setText(
|
||||
textLength, ptr_cast<UErrorCode>(status));
|
||||
}
|
||||
|
||||
/// Shim over ICU's unorm2_hasBoundaryBefore: converts the stdlib-facing
/// normalizer pointer to ICU's type and forwards the query for `char32`.
swift::__swift_stdlib_UBool swift::__swift_stdlib_unorm2_hasBoundaryBefore(
    const __swift_stdlib_UNormalizer2 *ptr, __swift_stdlib_UChar32 char32) {
  auto normalizer = ptr_cast<UNormalizer2>(ptr);
  return unorm2_hasBoundaryBefore(normalizer, char32);
}
|
||||
/// Shim over ICU's unorm2_getNFCInstance: passes the caller's error slot to
/// ICU and returns the resulting normalizer as the stdlib-facing pointer type.
const swift::__swift_stdlib_UNormalizer2 *
swift::__swift_stdlib_unorm2_getNFCInstance(__swift_stdlib_UErrorCode *err) {
  auto status = ptr_cast<UErrorCode>(err);
  auto nfc = unorm2_getNFCInstance(status);
  return ptr_cast<__swift_stdlib_UNormalizer2>(nfc);
}
|
||||
|
||||
int32_t swift::__swift_stdlib_unorm2_normalize(
|
||||
const __swift_stdlib_UNormalizer2 *norm, const __swift_uint16_t *src,
|
||||
__swift_int32_t len, __swift_uint16_t *dst, __swift_int32_t capacity,
|
||||
__swift_stdlib_UErrorCode *err) {
|
||||
return unorm2_normalize(ptr_cast<UNormalizer2>(norm), src, len, dst, capacity,
|
||||
ptr_cast<UErrorCode>(err));
|
||||
}
|
||||
|
||||
/// Shim over ICU's unorm2_spanQuickCheckYes: converts each stdlib-facing
/// pointer to the matching ICU type and forwards the call unchanged.
__swift_int32_t swift::__swift_stdlib_unorm2_spanQuickCheckYes(
    const __swift_stdlib_UNormalizer2 *norm, const __swift_stdlib_UChar *ptr,
    __swift_int32_t len, __swift_stdlib_UErrorCode *err) {
  auto normalizer = ptr_cast<UNormalizer2>(norm);
  auto text = ptr_cast<UChar>(ptr);
  auto status = ptr_cast<UErrorCode>(err);
  return unorm2_spanQuickCheckYes(normalizer, text, len, status);
}
|
||||
|
||||
/// Shim over ICU's u_hasBinaryProperty: casts the stdlib-facing property
/// value to ICU's UProperty and forwards the query for code point `c`.
swift::__swift_stdlib_UBool
swift::__swift_stdlib_u_hasBinaryProperty(__swift_stdlib_UChar32 c,
                                          __swift_stdlib_UProperty p) {
  const UProperty property = static_cast<UProperty>(p);
  return u_hasBinaryProperty(c, property);
}
|
||||
|
||||
/// Shim over ICU's u_isdefined: forwards the code point `c` to ICU and
/// returns its answer.
///
/// The parameter is spelled with the stdlib-facing __swift_stdlib_UChar32
/// typedef, matching the other shims in this file, rather than ICU's UChar32.
swift::__swift_stdlib_UBool
swift::__swift_stdlib_u_isdefined(__swift_stdlib_UChar32 c) {
  return u_isdefined(c);
}
|
||||
|
||||
|
||||
// Force an autolink with ICU
|
||||
#if defined(__MACH__)
|
||||
asm(".linker_option \"-licucore\"\n");
|
||||
|
||||
Reference in New Issue
Block a user