//===--- UnicodeNormalization.cpp - Unicode Normalization Helpers ---------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// // // Functions that use ICU to do unicode normalization and collation. // //===----------------------------------------------------------------------===// #include "../SwiftShims/UnicodeShims.h" #if !defined(__APPLE__) #include "swift/Basic/Lazy.h" #include "swift/Runtime/Config.h" #include "swift/Runtime/Debug.h" #include #include #include #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdocumentation" #include #include #include #include #include #pragma clang diagnostic pop static const UCollator *MakeRootCollator() { UErrorCode ErrorCode = U_ZERO_ERROR; UCollator *root = ucol_open("", &ErrorCode); if (U_FAILURE(ErrorCode)) { swift::crash("ucol_open: Failure setting up default collation."); } ucol_setAttribute(root, UCOL_NORMALIZATION_MODE, UCOL_ON, &ErrorCode); ucol_setAttribute(root, UCOL_STRENGTH, UCOL_TERTIARY, &ErrorCode); ucol_setAttribute(root, UCOL_NUMERIC_COLLATION, UCOL_OFF, &ErrorCode); ucol_setAttribute(root, UCOL_CASE_LEVEL, UCOL_OFF, &ErrorCode); if (U_FAILURE(ErrorCode)) { swift::crash("ucol_setAttribute: Failure setting up default collation."); } return root; } // According to this thread in the ICU mailing list, it should be safe // to assume the UCollator object is thread safe so long as you're only // passing it to functions that take a const pointer to it. So, we make it // const here to make sure we don't misuse it. // http://sourceforge.net/p/icu/mailman/message/27427062/ static const UCollator *GetRootCollator() { return SWIFT_LAZY_CONSTANT(MakeRootCollator()); } /// This class caches the collation element results for the ASCII subset of /// unicode. class ASCIICollation { public: friend class swift::Lazy; static swift::Lazy theTable; static const ASCIICollation *getTable() { return &theTable.get(); } int32_t CollationTable[128]; /// Maps an ASCII character to a collation element priority as would be /// returned by a call to ucol_next(). int32_t map(unsigned char c) const { return CollationTable[c]; } private: /// Construct the ASCII collation table. ASCIICollation() { const UCollator *Collator = GetRootCollator(); for (unsigned char c = 0; c < 128; ++c) { UErrorCode ErrorCode = U_ZERO_ERROR; intptr_t NumCollationElts = 0; #if defined(__CYGWIN__) || defined(_MSC_VER) UChar Buffer[1]; #else uint16_t Buffer[1]; #endif Buffer[0] = c; UCollationElements *CollationIterator = ucol_openElements(Collator, Buffer, 1, &ErrorCode); while (U_SUCCESS(ErrorCode)) { intptr_t Elem = ucol_next(CollationIterator, &ErrorCode); if (Elem != UCOL_NULLORDER) { CollationTable[c] = Elem; ++NumCollationElts; } else { break; } } ucol_closeElements(CollationIterator); if (U_FAILURE(ErrorCode) || NumCollationElts != 1) { swift::crash("Error setting up the ASCII collation table"); } } } ASCIICollation &operator=(const ASCIICollation &) = delete; ASCIICollation(const ASCIICollation &) = delete; }; /// Compares the strings via the Unicode Collation Algorithm on the root locale. /// Results are the usual string comparison results: /// <0 the left string is less than the right string. /// ==0 the strings are equal according to their collation. /// >0 the left string is greater than the right string. int32_t swift::_swift_stdlib_unicode_compare_utf16_utf16(const uint16_t *LeftString, int32_t LeftLength, const uint16_t *RightString, int32_t RightLength) { #if defined(__CYGWIN__) || defined(_MSC_VER) // ICU UChar type is platform dependent. In Cygwin, it is defined // as wchar_t which size is 2. It seems that the underlying binary // representation is same with swift utf16 representation. return ucol_strcoll(GetRootCollator(), reinterpret_cast(LeftString), LeftLength, reinterpret_cast(RightString), RightLength); #else return ucol_strcoll(GetRootCollator(), LeftString, LeftLength, RightString, RightLength); #endif } /// Compares the strings via the Unicode Collation Algorithm on the root locale. /// Results are the usual string comparison results: /// <0 the left string is less than the right string. /// ==0 the strings are equal according to their collation. /// >0 the left string is greater than the right string. int32_t swift::_swift_stdlib_unicode_compare_utf8_utf16(const unsigned char *LeftString, int32_t LeftLength, const uint16_t *RightString, int32_t RightLength) { UCharIterator LeftIterator; UCharIterator RightIterator; UErrorCode ErrorCode = U_ZERO_ERROR; uiter_setUTF8(&LeftIterator, reinterpret_cast(LeftString), LeftLength); #if defined(__CYGWIN__) || defined(_MSC_VER) uiter_setString(&RightIterator, reinterpret_cast(RightString), RightLength); #else uiter_setString(&RightIterator, RightString, RightLength); #endif uint32_t Diff = ucol_strcollIter(GetRootCollator(), &LeftIterator, &RightIterator, &ErrorCode); if (U_FAILURE(ErrorCode)) { swift::crash("ucol_strcollIter: Unexpected error doing utf8<->utf16 string comparison."); } return Diff; } /// Compares the strings via the Unicode Collation Algorithm on the root locale. /// Results are the usual string comparison results: /// <0 the left string is less than the right string. /// ==0 the strings are equal according to their collation. /// >0 the left string is greater than the right string. int32_t swift::_swift_stdlib_unicode_compare_utf8_utf8(const unsigned char *LeftString, int32_t LeftLength, const unsigned char *RightString, int32_t RightLength) { UCharIterator LeftIterator; UCharIterator RightIterator; UErrorCode ErrorCode = U_ZERO_ERROR; uiter_setUTF8(&LeftIterator, reinterpret_cast(LeftString), LeftLength); uiter_setUTF8(&RightIterator, reinterpret_cast(RightString), RightLength); uint32_t Diff = ucol_strcollIter(GetRootCollator(), &LeftIterator, &RightIterator, &ErrorCode); if (U_FAILURE(ErrorCode)) { swift::crash("ucol_strcollIter: Unexpected error doing utf8<->utf8 string comparison."); } return Diff; } void *swift::_swift_stdlib_unicodeCollationIterator_create( const __swift_uint16_t *Str, __swift_uint32_t Length) { UErrorCode ErrorCode = U_ZERO_ERROR; #if defined(__CYGWIN__) || defined(_MSC_VER) UCollationElements *CollationIterator = ucol_openElements( GetRootCollator(), reinterpret_cast(Str), Length, &ErrorCode); #else UCollationElements *CollationIterator = ucol_openElements( GetRootCollator(), Str, Length, &ErrorCode); #endif if (U_FAILURE(ErrorCode)) { swift::crash("_swift_stdlib_unicodeCollationIterator_create: ucol_openElements() failed."); } return CollationIterator; } __swift_int32_t swift::_swift_stdlib_unicodeCollationIterator_next( void *CollationIterator, bool *HitEnd) { UErrorCode ErrorCode = U_ZERO_ERROR; auto Result = ucol_next( static_cast(CollationIterator), &ErrorCode); if (U_FAILURE(ErrorCode)) { swift::crash("_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed."); } *HitEnd = (Result == UCOL_NULLORDER); return Result; } void swift::_swift_stdlib_unicodeCollationIterator_delete( void *CollationIterator) { ucol_closeElements(static_cast(CollationIterator)); } const __swift_int32_t *swift::_swift_stdlib_unicode_getASCIICollationTable() { return ASCIICollation::getTable()->CollationTable; } /// Convert the unicode string to uppercase. This function will return the /// required buffer length as a result. If this length does not match the /// 'DestinationCapacity' this function must be called again with a buffer of /// the required length to get an uppercase version of the string. int32_t swift::_swift_stdlib_unicode_strToUpper(uint16_t *Destination, int32_t DestinationCapacity, const uint16_t *Source, int32_t SourceLength) { UErrorCode ErrorCode = U_ZERO_ERROR; #if defined(__CYGWIN__) || defined(_MSC_VER) uint32_t OutputLength = u_strToUpper(reinterpret_cast(Destination), DestinationCapacity, reinterpret_cast(Source), SourceLength, "", &ErrorCode); #else uint32_t OutputLength = u_strToUpper(Destination, DestinationCapacity, Source, SourceLength, "", &ErrorCode); #endif if (U_FAILURE(ErrorCode) && ErrorCode != U_BUFFER_OVERFLOW_ERROR) { swift::crash("u_strToUpper: Unexpected error uppercasing unicode string."); } return OutputLength; } /// Convert the unicode string to lowercase. This function will return the /// required buffer length as a result. If this length does not match the /// 'DestinationCapacity' this function must be called again with a buffer of /// the required length to get a lowercase version of the string. int32_t swift::_swift_stdlib_unicode_strToLower(uint16_t *Destination, int32_t DestinationCapacity, const uint16_t *Source, int32_t SourceLength) { UErrorCode ErrorCode = U_ZERO_ERROR; #if defined(__CYGWIN__) || defined(_MSC_VER) uint32_t OutputLength = u_strToLower(reinterpret_cast(Destination), DestinationCapacity, reinterpret_cast(Source), SourceLength, "", &ErrorCode); #else uint32_t OutputLength = u_strToLower(Destination, DestinationCapacity, Source, SourceLength, "", &ErrorCode); #endif if (U_FAILURE(ErrorCode) && ErrorCode != U_BUFFER_OVERFLOW_ERROR) { swift::crash("u_strToLower: Unexpected error lowercasing unicode string."); } return OutputLength; } swift::Lazy ASCIICollation::theTable; #endif namespace { template T *ptr_cast(U *p) { return static_cast(static_cast(p)); } template const T *ptr_cast(const U *p) { return static_cast(static_cast(p)); } } #if defined(__APPLE__) #include extern "C" { // Declare a few external functions to avoid a dependency on ICU headers. typedef struct UBreakIterator UBreakIterator; typedef enum UBreakIteratorType {} UBreakIteratorType; typedef enum UErrorCode {} UErrorCode; typedef uint16_t UChar; void ubrk_close(UBreakIterator *); UBreakIterator *ubrk_open(UBreakIteratorType, const char *, const UChar *, int32_t, UErrorCode *); int32_t ubrk_preceding(UBreakIterator *, int32_t); int32_t ubrk_following(UBreakIterator *, int32_t); void ubrk_setText(UBreakIterator *, const UChar *, int32_t, UErrorCode *); } // Force an autolink with ICU asm(".linker_option \"-licucore\"\n"); #endif // defined(__APPLE__) void swift::__swift_stdlib_ubrk_close( swift::__swift_stdlib_UBreakIterator *bi) { ubrk_close(ptr_cast(bi)); } swift::__swift_stdlib_UBreakIterator *swift::__swift_stdlib_ubrk_open( swift::__swift_stdlib_UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, __swift_stdlib_UErrorCode *status) { return ptr_cast( ubrk_open(static_cast(type), locale, text, textLength, ptr_cast(status))); } int32_t swift::__swift_stdlib_ubrk_preceding(swift::__swift_stdlib_UBreakIterator *bi, int32_t offset) { return ubrk_preceding(ptr_cast(bi), offset); } int32_t swift::__swift_stdlib_ubrk_following(swift::__swift_stdlib_UBreakIterator *bi, int32_t offset) { return ubrk_following(ptr_cast(bi), offset); } void swift::__swift_stdlib_ubrk_setText( swift::__swift_stdlib_UBreakIterator *bi, const __swift_stdlib_UChar *text, __swift_int32_t textLength, __swift_stdlib_UErrorCode *status) { return ubrk_setText(ptr_cast(bi), ptr_cast(text), textLength, ptr_cast(status)); }