Files
swift-mirror/stdlib/public/stubs/UnicodeNormalization.cpp
Michael Ilseman 8e0362e2c1 [stubs] Autolink against icucore on Darwin.
Programs using a statically linked build of the standard library need
to explicitly link against icucore. There are various potential
hacks^Wsolutions to this problem, and this is an attempt at a lesser
of evils approach.

Emit a linker directive to perform autolinking against icucore on
Darwin systems. This allows us to avoid hacking the compiler driver
and propagating that hack onto any build systems that don't go through
the driver.
2017-05-19 10:39:24 -07:00

356 lines
13 KiB
C++

//===--- UnicodeNormalization.cpp - Unicode Normalization Helpers ---------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// Functions that use ICU to do unicode normalization and collation.
//
//===----------------------------------------------------------------------===//
#include "../SwiftShims/UnicodeShims.h"
#if !defined(__APPLE__)
#include "swift/Basic/Lazy.h"
#include "swift/Runtime/Config.h"
#include "swift/Runtime/Debug.h"
#include <algorithm>
#include <mutex>
#include <assert.h>
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdocumentation"
#include <unicode/ustring.h>
#include <unicode/ucol.h>
#include <unicode/ucoleitr.h>
#include <unicode/uiter.h>
#include <unicode/ubrk.h>
#pragma clang diagnostic pop
static const UCollator *MakeRootCollator() {
UErrorCode ErrorCode = U_ZERO_ERROR;
UCollator *root = ucol_open("", &ErrorCode);
if (U_FAILURE(ErrorCode)) {
swift::crash("ucol_open: Failure setting up default collation.");
}
ucol_setAttribute(root, UCOL_NORMALIZATION_MODE, UCOL_ON, &ErrorCode);
ucol_setAttribute(root, UCOL_STRENGTH, UCOL_TERTIARY, &ErrorCode);
ucol_setAttribute(root, UCOL_NUMERIC_COLLATION, UCOL_OFF, &ErrorCode);
ucol_setAttribute(root, UCOL_CASE_LEVEL, UCOL_OFF, &ErrorCode);
if (U_FAILURE(ErrorCode)) {
swift::crash("ucol_setAttribute: Failure setting up default collation.");
}
return root;
}
// According to this thread in the ICU mailing list, it should be safe
// to assume the UCollator object is thread safe so long as you're only
// passing it to functions that take a const pointer to it. So, we make it
// const here to make sure we don't misuse it.
// http://sourceforge.net/p/icu/mailman/message/27427062/
static const UCollator *GetRootCollator() {
return SWIFT_LAZY_CONSTANT(MakeRootCollator());
}
/// This class caches the collation element results for the ASCII subset of
/// unicode.
class ASCIICollation {
public:
friend class swift::Lazy<ASCIICollation>;
static swift::Lazy<ASCIICollation> theTable;
static const ASCIICollation *getTable() {
return &theTable.get();
}
int32_t CollationTable[128];
/// Maps an ASCII character to a collation element priority as would be
/// returned by a call to ucol_next().
int32_t map(unsigned char c) const {
return CollationTable[c];
}
private:
/// Construct the ASCII collation table.
ASCIICollation() {
const UCollator *Collator = GetRootCollator();
for (unsigned char c = 0; c < 128; ++c) {
UErrorCode ErrorCode = U_ZERO_ERROR;
intptr_t NumCollationElts = 0;
#if defined(__CYGWIN__) || defined(_MSC_VER)
UChar Buffer[1];
#else
uint16_t Buffer[1];
#endif
Buffer[0] = c;
UCollationElements *CollationIterator =
ucol_openElements(Collator, Buffer, 1, &ErrorCode);
while (U_SUCCESS(ErrorCode)) {
intptr_t Elem = ucol_next(CollationIterator, &ErrorCode);
if (Elem != UCOL_NULLORDER) {
CollationTable[c] = Elem;
++NumCollationElts;
} else {
break;
}
}
ucol_closeElements(CollationIterator);
if (U_FAILURE(ErrorCode) || NumCollationElts != 1) {
swift::crash("Error setting up the ASCII collation table");
}
}
}
ASCIICollation &operator=(const ASCIICollation &) = delete;
ASCIICollation(const ASCIICollation &) = delete;
};
/// Compares the strings via the Unicode Collation Algorithm on the root locale.
/// Results are the usual string comparison results:
/// <0 the left string is less than the right string.
/// ==0 the strings are equal according to their collation.
/// >0 the left string is greater than the right string.
int32_t
swift::_swift_stdlib_unicode_compare_utf16_utf16(const uint16_t *LeftString,
int32_t LeftLength,
const uint16_t *RightString,
int32_t RightLength) {
#if defined(__CYGWIN__) || defined(_MSC_VER)
// ICU UChar type is platform dependent. In Cygwin, it is defined
// as wchar_t which size is 2. It seems that the underlying binary
// representation is same with swift utf16 representation.
return ucol_strcoll(GetRootCollator(),
reinterpret_cast<const UChar *>(LeftString), LeftLength,
reinterpret_cast<const UChar *>(RightString), RightLength);
#else
return ucol_strcoll(GetRootCollator(),
LeftString, LeftLength,
RightString, RightLength);
#endif
}
/// Compares the strings via the Unicode Collation Algorithm on the root locale.
/// Results are the usual string comparison results:
/// <0 the left string is less than the right string.
/// ==0 the strings are equal according to their collation.
/// >0 the left string is greater than the right string.
int32_t
swift::_swift_stdlib_unicode_compare_utf8_utf16(const unsigned char *LeftString,
int32_t LeftLength,
const uint16_t *RightString,
int32_t RightLength) {
UCharIterator LeftIterator;
UCharIterator RightIterator;
UErrorCode ErrorCode = U_ZERO_ERROR;
uiter_setUTF8(&LeftIterator, reinterpret_cast<const char *>(LeftString), LeftLength);
#if defined(__CYGWIN__) || defined(_MSC_VER)
uiter_setString(&RightIterator, reinterpret_cast<const UChar *>(RightString),
RightLength);
#else
uiter_setString(&RightIterator, RightString, RightLength);
#endif
uint32_t Diff = ucol_strcollIter(GetRootCollator(),
&LeftIterator, &RightIterator, &ErrorCode);
if (U_FAILURE(ErrorCode)) {
swift::crash("ucol_strcollIter: Unexpected error doing utf8<->utf16 string comparison.");
}
return Diff;
}
/// Compares the strings via the Unicode Collation Algorithm on the root locale.
/// Results are the usual string comparison results:
/// <0 the left string is less than the right string.
/// ==0 the strings are equal according to their collation.
/// >0 the left string is greater than the right string.
int32_t
swift::_swift_stdlib_unicode_compare_utf8_utf8(const unsigned char *LeftString,
int32_t LeftLength,
const unsigned char *RightString,
int32_t RightLength) {
UCharIterator LeftIterator;
UCharIterator RightIterator;
UErrorCode ErrorCode = U_ZERO_ERROR;
uiter_setUTF8(&LeftIterator, reinterpret_cast<const char *>(LeftString), LeftLength);
uiter_setUTF8(&RightIterator, reinterpret_cast<const char *>(RightString), RightLength);
uint32_t Diff = ucol_strcollIter(GetRootCollator(),
&LeftIterator, &RightIterator, &ErrorCode);
if (U_FAILURE(ErrorCode)) {
swift::crash("ucol_strcollIter: Unexpected error doing utf8<->utf8 string comparison.");
}
return Diff;
}
void *swift::_swift_stdlib_unicodeCollationIterator_create(
const __swift_uint16_t *Str, __swift_uint32_t Length) {
UErrorCode ErrorCode = U_ZERO_ERROR;
#if defined(__CYGWIN__) || defined(_MSC_VER)
UCollationElements *CollationIterator = ucol_openElements(
GetRootCollator(), reinterpret_cast<const UChar *>(Str), Length,
&ErrorCode);
#else
UCollationElements *CollationIterator = ucol_openElements(
GetRootCollator(), Str, Length, &ErrorCode);
#endif
if (U_FAILURE(ErrorCode)) {
swift::crash("_swift_stdlib_unicodeCollationIterator_create: ucol_openElements() failed.");
}
return CollationIterator;
}
__swift_int32_t swift::_swift_stdlib_unicodeCollationIterator_next(
void *CollationIterator, bool *HitEnd) {
UErrorCode ErrorCode = U_ZERO_ERROR;
auto Result = ucol_next(
static_cast<UCollationElements *>(CollationIterator), &ErrorCode);
if (U_FAILURE(ErrorCode)) {
swift::crash("_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed.");
}
*HitEnd = (Result == UCOL_NULLORDER);
return Result;
}
void swift::_swift_stdlib_unicodeCollationIterator_delete(
void *CollationIterator) {
ucol_closeElements(static_cast<UCollationElements *>(CollationIterator));
}
const __swift_int32_t *swift::_swift_stdlib_unicode_getASCIICollationTable() {
return ASCIICollation::getTable()->CollationTable;
}
/// Convert the unicode string to uppercase. This function will return the
/// required buffer length as a result. If this length does not match the
/// 'DestinationCapacity' this function must be called again with a buffer of
/// the required length to get an uppercase version of the string.
int32_t
swift::_swift_stdlib_unicode_strToUpper(uint16_t *Destination,
int32_t DestinationCapacity,
const uint16_t *Source,
int32_t SourceLength) {
UErrorCode ErrorCode = U_ZERO_ERROR;
#if defined(__CYGWIN__) || defined(_MSC_VER)
uint32_t OutputLength = u_strToUpper(reinterpret_cast<UChar *>(Destination),
DestinationCapacity,
reinterpret_cast<const UChar *>(Source),
SourceLength,
"", &ErrorCode);
#else
uint32_t OutputLength = u_strToUpper(Destination, DestinationCapacity,
Source, SourceLength,
"", &ErrorCode);
#endif
if (U_FAILURE(ErrorCode) && ErrorCode != U_BUFFER_OVERFLOW_ERROR) {
swift::crash("u_strToUpper: Unexpected error uppercasing unicode string.");
}
return OutputLength;
}
/// Convert the unicode string to lowercase. This function will return the
/// required buffer length as a result. If this length does not match the
/// 'DestinationCapacity' this function must be called again with a buffer of
/// the required length to get a lowercase version of the string.
int32_t
swift::_swift_stdlib_unicode_strToLower(uint16_t *Destination,
int32_t DestinationCapacity,
const uint16_t *Source,
int32_t SourceLength) {
UErrorCode ErrorCode = U_ZERO_ERROR;
#if defined(__CYGWIN__) || defined(_MSC_VER)
uint32_t OutputLength = u_strToLower(reinterpret_cast<UChar *>(Destination),
DestinationCapacity,
reinterpret_cast<const UChar *>(Source),
SourceLength,
"", &ErrorCode);
#else
uint32_t OutputLength = u_strToLower(Destination, DestinationCapacity,
Source, SourceLength,
"", &ErrorCode);
#endif
if (U_FAILURE(ErrorCode) && ErrorCode != U_BUFFER_OVERFLOW_ERROR) {
swift::crash("u_strToLower: Unexpected error lowercasing unicode string.");
}
return OutputLength;
}
swift::Lazy<ASCIICollation> ASCIICollation::theTable;
#endif
namespace {
template <typename T, typename U> T *ptr_cast(U *p) {
return static_cast<T *>(static_cast<void *>(p));
}
template <typename T, typename U> const T *ptr_cast(const U *p) {
return static_cast<const T *>(static_cast<const void *>(p));
}
}
#if defined(__APPLE__)
#include <stdint.h>
extern "C" {
// Declare a few external functions to avoid a dependency on ICU headers.
typedef struct UBreakIterator UBreakIterator;
typedef enum UBreakIteratorType {} UBreakIteratorType;
typedef enum UErrorCode {} UErrorCode;
typedef uint16_t UChar;
void ubrk_close(UBreakIterator *);
UBreakIterator *ubrk_open(UBreakIteratorType, const char *, const UChar *,
int32_t, UErrorCode *);
int32_t ubrk_preceding(UBreakIterator *, int32_t);
int32_t ubrk_following(UBreakIterator *, int32_t);
void ubrk_setText(UBreakIterator *, const UChar *, int32_t, UErrorCode *);
}
// Force an autolink with ICU
asm(".linker_option \"-licucore\"\n");
#endif // defined(__APPLE__)
void swift::__swift_stdlib_ubrk_close(
swift::__swift_stdlib_UBreakIterator *bi) {
ubrk_close(ptr_cast<UBreakIterator>(bi));
}
swift::__swift_stdlib_UBreakIterator *swift::__swift_stdlib_ubrk_open(
swift::__swift_stdlib_UBreakIteratorType type, const char *locale,
const UChar *text, int32_t textLength, __swift_stdlib_UErrorCode *status) {
return ptr_cast<swift::__swift_stdlib_UBreakIterator>(
ubrk_open(static_cast<UBreakIteratorType>(type), locale, text, textLength,
ptr_cast<UErrorCode>(status)));
}
int32_t
swift::__swift_stdlib_ubrk_preceding(swift::__swift_stdlib_UBreakIterator *bi,
int32_t offset) {
return ubrk_preceding(ptr_cast<UBreakIterator>(bi), offset);
}
int32_t
swift::__swift_stdlib_ubrk_following(swift::__swift_stdlib_UBreakIterator *bi,
int32_t offset) {
return ubrk_following(ptr_cast<UBreakIterator>(bi), offset);
}
void swift::__swift_stdlib_ubrk_setText(
swift::__swift_stdlib_UBreakIterator *bi, const __swift_stdlib_UChar *text,
__swift_int32_t textLength, __swift_stdlib_UErrorCode *status) {
return ubrk_setText(ptr_cast<UBreakIterator>(bi), ptr_cast<UChar>(text),
textLength, ptr_cast<UErrorCode>(status));
}