Add unicode normalization shims

This commit is contained in:
Lance Parker
2018-02-14 12:02:23 -08:00
parent 5b375ef453
commit 32d21c489f
2 changed files with 214 additions and 4 deletions

View File

@@ -109,6 +109,123 @@ __swift_int32_t _swift_stdlib_unicode_strToLower(
__swift_uint16_t *Destination, __swift_int32_t DestinationCapacity, __swift_uint16_t *Destination, __swift_int32_t DestinationCapacity,
const __swift_uint16_t *Source, __swift_int32_t SourceLength); const __swift_uint16_t *Source, __swift_int32_t SourceLength);
// Swift-private copy of ICU's UProperty selector enum.
//
// The shim implementation passes values of this type to ICU's
// u_hasBinaryProperty via static_cast<UProperty>, and, when ICU headers are
// not used, aliases UProperty directly to this enum. The numeric value of
// every enumerator therefore has to agree with the ICU library that is linked.
// NOTE(review): values are presumably transcribed from ICU's unicode/uchar.h;
// re-verify them when the supported ICU version changes.
typedef enum __swift_stdlib_UProperty {
// Selectors 0...60; UCHAR_BINARY_START aliases the first one.
__swift_stdlib_UCHAR_ALPHABETIC = 0,
__swift_stdlib_UCHAR_BINARY_START = __swift_stdlib_UCHAR_ALPHABETIC,
__swift_stdlib_UCHAR_ASCII_HEX_DIGIT = 1,
__swift_stdlib_UCHAR_BIDI_CONTROL = 2,
__swift_stdlib_UCHAR_BIDI_MIRRORED = 3,
__swift_stdlib_UCHAR_DASH = 4,
__swift_stdlib_UCHAR_DEFAULT_IGNORABLE_CODE_POINT = 5,
__swift_stdlib_UCHAR_DEPRECATED = 6,
__swift_stdlib_UCHAR_DIACRITIC = 7,
__swift_stdlib_UCHAR_EXTENDER = 8,
__swift_stdlib_UCHAR_FULL_COMPOSITION_EXCLUSION = 9,
__swift_stdlib_UCHAR_GRAPHEME_BASE = 10,
__swift_stdlib_UCHAR_GRAPHEME_EXTEND = 11,
__swift_stdlib_UCHAR_GRAPHEME_LINK = 12,
__swift_stdlib_UCHAR_HEX_DIGIT = 13,
__swift_stdlib_UCHAR_HYPHEN = 14,
__swift_stdlib_UCHAR_ID_CONTINUE = 15,
__swift_stdlib_UCHAR_ID_START = 16,
__swift_stdlib_UCHAR_IDEOGRAPHIC = 17,
__swift_stdlib_UCHAR_IDS_BINARY_OPERATOR = 18,
__swift_stdlib_UCHAR_IDS_TRINARY_OPERATOR = 19,
__swift_stdlib_UCHAR_JOIN_CONTROL = 20,
__swift_stdlib_UCHAR_LOGICAL_ORDER_EXCEPTION = 21,
__swift_stdlib_UCHAR_LOWERCASE = 22,
__swift_stdlib_UCHAR_MATH = 23,
__swift_stdlib_UCHAR_NONCHARACTER_CODE_POINT = 24,
__swift_stdlib_UCHAR_QUOTATION_MARK = 25,
__swift_stdlib_UCHAR_RADICAL = 26,
__swift_stdlib_UCHAR_SOFT_DOTTED = 27,
__swift_stdlib_UCHAR_TERMINAL_PUNCTUATION = 28,
__swift_stdlib_UCHAR_UNIFIED_IDEOGRAPH = 29,
__swift_stdlib_UCHAR_UPPERCASE = 30,
__swift_stdlib_UCHAR_WHITE_SPACE = 31,
__swift_stdlib_UCHAR_XID_CONTINUE = 32,
__swift_stdlib_UCHAR_XID_START = 33,
__swift_stdlib_UCHAR_CASE_SENSITIVE = 34,
__swift_stdlib_UCHAR_S_TERM = 35,
__swift_stdlib_UCHAR_VARIATION_SELECTOR = 36,
__swift_stdlib_UCHAR_NFD_INERT = 37,
__swift_stdlib_UCHAR_NFKD_INERT = 38,
__swift_stdlib_UCHAR_NFC_INERT = 39,
__swift_stdlib_UCHAR_NFKC_INERT = 40,
__swift_stdlib_UCHAR_SEGMENT_STARTER = 41,
__swift_stdlib_UCHAR_PATTERN_SYNTAX = 42,
__swift_stdlib_UCHAR_PATTERN_WHITE_SPACE = 43,
__swift_stdlib_UCHAR_POSIX_ALNUM = 44,
__swift_stdlib_UCHAR_POSIX_BLANK = 45,
__swift_stdlib_UCHAR_POSIX_GRAPH = 46,
__swift_stdlib_UCHAR_POSIX_PRINT = 47,
__swift_stdlib_UCHAR_POSIX_XDIGIT = 48,
__swift_stdlib_UCHAR_CASED = 49,
__swift_stdlib_UCHAR_CASE_IGNORABLE = 50,
__swift_stdlib_UCHAR_CHANGES_WHEN_LOWERCASED = 51,
__swift_stdlib_UCHAR_CHANGES_WHEN_UPPERCASED = 52,
__swift_stdlib_UCHAR_CHANGES_WHEN_TITLECASED = 53,
__swift_stdlib_UCHAR_CHANGES_WHEN_CASEFOLDED = 54,
__swift_stdlib_UCHAR_CHANGES_WHEN_CASEMAPPED = 55,
__swift_stdlib_UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED = 56,
__swift_stdlib_UCHAR_EMOJI = 57,
__swift_stdlib_UCHAR_EMOJI_PRESENTATION = 58,
__swift_stdlib_UCHAR_EMOJI_MODIFIER = 59,
__swift_stdlib_UCHAR_EMOJI_MODIFIER_BASE = 60,
// Selectors 0x1000...0x1015; UCHAR_INT_START aliases the first one.
__swift_stdlib_UCHAR_BIDI_CLASS = 0x1000,
__swift_stdlib_UCHAR_INT_START = __swift_stdlib_UCHAR_BIDI_CLASS,
__swift_stdlib_UCHAR_BLOCK = 0x1001,
__swift_stdlib_UCHAR_CANONICAL_COMBINING_CLASS = 0x1002,
__swift_stdlib_UCHAR_DECOMPOSITION_TYPE = 0x1003,
__swift_stdlib_UCHAR_EAST_ASIAN_WIDTH = 0x1004,
__swift_stdlib_UCHAR_GENERAL_CATEGORY = 0x1005,
__swift_stdlib_UCHAR_JOINING_GROUP = 0x1006,
__swift_stdlib_UCHAR_JOINING_TYPE = 0x1007,
__swift_stdlib_UCHAR_LINE_BREAK = 0x1008,
__swift_stdlib_UCHAR_NUMERIC_TYPE = 0x1009,
__swift_stdlib_UCHAR_SCRIPT = 0x100A,
__swift_stdlib_UCHAR_HANGUL_SYLLABLE_TYPE = 0x100B,
__swift_stdlib_UCHAR_NFD_QUICK_CHECK = 0x100C,
__swift_stdlib_UCHAR_NFKD_QUICK_CHECK = 0x100D,
__swift_stdlib_UCHAR_NFC_QUICK_CHECK = 0x100E,
__swift_stdlib_UCHAR_NFKC_QUICK_CHECK = 0x100F,
__swift_stdlib_UCHAR_LEAD_CANONICAL_COMBINING_CLASS = 0x1010,
__swift_stdlib_UCHAR_TRAIL_CANONICAL_COMBINING_CLASS = 0x1011,
__swift_stdlib_UCHAR_GRAPHEME_CLUSTER_BREAK = 0x1012,
__swift_stdlib_UCHAR_SENTENCE_BREAK = 0x1013,
__swift_stdlib_UCHAR_WORD_BREAK = 0x1014,
__swift_stdlib_UCHAR_BIDI_PAIRED_BRACKET_TYPE = 0x1015,
// Single selector at 0x2000; UCHAR_MASK_START aliases it.
__swift_stdlib_UCHAR_GENERAL_CATEGORY_MASK = 0x2000,
__swift_stdlib_UCHAR_MASK_START = __swift_stdlib_UCHAR_GENERAL_CATEGORY_MASK,
// Single selector at 0x3000; UCHAR_DOUBLE_START aliases it.
__swift_stdlib_UCHAR_NUMERIC_VALUE = 0x3000,
__swift_stdlib_UCHAR_DOUBLE_START = __swift_stdlib_UCHAR_NUMERIC_VALUE,
// Selectors 0x4000...0x400D; UCHAR_STRING_START aliases the first one.
// 0x4003 and 0x400B are intentionally absent from this list
// (NOTE(review): presumably selectors ICU has deprecated; confirm).
__swift_stdlib_UCHAR_AGE = 0x4000,
__swift_stdlib_UCHAR_STRING_START = __swift_stdlib_UCHAR_AGE,
__swift_stdlib_UCHAR_BIDI_MIRRORING_GLYPH = 0x4001,
__swift_stdlib_UCHAR_CASE_FOLDING = 0x4002,
__swift_stdlib_UCHAR_LOWERCASE_MAPPING = 0x4004,
__swift_stdlib_UCHAR_NAME = 0x4005,
__swift_stdlib_UCHAR_SIMPLE_CASE_FOLDING = 0x4006,
__swift_stdlib_UCHAR_SIMPLE_LOWERCASE_MAPPING = 0x4007,
__swift_stdlib_UCHAR_SIMPLE_TITLECASE_MAPPING = 0x4008,
__swift_stdlib_UCHAR_SIMPLE_UPPERCASE_MAPPING = 0x4009,
__swift_stdlib_UCHAR_TITLECASE_MAPPING = 0x400A,
__swift_stdlib_UCHAR_UPPERCASE_MAPPING = 0x400C,
__swift_stdlib_UCHAR_BIDI_PAIRED_BRACKET = 0x400D,
// Single selector at 0x7000; UCHAR_OTHER_PROPERTY_START aliases it.
__swift_stdlib_UCHAR_SCRIPT_EXTENSIONS = 0x7000,
__swift_stdlib_UCHAR_OTHER_PROPERTY_START =
__swift_stdlib_UCHAR_SCRIPT_EXTENSIONS,
// Sentinel; the only negative value in the enum.
__swift_stdlib_UCHAR_INVALID_CODE = -1
} __swift_stdlib_UProperty;
typedef enum __swift_stdlib_UErrorCode { typedef enum __swift_stdlib_UErrorCode {
__swift_stdlib_U_USING_FALLBACK_WARNING = -128, __swift_stdlib_U_USING_FALLBACK_WARNING = -128,
__swift_stdlib_U_ERROR_WARNING_START = -128, __swift_stdlib_U_ERROR_WARNING_START = -128,
@@ -294,7 +411,10 @@ typedef enum __swift_stdlib_UBreakIteratorType {
} __swift_stdlib_UBreakIteratorType; } __swift_stdlib_UBreakIteratorType;
typedef struct __swift_stdlib_UBreakIterator __swift_stdlib_UBreakIterator; typedef struct __swift_stdlib_UBreakIterator __swift_stdlib_UBreakIterator;
// Opaque normalizer handle: the struct is declared but never defined here, so
// it can only be used through pointers. The shim implementation converts these
// pointers to and from ICU's UNormalizer2 pointers.
typedef struct __swift_stdlib_UNormalizer2 __swift_stdlib_UNormalizer2;
typedef __swift_uint16_t __swift_stdlib_UChar; typedef __swift_uint16_t __swift_stdlib_UChar;
// 32-bit signed integer type used by the shims wherever ICU's API takes a
// UChar32 argument.
typedef __swift_int32_t __swift_stdlib_UChar32;
// 8-bit signed integer type used by the shims wherever ICU's API returns a
// UBool. NOTE(review): assumes the linked ICU also defines UBool as an 8-bit
// integer; confirm for the ICU versions supported.
typedef __swift_int8_t __swift_stdlib_UBool;
SWIFT_RUNTIME_STDLIB_INTERFACE SWIFT_RUNTIME_STDLIB_INTERFACE
void __swift_stdlib_ubrk_close(__swift_stdlib_UBreakIterator *bi); void __swift_stdlib_ubrk_close(__swift_stdlib_UBreakIterator *bi);
@@ -321,6 +441,37 @@ SWIFT_RUNTIME_STDLIB_INTERFACE
__swift_int32_t __swift_stdlib_ubrk_following(__swift_stdlib_UBreakIterator *bi, __swift_int32_t __swift_stdlib_ubrk_following(__swift_stdlib_UBreakIterator *bi,
__swift_int32_t offset); __swift_int32_t offset);
// Shim for ICU's unorm2_hasBoundaryBefore.
// Parameters, in order: the normalizer handle and the code point to query.
// The implementation forwards both to ICU and returns ICU's result unchanged;
// see ICU's unorm2.h for the meaning of the result.
SWIFT_RUNTIME_STDLIB_INTERFACE
__swift_stdlib_UBool
__swift_stdlib_unorm2_hasBoundaryBefore(const __swift_stdlib_UNormalizer2 *,
__swift_stdlib_UChar32);
// Shim for ICU's unorm2_getNFCInstance.
// The parameter is an in/out ICU error code that is passed through to ICU.
// Returns the pointer ICU returned, retyped as the shim's opaque handle.
// NOTE(review): ownership/lifetime of the returned handle is defined by ICU;
// no matching "close" shim is declared alongside this one.
SWIFT_RUNTIME_STDLIB_INTERFACE
const __swift_stdlib_UNormalizer2 *
__swift_stdlib_unorm2_getNFCInstance(__swift_stdlib_UErrorCode *);
// Shim for ICU's unorm2_normalize.
// Parameters, in order: normalizer handle, source UTF-16 buffer, source
// length, destination UTF-16 buffer, destination capacity, in/out ICU error
// code. All arguments are forwarded to ICU and ICU's return value is returned
// unchanged; see ICU's unorm2.h for the exact length/overflow contract.
SWIFT_RUNTIME_STDLIB_INTERFACE
__swift_int32_t
__swift_stdlib_unorm2_normalize(const __swift_stdlib_UNormalizer2 *,
const __swift_stdlib_UChar *, __swift_int32_t,
__swift_stdlib_UChar *, __swift_int32_t,
__swift_stdlib_UErrorCode *);
// Shim for ICU's unorm2_spanQuickCheckYes.
// Parameters, in order: normalizer handle, UTF-16 buffer, buffer length,
// in/out ICU error code. All arguments are forwarded to ICU and ICU's return
// value is returned unchanged; see ICU's unorm2.h for its meaning.
SWIFT_RUNTIME_STDLIB_INTERFACE
__swift_int32_t __swift_stdlib_unorm2_spanQuickCheckYes(
const __swift_stdlib_UNormalizer2 *, const __swift_stdlib_UChar *,
__swift_int32_t, __swift_stdlib_UErrorCode *);
// Shim for ICU's u_hasBinaryProperty.
// Parameters, in order: the code point to query and the property selector.
// The selector is converted to ICU's UProperty with a static_cast, so it must
// carry the same numeric value ICU uses for that property.
SWIFT_RUNTIME_STDLIB_INTERFACE
__swift_stdlib_UBool
__swift_stdlib_u_hasBinaryProperty(__swift_stdlib_UChar32,
__swift_stdlib_UProperty);
// Shim for ICU's u_isdefined.
// Takes a code point, forwards it to ICU, and returns ICU's result unchanged.
SWIFT_RUNTIME_STDLIB_INTERFACE
__swift_stdlib_UBool
__swift_stdlib_u_isdefined(__swift_stdlib_UChar32);
#ifdef __cplusplus #ifdef __cplusplus
}} // extern "C", namespace swift }} // extern "C", namespace swift
#endif #endif

View File

@@ -22,17 +22,34 @@
// Declare a few external functions to avoid a dependency on ICU headers. // Declare a few external functions to avoid a dependency on ICU headers.
extern "C" { extern "C" {
// Types
typedef struct UBreakIterator UBreakIterator; typedef struct UBreakIterator UBreakIterator;
// Opaque ICU normalizer handle. It gets its own struct tag so that it is a
// distinct type from UBreakIterator and the two handles cannot be mixed up.
typedef struct UNormalizer2 UNormalizer2;
typedef enum UBreakIteratorType {} UBreakIteratorType; typedef enum UBreakIteratorType {} UBreakIteratorType;
typedef enum UErrorCode {} UErrorCode; typedef enum UErrorCode {} UErrorCode;
typedef uint16_t UChar; typedef uint16_t UChar;
// Stand-ins for ICU's scalar typedefs, used because this branch deliberately
// avoids including ICU headers.
// NOTE(review): these must match the definitions in the ICU library that is
// actually linked; confirm when the supported ICU version changes.
typedef int32_t UChar32;
typedef int8_t UBool;
// Reuse the shim header's enum as UProperty instead of declaring another copy
// of the enumerators here.
typedef swift::__swift_stdlib_UProperty UProperty;
// Grapheme breaking APIs
void ubrk_close(UBreakIterator *); void ubrk_close(UBreakIterator *);
UBreakIterator *ubrk_open(UBreakIteratorType, const char *, const UChar *, UBreakIterator *ubrk_open(UBreakIteratorType, const char *, const UChar *,
int32_t, UErrorCode *); int32_t, UErrorCode *);
int32_t ubrk_preceding(UBreakIterator *, int32_t); int32_t ubrk_preceding(UBreakIterator *, int32_t);
int32_t ubrk_following(UBreakIterator *, int32_t); int32_t ubrk_following(UBreakIterator *, int32_t);
void ubrk_setText(UBreakIterator *, const UChar *, int32_t, UErrorCode *); void ubrk_setText(UBreakIterator *, const UChar *, int32_t, UErrorCode *);
// Comparison, normalization, and character property APIs
//
// Hand-written prototypes for the ICU C functions that the shims below call.
// They take the place of ICU's own headers in this branch, so each signature
// has to agree with the one exported by the linked ICU library.
// NOTE(review): verify against unicode/unorm2.h and unicode/uchar.h.
int32_t unorm2_spanQuickCheckYes(const UNormalizer2 *, const UChar *, int32_t,
UErrorCode *);
int32_t unorm2_normalize(const UNormalizer2 *, const UChar *, int32_t, UChar *,
int32_t, UErrorCode *);
const UNormalizer2 *unorm2_getNFCInstance(UErrorCode *);
UBool unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
UBool u_hasBinaryProperty(UChar32, UProperty);
UBool u_isdefined(UChar32);
} }
#else #else
@@ -45,6 +62,7 @@ void ubrk_setText(UBreakIterator *, const UChar *, int32_t, UErrorCode *);
#include <unicode/ucoleitr.h> #include <unicode/ucoleitr.h>
#include <unicode/uiter.h> #include <unicode/uiter.h>
#include <unicode/ubrk.h> #include <unicode/ubrk.h>
#include <unicode/uchar.h>
#pragma clang diagnostic pop #pragma clang diagnostic pop
@@ -225,7 +243,8 @@ __swift_int32_t swift::_swift_stdlib_unicodeCollationIterator_next(
auto Result = ucol_next( auto Result = ucol_next(
static_cast<UCollationElements *>(CollationIterator), &ErrorCode); static_cast<UCollationElements *>(CollationIterator), &ErrorCode);
if (U_FAILURE(ErrorCode)) { if (U_FAILURE(ErrorCode)) {
swift::crash("_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed."); swift::crash(
"_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed.");
} }
*HitEnd = (Result == UCOL_NULLORDER); *HitEnd = (Result == UCOL_NULLORDER);
return Result; return Result;
@@ -299,9 +318,11 @@ void swift::__swift_stdlib_ubrk_close(
ubrk_close(ptr_cast<UBreakIterator>(bi)); ubrk_close(ptr_cast<UBreakIterator>(bi));
} }
swift::__swift_stdlib_UBreakIterator *swift::__swift_stdlib_ubrk_open( swift::__swift_stdlib_UBreakIterator *
swift::__swift_stdlib_UBreakIteratorType type, const char *locale, swift::__swift_stdlib_ubrk_open(swift::__swift_stdlib_UBreakIteratorType type,
const uint16_t *text, int32_t textLength, __swift_stdlib_UErrorCode *status) { const char *locale, const uint16_t *text,
int32_t textLength,
__swift_stdlib_UErrorCode *status) {
return ptr_cast<swift::__swift_stdlib_UBreakIterator>( return ptr_cast<swift::__swift_stdlib_UBreakIterator>(
ubrk_open(static_cast<UBreakIteratorType>(type), locale, ubrk_open(static_cast<UBreakIteratorType>(type), locale,
reinterpret_cast<const UChar *>(text), textLength, reinterpret_cast<const UChar *>(text), textLength,
@@ -327,6 +348,44 @@ void swift::__swift_stdlib_ubrk_setText(
textLength, ptr_cast<UErrorCode>(status)); textLength, ptr_cast<UErrorCode>(status));
} }
// Thin wrapper over ICU's unorm2_hasBoundaryBefore: retypes the shim's opaque
// normalizer handle as ICU's UNormalizer2 and passes the code point through.
swift::__swift_stdlib_UBool swift::__swift_stdlib_unorm2_hasBoundaryBefore(
    const __swift_stdlib_UNormalizer2 *ptr, __swift_stdlib_UChar32 char32) {
  const auto *icuNormalizer = ptr_cast<UNormalizer2>(ptr);
  return unorm2_hasBoundaryBefore(icuNormalizer, char32);
}
// Thin wrapper over ICU's unorm2_getNFCInstance: hands the caller's error-code
// slot to ICU and returns ICU's normalizer pointer retyped as the shim's
// opaque handle.
const swift::__swift_stdlib_UNormalizer2 *
swift::__swift_stdlib_unorm2_getNFCInstance(__swift_stdlib_UErrorCode *err) {
  auto *icuStatus = ptr_cast<UErrorCode>(err);
  const UNormalizer2 *icuNormalizer = unorm2_getNFCInstance(icuStatus);
  return ptr_cast<__swift_stdlib_UNormalizer2>(icuNormalizer);
}
int32_t swift::__swift_stdlib_unorm2_normalize(
const __swift_stdlib_UNormalizer2 *norm, const __swift_stdlib_UChar *src,
__swift_int32_t len, __swift_stdlib_UChar *dst, __swift_int32_t capacity,
__swift_stdlib_UErrorCode *err) {
return unorm2_normalize(ptr_cast<UNormalizer2>(norm), src, len, dst, capacity,
ptr_cast<UErrorCode>(err));
}
// Thin wrapper over ICU's unorm2_spanQuickCheckYes: retypes the handle, the
// UTF-16 buffer and the error-code slot as their ICU counterparts, forwards
// the length as-is, and returns ICU's result.
__swift_int32_t swift::__swift_stdlib_unorm2_spanQuickCheckYes(
    const __swift_stdlib_UNormalizer2 *norm, const __swift_stdlib_UChar *ptr,
    __swift_int32_t len, __swift_stdlib_UErrorCode *err) {
  const auto *icuNormalizer = ptr_cast<UNormalizer2>(norm);
  const auto *icuText = ptr_cast<UChar>(ptr);
  auto *icuStatus = ptr_cast<UErrorCode>(err);
  return unorm2_spanQuickCheckYes(icuNormalizer, icuText, len, icuStatus);
}
// Thin wrapper over ICU's u_hasBinaryProperty: converts the shim's property
// selector to ICU's UProperty by value and forwards the code point unchanged.
swift::__swift_stdlib_UBool
swift::__swift_stdlib_u_hasBinaryProperty(__swift_stdlib_UChar32 c,
                                          __swift_stdlib_UProperty p) {
  const auto icuProperty = static_cast<UProperty>(p);
  return u_hasBinaryProperty(c, icuProperty);
}
// Thin wrapper over ICU's u_isdefined: forwards the code point to ICU and
// returns ICU's result. The parameter is spelled with the shim's own code-point
// type so the definition matches its declaration and the sibling wrappers.
swift::__swift_stdlib_UBool
swift::__swift_stdlib_u_isdefined(__swift_stdlib_UChar32 c) {
  return u_isdefined(c);
}
// Force an autolink with ICU // Force an autolink with ICU
#if defined(__MACH__) #if defined(__MACH__)
asm(".linker_option \"-licucore\"\n"); asm(".linker_option \"-licucore\"\n");