mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
249 lines
7.3 KiB
Swift
249 lines
7.3 KiB
Swift
//===--- UnicodeTrie.swift.gyb --------------------------------*- swift -*-===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See http://swift.org/LICENSE.txt for license information
|
|
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// RUN: rm -rf %t && mkdir -p %t && %S/../../utils/gyb -DunicodeGraphemeBreakPropertyFile=%S/../../utils/UnicodeData/GraphemeBreakProperty.txt -DunicodeGraphemeBreakTestFile=%S/../../utils/UnicodeData/GraphemeBreakTest.txt %s -o %t/UnicodeTrie.swift
|
|
// RUN: %S/../../utils/line-directive %t/UnicodeTrie.swift -- %target-build-swift %t/UnicodeTrie.swift -o %t/a.out -g -Xfrontend -disable-access-control
|
|
// RUN: %S/../../utils/line-directive %t/UnicodeTrie.swift -- %target-run %t/a.out
|
|
|
|
// FIXME: rdar://problem/19648117 Needs splitting objc parts out
|
|
// XFAIL: linux
|
|
|
|
%{

# Load the grapheme cluster break property table from the data file whose
# path is supplied to gyb as -DunicodeGraphemeBreakPropertyFile (see the RUN
# line at the top of this file).
from GYBUnicodeDataUtils import *

grapheme_cluster_break_property_table = GraphemeClusterBreakPropertyTable(
    unicodeGraphemeBreakPropertyFile)

}%
|
|
|
|
import SwiftPrivate
|
|
import StdlibUnittest
|
|
import Darwin
|
|
import Foundation
|
|
|
|
// Generated table of (first code point, last code point, property value)
// triples.  Both ends of each range are inclusive.  The table is only ever
// read, so it is an immutable binding.
let graphemeBreakPropertyTable = [
  // 'as Int' annotations are needed to help prevent the type-checker from
  // blowing the stack. <rdar://problem/17539704>
% for start_code_point,end_code_point,value in grapheme_cluster_break_property_table.property_value_ranges:
  (${start_code_point} as Int, ${end_code_point} as Int, _GraphemeClusterBreakPropertyValue.${value}),
% end
]
|
|
|
|
// Suite that every test in this file is registered with.
var UnicodeTrie = TestSuite("UnicodeTrie")
|
|
|
|
UnicodeTrie.test("_UnicodeGraphemeClusterBreakPropertyTrie") {
  // Verify that the trie reports correct values of the property for every code
  // point.

  // The trie is only queried below, never mutated, so bind it immutably.
  let trie = _UnicodeGraphemeClusterBreakPropertyTrie()

  // Build the reference answer: every code point starts out as `.Other`, then
  // each inclusive range from the generated table overwrites its own slice.
  var expected = [_GraphemeClusterBreakPropertyValue](count: 0x110000,
    repeatedValue: _GraphemeClusterBreakPropertyValue.Other)
  for (startCodePoint, endCodePoint, value) in graphemeBreakPropertyTable {
    for cp in startCodePoint...endCodePoint {
      expected[cp] = value
    }
  }

  for cp in UInt32(0)...UInt32(0x10ffff) {
    // Emit a progress marker once every 0x10000 code points.
    if cp % 0x10000 == 0 {
      print("\(cp)...")
    }
    expectEqual(expected[Int(cp)], trie.getPropertyValue(cp)) {
      "code point \(cp)"
    }
  }
}
|
|
|
|
%{

# Load the segmentation test cases from the data file whose path is supplied
# to gyb as -DunicodeGraphemeBreakTestFile.  Each entry is unpacked below as
# a (code points, expected boundaries) pair.
grapheme_cluster_break_tests = (
    get_grapheme_cluster_break_tests_as_unicode_scalars(
        unicodeGraphemeBreakTestFile))

}%
|
|
|
|
/// A sink that collects every element put into it in an array, preserving
/// the order in which the elements arrived.
struct ArraySinkOf<T> : SinkType {
  /// Elements collected so far.
  var array: [T] = []

  /// Creates a sink with no collected elements.
  init() {}

  /// Creates a sink whose collected elements start out as `elements`.
  init(_ elements: [T]) {
    array = elements
  }

  /// Adds `x` to the end of the collected elements.
  mutating func put(x: T) {
    array.append(x)
  }
}
|
|
|
|
// The most simple subclass of NSString that CoreFoundation does not know
// about.  It stores its contents as an array of UTF-16 code units and
// overrides only `length`, `characterAtIndex` and `copyWithZone`.
class NonContiguousNSString : NSString {
  /// UTF-16 code units that make up the string.
  var _value: [UInt16]

  /// Creates an empty string.
  override init() {
    _value = []
    super.init()
  }

  /// Decoding is not supported; calling this initializer traps.
  required init(coder aDecoder: NSCoder) {
    fatalError("don't call this initializer")
  }

  /// Creates a string from UTF-16 code units, stored exactly as given.
  init(_ value: [UInt16]) {
    _value = value
    super.init()
  }

  /// Creates a string from UTF-32 code units by transcoding them to UTF-16.
  ///
  /// A transcoding error is reported through `expectFalse`.
  convenience init(_ scalars: [UInt32]) {
    var encoded = ArraySinkOf<UInt16>()
    // The generator is handed to `transcode` by value and never mutated
    // here, so an immutable binding is sufficient.
    let g = scalars.generate()
    let hadError =
      transcode(UTF32.self, UTF16.self, g, &encoded, stopOnError: true)
    expectFalse(hadError)
    self.init(encoded.array)
  }

  @objc override func copyWithZone(zone: NSZone) -> AnyObject {
    // Ensure that copying this string produces a class that CoreFoundation
    // does not know about.
    return self
  }

  /// Number of UTF-16 code units in the string.
  @objc override var length: Int {
    return _value.count
  }

  /// Returns the UTF-16 code unit stored at `index`.
  @objc override func characterAtIndex(index: Int) -> unichar {
    return _value[index]
  }
}
|
|
|
|
/// Verify that extended grapheme cluster boundaries in `subject` occur at
/// positions specified in `expectedBoundaries`.
///
/// Boundaries are offsets counted in Unicode scalars from the start of
/// `subject`.  The computed list always begins with 0 and gains one entry
/// after each `Character`, so `expectedBoundaries` must include both the
/// leading 0 and the final offset.
///
/// The characters view of `subject` is additionally run through
/// `checkSliceableWithBidirectionalIndex`.  Failures are reported against
/// `stackTrace` extended with the location inside this function.
func checkGraphemeClusterSegmentation(
  expectedBoundaries: [Int], _ subject: String, _ stackTrace: SourceLocStack
) {
  var actualBoundaries: [Int] = [ 0 ]
  var unicodeScalarCount = 0
  for c in subject.characters {
    let currentClusterSize = String(c).unicodeScalars.count()
    unicodeScalarCount += currentClusterSize
    // Append in place instead of building a one-element temporary array.
    actualBoundaries.append(unicodeScalarCount)
  }
  expectEqual(expectedBoundaries, actualBoundaries,
    stackTrace: stackTrace.withCurrentLoc()) {
    "scalars: \(asHex(lazy(subject.unicodeScalars).map { $0.value }.array))"
  }

  // Only read below, so an immutable binding is sufficient.
  let expectedCharacters: [Character] = Array(subject.characters)
  checkSliceableWithBidirectionalIndex(expectedCharacters, subject.characters,
    stackTrace.withCurrentLoc())
}
|
|
|
|
/// Overload that takes the subject as UTF-32 code units.
///
/// The scalars are wrapped in a `NonContiguousNSString`, bridged to `String`,
/// and handed to the `String`-based check.
func checkGraphemeClusterSegmentation(
  expectedBoundaries: [Int], scalars: [UInt32], _ stackTrace: SourceLocStack
) {
  checkGraphemeClusterSegmentation(
    expectedBoundaries,
    NonContiguousNSString(scalars) as String,
    stackTrace.withCurrentLoc())
}
|
|
|
|
/// Overload that takes the subject as raw UTF-16 code units.
///
/// The code units are wrapped unchanged in a `NonContiguousNSString`, bridged
/// to `String`, and handed to the `String`-based check.
func checkGraphemeClusterSegmentation(
  expectedBoundaries: [Int], codeUnits: [UInt16], _ stackTrace: SourceLocStack
) {
  checkGraphemeClusterSegmentation(
    expectedBoundaries,
    NonContiguousNSString(codeUnits) as String,
    stackTrace.withCurrentLoc())
}
|
|
|
|
UnicodeTrie.test("GraphemeClusterSegmentation/UnicodeSpec") {
  // Exercise the segmentation algorithm against the test data shipped with
  // the Unicode specification.  gyb expands the loop below into one scoped
  // check per test case.

% for test_scalars, test_boundaries in grapheme_cluster_break_tests:
  if true {
    let boundaries: [Int] =
      [ ${", ".join(str(x) for x in test_boundaries)} ]
    let codePoints: [UInt32] =
      [ ${", ".join(str(cp) for cp in test_scalars)} ]
    checkGraphemeClusterSegmentation(boundaries, scalars: codePoints,
      SourceLocStack().withCurrentLoc())
  }

% end
}
|
|
|
|
UnicodeTrie.test("GraphemeClusterSegmentation/Extra") {
  // Additional cases: input Strings containing ill-formed code unit
  // sequences (an unpaired high surrogate, U+D800).

  // <U+D800>
  checkGraphemeClusterSegmentation(
    [ 0, 1 ], codeUnits: [ 0xd800 ], SourceLocStack().withCurrentLoc())

  // <U+D800> <U+D800>
  checkGraphemeClusterSegmentation(
    [ 0, 1, 2 ], codeUnits: [ 0xd800, 0xd800 ],
    SourceLocStack().withCurrentLoc())

  // U+0041 LATIN CAPITAL LETTER A, then <U+D800>
  checkGraphemeClusterSegmentation(
    [ 0, 1, 2 ], codeUnits: [ 0x0041, 0xd800 ],
    SourceLocStack().withCurrentLoc())

  // <U+D800>, then U+0041 LATIN CAPITAL LETTER A
  checkGraphemeClusterSegmentation(
    [ 0, 1, 2 ], codeUnits: [ 0xd800, 0x0041 ],
    SourceLocStack().withCurrentLoc())

  // U+0041 LATIN CAPITAL LETTER A + U+0301 COMBINING ACUTE ACCENT are
  // expected to form one cluster, followed by <U+D800> on its own.
  checkGraphemeClusterSegmentation(
    [ 0, 2, 3 ], codeUnits: [ 0x0041, 0x0301, 0xd800 ],
    SourceLocStack().withCurrentLoc())

  // <U+D800> on its own, followed by U+0041 LATIN CAPITAL LETTER A +
  // U+0301 COMBINING ACUTE ACCENT, expected to form one cluster.
  checkGraphemeClusterSegmentation(
    [ 0, 1, 3 ], codeUnits: [ 0xd800, 0x0041, 0x0301 ],
    SourceLocStack().withCurrentLoc())
}
|
|
|
|
UnicodeTrie.test("GraphemeClusterSegmentation/Unicode_7_0_0") {
  // Guard against regressing to data tables older than Unicode 7.0.0.
  //
  // Under Unicode 6.3.0 the sequence below was segmented into two grapheme
  // clusters; here it is expected to be a single cluster.
  //
  // U+0041 LATIN CAPITAL LETTER A
  // U+1122C KHOJKI VOWEL SIGN AA
  checkGraphemeClusterSegmentation(
    [ 0, 2 ], scalars: [ 0x0041, 0x1122c ],
    SourceLocStack().withCurrentLoc())
}
|
|
|
|
// Hand control to the test harness.  `runAllTests` is not defined in this
// file; presumably it comes from StdlibUnittest and executes the tests
// registered above.
runAllTests()
|
|
|