//===--- UnicodeTrie.swift.gyb --------------------------------*- swift -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
// RUN: rm -rf %t && mkdir -p %t && %S/../../utils/gyb -DunicodeGraphemeBreakPropertyFile=%S/../../utils/UnicodeData/GraphemeBreakProperty.txt -DunicodeGraphemeBreakTestFile=%S/../../utils/UnicodeData/GraphemeBreakTest.txt %s -o %t/UnicodeTrie.swift
// RUN: %S/../../utils/line-directive %t/UnicodeTrie.swift -- %target-build-swift %t/UnicodeTrie.swift -o %t/a.out -g -Xfrontend -disable-access-control
// RUN: %S/../../utils/line-directive %t/UnicodeTrie.swift -- %target-run %t/a.out

// FIXME: rdar://problem/19648117 Needs splitting objc parts out
// XFAIL: linux

%{

from GYBUnicodeDataUtils import *

grapheme_cluster_break_property_table = \
    GraphemeClusterBreakPropertyTable(unicodeGraphemeBreakPropertyFile)

}%

import SwiftPrivate
import StdlibUnittest
import Darwin
import Foundation

/// Expected property values, generated from the grapheme break property file
/// as `(first code point, last code point, value)` triples.  Both ends of a
/// range are inclusive (see the `...` loop in the trie test).
var graphemeBreakPropertyTable = [
  // 'as Int' annotations are needed to help prevent the type-checker from
  // blowing the stack.
% for start_code_point,end_code_point,value in grapheme_cluster_break_property_table.property_value_ranges:
  (${start_code_point} as Int, ${end_code_point} as Int, _GraphemeClusterBreakPropertyValue.${value}),
% end
]

var UnicodeTrie = TestSuite("UnicodeTrie")

UnicodeTrie.test("_UnicodeGraphemeClusterBreakPropertyTrie") {
  // Verify that the trie reports correct values of the property for every code
  // point.

  var trie = _UnicodeGraphemeClusterBreakPropertyTrie()

  // Build a flat lookup table indexed by code point.  Everything that is not
  // covered by a range in `graphemeBreakPropertyTable` stays `.Other`.
  var expected = [_GraphemeClusterBreakPropertyValue](
    count: 0x110000,
    repeatedValue: _GraphemeClusterBreakPropertyValue.Other)
  for (startCodePoint, endCodePoint, value) in graphemeBreakPropertyTable {
    for cp in startCodePoint...endCodePoint {
      expected[cp] = value
    }
  }

  // Compare the trie against the flat table for every code point.
  for cp in UInt32(0)...UInt32(0x10ffff) {
    // Print a progress marker every 0x10000 code points.
    if cp % 0x10000 == 0 {
      print("\(cp)...")
    }
    expectEqual(expected[Int(cp)], trie.getPropertyValue(cp)) {
      "code point \(cp)"
    }
  }
}

%{

grapheme_cluster_break_tests = \
    get_grapheme_cluster_break_tests_as_unicode_scalars(unicodeGraphemeBreakTestFile)

}%

/// A sink that appends every element it receives to `array`.
///
/// The element type `T` has to be declared as a generic parameter: it is used
/// by both initializers' storage, by `put`, and by `array`.
struct ArraySinkOf<T> : SinkType {
  /// Creates a sink with no collected elements.
  init() {}

  /// Creates a sink whose collected elements start out as `array`.
  init(_ array: [T]) {
    self.array = array
  }

  /// Appends `x` to the collected elements.
  mutating func put(x: T) {
    array.append(x)
  }

  /// The elements collected so far, in the order they were put.
  var array: [T] = []
}

// The most simple subclass of NSString that CoreFoundation does not know
// about.
class NonContiguousNSString : NSString {
  /// Creates an empty string.
  override init() {
    _value = []
    super.init()
  }

  /// Decoding is not supported; calling this initializer traps.
  required init(coder aDecoder: NSCoder) {
    fatalError("don't call this initializer")
  }

  /// Creates a string whose contents are exactly the code units in `value`.
  init(_ value: [UInt16]) {
    _value = value
    super.init()
  }

  /// Creates a string from Unicode scalar values by transcoding them from
  /// UTF-32 to UTF-16.  A transcoding error is reported as a test failure.
  convenience init(_ scalars: [UInt32]) {
    // The element type must be spelled out; nothing at this point lets it be
    // inferred.
    var encoded = ArraySinkOf<UInt16>()
    let g = scalars.generate()
    let hadError = transcode(UTF32.self, UTF16.self, g, &encoded,
      stopOnError: true)
    expectFalse(hadError)
    self.init(encoded.array)
  }

  @objc override func copyWithZone(zone: NSZone) -> AnyObject {
    // Ensure that copying this string produces a class that CoreFoundation
    // does not know about.
    return self
  }

  /// The number of stored code units.
  @objc override var length: Int {
    return _value.count
  }

  /// Returns the stored code unit at `index`.
  @objc override func characterAtIndex(index: Int) -> unichar {
    return _value[index]
  }

  /// Backing storage: the string's code units.
  var _value: [UInt16]
}

/// Verify that extended grapheme cluster boundaries in `subject` occur at
/// positions specified in `expectedBoundaries`.
///
/// Boundaries are cumulative Unicode scalar counts: the list always starts
/// with 0 and gains one entry per `Character` of `subject`.  The character
/// view is additionally passed through `checkSliceableWithBidirectionalIndex`
/// together with an array copy of its characters.
func checkGraphemeClusterSegmentation(
  expectedBoundaries: [Int], _ subject: String, _ stackTrace: SourceLocStack
) {
  var actualBoundaries: [Int] = [ 0 ]
  var unicodeScalarCount = 0
  for c in subject.characters {
    // Each character contributes as many scalars as its own cluster holds.
    let currentClusterSize = String(c).unicodeScalars.count()
    unicodeScalarCount += currentClusterSize
    actualBoundaries.append(unicodeScalarCount)
  }
  expectEqual(expectedBoundaries, actualBoundaries,
    stackTrace: stackTrace.withCurrentLoc()) {
    "scalars: \(asHex(lazy(subject.unicodeScalars).map { $0.value }.array))"
  }

  // Never mutated, so a constant is enough.
  let expectedCharacters: [Character] = Array(subject.characters)
  checkSliceableWithBidirectionalIndex(expectedCharacters,
    subject.characters, stackTrace.withCurrentLoc())
}

/// Runs the segmentation check on a string built from Unicode scalar values.
///
/// The scalars are wrapped in a `NonContiguousNSString` and bridged to
/// `String` before being checked.
func checkGraphemeClusterSegmentation(
  expectedBoundaries: [Int], scalars: [UInt32], _ stackTrace: SourceLocStack
) {
  let subject = NonContiguousNSString(scalars) as String
  checkGraphemeClusterSegmentation(expectedBoundaries, subject,
    stackTrace.withCurrentLoc())
}

/// Runs the segmentation check on a string built from raw UTF-16 code units.
///
/// The code units are wrapped in a `NonContiguousNSString` as-is and bridged
/// to `String` before being checked.
func checkGraphemeClusterSegmentation(
  expectedBoundaries: [Int], codeUnits: [UInt16], _ stackTrace: SourceLocStack
) {
  let subject = NonContiguousNSString(codeUnits) as String
  checkGraphemeClusterSegmentation(expectedBoundaries, subject,
    stackTrace.withCurrentLoc())
}

UnicodeTrie.test("GraphemeClusterSegmentation/UnicodeSpec") {
  // Test segmentation algorithm using test data from the Unicode
  // specification.

  // One scoped block is generated per test case so that the local names can
  // be reused.
% for code_points,expected_boundaries in grapheme_cluster_break_tests:
  if true {
    let scalars: [UInt32] =
      [ ${", ".join([ str(cp) for cp in code_points ])} ]
    let expectedBoundaries: [Int] =
      [ ${", ".join([ str(x) for x in expected_boundaries ])} ]
    checkGraphemeClusterSegmentation(expectedBoundaries, scalars: scalars,
      SourceLocStack().withCurrentLoc())
  }

% end
}

UnicodeTrie.test("GraphemeClusterSegmentation/Extra") {
  // Extra tests for input Strings that contain ill-formed code unit sequences.

  // U+D800 (high-surrogate)
  checkGraphemeClusterSegmentation(
    [ 0, 1 ],
    codeUnits: [ 0xd800 ],
    SourceLocStack().withCurrentLoc())

  // U+D800 (high-surrogate)
  // U+D800 (high-surrogate)
  checkGraphemeClusterSegmentation(
    [ 0, 1, 2 ],
    codeUnits: [ 0xd800, 0xd800 ],
    SourceLocStack().withCurrentLoc())

  // U+0041 LATIN CAPITAL LETTER A
  // U+D800 (high-surrogate)
  checkGraphemeClusterSegmentation(
    [ 0, 1, 2 ],
    codeUnits: [ 0x0041, 0xd800 ],
    SourceLocStack().withCurrentLoc())

  // U+D800 (high-surrogate)
  // U+0041 LATIN CAPITAL LETTER A
  checkGraphemeClusterSegmentation(
    [ 0, 1, 2 ],
    codeUnits: [ 0xd800, 0x0041 ],
    SourceLocStack().withCurrentLoc())

  // U+0041 LATIN CAPITAL LETTER A
  // U+0301 COMBINING ACUTE ACCENT
  // U+D800 (high-surrogate)
  checkGraphemeClusterSegmentation(
    [ 0, 2, 3 ],
    codeUnits: [ 0x0041, 0x0301, 0xd800 ],
    SourceLocStack().withCurrentLoc())

  // U+D800 (high-surrogate)
  // U+0041 LATIN CAPITAL LETTER A
  // U+0301 COMBINING ACUTE ACCENT
  checkGraphemeClusterSegmentation(
    [ 0, 1, 3 ],
    codeUnits: [ 0xd800, 0x0041, 0x0301 ],
    SourceLocStack().withCurrentLoc())
}

UnicodeTrie.test("GraphemeClusterSegmentation/Unicode_7_0_0") {
  // Verify that we are using Unicode 7.0.0+ data tables.

  // In Unicode 6.3.0, this sequence was segmented into two grapheme clusters.
  //
  // U+0041 LATIN CAPITAL LETTER A
  // U+1122C KHOJKI VOWEL SIGN AA
  checkGraphemeClusterSegmentation(
    [ 0, 2 ],
    scalars: [ 0x0041, 0x1122c ],
    SourceLocStack().withCurrentLoc())
}

runAllTests()