Files
swift-mirror/validation-test/stdlib/UnicodeTrie.swift.gyb
Dmitri Hrybenko 61214ec55b stdlib: remove Sliceable conformance from String
Swift SVN r28442
2015-05-11 20:58:31 +00:00

249 lines
7.3 KiB
Swift

//===--- UnicodeTrie.swift.gyb --------------------------------*- swift -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
// RUN: rm -rf %t && mkdir -p %t && %S/../../utils/gyb -DunicodeGraphemeBreakPropertyFile=%S/../../utils/UnicodeData/GraphemeBreakProperty.txt -DunicodeGraphemeBreakTestFile=%S/../../utils/UnicodeData/GraphemeBreakTest.txt %s -o %t/UnicodeTrie.swift
// RUN: %S/../../utils/line-directive %t/UnicodeTrie.swift -- %target-build-swift %t/UnicodeTrie.swift -o %t/a.out -g -Xfrontend -disable-access-control
// RUN: %S/../../utils/line-directive %t/UnicodeTrie.swift -- %target-run %t/a.out
// FIXME: rdar://problem/19648117 Needs splitting objc parts out
// XFAIL: linux
%{
# Load the grapheme cluster break property data from the file that is passed
# on the gyb command line as -DunicodeGraphemeBreakPropertyFile.  The Swift
# table literal further down is generated from the `property_value_ranges`
# attribute of this object.
from GYBUnicodeDataUtils import *
grapheme_cluster_break_property_table = \
GraphemeClusterBreakPropertyTable(unicodeGraphemeBreakPropertyFile)
}%
import SwiftPrivate
import StdlibUnittest
import Darwin
import Foundation
// Reference data for the trie test.  gyb expands the loop below into one
// (first code point, last code point, property value) tuple per entry of
// `grapheme_cluster_break_property_table.property_value_ranges`.  The trie
// test treats each tuple as a closed range of code points.
var graphemeBreakPropertyTable = [
// 'as Int' annotations are needed to help prevent the type-checker from
// blowing the stack. <rdar://problem/17539704>
% for start_code_point,end_code_point,value in grapheme_cluster_break_property_table.property_value_ranges:
(${start_code_point} as Int, ${end_code_point} as Int, _GraphemeClusterBreakPropertyValue.${value}),
% end
]
// Suite that every test in this file registers itself on.
var UnicodeTrie = TestSuite("UnicodeTrie")

UnicodeTrie.test("_UnicodeGraphemeClusterBreakPropertyTrie") {
  // Exhaustive check: the trie must return the same property value as the
  // reference table for each of the 0x110000 code points.
  var trie = _UnicodeGraphemeClusterBreakPropertyTrie()

  // Start with `.Other` everywhere, then overwrite the code points that the
  // reference table lists explicitly.
  var expected = [_GraphemeClusterBreakPropertyValue](
    count: 0x110000,
    repeatedValue: _GraphemeClusterBreakPropertyValue.Other)
  for (rangeStart, rangeEnd, propertyValue) in graphemeBreakPropertyTable {
    for tableIndex in rangeStart...rangeEnd {
      expected[tableIndex] = propertyValue
    }
  }

  for cp in UInt32(0)...UInt32(0x10ffff) {
    // Progress output, printed once every 0x10000 code points.
    if cp % 0x10000 == 0 {
      print("\(cp)...")
    }
    let expectedValue = expected[Int(cp)]
    let actualValue = trie.getPropertyValue(cp)
    expectEqual(expectedValue, actualValue) {
      "code point \(cp)"
    }
  }
}
%{
# Read the segmentation test cases from the file that is passed on the gyb
# command line as -DunicodeGraphemeBreakTestFile.  The result is iterated
# further down as (code_points, expected_boundaries) pairs.
grapheme_cluster_break_tests = \
get_grapheme_cluster_break_tests_as_unicode_scalars(unicodeGraphemeBreakTestFile)
}%
/// A sink that collects every element put into it, in order, in `array`.
struct ArraySinkOf<T> : SinkType {
  /// Everything received so far, oldest element first.
  var array: [T] = []

  /// Creates a sink with nothing collected yet.
  init() {}

  /// Creates a sink whose collected elements start out as `initialElements`.
  init(_ initialElements: [T]) {
    array = initialElements
  }

  /// Adds `x` after the elements collected so far.
  mutating func put(x: T) {
    array += [ x ]
  }
}
// The simplest subclass of NSString that CoreFoundation does not know
// about.  Its contents are held as UTF-16 code units in a Swift array.
class NonContiguousNSString : NSString {
  // Backing storage: the UTF-16 code units that make up the string.
  var _value: [UInt16]

  // Creates an empty string.
  override init() {
    _value = []
    super.init()
  }

  // Creates a string directly from UTF-16 code units.  The code units are
  // stored as given.
  init(_ codeUnits: [UInt16]) {
    _value = codeUnits
    super.init()
  }

  // Creates a string from unicode scalar values by transcoding them from
  // UTF-32 to UTF-16.  A transcoding error is reported as a test failure.
  convenience init(_ scalars: [UInt32]) {
    var utf16Sink = ArraySinkOf<UInt16>()
    var scalarGenerator = scalars.generate()
    let transcodingFailed = transcode(
      UTF32.self, UTF16.self, scalarGenerator, &utf16Sink,
      stopOnError: true)
    expectFalse(transcodingFailed)
    self.init(utf16Sink.array)
  }

  // Decoding is not supported; this initializer traps when called.
  required init(coder aDecoder: NSCoder) {
    fatalError("don't call this initializer")
  }

  @objc override var length: Int {
    return _value.count
  }

  @objc override func characterAtIndex(index: Int) -> unichar {
    return _value[index]
  }

  @objc override func copyWithZone(zone: NSZone) -> AnyObject {
    // Returning `self` guarantees that a copy of this string is still an
    // instance of a class that CoreFoundation does not know about.
    return self
  }
}
/// Checks how `subject` is split into `Character`s.
///
/// The boundaries are expressed as running unicode scalar counts: the list
/// starts with 0 and gains one entry after each character, equal to the
/// number of unicode scalars consumed so far.  That list must equal
/// `expectedBoundaries`.  Afterwards `subject.characters` is run through
/// `checkSliceableWithBidirectionalIndex` against an array of its own
/// characters.
func checkGraphemeClusterSegmentation(
  expectedBoundaries: [Int], _ subject: String, _ stackTrace: SourceLocStack
) {
  var scalarsConsumed = 0
  var actualBoundaries: [Int] = [ 0 ]
  for character in subject.characters {
    scalarsConsumed += String(character).unicodeScalars.count()
    actualBoundaries.append(scalarsConsumed)
  }

  expectEqual(
    expectedBoundaries, actualBoundaries,
    stackTrace: stackTrace.withCurrentLoc()
  ) {
    "scalars: \(asHex(lazy(subject.unicodeScalars).map { $0.value }.array))"
  }

  let characterArray: [Character] = Array(subject.characters)
  checkSliceableWithBidirectionalIndex(
    characterArray, subject.characters, stackTrace.withCurrentLoc())
}
/// Variant that takes the subject as unicode scalar values.  The scalars are
/// wrapped in a `NonContiguousNSString`, bridged to `String`, and passed on
/// to the `String`-based check.
func checkGraphemeClusterSegmentation(
  expectedBoundaries: [Int], scalars: [UInt32], _ stackTrace: SourceLocStack
) {
  let bridgedSubject: String = NonContiguousNSString(scalars) as String
  checkGraphemeClusterSegmentation(
    expectedBoundaries, bridgedSubject, stackTrace.withCurrentLoc())
}
/// Variant that takes the subject as raw UTF-16 code units.  The code units
/// are wrapped in a `NonContiguousNSString`, bridged to `String`, and passed
/// on to the `String`-based check.
func checkGraphemeClusterSegmentation(
  expectedBoundaries: [Int], codeUnits: [UInt16], _ stackTrace: SourceLocStack
) {
  let bridgedSubject: String = NonContiguousNSString(codeUnits) as String
  checkGraphemeClusterSegmentation(
    expectedBoundaries, bridgedSubject, stackTrace.withCurrentLoc())
}
UnicodeTrie.test("GraphemeClusterSegmentation/UnicodeSpec") {
// Test segmentation algorithm using test data from the Unicode
// specification.
//
// gyb emits one check per entry of `grapheme_cluster_break_tests`.  Each
// check is wrapped in its own `if true { ... }` block, so the repeated
// `scalars` and `expectedBoundaries` constants get a scope of their own, and
// each check passes its own `SourceLocStack().withCurrentLoc()`.
% for code_points,expected_boundaries in grapheme_cluster_break_tests:
if true {
let scalars: [UInt32] =
[ ${", ".join([ str(cp) for cp in code_points ])} ]
let expectedBoundaries: [Int] =
[ ${", ".join([ str(x) for x in expected_boundaries ])} ]
checkGraphemeClusterSegmentation(expectedBoundaries, scalars: scalars,
SourceLocStack().withCurrentLoc())
}
% end
}
UnicodeTrie.test("GraphemeClusterSegmentation/Extra") {
// Extra tests for input Strings that contain ill-formed code unit sequences.
//
// The inputs are given as raw UTF-16 code units.  The expected boundaries
// are running unicode scalar counts (see checkGraphemeClusterSegmentation).
// In every case below each unpaired high surrogate is expected to end up in
// a cluster of its own that is one scalar long.

// A lone surrogate.
//
// U+D800 (high-surrogate)
checkGraphemeClusterSegmentation(
[ 0, 1 ],
codeUnits: [ 0xd800 ],
SourceLocStack().withCurrentLoc())

// Two unpaired surrogates in a row: two separate clusters.
//
// U+D800 (high-surrogate)
// U+D800 (high-surrogate)
checkGraphemeClusterSegmentation(
[ 0, 1, 2 ],
codeUnits: [ 0xd800, 0xd800 ],
SourceLocStack().withCurrentLoc())

// A well-formed scalar followed by a surrogate.
//
// U+0041 LATIN CAPITAL LETTER A
// U+D800 (high-surrogate)
checkGraphemeClusterSegmentation(
[ 0, 1, 2 ],
codeUnits: [ 0x0041, 0xd800 ],
SourceLocStack().withCurrentLoc())

// A surrogate followed by a well-formed scalar.
//
// U+D800 (high-surrogate)
// U+0041 LATIN CAPITAL LETTER A
checkGraphemeClusterSegmentation(
[ 0, 1, 2 ],
codeUnits: [ 0xd800, 0x0041 ],
SourceLocStack().withCurrentLoc())

// A two-scalar cluster (letter + combining accent) followed by a surrogate.
//
// U+0041 LATIN CAPITAL LETTER A
// U+0301 COMBINING ACUTE ACCENT
// U+D800 (high-surrogate)
checkGraphemeClusterSegmentation(
[ 0, 2, 3 ],
codeUnits: [ 0x0041, 0x0301, 0xd800 ],
SourceLocStack().withCurrentLoc())

// A surrogate followed by a two-scalar cluster (letter + combining accent).
//
// U+D800 (high-surrogate)
// U+0041 LATIN CAPITAL LETTER A
// U+0301 COMBINING ACUTE ACCENT
checkGraphemeClusterSegmentation(
[ 0, 1, 3 ],
codeUnits: [ 0xd800, 0x0041, 0x0301 ],
SourceLocStack().withCurrentLoc())
}
UnicodeTrie.test("GraphemeClusterSegmentation/Unicode_7_0_0") {
// Verify that we are using Unicode 7.0.0+ data tables.
// In Unicode 6.3.0, this sequence was segmented into two grapheme clusters.
// The expected boundaries below describe a single cluster that spans both
// scalars.
//
// U+0041 LATIN CAPITAL LETTER A
// U+1122C KHOJKI VOWEL SIGN AA
checkGraphemeClusterSegmentation(
[ 0, 2 ],
scalars: [ 0x0041, 0x1122c ],
SourceLocStack().withCurrentLoc())
}
// Entry point of the test executable: hand control to the StdlibUnittest
// runner (presumably it executes the tests registered on `UnicodeTrie`).
runAllTests()