stdlib/String: use the Unicode collation algorithm in String's and Character's conformance to Comparable

rdar://17498444


Swift SVN r20554
This commit is contained in:
Dmitri Hrybenko
2014-07-25 18:32:36 +00:00
parent 61f3cd9e6e
commit ed855afb68
7 changed files with 253 additions and 114 deletions

View File

@@ -24,7 +24,7 @@ internal struct IntEncoder : SinkType {
/// segmentation algorithm. /// segmentation algorithm.
public enum Character : public enum Character :
_BuiltinExtendedGraphemeClusterLiteralConvertible, _BuiltinExtendedGraphemeClusterLiteralConvertible,
ExtendedGraphemeClusterLiteralConvertible, Equatable, Hashable { ExtendedGraphemeClusterLiteralConvertible, Equatable, Hashable, Comparable {
// Fundamentally, it is just a String, but it is optimized for the // Fundamentally, it is just a String, but it is optimized for the
// common case where the UTF-8 representation fits in 63 bits. The // common case where the UTF-8 representation fits in 63 bits. The
@@ -135,3 +135,8 @@ public func ==(lhs: Character, rhs: Character) -> Bool {
return String(lhs) == String(rhs) return String(lhs) == String(rhs)
} }
/// Orders two characters by comparing their `String` representations with
/// `String`'s `<` operator.
public func <(lhs: Character, rhs: Character) -> Bool {
  // FIXME(performance): constructing two temporary strings is extremely
  // wasteful and inefficient.
  let lhsString = String(lhs)
  let rhsString = String(rhs)
  return lhsString < rhsString
}

View File

@@ -184,13 +184,13 @@ extension String {
/// The behavior is equivalent to `NSString.compare()` with default options. /// The behavior is equivalent to `NSString.compare()` with default options.
/// ///
/// :returns: /// :returns:
/// * -1 if `lhs < rhs`, /// * an unspecified value less than zero if `lhs < rhs`,
/// * 0 if `lhs == rhs`, /// * zero if `lhs == rhs`,
/// * 1 if `lhs > rhs`. /// * an unspecified value greater than zero if `lhs > rhs`.
@asmname("swift_stdlib_compareNSStringDeterministicUnicodeCollation") @asmname("swift_stdlib_compareNSStringDeterministicUnicodeCollation")
public func _stdlib_compareNSStringDeterministicUnicodeCollation( public func _stdlib_compareNSStringDeterministicUnicodeCollation(
lhs: AnyObject, rhs: AnyObject lhs: AnyObject, rhs: AnyObject
)-> Int )-> Int32
extension String: Equatable { extension String: Equatable {
} }
@@ -202,11 +202,12 @@ public func ==(lhs: String, rhs: String) -> Bool {
lhs._bridgeToObjectiveCImpl(), rhs._bridgeToObjectiveCImpl()) == 0 lhs._bridgeToObjectiveCImpl(), rhs._bridgeToObjectiveCImpl()) == 0
} }
// Declares String's conformance to Comparable. The extension body is empty;
// the `<` operator for String is defined as a free function.
extension String : Comparable {
}
public func <(lhs: String, rhs: String) -> Bool { public func <(lhs: String, rhs: String) -> Bool {
// FIXME: Does lexicographical ordering on component UnicodeScalars, return _stdlib_compareNSStringDeterministicUnicodeCollation(
// but should eventually do a proper unicode String collation. See lhs._bridgeToObjectiveCImpl(), rhs._bridgeToObjectiveCImpl()) < 0
// the comment on == for more information.
return lexicographicalCompare(lhs.unicodeScalars, rhs.unicodeScalars)
} }
// Support for copy-on-write // Support for copy-on-write

View File

@@ -5,6 +5,7 @@ add_swift_library(swiftFoundation INSTALL TARGET_LIBRARY
FoundationMirrors.swift.gyb FoundationMirrors.swift.gyb
ExtraStringAPIs.swift ExtraStringAPIs.swift
Misc.mm Misc.mm
MixedStringNSStringOperations.swift.gyb
NSStringAPI.swift NSStringAPI.swift
NSValue.swift NSValue.swift
DEPENDS swiftCore swiftObjectiveC swiftCoreGraphics swiftDispatch DEPENDS swiftCore swiftObjectiveC swiftCoreGraphics swiftDispatch

View File

@@ -0,0 +1,41 @@
//===--- MixedStringNSStringOperations.swift.gyb --------------*- swift -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// Mixed-type comparisons between String and NSString.
//
% for cmp_op in [ '==', '!=', '<', '<=', '>=', '>' ]:

/// Mixed-type comparison with a `String` on the left and an `NSString` on
/// the right; the `NSString` is converted to `String` and the operands are
/// then compared as two `String` values.
@transparent
public func ${cmp_op} (lhs: String, rhs: NSString) -> Bool {
  // FIXME(performance): constructing a temporary string is extremely
  // wasteful and inefficient.
  let rhsString = rhs as String
  return lhs ${cmp_op} rhsString
}

/// Mixed-type comparison with an `NSString` on the left and a `String` on
/// the right; the `NSString` is converted to `String` and the operands are
/// then compared as two `String` values.
@transparent
public func ${cmp_op} (lhs: NSString, rhs: String) -> Bool {
  // FIXME(performance): constructing a temporary string is extremely
  // wasteful and inefficient.
  let lhsString = lhs as String
  return lhsString ${cmp_op} rhs
}

% end
// Homogeneous NSString/NSString inequality. This overload is required to
// disambiguate NSString/NSString comparisons in non-generic code.
/// Returns `true` exactly when `lhs == rhs` is `false`.
@transparent
public func != (lhs: NSString, rhs: NSString) -> Bool {
  if lhs == rhs {
    return false
  }
  return true
}

View File

@@ -1434,13 +1434,3 @@ extension String {
} }
} }
@transparent public
func == (lhs: String, rhs: NSString) -> Bool {
return lhs == (rhs as String)
}
@transparent public
func == (lhs: NSString, rhs: String) -> Bool {
return (lhs as String) == rhs
}

View File

@@ -93,7 +93,7 @@ extern "C" bool swift_stdlib_NSObject_isEqual(NSObject *lhs, NSObject *rhs) {
return Result; return Result;
} }
extern "C" int extern "C" int32_t
swift_stdlib_compareNSStringDeterministicUnicodeCollation(NSString *lhs, swift_stdlib_compareNSStringDeterministicUnicodeCollation(NSString *lhs,
NSString *rhs) { NSString *rhs) {
// 'kCFCompareNonliteral' actually means "normalize to NFD". // 'kCFCompareNonliteral' actually means "normalize to NFD".

View File

@@ -1138,14 +1138,167 @@ NSStringAPIs.test("writeToURL(_:atomically:encoding:error:)") {
// FIXME // FIXME
} }
func checkEqualityImpl( enum ExpectedComparisonResult {
expectedEqualNFD: Bool, lhs: String, rhs: String, case LT, EQ, GT
stackTrace: SourceLocStack
/// Returns `true` only for the `.LT` case.
func isLT() -> Bool {
  switch self {
  case .LT:
    return true
  case .EQ, .GT:
    return false
  }
}
/// Returns `true` only for the `.EQ` case.
func isEQ() -> Bool {
  switch self {
  case .EQ:
    return true
  case .LT, .GT:
    return false
  }
}
/// Returns `true` only for the `.GT` case.
func isGT() -> Bool {
  switch self {
  case .GT:
    return true
  case .LT, .EQ:
    return false
  }
}
/// Returns `true` for `.LT` and `.EQ`, `false` for `.GT`.
func isLE() -> Bool {
  switch self {
  case .LT, .EQ:
    return true
  case .GT:
    return false
  }
}
/// Returns `true` for `.GT` and `.EQ`, `false` for `.LT`.
func isGE() -> Bool {
  switch self {
  case .GT, .EQ:
    return true
  case .LT:
    return false
  }
}
/// Returns `true` for every case except `.EQ`.
func isNE() -> Bool {
  if isEQ() {
    return false
  }
  return true
}
/// Returns the result expected when the two operands are swapped:
/// `.LT` becomes `.GT`, `.GT` becomes `.LT`, and `.EQ` stays `.EQ`.
func flip() -> ExpectedComparisonResult {
  if isEQ() {
    return .EQ
  }
  if isLT() {
    return .GT
  }
  return .LT
}
}
/// One row of comparison test data: two strings plus the expected result of
/// comparing `lhs` against `rhs`, and the source location where the row was
/// written.
struct ComparisonTest {
  let lhs: String
  let rhs: String
  let expectedUnicodeCollation: ExpectedComparisonResult
  let loc: SourceLoc

  /// Creates a test case. `file` and `line` default to the location of the
  /// call and are stored in `loc` with the comment "test data".
  init(
    _ expectedUnicodeCollation: ExpectedComparisonResult,
    _ lhs: String, _ rhs: String,
    file: String = __FILE__, line: UWord = __LINE__
  ) {
    self.lhs = lhs
    self.rhs = rhs
    self.expectedUnicodeCollation = expectedUnicodeCollation
    self.loc = SourceLoc(file, line, comment: "test data")
  }
}
// Table of comparison test cases. Each entry lists the expected ordering of
// the first string relative to the second, followed by the two strings.
// Entries are grouped by the code points they exercise; the comment above
// each group names those code points.
let comparisonTests = [
ComparisonTest(.EQ, "", ""),
ComparisonTest(.LT, "", "a"),
// U+0301 COMBINING ACUTE ACCENT
// U+00E1 LATIN SMALL LETTER A WITH ACUTE
//
// The decomposed and precomposed spellings are expected to compare equal.
ComparisonTest(.EQ, "a\u{301}", "\u{e1}"),
ComparisonTest(.LT, "a", "a\u{301}"),
ComparisonTest(.LT, "a", "\u{e1}"),
// U+304B HIRAGANA LETTER KA
// U+304C HIRAGANA LETTER GA
// U+3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
ComparisonTest(.EQ, "\u{304b}", "\u{304b}"),
ComparisonTest(.EQ, "\u{304c}", "\u{304c}"),
ComparisonTest(.LT, "\u{304b}", "\u{304c}"),
ComparisonTest(.LT, "\u{304b}", "\u{304c}\u{3099}"),
ComparisonTest(.EQ, "\u{304c}", "\u{304b}\u{3099}"),
ComparisonTest(.LT, "\u{304c}", "\u{304c}\u{3099}"),
// U+212B ANGSTROM SIGN
// U+030A COMBINING RING ABOVE
// U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
ComparisonTest(.EQ, "\u{212b}", "A\u{30a}"),
ComparisonTest(.EQ, "\u{212b}", "\u{c5}"),
ComparisonTest(.EQ, "A\u{30a}", "\u{c5}"),
ComparisonTest(.LT, "A\u{30a}", "a"),
// U+2126 OHM SIGN
// U+03A9 GREEK CAPITAL LETTER OMEGA
ComparisonTest(.EQ, "\u{2126}", "\u{03a9}"),
// U+1E69 LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
// U+0323 COMBINING DOT BELOW
// U+0307 COMBINING DOT ABOVE
// U+1E63 LATIN SMALL LETTER S WITH DOT BELOW
//
// Both orders of the two combining marks are expected to compare equal to
// the precomposed forms.
ComparisonTest(.EQ, "\u{1e69}", "s\u{323}\u{307}"),
ComparisonTest(.EQ, "\u{1e69}", "s\u{307}\u{323}"),
ComparisonTest(.EQ, "\u{1e69}", "\u{1e63}\u{307}"),
ComparisonTest(.EQ, "\u{1e63}\u{307}", "s\u{323}\u{307}"),
ComparisonTest(.EQ, "\u{1e63}\u{307}", "s\u{307}\u{323}"),
// U+FB01 LATIN SMALL LIGATURE FI
//
// The ligature is expected to order after the two-letter sequence "fi"
// rather than compare equal to it.
ComparisonTest(.EQ, "\u{fb01}", "\u{fb01}"),
ComparisonTest(.LT, "fi", "\u{fb01}"),
// Test that Unicode collation is performed in deterministic mode.
//
// U+0301 COMBINING ACUTE ACCENT
// U+0341 COMBINING ACUTE TONE MARK
// U+0954 DEVANAGARI ACUTE ACCENT
//
// Collation elements from DUCET:
// 0301 ; [.0000.0024.0002] # COMBINING ACUTE ACCENT
// 0341 ; [.0000.0024.0002] # COMBINING ACUTE TONE MARK
// 0954 ; [.0000.0024.0002] # DEVANAGARI ACUTE ACCENT
//
// U+0301 and U+0954 don't decompose in the canonical decomposition mapping.
// U+0341 has a canonical decomposition mapping of U+0301.
ComparisonTest(.EQ, "\u{0301}", "\u{0341}"),
ComparisonTest(.LT, "\u{0301}", "\u{0954}"),
ComparisonTest(.LT, "\u{0341}", "\u{0954}"),
]
/// Checks the four ordering operators (`<`, `<=`, `>=`, `>`) on `lhs` and
/// `rhs` in a generic `Comparable` context against the `expected` result.
/// Each mismatch is reported through `expectEqual` with `stackTrace`.
func checkComparable<T : Comparable>(
  expected: ExpectedComparisonResult,
  lhs: T, rhs: T, stackTrace: SourceLocStack
) {
  let actualLT = lhs < rhs
  expectEqual(expected.isLT(), actualLT, stackTrace: stackTrace)

  let actualLE = lhs <= rhs
  expectEqual(expected.isLE(), actualLE, stackTrace: stackTrace)

  let actualGE = lhs >= rhs
  expectEqual(expected.isGE(), actualGE, stackTrace: stackTrace)

  let actualGT = lhs > rhs
  expectEqual(expected.isGT(), actualGT, stackTrace: stackTrace)
}
/// Checks equality, hashing and ordering of two `Character` values against
/// the `expected` comparison result.
///
/// The ordering operators are checked twice: first directly on the
/// `Character` operands, then again through the generic `checkComparable`.
func checkCharacterComparisonImpl(
  expected: ExpectedComparisonResult,
  lhs: Character, rhs: Character, stackTrace: SourceLocStack
) {
  // Character / Character

  // Equality and hashing.
  let actualEQ = lhs == rhs
  expectEqual(expected.isEQ(), actualEQ, stackTrace: stackTrace)
  let actualNE = lhs != rhs
  expectEqual(expected.isNE(), actualNE, stackTrace: stackTrace)
  checkHashable(expected.isEQ(), lhs, rhs, stackTrace.withCurrentLoc())

  // Ordering, using the concrete Character operators.
  let actualLT = lhs < rhs
  expectEqual(expected.isLT(), actualLT, stackTrace: stackTrace)
  let actualLE = lhs <= rhs
  expectEqual(expected.isLE(), actualLE, stackTrace: stackTrace)
  let actualGE = lhs >= rhs
  expectEqual(expected.isGE(), actualGE, stackTrace: stackTrace)
  let actualGT = lhs > rhs
  expectEqual(expected.isGT(), actualGT, stackTrace: stackTrace)

  // Ordering again, through the generic Comparable entry point.
  checkComparable(expected, lhs, rhs, stackTrace.withCurrentLoc())
}
func checkStringComparisonImpl(
expected: ExpectedComparisonResult,
lhs: String, rhs: String, stackTrace: SourceLocStack
) { ) {
// String / String // String / String
expectEqual(expectedEqualNFD, lhs == rhs, stackTrace: stackTrace) expectEqual(expected.isEQ(), lhs == rhs, stackTrace: stackTrace)
expectEqual(!expectedEqualNFD, lhs != rhs, stackTrace: stackTrace) expectEqual(expected.isNE(), lhs != rhs, stackTrace: stackTrace)
checkHashable(expectedEqualNFD, lhs, rhs, stackTrace.withCurrentLoc()) checkHashable(expected.isEQ(), lhs, rhs, stackTrace.withCurrentLoc())
expectEqual(expected.isLT(), lhs < rhs, stackTrace: stackTrace)
expectEqual(expected.isLE(), lhs <= rhs, stackTrace: stackTrace)
expectEqual(expected.isGE(), lhs >= rhs, stackTrace: stackTrace)
expectEqual(expected.isGT(), lhs > rhs, stackTrace: stackTrace)
checkComparable(expected, lhs, rhs, stackTrace.withCurrentLoc())
// NSString / NSString // NSString / NSString
let lhsNSString = lhs as NSString let lhsNSString = lhs as NSString
@@ -1162,105 +1315,53 @@ func checkEqualityImpl(
expectedEqualUnicodeScalars, lhsNSString, rhsNSString, expectedEqualUnicodeScalars, lhsNSString, rhsNSString,
stackTrace.withCurrentLoc()) stackTrace.withCurrentLoc())
// Test mixed comparisons. Currently we rely on implicit bridging for these
// operators to work.
// String / NSString // String / NSString
expectEqual(expectedEqualNFD, lhs == rhsNSString, stackTrace: stackTrace) expectEqual(expected.isEQ(), lhs == rhsNSString, stackTrace: stackTrace)
expectEqual(!expectedEqualNFD, lhs != rhsNSString, stackTrace: stackTrace) expectEqual(expected.isNE(), lhs != rhsNSString, stackTrace: stackTrace)
expectEqual(expected.isLT(), lhs < rhsNSString, stackTrace: stackTrace)
expectEqual(expected.isLE(), lhs <= rhsNSString, stackTrace: stackTrace)
expectEqual(expected.isGE(), lhs >= rhsNSString, stackTrace: stackTrace)
expectEqual(expected.isGT(), lhs > rhsNSString, stackTrace: stackTrace)
// NSString / String // NSString / String
expectEqual(expectedEqualNFD, lhs == rhsNSString, stackTrace: stackTrace) expectEqual(expected.isEQ(), lhsNSString == rhs, stackTrace: stackTrace)
expectEqual(!expectedEqualNFD, lhs != rhsNSString, stackTrace: stackTrace) expectEqual(expected.isNE(), lhsNSString != rhs, stackTrace: stackTrace)
expectEqual(expected.isLT(), lhsNSString < rhs, stackTrace: stackTrace)
expectEqual(expected.isLE(), lhsNSString <= rhs, stackTrace: stackTrace)
expectEqual(expected.isGE(), lhsNSString >= rhs, stackTrace: stackTrace)
expectEqual(expected.isGT(), lhsNSString > rhs, stackTrace: stackTrace)
} }
func checkEquality( func checkComparison(
expectedEqualNFD: Bool, lhs: String, rhs: String, stackTrace: SourceLocStack expectedUnicodeCollation: ExpectedComparisonResult,
lhs: String, rhs: String, stackTrace: SourceLocStack
) { ) {
checkEqualityImpl(expectedEqualNFD, lhs, rhs, stackTrace.withCurrentLoc()) checkStringComparisonImpl(
checkEqualityImpl(expectedEqualNFD, rhs, lhs, stackTrace.withCurrentLoc()) expectedUnicodeCollation, lhs, rhs, stackTrace.withCurrentLoc())
} checkStringComparisonImpl(
expectedUnicodeCollation.flip(), rhs, lhs, stackTrace.withCurrentLoc())
struct EqualityTest { if countElements(lhs) == 1 && countElements(rhs) == 1 {
let expectedEqualNFD: Bool let lhsCharacter = Character(lhs)
let lhs: String let rhsCharacter = Character(rhs)
let rhs: String checkCharacterComparisonImpl(
let loc: SourceLoc expectedUnicodeCollation, lhsCharacter, rhsCharacter,
stackTrace.withCurrentLoc())
init(_ expectedEqualNFD: Bool, _ lhs: String, _ rhs: String, checkCharacterComparisonImpl(
file: String = __FILE__, line: UWord = __LINE__) { expectedUnicodeCollation.flip(), rhsCharacter, lhsCharacter,
self.expectedEqualNFD = expectedEqualNFD stackTrace.withCurrentLoc())
self.lhs = lhs
self.rhs = rhs
self.loc = SourceLoc(file, line, comment: "test data")
} }
} }
let equalityTests = [
EqualityTest(true, "", ""),
EqualityTest(false, "a", ""),
// U+0301 COMBINING ACUTE ACCENT
// U+00E1 LATIN SMALL LETTER A WITH ACUTE
EqualityTest(true, "a\u{301}", "\u{e1}"),
EqualityTest(false, "a\u{301}", "a"),
EqualityTest(false, "\u{e1}", "a"),
// U+304B HIRAGANA LETTER KA
// U+304C HIRAGANA LETTER GA
// U+3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
EqualityTest(true, "\u{304b}", "\u{304b}"),
EqualityTest(true, "\u{304c}", "\u{304c}"),
EqualityTest(false, "\u{304b}", "\u{304c}"),
EqualityTest(false, "\u{304b}", "\u{304c}\u{3099}"),
EqualityTest(true, "\u{304c}", "\u{304b}\u{3099}"),
EqualityTest(false, "\u{304c}", "\u{304c}\u{3099}"),
// U+212B ANGSTROM SIGN
// U+030A COMBINING RING ABOVE
// U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
EqualityTest(true, "\u{212b}", "A\u{30a}"),
EqualityTest(true, "\u{212b}", "\u{c5}"),
EqualityTest(true, "A\u{30a}", "\u{c5}"),
EqualityTest(false, "A\u{30a}", "a"),
// U+2126 OHM SIGN
// U+03A9 GREEK CAPITAL LETTER OMEGA
EqualityTest(true, "\u{2126}", "\u{03a9}"),
// U+1E69 LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
// U+0323 COMBINING DOT BELOW
// U+0307 COMBINING DOT ABOVE
// U+1E63 LATIN SMALL LETTER S WITH DOT BELOW
EqualityTest(true, "\u{1e69}", "s\u{323}\u{307}"),
EqualityTest(true, "\u{1e69}", "s\u{307}\u{323}"),
EqualityTest(true, "\u{1e69}", "\u{1e63}\u{307}"),
EqualityTest(true, "\u{1e63}\u{307}", "s\u{323}\u{307}"),
EqualityTest(true, "\u{1e63}\u{307}", "s\u{307}\u{323}"),
// U+FB01 LATIN SMALL LIGATURE FI
EqualityTest(true, "\u{fb01}", "\u{fb01}"),
EqualityTest(false, "\u{fb01}", "fi"),
// Test that Unicode collation is performed in deterministic mode.
//
// U+0301 COMBINING ACUTE ACCENT
// U+0341 COMBINING ACUTE TONE MARK
// U+0954 DEVANAGARI ACUTE ACCENT
//
// Collation elements from DUCET:
// 0301 ; [.0000.0024.0002] # COMBINING ACUTE ACCENT
// 0341 ; [.0000.0024.0002] # COMBINING ACUTE TONE MARK
// 0954 ; [.0000.0024.0002] # DEVANAGARI ACUTE ACCENT
//
// U+0301 and U+0954 don't decompose in the canonical decomposition mapping.
// U+0341 has a canonical decomposition mapping of U+0301.
EqualityTest(true, "\u{0301}", "\u{0341}"),
EqualityTest(false, "\u{0301}", "\u{0954}"),
EqualityTest(false, "\u{0341}", "\u{0954}"),
]
NSStringAPIs.test("OperatorEquals") { NSStringAPIs.test("OperatorEquals") {
for test in equalityTests { for test in comparisonTests {
checkEquality( checkComparison(
test.expectedEqualNFD, test.lhs, test.rhs, test.loc.withCurrentLoc()) test.expectedUnicodeCollation, test.lhs, test.rhs, test.loc.withCurrentLoc())
} }
} }