//===--- StringViews.swift ------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
// RUN: %target-run-stdlib-swift
// REQUIRES: executable_test

import Swift
import StdlibUnittest
import StdlibUnicodeUnittest
import StdlibCollectionUnittest

#if _runtime(_ObjC)
// FIXME: Foundation leaks through StdlibUnittest. It adds some conformances
// that overload resolution picks up in this code.
import Foundation
#endif

// CHECK: testing...
print("testing...")

// U+FFFD REPLACEMENT CHARACTER in each representation the tests compare
// against: one UTF-16 code unit, its three-byte UTF-8 encoding, the scalar,
// and the single-scalar Character.
let replacementUTF16: UTF16.CodeUnit = 0xFFFD
let replacementUTF8: [UTF8.CodeUnit] = [0xEF, 0xBF, 0xBD]
let replacementScalar = UnicodeScalar(replacementUTF16)!
let replacementCharacter = Character(replacementScalar)

// This string contains a variety of non-ASCII characters, including
// Unicode scalars that must be represented with a surrogate pair in
// UTF16, grapheme clusters composed of multiple Unicode scalars, and
// invalid UTF16 that should be replaced with replacement characters.
//
// The four appended code units are: an unpaired high surrogate, a space, an
// unpaired low surrogate, and another unpaired high surrogate.
let winterUTF16 = Array("🏂☃❅❆❄︎⛄️❄️".utf16) + [0xD83C, 0x0020, 0xDF67, 0xD83C]

// Built from the raw code units through the stdlib-internal
// `_StringGuts._createStringFromUTF16` rather than from a literal.
// NOTE(review): presumably this is what lets the unpaired surrogates reach
// the string's storage/repair path -- confirm against the stdlib sources.
var winter = winterUTF16.withUnsafeBufferPointer {
  bufPtr in _StringGuts._createStringFromUTF16(bufPtr)
}

// Expected UTF-8 for the four trailing code units of `winterUTF16`:
// replacement, space, replacement, replacement.
let winterInvalidUTF8: [UTF8.CodeUnit] = replacementUTF8 + ([0x20] as [UTF8.CodeUnit]) + replacementUTF8 + replacementUTF8

// Expected UTF-8 for all of `winter`: the encoding of the literal prefix
// (one scalar per row below) followed by `winterInvalidUTF8`.
let winterUTF8: [UTF8.CodeUnit] = [
  0xf0, 0x9f, 0x8f, 0x82,
  0xe2, 0x98, 0x83,
  0xe2, 0x9d, 0x85,
  0xe2, 0x9d, 0x86,
  0xe2, 0x9d, 0x84, 0xef, 0xb8, 0x8e,
  0xe2, 0x9b, 0x84, 0xef, 0xb8, 0x8f,
  0xe2, 0x9d, 0x84, 0xef, 0xb8, 0x8f
] + winterInvalidUTF8

// ASCII-only counterpart to `winter`.
let summer = "school's out!"
// The code units of `summer`; the tests below use these bytes as its expected
// UTF-8 and, widened, as its expected UTF-16 / scalars / characters.
let summerBytes: [UInt8] = [
  0x73, 0x63, 0x68, 0x6f, 0x6f, 0x6c, 0x27, 0x73, 0x20, 0x6f, 0x75, 0x74, 0x21]

var tests = TestSuite("StringViews")

// Checks the UTF-8 and UTF-16 views of both fixtures against the expected
// code units.
tests.test("decoding") {
  expectEqualSequence(
    winterUTF8,
    winter.utf8
  )
  expectEqualSequence(
    [0xd83c, 0xdfc2, 0x2603, 0x2745, 0x2746, 0x2744, 0xfe0e, 0x26c4,
      0xfe0f, 0x2744, 0xfe0f,
      replacementUTF16, 0x0020, replacementUTF16, replacementUTF16
    ],
    winter.utf16
  )
  expectEqualSequence(
    summerBytes,
    summer.utf8
  )
  expectEqualSequence(
    summerBytes.map {UTF16.CodeUnit($0)},
    summer.utf16
  )
}

// winter UTF8 grapheme clusters ([]) and unicode scalars (|)
// [f0 9f 8f 82] [e2 98 83] [e2 9d 85] [e2 9d 86] [e2 9d 84 | ef b8 8e]
// [e2 9b 84 | ef b8 8f] [e2 9d 84 | ef b8 8f]

//===--- To UTF8 ----------------------------------------------------------===//

/// Registers the `index-mapping/*-to-utf8/<id>` tests.
///
/// - Parameters:
///   - id: Suffix appended to each test name. When it is "legacy", the
///     utf16-to-utf8 test expects no bytes (`[]`) for the trailing half of
///     the leading surrogate pair; otherwise it expects `replacementUTF8`.
///   - mapIndex: Maps an index obtained from another view of the string to
///     an index into the given `UTF8View`, or returns `nil`.
func checkToUTF8(
  _ id: String,
  mapIndex: @escaping (String.Index, String.UTF8View)->String.Index?
) {

  tests.test("index-mapping/character-to-utf8/\(id)") {
    // the first three utf8 code units at the start of each grapheme
    // cluster
    expectEqualSequence(
      [
        [0xf0, 0x9f, 0x8f],
        [0xe2, 0x98, 0x83],
        [0xe2, 0x9d, 0x85],
        [0xe2, 0x9d, 0x86],
        [0xe2, 0x9d, 0x84],
        [0xe2, 0x9b, 0x84],
        [0xe2, 0x9d, 0x84],
        replacementUTF8,
        [0x20] + replacementUTF8[0..<2],
        replacementUTF8,
        replacementUTF8
      ] as [[UTF8.CodeUnit]],

      winter.characters.indices.map {
        i in (0..<3).map {
          // `mapIndex` returns an optional but `index(_:offsetBy:)` needs a
          // concrete index. A character boundary is expected to have a UTF-8
          // position, so unwrap; a `nil` traps and fails the test.
          winter.utf8[
            winter.utf8.index(
              mapIndex(i, winter.utf8)!, offsetBy: $0)]
        }
      }, sameValue: ==)

    expectEqual(
      winter.utf8.endIndex, mapIndex(winter.endIndex, winter.utf8))

    expectEqualSequence(
      summerBytes,
      summer.characters.indices.map {
        summer.utf8[mapIndex($0, summer.utf8)!]
      }
    )

    expectEqual(
      summer.utf8.endIndex, mapIndex(summer.endIndex, summer.utf8))
  }

  tests.test("index-mapping/unicode-scalar-to-utf8/\(id)") {
    // the first three utf8 code units at the start of each unicode
    // scalar
    expectEqualSequence(
      [
        [0xf0, 0x9f, 0x8f],
        [0xe2, 0x98, 0x83],
        [0xe2, 0x9d, 0x85],
        [0xe2, 0x9d, 0x86],
        [0xe2, 0x9d, 0x84],
        [0xef, 0xb8, 0x8e],
        [0xe2, 0x9b, 0x84],
        [0xef, 0xb8, 0x8f],
        [0xe2, 0x9d, 0x84],
        [0xef, 0xb8, 0x8f],
        replacementUTF8,
        [0x20] + replacementUTF8[0..<2],
        replacementUTF8,
        replacementUTF8
      ] as [[UTF8.CodeUnit]],

      winter.unicodeScalars.indices.map {
        i in (0..<3).map {
          // Scalar boundaries are expected to map to a UTF-8 position; unwrap
          // the optional result before offsetting.
          winter.utf8[
            winter.utf8.index(mapIndex(i, winter.utf8)!, offsetBy: $0)]
        }
      }, sameValue: ==)

    expectEqual(
      winter.utf8.endIndex,
      mapIndex(winter.unicodeScalars.endIndex, winter.utf8))

    expectEqualSequence(
      summerBytes,
      summer.unicodeScalars.indices.map {
        summer.utf8[mapIndex($0, summer.utf8)!]
      }
    )

    expectEqual(
      summer.utf8.endIndex,
      mapIndex(summer.unicodeScalars.endIndex, summer.utf8))
  }

  tests.test("index-mapping/utf16-to-utf8/\(id)") {
    // check the first three utf8 code units at the start of each utf16
    // code unit
    expectEqualSequence(
      [
        [0xf0, 0x9f, 0x8f],

        // does not align with any utf8 code unit
        id == "legacy" ? [] : replacementUTF8,
        [0xe2, 0x98, 0x83],
        [0xe2, 0x9d, 0x85],
        [0xe2, 0x9d, 0x86],
        [0xe2, 0x9d, 0x84],
        [0xef, 0xb8, 0x8e],
        [0xe2, 0x9b, 0x84],
        [0xef, 0xb8, 0x8f],
        [0xe2, 0x9d, 0x84],
        [0xef, 0xb8, 0x8f],
        replacementUTF8,
        [0x20] + replacementUTF8[0..<2],
        replacementUTF8,
        replacementUTF8
      ] as [[UTF8.CodeUnit]],

      // A `nil` mapping is represented as an empty byte list here, which is
      // what the "legacy" expectation above matches.
      winter.utf16.indices.map {
        i16 in mapIndex(i16, winter.utf8).map {
          i8 in (0..<3).map {
            winter.utf8[winter.utf8.index(i8, offsetBy: $0)]
          }
        } ?? []
      }, sameValue: ==)

    expectNotNil(mapIndex(winter.utf16.endIndex, winter.utf8))
    expectEqual(
      winter.utf8.endIndex,
      mapIndex(winter.utf16.endIndex, winter.utf8)!)

    expectEqualSequence(
      summerBytes,
      summer.utf16.indices.map {
        summer.utf8[mapIndex($0, summer.utf8)!]
      }
    )

    expectNotNil(mapIndex(summer.utf16.endIndex, summer.utf8))
    expectEqual(
      summer.utf8.endIndex,
      mapIndex(summer.utf16.endIndex, summer.utf8)!)
  }

  tests.test("index-mapping/utf8-to-utf8/\(id)") {
    // should always succeed
    for i in winter.utf8.indices {
      expectEqual(i, mapIndex(i, winter.utf8)!)
    }
  }
}
checkToUTF8("legacy") { $0.samePosition(in: $1) }
checkToUTF8("interchange") { i, _ in i }

//===--- To UTF16 ---------------------------------------------------------===//

/// Registers the `index-mapping/*-to-utf16/<id>` tests.
///
/// - Parameters:
///   - id: Suffix appended to each test name. When it is "legacy", UTF-8
///     positions that are not the first byte of a scalar are expected to map
///     to `nil`; otherwise they are expected to yield that scalar's first
///     UTF-16 code unit (see `err`).
///   - mapIndex: Maps an index obtained from another view of the string to
///     an index into the given `UTF16View`, or returns `nil`.
func checkToUTF16(
  _ id: String,
  mapIndex: @escaping (String.Index, String.UTF16View)->String.Index?
) {

  // Expected value at a position the "legacy" mapping rejects: `nil` for
  // "legacy", `codeUnit` for every other id.
  func err(_ codeUnit: Unicode.UTF16.CodeUnit) -> Unicode.UTF16.CodeUnit? {
    return id == "legacy" ? nil : codeUnit
  }

  tests.test("index-mapping/character-to-utf16/\(id)") {
    expectEqualSequence(
      [
        0xd83c, // 0xdfc2,
        0x2603,
        0x2745,
        0x2746,
        0x2744, // 0xfe0e,
        0x26c4, // 0xfe0f,
        0x2744, // 0xfe0f,
        replacementUTF16, 0x20, replacementUTF16, replacementUTF16
      ] as [UTF16.CodeUnit],

      // The view subscript needs a concrete index; a character boundary is
      // expected to have a UTF-16 position, so unwrap.
      winter.characters.indices.map {
        winter.utf16[mapIndex($0, winter.utf16)!]
      },
      sameValue: ==)

    expectEqual(winter.utf16.endIndex, mapIndex(winter.endIndex, winter.utf16))

    expectEqualSequence(
      summerBytes.map { UTF16.CodeUnit($0) },
      summer.characters.indices.map {
        summer.utf16[mapIndex($0, summer.utf16)!]
      }
    )

    expectEqual(summer.utf16.endIndex, mapIndex(summer.endIndex, summer.utf16))
  }

  tests.test("index-mapping/unicode-scalar-to-utf16/\(id)") {
    expectEqualSequence(
      [
        0xd83c, // 0xdfc2,
        0x2603,
        0x2745,
        0x2746,
        0x2744, 0xfe0e,
        0x26c4, 0xfe0f,
        0x2744, 0xfe0f,
        replacementUTF16, 0x20, replacementUTF16, replacementUTF16
      ] as [UTF16.CodeUnit],

      // Scalar boundaries are expected to map to a UTF-16 position; unwrap
      // the optional result before subscripting.
      winter.unicodeScalars.indices.map {
        winter.utf16[mapIndex($0, winter.utf16)!]
      })

    expectEqual(
      winter.utf16.endIndex,
      mapIndex(winter.unicodeScalars.endIndex, winter.utf16))

    expectEqualSequence(
      summerBytes.map { UTF16.CodeUnit($0) },
      summer.unicodeScalars.indices.map {
        summer.utf16[mapIndex($0, summer.utf16)!]
      }
    )

    expectEqual(
      summer.utf16.endIndex,
      mapIndex(summer.unicodeScalars.endIndex, summer.utf16))
  }

  tests.test("index-mapping/utf8-to-utf16/\(id)") {
    // One expected entry per UTF-8 code unit of `winter`; each row is one
    // scalar, with `err(...)` for its continuation bytes.
    expectEqualSequence(
      [
        0xd83c, err(0xd83c), err(0xd83c), err(0xd83c),
        0x2603, err(0x2603), err(0x2603),
        0x2745, err(0x2745), err(0x2745),
        0x2746, err(0x2746), err(0x2746),
        0x2744, err(0x2744), err(0x2744),
        0xfe0e, err(0xfe0e), err(0xfe0e),
        0x26c4, err(0x26c4), err(0x26c4),
        0xfe0f, err(0xfe0f), err(0xfe0f),
        0x2744, err(0x2744), err(0x2744),
        0xfe0f, err(0xfe0f), err(0xfe0f),
        replacementUTF16, err(replacementUTF16), err(replacementUTF16),
        0x20,
        replacementUTF16, err(replacementUTF16), err(replacementUTF16),
        replacementUTF16, err(replacementUTF16), err(replacementUTF16)
      ] as [UTF16.CodeUnit?],

      winter.utf8.indices.map {
        mapIndex($0, winter.utf16).map {
          winter.utf16[$0]
        }
      }, sameValue: ==)

    expectNotNil(mapIndex(winter.utf8.endIndex, winter.utf16))
    expectEqual(
      winter.utf16.endIndex,
      mapIndex(winter.utf8.endIndex, winter.utf16)!)

    expectEqualSequence(
      summerBytes.map { UTF16.CodeUnit($0) },
      summer.utf8.indices.map { summer.utf16[mapIndex($0, summer.utf16)!] }
    )

    expectNotNil(mapIndex(summer.utf8.endIndex, summer.utf16))
    expectEqual(
      summer.utf16.endIndex,
      mapIndex(summer.utf8.endIndex, summer.utf16)!)
  }
}
checkToUTF16("legacy") { $0.samePosition(in: $1) }
checkToUTF16("interchange") { i, _ in i }

//===--- To UnicodeScalar -------------------------------------------------===//

/// Registers the `index-mapping/*-to-unicode-scalar/<id>` tests.
///
/// - Parameters:
///   - id: Suffix appended to each test name. When it is "legacy", positions
///     wrapped in `err` in the expectations are expected to map to `nil`;
///     otherwise they are expected to yield the given scalar.
///   - mapIndex: Maps an index obtained from another view of the string to
///     an index into the given `UnicodeScalarView`, or returns `nil`.
func checkToUnicodeScalar(
  _ id: String,
  mapIndex: @escaping (String.Index, String.UnicodeScalarView)->String.Index?
) {

  // Expected value at a position the "legacy" mapping rejects: `nil` for
  // "legacy", the scalar with value `scalarValue` for every other id.
  func err(_ scalarValue: UInt32) -> UnicodeScalar? {
    return id == "legacy" ? nil : UnicodeScalar(scalarValue)
  }

  tests.test("index-mapping/character-to-unicode-scalar/\(id)") {
    // The first scalar of each grapheme cluster.
    let winterCharacterUnicodeScalars: [UnicodeScalar] = [
      UnicodeScalar(0x1f3c2)!,
      UnicodeScalar(0x2603)!,
      UnicodeScalar(0x2745)!,
      UnicodeScalar(0x2746)!,
      UnicodeScalar(0x2744)!, // 0xfe0e,
      UnicodeScalar(0x26c4)!, // 0xfe0f,
      UnicodeScalar(0x2744)!, // 0xfe0f
      replacementScalar, UnicodeScalar(0x20)!,
      replacementScalar, replacementScalar
    ]

    // The view subscript needs a concrete index; a character boundary is
    // expected to have a scalar position, so unwrap.
    expectEqualSequence(
      winterCharacterUnicodeScalars,
      winter.characters.indices.map {
        winter.unicodeScalars[mapIndex($0, winter.unicodeScalars)!]
      })

    expectEqual(winter.unicodeScalars.endIndex,
      mapIndex(winter.endIndex, winter.unicodeScalars))

    expectEqualSequence(
      summerBytes.map { UnicodeScalar($0) },
      summer.characters.indices.map {
        summer.unicodeScalars[mapIndex($0, summer.unicodeScalars)!]
      }
    )

    expectEqual(summer.unicodeScalars.endIndex,
      mapIndex(summer.endIndex, summer.unicodeScalars))
  }

  tests.test("index-mapping/utf8-to-unicode-scalar/\(id)") {
    // Define expectation separately to help the type-checker, which
    // otherwise runs out of time solving.
    let winterUtf8UnicodeScalars: [UnicodeScalar?] = [
      UnicodeScalar(0x1f3c2), err(0x1f3c2), err(0x1f3c2), err(0x1f3c2),
      UnicodeScalar(0x2603), err(0x2603), err(0x2603),
      UnicodeScalar(0x2745), err(0x2745), err(0x2745),
      UnicodeScalar(0x2746), err(0x2746), err(0x2746),
      UnicodeScalar(0x2744), err(0x2744), err(0x2744),
      UnicodeScalar(0xfe0e), err(0xfe0e), err(0xfe0e),
      UnicodeScalar(0x26c4), err(0x26c4), err(0x26c4),
      UnicodeScalar(0xfe0f), err(0xfe0f), err(0xfe0f),
      UnicodeScalar(0x2744), err(0x2744), err(0x2744),
      UnicodeScalar(0xfe0f), err(0xfe0f), err(0xfe0f),
      replacementScalar, err(replacementScalar.value), err(replacementScalar.value),
      UnicodeScalar(0x20),
      replacementScalar, err(replacementScalar.value), err(replacementScalar.value),
      replacementScalar, err(replacementScalar.value), err(replacementScalar.value)
    ]

    expectEqualSequence(
      winterUtf8UnicodeScalars,
      winter.utf8.indices.map {
        i in mapIndex(i, winter.unicodeScalars).map {
          winter.unicodeScalars[$0]
        }
      }, sameValue: ==)

    expectNotNil(mapIndex(winter.utf8.endIndex, winter.unicodeScalars))
    expectEqual(
      winter.unicodeScalars.endIndex,
      mapIndex(winter.utf8.endIndex, winter.unicodeScalars)!)

    expectEqualSequence(
      summerBytes.map { UnicodeScalar($0) as UnicodeScalar? },
      summer.utf8.indices.map {
        i in mapIndex(i, summer.unicodeScalars).map {
          summer.unicodeScalars[$0]
        }
      }, sameValue: ==)

    expectNotNil(mapIndex(summer.utf8.endIndex, summer.unicodeScalars))
    expectEqual(
      summer.unicodeScalars.endIndex,
      mapIndex(summer.utf8.endIndex, summer.unicodeScalars)!)
  }

  tests.test("index-mapping/utf16-to-unicode-scalar/\(id)") {
    // One expected entry per UTF-16 code unit of `winter`; the second entry
    // is the trailing half of the leading surrogate pair.
    let winterUtf16UnicodeScalars: [UnicodeScalar?] = [
      UnicodeScalar(0x1f3c2), err(replacementScalar.value),
      UnicodeScalar(0x2603),
      UnicodeScalar(0x2745),
      UnicodeScalar(0x2746),
      UnicodeScalar(0x2744), UnicodeScalar(0xfe0e),
      UnicodeScalar(0x26c4), UnicodeScalar(0xfe0f),
      UnicodeScalar(0x2744), UnicodeScalar(0xfe0f),
      replacementScalar, UnicodeScalar(0x20),
      replacementScalar, replacementScalar
    ]

    expectEqualSequence(
      winterUtf16UnicodeScalars,
      winter.utf16.indices.map {
        i in mapIndex(i, winter.unicodeScalars).map {
          winter.unicodeScalars[$0]
        }
      }, sameValue: ==)

    expectNotNil(mapIndex(winter.utf16.endIndex, winter.unicodeScalars))
    expectEqual(
      winter.unicodeScalars.endIndex,
      mapIndex(winter.utf16.endIndex, winter.unicodeScalars)!)

    expectEqualSequence(
      summerBytes.map { UnicodeScalar($0) as UnicodeScalar? },
      summer.utf16.indices.map {
        i in mapIndex(i, summer.unicodeScalars).map {
          summer.unicodeScalars[$0]
        }
      }, sameValue: ==)

    expectNotNil(mapIndex(summer.utf16.endIndex, summer.unicodeScalars))
    expectEqual(
      summer.unicodeScalars.endIndex,
      mapIndex(summer.utf16.endIndex, summer.unicodeScalars)!)
  }
}
checkToUnicodeScalar("legacy") { $0.samePosition(in: $1) }
checkToUnicodeScalar("interchange") { i, _ in i }

//===--- To Character -----------------------------------------------------===//

/// Registers the `index-mapping/*-to-character/<id>` tests.
///
/// - Parameters:
///   - id: Suffix appended to each test name. When it is "legacy", positions
///     wrapped in `err` in the expectations are expected to map to `nil`;
///     otherwise they are expected to yield the given character.
///   - mapIndex: Maps an index obtained from another view of the string to
///     an index into the given `String`, or returns `nil`.
func checkToCharacter(
  _ id: String,
  mapIndex: @escaping (String.Index, String)->String.Index?
) {

  // Expected value at a position the "legacy" mapping rejects: `nil` for
  // "legacy", `c` for every other id.
  func err(_ c: Character) -> Character? {
    return id == "legacy" ? nil : c
  }

  tests.test("index-mapping/unicode-scalar-to-character/\(id)") {
    // One expected entry per unicode scalar of `winter`; variation selectors
    // that sit inside a grapheme cluster are wrapped in `err`.
    let winterUnicodeScalarCharacters: [Character?] = [
      "🏂", "☃", "❅", "❆", "❄︎", err("\u{FE0E}"), "⛄️", err("\u{FE0F}"),
      "❄️", err("\u{FE0F}"),
      replacementCharacter, "\u{20}", replacementCharacter, replacementCharacter
    ]

    expectEqualSequence(
      winterUnicodeScalarCharacters,
      winter.unicodeScalars.indices.map {
        i in mapIndex(i, winter).map {
          winter[$0]
        }
      }, sameValue: ==)

    expectEqual(winter.endIndex, mapIndex(winter.unicodeScalars.endIndex, winter)!)

    expectEqualSequence(
      summerBytes.map { Character(UnicodeScalar($0)) },
      summer.unicodeScalars.indices.map { summer[mapIndex($0, summer)!] }
    )

    expectEqual(summer.endIndex, mapIndex(summer.unicodeScalars.endIndex, summer)!)
  }

  tests.test("index-mapping/utf8-to-character/\(id)") {
    // Define expectation separately to help the type-checker, which
    // otherwise runs out of time solving.
    let winterUtf8Characters: [Character?] = [
      "🏂", err("🏂"), err("🏂"), err("🏂"),
      "☃", err("☃"), err("☃"),
      "❅", err("❅"), err("❅"),
      "❆", err("❆"), err("❆"),
      "❄︎", err("❄︎"), err("❄︎"),
      err("\u{fe0e}"), err("\u{fe0e}"), err("\u{fe0e}"),
      "⛄️", err("⛄️"), err("⛄️"),
      err("\u{fe0f}"), err("\u{fe0f}"), err("\u{fe0f}"),
      "❄️", err("❄️"), err("❄️"),
      err("\u{fe0f}"), err("\u{fe0f}"), err("\u{fe0f}"),
      replacementCharacter, err(replacementCharacter), err(replacementCharacter),
      "\u{20}",
      replacementCharacter, err(replacementCharacter), err(replacementCharacter),
      replacementCharacter, err(replacementCharacter), err(replacementCharacter),
    ]

    expectEqualSequence(
      winterUtf8Characters,
      winter.utf8.indices.map {
        (i:String.UTF8Index) -> Character? in mapIndex(i, winter).map {
          winter[$0]
        }
      }, sameValue: ==)

    expectNotNil(mapIndex(winter.utf8.endIndex, winter))
    expectEqual(
      winter.endIndex,
      mapIndex(winter.utf8.endIndex, winter)!)

    expectEqualSequence(
      summerBytes.map { Character(UnicodeScalar($0)) },
      summer.utf8.indices.map { summer[mapIndex($0, summer)!] }
    )

    expectNotNil(mapIndex(summer.utf8.endIndex, summer))
    expectEqual(
      summer.endIndex,
      mapIndex(summer.utf8.endIndex, summer)!)
  }

  tests.test("index-mapping/utf16-to-character/\(id)") {
    // One expected entry per UTF-16 code unit of `winter`.
    let winterUtf16Characters: [Character?] = [
      "🏂", err(replacementCharacter), "☃", "❅", "❆", "❄︎", err("\u{fe0e}"),
      "⛄️", err("\u{fe0f}"), "❄️", err("\u{fe0f}"),
      replacementCharacter, "\u{20}", replacementCharacter, replacementCharacter
    ]

    expectEqualSequence(
      winterUtf16Characters,
      winter.utf16.indices.map {
        i in mapIndex(i, winter).map {
          winter[$0]
        }
      }, sameValue: ==)

    expectNotNil(mapIndex(winter.utf16.endIndex, winter))
    expectEqual(
      winter.endIndex,
      mapIndex(winter.utf16.endIndex, winter)!)

    expectEqualSequence(
      summerBytes.map { Character(UnicodeScalar($0)) },
      summer.utf16.indices.map {
        summer[mapIndex($0, summer)!]
      }
    )

    expectNotNil(mapIndex(summer.utf16.endIndex, summer))
    expectEqual(
      summer.endIndex,
      mapIndex(summer.utf16.endIndex, summer)!)
  }
}
checkToCharacter("legacy") { $0.samePosition(in: $1) }
checkToCharacter("interchange") { i, _ in i }

//===----------------------------------------------------------------------===//
// These are rather complicated due to their internal buffers, so
// rigorous tests are required
tests.test("UTF8 indexes") {
  // Make sure that equivalent UTF8 indices computed in different ways
  // are still equal.
  //
  // CHECK-NEXT: true
  let abc = "abcdefghijklmnop"

  do {
    let start = String.UTF8Index(abc.startIndex, within: abc.utf8)
    expectEqual(
      abc.utf8.index(after: start),
      String.UTF8Index(abc.index(after: abc.startIndex), within: abc.utf8))
  }

  let diverseCharacters = summer + winter + winter + summer
  let s = diverseCharacters.unicodeScalars
  let u8 = diverseCharacters.utf8
  let u16 = diverseCharacters.utf16

  //===--- nested for...in loops ------------------------------------------===//
  // Test all valid subranges si0..)
checkComparable(str.unicodeScalars.indices, oracle: <=>) checkComparable(str.utf16.indices, oracle: <=>) checkComparable(str.utf8.indices, oracle: <=>) } tests.test("UTF16->String") { let s = summer + winter + winter + summer let v = s.utf16 for i in v.indices { for j in v.indices[i..String") { let s = summer + winter + winter + summer let v = s.utf8 for i in v.indices { for j in v.indices[i..String") { let s = summer + winter + winter + summer let v = s.unicodeScalars for i in s.characters.indices { for j in s.characters.indices[i..