Files
swift-mirror/test/stdlib/StringIndex.swift
2025-10-29 17:38:15 -07:00

1293 lines
41 KiB
Swift
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// RUN: %empty-directory(%t)
// RUN: %target-build-swift -g %s -o %t/StringIndex
// RUN: %target-codesign %t/StringIndex
// RUN: env %env-SWIFT_BINARY_COMPATIBILITY_VERSION=0x050700 %target-run %t/StringIndex %S/Inputs/
// Note: the environment variable above forces the stdlib's bincompat version to
// 5.7 so that we can test new behavior even if the SDK we're using predates it.
// REQUIRES: executable_test
// REQUIRES: optimized_stdlib
// UNSUPPORTED: freestanding
import StdlibUnittest
#if _runtime(_ObjC)
import Foundation
import StdlibUnicodeUnittest
#endif
var suite = TestSuite("StringIndexTests")
defer { runAllTests() }
enum SimpleString: String {
case smallASCII = "abcdefg"
case smallUnicode = "abéÏ𓀀"
case largeASCII = "012345678901234567890"
case largeUnicode = "abéÏ012345678901234567890𓀀"
case emoji = "😀😃🤢🤮👩🏿‍🎤🧛🏻‍♂️🧛🏻‍♂️👩‍👩‍👦‍👦"
}
let simpleStrings: [String] = [
SimpleString.smallASCII.rawValue,
SimpleString.smallUnicode.rawValue,
SimpleString.largeASCII.rawValue,
SimpleString.largeUnicode.rawValue,
SimpleString.emoji.rawValue,
"",
]
suite.test("basic sanity checks")
.forEach(in: simpleStrings) { s in
let utf8 = Array(s.utf8)
let subUTF8 = Array(s[...].utf8)
let utf16 = Array(s.utf16)
let subUTF16 = Array(s[...].utf16)
let utf32 = Array(s.unicodeScalars.map { $0.value })
let subUTF32 = Array(s[...].unicodeScalars.map { $0.value })
expectEqual(s, String(decoding: utf8, as: UTF8.self))
expectEqual(s, String(decoding: subUTF8, as: UTF8.self))
expectEqual(s, String(decoding: utf16, as: UTF16.self))
expectEqual(s, String(decoding: subUTF16, as: UTF16.self))
expectEqual(s, String(decoding: utf32, as: UTF32.self))
expectEqual(s, String(decoding: subUTF32, as: UTF32.self))
}
suite.test("view counts")
.forEach(in: simpleStrings) { s in
func validateViewCount<View: BidirectionalCollection>(
_ view: View, for string: String,
stackTrace: SourceLocStack = SourceLocStack(),
showFrame: Bool = true,
file: String = #file, line: UInt = #line
) where View.Element: Equatable, View.Index == String.Index {
let stackTrace = stackTrace.pushIf(showFrame, file: file, line: line)
let count = view.count
func expect(_ i: Int,
file: String = #file, line: UInt = #line
) {
expectEqual(count, i, "for String: \(string)",
stackTrace: stackTrace.pushIf(showFrame, file: file, line: line),
showFrame: false)
}
let reversedView = view.reversed()
expect(Array(view).count)
expect(view.indices.count)
expect(view.indices.reversed().count)
expect(reversedView.indices.count)
expect(view.distance(from: view.startIndex, to: view.endIndex))
expect(reversedView.distance(
from: reversedView.startIndex, to: reversedView.endIndex))
// Access the elements from the indices
expectEqual(Array(view), view.indices.map { view[$0] })
expectEqual(
Array(reversedView), reversedView.indices.map { reversedView[$0] })
let indicesArray = Array<String.Index>(view.indices)
for i in 0..<indicesArray.count {
var idx = view.startIndex
idx = view.index(idx, offsetBy: i)
expectEqual(indicesArray[i], idx)
}
}
validateViewCount(s, for: s)
validateViewCount(s.utf8, for: s)
validateViewCount(s.utf16, for: s)
validateViewCount(s.unicodeScalars, for: s)
validateViewCount(s[...], for: s)
validateViewCount(s[...].utf8, for: s)
validateViewCount(s[...].utf16, for: s)
validateViewCount(s[...].unicodeScalars, for: s)
}
suite.test("interchange")
.forEach(in: simpleStrings) { s in
// Basic index alignment
for idx in s.utf8.indices {
let char = s.utf8[idx]
// ASCII or leading code unit in the scalar
if char <= 0x7F || char >= 0b1100_0000 {
expectEqual(idx, idx.samePosition(in: s.unicodeScalars))
expectEqual(idx, idx.samePosition(in: s.utf16))
// ASCII
if char <= 0x7F {
expectEqual(UInt16(char), s.utf16[idx])
expectEqual(UInt32(char), s.unicodeScalars[idx].value)
}
} else {
// Continuation code unit
assert(char & 0b1100_0000 == 0b1000_0000)
expectNil(idx.samePosition(in: s))
expectNil(idx.samePosition(in: s.utf16))
expectNil(idx.samePosition(in: s.unicodeScalars))
}
}
}
suite.test("UTF-16 Offsets")
.forEach(in: simpleStrings) { s in
let end = s.endIndex
let utf16Count = s.utf16.count
expectEqual(end, String.Index(utf16Offset: utf16Count, in: s))
expectEqual(end, String.Index(utf16Offset: utf16Count, in: s[...]))
let pastEnd = String.Index(utf16Offset: utf16Count+1, in: s)
expectNotEqual(end, pastEnd)
expectEqual(pastEnd, String.Index(utf16Offset: utf16Count+1, in: s[...]))
expectEqual(pastEnd, String.Index(utf16Offset: utf16Count+2, in: s))
expectEqual(pastEnd, String.Index(utf16Offset: -1, in: s))
expectEqual(
pastEnd, String.Index(utf16Offset: Swift.max(1, utf16Count), in: s.dropFirst()))
let utf16Indices = Array(s.utf16.indices)
expectEqual(utf16Count, utf16Indices.count)
for i in 0..<utf16Indices.count {
let idx = String.Index(utf16Offset: i, in: s)
expectEqual(utf16Indices[i], idx)
expectEqual(i, idx.utf16Offset(in: s))
expectEqual(i, idx.utf16Offset(in: s[...]))
if i < s.dropLast().utf16.count {
expectEqual(
utf16Indices[i], String.Index(utf16Offset: i, in: s.dropLast()))
expectEqual(i, idx.utf16Offset(in: s.dropLast()))
} else if i == s.dropLast().utf16.count {
expectEqual(
utf16Indices[i], String.Index(utf16Offset: i, in: s.dropLast()))
} else {
expectNotEqual(
utf16Indices[i], String.Index(utf16Offset: i, in: s.dropLast()))
}
}
}
func swift5ScalarAlign(_ idx: String.Index, in str: String) -> String.Index {
var idx = idx
while str.utf8[idx] & 0xC0 == 0x80 { str.utf8.formIndex(before: &idx) }
return idx
}
suite.test("Scalar Align UTF-8 indices") {
// TODO: Test a new aligning API when we add it. For now, we
// test scalar-aligning UTF-8 indices
let str = "a😇"
let subScalarIdx = str.utf8.index(str.utf8.startIndex, offsetBy: 2)
let roundedIdx = swift5ScalarAlign(subScalarIdx, in: str)
expectEqual(1, roundedIdx.utf16Offset(in: str))
let roundedIdx2 = str.utf8[...subScalarIdx].lastIndex { $0 & 0xC0 != 0x80 }
expectEqual(roundedIdx, roundedIdx2)
var roundedIdx3 = subScalarIdx
while roundedIdx3.samePosition(in: str.unicodeScalars) == nil {
str.utf8.formIndex(before: &roundedIdx3)
}
expectEqual(roundedIdx, roundedIdx3)
}
#if _runtime(_ObjC)
suite.test("String.Index(_:within) / Range<String.Index>(_:in:)") {
guard #available(SwiftStdlib 5.1, *) else {
return
}
let str = simpleStrings.joined()
let substr = str[...]
for idx in str.utf8.indices {
expectEqual(
String.Index(idx, within: str), String.Index(idx, within: substr))
}
expectNil(String.Index(str.startIndex, within: str.dropFirst()))
expectNil(String.Index(str.endIndex, within: str.dropLast()))
expectNotNil(String.Index(str.startIndex, within: str))
expectNotNil(String.Index(str.endIndex, within: str))
let utf16Count = str.utf16.count
let utf16Indices = Array(str.utf16.indices) + [str.utf16.endIndex]
for location in 0..<utf16Count {
for length in 0...(utf16Count - location) {
let strLB = String.Index(utf16Indices[location], within: str)
let substrLB = String.Index(utf16Indices[location], within: substr)
let strUB = String.Index(utf16Indices[location+length], within: str)
let substrUB = String.Index(utf16Indices[location+length], within: substr)
expectEqual(strLB, substrLB)
expectEqual(strUB, substrUB)
let nsRange = NSRange(location: location, length: length)
let strRange = Range<String.Index>(nsRange, in: str)
let substrRange = Range<String.Index>(nsRange, in: substr)
expectEqual(strRange, substrRange)
guard strLB != nil && strUB != nil else {
// TODO: rdar://112643333
//expectNil(strRange)
continue
}
expectEqual(strRange, Range(uncheckedBounds: (strLB!, strUB!)))
}
}
}
#endif
#if _runtime(_ObjC)
suite.test("Misaligned") {
// Misaligned indices were fixed in 5.1
guard _hasSwift_5_1() else { return }
func doIt(_ str: String) {
let characterIndices = Array(str.indices)
let scalarIndices = Array(str.unicodeScalars.indices) + [str.endIndex]
let utf8Indices = Array(str.utf8.indices)
var lastScalarI = 0
for i in 1..<utf8Indices.count {
let idx = utf8Indices[i]
// Skip aligned indices
guard idx < scalarIndices[lastScalarI + 1] else {
assert(idx == scalarIndices[lastScalarI + 1])
lastScalarI += 1
continue
}
expectTrue(UTF8.isContinuation(str.utf8[idx]))
let lastScalarIdx = scalarIndices[lastScalarI]
// Check aligning-down
expectEqual(str[lastScalarIdx], str[idx])
expectEqual(str.utf16[lastScalarIdx], str.utf16[idx])
expectEqual(str.unicodeScalars[lastScalarIdx], str.unicodeScalars[idx])
// Check distance
let (start, end) = (str.startIndex, str.endIndex)
if characterIndices.contains(lastScalarIdx) {
expectEqual(0, str.distance(from: lastScalarIdx, to: idx))
expectEqual(str[..<idx].count, str.distance(from: start, to: idx))
expectEqual(str[idx...].count, str.distance(from: idx, to: end))
}
expectEqual(
0, str.unicodeScalars.distance(from: lastScalarIdx, to: idx))
expectEqual(
str.unicodeScalars[..<idx].count,
str.unicodeScalars.distance(from: start, to: idx))
expectEqual(
str.unicodeScalars[idx...].count,
str.unicodeScalars.distance(from: idx, to: end))
expectEqual(0, str.utf16.distance(from: lastScalarIdx, to: idx))
expectEqual(
str.utf16[..<idx].count, str.utf16.distance(from: start, to: idx))
expectEqual(
str.utf16[idx...].count, str.utf16.distance(from: idx, to: end))
}
}
let nsstring: NSString = "aодиde\u{301}日🧟‍♀️"
doIt(nsstring as String)
let string = "aодиde\u{301}日🧟‍♀️"
doIt(string)
}
#endif
let _examples: [StaticString] = [
"abc\r\ndefg",
"ab\r\ncдe\u{301}日🧟x🧟x🏳🌈🇺🇸🇨🇦",
]
let examples: [String] = _examples.flatMap { s in
let str = "\(s)"
#if _runtime(_ObjC)
let unichars = Array(str.utf16)
let nsstr = NSString(characters: unichars, length: unichars.count)
return [str, nsstr as String]
#else
return [str]
#endif
}
suite.test("Trivial index conversion cases/characters")
.forEach(in: examples) { s in
for i in s.indices {
let j = String.Index(i, within: s)
expectNotNil(j, "i: \(i)")
expectEqual(j, i)
}
}
suite.test("Trivial index conversion cases/scalars")
.forEach(in: examples) { s in
for i in s.unicodeScalars.indices {
let j = String.Index(i, within: s.unicodeScalars)
expectNotNil(j, "i: \(i)")
expectEqual(j, i)
}
}
suite.test("Trivial index conversion cases/UTF-8")
.forEach(in: examples) { s in
for i in s.utf8.indices {
let j = String.Index(i, within: s.utf8)
expectNotNil(j, "i: \(i)")
expectEqual(j, i)
}
}
suite.test("Trivial index conversion cases/UTF-16")
.forEach(in: examples) { s in
guard #available(SwiftStdlib 5.7, *) else {
// Prior to 5.7, String.Index.init(_:within:) used to incorrectly reject
// indices pointing to trailing surrogates.
return
}
for i in s.utf16.indices {
let j = String.Index(i, within: s.utf16)
expectNotNil(j, "i: \(i)")
expectEqual(j, i)
}
}
#if _runtime(_ObjC)
suite.test("Exhaustive Index Interchange")
.forEach(in: examples) { str in
guard #available(SwiftStdlib 5.1, *) else {
return
}
//str.dumpIndices()
var curCharIdx = str.startIndex
var curScalarIdx = str.startIndex
var curUTF8Idx = str.startIndex
var curUTF16Idx = str.startIndex
while curCharIdx < str.endIndex {
let curChar = str[curCharIdx]
expectEqual(curChar, str[curScalarIdx])
expectEqual(curChar, str[curUTF8Idx])
expectEqual(curChar, str[curUTF16Idx])
// Advance the character index once and have the scalar index catch up
str.formIndex(after: &curCharIdx)
let scalarStartIdx = curScalarIdx
defer {
let sub = str[scalarStartIdx..<curScalarIdx]
expectEqual(sub.count, 1)
expectEqual(sub.first, curChar)
expectEqual(str.distance(from: scalarStartIdx, to: curScalarIdx), 1)
}
while curScalarIdx < curCharIdx {
let scalarStartIdx = curScalarIdx
let curScalar = str.unicodeScalars[curScalarIdx]
let curSubChar = str[curScalarIdx]
// If there is a Character prior to this scalar, remember it and check
// that misaligned code unit indices also produce it.
let scalarPriorCharacter: Character?
if scalarStartIdx == str.startIndex {
scalarPriorCharacter = nil
} else {
scalarPriorCharacter = str[str.index(before: scalarStartIdx)]
}
// Advance the scalar index once and have the code unit indices catch up
str.unicodeScalars.formIndex(after: &curScalarIdx)
let utf8StartIdx = curUTF8Idx
defer {
let sub = str.unicodeScalars[utf8StartIdx..<curUTF8Idx]
expectEqual(sub.count, 1)
expectEqual(sub.first, curScalar)
expectEqual(
str.unicodeScalars.distance(from: utf8StartIdx, to: curUTF8Idx),
1)
expectEqual(
str.utf8.distance(from: utf8StartIdx, to: curUTF8Idx),
curScalar.utf8.count)
}
while curUTF8Idx < curScalarIdx {
expectEqual(curScalar, str.unicodeScalars[curUTF8Idx])
expectEqual(curSubChar, str[curUTF8Idx])
expectFalse(UTF16.isTrailSurrogate(str.utf16[curUTF8Idx]))
expectEqual(utf8StartIdx, str[curUTF8Idx...].startIndex)
expectTrue(str[utf8StartIdx..<curUTF8Idx].isEmpty)
expectEqual(str.utf16.distance(from: utf8StartIdx, to: curUTF8Idx), 0)
if let scalarPrior = scalarPriorCharacter {
expectEqual(scalarPrior, str[str.index(before: curUTF8Idx)])
}
str.utf8.formIndex(after: &curUTF8Idx)
}
expectEqual(curUTF8Idx, curScalarIdx)
var utf8RevIdx = curUTF8Idx
while utf8RevIdx > utf8StartIdx {
str.utf8.formIndex(before: &utf8RevIdx)
expectEqual(curScalar, str.unicodeScalars[utf8RevIdx])
expectEqual(curSubChar, str[utf8RevIdx])
expectFalse(UTF16.isTrailSurrogate(str.utf16[utf8RevIdx]))
expectEqual(utf8StartIdx, str[utf8RevIdx...].startIndex)
expectTrue(str[utf8StartIdx..<utf8RevIdx].isEmpty)
expectEqual(str.utf16.distance(from: utf8StartIdx, to: utf8RevIdx), 0)
}
expectEqual(utf8RevIdx, utf8StartIdx)
let utf16StartIdx = curUTF16Idx
defer {
let sub = str.unicodeScalars[utf16StartIdx..<curUTF16Idx]
expectEqual(sub.count, 1)
expectEqual(sub.first, curScalar)
expectEqual(
str.unicodeScalars.distance(from: utf16StartIdx, to: curUTF16Idx),
1)
expectEqual(
str.utf16.distance(from: utf16StartIdx, to: curUTF16Idx),
curScalar.utf16.count)
}
while curUTF16Idx < curScalarIdx {
expectEqual(curScalar, str.unicodeScalars[curUTF16Idx])
expectEqual(curSubChar, str[curUTF16Idx])
expectFalse(UTF8.isContinuation(str.utf8[curUTF16Idx]))
expectEqual(utf16StartIdx, str[curUTF16Idx...].startIndex)
expectTrue(str[utf16StartIdx..<curUTF16Idx].isEmpty)
expectEqual(str.utf8.distance(from: utf16StartIdx, to: curUTF16Idx), 0)
if let scalarPrior = scalarPriorCharacter {
expectEqual(scalarPrior, str[str.index(before: curUTF16Idx)])
}
str.utf16.formIndex(after: &curUTF16Idx)
}
expectEqual(curUTF16Idx, curScalarIdx)
var utf16RevIdx = curUTF16Idx
while utf16RevIdx > utf16StartIdx {
str.utf16.formIndex(before: &utf16RevIdx)
expectEqual(curScalar, str.unicodeScalars[utf16RevIdx])
expectEqual(curSubChar, str[utf16RevIdx])
expectFalse(UTF8.isContinuation(str.utf8[utf16RevIdx]))
expectEqual(utf16StartIdx, str[utf16RevIdx...].startIndex)
expectTrue(str[utf16StartIdx..<utf16RevIdx].isEmpty)
expectEqual(str.utf8.distance(from: utf16StartIdx, to: utf16RevIdx), 0)
}
expectEqual(utf16RevIdx, utf16StartIdx)
}
}
}
#endif
func fullyExhaustiveIndexInterchange(_ string: String) {
guard #available(SwiftStdlib 5.7, *) else {
// Index navigation in 5.7 always rounds input indices down to the nearest
// Character, so that we always have a well-defined distance between
// indices, even if they aren't valid.
//
// 5.6 and below did not behave consistently in this case.
return
}
//string.dumpIndices()
let scalarMap = string.scalarMap()
let characterMap = string.characterMap()
let allIndices = string.allIndices()
func referenceCharacterDistance(
from i: String.Index, to j: String.Index
) -> Int {
let ci = characterMap[i]!
let cj = characterMap[j]!
return cj.offset - ci.offset
}
func referenceScalarDistance(
from i: String.Index, to j: String.Index
) -> Int {
let si = scalarMap[i]!
let sj = scalarMap[j]!
return sj.offset - si.offset
}
for i in allIndices {
for j in allIndices {
let characterDistance = referenceCharacterDistance(from: i, to: j)
let scalarDistance = referenceScalarDistance(from: i, to: j)
// Check distance calculations.
expectEqual(
string.distance(from: i, to: j),
characterDistance,
"""
string: \(string.debugDescription)
i: \(i)
j: \(j)
""")
expectEqual(
string.unicodeScalars.distance(from: i, to: j),
scalarDistance,
"""
string: \(string.debugDescription)
i: \(i)
j: \(j)
""")
if i <= j {
// The substring `string[i..<j]` does not round its bounds to
// `Character` boundaries, so it may have a different count than what
// the base string reports.
let si = scalarMap[i]!.index
let sj = scalarMap[j]!.index
let s = String.UnicodeScalarView(string.unicodeScalars[si ..< sj])
let substringDistance = String(s).count
let substring = string[i ..< j]
let subscalars = string.unicodeScalars[i ..< j]
expectEqual(substring.count, substringDistance,
"""
string: \(string.debugDescription)
i: \(i)
j: \(j)
""")
// The `Character` alignment consideration above doesn't apply to
// Unicode scalars in a substring.
expectEqual(subscalars.count, scalarDistance,
"""
string: \(string.debugDescription)
i: \(i)
j: \(j)
""")
// Check reachability of substring bounds.
var i = substring.startIndex
for _ in 0 ..< substringDistance {
substring.formIndex(after: &i)
}
expectEqual(i, substring.endIndex)
expectEqual(
substring.index(substring.startIndex, offsetBy: substringDistance),
substring.endIndex,
"""
string: \(string.debugDescription)
i: \(i)
j: \(j)
distance: \(characterDistance)
""")
expectEqual(
substring.index(substring.endIndex, offsetBy: -substringDistance),
substring.startIndex,
"""
string: \(string.debugDescription)
i: \(i)
j: \(j)
distance: \(-characterDistance)
""")
expectEqual(
subscalars.index(subscalars.startIndex, offsetBy: scalarDistance),
subscalars.endIndex,
"""
string: \(string.debugDescription)
i: \(i)
j: \(j)
distance: \(scalarDistance)
""")
expectEqual(
subscalars.index(subscalars.endIndex, offsetBy: -scalarDistance),
subscalars.startIndex,
"""
string: \(string.debugDescription)
i: \(i)
j: \(j)
distance: \(-scalarDistance)
""")
}
// Check `String.index(_:offsetBy:limitedBy:)`.
for limit in allIndices {
let dest = characterMap[j]!.index
let expectHit = (
characterDistance > 0 && i <= limit && dest > limit ? true
: characterDistance < 0 && i >= limit && dest < limit ? true
: false)
expectEqual(
string.index(i, offsetBy: characterDistance, limitedBy: limit),
expectHit ? nil : dest,
"""
string: \(string.debugDescription)
i: \(i)
j: \(j) (distance: \(characterDistance))
limit: \(limit)
""")
}
}
}
}
suite.test("Fully exhaustive index interchange")
.forEach(in: examples) { string in
fullyExhaustiveIndexInterchange(string)
}
#if _runtime(_ObjC)
suite.test("Fully exhaustive index interchange/GraphemeBreakTests") {
for test in graphemeBreakTests {
fullyExhaustiveIndexInterchange(test.string)
}
}
#endif
suite.test("Global vs local grapheme cluster boundaries") {
guard #available(SwiftStdlib 5.7, *) else {
// Index navigation in 5.7 always rounds input indices down to the nearest
// Character, so that we always have a well-defined distance between
// indices, even if they aren't valid.
//
// 5.6 and below did not behave consistently in this case.
return
}
let str = "a🇺🇸🇨🇦b"
// U+0061 LATIN SMALL LETTER A
// U+1F1FA REGIONAL INDICATOR SYMBOL LETTER U
// U+1F1F8 REGIONAL INDICATOR SYMBOL LETTER S
// U+1F1E8 REGIONAL INDICATOR SYMBOL LETTER C
// U+1F1E6 REGIONAL INDICATOR SYMBOL LETTER A
// U+0062 LATIN SMALL LETTER B
let c = Array(str.indices) + [str.endIndex]
let s = Array(str.unicodeScalars.indices) + [str.unicodeScalars.endIndex]
let u8 = Array(str.utf8.indices) + [str.utf8.endIndex]
let u16 = Array(str.utf16.indices) + [str.utf16.endIndex]
// Index navigation must always round the input index down to the nearest
// Character.
expectEqual(str.count, 4)
expectEqual(str.index(after: c[0]), c[1])
expectEqual(str.index(after: c[1]), c[2])
expectEqual(str.index(after: c[2]), c[3])
expectEqual(str.index(after: c[3]), c[4])
expectEqual(str.index(before: c[4]), c[3])
expectEqual(str.index(before: c[3]), c[2])
expectEqual(str.index(before: c[2]), c[1])
expectEqual(str.index(before: c[1]), c[0])
// Scalars
expectEqual(str.unicodeScalars.count, 6)
expectEqual(str.index(after: s[0]), s[1])
expectEqual(str.index(after: s[1]), s[3])
expectEqual(str.index(after: s[2]), s[3]) // s[2] s[1]
expectEqual(str.index(after: s[3]), s[5])
expectEqual(str.index(after: s[4]), s[5]) // s[4] s[3]
expectEqual(str.index(after: s[5]), s[6])
expectEqual(str.index(before: s[6]), s[5])
expectEqual(str.index(before: s[5]), s[3])
expectEqual(str.index(before: s[4]), s[1]) // s[4] s[3]
expectEqual(str.index(before: s[3]), s[1])
expectEqual(str.index(before: s[2]), s[0]) // s[2] s[1]
expectEqual(str.index(before: s[1]), s[0])
// UTF-8
expectEqual(str.utf8.count, 18)
expectEqual(str.index(after: u8[0]), u8[1])
for i in 1 ..< 9 { // s[i] s[1]
expectEqual(str.index(after: u8[i]), u8[9])
}
for i in 9 ..< 17 { // s[i] s[9]
expectEqual(str.index(after: u8[i]), u8[17])
}
expectEqual(str.index(after: u8[17]), u8[18])
// UTF-16
expectEqual(str.utf16.count, 10)
expectEqual(str.index(after: u16[0]), u16[1])
expectEqual(str.index(after: u16[1]), u16[5])
expectEqual(str.index(after: u16[2]), u16[5]) // s[2] s[1]
expectEqual(str.index(after: u16[3]), u16[5]) // s[3] s[1]
expectEqual(str.index(after: u16[4]), u16[5]) // s[4] s[1]
expectEqual(str.index(after: u16[5]), u16[9])
expectEqual(str.index(after: u16[6]), u16[9]) // s[6] s[5]
expectEqual(str.index(after: u16[7]), u16[9]) // s[7] s[5]
expectEqual(str.index(after: u16[8]), u16[9]) // s[8] s[5]
expectEqual(str.index(after: u16[9]), u16[10])
let i = s[2] // second scalar of US flag
let j = s[4] // second scalar of CA flag
// However, subscripting should only round down to the nearest scalar.
// Per SE-0180, `s[i..<j]` should be the Seychelles flag (country code SC)
let slice = str[i ..< j]
expectEqual(slice, "\u{1F1F8}\u{1F1E8}")
expectEqual(slice.first, "\u{1F1F8}\u{1F1E8}" as Character)
expectEqual(slice.count, 1)
// Index navigation within the substring must work as if we were in a string
// containing exactly the same scalars. Substring bounds must be reachable,
// even if they aren't reachable in the base string.
expectEqual(slice.startIndex, i)
expectEqual(slice.endIndex, j)
expectEqual(slice.index(after: slice.startIndex), j)
expectEqual(slice.index(before: slice.endIndex), i)
expectEqual(slice.unicodeScalars.count, 2)
expectEqual(slice.utf8.count, 8)
expectEqual(slice.utf16.count, 4)
}
#if _runtime(_ObjC)
suite.test("Index encoding correction/UTF-16→8/subscripts") {
guard #available(SwiftStdlib 5.7, *) else {
// String indices did not track their encoding until 5.7.
return
}
// This tests a special case in String's index validation where we allow
// UTF-16-encoded indices to be applied to UTF-8 strings, by transcoding the
// offset of the index. Applying such indices is always an error, but it
// happens relatively often when someone is erroneously holding on to an index
// of a UTF-16-encoded bridged NSString value through a mutation. Mutating a
// bridged string converts it to a UTF-8 native string, changing the meaning
// of the offset value. (Even if the mutation wouldn't otherwise affect the
// original index.)
//
// Before 5.7, the stdlib did not track the offset encoding of String indices,
// so they simply used the UTF-16 offset to access UTF-8 storage. This
// generally produces nonsensical results, but if the string happens to be
// ASCII, then this actually did work "fine".
//
// To avoid breaking binaries that rely on this behavior, the 5.7 stdlib
// automatically converts UTF-16-encoded indices to UTF-8 when needed.
// This can be quite slow, but it always produces the "right" results.
//
// This conversion also works in the other direction, and it does not account
// for the effect of the actual mutation. If the mutation's effect included
// the data addressed by the original index, then we may still get nonsensical
// results.
var s = ("🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b" as NSString) as String
//s.dumpIndices()
let originals = s.allIndices(includingEnd: false).map {
($0, s[$0], s.unicodeScalars[$0], s.utf8[$0], s.utf16[$0])
}
s.makeContiguousUTF8()
//s.dumpIndices()
for (i, char, scalar, u8, u16) in originals {
expectEqual(s[i], char, "i: \(i)")
expectEqual(s.unicodeScalars[i], scalar, "i: \(i)")
expectEqual(s.utf8[i], u8, "i: \(i)")
expectEqual(s.utf16[i], u16, "i: \(i)")
}
}
#endif
#if _runtime(_ObjC)
suite.test("Index encoding correction/UTF-8→16/subscripts") {
guard #available(SwiftStdlib 5.7, *) else {
// String indices did not track their encoding until 5.7.
return
}
// This tests the UTF-8 to UTF-16 direction of the special case in String's
// index validation above.
let native = "🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b"
let cocoa = ("🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b" as NSString) as String
let nativeIndices = native.allIndices(includingEnd: false).map {
(
$0,
native[$0],
native.unicodeScalars[$0],
native.utf8[$0],
native.utf16[$0])
}
for (i, char, scalar, u8, u16) in nativeIndices {
expectEqual(cocoa[i], char, "i: \(i)")
expectEqual(cocoa.unicodeScalars[i], scalar, "i: \(i)")
expectEqual(cocoa.utf8[i], u8, "i: \(i)")
expectEqual(cocoa.utf16[i], u16, "i: \(i)")
}
}
#endif
#if _runtime(_ObjC)
suite.test("Index encoding correction/UTF-16→8/conversions/characters") {
guard #available(SwiftStdlib 5.7, *) else {
// String indices did not track their encoding until 5.7.
return
}
var s = ("🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b" as NSString) as String
let chars = s.indices.map { ($0, s[$0]) }
s.makeContiguousUTF8()
for (i, char) in chars {
let j = String.Index(i, within: s)
expectNotNil(j, "i: \(i)")
expectEqual(j.map { s[$0] }, char, "i: \(i)")
}
}
#endif
#if _runtime(_ObjC)
suite.test("Index encoding correction/UTF-8→16/conversions/characters") {
guard #available(SwiftStdlib 5.7, *) else {
// String indices did not track their encoding until 5.7.
return
}
let native = "🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b"
let cocoa = ("🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b" as NSString) as String
let chars = native.indices.map { ($0, native[$0]) }
for (i, char) in chars {
let j = String.Index(i, within: cocoa)
expectNotNil(j, "i: \(i)")
expectEqual(j.map { cocoa[$0] }, char, "i: \(i)")
}
}
#endif
#if _runtime(_ObjC)
suite.test("Index encoding correction/UTF-16→8/conversions/scalars") {
guard #available(SwiftStdlib 5.7, *) else {
// String indices did not track their encoding until 5.7.
return
}
var s = ("🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b" as NSString) as String
let scalars = s.unicodeScalars.indices.map { ($0, s.unicodeScalars[$0]) }
s.makeContiguousUTF8()
for (i, scalar) in scalars {
let j = String.Index(i, within: s.unicodeScalars)
expectNotNil(j, "i: \(i)")
expectEqual(j.map { s.unicodeScalars[$0] }, scalar, "i: \(i)")
}
}
#endif
#if _runtime(_ObjC)
suite.test("Index encoding correction/UTF-8→16/conversions/scalars") {
guard #available(SwiftStdlib 5.7, *) else {
// String indices did not track their encoding until 5.7.
return
}
let native = "🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b"
let cocoa = ("🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b" as NSString) as String
let scalars = native.unicodeScalars.indices.map {
($0, native.unicodeScalars[$0])
}
for (i, scalar) in scalars {
let j = String.Index(i, within: cocoa.unicodeScalars)
expectNotNil(j, "i: \(i)")
expectEqual(j.map { cocoa.unicodeScalars[$0] }, scalar, "i: \(i)")
}
}
#endif
#if _runtime(_ObjC)
suite.test("Index encoding correction/UTF-16→8/conversions/UTF-8") {
guard #available(SwiftStdlib 5.7, *) else {
// String indices did not track their encoding until 5.7.
return
}
var s = ("🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b" as NSString) as String
let utf8Units = s.utf8.indices.map { ($0, s.utf8[$0]) }
s.makeContiguousUTF8()
for (i, u8) in utf8Units {
let j = String.Index(i, within: s.utf8)
expectNotNil(j, "i: \(i)")
expectEqual(j.map { s.utf8[$0] }, u8, "i: \(i)")
}
}
#endif
#if _runtime(_ObjC)
suite.test("Index encoding correction/UTF-8→16/conversions/UTF-8") {
guard #available(SwiftStdlib 5.7, *) else {
// String indices did not track their encoding until 5.7.
return
}
let native = "🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b"
let cocoa = ("🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b" as NSString) as String
let utf8Units = native.utf8.indices.map { ($0, native.utf8[$0]) }
for (i, u8) in utf8Units {
let j = String.Index(i, within: cocoa.utf8)
expectNotNil(j, "i: \(i)")
expectEqual(j.map { cocoa.utf8[$0] }, u8, "i: \(i)")
}
}
#endif
#if _runtime(_ObjC)
suite.test("Index encoding correction/UTF-16→8/conversions/UTF-16") {
guard #available(SwiftStdlib 5.7, *) else {
// String indices did not track their encoding until 5.7.
return
}
var s = ("🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b" as NSString) as String
let utf16Units = s.utf16.indices.map { ($0, s.utf16[$0]) }
s.makeContiguousUTF8()
for (i, u16) in utf16Units {
let j = String.Index(i, within: s.utf16)
expectNotNil(j, "i: \(i)")
expectEqual(j.map { s.utf16[$0] }, u16, "i: \(i)")
}
}
#endif
#if _runtime(_ObjC)
suite.test("Index encoding correction/UTF-8→16/conversions/UTF-16") {
guard #available(SwiftStdlib 5.7, *) else {
// String indices did not track their encoding until 5.7.
return
}
let native = "🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b"
let cocoa = ("🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b" as NSString) as String
let utf16Units = native.utf16.indices.map { ($0, native.utf16[$0]) }
for (i, u16) in utf16Units {
let j = String.Index(i, within: cocoa.utf16)
expectNotNil(j, "i: \(i)")
expectEqual(j.map { cocoa.utf16[$0] }, u16, "i: \(i)")
}
}
#endif
suite.test("UTF-16 breadcrumbs") {
let string = #"""
The powerful programming language that is also easy to learn.
손쉽게 학습할 수 있는 강력한 프로그래밍 언어.
🪙 A 🥞 short 🍰 piece 🫘 of 🌰 text 👨‍👨‍👧‍👧 with 👨‍👩‍👦 some 🚶🏽 emoji 🇺🇸🇨🇦 characters 🧈
some🔩times 🛺 placed 🎣 in 🥌 the 🆘 mid🔀dle 🇦🇶or🏁 around 🏳️‍🌈 a 🍇 w🍑o🥒r🥨d
Unicode is such fun!
U̷n̷i̷c̷o̴d̴e̷ ̶i̸s̷ ̸s̵u̵c̸h̷ ̸f̵u̷n̴!̵
U̴̡̲͋̾n̵̻̳͌ì̶̠̕c̴̭̈͘ǫ̷̯͋̊d̸͖̩̈̈́ḛ̴́ ̴̟͎͐̈i̴̦̓s̴̜̱͘ ̶̲̮̚s̶̙̞͘u̵͕̯̎̽c̵̛͕̜̓h̶̘̍̽ ̸̜̞̿f̵̤̽ṷ̴͇̎͘ń̷͓̒!̷͍̾̚
U̷̢̢̧̨̼̬̰̪͓̞̠͔̗̼̙͕͕̭̻̗̮̮̥̣͉̫͉̬̲̺͍̺͊̂ͅ\#
n̶̨̢̨̯͓̹̝̲̣̖̞̼̺̬̤̝̊̌́̑̋̋͜͝ͅ\#
ḭ̸̦̺̺͉̳͎́͑\#
c̵̛̘̥̮̙̥̟̘̝͙̤̮͉͔̭̺̺̅̀̽̒̽̏̊̆͒͌̂͌̌̓̈́̐̔̿̂͑͠͝͝ͅ\#
"""#
let indices = Array(string.utf16.indices) + [string.utf16.endIndex]
for i in 0 ..< indices.count {
for j in 0 ..< indices.count {
let distance = string.utf16.distance(from: indices[i], to: indices[j])
expectEqual(distance, j - i,
"""
i: \(i), indices[i]: \(indices[i])
j: \(j), indices[j]: \(indices[j])
""")
let target = string.utf16.index(indices[i], offsetBy: j - i)
expectEqual(target, indices[j],
"""
i: \(i), indices[i]: \(indices[i])
j: \(j), indices[j]: \(indices[j])
target: \(target)
""")
}
}
}
suite.test("String.replaceSubrange index validation")
.forEach(in: examples) { string in
guard #available(SwiftStdlib 5.7, *) else {
// Index navigation in 5.7 always rounds input indices down to the nearest
// Character, so that we always have a well-defined distance between
// indices, even if they aren't valid.
//
// 5.6 and below did not behave consistently in this case.
return
}
//string.dumpIndices()
let scalarMap = string.scalarMap()
let allIndices = string.allIndices()
for i in allIndices {
for j in allIndices {
guard i <= j else { continue }
let si = scalarMap[i]!.index
let sj = scalarMap[j]!.index
// Check String.replaceSubrange(_:with:)
do {
let replacement = "x"
var expected = "".unicodeScalars
expected += string.unicodeScalars[..<si]
expected += replacement.unicodeScalars
expected += string.unicodeScalars[sj...]
var actual = string
actual.replaceSubrange(i ..< j, with: replacement)
expectEqual(actual, String(expected),
"""
string: \(string.debugDescription)
i: \(i)
j: \(j)
""")
}
// Check String.unicodeScalars.replaceSubrange(_:with:)
do {
let replacement = "x".unicodeScalars
var expected = "".unicodeScalars
expected += string.unicodeScalars[..<si]
expected += replacement
expected += string.unicodeScalars[sj...]
var actual = string
actual.unicodeScalars.replaceSubrange(i ..< j, with: replacement)
expectEqual(actual, String(expected),
"""
string: \(string.debugDescription)
i: \(i)
j: \(j)
""")
}
}
}
}
suite.test("Substring.removeSubrange entire range") {
guard #available(SwiftStdlib 5.8, *) else {
// This was a regression in 5.7.0, fixed in 5.7.1+
return
}
var a: Substring = "abcdef"
let aStart = a.startIndex
let aEnd = a.endIndex
a.removeSubrange(aStart ..< aEnd)
expectTrue(a.isEmpty)
#if _runtime(_ObjC)
var b: Substring = ("å∫ç∂éƒ" as NSString) as Substring
let bStart = b.startIndex
let bEnd = b.endIndex
b.removeSubrange(bStart ..< bEnd)
expectTrue(b.isEmpty)
#endif
}
if #available(SwiftStdlib 5.8, *) {
suite.test("String index rounding/Characters")
.forEach(in: examples) { string in
for index in string.allIndices(includingEnd: true) {
let end = string.endIndex
let expected = (index < end
? string.indices.lastIndex { $0 <= index }!
: end)
let actual = string._index(roundingDown: index)
expectEqual(actual, expected,
"""
index: \(index)
actual: \(actual)
expected: \(expected)
""")
}
}
}
suite.test("String index rounding/Scalars")
.forEach(in: examples) { string in
for index in string.allIndices(includingEnd: true) {
let end = string.unicodeScalars.endIndex
let expected = (index < end
? string.unicodeScalars.indices.lastIndex { $0 <= index }!
: end)
let actual = string.unicodeScalars._index(roundingDown: index)
expectEqual(actual, expected,
"""
index: \(index)
actual: \(actual)
expected: \(expected)
""")
}
}
suite.test("String index rounding/UTF-16")
.forEach(in: examples) { string in
//string.dumpIndices()
var utf16Indices = Set(string.utf16.indices)
utf16Indices.insert(string.utf16.endIndex)
for index in string.allIndices(includingEnd: true) {
let expected: String.Index
if utf16Indices.contains(index) {
expected = index
} else {
// If the index isn't valid in the UTF-16 view, it gets rounded down
// to the nearest scalar boundary. (Unintuitively, this is generally *not*
// the closest valid index within the UTF-16 view.)
expected = string.unicodeScalars.indices.lastIndex { $0 <= index }!
}
let actual = string.utf16._index(roundingDown: index)
expectEqual(actual, expected,
"""
index: \(index)
actual: \(actual)
expected: \(expected)
""")
}
}
suite.test("String index rounding/UTF-8")
.forEach(in: examples) { string in
//string.dumpIndices()
var utf8Indices = Set(string.utf8.indices)
utf8Indices.insert(string.utf8.endIndex)
for index in string.allIndices(includingEnd: true) {
let expected: String.Index
if utf8Indices.contains(index) {
expected = index
} else {
// If the index isn't valid in the UTF-8 view, it gets rounded down
// to the nearest scalar boundary. (Unintuitively, this is generally *not*
// the closest valid index within the UTF-8 view.)
expected = string.unicodeScalars.indices.lastIndex { $0 <= index }!
}
let actual = string.utf8._index(roundingDown: index)
expectEqual(actual, expected,
"""
index: \(index)
actual: \(actual)
expected: \(expected)
""")
}
}
if #available(SwiftStdlib 6.1, *) {
suite.test("String index printing (native)") {
let str = "nai\u{308}ve 🪻"
let utf8Indices = [
"0[any]", "1[utf8]", "2[utf8]", "3[utf8]", "4[utf8]", "5[utf8]",
"6[utf8]", "7[utf8]", "8[utf8]", "9[utf8]", "10[utf8]", "11[utf8]"
]
expectEqual(str.utf8.indices.map { "\($0)" }, utf8Indices)
let utf16Indices = [
"0[any]", "1[utf8]", "2[utf8]", "3[utf8]", "5[utf8]", "6[utf8]",
"7[utf8]", "8[utf8]", "8[utf8]+1"
]
expectEqual(str.utf16.indices.map { "\($0)" }, utf16Indices)
let scalarIndices = [
"0[any]", "1[utf8]", "2[utf8]", "3[utf8]", "5[utf8]", "6[utf8]",
"7[utf8]", "8[utf8]"
]
expectEqual(str.unicodeScalars.indices.map { "\($0)" }, scalarIndices)
let characterIndices = [
"0[any]", "1[utf8]", "2[utf8]", "5[utf8]", "6[utf8]", "7[utf8]", "8[utf8]"
]
expectEqual(str.indices.map { "\($0)" }, characterIndices)
}
}
suite.test("String index debugDescription backdeployment") {
// Note: no availability check
let str = "i\u{308}"
// Result can be `any` or `unknown` depending on inlining behavior
expectTrue(
str.startIndex.debugDescription == "0[any]" ||
str.startIndex.debugDescription == "0[unknown]"
)
// Result can be `utf8` or `unknown` depending on inlining behavior
expectTrue(
str.endIndex.debugDescription == "3[utf8]" ||
str.endIndex.debugDescription == "3[unknown]"
)
}
#if _runtime(_ObjC)
if #available(SwiftStdlib 6.1, *) {
suite.test("String index printing (bridged Cocoa)") {
let utf16 = Array("nai\u{308}ve 🪻".utf16)
let nsstr = NSString(characters: utf16, length: utf16.count)
let str = nsstr as String
let utf8Indices = [
"0[any]", "1[utf16]", "2[utf16]", "3[utf16]", "3[utf16]+1", "4[utf16]",
"5[utf16]", "6[utf16]", "7[utf16]", "7[utf16]+1", "7[utf16]+2",
"7[utf16]+3"
]
expectEqual(str.utf8.indices.map { "\($0)" }, utf8Indices)
let utf16Indices = [
"0[any]", "1[utf16]", "2[utf16]", "3[utf16]", "4[utf16]", "5[utf16]",
"6[utf16]", "7[utf16]", "8[utf16]"
]
expectEqual(str.utf16.indices.map { "\($0)" }, utf16Indices)
let scalarIndices = [
"0[any]", "1[utf16]", "2[utf16]", "3[utf16]", "4[utf16]", "5[utf16]",
"6[utf16]", "7[utf16]"
]
expectEqual(str.unicodeScalars.indices.map { "\($0)" }, scalarIndices)
let characterIndices = [
"0[any]", "1[utf16]", "2[utf16]", "4[utf16]", "5[utf16]", "6[utf16]",
"7[utf16]"
]
expectEqual(str.indices.map { "\($0)" }, characterIndices)
}
}
#endif