Files
swift-mirror/stdlib/public/core/StringIndexConversions.swift
Karoy Lorentey 4d557b0b45 [stdlib] Make String.Index(_:within:) initializers more permissive
In Swift 5.6 and below, (broken) code that acquired indices from a
UTF-16-encoded string bridged from Cocoa and kept using them after a
`makeContiguousUTF8` call (or other mutation) may have appeared to be
working correctly as long as the string was ASCII.

Since https://github.com/apple/swift/pull/41417, the
`String(_:within:)` initializers recognize miscoded indices and reject
them by returning nil. This is technically correct, but it
unfortunately may be a binary compatibility issue, as these used to
return non-nil in previous versions.

Mitigate this issue by accepting UTF-16 indices on a UTF-8 string,
transcoding their offset as needed. (Attempting to use an UTF-8 index
on a UTF-16 string is still rejected — we do not implicitly convert
strings in that direction.)

rdar://89369680
2022-04-18 21:02:14 -07:00

184 lines
7.5 KiB
Swift

//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
extension String.Index {
/// Creates an index in the given string that corresponds exactly to the
/// specified position.
///
/// If the index passed as `sourcePosition` represents the start of an
/// extended grapheme cluster---the element type of a string---then the
/// initializer succeeds.
///
/// The following example converts the position of the Unicode scalar `"e"`
/// into its corresponding position in the string. The character at that
/// position is the composed `"é"` character.
///
/// let cafe = "Cafe\u{0301}"
/// print(cafe)
/// // Prints "Café"
///
/// let scalarsIndex = cafe.unicodeScalars.firstIndex(of: "e")!
/// let stringIndex = String.Index(scalarsIndex, within: cafe)!
///
/// print(cafe[...stringIndex])
/// // Prints "Café"
///
/// If the index passed as `sourcePosition` doesn't have an exact
/// corresponding position in `target`, the result of the initializer is
/// `nil`. For example, an attempt to convert the position of the combining
/// acute accent (`"\u{0301}"`) fails. Combining Unicode scalars do not have
/// their own position in a string.
///
/// let nextScalarsIndex = cafe.unicodeScalars.index(after: scalarsIndex)
/// let nextStringIndex = String.Index(nextScalarsIndex, within: cafe)
///
/// print(nextStringIndex)
/// // Prints "nil"
///
/// - Parameters:
/// - sourcePosition: A position in a view of the `target` parameter.
/// `sourcePosition` must be a valid index of at least one of the views
/// of `target`.
/// - target: The string referenced by the resulting index.
public init?(_ sourcePosition: String.Index, within target: String) {
// As a special exception, we allow `sourcePosition` to be an UTF-16 index
// when `self` is a UTF-8 string, to preserve compatibility with (broken)
// code that keeps using indices from a bridged string after converting the
// string to a native representation. Such indices are invalid, but
// returning nil here can break code that appeared to work fine for ASCII
// strings in Swift releases prior to 5.7.
guard
let i = target._guts.ensureMatchingEncodingNoTrap(sourcePosition),
target._isValidIndex(i)
else { return nil }
self = i._characterAligned
}
/// Creates an index in the given string that corresponds exactly to the
/// specified position.
///
/// If the index passed as `sourcePosition` represents the start of an
/// extended grapheme cluster---the element type of a string---then the
/// initializer succeeds.
///
/// The following example converts the position of the Unicode scalar `"e"`
/// into its corresponding position in the string. The character at that
/// position is the composed `"é"` character.
///
/// let cafe = "Cafe\u{0301}"
/// print(cafe)
/// // Prints "Café"
///
/// let scalarsIndex = cafe.unicodeScalars.firstIndex(of: "e")!
/// let stringIndex = String.Index(scalarsIndex, within: cafe)!
///
/// print(cafe[...stringIndex])
/// // Prints "Café"
///
/// If the index passed as `sourcePosition` doesn't have an exact
/// corresponding position in `target`, the result of the initializer is
/// `nil`. For example, an attempt to convert the position of the combining
/// acute accent (`"\u{0301}"`) fails. Combining Unicode scalars do not have
/// their own position in a string.
///
/// let nextScalarsIndex = cafe.unicodeScalars.index(after: scalarsIndex)
/// let nextStringIndex = String.Index(nextScalarsIndex, within: cafe)
///
/// print(nextStringIndex)
/// // Prints "nil"
///
/// - Parameters:
/// - sourcePosition: A position in a view of the `target` parameter.
/// `sourcePosition` must be a valid index of at least one of the views
/// of `target`.
/// - target: The string referenced by the resulting index.
@available(SwiftStdlib 5.1, *)
public init?<S: StringProtocol>(
_ sourcePosition: String.Index, within target: S
) {
if let str = target as? String {
self.init(sourcePosition, within: str)
return
}
if let str = target as? Substring {
// As a special exception, we allow `sourcePosition` to be an UTF-16 index
// when `self` is a UTF-8 string, to preserve compatibility with (broken)
// code that keeps using indices from a bridged string after converting
// the string to a native representation. Such indices are invalid, but
// returning nil here can break code that appeared to work fine for ASCII
// strings in Swift releases prior to 5.7.
guard
let i = str._wholeGuts.ensureMatchingEncodingNoTrap(sourcePosition),
str._isValidIndex(i)
else { return nil }
self = i
return
}
self.init(sourcePosition, within: String(target))
}
/// Returns the position in the given UTF-8 view that corresponds exactly to
/// this index.
///
/// This example first finds the position of the character `"é"`, and then
/// uses this method find the same position in the string's `utf8` view.
///
/// let cafe = "Café"
/// if let i = cafe.firstIndex(of: "é") {
/// let j = i.samePosition(in: cafe.utf8)!
/// print(Array(cafe.utf8[j...]))
/// }
/// // Prints "[195, 169]"
///
/// - Parameter utf8: The view to use for the index conversion. This index
/// must be a valid index of at least one view of the string shared by
/// `utf8`.
/// - Returns: The position in `utf8` that corresponds exactly to this index.
/// If this index does not have an exact corresponding position in `utf8`,
/// this method returns `nil`. For example, an attempt to convert the
/// position of a UTF-16 trailing surrogate returns `nil`.
public func samePosition(
in utf8: String.UTF8View
) -> String.UTF8View.Index? {
return String.UTF8View.Index(self, within: utf8)
}
/// Returns the position in the given UTF-16 view that corresponds exactly to
/// this index.
///
/// The index must be a valid index of `String(utf16)`.
///
/// This example first finds the position of the character `"é"` and then
/// uses this method find the same position in the string's `utf16` view.
///
/// let cafe = "Café"
/// if let i = cafe.firstIndex(of: "é") {
/// let j = i.samePosition(in: cafe.utf16)!
/// print(cafe.utf16[j])
/// }
/// // Prints "233"
///
/// - Parameter utf16: The view to use for the index conversion. This index
/// must be a valid index of at least one view of the string shared by
/// `utf16`.
/// - Returns: The position in `utf16` that corresponds exactly to this
/// index. If this index does not have an exact corresponding position in
/// `utf16`, this method returns `nil`. For example, an attempt to convert
/// the position of a UTF-8 continuation byte returns `nil`.
public func samePosition(
in utf16: String.UTF16View
) -> String.UTF16View.Index? {
return String.UTF16View.Index(self, within: utf16)
}
}