mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
In Swift 5.6 and below, (broken) code that acquired indices from a UTF-16-encoded string bridged from Cocoa and kept using them after a `makeContiguousUTF8` call (or other mutation) may have appeared to be working correctly as long as the string was ASCII. Since https://github.com/apple/swift/pull/41417, the `String(_:within:)` initializers recognize miscoded indices and reject them by returning nil. This is technically correct, but it unfortunately may be a binary compatibility issue, as these used to return non-nil in previous versions. Mitigate this issue by accepting UTF-16 indices on a UTF-8 string, transcoding their offset as needed. (Attempting to use a UTF-8 index on a UTF-16 string is still rejected — we do not implicitly convert strings in that direction.) rdar://89369680
184 lines
7.5 KiB
Swift
184 lines
7.5 KiB
Swift
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
extension String.Index {
  /// Creates an index in the given string that corresponds exactly to the
  /// specified position.
  ///
  /// The initializer succeeds when the index passed as `sourcePosition`
  /// represents the start of an extended grapheme cluster---the element type
  /// of a string.
  ///
  /// In the example below, the position of the Unicode scalar `"e"` is
  /// converted into its corresponding position in the string. The character
  /// found at that position is the composed `"é"` character.
  ///
  ///     let cafe = "Cafe\u{0301}"
  ///     print(cafe)
  ///     // Prints "Café"
  ///
  ///     let scalarsIndex = cafe.unicodeScalars.firstIndex(of: "e")!
  ///     let stringIndex = String.Index(scalarsIndex, within: cafe)!
  ///
  ///     print(cafe[...stringIndex])
  ///     // Prints "Café"
  ///
  /// The result is `nil` when `sourcePosition` has no exact corresponding
  /// position in `target`. Converting the position of the combining acute
  /// accent (`"\u{0301}"`) fails, for instance, because combining Unicode
  /// scalars do not have their own position in a string.
  ///
  ///     let nextScalarsIndex = cafe.unicodeScalars.index(after: scalarsIndex)
  ///     let nextStringIndex = String.Index(nextScalarsIndex, within: cafe)
  ///
  ///     print(nextStringIndex)
  ///     // Prints "nil"
  ///
  /// - Parameters:
  ///   - sourcePosition: A position in a view of the `target` parameter.
  ///     `sourcePosition` must be a valid index of at least one of the views
  ///     of `target`.
  ///   - target: The string referenced by the resulting index.
  public init?(_ sourcePosition: String.Index, within target: String) {
    // Compatibility exception: `sourcePosition` is allowed to be a UTF-16
    // index even when `target` is a UTF-8 string. (Broken) code that keeps
    // using indices obtained from a bridged string after the string was
    // converted to a native representation holds invalid indices, yet such
    // code appeared to work fine for ASCII strings in Swift releases prior to
    // 5.7, and returning nil here can break it.
    guard
      let position = target._guts.ensureMatchingEncodingNoTrap(sourcePosition)
    else { return nil }

    // Positions that `target` does not accept as valid have no counterpart.
    guard target._isValidIndex(position) else { return nil }

    self = position._characterAligned
  }

  /// Creates an index in the given string or substring that corresponds
  /// exactly to the specified position.
  ///
  /// The initializer succeeds when the index passed as `sourcePosition`
  /// represents the start of an extended grapheme cluster---the element type
  /// of a string.
  ///
  /// In the example below, the position of the Unicode scalar `"e"` is
  /// converted into its corresponding position in the string. The character
  /// found at that position is the composed `"é"` character.
  ///
  ///     let cafe = "Cafe\u{0301}"
  ///     print(cafe)
  ///     // Prints "Café"
  ///
  ///     let scalarsIndex = cafe.unicodeScalars.firstIndex(of: "e")!
  ///     let stringIndex = String.Index(scalarsIndex, within: cafe)!
  ///
  ///     print(cafe[...stringIndex])
  ///     // Prints "Café"
  ///
  /// The result is `nil` when `sourcePosition` has no exact corresponding
  /// position in `target`. Converting the position of the combining acute
  /// accent (`"\u{0301}"`) fails, for instance, because combining Unicode
  /// scalars do not have their own position in a string.
  ///
  ///     let nextScalarsIndex = cafe.unicodeScalars.index(after: scalarsIndex)
  ///     let nextStringIndex = String.Index(nextScalarsIndex, within: cafe)
  ///
  ///     print(nextStringIndex)
  ///     // Prints "nil"
  ///
  /// - Parameters:
  ///   - sourcePosition: A position in a view of the `target` parameter.
  ///     `sourcePosition` must be a valid index of at least one of the views
  ///     of `target`.
  ///   - target: The string referenced by the resulting index.
  @available(SwiftStdlib 5.1, *)
  public init?<S: StringProtocol>(
    _ sourcePosition: String.Index, within target: S
  ) {
    if let string = target as? String {
      // A plain `String` is handled by the non-generic initializer.
      self.init(sourcePosition, within: string)
    } else if let slice = target as? Substring {
      // Compatibility exception: `sourcePosition` is allowed to be a UTF-16
      // index even when `target` is a UTF-8 string. (Broken) code that keeps
      // using indices obtained from a bridged string after the string was
      // converted to a native representation holds invalid indices, yet such
      // code appeared to work fine for ASCII strings in Swift releases prior
      // to 5.7, and returning nil here can break it.
      guard
        let position =
          slice._wholeGuts.ensureMatchingEncodingNoTrap(sourcePosition)
      else { return nil }

      // Validity is judged by the substring itself.
      guard slice._isValidIndex(position) else { return nil }

      // NOTE(review): unlike the `String` initializer, the position is used
      // as-is here rather than through `_characterAligned` -- presumably
      // intentional; confirm before changing.
      self = position
    } else {
      // Any other `StringProtocol` conformer is converted to a `String`
      // first and then handled by the non-generic initializer.
      self.init(sourcePosition, within: String(target))
    }
  }

  /// Returns the position in the given UTF-8 view that corresponds exactly to
  /// this index.
  ///
  /// This example first finds the position of the character `"é"`, and then
  /// uses this method to find the same position in the string's `utf8` view.
  ///
  ///     let cafe = "Café"
  ///     if let i = cafe.firstIndex(of: "é") {
  ///         let j = i.samePosition(in: cafe.utf8)!
  ///         print(Array(cafe.utf8[j...]))
  ///     }
  ///     // Prints "[195, 169]"
  ///
  /// - Parameter utf8: The view to use for the index conversion. This index
  ///   must be a valid index of at least one view of the string shared by
  ///   `utf8`.
  /// - Returns: The position in `utf8` that corresponds exactly to this index.
  ///   If this index does not have an exact corresponding position in `utf8`,
  ///   this method returns `nil`. For example, an attempt to convert the
  ///   position of a UTF-16 trailing surrogate returns `nil`.
  public func samePosition(
    in utf8: String.UTF8View
  ) -> String.UTF8View.Index? {
    // The failable view-index initializer performs the actual conversion.
    String.UTF8View.Index(self, within: utf8)
  }

  /// Returns the position in the given UTF-16 view that corresponds exactly to
  /// this index.
  ///
  /// The index must be a valid index of `String(utf16)`.
  ///
  /// This example first finds the position of the character `"é"` and then
  /// uses this method to find the same position in the string's `utf16` view.
  ///
  ///     let cafe = "Café"
  ///     if let i = cafe.firstIndex(of: "é") {
  ///         let j = i.samePosition(in: cafe.utf16)!
  ///         print(cafe.utf16[j])
  ///     }
  ///     // Prints "233"
  ///
  /// - Parameter utf16: The view to use for the index conversion. This index
  ///   must be a valid index of at least one view of the string shared by
  ///   `utf16`.
  /// - Returns: The position in `utf16` that corresponds exactly to this
  ///   index. If this index does not have an exact corresponding position in
  ///   `utf16`, this method returns `nil`. For example, an attempt to convert
  ///   the position of a UTF-8 continuation byte returns `nil`.
  public func samePosition(
    in utf16: String.UTF16View
  ) -> String.UTF16View.Index? {
    // The failable view-index initializer performs the actual conversion.
    String.UTF16View.Index(self, within: utf16)
  }
}
|
|
|