//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2025 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

// NOTE(review): this copy of the file appears to have lost text that sat
// between angle brackets. Generic arguments are absent (`Span`,
// `UnsafePointer(a)`, `UnsafeBufferPointer`, `MemoryLayout.stride`), and three
// range expressions are cut off mid-token together with the declarations that
// followed them. Each damaged spot is marked below. Restore the missing text
// from the canonical source before building — TODO confirm.

/// A borrowed view into contiguous memory that contains validly-encoded UTF-8
/// code units.
@frozen
@safe
@available(SwiftStdlib 6.2, *)
public struct UTF8Span: Copyable, ~Escapable, BitwiseCopyable {
  /// Address of the first code unit. The initializers in this file store
  /// `nil` here when the source span has no base pointer.
  @usableFromInline
  internal var _unsafeBaseAddress: UnsafeRawPointer?

  /*
   A bit-packed count and flags (such as isASCII)

   ╔═══════╦═════╦══════════╦═══════╗
   ║  b63  ║ b62 ║  b61:56  ║ b55:0 ║
   ╠═══════╬═════╬══════════╬═══════╣
   ║ ASCII ║ NFC ║ reserved ║ count ║
   ╚═══════╩═════╩══════════╩═══════╝

   ASCII means the contents are known to be all-ASCII (<=0x7F).
   NFC means contents are known to be in normal form C for fast comparisons.
   */
  @usableFromInline
  internal var _countAndFlags: UInt64

  /// Stores `start` and `_countAndFlags` exactly as given, without validating
  /// the pointed-to contents, and then calls `_invariantCheck()`.
  ///
  /// - Parameters:
  ///   - start: Base address of the code units; the new value's lifetime is
  ///     tied to a borrow of this pointer.
  ///   - _countAndFlags: The already bit-packed count and flags word (see the
  ///     layout comment on the stored property).
  // @_alwaysEmitIntoClient
  @inline(__always)
  @lifetime(borrow start)
  internal init(
    _unsafeAssumingValidUTF8 start: borrowing UnsafeRawPointer,
    _countAndFlags: UInt64
  ) {
    unsafe self._unsafeBaseAddress = copy start
    self._countAndFlags = _countAndFlags

    _invariantCheck()
  }

  /// Creates a UTF8Span, bypassing safety and security checks. The caller
  /// must guarantee that `codeUnits` contains validly-encoded UTF-8, or else
  /// undefined behavior may result upon use. If `isKnownASCII: true` is
  /// passed, the contents must be ASCII, or else undefined behavior may
  /// result upon use.
  ///
  /// This forwards to the internal unchecked initializer with
  /// `isKnownNFC: false`.
  ///
  /// - Parameters:
  ///   - codeUnits: The span to view as UTF-8. The result has the same
  ///     lifetime constraints as this span.
  ///   - isKnownASCII: Whether to record the contents as all-ASCII.
  ///     Defaults to `false`.
  @unsafe
  @lifetime(copy codeUnits)
  public init(
    unchecked codeUnits: Span,
    isKnownASCII: Bool = false
  ) {
    self.init(
      _uncheckedAssumingValidUTF8: codeUnits,
      isKnownASCII: isKnownASCII,
      isKnownNFC: false
    )
  }

  /// Returns the base address by unwrapping `_unsafeBaseAddress` through
  /// `_unsafelyUnwrappedUnchecked`; no nil check is visible here, so callers
  /// presumably have to rule out the nil (empty) case first — see the FIXME.
  // FIXME: we need to make sure ALL API are nil safe, that is they
  // at least check the count first
  @_alwaysEmitIntoClient
  internal func _start() -> UnsafeRawPointer {
    unsafe _unsafeBaseAddress._unsafelyUnwrappedUnchecked
  }
}

// TODO: try to convert code to be run on Span instead of URP

@available(SwiftStdlib 6.2, *)
extension UTF8Span {
  /// Creates a UTF8Span containing `codeUnits`. Validates that the input is
  /// valid UTF-8, otherwise throws an error.
  ///
  /// The resulting UTF8Span has the same lifetime constraints as `codeUnits`.
  ///
  /// - Parameter codeUnits: The span of code units to validate and view.
  /// - Throws: `UTF8.ValidationError` if validation fails.
  /// - Complexity: O(n)
  @lifetime(copy codeUnits)
  public init(
    validating codeUnits: consuming Span
  ) throws(UTF8.ValidationError) {
    try self.init(_validating: codeUnits)
  }

  /// Validating initializer that backs `init(validating:)`.
  ///
  /// A span without a base pointer produces a value with a nil base address
  /// and a zero count/flags word, skipping validation. Otherwise the bytes
  /// are checked with `_validateUTF8(limitedBy:)`, whose result is used here
  /// as the "is ASCII" flag.
  ///
  /// - Parameter codeUnits: The span of code units to validate and view.
  /// - Throws: `UTF8.ValidationError`, propagated from the validation call.
  // TODO: this doesn't need to be underscored, I don't think
  @lifetime(copy codeUnits)
  internal init(
    _validating codeUnits: consuming Span
  ) throws(UTF8.ValidationError) {
    guard let basePtr = unsafe codeUnits._pointer else {
      // No storage to validate: represent the span as nil base / zero count.
      unsafe self._unsafeBaseAddress = nil
      self._countAndFlags = 0
      return
    }

    let count = codeUnits._count
    let isASCII = unsafe try basePtr._validateUTF8(limitedBy: count)

    unsafe self._unsafeBaseAddress = .init(basePtr)
    // Start with only the count in the packed word; flag bits are set below.
    self._countAndFlags = UInt64(truncatingIfNeeded: count)
    if isASCII {
      _setIsASCII()
    }
    // Sanity check that packing the count did not disturb it.
    _internalInvariant(self.count == codeUnits.count)
  }

  /// Non-validating initializer that backs `init(unchecked:isKnownASCII:)`.
  ///
  /// A span without a base pointer produces a value with a nil base address
  /// and a zero count/flags word; in that case the two flags are not
  /// recorded. Otherwise the pointer and count are stored and the requested
  /// flags are set.
  ///
  /// - Parameters:
  ///   - codeUnits: The span to view; its contents are not inspected here.
  ///   - isKnownASCII: Whether to set the ASCII flag.
  ///   - isKnownNFC: Whether to set the NFC flag.
  // TODO: SPI?
  @lifetime(copy codeUnits)
  internal init(
    _uncheckedAssumingValidUTF8 codeUnits: consuming Span,
    isKnownASCII: Bool,
    isKnownNFC: Bool
  ) {
    guard let ptr = unsafe codeUnits._pointer else {
      unsafe self._unsafeBaseAddress = nil
      self._countAndFlags = 0
      return
    }

    unsafe self._unsafeBaseAddress = ptr
    self._countAndFlags = UInt64(truncatingIfNeeded: codeUnits.count)
    if isKnownASCII {
      _setIsASCII()
    }
    if isKnownNFC {
      _setIsNFC()
    }
    _internalInvariant(self.count == codeUnits.count)
  }

  // HACK: working around lack of internal plumbing work
  // NOTE(review): text is missing after `0..` on the next line. The rest of
  // the `_str` getter and the start of the following declaration (apparently
  // a generic function taking a `body` closure, given the `E` and `Result`
  // names used below) are gone — TODO restore from the canonical source.
  internal var _str: String { unsafe _start()._str(0..(
    _ body: (_ buffer: /*borrowing*/ UnsafeBufferPointer) throws(E) -> Result
  ) throws(E) -> Result {
    // NOTE(review): text is missing after `0..` on the next line as well. The
    // end of this function and the header of the property whose getter
    // follows are gone — TODO restore from the canonical source.
    try unsafe body(_start()._ubp(0.. {
    @lifetime(copy self)
    get {
      // Build a span over the stored base address and count, then tie its
      // lifetime to `self` via `_overrideLifetime(_:copying:)`.
      let newSpan = unsafe Span(_unchecked: _unsafeBaseAddress, count: self.count)
      return unsafe _overrideLifetime(newSpan, copying: self)
    }
  }
}

extension String {
  /// Creates a new string, copying the specified code units.
  ///
  /// This initializer skips UTF-8 validation because `codeUnits` must contain
  /// valid UTF-8. The span's `isKnownASCII` flag is passed through to the
  /// string construction.
  ///
  /// - Parameter codeUnits: The UTF-8 code units to copy.
  /// - Complexity: O(n)
  @available(SwiftStdlib 6.2, *)
  public init(copying codeUnits: UTF8Span) {
    let isASCII = codeUnits.isKnownASCII
    self = unsafe codeUnits._withUnsafeBufferPointer { bufPtr in
      unsafe String._uncheckedFromUTF8(bufPtr, isASCII: isASCII)
    }
  }
}

@available(SwiftStdlib 6.2, *)
extension String {
  /// Returns a span over this string's UTF-8 code units, with its lifetime
  /// tied to a borrow of `self`.
  ///
  /// Three cases are handled, in order:
  /// 1. (ObjC runtime only) a non-fast-UTF-8 string backed by a bridgeable
  ///    object: the span covers the storage returned by
  ///    `_getOrAllocateAssociatedStorage()`.
  /// 2. A small string: the span starts at the address of the borrowed
  ///    `self` value.
  /// 3. Anything else must be fast UTF-8 (enforced with a precondition): the
  ///    span covers `_guts._object.fastUTF8`.
  @lifetime(borrow self)
  private borrowing func _underlyingSpan() -> Span {
#if _runtime(_ObjC)
    // handle non-UTF8 Objective-C bridging cases here
    if !_guts.isFastUTF8, _guts._object.hasObjCBridgeableObject {
      let storage = _guts._getOrAllocateAssociatedStorage()
      let (start, count) = unsafe (storage.start, storage.count)
      let span = unsafe Span(_unsafeStart: start, count: count)
      return unsafe _overrideLifetime(span, borrowing: self)
    }
#endif // _runtime(_ObjC)
    let count = _guts.count
    if _guts.isSmall {
      // The code units are read starting at the address of `self` itself;
      // presumably the small-string payload sits at the start of the value —
      // TODO confirm against `_SmallString`'s layout.
      let a = Builtin.addressOfBorrow(self)
      let address = unsafe UnsafePointer(a)
      let span = unsafe Span(_unsafeStart: address, count: count)
      return unsafe _overrideLifetime(span, borrowing: self)
    }
    let isFastUTF8 = _guts.isFastUTF8
    _precondition(isFastUTF8, "String must be contiguous UTF8")
    let buffer = unsafe _guts._object.fastUTF8
    let span = unsafe Span(_unsafeElements: buffer)
    return unsafe _overrideLifetime(span, borrowing: self)
  }

#if !(os(watchOS) && _pointerBitWidth(_32))
  /// A UTF-8 span over the code units that make up this string.
  ///
  /// - Note: In the case of bridged UTF-16 string instances (on Apple
  ///   platforms) this property transcodes the code units the first time
  ///   it's called. The transcoded buffer is cached, and subsequent calls
  ///   can reuse the buffer.
  ///
  /// - Returns: A `UTF8Span` over the code units of this string.
  ///
  /// - Complexity: O(1) for native UTF-8 strings, amortized O(1) for bridged
  ///   UTF-16 strings.
  @available(SwiftStdlib 6.2, *)
  public var utf8Span: UTF8Span {
    @lifetime(borrow self)
    borrowing get {
      unsafe UTF8Span(
        unchecked: _underlyingSpan(),
        isKnownASCII: _guts.isASCII
      )
    }
  }

  /// A UTF-8 span over the code units that make up this string.
  ///
  /// In this build configuration the getter simply forwards to `utf8Span`,
  /// so the optional result is never `nil`.
  ///
  /// - Note: In the case of bridged UTF-16 string instances (on Apple
  ///   platforms) this property transcodes the code units the first time
  ///   it's called. The transcoded buffer is cached, and subsequent calls
  ///   can reuse the buffer.
  ///
  /// - Returns: A `UTF8Span` over the code units of this string.
  ///
  /// - Complexity: O(1) for native UTF-8 strings, amortized O(1) for bridged
  ///   UTF-16 strings.
  @available(SwiftStdlib 6.2, *)
  public var _utf8Span: UTF8Span? {
    @_alwaysEmitIntoClient @inline(__always)
    @lifetime(borrow self)
    borrowing get {
      utf8Span
    }
  }
#else // !(os(watchOS) && _pointerBitWidth(_32))
  /// Placeholder for 32-bit watchOS, where this property is marked
  /// unavailable; the body traps with `fatalError` if it is ever reached.
  @available(watchOS, unavailable)
  public var utf8Span: UTF8Span {
    fatalError("\(#function) unavailable on 32-bit watchOS")
  }

  /// A UTF-8 span over the code units that make up this string.
  ///
  /// - Note: In the case of bridged UTF-16 string instances (on Apple
  ///   platforms) this property transcodes the code units the first time
  ///   it's called. The transcoded buffer is cached, and subsequent calls
  ///   can reuse the buffer.
  ///
  /// - Returns: A `UTF8Span` over the code units of this string, or `nil`
  ///   if the string does not have a contiguous representation.
  ///
  /// - Complexity: O(1) for native UTF-8 strings, amortized O(1) for bridged
  ///   UTF-16 strings.
  @available(SwiftStdlib 6.2, *)
  public var _utf8Span: UTF8Span? {
    @lifetime(borrow self)
    borrowing get {
      // A small string longer than `_SmallString.contiguousCapacity()` is
      // reported as having no span.
      if _guts.isSmall,
         _guts.count > _SmallString.contiguousCapacity() {
        return nil
      }
      return unsafe UTF8Span(
        unchecked: _underlyingSpan(),
        isKnownASCII: _guts.isASCII
      )
    }
  }
#endif // !(os(watchOS) && _pointerBitWidth(_32))
}

@available(SwiftStdlib 6.2, *)
extension Substring {
  /// Returns a span over this substring's UTF-8 code units, with its
  /// lifetime tied to a borrow of `self`.
  ///
  /// The cases mirror `String._underlyingSpan()`, except that each result is
  /// narrowed to the substring's own bounds: a bridged (ObjC runtime only)
  /// base, a small base string, or a fast-UTF-8 base (enforced with a
  /// precondition).
  @lifetime(borrow self)
  private borrowing func _underlyingSpan() -> Span {
#if _runtime(_ObjC)
    // handle non-UTF8 Objective-C bridging cases here
    if !_wholeGuts.isFastUTF8, _wholeGuts._object.hasObjCBridgeableObject {
      let base: String.UTF8View = _slice._base.utf8
      // Measure the substring's UTF-8 offset and length within the base
      // view, then take that sub-range of the base span.
      let first = base._foreignDistance(from: base.startIndex, to: startIndex)
      let count = base._foreignDistance(from: startIndex, to: endIndex)
      let span = base._underlyingSpan().extracting(first..<(first &+ count))
      return unsafe _overrideLifetime(span, borrowing: self)
    }
#endif // _runtime(_ObjC)
    let first = _slice._startIndex._encodedOffset
    let end = _slice._endIndex._encodedOffset
    if _wholeGuts.isSmall {
      let a = Builtin.addressOfBorrow(self)
      // Skip two strides past the start of `self`, then `first` code units.
      // NOTE(review): the generic argument of `MemoryLayout` is missing in
      // this copy; presumably the two strides step over the slice's two
      // stored indices to reach the base string — TODO confirm.
      let offset = first &+ (2 &* MemoryLayout.stride)
      let start = unsafe UnsafePointer(a).advanced(by: offset)
      let span = unsafe Span(_unsafeStart: start, count: end &- first)
      return unsafe _overrideLifetime(span, borrowing: self)
    }
    let isFastUTF8 = _wholeGuts.isFastUTF8
    _precondition(isFastUTF8, "Substring must be contiguous UTF8")
    var span = unsafe Span(_unsafeElements: _wholeGuts._object.fastUTF8)
    // NOTE(review): text is missing after `first..` on the next line. The
    // end of this function and the start of what looks like an
    // optional-returning span accessor (it can `return nil`) are gone — TODO
    // restore from the canonical source.
    span = span.extracting(first.. _SmallString.contiguousCapacity() {
        // substring is spannable only when the whole string is spannable.
        return nil
      }
      return unsafe UTF8Span(
        unchecked: _underlyingSpan(),
        isKnownASCII: base._guts.isASCII
      )
    }
  }
#endif // !(os(watchOS) && _pointerBitWidth(_32))
}