mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
Fix undefined behavior in SmallString.withUTF8
withUTF8 currently vends a typed UInt8 pointer to the underlying SmallString. That pointer type differs from SmallString's representation. It should simply vend a raw pointer, which would be both type-safe and convenient for UTF8 data. However, since this method is already @inlinable, I added calls to bindMemory to prevent the optimizer from reasoning about access to the typed pointer that we vend. rdar://67983613 (Undefined behavior in SmallString.withUTF8 is miscompiled) Additional commentary: SmallString creates a situation where there are two types, the in-memory type, (UInt64, UInt64), vs. the element type, UInt8. `UnsafePointer<T>` specifies the in-memory type of the pointee, because that's how C works. If you want to specify an element type, not the in-memory type, then you need to use something other than UnsafePointer to view the memory. A trivial `BufferView<UInt8>` would be fine, although, frankly, I think UnsafeRawPointer is a perfectly good type on its own for UTF8 bytes. Unfortunately, a lot of the UTF8 helper code is ABI-exposed, so to work around this, we need to insert calls to bindMemory at strategic points to avoid undefined behavior. This is high-risk and can negatively affect performance. So far, I was able to resolve the regressions in our microbenchmarks just by tweaking the inliner.
This commit is contained in:
@@ -197,11 +197,18 @@ extension _SmallString {
|
||||
internal func withUTF8<Result>(
|
||||
_ f: (UnsafeBufferPointer<UInt8>) throws -> Result
|
||||
) rethrows -> Result {
|
||||
let count = self.count
|
||||
var raw = self.zeroTerminatedRawCodeUnits
|
||||
return try Swift.withUnsafeBytes(of: &raw) { rawBufPtr in
|
||||
let ptr = rawBufPtr.baseAddress._unsafelyUnwrappedUnchecked
|
||||
.assumingMemoryBound(to: UInt8.self)
|
||||
return try f(UnsafeBufferPointer(start: ptr, count: self.count))
|
||||
return try Swift.withUnsafeBytes(of: &raw) {
|
||||
let rawPtr = $0.baseAddress._unsafelyUnwrappedUnchecked
|
||||
// Rebind the underlying (UInt64, UInt64) tuple to UInt8 for the
|
||||
// duration of the closure. Accessing self after this rebind is undefined.
|
||||
let ptr = rawPtr.bindMemory(to: UInt8.self, capacity: count)
|
||||
defer {
|
||||
// Restore the memory type of self._storage
|
||||
_ = rawPtr.bindMemory(to: RawBitPattern.self, capacity: 1)
|
||||
}
|
||||
return try f(UnsafeBufferPointer(start: ptr, count: count))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -209,14 +216,11 @@ extension _SmallString {
|
||||
// new count.
|
||||
@inline(__always)
|
||||
internal mutating func withMutableCapacity(
|
||||
_ f: (UnsafeMutableBufferPointer<UInt8>) throws -> Int
|
||||
_ f: (UnsafeMutableRawBufferPointer) throws -> Int
|
||||
) rethrows {
|
||||
let len = try withUnsafeMutableBytes(of: &self._storage) {
|
||||
(rawBufPtr: UnsafeMutableRawBufferPointer) -> Int in
|
||||
let ptr = rawBufPtr.baseAddress._unsafelyUnwrappedUnchecked
|
||||
.assumingMemoryBound(to: UInt8.self)
|
||||
return try f(UnsafeMutableBufferPointer(
|
||||
start: ptr, count: _SmallString.capacity))
|
||||
return try f(rawBufPtr)
|
||||
}
|
||||
if len == 0 {
|
||||
self = _SmallString()
|
||||
@@ -273,7 +277,17 @@ extension _SmallString {
|
||||
) rethrows {
|
||||
self.init()
|
||||
try self.withMutableCapacity {
|
||||
return try initializer($0)
|
||||
let capacity = $0.count
|
||||
let rawPtr = $0.baseAddress._unsafelyUnwrappedUnchecked
|
||||
// Rebind the underlying (UInt64, UInt64) tuple to UInt8 for the
|
||||
// duration of the closure. Accessing self after this rebind is undefined.
|
||||
let ptr = rawPtr.bindMemory(to: UInt8.self, capacity: capacity)
|
||||
defer {
|
||||
// Restore the memory type of self._storage
|
||||
_ = rawPtr.bindMemory(to: RawBitPattern.self, capacity: 1)
|
||||
}
|
||||
return try initializer(
|
||||
UnsafeMutableBufferPointer<UInt8>(start: ptr, count: capacity))
|
||||
}
|
||||
self._invariantCheck()
|
||||
}
|
||||
|
||||
@@ -169,7 +169,7 @@ internal func _cocoaStringSubscript(
|
||||
@_effects(releasenone)
|
||||
private func _NSStringCopyUTF8(
|
||||
_ o: _StringSelectorHolder,
|
||||
into bufPtr: UnsafeMutableBufferPointer<UInt8>
|
||||
into bufPtr: UnsafeMutableRawBufferPointer
|
||||
) -> Int? {
|
||||
let ptr = bufPtr.baseAddress._unsafelyUnwrappedUnchecked
|
||||
let len = o.length
|
||||
@@ -193,7 +193,7 @@ private func _NSStringCopyUTF8(
|
||||
@_effects(releasenone)
|
||||
internal func _cocoaStringCopyUTF8(
|
||||
_ target: _CocoaString,
|
||||
into bufPtr: UnsafeMutableBufferPointer<UInt8>
|
||||
into bufPtr: UnsafeMutableRawBufferPointer
|
||||
) -> Int? {
|
||||
return _NSStringCopyUTF8(_objc(target), into: bufPtr)
|
||||
}
|
||||
@@ -206,7 +206,7 @@ private func _NSStringUTF8Count(
|
||||
var remainingRange = _SwiftNSRange(location: 0, length: 0)
|
||||
var usedLen = 0
|
||||
let success = 0 != o.getBytes(
|
||||
UnsafeMutablePointer<UInt8>(Builtin.inttoptr_Word(0._builtinWordValue)),
|
||||
UnsafeMutableRawPointer(Builtin.inttoptr_Word(0._builtinWordValue)),
|
||||
maxLength: 0,
|
||||
usedLength: &usedLen,
|
||||
encoding: _cocoaUTF8Encoding,
|
||||
@@ -340,7 +340,7 @@ internal enum _KnownCocoaString {
|
||||
@_effects(releasenone) // @opaque
|
||||
internal func _bridgeTagged(
|
||||
_ cocoa: _CocoaString,
|
||||
intoUTF8 bufPtr: UnsafeMutableBufferPointer<UInt8>
|
||||
intoUTF8 bufPtr: UnsafeMutableRawBufferPointer
|
||||
) -> Int? {
|
||||
_internalInvariant(_isObjCTaggedPointer(cocoa))
|
||||
return _cocoaStringCopyUTF8(cocoa, into: bufPtr)
|
||||
|
||||
@@ -250,7 +250,9 @@ extension _StringGuts {
|
||||
) -> Int? {
|
||||
#if _runtime(_ObjC)
|
||||
// Currently, foreign means NSString
|
||||
if let res = _cocoaStringCopyUTF8(_object.cocoaObject, into: mbp) {
|
||||
if let res = _cocoaStringCopyUTF8(_object.cocoaObject,
|
||||
into: UnsafeMutableRawBufferPointer(start: mbp.baseAddress,
|
||||
count: mbp.count)) {
|
||||
return res
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user