mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
Fix undefined behavior in SmallString.withUTF8
withUTF8 currently vends a typed UInt8 pointer to the underlying SmallString. That pointer type differs from SmallString's representation. It should simply vend a raw pointer, which would be both type safe and convenient for UTF8 data. However, since this method is already @inlinable, I added calls to bindMemory to prevent the optimizer from reasoning about access to the typed pointer that we vend. rdar://67983613 (Undefinied behavior in SmallString.withUTF8 is miscompiled) Additional commentary: SmallString creates a situation where there are two types, the in-memory type, (UInt64, UInt64), vs. the element type, UInt8. `UnsafePointer<T>` specifies the in-memory type of the pointee, because that's how C works. If you want to specify an element type, not the in-memory type, then you need to use something other than UnsafePointer to view the memory. A trivial `BufferView<UInt8>` would be fine, although, frankly, I think UnsafeRawPointer is a perfectly good type on its own for UTF8 bytes. Unfortunately, a lot of the UTF8 helper code is ABI-exposed, so to work around this, we need to insert calls to bindMemory at strategic points to avoid undefined behavior. This is high-risk and can negatively affect performance. So far, I was able to resolve the regressions in our microbenchmarks just by tweaking the inliner.
This commit is contained in:
@@ -197,11 +197,18 @@ extension _SmallString {
|
||||
internal func withUTF8<Result>(
|
||||
_ f: (UnsafeBufferPointer<UInt8>) throws -> Result
|
||||
) rethrows -> Result {
|
||||
let count = self.count
|
||||
var raw = self.zeroTerminatedRawCodeUnits
|
||||
return try Swift.withUnsafeBytes(of: &raw) { rawBufPtr in
|
||||
let ptr = rawBufPtr.baseAddress._unsafelyUnwrappedUnchecked
|
||||
.assumingMemoryBound(to: UInt8.self)
|
||||
return try f(UnsafeBufferPointer(start: ptr, count: self.count))
|
||||
return try Swift.withUnsafeBytes(of: &raw) {
|
||||
let rawPtr = $0.baseAddress._unsafelyUnwrappedUnchecked
|
||||
// Rebind the underlying (UInt64, UInt64) tuple to UInt8 for the
|
||||
// duration of the closure. Accessing self after this rebind is undefined.
|
||||
let ptr = rawPtr.bindMemory(to: UInt8.self, capacity: count)
|
||||
defer {
|
||||
// Restore the memory type of self._storage
|
||||
_ = rawPtr.bindMemory(to: RawBitPattern.self, capacity: 1)
|
||||
}
|
||||
return try f(UnsafeBufferPointer(start: ptr, count: count))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -209,14 +216,11 @@ extension _SmallString {
|
||||
// new count.
|
||||
@inline(__always)
|
||||
internal mutating func withMutableCapacity(
|
||||
_ f: (UnsafeMutableBufferPointer<UInt8>) throws -> Int
|
||||
_ f: (UnsafeMutableRawBufferPointer) throws -> Int
|
||||
) rethrows {
|
||||
let len = try withUnsafeMutableBytes(of: &self._storage) {
|
||||
(rawBufPtr: UnsafeMutableRawBufferPointer) -> Int in
|
||||
let ptr = rawBufPtr.baseAddress._unsafelyUnwrappedUnchecked
|
||||
.assumingMemoryBound(to: UInt8.self)
|
||||
return try f(UnsafeMutableBufferPointer(
|
||||
start: ptr, count: _SmallString.capacity))
|
||||
return try f(rawBufPtr)
|
||||
}
|
||||
if len == 0 {
|
||||
self = _SmallString()
|
||||
@@ -273,7 +277,17 @@ extension _SmallString {
|
||||
) rethrows {
|
||||
self.init()
|
||||
try self.withMutableCapacity {
|
||||
return try initializer($0)
|
||||
let capacity = $0.count
|
||||
let rawPtr = $0.baseAddress._unsafelyUnwrappedUnchecked
|
||||
// Rebind the underlying (UInt64, UInt64) tuple to UInt8 for the
|
||||
// duration of the closure. Accessing self after this rebind is undefined.
|
||||
let ptr = rawPtr.bindMemory(to: UInt8.self, capacity: capacity)
|
||||
defer {
|
||||
// Restore the memory type of self._storage
|
||||
_ = rawPtr.bindMemory(to: RawBitPattern.self, capacity: 1)
|
||||
}
|
||||
return try initializer(
|
||||
UnsafeMutableBufferPointer<UInt8>(start: ptr, count: capacity))
|
||||
}
|
||||
self._invariantCheck()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user