[crash-reduce] Avoid converting crash logs to String

Converting them can result in a major memory bottleneck and performance hit;
just keep them as arrays of bytes instead.
This commit is contained in:
Hamish Knight
2026-04-20 15:33:29 +01:00
parent 6c960a6f3b
commit 98e75379f9
5 changed files with 100 additions and 56 deletions
@@ -18,32 +18,38 @@ public struct Assertion: Hashable, Sendable {
public var message: String
public var function: String?
private static func matchAssert(_ str: String) -> Assertion? {
str.scanningUTF8 { scanner in
while scanner.hasInput {
let start = scanner.cursor
guard scanner.tryEat(utf8: "Assertion fail") else {
_ = scanner.eat()
continue
}
scanner.skip(untilAfter: { $0 == ":" })
scanner.skip(while: \.isSpaceOrTab)
guard scanner.peek == "(", let msg = scanner.consumeMessage() else {
return nil
}
scanner.skip(while: \.isSpaceOrTab)
guard scanner.tryEat(",") else { return nil }
scanner.skip(while: \.isSpaceOrTab)
guard scanner.tryEat(utf8: "function") else { return nil }
scanner.skip(while: \.isSpaceOrTab)
let fn = scanner.eat(while: { $0 != "," && !$0.isSpaceOrTab })
let full = scanner.decodeUTF8(start ..< scanner.cursor)
return Assertion(
fullMessage: full, message: msg, function: fn.map(String.init)
)
/// Scans forward for the first "Assertion fail" marker and tries to parse the
/// assertion that follows it.
///
/// After the marker, the scanner skips past the next `:` and any spaces or
/// tabs, then expects a message starting with `(`, a `,`, the word
/// `function`, and finally the function name, which runs up to the next `,`,
/// space, or tab.
///
/// - Parameter scanner: The scanner to read from; its cursor is advanced as
///   input is consumed.
/// - Returns: The parsed assertion, or `nil` if no marker is found or if the
///   text after the first marker does not have the expected shape.
private static func matchAssertImpl(_ scanner: inout ByteScanner) -> Assertion? {
  while scanner.hasInput {
    // Remember where the marker begins so the full message can be decoded
    // from this position once parsing succeeds.
    let start = scanner.cursor
    guard scanner.tryEat(utf8: "Assertion fail") else {
      // Not at the marker; consume one element and try again.
      _ = scanner.eat()
      continue
    }
    // Note: there must be no early `return nil` here. Returning right after
    // the marker is eaten would make all of the parsing below unreachable.
    scanner.skip(untilAfter: { $0 == ":" })
    scanner.skip(while: \.isSpaceOrTab)
    guard scanner.peek == "(", let msg = scanner.consumeMessage() else {
      return nil
    }
    scanner.skip(while: \.isSpaceOrTab)
    guard scanner.tryEat(",") else { return nil }
    scanner.skip(while: \.isSpaceOrTab)
    guard scanner.tryEat(utf8: "function") else { return nil }
    scanner.skip(while: \.isSpaceOrTab)
    let fn = scanner.eat(while: { $0 != "," && !$0.isSpaceOrTab })
    // The full message covers everything from the marker to the current cursor.
    let full = scanner.decodeUTF8(start ..< scanner.cursor)
    return Assertion(
      fullMessage: full, message: msg, function: fn.map(String.init)
    )
  }
  return nil
}
/// Looks for an assertion failure in a sequence of bytes by running
/// `matchAssertImpl` over a scanner for those bytes.
private static func matchAssert(_ bytes: some Sequence<UInt8>) -> Assertion? {
  return bytes.scanning { scanner in
    Self.matchAssertImpl(&scanner)
  }
}
/// Looks for an assertion failure in a string by running `matchAssertImpl`
/// over a scanner for the string's UTF-8 contents.
private static func matchAssert(_ string: String) -> Assertion? {
  return string.scanningUTF8 { scanner in
    Self.matchAssertImpl(&scanner)
  }
}
private static func matchExact(_ str: String, _ message: String) -> String? {
@@ -58,6 +64,14 @@ public struct Assertion: Hashable, Sendable {
}
}
/// Creates an assertion by scanning `bytes` for an assertion failure.
///
/// The initializer fails (produces `nil`) when no assertion is matched.
public init?(from bytes: some Sequence<UInt8>) {
  guard let parsed = Self.matchAssert(bytes) else { return nil }
  self = parsed
}
public init?(from str: String) {
if let match = Self.matchAssert(str) {
self = match
@@ -29,26 +29,43 @@ public struct CrashLog: Sendable {
static let abortRegex = #/^Abort:\s*function\s*(?<symbol>[^\s]+).*$/#
private static func checkStackOverflow(_ lines: [String]) -> Bool {
lines.contains { $0.scanningUTF8 { $0.scanForStackOverflow() } }
/// Returns whether `scanForStackOverflow()` reports a match for at least one
/// of the given lines.
///
/// - Parameter lines: The log lines, each as a sequence of bytes.
private static func checkStackOverflow(
  _ lines: [some Sequence<UInt8>]
) -> Bool {
  for line in lines {
    let overflowed = line.scanning { scanner in
      scanner.scanForStackOverflow()
    }
    // Stop at the first matching line; later lines need not be scanned.
    if overflowed { return true }
  }
  return false
}
private static func getFrames(from lines: [String]) -> [Frame] {
private static func getFrames(from lines: [some Collection<UInt8>]) -> [Frame] {
var lines = lines[...]
guard let stackDumpStart = lines.firstIndex(where: {
$0.hasPrefix("Stack dump without symbol names")
$0.scanning { $0.tryEat(utf8: "Stack dump without symbol names") }
}) ?? lines.firstIndex(where: {
$0.contains("Stack dump without symbol names")
$0.scanning { scanner in
repeat {
if scanner.tryEat(utf8: "Stack dump without symbol names") {
return true
}
} while scanner.tryEat()
return false
}
}) else {
// The frame symbol can be included in the UBSan error.
for line in lines {
guard let match = line.wholeMatch(of: sanitizerFrameSymbolRegex)?.output else {
guard
line.scanning({
$0.skip(while: \.isSpaceOrTab);
return $0.tryEat(utf8: "SUMMARY:")
}),
case let lineStr = String(utf8: line),
// TODO: Use a scanner instead of regex here.
let match = lineStr.wholeMatch(of: sanitizerFrameSymbolRegex)?.output
else {
continue
}
return [
Frame(
line: line,
line: lineStr,
image: String(match.image),
symbol: String(match.symbol),
offset: nil
@@ -60,7 +77,7 @@ public struct CrashLog: Sendable {
lines = lines[(stackDumpStart + 1)...]
var frames: [Frame] = []
while let line = lines.first, let frame = Frame(from: line) {
while let line = lines.first, let frame = Frame(from: String(utf8: line)) {
frames.append(frame)
lines = lines.dropFirst()
}
@@ -114,16 +131,18 @@ public struct CrashLog: Sendable {
return [firstSymbol]
}
private static func findAbort(_ lines: [String]) -> String? {
private static func findAbort(_ lines: [some Collection<UInt8>]) -> String? {
for line in lines {
if let match = line.wholeMatch(of: Self.abortRegex) {
guard line.scanning({ $0.tryEat(utf8: "Abort:") }) else { continue }
// TODO: Use scanner for this.
if let match = String(utf8: line).wholeMatch(of: Self.abortRegex) {
return String(match.symbol)
}
}
return nil
}
private static func findAssertion(_ lines: [String]) -> Assertion? {
private static func findAssertion(_ lines: [some Sequence<UInt8>]) -> Assertion? {
for line in lines {
guard let assert = Assertion(from: line) else { continue }
return assert
@@ -131,8 +150,12 @@ public struct CrashLog: Sendable {
return nil
}
public init?(from log: String) {
let lines = log.components(separatedBy: "\n")
/// Creates a crash log from text by forwarding the string's UTF-8 view to
/// the byte-based initializer.
public init(from str: String) {
  let utf8Bytes = str.utf8
  self.init(from: utf8Bytes)
}
public init(from bytes: some Collection<UInt8>) {
let lines = bytes.split(separator: UInt8(ascii: "\n"))
self.isStackOverflow = Self.checkStackOverflow(lines)
self.frames = Self.getFrames(from: lines)
@@ -38,17 +38,7 @@ extension Toolchain {
case .exited(code: 0), .exited(code: 1):
return nil
default:
let output = String(
decoding: result.standardError.prefix(1_000_000), as: UTF8.self
)
guard let crashLog = CrashLog(from: output) else {
throw ReproducerError("""
couldn't extract sig for \
\(inputs.first!.parentDir!.fileName) \
<sig>\(output)</sig>
""")
}
return crashLog
return CrashLog(from: result.standardError)
}
}
@@ -73,6 +73,10 @@ public extension String {
self = buffer.withUnsafeBytes(String.init(utf8:))
}
/// Creates a string by decoding `seq` as UTF-8.
///
/// Decoding goes through `String(decoding:as:)`, so ill-formed input is
/// repaired with replacement characters rather than causing a failure.
init(utf8 seq: some Collection<UInt8>) {
  self = String(decoding: seq, as: UTF8.self)
}
func scanningUTF8<R>(_ scan: (inout ByteScanner) throws -> R) rethrows -> R {
var tmp = self
return try tmp.withUTF8 { utf8 in
@@ -147,6 +151,22 @@ public extension String {
}
}
extension Sequence where Element == UInt8 {
  /// Runs `scan` with a `ByteScanner` over this sequence's bytes and returns
  /// whatever `scan` returns.
  ///
  /// If the sequence exposes contiguous storage, the scanner is built directly
  /// on that storage. Otherwise the bytes are first copied into an `Array`.
  ///
  /// - Parameter scan: The closure that drives the scanner.
  /// - Returns: The value produced by `scan`.
  /// - Throws: Rethrows any error thrown by `scan`.
  public func scanning<R>(_ scan: (inout ByteScanner) throws -> R) rethrows -> R {
    // Fast path: the result is non-nil only when contiguous storage exists.
    if let direct = try withContiguousStorageIfAvailable({ storage -> R in
      var byteScanner = ByteScanner(storage)
      return try scan(&byteScanner)
    }) {
      return direct
    }
    // Fallback: materialize the bytes so there is a buffer to scan over.
    let copied = Array(self)
    return try copied.withUnsafeBufferPointer { storage in
      var byteScanner = ByteScanner(storage)
      return try scan(&byteScanner)
    }
  }
}
/// Pattern match by `is` property. E.g. `case \.isNewline: ...`
public func ~= <T>(keyPath: KeyPath<T, Bool>, subject: T) -> Bool {
return subject[keyPath: keyPath]
@@ -195,18 +195,15 @@ struct GetSignatureCommand: ParsableCommand {
}
return input
}()
func runOnce() -> Signature? {
CrashLog(from: input)?.signature
func runOnce() -> Signature {
CrashLog(from: input).signature
}
let start = Date()
for _ in 0 ..< repeats {
guard runOnce() != nil else {
Darwin.exit(1)
}
}
guard let sig = runOnce() else {
Darwin.exit(1)
// TODO: Make sure this doesn't get optimized out?
_ = runOnce()
}
let sig = runOnce()
print(sig)
if repeats > 0 {
print("\(Int((Date().timeIntervalSince(start) * 1000).rounded()))ms")