mirror of
https://github.com/apple/swift.git
synced 2026-06-20 15:42:51 +02:00
[crash-reduce] Avoid converting crash logs to String
Converting crash logs to String can cause a major memory bottleneck and performance hit; keep them as arrays of bytes instead.
This commit is contained in:
@@ -18,32 +18,38 @@ public struct Assertion: Hashable, Sendable {
|
||||
public var message: String
|
||||
public var function: String?
|
||||
|
||||
private static func matchAssert(_ str: String) -> Assertion? {
|
||||
str.scanningUTF8 { scanner in
|
||||
while scanner.hasInput {
|
||||
let start = scanner.cursor
|
||||
guard scanner.tryEat(utf8: "Assertion fail") else {
|
||||
_ = scanner.eat()
|
||||
continue
|
||||
}
|
||||
scanner.skip(untilAfter: { $0 == ":" })
|
||||
scanner.skip(while: \.isSpaceOrTab)
|
||||
guard scanner.peek == "(", let msg = scanner.consumeMessage() else {
|
||||
return nil
|
||||
}
|
||||
scanner.skip(while: \.isSpaceOrTab)
|
||||
guard scanner.tryEat(",") else { return nil }
|
||||
scanner.skip(while: \.isSpaceOrTab)
|
||||
guard scanner.tryEat(utf8: "function") else { return nil }
|
||||
scanner.skip(while: \.isSpaceOrTab)
|
||||
let fn = scanner.eat(while: { $0 != "," && !$0.isSpaceOrTab })
|
||||
let full = scanner.decodeUTF8(start ..< scanner.cursor)
|
||||
return Assertion(
|
||||
fullMessage: full, message: msg, function: fn.map(String.init)
|
||||
)
|
||||
/// Scans the scanner's input for the first assertion-failure message and
/// parses it into an `Assertion`.
///
/// The input is searched byte by byte for the text "Assertion fail". Once
/// found, the text that follows is expected to have the shape
///
///     Assertion fail<...>: (<message>), function <name>
///
/// NOTE(review): this presumably matches the output of the C `assert` macro;
/// confirm against real crash logs.
///
/// - Parameter scanner: The scanner over the bytes to search. Its cursor is
///   advanced as input is consumed.
/// - Returns: The parsed assertion, or `nil` if the marker never appears or
///   the text after the first marker does not have the expected shape.
private static func matchAssertImpl(_ scanner: inout ByteScanner) -> Assertion? {
  while scanner.hasInput {
    // Remember where the candidate match begins so the full text can be
    // decoded once parsing succeeds.
    let start = scanner.cursor
    guard scanner.tryEat(utf8: "Assertion fail") else {
      // No marker at this position; advance one byte and try again.
      _ = scanner.eat()
      continue
    }
    // Skip the rest of the marker up to and including the ':' separator.
    scanner.skip(untilAfter: { $0 == ":" })
    scanner.skip(while: \.isSpaceOrTab)
    // The message must start with '('; `consumeMessage` extracts it.
    guard scanner.peek == "(", let msg = scanner.consumeMessage() else {
      return nil
    }
    scanner.skip(while: \.isSpaceOrTab)
    guard scanner.tryEat(",") else { return nil }
    scanner.skip(while: \.isSpaceOrTab)
    guard scanner.tryEat(utf8: "function") else { return nil }
    scanner.skip(while: \.isSpaceOrTab)
    // The function name runs until the next ',' or space/tab.
    let fn = scanner.eat(while: { $0 != "," && !$0.isSpaceOrTab })
    let full = scanner.decodeUTF8(start ..< scanner.cursor)
    return Assertion(
      fullMessage: full, message: msg, function: fn.map(String.init)
    )
  }
  return nil
}
|
||||
|
||||
/// Searches a sequence of raw bytes for an assertion failure.
///
/// - Parameter bytes: The bytes to scan.
/// - Returns: The result of running `matchAssertImpl` over a scanner on
///   `bytes`.
private static func matchAssert(_ bytes: some Sequence<UInt8>) -> Assertion? {
  return bytes.scanning { scanner in
    Self.matchAssertImpl(&scanner)
  }
}
|
||||
|
||||
/// Searches the UTF-8 contents of a string for an assertion failure.
///
/// - Parameter string: The text to scan.
/// - Returns: The result of running `matchAssertImpl` over a scanner on the
///   string's UTF-8 bytes.
private static func matchAssert(_ string: String) -> Assertion? {
  return string.scanningUTF8 { scanner in
    Self.matchAssertImpl(&scanner)
  }
}
|
||||
|
||||
private static func matchExact(_ str: String, _ message: String) -> String? {
|
||||
@@ -58,6 +64,14 @@ public struct Assertion: Hashable, Sendable {
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates an assertion by scanning raw bytes for an assertion failure.
///
/// Fails (produces `nil`) when `matchAssert` finds no assertion in `bytes`.
///
/// - Parameter bytes: The bytes to scan.
public init?(from bytes: some Sequence<UInt8>) {
  guard let parsed = Self.matchAssert(bytes) else {
    return nil
  }
  self = parsed
}
|
||||
|
||||
public init?(from str: String) {
|
||||
if let match = Self.matchAssert(str) {
|
||||
self = match
|
||||
|
||||
@@ -29,26 +29,43 @@ public struct CrashLog: Sendable {
|
||||
|
||||
static let abortRegex = #/^Abort:\s*function\s*(?<symbol>[^\s]+).*$/#
|
||||
|
||||
private static func checkStackOverflow(_ lines: [String]) -> Bool {
|
||||
lines.contains { $0.scanningUTF8 { $0.scanForStackOverflow() } }
|
||||
/// Reports whether any line indicates a stack overflow.
///
/// - Parameter lines: The log lines, each as a sequence of bytes.
/// - Returns: `true` as soon as `scanForStackOverflow` succeeds on a line,
///   `false` if it succeeds on none.
private static func checkStackOverflow(
  _ lines: [some Sequence<UInt8>]
) -> Bool {
  for line in lines {
    let overflowed: Bool = line.scanning { scanner in
      scanner.scanForStackOverflow()
    }
    if overflowed {
      return true
    }
  }
  return false
}
|
||||
|
||||
private static func getFrames(from lines: [String]) -> [Frame] {
|
||||
private static func getFrames(from lines: [some Collection<UInt8>]) -> [Frame] {
|
||||
var lines = lines[...]
|
||||
|
||||
guard let stackDumpStart = lines.firstIndex(where: {
|
||||
$0.hasPrefix("Stack dump without symbol names")
|
||||
$0.scanning { $0.tryEat(utf8: "Stack dump without symbol names") }
|
||||
}) ?? lines.firstIndex(where: {
|
||||
$0.contains("Stack dump without symbol names")
|
||||
$0.scanning { scanner in
|
||||
repeat {
|
||||
if scanner.tryEat(utf8: "Stack dump without symbol names") {
|
||||
return true
|
||||
}
|
||||
} while scanner.tryEat()
|
||||
return false
|
||||
}
|
||||
}) else {
|
||||
// The frame symbol can be included in the UBSan error.
|
||||
for line in lines {
|
||||
guard let match = line.wholeMatch(of: sanitizerFrameSymbolRegex)?.output else {
|
||||
guard
|
||||
line.scanning({
|
||||
$0.skip(while: \.isSpaceOrTab);
|
||||
return $0.tryEat(utf8: "SUMMARY:")
|
||||
}),
|
||||
case let lineStr = String(utf8: line),
|
||||
// TODO: Use a scanner instead of regex here.
|
||||
let match = lineStr.wholeMatch(of: sanitizerFrameSymbolRegex)?.output
|
||||
else {
|
||||
continue
|
||||
}
|
||||
return [
|
||||
Frame(
|
||||
line: line,
|
||||
line: lineStr,
|
||||
image: String(match.image),
|
||||
symbol: String(match.symbol),
|
||||
offset: nil
|
||||
@@ -60,7 +77,7 @@ public struct CrashLog: Sendable {
|
||||
lines = lines[(stackDumpStart + 1)...]
|
||||
|
||||
var frames: [Frame] = []
|
||||
while let line = lines.first, let frame = Frame(from: line) {
|
||||
while let line = lines.first, let frame = Frame(from: String(utf8: line)) {
|
||||
frames.append(frame)
|
||||
lines = lines.dropFirst()
|
||||
}
|
||||
@@ -114,16 +131,18 @@ public struct CrashLog: Sendable {
|
||||
return [firstSymbol]
|
||||
}
|
||||
|
||||
private static func findAbort(_ lines: [String]) -> String? {
|
||||
private static func findAbort(_ lines: [some Collection<UInt8>]) -> String? {
|
||||
for line in lines {
|
||||
if let match = line.wholeMatch(of: Self.abortRegex) {
|
||||
guard line.scanning({ $0.tryEat(utf8: "Abort:") }) else { continue }
|
||||
// TODO: Use scanner for this.
|
||||
if let match = String(utf8: line).wholeMatch(of: Self.abortRegex) {
|
||||
return String(match.symbol)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
private static func findAssertion(_ lines: [String]) -> Assertion? {
|
||||
private static func findAssertion(_ lines: [some Sequence<UInt8>]) -> Assertion? {
|
||||
for line in lines {
|
||||
guard let assert = Assertion(from: line) else { continue }
|
||||
return assert
|
||||
@@ -131,8 +150,12 @@ public struct CrashLog: Sendable {
|
||||
return nil
|
||||
}
|
||||
|
||||
public init?(from log: String) {
|
||||
let lines = log.components(separatedBy: "\n")
|
||||
/// Creates a crash log from text by delegating to the byte-based
/// initializer with the string's UTF-8 view.
///
/// - Parameter str: The crash log text.
public init(from str: String) {
  let utf8Bytes = str.utf8
  self.init(from: utf8Bytes)
}
|
||||
|
||||
public init(from bytes: some Collection<UInt8>) {
|
||||
let lines = bytes.split(separator: UInt8(ascii: "\n"))
|
||||
self.isStackOverflow = Self.checkStackOverflow(lines)
|
||||
self.frames = Self.getFrames(from: lines)
|
||||
|
||||
|
||||
@@ -38,17 +38,7 @@ extension Toolchain {
|
||||
case .exited(code: 0), .exited(code: 1):
|
||||
return nil
|
||||
default:
|
||||
let output = String(
|
||||
decoding: result.standardError.prefix(1_000_000), as: UTF8.self
|
||||
)
|
||||
guard let crashLog = CrashLog(from: output) else {
|
||||
throw ReproducerError("""
|
||||
couldn't extract sig for \
|
||||
\(inputs.first!.parentDir!.fileName) \
|
||||
<sig>\(output)</sig>
|
||||
""")
|
||||
}
|
||||
return crashLog
|
||||
return CrashLog(from: result.standardError)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -73,6 +73,10 @@ public extension String {
|
||||
self = buffer.withUnsafeBytes(String.init(utf8:))
|
||||
}
|
||||
|
||||
/// Creates a string by decoding a collection of bytes as UTF-8.
///
/// - Parameter seq: The UTF-8 code units to decode.
init(utf8 seq: some Collection<UInt8>) {
  self = String(decoding: seq, as: UTF8.self)
}
|
||||
|
||||
func scanningUTF8<R>(_ scan: (inout ByteScanner) throws -> R) rethrows -> R {
|
||||
var tmp = self
|
||||
return try tmp.withUTF8 { utf8 in
|
||||
@@ -147,6 +151,22 @@ public extension String {
|
||||
}
|
||||
}
|
||||
|
||||
extension Sequence where Element == UInt8 {
  /// Runs `scan` with a `ByteScanner` over this sequence's bytes.
  ///
  /// If the sequence exposes contiguous storage, the scanner reads it
  /// directly. Otherwise the bytes are first copied into an `Array`.
  ///
  /// - Parameter scan: The closure to run with the scanner.
  /// - Returns: Whatever `scan` returns.
  /// - Throws: Rethrows any error thrown by `scan`.
  public func scanning<R>(_ scan: (inout ByteScanner) throws -> R) rethrows -> R {
    // Fast path: `nil` here means no contiguous storage was available,
    // not that `scan` itself produced `nil`.
    let direct: R? = try withContiguousStorageIfAvailable { bytes in
      var scanner = ByteScanner(bytes)
      return try scan(&scanner)
    }
    guard let direct else {
      // Slow path: copy the bytes into an array and scan the copy.
      let copy = Array(self)
      return try copy.withUnsafeBufferPointer { bytes in
        var scanner = ByteScanner(bytes)
        return try scan(&scanner)
      }
    }
    return direct
  }
}
|
||||
|
||||
/// Pattern match by `is` property. E.g. `case \.isNewline: ...`
|
||||
public func ~= <T>(keyPath: KeyPath<T, Bool>, subject: T) -> Bool {
|
||||
return subject[keyPath: keyPath]
|
||||
|
||||
@@ -195,18 +195,15 @@ struct GetSignatureCommand: ParsableCommand {
|
||||
}
|
||||
return input
|
||||
}()
|
||||
func runOnce() -> Signature? {
|
||||
CrashLog(from: input)?.signature
|
||||
func runOnce() -> Signature {
|
||||
CrashLog(from: input).signature
|
||||
}
|
||||
let start = Date()
|
||||
for _ in 0 ..< repeats {
|
||||
guard runOnce() != nil else {
|
||||
Darwin.exit(1)
|
||||
}
|
||||
}
|
||||
guard let sig = runOnce() else {
|
||||
Darwin.exit(1)
|
||||
// TODO: Make sure this doesn't get optimized out?
|
||||
_ = runOnce()
|
||||
}
|
||||
let sig = runOnce()
|
||||
print(sig)
|
||||
if repeats > 0 {
|
||||
print("\(Int((Date().timeIntervalSince(start) * 1000).rounded()))ms")
|
||||
|
||||
Reference in New Issue
Block a user