//===--- ProcessReproducers.swift -----------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2026 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

import Foundation
import Subprocess
import Synchronization

/// Drives the processing of crasher inputs into reproducer files.
///
/// `process()` is the entry point: it optionally imports the reproducers that
/// already exist in the output directories, checks every input for a crash,
/// reduces the crashers that carry a signature not seen before, writes the
/// results out, and optionally files issues for them.
public actor ProcessReproducers {
  /// The known reproducers, keyed by every signature they carry.
  private var reproducersBySignature: [Signature: ReproducerFile] = [:]

  /// The toolchain used to check inputs for crashes.
  let toolchain: Toolchain
  /// Directory whose non-directory entries are used as inputs, if any.
  let inputDir: AbsolutePath?
  /// Additional input files, processed alongside those from `inputDir`.
  let otherInputs: [AbsolutePath]
  /// The directories reproducers are read from and written to.
  let outputDirs: OutputDirs
  /// Absolute path of the currently running executable.
  let executablePath: AbsolutePath
  /// When set, the extended retry batches are skipped and inputs that do not
  /// reproduce are not reported or moved aside.
  let quickMode: Bool
  /// When set, inputs are removed once they have been processed or found to
  /// carry no new signature.
  let deleteInputs: Bool
  /// When set, existing reproducers are re-run while being imported.
  let reprocess: Bool
  /// When set, existing reproducers are not imported at all.
  let ignoreExisting: Bool
  /// When set, `process()` finishes by calling `fileReproIssues()`.
  let fileIssues: Bool
  /// Frontend arguments to use for inputs whose header does not specify any.
  let frontendArgs: [Command.Argument]
  /// When set, crashers are detected but not reduced.
  let checkOnly: Bool

  /// Creates a processor.
  ///
  /// - Throws: If the path of the running executable cannot be determined.
  public init(
    from inputDir: AbsolutePath?,
    to outputDirs: OutputDirs,
    otherInputs: [AbsolutePath],
    toolchain: Toolchain,
    quickMode: Bool,
    deleteInputs: Bool,
    reprocess: Bool,
    ignoreExisting: Bool,
    fileIssues: Bool,
    frontendArgs: [Command.Argument],
    checkOnly: Bool
  ) throws {
    self.toolchain = toolchain
    self.inputDir = inputDir
    self.otherInputs = otherInputs
    self.outputDirs = outputDirs
    guard let execPath = Bundle.main.executablePath else {
      struct CannotFindExecutableError: Error {}
      throw CannotFindExecutableError()
    }
    self.executablePath = AnyPath(execPath).absoluteInWorkingDir
    self.quickMode = quickMode
    self.deleteInputs = deleteInputs
    self.reprocess = reprocess
    self.ignoreExisting = ignoreExisting
    self.fileIssues = fileIssues
    self.frontendArgs = frontendArgs
    self.checkOnly = checkOnly
  }

  /// Writes `reproducer` to its output directory and records its signatures,
  /// unless all of its signatures are already known.
  ///
  /// - Throws: If writing or re-reading the file fails, or if the written file
  ///   does not serialize back to the same bytes.
  func writeReproducer(_ reproducer: Reproducer) throws {
    guard firstNewSignature(reproducer.signatures) != nil else {
      return
    }
    let repoFile = ReproducerFile(
      in: outputDirs.outputDir(for: reproducer), reproducer: reproducer
    )
    recordReproducer(repoFile)
    try repoFile.write()

    // Check the file round-trips
    do {
      let before = try repoFile.path.read()
      let repo = try Reproducer.decoding(before)
      let after = try? repo?.serialize()
      if after != before {
        throw ReproducerError("""
          \(repoFile) didn't round-trip!
          before:
          \(String(decoding: before, as: UTF8.self))
          after:
          \(String(decoding: after ?? Data(), as: UTF8.self))
          """)
      }
    }
  }

  /// Returns the crash information for `reproFile`, using the information
  /// stored on the reproducer when present and re-running it otherwise.
  ///
  /// - Returns: `nil` if the reproducer had to be re-run and did not crash.
  func getCrash(for reproFile: ReproducerFile) async throws -> CrashInfo? {
    let repro = reproFile.reproducer
    if let crashInfo = repro.crashInfo {
      return crashInfo
    }
    // If we have multiple signatures, run multiple times to gather them all.
    let crasher = PotentialCrasher(reproFile)
    if !repro.signatures.secondaries.isEmpty && repro.options.isDeterministic {
      return try await checkDeterministicCrash(of: crasher)?.crashInfo
    }
    return try await checkCrash(of: crasher)?.crashInfo
  }

  /// Returns the first signature (in sorted order) of `sigs` that has no
  /// recorded reproducer, or `nil` if every signature is already known.
  func firstNewSignature(_ sigs: KnownSignatures) -> Signature? {
    sigs.sigs.sorted().first(where: { reproducersBySignature[$0] == nil })
  }

  /// Records `repro` under each of its signatures, replacing any reproducer
  /// previously recorded for the same signature.
  func recordReproducer(_ repro: ReproducerFile) {
    for sig in repro.reproducer.signatures.sigs {
      reproducersBySignature[sig] = repro
    }
  }

  /// Interactively files an issue for a recorded reproducer that does not
  /// have an issue ID yet.
  ///
  /// Only the first recorded entry (in signature order) is considered per
  /// call. The issue is printed and posted after the user presses ENTER; on
  /// success the reproducer is given the issue number and renamed to
  /// `issue-<number>.swift`.
  func fileReproIssues() async throws {
    for (_, reproFile) in reproducersBySignature.sorted(by: \.key).prefix(1) {
      // Local accessor so mutations go straight through to the file's
      // reproducer.
      var repro: Reproducer {
        _read { yield reproFile.reproducer }
        _modify { yield &reproFile.reproducer }
      }
      guard repro.issueID == nil,
            let crashLog = try await getCrash(for: reproFile)?.primary,
            let issue = GitHubIssue(from: repro, crashLog: crashLog)
      else {
        continue
      }
      print("Will file issue:")
      print("Title: \(issue.title)")
      print("Body: \n\(issue.body)\n")
      print("Labels: \(issue.labels.map(\.rawValue))")
      print("Press ENTER to post")
      _ = readLine()

      do {
        let response = try await issue.post(owner: "swiftlang", repoName: "swift")
        log.info("posted \(response.htmlURL)")
        repro.issueID = response.number
        // Rename the file after the issue. A path without a parent directory
        // cannot be renamed; keep the current path rather than trapping.
        if let parentDir = reproFile.path.parentDir {
          reproFile.path = parentDir.appending("issue-\(response.number).swift")
        } else {
          log.warning("cannot rename reproducer for issue \(response.number): no parent directory")
        }
        try reproFile.write()
      } catch {
        log.warning("failed to file issue \(error)")
      }
    }
  }

  /// Re-runs each of `reproFiles`, updating its signatures and stack-overflow
  /// flag from the fresh crash and rewriting the file if anything changed.
  /// Reproducers that no longer crash are reported and left untouched.
  private func reprocessReproducers(_ reproFiles: [ReproducerFile]) async {
    log.info("re-processing reproducers...")
    let worklist = TaskWorklist<Void>()
    for repro in reproFiles {
      worklist.addTask {
        do {
          guard let crash = try await self.getCrash(for: repro) else {
            // FIXME: Workaround https://github.com/swiftlang/swift/issues/86623
            let logMsg = "\(repro) did not re-reproduce"
            log.warning(logMsg)
            return
          }
          let oldSigs = repro.reproducer.signatures
          let oldIsStackOverflow = repro.reproducer.isStackOverflow
          repro.reproducer.signatures = crash.signatures
          repro.reproducer.isStackOverflow = crash.primary.isStackOverflow
          do {
            // FIXME: Workaround https://github.com/swiftlang/swift/issues/86623
            let logMsg = "found: \(repro)"
            log.info(logMsg)
          }
          if oldSigs.primary != crash.signatures.primary ||
              oldSigs != crash.signatures ||
              oldIsStackOverflow != crash.primary.isStackOverflow {
            repro.checkFilename(warn: false)
            try repro.write()
          }
        } catch {
          log.warning("\(error)")
        }
      }
    }
    // Drain the worklist so all tasks have finished before returning.
    for await _ in worklist.results {}
  }

  /// Loads every `.swift` reproducer found in the output directories,
  /// optionally re-processes them, and records those whose primary signature
  /// is known.
  private func importExistingReproducers() async throws {
    var reproFiles: [ReproducerFile] = []
    for outputPath in outputDirs.allPaths {
      for file in try outputPath.getDirContents() where file.hasExtension(.swift) {
        let absPath = outputPath.appending(file)
        guard let reproFile = try ReproducerFile(from: absPath) else {
          continue
        }
        reproFiles.append(reproFile)
      }
    }
    if reprocess {
      await reprocessReproducers(reproFiles)
    }
    for repro in reproFiles where !repro.reproducer.primarySig.isUnknown {
      recordReproducer(repro)
    }
  }

  /// Returns `otherInputs` followed by the non-directory entries of
  /// `inputDir`, if one was given.
  private func getAllInputPaths() throws -> [AbsolutePath] {
    var inputPaths = otherInputs
    if let inputDir {
      inputPaths += try inputDir.getDirContents()
        .map { inputDir.appending($0) }
        .filter { !$0.isDirectory }
    }
    return inputPaths
  }

  /// Checks every input for crashes and returns the crashers that carry at
  /// least one new signature, sorted by path, together with the set of all
  /// signatures seen along the way.
  ///
  /// While running, the log prefix shows `[processed/total]` progress.
  private func getAllNewCrashers(
    for inputs: [AbsolutePath]
  ) async throws -> ([Crasher], Set<Signature>) {
    let count = inputs.count
    let progress = Atomic(0)
    log.prefixFn = { output, useColor in
      let progress = progress.load(ordering: .acquiring)
      "[\(progress)/\(count)]".withColor(.gray).write(to: output, useColor: useColor)
    }
    defer {
      log.prefixFn = nil
    }

    var seenSigs: Set<Signature> = []
    let repros = await inputs.parallelUnorderedMap { inputPath -> [Crasher] in
      defer {
        progress.add(1, ordering: .releasing)
      }
      var results: [Crasher] = []
      var errors: [any Error] = []
      do {
        // Evaluate the batches.
        let batch = try self.getPotentialCrashers(for: inputPath)
        results = await batch.eval { input in
          do {
            guard let crasher = try await self.getCrasher(input) else {
              return nil
            }
            // We initially assume determinism and source order to make the
            // initial gathering quicker, make sure we undo that here.
            return crasher
              .withDeterministic(false)
              .withSourceOrderCompletion(false)
          } catch {
            errors.append(error)
            return nil
          }
        }
      } catch {
        log.warning("\(error)")
        return []
      }
      // Deferred so the quick-mode superset check below only sees signatures
      // from other inputs, not the ones this input just produced.
      defer {
        for result in results {
          seenSigs.formUnion(result.signatures.sigs)
        }
      }
      if !self.quickMode, results.isEmpty {
        if let error = errors.last {
          log.warning("\(error)")
        } else {
          log.warning("\(inputPath.fileName) didn't reproduce issue")
          if let noreproDir = self.noreproDir {
            let outPath = noreproDir.appending(inputPath.fileName)
            if outPath.exists {
              inputPath.remove()
            } else {
              // Best-effort move; report the failure instead of hiding it.
              do {
                try FileManager.default.moveItem(
                  atPath: inputPath.rawPath, toPath: outPath.rawPath
                )
              } catch {
                log.warning("\(error)")
              }
            }
          }
        }
        return []
      }
      let hasNewSig = results.contains(where: { result in
        let sigs = result.signatures
        if self.firstNewSignature(sigs) == nil {
          return false
        }
        if self.quickMode, seenSigs.isSuperset(of: sigs.sigs) {
          return false
        }
        return true
      })
      guard hasNewSig else {
        // NOTE(review): in quick mode an input that produced no results at
        // all also reaches this point and is removed when `deleteInputs` is
        // set — confirm that is intended.
        if self.deleteInputs {
          inputPath.remove()
        }
        return []
      }
      return results
    }.flatMap { $0 }.sorted(by: \.path.rawPath)
    return (repros, seenSigs)
  }

  /// Groups `repros` by their first new signature, reduces one crasher per
  /// group, and writes the resulting reproducers.
  ///
  /// - Returns: The reproducers that were reduced and written successfully.
  private func reduceAllCrashers(
    _ repros: [Crasher]
  ) async throws -> [Reproducer] {
    var groupedRepros: [Signature: [Crasher]] = [:]
    for crasher in repros {
      guard let sig = firstNewSignature(crasher.signatures) else {
        continue
      }
      groupedRepros[sig, default: []].append(crasher)
    }
    log.info("found \(groupedRepros.count) new crasher(s), reducing...")

    // Paths that may be deleted afterwards; entries are dropped again for
    // groups that fail to reduce or reproducers that fail to write.
    var processedPaths = Set(repros.map(\.path))

    // creduce already parallelizes so only do 3 in parallel.
    let worklist = TaskWorklist<[Reproducer]>(maxParallel: 3)
    for reproGroup in groupedRepros.sorted(by: \.key).map(\.value) {
      worklist.addTask {
        // Take the first reproducer in the group that reduces successfully.
        for repro in reproGroup {
          do {
            return try await self.reduce(repro)
          } catch {
            log.warning("\(error)")
          }
        }
        processedPaths.subtract(reproGroup.map(\.path))
        return []
      }
    }
    var processed: [Reproducer] = []
    for await reproGroup in worklist.results {
      for reduced in reproGroup {
        do {
          try self.writeReproducer(reduced)
          processed.append(reduced)
        } catch {
          log.warning("\(error)")
          // Keep the original input around since its reproducer was not
          // written. A missing original path means there is nothing to keep.
          if let originalPath = reduced.originalPath {
            processedPaths.remove(originalPath)
          }
        }
      }
    }
    if deleteInputs {
      for path in processedPaths {
        path.remove()
      }
    }
    return processed
  }

  /// Runs the whole pipeline: import existing reproducers (unless
  /// `ignoreExisting`), find new crashers among the inputs, reduce them
  /// (unless `checkOnly`), log a summary, and file issues if `fileIssues`.
  public func process() async throws {
    let start = Date()
    if !ignoreExisting {
      try await importExistingReproducers()
    }
    let inputPaths = try getAllInputPaths()
    let (repros, seenSigs) = try await getAllNewCrashers(for: inputPaths)
    let reduced = if !checkOnly && !repros.isEmpty {
      try await reduceAllCrashers(repros)
    } else {
      [] as [Reproducer]
    }
    let delta = Int(Date().timeIntervalSince(start).rounded())
    log.info("""
      Finished processing \(inputPaths.count) files in \(delta)s, \
      \(seenSigs.count) unique signatures, \(reduced.count) new reproducers
      """
    )
    if fileIssues {
      try await fileReproIssues()
    }
  }

  /// Runs the toolchain on `input` and returns the resulting crasher.
  ///
  /// - Parameters:
  ///   - input: The configuration to try.
  ///   - signatures: Forwarded to the toolchain's `matchingSignatures`.
  /// - Returns: `nil` if the toolchain reported no crash.
  func checkCrash(
    of input: PotentialCrasher,
    matchingSignatures signatures: KnownSignatures? = nil
  ) async throws -> Crasher? {
    try await input.withInputFiles { paths in
      guard let crash = try await toolchain.checkCrash(
        of: paths, options: input.options, matchingSignatures: signatures
      ) else {
        return nil
      }
      return Crasher(input: input, crashInfo: .init(crash))
    }
  }

  /// Runs the toolchain's deterministic crash check on `input` and returns
  /// the resulting crasher, marked as deterministic.
  ///
  /// - Returns: `nil` if the toolchain reported no crash.
  func checkDeterministicCrash(
    of input: PotentialCrasher
  ) async throws -> Crasher? {
    try await input.withInputFiles { paths in
      guard let crashInfo = try await toolchain.checkDeterministicCrash(
        of: paths, options: input.options
      ) else {
        return nil
      }
      return Crasher(input: input.withDeterministic(), crashInfo: crashInfo)
    }
  }

  /// The directory to place non-reproducing crashers into. Only used when an
  /// input directory is given and input deletion is enabled.
  private lazy var noreproDir: AbsolutePath? = {
    guard let inputDir, deleteInputs else {
      return nil
    }
    let result = inputDir.appending("norepro")
    if !result.exists {
      // Best-effort creation; report the failure instead of hiding it.
      do {
        try result.makeDir()
      } catch {
        log.warning("\(error)")
      }
    }
    return result
  }()

  /// Checks a single configuration for a crash, logging the attempt and any
  /// crasher found.
  private func getCrasher(_ input: PotentialCrasher) async throws -> Crasher? {
    log.info("checking: \(input)...".withColor(.gray))
    guard let crasher = try await checkCrash(of: input) else {
      return nil
    }
    log.info("found: \(crasher)")
    return crasher
  }

  /// A tree of crasher configurations to try, with a per-node policy for
  /// combining the results of its children.
  private enum PotentialCrasherBatch {
    /// Nothing to try.
    case empty
    /// A single configuration.
    case one(PotentialCrasher)
    /// Try every child and concatenate the results.
    case allOf([PotentialCrasherBatch])
    /// Try children in order and use the first non-empty result.
    case firstOf([PotentialCrasherBatch])
    /// Try children in order until one yields nothing, and use the result of
    /// the last child that yielded something.
    case lastOf([PotentialCrasherBatch])

    /// Whether this is the `.empty` case. Nested batches are not inspected.
    var isEmpty: Bool {
      if case .empty = self { true } else { false }
    }

    /// Make the batch more resilient to non-deterministic failures by
    /// attempting multiple times and trying things like guard malloc.
    var withNonDeterminismHandling: Self {
      .firstOf([
        self,
        self.map(\.withGuardMalloc),
        self.map { $0.withDeterministic(false) },
      ])
    }

    /// The basic batch for `crasher`. With multiple buffers this tries the
    /// joined buffers first, then either each buffer as the primary file
    /// (for completion) or the crasher as-is.
    static func base(_ crasher: PotentialCrasher) -> Self {
      if crasher.buffers.count > 1 {
        var result: [PotentialCrasher] = [crasher.withJoinedBuffers()]
        if crasher.options.kind == .complete {
          // Completion requires a primary file.
          result += crasher.buffers.indices.map {
            crasher.withPrimaryIdx($0)
          }
        } else {
          result.append(crasher)
        }
        return .firstOf(result.map(one))
      } else {
        return .one(crasher)
      }
    }

    /// Returns a batch of the same shape with `fn` applied to every
    /// configuration.
    func map(
      _ fn: (PotentialCrasher) throws -> PotentialCrasher
    ) rethrows -> Self {
      switch self {
      case .empty:
        return self
      case .one(let crasher):
        return try .one(fn(crasher))
      case .allOf(let elts):
        return .allOf(try elts.map { try $0.map(fn) })
      case .firstOf(let elts):
        return .firstOf(try elts.map { try $0.map(fn) })
      case .lastOf(let elts):
        return .lastOf(try elts.map { try $0.map(fn) })
      }
    }

    /// Like `map`, but a configuration for which `fn` returns `nil` becomes
    /// `.empty`, and a combinator left with no children becomes `.empty`.
    func compactMap(
      _ fn: (PotentialCrasher) throws -> PotentialCrasher?
    ) rethrows -> Self {
      switch self {
      case .empty:
        return self
      case .one(let crasher):
        guard let crasher = try fn(crasher) else {
          return .empty
        }
        return .one(crasher)
      case .allOf(let elts):
        let elts = try elts.compactMap { try $0.compactMap(fn) }
        return elts.isEmpty ? .empty : .allOf(elts)
      case .firstOf(let elts):
        let elts = try elts.compactMap { try $0.compactMap(fn) }
        return elts.isEmpty ? .empty : .firstOf(elts)
      case .lastOf(let elts):
        let elts = try elts.compactMap { try $0.compactMap(fn) }
        return elts.isEmpty ? .empty : .lastOf(elts)
      }
    }

    /// Evaluates the batch by calling `fn` on configurations according to
    /// each node's policy, collecting the non-`nil` results.
    func eval<T>(
      @_inheritActorContext _ fn: @Sendable (PotentialCrasher) async throws -> T?
    ) async rethrows -> [T] {
      switch self {
      case .empty:
        return []
      case .one(let crasher):
        guard let result = try await fn(crasher) else {
          return []
        }
        return [result]
      case .allOf(let elts):
        var results: [T] = []
        for elt in elts {
          results += try await elt.eval(fn)
        }
        return results
      case .firstOf(let elts):
        for elt in elts {
          let result = try await elt.eval(fn)
          guard !result.isEmpty else {
            continue
          }
          return result
        }
        return []
      case .lastOf(let elts):
        var currentResult: [T] = []
        for elt in elts {
          let result = try await elt.eval(fn)
          guard !result.isEmpty else {
            break
          }
          currentResult = result
        }
        return currentResult
      }
    }
  }

  /// For a given input crasher, produce a batch of potential crasher
  /// configurations to try.
  private func getPotentialCrashers(
    for path: AbsolutePath
  ) throws -> PotentialCrasherBatch {
    typealias Batch = PotentialCrasherBatch
    let input = try FuzzerInput(from: path)

    // If we have custom frontend args, then only try those.
    let frontendArgs = input.header.frontendArgs ?? self.frontendArgs
    if !frontendArgs.isEmpty {
      return .one(.custom(input, frontendArgs: frontendArgs))
        .withNonDeterminismHandling
    }

    let completeBatch: Batch = try PotentialCrasher.completion(input).map {
      Batch.base($0.withSourceOrderCompletion(true).withSolverLimits())
    } ?? .empty

    let compileBatch = Batch.firstOf([
      .base(.typecheck(input).withSolverLimits()),
      .lastOf([
        .emitIR(input), .emitSIL(input), .emitSILGen(input)
      ].map(Batch.base))
    ])

    var batch = Batch.allOf([compileBatch, completeBatch])
    if !quickMode {
      // If the initial batch doesn't reproduce, try with no solver limits.
      var extendedBatch = batch.compactMap {
        $0.hasSolverLimits ? $0.withSolverLimits(false) : nil
      }
      // Then try with no SDK
      extendedBatch = .firstOf([
        extendedBatch,
        batch.map { $0.withNoSDK.withSolverLimits(false) }
      ])
      // Then try with no objc.
      extendedBatch = .firstOf([
        extendedBatch, extendedBatch.map(\.withNoObjCInterop)
      ])
      // If we still don't have a result, try the batches again with
      // non-determinism in mind.
      extendedBatch = extendedBatch.withNonDeterminismHandling
      batch = .firstOf([batch, extendedBatch])
    }
    return batch
  }
}