[SwiftCaching] Create standalone reproducer from swift caching build

Add a new option `-gen-reproducer` that, when swift caching is used,
creates a standalone reproducer that can be used to reproduce the
`swift-frontend` invocation.
This commit is contained in:
Steven Wu
2025-06-09 15:01:08 -07:00
parent 78552bb258
commit a24c34252f
8 changed files with 479 additions and 5 deletions

View File

@@ -537,6 +537,11 @@ GROUPED_ERROR(error_load_input_from_cas, CompilationCaching, none, "failed to lo
GROUPED_ERROR(error_wrong_input_num_for_input_file_key, CompilationCaching, none, "-input-file-key only support one input file", ())
ERROR(error_gen_reproducer_not_caching, none, "-gen-reproducer only supports swift caching (-cache-compile-job)", ())
ERROR(error_cannot_create_reproducer_dir, none, "failed to create reproducer director '%0': %1", (StringRef, StringRef))
NOTE(note_reproducer, none, "reproducer is available at: %0", (StringRef))
// Dependency Verifier Diagnostics
ERROR(missing_member_dependency,none,
"expected "

View File

@@ -51,6 +51,11 @@ llvm::Error printCompileJobCacheKey(llvm::cas::ObjectStore &CAS,
llvm::cas::ObjectRef Key,
llvm::raw_ostream &os);
/// Iterate through the command-line options stored in a cache key.
///
/// \param CAS the object store that \p Key belongs to.
/// \param Key the cache key node whose command-line should be visited.
/// \param Callback invoked once per stored command-line entry. Returning an
/// error from the callback stops the iteration and that error is returned.
///
/// \returns an error if the key cannot be loaded, is not a well-formed cache
/// key node, or if \p Callback fails; success otherwise.
llvm::Error iterateCommandLine(llvm::cas::ObjectStore &CAS,
llvm::cas::ObjectRef Key,
std::function<llvm::Error(StringRef)> Callback);
} // namespace swift
#endif

View File

@@ -292,6 +292,12 @@ public:
/// by the Clang importer as part of semantic analysis.
bool ModuleHasBridgingHeader = false;
/// Whether the frontend should generate a standalone reproducer for this
/// invocation (set by `-gen-reproducer`).
bool GenReproducer = false;
/// Directory in which the reproducer is generated (set by
/// `-gen-reproducer-dir`). When empty, a unique "swift-reproducer" directory
/// is created instead.
std::string GenReproducerDir;
/// Indicates whether or not the frontend should print statistics upon
/// termination.
bool PrintStats = false;

View File

@@ -1533,6 +1533,12 @@ def enable_address_dependencies : Flag<["-"], "enable-address-dependencies">,
def disable_address_dependencies : Flag<["-"], "disable-address-dependencies">,
HelpText<"Disable enforcement of lifetime dependencies on addressable values.">;
// Generate a standalone reproducer for this frontend invocation. Only
// supported together with swift caching (-cache-compile-job).
def gen_reproducer : Flag<["-"], "gen-reproducer">,
HelpText<"Generate a reproducer for current compilation.">;
// Directory that receives the reproducer. When the option is omitted, a
// unique directory is created for it.
def gen_reproducer_dir
: Separate<["-"], "gen-reproducer-dir">,
HelpText<"Path to directory where reproducers write to.">;
} // end let Flags = [FrontendOption, NoDriverOption, HelpHidden]
def disable_experimental_parser_round_trip : Flag<["-"],

View File

@@ -164,6 +164,9 @@ bool ArgsToFrontendOptionsConverter::convert(
Opts.ParallelDependencyScan = Args.hasFlag(OPT_parallel_scan,
OPT_no_parallel_scan,
true);
Opts.GenReproducer |= Args.hasArg(OPT_gen_reproducer);
Opts.GenReproducerDir = Args.getLastArgValue(OPT_gen_reproducer_dir);
if (const Arg *A = Args.getLastArg(OPT_dependency_scan_cache_path)) {
Opts.SerializedDependencyScannerCachePath = A->getValue();
}

View File

@@ -105,17 +105,23 @@ swift::createCompileJobCacheKeyForOutput(llvm::cas::ObjectStore &CAS,
return CAS.storeFromString({BaseKey}, OS.str());
}
/// Check that \p Proxy has the layout of a cache key node: a payload that is
/// exactly the size of a uint32_t and exactly one reference.
///
/// \returns a string error describing the first violated constraint, or
/// success if the node is well-formed.
static llvm::Error validateCacheKeyNode(llvm::cas::ObjectProxy Proxy) {
  const bool HasIndexSizedPayload =
      Proxy.getData().size() == sizeof(uint32_t);
  if (!HasIndexSizedPayload)
    return llvm::createStringError("incorrect size for cache key node");

  const bool HasSingleChild = Proxy.getNumReferences() == 1;
  if (!HasSingleChild)
    return llvm::createStringError("incorrect child number for cache key node");

  return llvm::Error::success();
}
llvm::Error swift::printCompileJobCacheKey(llvm::cas::ObjectStore &CAS,
llvm::cas::ObjectRef Key,
llvm::raw_ostream &OS) {
auto Proxy = CAS.getProxy(Key);
if (!Proxy)
return Proxy.takeError();
if (Proxy->getData().size() != sizeof(uint32_t))
return llvm::createStringError("incorrect size for cache key node");
if (Proxy->getNumReferences() != 1)
return llvm::createStringError("incorrect child number for cache key node");
if (auto Err = validateCacheKeyNode(*Proxy))
return Err;
uint32_t InputIndex = llvm::support::endian::read<uint32_t>(
Proxy->getData().data(), llvm::endianness::little);
@@ -153,3 +159,41 @@ llvm::Error swift::printCompileJobCacheKey(llvm::cas::ObjectStore &CAS,
return llvm::Error::success();
}
/// Invoke \p Callback on each command-line entry recorded in a cache key.
///
/// \p Key must be a cache key node (see validateCacheKeyNode). Its single
/// reference is loaded as a CAS tree, and the data of every tree entry named
/// "command-line" is split on NUL bytes; each piece is handed to \p Callback
/// in order.
///
/// \returns the first error produced while loading or validating the key,
/// loading the tree or the command-line entry, or by \p Callback itself;
/// success otherwise.
llvm::Error
swift::iterateCommandLine(llvm::cas::ObjectStore &CAS, llvm::cas::ObjectRef Key,
                          std::function<llvm::Error(StringRef)> Callback) {
  auto Proxy = CAS.getProxy(Key);
  if (!Proxy)
    return Proxy.takeError();

  if (auto Err = validateCacheKeyNode(*Proxy))
    return Err;

  auto Base = Proxy->getReference(0);
  llvm::cas::TreeSchema Schema(CAS);
  auto Tree = Schema.load(Base);
  if (!Tree)
    return Tree.takeError();

  return Tree->forEachEntry(
      [&](const llvm::cas::NamedTreeEntry &Entry) -> llvm::Error {
        // Only the command-line entry is of interest; skip everything else
        // before touching the CAS so unrelated entries are never loaded.
        if (Entry.getName() != "command-line")
          return llvm::Error::success();

        auto DataProxy = CAS.getProxy(Entry.getRef());
        if (!DataProxy)
          return DataProxy.takeError();

        // Entries are separated by NUL bytes.
        StringRef Line, Remain = DataProxy->getData();
        while (!Remain.empty()) {
          std::tie(Line, Remain) = Remain.split('\0');
          if (auto Err = Callback(Line))
            return Err;
        }
        return llvm::Error::success();
      });
}

View File

@@ -80,6 +80,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
@@ -89,6 +90,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/VirtualOutputBackend.h"
#include "llvm/Support/VirtualOutputBackends.h"
@@ -1395,6 +1397,318 @@ static bool tryReplayCompilerResults(CompilerInstance &Instance) {
return replayed;
}
/// Generate a standalone reproducer for the current swift caching invocation.
///
/// The CAS objects referenced by this invocation (clang include tree, input
/// file key, bridging header PCH key, explicit module dependencies and the
/// explicit module map) are imported from the invocation's object store into a
/// CAS located under the reproducer directory. The original arguments are then
/// rewritten to refer to that CAS and written to `reproduce.sh` in the same
/// directory.
///
/// \param Instance the configured compiler instance; it must support caching.
/// \param Args the frontend arguments of the current invocation.
///
/// \returns true if reproducer generation failed (a diagnostic has been
/// emitted), false on success.
static bool generateReproducer(CompilerInstance &Instance,
                               ArrayRef<const char *> Args) {
  if (!Instance.supportCaching()) {
    Instance.getDiags().diagnose(SourceLoc(),
                                 diag::error_gen_reproducer_not_caching);
    return true;
  }

  auto &upstream = Instance.getObjectStore();
  auto &diags = Instance.getDiags();
  auto &casOpts = Instance.getInvocation().getCASOptions();

  // Use the requested reproducer directory, creating it if needed; otherwise
  // create a unique directory for the reproducer.
  llvm::SmallString<256> reproDir(
      Instance.getInvocation().getFrontendOptions().GenReproducerDir);
  if (!reproDir.empty()) {
    if (!llvm::sys::fs::is_directory(reproDir)) {
      auto errCode = llvm::sys::fs::create_directory(reproDir);
      if (errCode) {
        diags.diagnose(SourceLoc(), diag::error_cannot_create_reproducer_dir,
                       reproDir, errCode.message());
        return true;
      }
    }
  } else {
    auto errCode =
        llvm::sys::fs::createUniqueDirectory("swift-reproducer", reproDir);
    if (errCode) {
      diags.diagnose(SourceLoc(), diag::error_cannot_create_reproducer_dir,
                     reproDir, errCode.message());
      return true;
    }
  }

  // Create a CAS for all the inputs.
  llvm::SmallString<256> casPath(reproDir);
  llvm::sys::path::append(casPath, "cas");
  clang::CASOptions newCAS;
  newCAS.CASPath = casPath.str();
  newCAS.PluginPath = casOpts.CASOpts.PluginPath;
  newCAS.PluginOptions = casOpts.CASOpts.PluginOptions;
  auto db = newCAS.getOrCreateDatabases();
  if (!db) {
    diags.diagnose(SourceLoc(), diag::error_cas_initialization,
                   toString(db.takeError()));
    // Without a destination CAS nothing can be imported: report failure.
    return true;
  }

  // Maps a CAS ID spelled on the original command-line to its spelling in the
  // new CAS, for IDs whose textual form changed.
  llvm::StringMap<const char *> idsToUpdate;
  llvm::BumpPtrAllocator alloc;
  llvm::StringSaver argSaver(alloc);
  auto newArgs = Args.vec();

  // Import a single CAS object by ID. Returns true on error.
  auto importID = [&](StringRef str) {
    if (str.empty())
      return true;

    auto id = upstream.parseID(str);
    if (!id) {
      diags.diagnose(SourceLoc(), diag::error_invalid_cas_id, str,
                     toString(id.takeError()));
      return true;
    }
    auto ref = upstream.getReference(*id);
    if (!ref) {
      diags.diagnose(SourceLoc(), diag::error_load_input_from_cas, str);
      return true;
    }

    auto imported = db->first->importObject(upstream, *ref);
    if (!imported) {
      diags.diagnose(SourceLoc(), diag::error_cas, "import input dependency",
                     toString(imported.takeError()));
      return true;
    }

    auto newID = db->first->getID(*imported).toString();
    if (newID != str)
      idsToUpdate[str] = argSaver.save(newID).data();

    return false;
  };

  // Import a cache key together with its action cache value. Returns the ID
  // of the key in the new CAS, or std::nullopt if it was not imported.
  auto importKey = [&](StringRef key) -> std::optional<std::string> {
    if (key.empty())
      return std::nullopt;

    auto id = upstream.parseID(key);
    if (!id) {
      diags.diagnose(SourceLoc(), diag::error_invalid_cas_id, key,
                     toString(id.takeError()));
      return std::nullopt;
    }
    auto ref = upstream.getReference(*id);
    if (!ref) {
      diags.diagnose(SourceLoc(), diag::error_load_input_from_cas, key);
      return std::nullopt;
    }

    // Import the entire key.
    auto imported = db->first->importObject(upstream, *ref);
    if (!imported) {
      diags.diagnose(SourceLoc(), diag::error_cas, "import input dependency",
                     toString(imported.takeError()));
      return std::nullopt;
    }

    auto importedProxy = db->first->getProxy(*imported);
    if (!importedProxy) {
      diags.diagnose(SourceLoc(), diag::error_cas, "load imported dependency",
                     toString(importedProxy.takeError()));
      return std::nullopt;
    }

    // If not a binary module, check command-line and import some of its inputs.
    // The command-line entries are stored in the format specified in
    // Frontend/CompileJobCacheKey.cpp, where each command-line entry is
    // space-separated option and its argument (if applicable).
    if (importedProxy->getNumReferences() > 0) {
      if (auto err = iterateCommandLine(
              upstream, *ref, [&](StringRef arg) -> llvm::Error {
                if (arg.consume_front("-clang-include-tree-root ") ||
                    arg.consume_front("-clang-include-tree-filelist "))
                  importID(arg);
                return llvm::Error::success();
              })) {
        diags.diagnose(SourceLoc(), diag::error_cas, "import dependency cmd",
                       toString(std::move(err)));
        return std::nullopt;
      }
    }

    // Import the value.
    auto result = Instance.getActionCache().get(*id);
    if (!result) {
      diags.diagnose(SourceLoc(), diag::error_cas, "lookup key dependency",
                     toString(result.takeError()));
      return std::nullopt;
    }

    // Missing value in the action cache, this will result in a failed lookup
    // later in the compilation so the reproducer is going to skip this entry.
    if (!*result)
      return std::nullopt;

    auto value = upstream.getReference(**result);
    if (!value)
      return std::nullopt;

    auto newValue = db->first->importObject(upstream, *value);
    if (!newValue) {
      diags.diagnose(SourceLoc(), diag::error_cas, "import value dependency",
                     toString(newValue.takeError()));
      return std::nullopt;
    }

    if (auto err = db->second->put(db->first->getID(*imported),
                                   db->first->getID(*newValue))) {
      diags.diagnose(SourceLoc(), diag::error_cas,
                     "associate key/value dependency",
                     toString(std::move(err)));
      return std::nullopt;
    }
    return db->first->getID(*imported).toString();
  };

  // Import a cache key and record the new spelling if it changed. Returns
  // true if the key was not imported.
  auto mapKey = [&](StringRef key) {
    auto imported = importKey(key);
    if (!imported)
      return true;

    if (*imported != key)
      idsToUpdate[key] = argSaver.save(*imported).data();

    return false;
  };

  // The results below are intentionally not checked: each of these options
  // may be unset, and failures have already been diagnosed by the helpers.
  importID(casOpts.ClangIncludeTree);
  importID(casOpts.ClangIncludeTreeFileList);
  mapKey(casOpts.InputFileKey);
  mapKey(casOpts.BridgingHeaderPCHCacheKey);

  // Import module dependencies.
  // If building clang/swift modules, the module dependencies are passed on
  // command-line.
  for (auto &mod : Instance.getInvocation()
                       .getSearchPathOptions()
                       .ExplicitSwiftModuleInputs)
    importKey(mod.second);

  const auto &clangArgs =
      Instance.getInvocation().getClangImporterOptions().ExtraArgs;
  for (auto xcc = clangArgs.begin(); xcc != clangArgs.end(); ++xcc) {
    if (*xcc == "-fmodule-file-cache-key") {
      // The clang module key is passed via: -fmodule-file-cache-key <PATH>
      // <KEY>.
      if (++xcc == clangArgs.end())
        continue;
      if (++xcc == clangArgs.end())
        continue;
      importKey(*xcc);
    }
  }

  // If building current module, the module dependencies are passed inside
  // explicit module map json file.
  auto &mapOpts = Instance.getInvocation()
                      .getSearchPathOptions()
                      .ExplicitSwiftModuleMapPath;
  if (!mapOpts.empty()) {
    auto mapID = upstream.parseID(mapOpts);
    if (!mapID) {
      diags.diagnose(SourceLoc(), diag::error_invalid_cas_id, mapOpts,
                     toString(mapID.takeError()));
      return true;
    }
    auto mapProxy = upstream.getProxy(*mapID);
    if (!mapProxy) {
      diags.diagnose(SourceLoc(), diag::error_cas, "load module map",
                     toString(mapProxy.takeError()));
      return true;
    }
    auto map = llvm::json::parse(mapProxy->getData());
    if (!map) {
      diags.diagnose(SourceLoc(), diag::explicit_swift_module_map_corrupted,
                     mapOpts);
      return true;
    }
    if (auto array = map->getAsArray()) {
      for (auto &entry : *array) {
        if (auto dep = entry.getAsObject()) {
          for (auto &obj : *dep) {
            if (obj.first == "moduleCacheKey" ||
                obj.first == "clangModuleCacheKey") {
              if (auto depKey = obj.second.getAsString())
                importKey(*depKey);
            }
          }
        }
      }
    }
    // Import explicit module map.
    auto newMapRef = db->first->storeFromString({}, mapProxy->getData());
    if (!newMapRef) {
      diags.diagnose(SourceLoc(), diag::error_cas, "store module map",
                     toString(newMapRef.takeError()));
      return true;
    }
    auto newMapArg = db->first->getID(*newMapRef).toString();
    if (newMapArg != mapOpts)
      idsToUpdate[mapOpts] = argSaver.save(newMapArg).data();
  }

  // Drop all the options that no longer apply. Every occurrence is removed
  // (together with its argument when hasArg is set), because options such as
  // -cas-plugin-option can be repeated and are re-added below.
  auto dropArg = [&newArgs](StringRef arg, bool hasArg = true) {
    auto matches = [&](const char *a) { return arg == a; };
    auto found = llvm::find_if(newArgs, matches);
    while (found != newArgs.end()) {
      found = newArgs.erase(found);
      if (hasArg && found != newArgs.end())
        found = newArgs.erase(found);
      found = llvm::find_if(llvm::make_range(found, newArgs.end()), matches);
    }
  };
  dropArg("-cas-path");
  dropArg("-cas-plugin-path");
  dropArg("-cas-plugin-option");
  dropArg("-gen-reproducer", false);
  dropArg("-gen-reproducer-dir");

  // Now upgrade the entire command-line.
  for (auto &arg : newArgs) {
    auto updated = idsToUpdate.find(arg);
    if (updated != idsToUpdate.end())
      arg = updated->second;
  }

  // Add the configuration for the new CAS options. Note those options and only
  // these options will need to be adjusted if the reproducer is copied into a
  // different location.
  newArgs.push_back("-cas-path");
  newArgs.push_back(casPath.c_str());
  if (!newCAS.PluginPath.empty()) {
    newArgs.push_back("-cas-plugin-path");
    newArgs.push_back(newCAS.PluginPath.c_str());
    for (auto Opt : newCAS.PluginOptions) {
      newArgs.push_back("-cas-plugin-option");
      newArgs.push_back(
          argSaver.save(llvm::Twine(Opt.first) + "=" + Opt.second).data());
    }
  }

  // Write shell script.
  llvm::SmallString<256> scriptPath(reproDir);
  llvm::sys::path::append(scriptPath, "reproduce.sh");

  std::error_code ec;
  llvm::raw_fd_ostream scriptOS(scriptPath, ec, llvm::sys::fs::CD_CreateNew,
                                llvm::sys::fs::FA_Write,
                                llvm::sys::fs::OF_Text);
  if (ec) {
    diags.diagnose(SourceLoc(), diag::error_cannot_create_reproducer_dir,
                   scriptPath, ec.message());
    return true;
  }

  // NOTE(review): arguments are quoted but not escaped, so an argument that
  // itself contains a double quote would not round-trip — confirm whether
  // that can occur here.
  for (auto &arg : newArgs)
    scriptOS << "\"" << arg << "\" ";

  diags.diagnose(SourceLoc(), diag::note_reproducer, reproDir);
  return false;
}
/// Performs the compile requested by the user.
/// \param Instance Will be reset after performIRGeneration when the verifier
/// mode is NoVerify and there were no errors.
@@ -2119,6 +2433,12 @@ int swift::performFrontend(ArrayRef<const char *> Args,
observer->configuredCompiler(*Instance);
}
if (Invocation.getFrontendOptions().GenReproducer) {
int ReturnCode = generateReproducer(*Instance, Args) ? 1 : 0;
DH.endMessage(ReturnCode);
return finishDiagProcessing(ReturnCode, /*verifierEnabled*/ false);
}
if (verifierEnabled) {
// Suppress printed diagnostic output during the compile if the verifier is
// enabled.

85
test/CAS/reproducer.swift Normal file
View File

@@ -0,0 +1,85 @@
// RUN: %empty-directory(%t)
// RUN: split-file %s %t
// RUN: %target-swift-frontend -scan-dependencies -module-name Test -O -module-cache-path %t/clang-module-cache \
// RUN: -disable-implicit-string-processing-module-import -disable-implicit-concurrency-module-import \
// RUN: -import-objc-header %t/Bridging.h -scanner-output-dir %t -auto-bridging-header-chaining -scanner-debug-write-output \
// RUN: %t/test.swift %t/foo.swift -I %t/include -o %t/deps.json -cache-compile-job -cas-path %t/cas
// RUN: %{python} %S/../CAS/Inputs/BuildCommandExtractor.py %t/deps.json clang:SwiftShims > %t/shim.cmd
// RUN: %swift_frontend_plain @%t/shim.cmd
// RUN: %{python} %S/Inputs/BuildCommandExtractor.py %t/deps.json clang:Dummy > %t/dummy.cmd
// RUN: %swift_frontend_plain @%t/dummy.cmd
// RUN: %{python} %S/Inputs/BuildCommandExtractor.py %t/deps.json Simple > %t/simple.cmd
// RUN: %swift_frontend_plain @%t/simple.cmd
// RUN: %{python} %S/Inputs/BuildCommandExtractor.py %t/deps.json bridgingHeader > %t/header.cmd
// RUN: %target-swift-frontend @%t/header.cmd -disable-implicit-swift-modules -O -o %t/bridging.pch
// RUN: %cache-tool -cas-path %t/cas -cache-tool-action print-output-keys -- \
// RUN: %target-swift-frontend @%t/header.cmd -disable-implicit-swift-modules -O -o %t/bridging.pch > %t/keys.json
// RUN: %{python} %S/Inputs/ExtractOutputKey.py %t/keys.json > %t/key
// RUN: %{python} %S/Inputs/GenerateExplicitModuleMap.py %t/deps.json > %t/map.json
// RUN: llvm-cas --cas %t/cas --make-blob --data %t/map.json > %t/map.casid
// RUN: %{python} %S/Inputs/BuildCommandExtractor.py %t/deps.json Test > %t/MyApp.cmd
// RUN: echo "\"-disable-implicit-string-processing-module-import\"" >> %t/MyApp.cmd
// RUN: echo "\"-disable-implicit-concurrency-module-import\"" >> %t/MyApp.cmd
// RUN: echo "\"-disable-implicit-swift-modules\"" >> %t/MyApp.cmd
// RUN: echo "\"-import-objc-header\"" >> %t/MyApp.cmd
// RUN: echo "\"%t/Bridging.h\"" >> %t/MyApp.cmd
// RUN: echo "\"-import-pch\"" >> %t/MyApp.cmd
// RUN: echo "\"%t/bridging.pch\"" >> %t/MyApp.cmd
// RUN: echo "\"-bridging-header-pch-key\"" >> %t/MyApp.cmd
// RUN: echo "\"@%t/key\"" >> %t/MyApp.cmd
// RUN: echo "\"-explicit-swift-module-map-file\"" >> %t/MyApp.cmd
// RUN: echo "\"@%t/map.casid\"" >> %t/MyApp.cmd
// RUN: %target-swift-frontend %t/test.swift %t/foo.swift -O -emit-module -emit-module-path %t/Test.swiftmodule -c \
// RUN: -module-name Test -o %t/test.o -cache-compile-job -cas-path %t/cas @%t/MyApp.cmd -gen-reproducer -gen-reproducer-dir %t/crash
// RUN: %FileCheck %s --input-file=%t/crash/reproduce.sh
// CHECK: -cache-compile-job
// CHECK-NOT: -gen-reproducer
/// Delete some inputs from original compilation and run the reproducer.
// RUN: rm -rf %t/include
// RUN: %swift_frontend_plain @%t/crash/reproduce.sh
/// Also test module jobs.
// RUN: %swift_frontend_plain @%t/dummy.cmd -gen-reproducer -gen-reproducer-dir %t/crash-2
// RUN: %FileCheck %s --input-file=%t/crash-2/reproduce.sh
// RUN: %swift_frontend_plain @%t/crash-2/reproduce.sh
// RUN: %swift_frontend_plain @%t/simple.cmd -gen-reproducer -gen-reproducer-dir %t/crash-3
// RUN: %FileCheck %s --input-file=%t/crash-3/reproduce.sh
// RUN: %swift_frontend_plain @%t/crash-3/reproduce.sh
//--- test.swift
import Dummy
import Simple
public func testFunc() {
foo()
bridge()
simple()
}
//--- foo.swift
public func foo() {}
//--- Bridging.h
void bridge(void);
//--- include/module.modulemap
module Dummy {
umbrella header "Dummy.h"
}
//--- include/Dummy.h
void dummy(void);
//--- include/Simple.swiftinterface
// swift-interface-format-version: 1.0
// swift-module-flags: -module-name Simple -O -disable-implicit-string-processing-module-import -disable-implicit-concurrency-module-import
import Swift
import Dummy
public func simple() { }