[Dependency Scanning] Add functionality to validate contents of a loaded scanner cache state

Check whether each module dependency info is up-to-date with respect to when the cache contents were serialized in a prior scan.

- Add a timestamp field to the serialization format for the dependency scanner cache
- Add a flag "-validate-prior-dependency-scan-cache" which, when combined with "-load-dependency-scan-cache", will have the scanner prune dependencies from the deserialized cache which have inputs that are newer than the prior scan itself

With the above in place, the scan otherwise proceeds as-is, getting cache hits for entries still valid since the prior scan.
This commit is contained in:
Artem Chikin
2025-01-24 14:58:22 -08:00
parent 0ba17c5572
commit acb4e847f5
16 changed files with 408 additions and 51 deletions

View File

@@ -58,6 +58,7 @@ class ModuleDependenciesCacheDeserializer {
bool readSignature();
bool enterGraphBlock();
bool readMetadata(StringRef scannerContextHash);
bool readSerializationTime(llvm::sys::TimePoint<> &SerializationTimeStamp);
bool readGraph(ModuleDependenciesCache &cache);
std::optional<std::string> getIdentifier(unsigned n);
@@ -76,7 +77,8 @@ class ModuleDependenciesCacheDeserializer {
public:
ModuleDependenciesCacheDeserializer(llvm::MemoryBufferRef Data)
: Cursor(Data) {}
bool readInterModuleDependenciesCache(ModuleDependenciesCache &cache);
bool readInterModuleDependenciesCache(ModuleDependenciesCache &cache,
llvm::sys::TimePoint<> &serializedCacheTimeStamp);
};
} // namespace swift
@@ -177,6 +179,35 @@ bool ModuleDependenciesCacheDeserializer::readMetadata(StringRef scannerContextH
return false;
}
/// Read the record that stores the time at which the cache was serialized.
///
/// The next bitstream entry must be a TIME_NODE record whose blob holds the
/// decimal text of a tick count, which is interpreted as an
/// llvm::sys::TimePoint<> duration since the epoch.
///
/// \param SerializationTimeStamp [out] Receives the decoded time point. It is
///        left untouched if the record is missing or its blob is malformed.
/// \returns true on failure (cursor error, unexpected entry/record kind,
///          unparsable blob, or a time-stamp equal to the epoch); false on
///          success.
bool ModuleDependenciesCacheDeserializer::readSerializationTime(
    llvm::sys::TimePoint<> &SerializationTimeStamp) {
  using namespace graph_block;

  auto entry = Cursor.advance();
  if (!entry) {
    consumeError(entry.takeError());
    return true;
  }

  if (entry->Kind != llvm::BitstreamEntry::Record)
    return true;

  auto recordID = Cursor.readRecord(entry->ID, Scratch, &BlobData);
  if (!recordID) {
    consumeError(recordID.takeError());
    return true;
  }

  if (*recordID != TIME_NODE)
    return true;

  TimeLayout::readRecord(Scratch);

  // Parse with StringRef::getAsInteger instead of std::stoll: stoll reports
  // empty, non-numeric or out-of-range input by throwing, which would take
  // down the process on a corrupt cache file. getAsInteger returns true on
  // any parse error, so a bad blob becomes an ordinary read failure.
  long long ticksSinceEpoch = 0;
  if (BlobData.getAsInteger(10, ticksSinceEpoch))
    return true;

  SerializationTimeStamp = llvm::sys::TimePoint<>(
      llvm::sys::TimePoint<>::duration(ticksSinceEpoch));

  // A time-stamp equal to the default (epoch) time point is reported as a
  // failed read.
  return SerializationTimeStamp == llvm::sys::TimePoint<>();
}
/// Read in the top-level block's graph structure by first reading in
/// all of the file's identifiers and arrays of identifiers, followed by
/// consuming individual module info records and registering them into the
@@ -791,7 +822,8 @@ bool ModuleDependenciesCacheDeserializer::readGraph(
}
bool ModuleDependenciesCacheDeserializer::readInterModuleDependenciesCache(
ModuleDependenciesCache &cache) {
ModuleDependenciesCache &cache,
llvm::sys::TimePoint<> &serializedCacheTimeStamp) {
using namespace graph_block;
if (readSignature())
@@ -802,6 +834,9 @@ bool ModuleDependenciesCacheDeserializer::readInterModuleDependenciesCache(
if (readMetadata(cache.scannerContextHash))
return true;
if (readSerializationTime(serializedCacheTimeStamp))
return true;
if (readGraph(cache))
return true;
@@ -975,21 +1010,23 @@ ModuleDependenciesCacheDeserializer::getModuleDependencyIDArray(unsigned n) {
/// Deserialize an inter-module dependency cache from an in-memory buffer.
///
/// Constructs a ModuleDependenciesCacheDeserializer over \p buffer and
/// forwards to its readInterModuleDependenciesCache, returning that result
/// unchanged.
///
/// NOTE(review): this hunk shows both the previous and the updated
/// signature/return lines; the updated form additionally threads
/// \p serializedCacheTimeStamp through to the deserializer.
bool swift::dependencies::module_dependency_cache_serialization::
readInterModuleDependenciesCache(llvm::MemoryBuffer &buffer,
ModuleDependenciesCache &cache) {
ModuleDependenciesCache &cache,
llvm::sys::TimePoint<> &serializedCacheTimeStamp) {
ModuleDependenciesCacheDeserializer deserializer(buffer.getMemBufferRef());
return deserializer.readInterModuleDependenciesCache(cache);
return deserializer.readInterModuleDependenciesCache(cache, serializedCacheTimeStamp);
}
/// Deserialize an inter-module dependency cache from the file at \p path.
///
/// Loads the file into a memory buffer and forwards to the MemoryBuffer
/// overload. Returns true if the file cannot be read; otherwise returns
/// whatever the MemoryBuffer overload returns.
///
/// NOTE(review): this hunk shows both the previous and the updated
/// signature/return lines; the updated form additionally forwards
/// \p serializedCacheTimeStamp.
bool swift::dependencies::module_dependency_cache_serialization::
readInterModuleDependenciesCache(StringRef path,
ModuleDependenciesCache &cache) {
ModuleDependenciesCache &cache,
llvm::sys::TimePoint<> &serializedCacheTimeStamp) {
// Label any crash during loading with the action and the cache path.
PrettyStackTraceStringAction stackTrace(
"loading inter-module dependency graph", path);
auto buffer = llvm::MemoryBuffer::getFile(path);
// Failure to open/read the file is reported as `true`.
if (!buffer)
return true;
return readInterModuleDependenciesCache(*buffer.get(), cache);
return readInterModuleDependenciesCache(*buffer.get(), cache, serializedCacheTimeStamp);
}
// MARK: Serialization
@@ -1107,6 +1144,7 @@ class ModuleDependenciesCacheSerializer {
void writeBlockInfoBlock();
void writeMetadata(StringRef scanningContextHash);
void writeSerializationTime(const llvm::sys::TimePoint<> &scanInitializationTime);
void writeIdentifiers();
void writeArraysOfIdentifiers();
@@ -1170,6 +1208,7 @@ void ModuleDependenciesCacheSerializer::writeBlockInfoBlock() {
BLOCK(GRAPH_BLOCK);
BLOCK_RECORD(graph_block, METADATA);
BLOCK_RECORD(graph_block, TIME_NODE);
BLOCK_RECORD(graph_block, IDENTIFIER_NODE);
BLOCK_RECORD(graph_block, IDENTIFIER_ARRAY_NODE);
@@ -1202,6 +1241,15 @@ void ModuleDependenciesCacheSerializer::writeMetadata(StringRef scanningContextH
scanningContextHash);
}
/// Emit the time record (TimeLayout) for the serialized cache.
///
/// \param scanInitializationTime The time point to record. Its tick count
///        since the epoch is written as decimal text in the record's blob.
void ModuleDependenciesCacheSerializer::writeSerializationTime(
    const llvm::sys::TimePoint<> &scanInitializationTime) {
  using namespace graph_block;

  // Encode the raw tick count as a decimal string for the record payload.
  const std::string encodedTicks =
      std::to_string(scanInitializationTime.time_since_epoch().count());

  TimeLayout::emitRecord(Out, ScratchRecord, AbbrCodes[TimeLayout::Code],
                         encodedTicks);
}
void ModuleDependenciesCacheSerializer::writeIdentifiers() {
using namespace graph_block;
for (auto str : Identifiers) {
@@ -1770,6 +1818,7 @@ void ModuleDependenciesCacheSerializer::collectStringsAndArrays(
addIdentifier(swiftBinDeps->sourceInfoPath);
addIdentifier(swiftBinDeps->moduleCacheKey);
addIdentifier(swiftBinDeps->headerImport);
addIdentifier(swiftBinDeps->definingModuleInterfacePath);
addIdentifier(swiftBinDeps->userModuleVersion);
addStringArray(moduleID,
ModuleIdentifierArrayKind::HeaderInputModuleDependencies,
@@ -1848,6 +1897,7 @@ void ModuleDependenciesCacheSerializer::writeInterModuleDependenciesCache(
using namespace graph_block;
registerRecordAbbr<MetadataLayout>();
registerRecordAbbr<TimeLayout>();
registerRecordAbbr<IdentifierNodeLayout>();
registerRecordAbbr<IdentifierArrayLayout>();
registerRecordAbbr<LinkLibraryLayout>();
@@ -1870,6 +1920,9 @@ void ModuleDependenciesCacheSerializer::writeInterModuleDependenciesCache(
// Write the version information
writeMetadata(cache.scannerContextHash);
// The current time-stamp
writeSerializationTime(cache.scanInitializationTime);
// Write the strings
writeIdentifiers();