serialization: obfuscate the serialized search paths

We noticed some Swift clients rely on the serialized search paths in the module to
find dependencies, and dropping these paths altogether can lead to build failures like
rdar://85840921.

This change teaches the serialization to obfuscate the search paths and the deserialization
to recover them. This allows clients to keep accessing these paths without exposing
them when shipping the module to other users.
This commit is contained in:
Xi Ge
2021-11-30 18:52:21 -08:00
parent f00f2841ab
commit 0047d81f9a
21 changed files with 139 additions and 27 deletions

View File

@@ -405,6 +405,8 @@ public:
virtual void collectBasicSourceFileInfo(
llvm::function_ref<void(const BasicSourceFileInfo &)> callback) const {}
/// Hook for reporting serialized search paths through \p callback.
/// The default implementation has an empty body and never invokes
/// \p callback.
virtual void collectSerializedSearchPath(
llvm::function_ref<void(StringRef)> callback) const {}
static bool classof(const FileUnit *file) {
return file->getKind() == FileUnitKind::SerializedAST ||
file->getKind() == FileUnitKind::ClangModule ||

View File

@@ -833,6 +833,8 @@ public:
void collectBasicSourceFileInfo(
llvm::function_ref<void(const BasicSourceFileInfo &)> callback) const;
/// Passes \p callback to \c collectSerializedSearchPath of every file in this
/// module that is a \c LoadedFile.
void collectSerializedSearchPath(
llvm::function_ref<void(StringRef)> callback) const;
/// Retrieve a fingerprint value that summarizes the contents of this module.
///
/// This interface hash of a module is guaranteed to change if the interface

View File

@@ -103,6 +103,10 @@ public:
/// specified in LLDB from the target.source-map entries.
PathRemapper SearchPathRemapper;
/// Recover the search paths deserialized from .swiftmodule files to their
/// original form.
PathObfuscator DeserializedPathRecoverer;
private:
static StringRef
pathStringFromFrameworkSearchPath(const FrameworkSearchPath &next) {

View File

@@ -58,6 +58,21 @@ public:
}
};
class PathObfuscator {
PathRemapper obfuscator, recoverer;
public:
void addMapping(StringRef FromPrefix, StringRef ToPrefix) {
obfuscator.addMapping(FromPrefix, ToPrefix);
recoverer.addMapping(ToPrefix, FromPrefix);
}
std::string obfuscate(StringRef Path) const {
return obfuscator.remapPath(Path);
}
std::string recover(StringRef Path) const {
return recoverer.remapPath(Path);
}
};
} // end namespace swift
#endif // SWIFT_BASIC_PATHREMAPPER_H

View File

@@ -15,6 +15,7 @@
#include "swift/Basic/FileTypes.h"
#include "swift/Basic/Version.h"
#include "swift/Basic/PathRemapper.h"
#include "swift/Frontend/FrontendInputsAndOutputs.h"
#include "swift/Frontend/InputFile.h"
#include "llvm/ADT/Hashing.h"
@@ -437,6 +438,10 @@ public:
/// Whether to include symbols with SPI information in the symbol graph.
bool IncludeSPISymbolsInSymbolGraph = false;
/// This is used to obfuscate the serialized search paths so we don't have
/// to encode the actual paths into the .swiftmodule file.
PathObfuscator serializedPathObfuscator;
private:
static bool canActionEmitDependencies(ActionType);
static bool canActionEmitReferenceDependencies(ActionType);

View File

@@ -165,6 +165,10 @@ def print_clang_stats : Flag<["-"], "print-clang-stats">,
def serialize_debugging_options : Flag<["-"], "serialize-debugging-options">,
HelpText<"Always serialize options for debugging (default: only for apps)">;
// Takes a <prefix=replacement> pair. The value is split on '=' and registered
// as a mapping for the paths serialized into the .swiftmodule file.
def serialized_path_obfuscate : Separate<["-"], "serialized-path-obfuscate">,
  HelpText<"Obfuscate paths serialized into .swiftmodule files by remapping prefixes">,
  MetaVarName<"<prefix=replacement>">;
def no_serialize_debugging_options :
Flag<["-"], "no-serialize-debugging-options">,
HelpText<"Never serialize options for debugging (default: only for apps)">;

View File

@@ -45,6 +45,10 @@ namespace swift {
/// Path prefixes that should be rewritten in debug info.
PathRemapper DebuggingOptionsPrefixMap;
/// Obfuscate the serialized paths so we don't have the actual paths encoded
/// in the .swiftmodule file.
PathObfuscator PathObfuscator;
/// Describes a single-file dependency for this module, along with the
/// appropriate strategy for how to verify if it's up-to-date.
class FileDependency {

View File

@@ -465,6 +465,9 @@ public:
virtual void collectBasicSourceFileInfo(
llvm::function_ref<void(const BasicSourceFileInfo &)>) const override;
virtual void collectSerializedSearchPath(
llvm::function_ref<void(StringRef)> callback) const override;
static bool classof(const FileUnit *file) {
return file->getKind() == FileUnitKind::SerializedAST;
}

View File

@@ -104,7 +104,7 @@ struct ValidationInfo {
/// \sa validateSerializedAST()
class ExtendedValidationInfo {
SmallVector<StringRef, 4> ExtraClangImporterOpts;
StringRef SDKPath;
std::string SDKPath;
StringRef ModuleABIName;
struct {
unsigned ArePrivateImportsEnabled : 1;
@@ -121,7 +121,7 @@ public:
ExtendedValidationInfo() : Bits() {}
StringRef getSDKPath() const { return SDKPath; }
void setSDKPath(StringRef path) {
void setSDKPath(std::string path) {
assert(SDKPath.empty());
SDKPath = path;
}

View File

@@ -1808,6 +1808,15 @@ void ModuleDecl::collectBasicSourceFileInfo(
}
}
/// Walks the files of this module and forwards \p callback to every one that
/// is a \c LoadedFile; other kinds of file are skipped.
void ModuleDecl::collectSerializedSearchPath(
    llvm::function_ref<void(StringRef)> callback) const {
  for (const FileUnit *unit : getFiles()) {
    const auto *loaded = dyn_cast<LoadedFile>(unit);
    if (!loaded)
      continue;
    loaded->collectSerializedSearchPath(callback);
  }
}
Fingerprint ModuleDecl::getFingerprint() const {
StableHasher hasher = StableHasher::defaultHasher();
SmallVector<Fingerprint, 16> FPs;

View File

@@ -292,6 +292,10 @@ bool ArgsToFrontendOptionsConverter::convert(
Opts.HermeticSealAtLink = Args.hasArg(OPT_experimental_hermetic_seal_at_link);
for (auto A : Args.getAllArgValues(options::OPT_serialized_path_obfuscate)) {
auto SplitMap = StringRef(A).split('=');
Opts.serializedPathObfuscator.addMapping(SplitMap.first, SplitMap.second);
}
return false;
}

View File

@@ -1214,6 +1214,11 @@ static bool ParseSearchPathArgs(SearchPathOptions &Opts,
Opts.PlaceholderDependencyModuleMap = A->getValue();
if (const Arg *A = Args.getLastArg(OPT_batch_scan_input_file))
Opts.BatchScanInputFilePath = A->getValue();
for (auto A : Args.getAllArgValues(options::OPT_serialized_path_obfuscate)) {
auto SplitMap = StringRef(A).split('=');
Opts.DeserializedPathRecoverer.addMapping(SplitMap.first, SplitMap.second);
}
// Opts.RuntimeIncludePath is set by calls to
// setRuntimeIncludePath() or setMainExecutablePath().
// Opts.RuntimeImportPath is set by calls to

View File

@@ -164,6 +164,7 @@ SerializationOptions CompilerInvocation::computeSerializationOptions(
opts.SerializeOptionsForDebugging.getValueOr(
!module->isExternallyConsumed());
serializationOpts.PathObfuscator = opts.serializedPathObfuscator;
if (serializationOpts.SerializeOptionsForDebugging &&
opts.DebugPrefixSerializedDebuggingOptions) {
serializationOpts.DebuggingOptionsPrefixMap =

View File

@@ -361,6 +361,7 @@ ModuleFile::getModuleName(ASTContext &Ctx, StringRef modulePath,
serialization::ValidationInfo loadInfo = ModuleFileSharedCore::load(
modulePath.str(), std::move(newBuf), nullptr, nullptr,
/*isFramework*/ isFramework, Ctx.SILOpts.EnableOSSAModules,
Ctx.SearchPathOpts.DeserializedPathRecoverer,
loadedModuleFile);
Name = loadedModuleFile->Name.str();
return std::move(moduleBuf.get());
@@ -999,6 +1000,13 @@ Optional<CommentInfo> ModuleFile::getCommentForDecl(const Decl *D) const {
return getCommentForDeclByUSR(USRBuffer.str());
}
/// Invokes \p callback once for each entry in \c Core->SearchPaths, passing
/// that entry's \c Path.
void ModuleFile::collectSerializedSearchPath(
    llvm::function_ref<void(StringRef)> callback) const {
  // Bind by const reference: SearchPath::Path is an owning std::string, so
  // iterating by value would copy the string on every step and hand the
  // callback a StringRef into a temporary that dies when the iteration ends.
  for (const auto &searchPath : Core->SearchPaths)
    callback(searchPath.Path);
}
void ModuleFile::collectBasicSourceFileInfo(
llvm::function_ref<void(const BasicSourceFileInfo &)> callback) const {
if (Core->SourceFileListData.empty())

View File

@@ -743,7 +743,8 @@ public:
Optional<Fingerprint> loadFingerprint(const IterableDeclContext *IDC) const;
void collectBasicSourceFileInfo(
llvm::function_ref<void(const BasicSourceFileInfo &)> callback) const;
void collectSerializedSearchPath(
llvm::function_ref<void(StringRef)> callback) const;
// MARK: Deserialization interface

View File

@@ -84,7 +84,8 @@ static bool enterTopLevelModuleBlock(llvm::BitstreamCursor &cursor,
/// Returns true on success.
static bool readOptionsBlock(llvm::BitstreamCursor &cursor,
SmallVectorImpl<uint64_t> &scratch,
ExtendedValidationInfo &extendedInfo) {
ExtendedValidationInfo &extendedInfo,
PathObfuscator &pathRecoverer) {
while (!cursor.AtEndOfStream()) {
Expected<llvm::BitstreamEntry> maybeEntry = cursor.advance();
if (!maybeEntry) {
@@ -119,7 +120,7 @@ static bool readOptionsBlock(llvm::BitstreamCursor &cursor,
unsigned kind = maybeKind.get();
switch (kind) {
case options_block::SDK_PATH:
extendedInfo.setSDKPath(blobData);
extendedInfo.setSDKPath(pathRecoverer.recover(blobData));
break;
case options_block::XCC:
extendedInfo.addExtraClangImporterOption(blobData);
@@ -171,7 +172,8 @@ static bool readOptionsBlock(llvm::BitstreamCursor &cursor,
static ValidationInfo validateControlBlock(
llvm::BitstreamCursor &cursor, SmallVectorImpl<uint64_t> &scratch,
std::pair<uint16_t, uint16_t> expectedVersion, bool requiresOSSAModules,
ExtendedValidationInfo *extendedInfo) {
ExtendedValidationInfo *extendedInfo,
PathObfuscator &pathRecoverer) {
// The control block is malformed until we've at least read a major version
// number.
ValidationInfo result;
@@ -202,7 +204,7 @@ static ValidationInfo validateControlBlock(
result.status = Status::Malformed;
return result;
}
if (!readOptionsBlock(cursor, scratch, *extendedInfo)) {
if (!readOptionsBlock(cursor, scratch, *extendedInfo, pathRecoverer)) {
result.status = Status::Malformed;
return result;
}
@@ -465,10 +467,11 @@ ValidationInfo serialization::validateSerializedAST(
result.status = Status::Malformed;
return result;
}
PathObfuscator localObfuscator;
result = validateControlBlock(
cursor, scratch,
{SWIFTMODULE_VERSION_MAJOR, SWIFTMODULE_VERSION_MINOR},
requiresOSSAModules, extendedInfo);
requiresOSSAModules, extendedInfo, localObfuscator);
if (result.status == Status::Malformed)
return result;
} else if (dependencies &&
@@ -938,7 +941,7 @@ getActualImportControl(unsigned rawValue) {
}
}
bool ModuleFileSharedCore::readModuleDocIfPresent() {
bool ModuleFileSharedCore::readModuleDocIfPresent(PathObfuscator &pathRecoverer) {
if (!this->ModuleDocInputBuffer)
return true;
@@ -977,7 +980,7 @@ bool ModuleFileSharedCore::readModuleDocIfPresent() {
info = validateControlBlock(
docCursor, scratch, {SWIFTDOC_VERSION_MAJOR, SWIFTDOC_VERSION_MINOR},
RequiresOSSAModules,
/*extendedInfo*/ nullptr);
/*extendedInfo*/ nullptr, pathRecoverer);
if (info.status != Status::Valid)
return false;
// Check that the swiftdoc is actually for this module.
@@ -1083,7 +1086,7 @@ bool ModuleFileSharedCore::readDeclLocsBlock(llvm::BitstreamCursor &cursor) {
return false;
}
bool ModuleFileSharedCore::readModuleSourceInfoIfPresent() {
bool ModuleFileSharedCore::readModuleSourceInfoIfPresent(PathObfuscator &pathRecoverer) {
if (!this->ModuleSourceInfoInputBuffer)
return true;
@@ -1121,7 +1124,8 @@ bool ModuleFileSharedCore::readModuleSourceInfoIfPresent() {
infoCursor, scratch,
{SWIFTSOURCEINFO_VERSION_MAJOR, SWIFTSOURCEINFO_VERSION_MINOR},
RequiresOSSAModules,
/*extendedInfo*/ nullptr);
/*extendedInfo*/ nullptr,
pathRecoverer);
if (info.status != Status::Valid)
return false;
// Check that the swiftsourceinfo is actually for this module.
@@ -1196,7 +1200,7 @@ ModuleFileSharedCore::ModuleFileSharedCore(
std::unique_ptr<llvm::MemoryBuffer> moduleDocInputBuffer,
std::unique_ptr<llvm::MemoryBuffer> moduleSourceInfoInputBuffer,
bool isFramework, bool requiresOSSAModules,
serialization::ValidationInfo &info)
serialization::ValidationInfo &info, PathObfuscator &pathRecoverer)
: ModuleInputBuffer(std::move(moduleInputBuffer)),
ModuleDocInputBuffer(std::move(moduleDocInputBuffer)),
ModuleSourceInfoInputBuffer(std::move(moduleSourceInfoInputBuffer)),
@@ -1247,7 +1251,7 @@ ModuleFileSharedCore::ModuleFileSharedCore(
info = validateControlBlock(
cursor, scratch,
{SWIFTMODULE_VERSION_MAJOR, SWIFTMODULE_VERSION_MINOR},
RequiresOSSAModules, &extInfo);
RequiresOSSAModules, &extInfo, pathRecoverer);
if (info.status != Status::Valid) {
error(info.status);
return;
@@ -1372,7 +1376,8 @@ ModuleFileSharedCore::ModuleFileSharedCore(
bool isSystem;
input_block::SearchPathLayout::readRecord(scratch, isFramework,
isSystem);
SearchPaths.push_back({blobData, isFramework, isSystem});
SearchPaths.push_back({pathRecoverer.recover(blobData), isFramework,
isSystem});
break;
}
case input_block::MODULE_INTERFACE_PATH: {
@@ -1572,8 +1577,8 @@ ModuleFileSharedCore::ModuleFileSharedCore(
return;
}
// Read source info file.
readModuleSourceInfoIfPresent();
if (!readModuleDocIfPresent()) {
readModuleSourceInfoIfPresent(pathRecoverer);
if (!readModuleDocIfPresent(pathRecoverer)) {
info.status = error(Status::MalformedDocumentation);
return;
}

View File

@@ -149,7 +149,7 @@ private:
SmallVector<Dependency, 8> Dependencies;
struct SearchPath {
StringRef Path;
std::string Path;
bool IsFramework;
bool IsSystem;
};
@@ -374,7 +374,7 @@ private:
std::unique_ptr<llvm::MemoryBuffer> moduleDocInputBuffer,
std::unique_ptr<llvm::MemoryBuffer> moduleSourceInfoInputBuffer,
bool isFramework, bool requiresOSSAModules,
serialization::ValidationInfo &info);
serialization::ValidationInfo &info, PathObfuscator &pathRecoverer);
/// Change the status of the current module.
Status error(Status issue) {
@@ -464,7 +464,7 @@ private:
/// Loads data from #ModuleDocInputBuffer.
///
/// Returns false if there was an error.
bool readModuleDocIfPresent();
bool readModuleDocIfPresent(PathObfuscator &pathRecoverer);
/// Reads the source loc block, which contains USR to decl location mapping.
///
@@ -474,7 +474,7 @@ private:
/// Loads data from #ModuleSourceInfoInputBuffer.
///
/// Returns false if there was an error.
bool readModuleSourceInfoIfPresent();
bool readModuleSourceInfoIfPresent(PathObfuscator &pathRecoverer);
/// Read an on-disk decl hash table stored in
/// \c sourceinfo_block::DeclUSRSLayout format.
@@ -510,12 +510,13 @@ public:
std::unique_ptr<llvm::MemoryBuffer> moduleDocInputBuffer,
std::unique_ptr<llvm::MemoryBuffer> moduleSourceInfoInputBuffer,
bool isFramework, bool requiresOSSAModules,
PathObfuscator &pathRecoverer,
std::shared_ptr<const ModuleFileSharedCore> &theModule) {
serialization::ValidationInfo info;
auto *core = new ModuleFileSharedCore(
std::move(moduleInputBuffer), std::move(moduleDocInputBuffer),
std::move(moduleSourceInfoInputBuffer), isFramework,
requiresOSSAModules, info);
requiresOSSAModules, info, pathRecoverer);
if (!moduleInterfacePath.empty()) {
ArrayRef<char> path;
core->allocateBuffer(path, moduleInterfacePath);

View File

@@ -1077,9 +1077,11 @@ void Serializer::writeHeader(const SerializationOptions &options) {
options_block::XCCLayout XCC(Out);
const auto &PathRemapper = options.DebuggingOptionsPrefixMap;
const auto &PathObfuscator = options.PathObfuscator;
auto sdkPath = M->getASTContext().SearchPathOpts.SDKPath;
SDKPath.emit(
ScratchRecord,
PathRemapper.remapPath(M->getASTContext().SearchPathOpts.SDKPath));
PathObfuscator.obfuscate(PathRemapper.remapPath(sdkPath)));
auto &Opts = options.ExtraClangOptions;
for (auto Arg = Opts.begin(), E = Opts.end(); Arg != E; ++Arg) {
StringRef arg(*Arg);
@@ -1157,16 +1159,17 @@ void Serializer::writeInputBlock(const SerializationOptions &options) {
input_block::ModuleInterfaceLayout ModuleInterface(Out);
if (options.SerializeOptionsForDebugging) {
const auto &PathObfuscator = options.PathObfuscator;
const auto &PathMapper = options.DebuggingOptionsPrefixMap;
const SearchPathOptions &searchPathOpts = M->getASTContext().SearchPathOpts;
// Put the framework search paths first so that they'll be preferred upon
// deserialization.
for (auto &framepath : searchPathOpts.FrameworkSearchPaths)
SearchPath.emit(ScratchRecord, /*framework=*/true, framepath.IsSystem,
PathMapper.remapPath(framepath.Path));
PathObfuscator.obfuscate(PathMapper.remapPath(framepath.Path)));
for (auto &path : searchPathOpts.ImportSearchPaths)
SearchPath.emit(ScratchRecord, /*framework=*/false, /*system=*/false,
PathMapper.remapPath(path));
PathObfuscator.obfuscate(PathMapper.remapPath(path)));
}
// Note: We're not using StringMap here because we don't need to own the

View File

@@ -404,7 +404,9 @@ llvm::ErrorOr<ModuleDependencies> SerializedModuleLoaderBase::scanModuleFile(
bool isFramework = false;
serialization::ValidationInfo loadInfo = ModuleFileSharedCore::load(
modulePath.str(), std::move(moduleBuf.get()), nullptr, nullptr,
isFramework, isRequiredOSSAModules(), loadedModuleFile);
isFramework, isRequiredOSSAModules(),
Ctx.SearchPathOpts.DeserializedPathRecoverer,
loadedModuleFile);
const std::string moduleDocPath;
const std::string sourceInfoPath;
@@ -730,7 +732,9 @@ LoadedFile *SerializedModuleLoaderBase::loadAST(
serialization::ValidationInfo loadInfo = ModuleFileSharedCore::load(
moduleInterfacePath, std::move(moduleInputBuffer),
std::move(moduleDocInputBuffer), std::move(moduleSourceInfoInputBuffer),
isFramework, isRequiredOSSAModules(), loadedModuleFileCore);
isFramework, isRequiredOSSAModules(),
Ctx.SearchPathOpts.DeserializedPathRecoverer,
loadedModuleFileCore);
SerializedASTFile *fileUnit = nullptr;
if (loadInfo.status == serialization::Status::Valid) {
@@ -1555,3 +1559,8 @@ void SerializedASTFile::collectBasicSourceFileInfo(
llvm::function_ref<void(const BasicSourceFileInfo &)> callback) const {
File.collectBasicSourceFileInfo(callback);
}
/// Forwards \p callback unchanged to \c File.
void SerializedASTFile::collectSerializedSearchPath(
    llvm::function_ref<void(StringRef)> callback) const {
  return File.collectSerializedSearchPath(callback);
}

View File

@@ -0,0 +1,13 @@
// RUN: %empty-directory(%t)
// RUN: %empty-directory(%t.module-cache)
// RUN: %target-swift-frontend -emit-module -o %t/Foo.swiftmodule %s -parse-as-library -serialized-path-obfuscate /FOO=/CHANGED_FOO -serialized-path-obfuscate /BAR=/CHANGED_BAR -I /FOO/contents -I /BAR/contents -module-name Foo -serialize-debugging-options
// RUN: %target-swift-ide-test -print-module-metadata -module-to-print Foo -enable-swiftsourceinfo -I %t -source-filename %s | %FileCheck %s --check-prefix=CHECK-ORIGINAL
// RUN: %target-swift-ide-test -print-module-metadata -module-to-print Foo -enable-swiftsourceinfo -I %t -source-filename %s -serialized-path-obfuscate /FOO=/CHANGED_FOO -serialized-path-obfuscate /BAR=/CHANGED_BAR | %FileCheck %s --check-prefix=CHECK-RECOVER
public class A {}
// CHECK-ORIGINAL: /CHANGED_FOO/contents
// CHECK-ORIGINAL: /CHANGED_BAR/contents
// CHECK-RECOVER: /FOO/contents
// CHECK-RECOVER: /BAR/contents

View File

@@ -803,6 +803,10 @@ static llvm::cl::opt<std::string>
llvm::cl::desc("Define a macro for @available"),
llvm::cl::cat(Category));
// Each occurrence supplies one <prefix=replacement> pair; main() splits the
// value on '=' and registers it with both the serialized-path obfuscator and
// the deserialized-path recoverer.
static llvm::cl::list<std::string>
SerializedPathObfuscate("serialized-path-obfuscate",
                        llvm::cl::desc("Remap serialized paths, given as <prefix=replacement>"),
                        llvm::cl::cat(Category));
} // namespace options
static std::unique_ptr<llvm::MemoryBuffer>
@@ -2819,6 +2823,9 @@ static void printModuleMetadata(ModuleDecl *MD) {
OS << "size=" << info.getFileSize();
OS << "\n";
});
MD->collectSerializedSearchPath([&](StringRef path) {
OS << "searchpath=" << path << ";\n";
});
}
static int doPrintModuleMetaData(const CompilerInvocation &InitInvok,
@@ -4118,6 +4125,13 @@ int main(int argc, char *argv[]) {
if (!options::DefineAvailability.empty()) {
InitInvok.getLangOptions().AvailabilityMacros.push_back(options::DefineAvailability);
}
for (auto map: options::SerializedPathObfuscate) {
auto SplitMap = StringRef(map).split('=');
InitInvok.getFrontendOptions().serializedPathObfuscator
.addMapping(SplitMap.first, SplitMap.second);
InitInvok.getSearchPathOptions().DeserializedPathRecoverer
.addMapping(SplitMap.first, SplitMap.second);
}
InitInvok.getLangOptions().CollectParsedToken = true;
InitInvok.getLangOptions().BuildSyntaxTree = true;
InitInvok.getLangOptions().EnableCrossImportOverlays =