[Serialization] Improve module loading performance

When looking for a Swift module on disk, we were scanning every module search path to check whether it contains the module we are searching for. In a setup where each module is contained in its own framework search path, this scaled quadratically with the number of modules being imported. E.g. a setup with 100 modules being imported from 100 module search paths could cause on the order of 10,000 checks of `FileSystem::exists`. While these checks are fairly fast (~10µs), they add up to ~100ms.

To improve this, perform a first scan of all module search paths and list the files they contain. From this, create a lookup map that maps filenames to the search paths they can be found in. E.g. for
```
searchPath1/
  Module1.framework

searchPath2/
  Module1.framework
  Module2.swiftmodule
```
we create the following lookup table:
```
Module1.framework -> [searchPath1, searchPath2]
Module2.swiftmodule -> [searchPath2]
```
This commit is contained in:
Alex Hoppen
2021-12-08 22:54:38 +01:00
parent 5a6341bd65
commit fe7878ecce
24 changed files with 581 additions and 212 deletions

View File

@@ -82,26 +82,21 @@ void forEachTargetModuleBasename(const ASTContext &Ctx,
}
}
enum class SearchPathKind {
Import,
Framework,
RuntimeLibrary,
};
/// Apply \p body for each module search path in \p Ctx until \p body returns
/// non-None value. Returns the return value from \p body, or \c None.
Optional<bool> forEachModuleSearchPath(
const ASTContext &Ctx,
llvm::function_ref<Optional<bool>(StringRef, SearchPathKind, bool isSystem)>
llvm::function_ref<Optional<bool>(StringRef, ModuleSearchPathKind,
bool isSystem)>
callback) {
for (const auto &path : Ctx.SearchPathOpts.ImportSearchPaths)
for (const auto &path : Ctx.SearchPathOpts.getImportSearchPaths())
if (auto result =
callback(path, SearchPathKind::Import, /*isSystem=*/false))
callback(path, ModuleSearchPathKind::Import, /*isSystem=*/false))
return result;
for (const auto &path : Ctx.SearchPathOpts.FrameworkSearchPaths)
for (const auto &path : Ctx.SearchPathOpts.getFrameworkSearchPaths())
if (auto result =
callback(path.Path, SearchPathKind::Framework, path.IsSystem))
callback(path.Path, ModuleSearchPathKind::Framework, path.IsSystem))
return result;
// Apple platforms have extra implicit framework search paths:
@@ -109,12 +104,14 @@ Optional<bool> forEachModuleSearchPath(
if (Ctx.LangOpts.Target.isOSDarwin()) {
for (const auto &path : Ctx.getDarwinImplicitFrameworkSearchPaths())
if (auto result =
callback(path, SearchPathKind::Framework, /*isSystem=*/true))
callback(path, ModuleSearchPathKind::DarwinImplictFramework,
/*isSystem=*/true))
return result;
}
for (auto importPath : Ctx.SearchPathOpts.RuntimeLibraryImportPaths) {
if (auto result = callback(importPath, SearchPathKind::RuntimeLibrary,
for (const auto &importPath :
Ctx.SearchPathOpts.getRuntimeLibraryImportPaths()) {
if (auto result = callback(importPath, ModuleSearchPathKind::RuntimeLibrary,
/*isSystem=*/true))
return result;
}
@@ -179,10 +176,10 @@ void SerializedModuleLoaderBase::collectVisibleTopLevelModuleNamesImpl(
return false;
};
forEachModuleSearchPath(Ctx, [&](StringRef searchPath, SearchPathKind Kind,
bool isSystem) {
forEachModuleSearchPath(Ctx, [&](StringRef searchPath,
ModuleSearchPathKind Kind, bool isSystem) {
switch (Kind) {
case SearchPathKind::Import: {
case ModuleSearchPathKind::Import: {
// Look for:
// $PATH/{name}.swiftmodule/{arch}.{extension} or
// $PATH/{name}.{extension}
@@ -206,7 +203,7 @@ void SerializedModuleLoaderBase::collectVisibleTopLevelModuleNamesImpl(
});
return None;
}
case SearchPathKind::RuntimeLibrary: {
case ModuleSearchPathKind::RuntimeLibrary: {
// Look for:
// (Darwin OS) $PATH/{name}.swiftmodule/{arch}.{extension}
// (Other OS) $PATH/{name}.{extension}
@@ -233,7 +230,8 @@ void SerializedModuleLoaderBase::collectVisibleTopLevelModuleNamesImpl(
});
return None;
}
case SearchPathKind::Framework: {
case ModuleSearchPathKind::Framework:
case ModuleSearchPathKind::DarwinImplictFramework: {
// Look for:
// $PATH/{name}.framework/Modules/{name}.swiftmodule/{arch}.{extension}
forEachDirectoryEntryPath(searchPath, [&](StringRef path) {
@@ -561,10 +559,12 @@ SerializedModuleLoaderBase::findModule(ImportPath::Element moduleID,
llvm::SmallString<256> currPath;
enum class SearchResult { Found, NotFound, Error };
/// Returns true if a target-specific module file was found, false if an error
/// was diagnosed, or None if neither one happened and the search should
/// continue.
auto findTargetSpecificModuleFiles = [&](bool IsFramework) -> Optional<bool> {
auto findTargetSpecificModuleFiles = [&](bool IsFramework) -> SearchResult {
Optional<SerializedModuleBaseName> firstAbsoluteBaseName;
for (const auto &targetSpecificBaseName : targetSpecificBaseNames) {
@@ -574,19 +574,16 @@ SerializedModuleLoaderBase::findModule(ImportPath::Element moduleID,
if (!firstAbsoluteBaseName.hasValue())
firstAbsoluteBaseName.emplace(absoluteBaseName);
auto result = findModuleFilesInDirectory(moduleID,
absoluteBaseName,
moduleInterfacePath,
moduleBuffer, moduleDocBuffer,
moduleSourceInfoBuffer,
skipBuildingInterface,
IsFramework);
auto result = findModuleFilesInDirectory(
moduleID, absoluteBaseName, moduleInterfacePath, moduleBuffer,
moduleDocBuffer, moduleSourceInfoBuffer, skipBuildingInterface,
IsFramework);
if (!result) {
return true;
return SearchResult::Found;
} else if (result == std::errc::not_supported) {
return false;
return SearchResult::Error;
} else if (result != std::errc::no_such_file_or_directory) {
return None;
return SearchResult::NotFound;
}
}
@@ -595,74 +592,98 @@ SerializedModuleLoaderBase::findModule(ImportPath::Element moduleID,
if (firstAbsoluteBaseName
&& maybeDiagnoseTargetMismatch(moduleID.Loc, moduleName,
*firstAbsoluteBaseName)) {
return false;
return SearchResult::Error;
} else {
return None;
return SearchResult::NotFound;
}
};
auto result = forEachModuleSearchPath(
Ctx,
[&](StringRef path, SearchPathKind Kind,
bool isSystem) -> Optional<bool> {
currPath = path;
isSystemModule = isSystem;
SmallVector<std::string, 4> InterestingFilenames = {
(moduleName + ".framework").str(),
genericBaseName.getName(file_types::TY_SwiftModuleInterfaceFile),
genericBaseName.getName(file_types::TY_PrivateSwiftModuleInterfaceFile),
genericBaseName.getName(file_types::TY_SwiftModuleFile)};
switch (Kind) {
case SearchPathKind::Import:
case SearchPathKind::RuntimeLibrary: {
isFramework = false;
auto searchPaths = Ctx.SearchPathOpts.moduleSearchPathsContainingFile(
InterestingFilenames, Ctx.SourceMgr.getFileSystem().get(),
Ctx.LangOpts.Target.isOSDarwin());
for (const auto &searchPath : searchPaths) {
currPath = searchPath->Path;
isSystemModule = searchPath->IsSystem;
// On Apple platforms, we can assume that the runtime libraries use
// target-specifi module files wihtin a `.swiftmodule` directory.
// This was not always true on non-Apple platforms, and in order to
// ease the transition, check both layouts.
bool checkTargetSpecificModule = true;
if (Kind != SearchPathKind::RuntimeLibrary ||
!Ctx.LangOpts.Target.isOSDarwin()) {
auto modulePath = currPath;
llvm::sys::path::append(modulePath, genericModuleFileName);
llvm::ErrorOr<llvm::vfs::Status> statResult = fs.status(modulePath);
switch (searchPath->Kind) {
case ModuleSearchPathKind::Import:
case ModuleSearchPathKind::RuntimeLibrary: {
isFramework = false;
// Even if stat fails, we can't just return the error; the path
// we're looking for might not be "Foo.swiftmodule".
checkTargetSpecificModule = statResult && statResult->isDirectory();
}
// On Apple platforms, we can assume that the runtime libraries use
// target-specific module files within a `.swiftmodule` directory.
// This was not always true on non-Apple platforms, and in order to
// ease the transition, check both layouts.
bool checkTargetSpecificModule = true;
if (searchPath->Kind != ModuleSearchPathKind::RuntimeLibrary ||
!Ctx.LangOpts.Target.isOSDarwin()) {
auto modulePath = currPath;
llvm::sys::path::append(modulePath, genericModuleFileName);
llvm::ErrorOr<llvm::vfs::Status> statResult = fs.status(modulePath);
if (checkTargetSpecificModule)
// A .swiftmodule directory contains architecture-specific files.
return findTargetSpecificModuleFiles(isFramework);
// Even if stat fails, we can't just return the error; the path
// we're looking for might not be "Foo.swiftmodule".
checkTargetSpecificModule = statResult && statResult->isDirectory();
}
SerializedModuleBaseName absoluteBaseName{currPath, genericBaseName};
auto result = findModuleFilesInDirectory(
moduleID, absoluteBaseName, moduleInterfacePath,
moduleBuffer, moduleDocBuffer, moduleSourceInfoBuffer,
skipBuildingInterface, isFramework);
if (!result)
return true;
else if (result == std::errc::not_supported)
return false;
else
return None;
if (checkTargetSpecificModule) {
// A .swiftmodule directory contains architecture-specific files.
switch (findTargetSpecificModuleFiles(isFramework)) {
case SearchResult::Found:
return true;
case SearchResult::NotFound:
continue;
case SearchResult::Error:
return false;
}
case SearchPathKind::Framework: {
isFramework = true;
llvm::sys::path::append(currPath, moduleName + ".framework");
}
// Check if the framework directory exists.
if (!fs.exists(currPath))
return None;
SerializedModuleBaseName absoluteBaseName{currPath, genericBaseName};
// Frameworks always use architecture-specific files within a
// .swiftmodule directory.
llvm::sys::path::append(currPath, "Modules");
return findTargetSpecificModuleFiles(isFramework);
}
}
llvm_unreachable("covered switch");
});
return result.getValueOr(false);
auto result = findModuleFilesInDirectory(
moduleID, absoluteBaseName, moduleInterfacePath, moduleBuffer,
moduleDocBuffer, moduleSourceInfoBuffer, skipBuildingInterface,
isFramework);
if (!result) {
return true;
} else if (result == std::errc::not_supported) {
return false;
} else {
continue;
}
}
case ModuleSearchPathKind::Framework:
case ModuleSearchPathKind::DarwinImplictFramework: {
isFramework = true;
llvm::sys::path::append(currPath, moduleName + ".framework");
// Check if the framework directory exists.
if (!fs.exists(currPath)) {
continue;
}
// Frameworks always use architecture-specific files within a
// .swiftmodule directory.
llvm::sys::path::append(currPath, "Modules");
switch (findTargetSpecificModuleFiles(isFramework)) {
case SearchResult::Found:
return true;
case SearchResult::NotFound:
continue;
case SearchResult::Error:
return false;
}
}
}
llvm_unreachable("covered switch");
}
return false;
}
static std::pair<StringRef, clang::VersionTuple>
@@ -914,7 +935,7 @@ void swift::serialization::diagnoseSerializedASTLoadFailure(
missingNames);
}
if (Ctx.SearchPathOpts.SDKPath.empty() &&
if (Ctx.SearchPathOpts.getSDKPath().empty() &&
llvm::Triple(llvm::sys::getProcessTriple()).isMacOSX()) {
Ctx.Diags.diagnose(SourceLoc(), diag::sema_no_import_no_sdk);
Ctx.Diags.diagnose(SourceLoc(), diag::sema_no_import_no_sdk_xcrun);
@@ -947,7 +968,7 @@ void swift::serialization::diagnoseSerializedASTLoadFailure(
case serialization::Status::MissingUnderlyingModule: {
Ctx.Diags.diagnose(diagLoc, diag::serialization_missing_underlying_module,
ModuleName);
if (Ctx.SearchPathOpts.SDKPath.empty() &&
if (Ctx.SearchPathOpts.getSDKPath().empty() &&
llvm::Triple(llvm::sys::getProcessTriple()).isMacOSX()) {
Ctx.Diags.diagnose(SourceLoc(), diag::sema_no_import_no_sdk);
Ctx.Diags.diagnose(SourceLoc(), diag::sema_no_import_no_sdk_xcrun);