[clangd] [C++20] [Modules] Add scanning cache (#125988)

Previously, every time we wanted to get the source file declaring a
specific module, we had to scan the whole project again and again, so
the performance was very bad. This patch improves this by introducing a
simple cache.
This commit is contained in:
Chuanqi Xu
2025-02-26 16:03:04 +08:00
committed by GitHub
parent a522c227a1
commit ae839b0250
4 changed files with 164 additions and 20 deletions

View File

@@ -357,10 +357,80 @@ void ModuleFileCache::remove(StringRef ModuleName) {
ModuleFiles.erase(ModuleName);
}
class ModuleNameToSourceCache {
public:
std::string getSourceForModuleName(llvm::StringRef ModuleName) {
std::lock_guard<std::mutex> Lock(CacheMutex);
auto Iter = ModuleNameToSourceCache.find(ModuleName);
if (Iter != ModuleNameToSourceCache.end())
return Iter->second;
return "";
}
void addEntry(llvm::StringRef ModuleName, PathRef Source) {
std::lock_guard<std::mutex> Lock(CacheMutex);
ModuleNameToSourceCache[ModuleName] = Source.str();
}
void eraseEntry(llvm::StringRef ModuleName) {
std::lock_guard<std::mutex> Lock(CacheMutex);
ModuleNameToSourceCache.erase(ModuleName);
}
private:
std::mutex CacheMutex;
llvm::StringMap<std::string> ModuleNameToSourceCache;
};
class CachingProjectModules : public ProjectModules {
public:
CachingProjectModules(std::unique_ptr<ProjectModules> MDB,
ModuleNameToSourceCache &Cache)
: MDB(std::move(MDB)), Cache(Cache) {
assert(this->MDB && "CachingProjectModules should only be created with a "
"valid underlying ProjectModules");
}
std::vector<std::string> getRequiredModules(PathRef File) override {
return MDB->getRequiredModules(File);
}
std::string getModuleNameForSource(PathRef File) override {
return MDB->getModuleNameForSource(File);
}
std::string getSourceForModuleName(llvm::StringRef ModuleName,
PathRef RequiredSrcFile) override {
std::string CachedResult = Cache.getSourceForModuleName(ModuleName);
// Verify Cached Result by seeing if the source declaring the same module
// as we query.
if (!CachedResult.empty()) {
std::string ModuleNameOfCachedSource =
MDB->getModuleNameForSource(CachedResult);
if (ModuleNameOfCachedSource == ModuleName)
return CachedResult;
// Cached Result is invalid. Clear it.
Cache.eraseEntry(ModuleName);
}
auto Result = MDB->getSourceForModuleName(ModuleName, RequiredSrcFile);
Cache.addEntry(ModuleName, Result);
return Result;
}
private:
std::unique_ptr<ProjectModules> MDB;
ModuleNameToSourceCache &Cache;
};
/// Collect the directly and indirectly required module names for \param
/// ModuleName in topological order. The \param ModuleName is guaranteed to
/// be the last element in \param ModuleNames.
llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
llvm::SmallVector<StringRef> getAllRequiredModules(PathRef RequiredSource,
CachingProjectModules &MDB,
StringRef ModuleName) {
llvm::SmallVector<llvm::StringRef> ModuleNames;
llvm::StringSet<> ModuleNamesSet;
@@ -368,8 +438,8 @@ llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void {
ModuleNamesSet.insert(ModuleName);
for (StringRef RequiredModuleName :
MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)))
for (StringRef RequiredModuleName : MDB.getRequiredModules(
MDB.getSourceForModuleName(ModuleName, RequiredSource)))
if (ModuleNamesSet.insert(RequiredModuleName).second)
Visitor(RequiredModuleName, Visitor);
@@ -386,24 +456,29 @@ class ModulesBuilder::ModulesBuilderImpl {
public:
ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {}
ModuleNameToSourceCache &getProjectModulesCache() {
return ProjectModulesCache;
}
const GlobalCompilationDatabase &getCDB() const { return Cache.getCDB(); }
llvm::Error
getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS,
ProjectModules &MDB,
getOrBuildModuleFile(PathRef RequiredSource, StringRef ModuleName,
const ThreadsafeFS &TFS, CachingProjectModules &MDB,
ReusablePrerequisiteModules &BuiltModuleFiles);
private:
ModuleFileCache Cache;
ModuleNameToSourceCache ProjectModulesCache;
};
llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB,
ReusablePrerequisiteModules &BuiltModuleFiles) {
PathRef RequiredSource, StringRef ModuleName, const ThreadsafeFS &TFS,
CachingProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles) {
if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
return llvm::Error::success();
PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName);
std::string ModuleUnitFileName =
MDB.getSourceForModuleName(ModuleName, RequiredSource);
/// It is possible that we're meeting third party modules (modules whose
/// source are not in the project. e.g, the std module may be a third-party
/// module for most project) or something wrong with the implementation of
@@ -416,7 +491,7 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
llvm::formatv("Don't get the module unit for module {0}", ModuleName));
// Get Required modules in topological order.
auto ReqModuleNames = getAllRequiredModules(MDB, ModuleName);
auto ReqModuleNames = getAllRequiredModules(RequiredSource, MDB, ModuleName);
for (llvm::StringRef ReqModuleName : ReqModuleNames) {
if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
continue;
@@ -454,8 +529,11 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
elog("Failed to get Project Modules information for {0}", File);
return std::make_unique<FailedPrerequisiteModules>();
}
CachingProjectModules CachedMDB(std::move(MDB),
Impl->getProjectModulesCache());
std::vector<std::string> RequiredModuleNames = MDB->getRequiredModules(File);
std::vector<std::string> RequiredModuleNames =
CachedMDB.getRequiredModules(File);
if (RequiredModuleNames.empty())
return std::make_unique<ReusablePrerequisiteModules>();
@@ -463,7 +541,7 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
for (llvm::StringRef RequiredModuleName : RequiredModuleNames) {
// Return early if there is any error.
if (llvm::Error Err = Impl->getOrBuildModuleFile(
RequiredModuleName, TFS, *MDB.get(), *RequiredModules.get())) {
File, RequiredModuleName, TFS, CachedMDB, *RequiredModules.get())) {
elog("Failed to build module {0}; due to {1}", RequiredModuleName,
toString(std::move(Err)));
return std::make_unique<FailedPrerequisiteModules>();

View File

@@ -42,9 +42,9 @@ public:
llvm::unique_function<void(tooling::CompileCommand &, PathRef) const>;
virtual std::vector<std::string> getRequiredModules(PathRef File) = 0;
virtual PathRef
getSourceForModuleName(llvm::StringRef ModuleName,
PathRef RequiredSrcFile = PathRef()) = 0;
virtual std::string getModuleNameForSource(PathRef File) = 0;
virtual std::string getSourceForModuleName(llvm::StringRef ModuleName,
PathRef RequiredSrcFile) = 0;
virtual void setCommandMangler(CommandMangler Mangler) {}

View File

@@ -134,6 +134,9 @@ ModuleDependencyScanner::scan(PathRef FilePath,
void ModuleDependencyScanner::globalScan(
const ProjectModules::CommandMangler &Mangler) {
if (GlobalScanned)
return;
for (auto &File : CDB->getAllFiles())
scan(File, Mangler);
@@ -189,11 +192,18 @@ public:
/// RequiredSourceFile is not used intentionally. See the comments of
/// ModuleDependencyScanner for detail.
PathRef
getSourceForModuleName(llvm::StringRef ModuleName,
PathRef RequiredSourceFile = PathRef()) override {
std::string getSourceForModuleName(llvm::StringRef ModuleName,
PathRef RequiredSourceFile) override {
Scanner.globalScan(Mangler);
return Scanner.getSourceForModuleName(ModuleName);
return Scanner.getSourceForModuleName(ModuleName).str();
}
/// Scans \p File with the current Mangler and returns the ModuleName recorded
/// in the scan result. Returns an empty string when the scan produced no
/// result or the result carries no module name.
std::string getModuleNameForSource(PathRef File) override {
  if (auto Scanned = Scanner.scan(File, Mangler);
      Scanned && Scanned->ModuleName)
    return *Scanned->ModuleName;
  return {};
}
private:

View File

@@ -27,12 +27,41 @@
namespace clang::clangd {
namespace {
/// Test helper: a ProjectModules wrapper that bumps an external counter on
/// every getSourceForModuleName() call and otherwise forwards each query to
/// the wrapped instance.
class GlobalScanningCounterProjectModules : public ProjectModules {
public:
  /// \param Underlying The ProjectModules that actually answers the queries.
  /// \param Count      Counter incremented once per getSourceForModuleName()
  ///                   call; held by reference.
  GlobalScanningCounterProjectModules(
      std::unique_ptr<ProjectModules> Underlying, std::atomic<unsigned> &Count)
      : Underlying(std::move(Underlying)), Count(Count) {}

  /// Counts the call, then forwards it.
  std::string getSourceForModuleName(llvm::StringRef ModuleName,
                                     PathRef RequiredSrcFile) override {
    ++Count;
    return Underlying->getSourceForModuleName(ModuleName, RequiredSrcFile);
  }

  // The remaining overrides are plain pass-throughs.

  std::string getModuleNameForSource(PathRef File) override {
    return Underlying->getModuleNameForSource(File);
  }

  std::vector<std::string> getRequiredModules(PathRef File) override {
    return Underlying->getRequiredModules(File);
  }

  void setCommandMangler(CommandMangler Mangler) override {
    Underlying->setCommandMangler(std::move(Mangler));
  }

private:
  std::unique_ptr<ProjectModules> Underlying;
  std::atomic<unsigned> &Count;
};
class MockDirectoryCompilationDatabase : public MockCompilationDatabase {
public:
MockDirectoryCompilationDatabase(StringRef TestDir, const ThreadsafeFS &TFS)
: MockCompilationDatabase(TestDir),
MockedCDBPtr(std::make_shared<MockClangCompilationDatabase>(*this)),
TFS(TFS) {
TFS(TFS), GlobalScanningCount(0) {
this->ExtraClangFlags.push_back("-std=c++20");
this->ExtraClangFlags.push_back("-c");
}
@@ -40,9 +69,12 @@ public:
void addFile(llvm::StringRef Path, llvm::StringRef Contents);
std::unique_ptr<ProjectModules> getProjectModules(PathRef) const override {
return scanningProjectModules(MockedCDBPtr, TFS);
return std::make_unique<GlobalScanningCounterProjectModules>(
scanningProjectModules(MockedCDBPtr, TFS), GlobalScanningCount);
}
unsigned getGlobalScanningCount() const { return GlobalScanningCount; }
private:
class MockClangCompilationDatabase : public tooling::CompilationDatabase {
public:
@@ -68,6 +100,8 @@ private:
std::shared_ptr<MockClangCompilationDatabase> MockedCDBPtr;
const ThreadsafeFS &TFS;
mutable std::atomic<unsigned> GlobalScanningCount;
};
// Add files to the working testing directory and the compilation database.
@@ -590,6 +624,28 @@ export constexpr int M = 43;
EXPECT_NE(NewHSOptsA.PrebuiltModuleFiles, HSOptsA.PrebuiltModuleFiles);
}
// Two module units (A and B) import the same module M. After building the
// prerequisites of both, the counter exposed by the mock CDB is expected to
// have been incremented exactly once.
TEST_F(PrerequisiteModulesTests, ScanningCacheTest) {
  MockDirectoryCompilationDatabase CDB(TestDir, FS);

  // The shared dependency.
  CDB.addFile("M.cppm", R"cpp(
export module M;
)cpp");
  // Two independent importers of M.
  CDB.addFile("A.cppm", R"cpp(
export module A;
import M;
)cpp");
  CDB.addFile("B.cppm", R"cpp(
export module B;
import M;
)cpp");

  ModulesBuilder Builder(CDB);

  // Build the prerequisites of A first, then of B, with the same builder.
  for (const char *Importer : {"A.cppm", "B.cppm"})
    Builder.buildPrerequisiteModulesFor(getFullPath(Importer), FS);

  EXPECT_EQ(CDB.getGlobalScanningCount(), 1u);
}
} // namespace
} // namespace clang::clangd