[clang][deps] Implement efficient in-process ModuleCache (#129751)

The dependency scanner uses implicitly-built Clang modules under the
hood. This system was originally designed to handle multiple concurrent
processes working on the same module cache, and mutual exclusion was
implemented using file locks. The scanner, however, runs within single
process, making file locks unnecessary. This patch virtualizes the
interface for module cache locking and provides an implementation based
on `std::shared_mutex`. This reduces `clang-scan-deps` runtime by ~17%
on my benchmark.

Note that even when multiple processes run a scan on the same module
cache (and therefore don't coordinate efficiently), this should still be
correct due to the strict context hash, the write-through
`InMemoryModuleCache` and the logic for rebuilding out-of-date or
incompatible modules.
This commit is contained in:
Jan Svoboda
2025-03-18 14:01:04 -07:00
committed by GitHub
parent a65cbc4213
commit 056264b838
5 changed files with 128 additions and 1 deletions

View File

@@ -10,6 +10,7 @@
#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGSERVICE_H
#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
#include "clang/Tooling/DependencyScanning/InProcessModuleCache.h"
#include "llvm/ADT/BitmaskEnum.h"
namespace clang {
@@ -99,6 +100,8 @@ public:
return SharedCache;
}
ModuleCacheMutexes &getModuleCacheMutexes() { return ModuleCacheMutexes; }
private:
const ScanningMode Mode;
const ScanningOutputFormat Format;
@@ -110,6 +113,8 @@ private:
const bool TraceVFS;
/// The global file system cache.
DependencyScanningFilesystemSharedCache SharedCache;
/// The global module cache mutexes.
ModuleCacheMutexes ModuleCacheMutexes;
};
} // end namespace dependencies

View File

@@ -0,0 +1,31 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_INPROCESSMODULECACHE_H
#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_INPROCESSMODULECACHE_H
#include "clang/Serialization/ModuleCache.h"
#include "llvm/ADT/StringMap.h"
#include <shared_mutex>
namespace clang {
namespace tooling {
namespace dependencies {
struct ModuleCacheMutexes {
std::mutex Mutex;
llvm::StringMap<std::unique_ptr<std::shared_mutex>> Map;
};
IntrusiveRefCntPtr<ModuleCache>
makeInProcessModuleCache(ModuleCacheMutexes &Mutexes);
} // namespace dependencies
} // namespace tooling
} // namespace clang
#endif

View File

@@ -10,6 +10,7 @@ add_clang_library(clangDependencyScanning
DependencyScanningService.cpp
DependencyScanningWorker.cpp
DependencyScanningTool.cpp
InProcessModuleCache.cpp
ModuleDepCollector.cpp
DEPENDS

View File

@@ -22,6 +22,7 @@
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Serialization/ObjectFilePCHContainerReader.h"
#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
#include "clang/Tooling/DependencyScanning/InProcessModuleCache.h"
#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
@@ -315,9 +316,11 @@ public:
Scanned = true;
// Create a compiler instance to handle the actual work.
ScanInstanceStorage.emplace(std::move(PCHContainerOps));
auto ModCache = makeInProcessModuleCache(Service.getModuleCacheMutexes());
ScanInstanceStorage.emplace(std::move(PCHContainerOps), ModCache.get());
CompilerInstance &ScanInstance = *ScanInstanceStorage;
ScanInstance.setInvocation(std::move(Invocation));
ScanInstance.setBuildingModule(false);
// Create the compiler's actual diagnostics engine.
sanitizeDiagOpts(ScanInstance.getDiagnosticOpts());

View File

@@ -0,0 +1,87 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/DependencyScanning/InProcessModuleCache.h"
#include "clang/Serialization/InMemoryModuleCache.h"
#include "llvm/Support/AdvisoryLock.h"
#include <mutex>
using namespace clang;
using namespace tooling;
using namespace dependencies;
namespace {
class ReaderWriterLock : public llvm::AdvisoryLock {
// TODO: Consider using std::atomic::{wait,notify_all} when we move to C++20.
std::unique_lock<std::shared_mutex> OwningLock;
public:
ReaderWriterLock(std::shared_mutex &Mutex)
: OwningLock(Mutex, std::defer_lock) {}
Expected<bool> tryLock() override { return OwningLock.try_lock(); }
llvm::WaitForUnlockResult
waitForUnlockFor(std::chrono::seconds MaxSeconds) override {
assert(!OwningLock);
// We do not respect the timeout here. It's very generous for implicit
// modules, so we'd typically only reach it if the owner crashed (but so did
// we, since we run in the same process), or encountered deadlock.
(void)MaxSeconds;
std::shared_lock Lock(*OwningLock.mutex());
return llvm::WaitForUnlockResult::Success;
}
std::error_code unsafeMaybeUnlock() override {
// Unlocking the mutex here would trigger UB and we don't expect this to be
// actually called when compiling scanning modules due to the no-timeout
// guarantee above.
return {};
}
~ReaderWriterLock() override = default;
};
class InProcessModuleCache : public ModuleCache {
ModuleCacheMutexes &Mutexes;
// TODO: If we changed the InMemoryModuleCache API and relied on strict
// context hash, we could probably create more efficient thread-safe
// implementation of the InMemoryModuleCache such that it doesn't need to be
// recreated for each translation unit.
InMemoryModuleCache InMemory;
public:
InProcessModuleCache(ModuleCacheMutexes &Mutexes) : Mutexes(Mutexes) {}
void prepareForGetLock(StringRef Filename) override {}
std::unique_ptr<llvm::AdvisoryLock> getLock(StringRef Filename) override {
auto &Mtx = [&]() -> std::shared_mutex & {
std::lock_guard Lock(Mutexes.Mutex);
auto &Mutex = Mutexes.Map[Filename];
if (!Mutex)
Mutex = std::make_unique<std::shared_mutex>();
return *Mutex;
}();
return std::make_unique<ReaderWriterLock>(Mtx);
}
InMemoryModuleCache &getInMemoryModuleCache() override { return InMemory; }
const InMemoryModuleCache &getInMemoryModuleCache() const override {
return InMemory;
}
};
} // namespace
IntrusiveRefCntPtr<ModuleCache>
dependencies::makeInProcessModuleCache(ModuleCacheMutexes &Mutexes) {
return llvm::makeIntrusiveRefCnt<InProcessModuleCache>(Mutexes);
}