Files
clang-p2996/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
Jan Svoboda b768a8c1db [clang][deps] Lazy dependency directives (#86347)
Since b4c83a13f6, `Preprocessor` and
`Lexer` are aware of the concept of scanning dependency directives. This
makes it possible to scan for them on-demand rather than eagerly on the
first filesystem operation (open, or even just stat).

This might improve performance, but is also necessary for the "PCH as
module" mode. Some precompiled header sources use the ".pch" file
extension, which means they were not getting scanned for dependency
directives. This was okay when the PCH was the main input file in a
separate scan step, because there we just lex the file in a
scanning-specific frontend action. But when such source gets treated as
a module implicitly loaded from a TU, it will get compiled as any other
module - with Sema - which will result in compilation errors. (See
attached test case.)

rdar://107663951
2024-03-22 16:09:34 -07:00

355 lines
13 KiB
C++

//===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
#include "llvm/Support/Threading.h"
#include <optional>
using namespace clang;
using namespace tooling;
using namespace dependencies;
llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
// Load the file and its content from the file system.
auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
if (!MaybeFile)
return MaybeFile.getError();
auto File = std::move(*MaybeFile);
auto MaybeStat = File->status();
if (!MaybeStat)
return MaybeStat.getError();
auto Stat = std::move(*MaybeStat);
auto MaybeBuffer = File->getBuffer(Stat.getName());
if (!MaybeBuffer)
return MaybeBuffer.getError();
auto Buffer = std::move(*MaybeBuffer);
// If the file size changed between read and stat, pretend it didn't.
if (Stat.getSize() != Buffer->getBufferSize())
Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
return TentativeEntry(Stat, std::move(Buffer));
}
bool DependencyScanningWorkerFilesystem::ensureDirectiveTokensArePopulated(
EntryRef Ref) {
auto &Entry = Ref.Entry;
if (Entry.isError() || Entry.isDirectory())
return false;
CachedFileContents *Contents = Entry.getCachedContents();
assert(Contents && "contents not initialized");
// Double-checked locking.
if (Contents->DepDirectives.load())
return true;
std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);
// Double-checked locking.
if (Contents->DepDirectives.load())
return true;
SmallVector<dependency_directives_scan::Directive, 64> Directives;
// Scan the file for preprocessor directives that might affect the
// dependencies.
if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
Contents->DepDirectiveTokens,
Directives)) {
Contents->DepDirectiveTokens.clear();
// FIXME: Propagate the diagnostic if desired by the client.
Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>());
return false;
}
// This function performed double-checked locking using `DepDirectives`.
// Assigning it must be the last thing this function does, otherwise other
// threads may skip the critical section (`DepDirectives != nullptr`), leading
// to a data race.
Contents->DepDirectives.store(
new std::optional<DependencyDirectivesTy>(std::move(Directives)));
return true;
}
DependencyScanningFilesystemSharedCache::
DependencyScanningFilesystemSharedCache() {
// This heuristic was chosen using a empirical testing on a
// reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
// sharding gives a performance edge by reducing the lock contention.
// FIXME: A better heuristic might also consider the OS to account for
// the different cost of lock contention on different OSes.
NumShards =
std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
CacheShards = std::make_unique<CacheShard[]>(NumShards);
}
DependencyScanningFilesystemSharedCache::CacheShard &
DependencyScanningFilesystemSharedCache::getShardForFilename(
StringRef Filename) const {
assert(llvm::sys::path::is_absolute_gnu(Filename));
return CacheShards[llvm::hash_value(Filename) % NumShards];
}
DependencyScanningFilesystemSharedCache::CacheShard &
DependencyScanningFilesystemSharedCache::getShardForUID(
llvm::sys::fs::UniqueID UID) const {
auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
return CacheShards[Hash % NumShards];
}
const CachedFileSystemEntry *
DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
StringRef Filename) const {
assert(llvm::sys::path::is_absolute_gnu(Filename));
std::lock_guard<std::mutex> LockGuard(CacheLock);
auto It = EntriesByFilename.find(Filename);
return It == EntriesByFilename.end() ? nullptr : It->getValue();
}
const CachedFileSystemEntry *
DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
llvm::sys::fs::UniqueID UID) const {
std::lock_guard<std::mutex> LockGuard(CacheLock);
auto It = EntriesByUID.find(UID);
return It == EntriesByUID.end() ? nullptr : It->getSecond();
}
const CachedFileSystemEntry &
DependencyScanningFilesystemSharedCache::CacheShard::
getOrEmplaceEntryForFilename(StringRef Filename,
llvm::ErrorOr<llvm::vfs::Status> Stat) {
std::lock_guard<std::mutex> LockGuard(CacheLock);
auto Insertion = EntriesByFilename.insert({Filename, nullptr});
if (Insertion.second)
Insertion.first->second =
new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
return *Insertion.first->second;
}
const CachedFileSystemEntry &
DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
std::unique_ptr<llvm::MemoryBuffer> Contents) {
std::lock_guard<std::mutex> LockGuard(CacheLock);
auto Insertion = EntriesByUID.insert({UID, nullptr});
if (Insertion.second) {
CachedFileContents *StoredContents = nullptr;
if (Contents)
StoredContents = new (ContentsStorage.Allocate())
CachedFileContents(std::move(Contents));
Insertion.first->second = new (EntryStorage.Allocate())
CachedFileSystemEntry(std::move(Stat), StoredContents);
}
return *Insertion.first->second;
}
const CachedFileSystemEntry &
DependencyScanningFilesystemSharedCache::CacheShard::
getOrInsertEntryForFilename(StringRef Filename,
const CachedFileSystemEntry &Entry) {
std::lock_guard<std::mutex> LockGuard(CacheLock);
return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
}
static bool shouldCacheStatFailures(StringRef Filename) {
StringRef Ext = llvm::sys::path::extension(Filename);
if (Ext.empty())
return false; // This may be the module cache directory.
return true;
}
DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem(
DependencyScanningFilesystemSharedCache &SharedCache,
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
: llvm::RTTIExtends<DependencyScanningWorkerFilesystem,
llvm::vfs::ProxyFileSystem>(std::move(FS)),
SharedCache(SharedCache),
WorkingDirForCacheLookup(llvm::errc::invalid_argument) {
updateWorkingDirForCacheLookup();
}
const CachedFileSystemEntry &
DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
TentativeEntry TEntry) {
auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID());
return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
std::move(TEntry.Status),
std::move(TEntry.Contents));
}
const CachedFileSystemEntry *
DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
StringRef Filename) {
if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
return Entry;
auto &Shard = SharedCache.getShardForFilename(Filename);
if (const auto *Entry = Shard.findEntryByFilename(Filename))
return &LocalCache.insertEntryForFilename(Filename, *Entry);
return nullptr;
}
llvm::ErrorOr<const CachedFileSystemEntry &>
DependencyScanningWorkerFilesystem::computeAndStoreResult(
StringRef OriginalFilename, StringRef FilenameForLookup) {
llvm::ErrorOr<llvm::vfs::Status> Stat =
getUnderlyingFS().status(OriginalFilename);
if (!Stat) {
if (!shouldCacheStatFailures(OriginalFilename))
return Stat.getError();
const auto &Entry =
getOrEmplaceSharedEntryForFilename(FilenameForLookup, Stat.getError());
return insertLocalEntryForFilename(FilenameForLookup, Entry);
}
if (const auto *Entry = findSharedEntryByUID(*Stat))
return insertLocalEntryForFilename(FilenameForLookup, *Entry);
auto TEntry =
Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(OriginalFilename);
const CachedFileSystemEntry *SharedEntry = [&]() {
if (TEntry) {
const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
return &getOrInsertSharedEntryForFilename(FilenameForLookup, UIDEntry);
}
return &getOrEmplaceSharedEntryForFilename(FilenameForLookup,
TEntry.getError());
}();
return insertLocalEntryForFilename(FilenameForLookup, *SharedEntry);
}
llvm::ErrorOr<EntryRef>
DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
StringRef OriginalFilename) {
StringRef FilenameForLookup;
SmallString<256> PathBuf;
if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) {
FilenameForLookup = OriginalFilename;
} else if (!WorkingDirForCacheLookup) {
return WorkingDirForCacheLookup.getError();
} else {
StringRef RelFilename = OriginalFilename;
RelFilename.consume_front("./");
PathBuf = *WorkingDirForCacheLookup;
llvm::sys::path::append(PathBuf, RelFilename);
FilenameForLookup = PathBuf.str();
}
assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup));
if (const auto *Entry =
findEntryByFilenameWithWriteThrough(FilenameForLookup))
return EntryRef(OriginalFilename, *Entry).unwrapError();
auto MaybeEntry = computeAndStoreResult(OriginalFilename, FilenameForLookup);
if (!MaybeEntry)
return MaybeEntry.getError();
return EntryRef(OriginalFilename, *MaybeEntry).unwrapError();
}
llvm::ErrorOr<llvm::vfs::Status>
DependencyScanningWorkerFilesystem::status(const Twine &Path) {
SmallString<256> OwnedFilename;
StringRef Filename = Path.toStringRef(OwnedFilename);
if (Filename.ends_with(".pcm"))
return getUnderlyingFS().status(Path);
llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
if (!Result)
return Result.getError();
return Result->getStatus();
}
namespace {
/// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
/// this subclass.
class DepScanFile final : public llvm::vfs::File {
public:
DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
llvm::vfs::Status Stat)
: Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry);
llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
bool IsVolatile) override {
return std::move(Buffer);
}
std::error_code close() override { return {}; }
private:
std::unique_ptr<llvm::MemoryBuffer> Buffer;
llvm::vfs::Status Stat;
};
} // end anonymous namespace
llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
DepScanFile::create(EntryRef Entry) {
assert(!Entry.isError() && "error");
if (Entry.isDirectory())
return std::make_error_code(std::errc::is_a_directory);
auto Result = std::make_unique<DepScanFile>(
llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
Entry.getStatus().getName(),
/*RequiresNullTerminator=*/false),
Entry.getStatus());
return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
std::unique_ptr<llvm::vfs::File>(std::move(Result)));
}
llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
SmallString<256> OwnedFilename;
StringRef Filename = Path.toStringRef(OwnedFilename);
if (Filename.ends_with(".pcm"))
return getUnderlyingFS().openFileForRead(Path);
llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
if (!Result)
return Result.getError();
return DepScanFile::create(Result.get());
}
std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory(
const Twine &Path) {
std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path);
updateWorkingDirForCacheLookup();
return EC;
}
void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() {
llvm::ErrorOr<std::string> CWD =
getUnderlyingFS().getCurrentWorkingDirectory();
if (!CWD) {
WorkingDirForCacheLookup = CWD.getError();
} else if (!llvm::sys::path::is_absolute_gnu(*CWD)) {
WorkingDirForCacheLookup = llvm::errc::invalid_argument;
} else {
WorkingDirForCacheLookup = *CWD;
}
assert(!WorkingDirForCacheLookup ||
llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup));
}
const char DependencyScanningWorkerFilesystem::ID = 0;