## Summary
- **`[[rules]]`**: TOML array-of-tables config for per-file compilation
flag rules with glob pattern matching (`append`/`remove`). Patterns are
pre-compiled at config load time. Rules whose patterns all fail to
compile are dropped entirely (no silent no-op entries), and rules now
apply uniformly to every compilation — including the header-context
fallback path used when editing a header without its own CDB entry.
- **CDB auto-scan**: Default search scans workspace root + all immediate
subdirectories for `compile_commands.json`, replacing the hardcoded
directory list.
- **LSP `initializationOptions`**: Clients can pass config as JSON via
the LSP initialize request; priority is `initializationOptions >
clice.toml > defaults`.
- **XDG cache paths**: Default cache/index/logging paths prefer
`$XDG_CACHE_HOME/clice/<workspace-hash>/`; fall back to
`$HOME/.cache/clice/<hash>/`, then `<workspace>/.clice/`.
- **`${workspace}` substitution**: supported in `cache_dir`,
`index_dir`, `logging_dir`, and every `compile_commands_paths` entry.
No-op when `workspace_root` is empty.
- **Partial config support**: All TOML/JSON fields are optional via
`kota::meta::defaulted<T>`, so minimal config files work correctly.
- **Detailed diagnostics**: malformed `clice.toml` now logs line, column
and parser description (via toml++ direct parse); a malformed workspace
config surfaces a clear fallback warning instead of silently reverting
to defaults.
## Test plan
- [x] 28 unit tests for config (full suite 545 unit tests pass, Debug)
- [x] 119 integration tests pass
- [x] 2 smoke tests pass
🤖 Generated with [Claude Code](https://claude.com/claude-code)
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit
* **New Features**
  * XDG-based, workspace-scoped project cache (PCH/PCM and header caches moved under project cache) with workspace fallback
  * Initialization options JSON can override config (takes precedence over file/defaults)
  * Per-file pattern rules to append/remove compile flags; expanded discovery of compilation databases (multiple paths)
* **Refactor**
  * Configuration fields reorganized under a project scope; runtime behavior now respects project-scoped values
* **Tests**
  * New unit and integration tests for config parsing, rule matching, and persistent cache behavior
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
---------
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
377 lines
12 KiB
C++
377 lines
12 KiB
C++
#include "server/workspace.h"
|
|
|
|
#include <algorithm>
|
|
#include <chrono>
|
|
|
|
#include "support/filesystem.h"
|
|
#include "support/logging.h"
|
|
#include "syntax/scan.h"
|
|
|
|
#include "kota/codec/json/json.h"
|
|
#include "kota/ipc/lsp/position.h"
|
|
#include "kota/ipc/lsp/protocol.h"
|
|
#include "llvm/Support/Chrono.h"
|
|
#include "llvm/Support/FileSystem.h"
|
|
#include "llvm/Support/MemoryBuffer.h"
|
|
#include "llvm/Support/Path.h"
|
|
#include "llvm/Support/xxhash.h"
|
|
|
|
namespace clice {
|
|
|
|
namespace lsp = kota::ipc::lsp;
|
|
|
|
/// Find the tightest (innermost) occurrence containing `offset` via binary search.
|
|
const static index::Occurrence* lookup_occurrence(const std::vector<index::Occurrence>& occs,
|
|
std::uint32_t offset) {
|
|
auto it = std::ranges::lower_bound(occs, offset, {}, [](const index::Occurrence& o) {
|
|
return o.range.end;
|
|
});
|
|
const index::Occurrence* best = nullptr;
|
|
while(it != occs.end() && it->range.contains(offset)) {
|
|
if(!best || (it->range.end - it->range.begin) < (best->range.end - best->range.begin)) {
|
|
best = &*it;
|
|
}
|
|
++it;
|
|
}
|
|
return best;
|
|
}
|
|
|
|
/// Resolve the occurrence under `offset` in this open file's index.
///
/// Returns the occurrence's target symbol hash together with its range
/// converted to protocol positions. Yields nullopt when there is no mapper,
/// no occurrence covers `offset`, or either endpoint cannot be converted.
std::optional<std::pair<index::SymbolHash, protocol::Range>>
OpenFileIndex::find_occurrence(std::uint32_t offset) const {
    if(mapper) {
        if(auto* hit = lookup_occurrence(file_index.occurrences, offset)) {
            auto first = mapper->to_position(hit->range.begin);
            auto last = mapper->to_position(hit->range.end);
            if(first && last) {
                return std::pair{
                    hit->target,
                    protocol::Range{*first, *last}
                };
            }
        }
    }
    return std::nullopt;
}
|
|
|
|
/// Resolve the occurrence under `offset` in this merged index shard.
///
/// Delegates the search to `index.lookup` and converts the reported
/// occurrence's range to protocol positions. Yields nullopt when the shard has
/// no mapper, nothing is reported, or a position conversion fails.
std::optional<std::pair<index::SymbolHash, protocol::Range>>
MergedIndexShard::find_occurrence(std::uint32_t offset) const {
    std::optional<std::pair<index::SymbolHash, protocol::Range>> found;

    auto* converter = mapper();
    if(!converter) {
        return std::nullopt;
    }

    auto visit = [&](const index::Occurrence& occ) {
        auto first = converter->to_position(occ.range.begin);
        auto last = converter->to_position(occ.range.end);
        if(first && last) {
            found.emplace(occ.target, protocol::Range{*first, *last});
        }
        // The callback always reports false; how `lookup` interprets that
        // value is defined by the index, not here.
        return false;
    };
    index.lookup(offset, visit);

    return found;
}
|
|
|
|
/// React to a file being saved.
///
/// Re-scans the file on disk to refresh its entry in `path_to_module`, then
/// asks the compile graph (if any) to update for `path_id`. Every id the graph
/// reports is dropped from `pcm_paths` and `pcm_cache` and returned to the
/// caller.
llvm::SmallVector<std::uint32_t> Workspace::on_file_saved(std::uint32_t path_id) {
    // Refresh the module declaration recorded for this file. When the file
    // cannot be read the existing mapping is left untouched.
    auto saved_path = path_pool.resolve(path_id);
    auto buffer = llvm::MemoryBuffer::getFile(saved_path);
    if(buffer) {
        auto scanned = scan((*buffer)->getBuffer());
        if(scanned.module_name.empty()) {
            path_to_module.erase(path_id);
        } else {
            path_to_module[path_id] = std::move(scanned.module_name);
        }
    }

    llvm::SmallVector<std::uint32_t> dirtied;
    if(!compile_graph) {
        return dirtied;
    }

    auto affected = compile_graph->update(path_id);
    for(auto unit_id: affected) {
        dirtied.push_back(unit_id);
        pcm_paths.erase(unit_id);
        pcm_cache.erase(unit_id);
    }
    return dirtied;
}
|
|
|
|
void Workspace::on_file_closed(std::uint32_t path_id) {
|
|
if(compile_graph && compile_graph->has_unit(path_id)) {
|
|
compile_graph->update(path_id);
|
|
}
|
|
pch_cache.erase(path_id);
|
|
}
|
|
|
|
/// Hash the contents of the file at `path` with xxh3 (64-bit).
///
/// Returns 0 when the file cannot be read.
std::uint64_t hash_file(llvm::StringRef path) {
    auto contents = llvm::MemoryBuffer::getFile(path);
    if(contents) {
        return llvm::xxh3_64bits((*contents)->getBuffer());
    }
    return 0;
}
|
|
|
|
/// Record the current state of `deps`: one interned path id and one content
/// hash per file, plus the time at which the snapshot was started.
DepsSnapshot capture_deps_snapshot(PathPool& pool, llvm::ArrayRef<std::string> deps) {
    // The timestamp is taken BEFORE any hashing to avoid TOCTOU: a file
    // modified while hashing is in progress ends up with an mtime later than
    // build_at, which makes the later check re-hash it (Layer 2).
    const auto started = std::chrono::system_clock::now();

    DepsSnapshot snapshot;
    snapshot.build_at = std::chrono::system_clock::to_time_t(started);

    const auto count = deps.size();
    snapshot.path_ids.reserve(count);
    snapshot.hashes.reserve(count);

    for(std::size_t i = 0; i != count; ++i) {
        const std::string& dep = deps[i];
        snapshot.path_ids.push_back(pool.intern(dep));
        snapshot.hashes.push_back(hash_file(dep));
    }
    return snapshot;
}
|
|
|
|
bool deps_changed(const PathPool& pool, const DepsSnapshot& snap) {
|
|
for(std::size_t i = 0; i < snap.path_ids.size(); ++i) {
|
|
auto path = pool.resolve(snap.path_ids[i]);
|
|
llvm::sys::fs::file_status status;
|
|
if(auto ec = llvm::sys::fs::status(path, status)) {
|
|
// File disappeared — definitely changed.
|
|
if(snap.hashes[i] != 0)
|
|
return true;
|
|
continue;
|
|
}
|
|
|
|
// Layer 1: mtime check (cheap, stat only).
|
|
auto current_mtime = llvm::sys::toTimeT(status.getLastModificationTime());
|
|
if(current_mtime <= snap.build_at)
|
|
continue;
|
|
|
|
// Layer 2: mtime is newer — re-hash content to confirm actual change.
|
|
auto current_hash = hash_file(path);
|
|
if(current_hash != snap.hashes[i])
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// File-local data model for cache.json. These aggregates are what
// Workspace::load_cache parses via from_json and what Workspace::save_cache
// fills in and serializes via to_json.
namespace {
|
|
|
|
/// One dependency of a cached artifact: the file and the content hash that
/// was stored for it in the dependency snapshot.
struct CacheDepEntry {
|
|
std::uint32_t path; // index into CacheData::paths
|
|
// Hash copied from the snapshot's `hashes` list for this dependency.
std::uint64_t hash;
|
|
};
|
|
|
|
/// Persisted description of one cached PCH.
struct CachePCHEntry {
|
|
// File name only; load_cache joins it onto <cache_dir>/cache/pch.
std::string filename;
|
|
std::uint32_t source_file; // index into CacheData::paths
|
|
// Copied to/from the `hash` field of the in-memory pch_cache entry.
std::uint64_t hash;
|
|
// Copied to/from the `bound` field of the in-memory pch_cache entry.
std::uint32_t bound;
|
|
// Snapshot timestamp (the snapshot's `build_at`).
std::int64_t build_at;
|
|
// Dependencies recorded in the snapshot.
std::vector<CacheDepEntry> deps;
|
|
};
|
|
|
|
/// Persisted description of one cached PCM.
struct CachePCMEntry {
|
|
// File name only; load_cache joins it onto <cache_dir>/cache/pcm.
std::string filename;
|
|
// Index into CacheData::paths (resolved the same way as for PCH entries).
std::uint32_t source_file;
|
|
// Module name recorded for the source file; empty when none was known at
// save time. load_cache only uses it for logging.
std::string module_name;
|
|
// Snapshot timestamp (the snapshot's `build_at`).
std::int64_t build_at;
|
|
// Dependencies recorded in the snapshot.
std::vector<CacheDepEntry> deps;
|
|
};
|
|
|
|
/// Root object of cache.json.
struct CacheData {
|
|
// Deduplicated path table; entries refer to it by index.
std::vector<std::string> paths;
|
|
std::vector<CachePCHEntry> pch;
|
|
std::vector<CachePCMEntry> pcm;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
void Workspace::load_cache() {
|
|
if(config.project.cache_dir.empty())
|
|
return;
|
|
|
|
auto cache_path = path::join(config.project.cache_dir, "cache", "cache.json");
|
|
auto content = fs::read(cache_path);
|
|
if(!content) {
|
|
LOG_DEBUG("No cache.json found at {}", cache_path);
|
|
return;
|
|
}
|
|
|
|
CacheData data;
|
|
auto status = kota::codec::json::from_json(*content, data);
|
|
if(!status) {
|
|
LOG_WARN("Failed to parse cache.json");
|
|
return;
|
|
}
|
|
|
|
auto resolve = [&](std::uint32_t idx) -> llvm::StringRef {
|
|
return idx < data.paths.size() ? llvm::StringRef(data.paths[idx]) : "";
|
|
};
|
|
|
|
auto load_deps = [&](std::int64_t build_at, const auto& dep_entries) -> DepsSnapshot {
|
|
DepsSnapshot deps;
|
|
deps.build_at = build_at;
|
|
for(auto& dep: dep_entries) {
|
|
auto dep_path = resolve(dep.path);
|
|
if(dep_path.empty())
|
|
continue;
|
|
deps.path_ids.push_back(path_pool.intern(dep_path));
|
|
deps.hashes.push_back(dep.hash);
|
|
}
|
|
return deps;
|
|
};
|
|
|
|
for(auto& entry: data.pch) {
|
|
auto pch_path = path::join(config.project.cache_dir, "cache", "pch", entry.filename);
|
|
auto source = resolve(entry.source_file);
|
|
if(!llvm::sys::fs::exists(pch_path) || source.empty())
|
|
continue;
|
|
|
|
auto path_id = path_pool.intern(source);
|
|
auto& st = pch_cache[path_id];
|
|
st.path = pch_path;
|
|
st.hash = entry.hash;
|
|
st.bound = entry.bound;
|
|
st.deps = load_deps(entry.build_at, entry.deps);
|
|
|
|
LOG_DEBUG("Loaded cached PCH: {} -> {}", source, pch_path);
|
|
}
|
|
|
|
for(auto& entry: data.pcm) {
|
|
auto pcm_path = path::join(config.project.cache_dir, "cache", "pcm", entry.filename);
|
|
auto source = resolve(entry.source_file);
|
|
if(!llvm::sys::fs::exists(pcm_path) || source.empty())
|
|
continue;
|
|
|
|
auto path_id = path_pool.intern(source);
|
|
pcm_cache[path_id] = {pcm_path, load_deps(entry.build_at, entry.deps)};
|
|
pcm_paths[path_id] = pcm_path;
|
|
|
|
LOG_DEBUG("Loaded cached PCM: {} (module {}) -> {}", source, entry.module_name, pcm_path);
|
|
}
|
|
|
|
LOG_INFO("Loaded cache.json: {} PCH entries, {} PCM entries",
|
|
pch_cache.size(),
|
|
pcm_cache.size());
|
|
}
|
|
|
|
void Workspace::save_cache() {
|
|
if(config.project.cache_dir.empty())
|
|
return;
|
|
|
|
CacheData data;
|
|
std::unordered_map<std::string, std::uint32_t> index_map;
|
|
|
|
auto intern = [&](std::uint32_t runtime_path_id) -> std::uint32_t {
|
|
auto path = std::string(path_pool.resolve(runtime_path_id));
|
|
auto [it, inserted] =
|
|
index_map.try_emplace(path, static_cast<std::uint32_t>(data.paths.size()));
|
|
if(inserted) {
|
|
data.paths.push_back(path);
|
|
}
|
|
return it->second;
|
|
};
|
|
|
|
for(auto& [path_id, st]: pch_cache) {
|
|
if(st.path.empty())
|
|
continue;
|
|
|
|
CachePCHEntry entry;
|
|
entry.filename = std::string(path::filename(st.path));
|
|
entry.source_file = intern(path_id);
|
|
entry.hash = st.hash;
|
|
entry.bound = st.bound;
|
|
entry.build_at = st.deps.build_at;
|
|
for(std::size_t i = 0; i < st.deps.path_ids.size(); ++i) {
|
|
entry.deps.push_back({intern(st.deps.path_ids[i]), st.deps.hashes[i]});
|
|
}
|
|
data.pch.push_back(std::move(entry));
|
|
}
|
|
|
|
for(auto& [path_id, st]: pcm_cache) {
|
|
if(st.path.empty())
|
|
continue;
|
|
|
|
CachePCMEntry entry;
|
|
entry.filename = std::string(path::filename(st.path));
|
|
entry.source_file = intern(path_id);
|
|
auto mod_it = path_to_module.find(path_id);
|
|
entry.module_name = mod_it != path_to_module.end() ? mod_it->second : "";
|
|
entry.build_at = st.deps.build_at;
|
|
for(std::size_t i = 0; i < st.deps.path_ids.size(); ++i) {
|
|
entry.deps.push_back({intern(st.deps.path_ids[i]), st.deps.hashes[i]});
|
|
}
|
|
data.pcm.push_back(std::move(entry));
|
|
}
|
|
|
|
auto json_str = kota::codec::json::to_json(data);
|
|
if(!json_str) {
|
|
LOG_WARN("Failed to serialize cache.json");
|
|
return;
|
|
}
|
|
|
|
auto cache_path = path::join(config.project.cache_dir, "cache", "cache.json");
|
|
auto tmp_path = cache_path + ".tmp";
|
|
auto write_result = fs::write(tmp_path, *json_str);
|
|
if(!write_result) {
|
|
LOG_WARN("Failed to write cache.json.tmp: {}", write_result.error().message());
|
|
return;
|
|
}
|
|
auto rename_result = fs::rename(tmp_path, cache_path);
|
|
if(!rename_result) {
|
|
LOG_WARN("Failed to rename cache.json.tmp to cache.json: {}",
|
|
rename_result.error().message());
|
|
}
|
|
}
|
|
|
|
void Workspace::cleanup_cache(int max_age_days) {
|
|
if(config.project.cache_dir.empty())
|
|
return;
|
|
|
|
auto now = std::chrono::system_clock::now();
|
|
auto max_age = std::chrono::hours(max_age_days * 24);
|
|
|
|
for(auto* subdir: {"cache/pch", "cache/pcm"}) {
|
|
auto dir = path::join(config.project.cache_dir, subdir);
|
|
std::error_code ec;
|
|
for(auto it = llvm::sys::fs::directory_iterator(dir, ec);
|
|
!ec && it != llvm::sys::fs::directory_iterator();
|
|
it.increment(ec)) {
|
|
llvm::sys::fs::file_status status;
|
|
if(auto stat_ec = llvm::sys::fs::status(it->path(), status))
|
|
continue;
|
|
|
|
auto mtime = status.getLastModificationTime();
|
|
auto age = now - mtime;
|
|
if(age > max_age) {
|
|
llvm::sys::fs::remove(it->path());
|
|
LOG_DEBUG("Cleaned up stale cache file: {}", it->path());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void Workspace::build_module_map() {
|
|
for(auto& [module_name, path_ids]: dep_graph.modules()) {
|
|
for(auto path_id: path_ids) {
|
|
path_to_module[path_id] = module_name.str();
|
|
}
|
|
}
|
|
}
|
|
|
|
void Workspace::fill_pcm_deps(std::unordered_map<std::string, std::string>& pcms,
|
|
std::uint32_t exclude_path_id) const {
|
|
for(auto& [pid, pcm_path]: pcm_paths) {
|
|
if(pid == exclude_path_id)
|
|
continue;
|
|
auto mod_it = path_to_module.find(pid);
|
|
if(mod_it != path_to_module.end()) {
|
|
pcms[mod_it->second] = pcm_path;
|
|
}
|
|
}
|
|
}
|
|
|
|
void Workspace::cancel_all() {
|
|
if(compile_graph) {
|
|
compile_graph->cancel_all();
|
|
}
|
|
}
|
|
|
|
} // namespace clice
|