Files
clice/src/index/tu_index.cpp
ykiko ada202e489 feat(index): piggyback indexing on PCH/PCM builds and open-file compiles (#402)
## Summary

Piggyback index construction onto existing compilation steps,
eliminating redundant recompilation in background indexing:

- **`TUIndex::build` gains `interested_only` parameter**: `true`
traverses only the main file's top-level decls; `false` (default)
traverses the full AST
- **PCH build indexes preamble headers**: stateless worker calls
`TUIndex::build(unit)` (full traversal) after successful `BuildPCH`,
clears `main_file_index`, serializes and sends back; master merges into
MergedIndex
- **PCM build indexes module interface**: stateless worker calls
`TUIndex::build(unit, true)` after successful `BuildPCM`; master merges
into MergedIndex
- **Open-file compile indexes main file**: stateful worker calls
`TUIndex::build(unit, true)` after successful `Compile`, serialized in
`CompileResult`
- **New `OpenFileIndex` in-memory structure**: master holds `FileIndex +
SymbolTable + buffer text` per open file — not persisted to disk, not
merged, discarded on close
- **Dual-source query path**: `query_index_relations`,
`lookup_symbol_at_position`, `find_symbol_definition_location`, all
hierarchy handlers, and `workspace/symbol` check `OpenFileIndex` first
(fresher), then fall back to `MergedIndex` (disk-indexed)
- **Background indexing skips open files**: checked via
`documents.count()`; on `didClose` the file is re-queued into
`index_queue`
- **`didSave` re-queues non-open dependents**: dirtied files from
`compile_graph->update()` that are not open get pushed into
`index_queue` for background re-indexing
- **Extract `lookup_occurrence` helper**: binary search + forward scan
picking the innermost (narrowest) match, replacing a broken
`while/break/break` pattern
- **Extract `find_symbol_info` helper**: consolidates 6 duplicated
"search open file indices then ProjectIndex" lookups into one method
- **`resolve_hierarchy_item` checks open file indices**: no longer
limited to ProjectIndex only

## Test plan

- [x] 465 unit tests pass
- [x] 105 integration tests pass (including all `test_index` cases:
GoToDefinition, FindReferences, CallHierarchy, TypeHierarchy,
WorkspaceSymbol)
- [x] Manual: open a file and immediately use GoToDefinition — should
work without waiting for background indexing
- [x] Manual: close a file and verify background indexing picks it up
and produces a MergedIndex shard

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-06 20:50:38 +08:00

318 lines
12 KiB
C++

#include "index/tu_index.h"
#include <tuple>
#include "index/serialization.h"
#include "semantic/ast_utility.h"
#include "semantic/semantic_visitor.h"
#include "llvm/Support/SHA256.h"
namespace clice::index {
namespace {
/// Semantic visitor that fills a `TUIndex` while walking a compilation unit.
///
/// The CRTP base invokes the `handle*` callbacks; each callback records
/// occurrences and/or relations into the per-file index that owns the
/// reported location. `build()` drives the traversal and then normalizes
/// the collected data (sorting, de-duplication, main-file extraction).
class Builder : public SemanticVisitor<Builder> {
public:
    /// Sets up the visitor for `unit` and stores the unit's include graph in `result`.
    Builder(TUIndex& result, CompilationUnitRef unit, bool interested_only) :
        SemanticVisitor<Builder>(unit, interested_only), result(result) {
        result.graph = IncludeGraph::from(unit);
    }

    /// Records an occurrence of `decl` at `location` and registers the symbol
    /// (name and kind) in the symbol table the first time it is seen.
    void handleDeclOccurrence(const clang::NamedDecl* decl,
                              RelationKind kind,
                              clang::SourceLocation location) {
        const clang::NamedDecl* normalized = ast::normalize(decl);

        if(location.isMacroID()) {
            auto spelling = unit.spelling_location(location);
            auto expansion = unit.expansion_location(location);

            /// FIXME: For location from macro, we only handle the case that the
            /// spelling and expansion are in the same file currently.
            if(unit.file_id(spelling) != unit.file_id(expansion)) {
                return;
            }

            /// For occurrence, we always use spelling location.
            location = spelling;
        }

        auto [fid, range] = unit.decompose_range(location);
        auto& file_index = result.file_indices[fid];

        auto hash = unit.getSymbolID(normalized).hash;
        auto [entry, inserted] = result.symbols.try_emplace(hash);
        if(inserted) {
            /// Only a freshly inserted symbol needs its metadata filled in.
            entry->second.name = ast::display_name_of(normalized);
            entry->second.kind = SymbolKind::from(normalized);
        }

        file_index.occurrences.emplace_back(range, hash);
    }

    /// Records an occurrence of the macro `def` at `location` together with a
    /// relation of the given `kind` that has no target symbol.
    void handleMacroOccurrence(const clang::MacroInfo* def,
                               RelationKind kind,
                               clang::SourceLocation location) {
        /// FIXME: Figure out when location is MacroID.
        if(location.isMacroID()) {
            return;
        }

        auto [fid, range] = unit.decompose_range(location);
        auto& file_index = result.file_indices[fid];
        auto hash = unit.getSymbolID(def).hash;

        file_index.occurrences.emplace_back(range, hash);

        Relation macro_relation{
            .kind = kind,
            .range = range,
            .target_symbol = 0,
        };
        file_index.relations[hash].emplace_back(macro_relation);
    }

    /// Records a relation of `kind` for `decl`. Which fields of the relation
    /// are populated (range, definition range, target symbol) depends on the
    /// category that `kind` belongs to.
    void handleRelation(const clang::NamedDecl* decl,
                        RelationKind kind,
                        const clang::NamedDecl* target,
                        clang::SourceRange range) {
        /// Hash of the normalized declaration, used both for the owning
        /// symbol and for relation targets.
        auto hash_of = [this](const clang::NamedDecl* named) {
            return unit.getSymbolID(ast::normalize(named)).hash;
        };

        auto [fid, relation_range] = unit.decompose_expansion_range(range);

        Relation relation{.kind = kind};

        if(kind.isDeclOrDef()) {
            relation.range = relation_range;

            /// FIXME: why definition or declaration has invalid source range? implicit node?
            if(auto decl_range = decl->getSourceRange(); decl_range.isValid()) {
                auto [definition_fid, definition_range] =
                    unit.decompose_expansion_range(decl_range);
                assert(fid == definition_fid && "Invalid definition location");
                relation.set_definition_range(definition_range);
            }
        } else if(kind.isReference()) {
            relation.range = relation_range;
            relation.target_symbol = 0;
        } else if(kind.isBetweenSymbol()) {
            /// Symbol-to-symbol relations carry only the target, no range.
            relation.target_symbol = hash_of(target);
        } else if(kind.isCall()) {
            auto callee = hash_of(target);
            relation.range = relation_range;
            relation.target_symbol = callee;
        } else {
            std::unreachable();
        }

        auto& file_index = result.file_indices[fid];
        file_index.relations[hash_of(decl)].emplace_back(relation);
    }

    /// Runs the traversal, then for every file index: sorts and de-duplicates
    /// the relations of each symbol, marks the file as referencing that
    /// symbol, and sorts and de-duplicates the occurrences. The index of the
    /// interested file is finally moved into `result.main_file_index` and
    /// removed from `result.file_indices`.
    void build() {
        run();

        /// Ordering key for relations: kind, then range, then target symbol.
        auto relation_key = [](const Relation& relation) {
            return std::tuple(relation.kind.value(),
                              relation.range.begin,
                              relation.range.end,
                              relation.target_symbol);
        };
        auto relation_less = [&](const Relation& lhs, const Relation& rhs) {
            return relation_key(lhs) < relation_key(rhs);
        };
        auto relation_equal = [](const Relation& lhs, const Relation& rhs) {
            return lhs.kind == rhs.kind && lhs.range == rhs.range &&
                   lhs.target_symbol == rhs.target_symbol;
        };

        /// Ordering key for occurrences: range, then target symbol.
        auto occurrence_key = [](const Occurrence& occurrence) {
            return std::tuple(occurrence.range.begin, occurrence.range.end, occurrence.target);
        };
        auto occurrence_less = [&](const Occurrence& lhs, const Occurrence& rhs) {
            return occurrence_key(lhs) < occurrence_key(rhs);
        };
        auto occurrence_equal = [](const Occurrence& lhs, const Occurrence& rhs) {
            return lhs.range == rhs.range && lhs.target == rhs.target;
        };

        const auto main_fid = unit.interested_file();

        for(auto& [fid, file_index]: result.file_indices) {
            for(auto& [symbol, symbol_relations]: file_index.relations) {
                std::ranges::sort(symbol_relations, relation_less);
                auto [dup_first, dup_last] = std::ranges::unique(symbol_relations, relation_equal);
                symbol_relations.erase(dup_first, dup_last);

                /// Remember that this file holds relations for the symbol.
                result.symbols[symbol].reference_files.add(result.graph.path_id(fid));
            }

            std::ranges::sort(file_index.occurrences, occurrence_less);
            auto [dup_first, dup_last] =
                std::ranges::unique(file_index.occurrences, occurrence_equal);
            file_index.occurrences.erase(dup_first, dup_last);

            if(fid == main_fid) {
                result.main_file_index = std::move(file_index);
            }
        }

        /// The main file lives in `main_file_index`; drop the moved-from entry.
        result.file_indices.erase(main_fid);
    }

private:
    /// Index being populated; owned by the caller.
    TUIndex& result;
};
} // namespace
std::array<std::uint8_t, 32> FileIndex::hash() {
llvm::SHA256 hasher;
using u8 = std::uint8_t;
if(!occurrences.empty()) {
static_assert(sizeof(Occurrence) == sizeof(Range) + sizeof(SymbolHash));
static_assert(sizeof(Occurrence) % 8 == 0);
auto data = reinterpret_cast<u8*>(occurrences.data());
auto size = occurrences.size() * sizeof(Occurrence);
hasher.update(llvm::ArrayRef(data, size));
}
for(auto& [symbol_id, relations]: relations) {
hasher.update(std::bit_cast<std::array<u8, sizeof(symbol_id)>>(symbol_id));
static_assert(sizeof(Relation) ==
sizeof(RelationKind) + 4 + sizeof(Range) + sizeof(SymbolHash));
static_assert(sizeof(Relation) % 8 == 0);
if(!relations.empty()) {
auto data = reinterpret_cast<u8*>(relations.data());
auto size = relations.size() * sizeof(Relation);
hasher.update(llvm::ArrayRef(data, size));
}
}
return hasher.final();
}
/// Builds the index of `unit`.
///
/// The build timestamp of the unit is stored in the result, and
/// `interested_only` is forwarded to the semantic visitor that performs the
/// traversal.
TUIndex TUIndex::build(CompilationUnitRef unit, bool interested_only) {
    TUIndex result;
    result.built_at = unit.build_at();

    /// The builder writes straight into `result`; it is only needed for the traversal.
    Builder(result, unit, interested_only).build();

    return result;
}
/// Serializes this index into a FlatBuffers `binary::TUIndex` and writes the
/// finished buffer to `os`.
///
/// The output contains the build timestamp, the include graph (paths and
/// include locations), the symbol table, every entry of `file_indices`
/// re-keyed by path id, and `main_file_index` as a separate entry.
void TUIndex::serialize(llvm::raw_ostream& os) const {
/// NOTE(review): 4096 is presumably the builder's initial buffer size — confirm.
fbs::FlatBufferBuilder builder(4096);
/// Scratch storage reused for the serialized reference-file bitmap of each symbol.
llvm::SmallVector<char, 1024> buffer;
auto paths =
transform(graph.paths, [&](const std::string& p) { return builder.CreateString(p); });
/// One SymbolEntry per symbol: id, name, kind and the bitmap of referencing files.
auto syms = transform(symbols, [&](auto&& value) {
auto& [symbol_id, symbol] = value;
/// Size the scratch buffer for this symbol's bitmap, then write the bitmap into it.
/// NOTE(review): the `false` flag is presumably the bitmap's "portable" option — confirm.
buffer.clear();
buffer.resize_for_overwrite(symbol.reference_files.getSizeInBytes(false));
symbol.reference_files.write(buffer.data(), false);
return binary::CreateSymbolEntry(builder,
symbol_id,
binary::CreateSymbol(builder,
CreateString(builder, symbol.name),
symbol.kind.value(),
CreateVector(builder, buffer)));
});
/// Serialize a single FileIndex into a TUFileIndexEntry.
auto serialize_file_index = [&](std::uint32_t fid, const FileIndex& index) {
auto occs = CreateStructVector<binary::Occurrence>(builder, index.occurrences);
auto rels = transform(index.relations, [&](auto&& value) {
auto& [symbol_id, relations] = value;
return binary::CreateTUFileRelationsEntry(
builder,
symbol_id,
CreateStructVector<binary::Relation>(builder, relations));
});
return binary::CreateTUFileIndexEntry(builder, fid, occs, CreateVector(builder, rels));
};
/// Convert FileID-keyed file_indices to path_id-keyed entries.
llvm::SmallVector<fbs::Offset<binary::TUFileIndexEntry>> file_idx_vec;
for(auto& [fid, index]: file_indices) {
auto pid = graph.path_id(fid);
file_idx_vec.push_back(serialize_file_index(pid, index));
}
/// Main file is the last path in graph.paths (convention from IncludeGraph).
/// NOTE(review): `graph.paths.size() - 1` wraps around when `graph.paths` is
/// empty — confirm that the include graph always contains the main file.
auto main_idx =
serialize_file_index(static_cast<std::uint32_t>(graph.paths.size() - 1), main_file_index);
/// Assemble the root table from the pieces created above.
auto tu_index =
binary::CreateTUIndex(builder,
static_cast<std::uint64_t>(built_at.count()),
CreateVector(builder, paths),
CreateStructVector<binary::IncludeLocation>(builder, graph.locations),
CreateVector(builder, syms),
builder.CreateVector(file_idx_vec.data(), file_idx_vec.size()),
main_idx);
builder.Finish(tu_index);
/// Emit the finished buffer as raw bytes.
os.write(safe_cast<const char>(builder.GetBufferPointer()), builder.GetSize());
}
/// Reconstructs a `TUIndex` from a FlatBuffers-encoded `binary::TUIndex`.
///
/// @param data Pointer to the start of the serialized buffer. The buffer is
///             not verified here; the caller must pass a well-formed buffer.
/// @return The deserialized index. Per-file indices are stored in
///         `path_file_indices`, keyed by path id rather than `clang::FileID`.
///
/// Optional (non-scalar) fields that are absent from the buffer are treated
/// as empty. FlatBuffers accessors return `nullptr` for such fields, so every
/// one of them is checked before it is dereferenced.
TUIndex TUIndex::from(const void* data) {
    auto root = fbs::GetRoot<binary::TUIndex>(data);

    TUIndex index;
    index.built_at = std::chrono::milliseconds(root->built_at());

    if(auto paths = root->paths()) {
        for(auto p: *paths) {
            index.graph.paths.emplace_back(p->str());
        }
    }

    if(auto locations = root->locations()) {
        for(auto loc: *locations) {
            index.graph.locations.emplace_back(*safe_cast<IncludeLocation>(loc));
        }
    }

    if(auto symbols = root->symbols()) {
        for(auto entry: *symbols) {
            /// The symbol is registered even if its payload is missing, so that
            /// the id is still known to the symbol table.
            auto& symbol = index.symbols[entry->symbol_id()];

            auto payload = entry->symbol();
            if(!payload) {
                continue;
            }

            if(auto name = payload->name()) {
                symbol.name = name->str();
            }
            symbol.kind = SymbolKind(static_cast<std::uint8_t>(payload->kind()));
            if(auto refs = payload->refs()) {
                symbol.reference_files = read_bitmap(refs);
            }
        }
    }

    /// Helper to deserialize a TUFileIndexEntry into a FileIndex.
    auto deserialize_file_index = [](const binary::TUFileIndexEntry* entry) -> FileIndex {
        FileIndex fi;

        if(auto occurrences = entry->occurrences()) {
            fi.occurrences.reserve(occurrences->size());
            for(auto o: *occurrences) {
                fi.occurrences.emplace_back(*safe_cast<Occurrence>(o));
            }
        }

        if(auto relation_entries = entry->relations()) {
            for(auto rel_entry: *relation_entries) {
                /// The symbol key is created even when it carries no relations.
                auto& rels = fi.relations[rel_entry->symbol()];
                if(auto relations = rel_entry->relations()) {
                    rels.reserve(relations->size());
                    for(auto r: *relations) {
                        rels.emplace_back(*safe_cast<Relation>(r));
                    }
                }
            }
        }

        return fi;
    };

    /// Populate path_file_indices keyed by path_id (no clang::FileID needed).
    if(auto file_indices = root->file_indices()) {
        for(auto entry: *file_indices) {
            index.path_file_indices[entry->file_id()] = deserialize_file_index(entry);
        }
    }

    if(auto main_entry = root->main_file_index()) {
        index.main_file_index = deserialize_file_index(main_entry);
    }

    return index;
}
} // namespace clice::index