## Summary

Implement the complete index system for cross-file LSP features. This adds persistent two-tier indexing (ProjectIndex + per-file MergedIndex shards), background indexing triggered on idle, and index-based query handlers for major LSP requests.

### Index Data Layer (`src/index/`)

- **TUIndex**: Add binary serialization/deserialization via FlatBuffers, enabling IPC between the stateless worker and the master server
- **ProjectIndex**: Add symbol name/kind storage, `PathPool` path normalization (backslash -> forward slash), and binary persistence
- **MergedIndex**: Add `content` field to store file content for reliable offset<->position mapping; add `removed` bitmap for garbage collection of deleted entries; filter removed IDs in `lookup()` queries
- **schema.fbs**: Add TUIndex tables, `Symbol.name` field, `MergedIndex.removed` bitmap and `MergedIndex.content` string

### Server (`src/server/`)

- **Background indexing**: Idle-triggered coroutine dequeues files from CDB, dispatches `IndexParams` to stateless workers, merges returned `TUIndex` into ProjectIndex/MergedIndex, and persists to `.clice/index/`
- **Index persistence**: `save_index()` / `load_index()` for startup restoration; only rewrites shards flagged `need_rewrite()`
- **LSP handlers**:
  - `textDocument/definition` -- index-first lookup with stateful worker fallback
  - `textDocument/references` -- cross-file reference query via index
  - `callHierarchy/prepare`, `incomingCalls`, `outgoingCalls` -- Caller/Callee relation traversal
  - `typeHierarchy/prepare`, `supertypes`, `subtypes` -- Base/Derived relation traversal
  - `workspace/symbol` -- case-insensitive substring search over ProjectIndex symbols
- **Stateless worker**: Add `Index` request handler that builds `TUIndex` from compiled AST and returns serialized data
- **Config**: Add `enable_indexing` (default true) and `idle_timeout_ms` (default 3000ms)

### Fixes and Cross-platform

- **ElaboratedType handling** in `decl_of()` for correct Base/Derived relation emission
- **Windows path normalization** in `PathPool::intern()` and `ProjectIndex::from()` (backslash -> forward slash)
- **`.gitattributes`**: Force LF in `tests/data/**` to prevent CRLF byte-offset mismatches on Windows CI
- **Test fixture**: Clean `.clice/` before each test for hermetic index state

### Tests

- **370-line** `index_query_tests.cpp`: unit tests for occurrence lookup, relation queries, content retrieval, removed bitmap filtering
- **282-line** `test_index.py`: E2E integration tests for GoToDefinition, FindReferences, CallHierarchy (prepare/incoming/outgoing), TypeHierarchy (prepare/supertypes/subtypes), WorkspaceSymbol
- Updated existing MergedIndex and ProjectIndex tests for new schema fields

## Test plan

- [x] 414 C++ unit tests pass (including new IndexQuery, MergedIndex, ProjectIndex tests)
- [x] 69 Python integration tests pass (including 10 new index feature tests)
- [x] CI green on Linux, macOS, Windows
- [ ] Manual smoke test with VSCode extension

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
371 lines
11 KiB
C++
371 lines
11 KiB
C++
#include "test/test.h"
|
|
#include "test/tester.h"
|
|
#include "index/merged_index.h"
|
|
#include "index/project_index.h"
|
|
#include "index/tu_index.h"
|
|
|
|
namespace clice::testing {
|
|
namespace {
|
|
|
|
TEST_SUITE(IndexQuery, Tester) {
|
|
|
|
index::ProjectIndex project_index;
|
|
llvm::DenseMap<std::uint32_t, index::MergedIndex> merged_indices;
|
|
|
|
/// Build TUIndex from code and merge into ProjectIndex + MergedIndex shards.
|
|
void build_and_merge(llvm::StringRef code,
|
|
std::source_location location = std::source_location::current()) {
|
|
add_main("main.cpp", code);
|
|
ASSERT_TRUE(compile());
|
|
|
|
auto tu_index = index::TUIndex::build(*unit);
|
|
auto file_ids_map = project_index.merge(tu_index);
|
|
|
|
// Merge main file index as compilation context.
|
|
auto main_tu_path_id = static_cast<std::uint32_t>(tu_index.graph.paths.size() - 1);
|
|
auto main_global_id = file_ids_map[main_tu_path_id];
|
|
|
|
std::vector<index::IncludeLocation> include_locs;
|
|
for(auto& loc: tu_index.graph.locations) {
|
|
index::IncludeLocation remapped = loc;
|
|
remapped.path_id = file_ids_map[loc.path_id];
|
|
include_locs.push_back(remapped);
|
|
}
|
|
|
|
merged_indices[main_global_id].merge(main_global_id,
|
|
tu_index.built_at,
|
|
std::move(include_locs),
|
|
tu_index.main_file_index,
|
|
{});
|
|
|
|
// Merge header file indices.
|
|
for(auto& [fid, file_idx]: tu_index.file_indices) {
|
|
auto tu_pid = tu_index.graph.path_id(fid);
|
|
auto global_pid = file_ids_map[tu_pid];
|
|
auto include_id = tu_index.graph.include_location_id(fid);
|
|
merged_indices[global_pid].merge(global_pid, include_id, file_idx, {});
|
|
}
|
|
}
|
|
|
|
/// Reset index state between test cases.
|
|
void reset() {
|
|
project_index = index::ProjectIndex();
|
|
merged_indices.clear();
|
|
clear();
|
|
}
|
|
|
|
/// Lookup the symbol hash at a given annotation offset in any merged index.
|
|
index::SymbolHash lookup_symbol(llvm::StringRef pos) {
|
|
auto offset = point(pos);
|
|
index::SymbolHash result = 0;
|
|
for(auto& [path_id, merged]: merged_indices) {
|
|
merged.lookup(offset, [&](const index::Occurrence& o) {
|
|
if(o.range.contains(offset)) {
|
|
result = o.target;
|
|
return false;
|
|
}
|
|
return true;
|
|
});
|
|
if(result != 0)
|
|
break;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/// Find all relations of a given kind for a symbol across all merged indices.
|
|
std::vector<index::Relation> find_relations(index::SymbolHash symbol, RelationKind kind) {
|
|
std::vector<index::Relation> results;
|
|
|
|
auto sym_it = project_index.symbols.find(symbol);
|
|
if(sym_it == project_index.symbols.end())
|
|
return results;
|
|
|
|
// Search every shard that references this symbol.
|
|
for(auto file_id: sym_it->second.reference_files) {
|
|
auto it = merged_indices.find(file_id);
|
|
if(it == merged_indices.end())
|
|
continue;
|
|
|
|
it->second.lookup(symbol, kind, [&](const index::Relation& r) {
|
|
results.push_back(r);
|
|
return true;
|
|
});
|
|
}
|
|
|
|
// Also search all shards (symbol may appear in files not tracked by reference_files).
|
|
if(results.empty()) {
|
|
for(auto& [pid, merged]: merged_indices) {
|
|
merged.lookup(symbol, kind, [&](const index::Relation& r) {
|
|
results.push_back(r);
|
|
return true;
|
|
});
|
|
}
|
|
}
|
|
|
|
return results;
|
|
}
|
|
|
|
// ============================================================
|
|
// Test cases
|
|
// ============================================================
|
|
|
|
TEST_CASE(GoToDefinition) {
    // From a call site, the Definition relation must point at the range
    // annotated as `def`.
    reset();
    build_and_merge(R"(
int $(decl)foo();

int @def[$(def)foo]() { return 42; }

int main() {
return $(use)foo();
}
)");

    auto hash = lookup_symbol("use");
    ASSERT_NE(hash, 0UL);

    auto defs = find_relations(hash, RelationKind::Definition);
    ASSERT_FALSE(defs.empty());

    auto expected = range("def");
    ASSERT_EQ(dump(defs.front().range), dump(expected));
}
|
|
|
|
TEST_CASE(FindReferences) {
    // Starting from the declaration, at least two Reference relations must be
    // reported for `foo`.
    reset();
    build_and_merge(R"(
int $(decl)foo();

int $(def)foo() { return 42; }

int bar() {
return $(ref1)foo() + $(ref2)foo();
}
)");

    auto hash = lookup_symbol("decl");
    ASSERT_NE(hash, 0UL);

    auto refs = find_relations(hash, RelationKind::Reference);
    ASSERT_GE(refs.size(), 2U);
}
|
|
|
|
TEST_CASE(DeclAndDef) {
    // A symbol with a separate declaration and definition must expose both
    // relation kinds; the first Definition must match the `def` range.
    reset();
    build_and_merge(R"(
int $(decl)foo();
int @def[$(def)foo]() { return 42; }
)");

    auto hash = lookup_symbol("decl");
    ASSERT_NE(hash, 0UL);

    auto decls = find_relations(hash, RelationKind::Declaration);
    ASSERT_FALSE(decls.empty());

    auto defs = find_relations(hash, RelationKind::Definition);
    ASSERT_FALSE(defs.empty());

    auto expected_def = range("def");
    ASSERT_EQ(dump(defs.front().range), dump(expected_def));
}
|
|
|
|
TEST_CASE(CallerCallee) {
    // `caller` must have at least one Callee relation and `callee` at least
    // one Caller relation.
    reset();
    build_and_merge(R"(
void $(callee_def)callee() {}

void $(caller_def)caller() {
$(call_site)callee();
}
)");

    auto caller_hash = lookup_symbol("caller_def");
    ASSERT_NE(caller_hash, 0UL);

    auto callees = find_relations(caller_hash, RelationKind::Callee);
    ASSERT_FALSE(callees.empty());

    auto callee_hash = lookup_symbol("callee_def");
    ASSERT_NE(callee_hash, 0UL);

    auto callers = find_relations(callee_hash, RelationKind::Caller);
    ASSERT_FALSE(callers.empty());
}
|
|
|
|
TEST_CASE(OverrideRelation) {
    // An overriding method and the method it overrides must be linked by
    // Interface / Implementation relations.
    reset();
    build_and_merge(R"(
struct Base {
virtual void $(base_method)method() {}
};

struct Derived : Base {
void $(derived_method)method() override {}
};
)");

    // Derived::method should have Interface relation to Base::method.
    auto derived_hash = lookup_symbol("derived_method");
    ASSERT_NE(derived_hash, 0UL);

    auto interfaces = find_relations(derived_hash, RelationKind::Interface);
    ASSERT_FALSE(interfaces.empty());

    // Base::method should have Implementation relation.
    auto base_hash = lookup_symbol("base_method");
    ASSERT_NE(base_hash, 0UL);

    auto impls = find_relations(base_hash, RelationKind::Implementation);
    ASSERT_FALSE(impls.empty());
}
|
|
|
|
TEST_CASE(BaseAndDerived) {
    // The derived class must carry a Base relation in at least one shard.
    reset();
    build_and_merge(R"(
struct $(base_cls)Animal {
virtual void speak() {}
};

struct $(derived_cls)Dog : $(base_ref)Animal {
void speak() override {}
};
)");

    auto derived_hash = lookup_symbol("derived_cls");
    ASSERT_NE(derived_hash, 0UL);

    // Look for any Base relation in any shard. The shards are queried directly
    // here, without consulting project_index.symbols.
    bool found_base = false;
    for(auto& [pid, merged]: merged_indices) {
        merged.lookup(derived_hash, RelationKind::Base, [&](const index::Relation&) {
            found_base = true;
            return false;
        });

        // One hit is enough; no need to query the remaining shards.
        if(found_base)
            break;
    }
    ASSERT_TRUE(found_base);
}
|
|
|
|
TEST_CASE(ClassTemplate) {
    // A use of a class template specialization must resolve to a symbol that
    // has at least one Definition relation.
    reset();
    build_and_merge(R"(
template <typename T>
struct @primary[$(primary)foo] {};

$(use)foo<int> x;
)");

    auto hash = lookup_symbol("use");
    ASSERT_NE(hash, 0UL);

    auto defs = find_relations(hash, RelationKind::Definition);
    ASSERT_FALSE(defs.empty());
}
|
|
|
|
TEST_CASE(SymbolKinds) {
    // ProjectIndex must record the expected SymbolKind for a struct, a
    // function and a variable.
    reset();
    build_and_merge(R"(
struct $(cls)MyClass {};
void $(func)myFunc() {}
int $(var)myVar = 0;
)");

    auto cls_hash = lookup_symbol("cls");
    ASSERT_NE(cls_hash, 0UL);
    ASSERT_TRUE(project_index.symbols.contains(cls_hash));
    ASSERT_EQ(project_index.symbols[cls_hash].kind.value(), SymbolKind(SymbolKind::Struct).value());

    auto func_hash = lookup_symbol("func");
    ASSERT_NE(func_hash, 0UL);
    ASSERT_TRUE(project_index.symbols.contains(func_hash));
    ASSERT_EQ(project_index.symbols[func_hash].kind.value(),
              SymbolKind(SymbolKind::Function).value());

    auto var_hash = lookup_symbol("var");
    ASSERT_NE(var_hash, 0UL);
    ASSERT_TRUE(project_index.symbols.contains(var_hash));
    ASSERT_EQ(project_index.symbols[var_hash].kind.value(),
              SymbolKind(SymbolKind::Variable).value());
}
|
|
|
|
TEST_CASE(ReferenceFiles) {
    // After merging, the symbol's `reference_files` set must not be empty.
    reset();
    build_and_merge(R"(
int $(target)target = 42;
int a = $(ref)target + 1;
)");

    auto hash = lookup_symbol("target");
    ASSERT_NE(hash, 0UL);

    auto sym_it = project_index.symbols.find(hash);
    ASSERT_TRUE(sym_it != project_index.symbols.end());

    // reference_files should contain at least the main file.
    ASSERT_FALSE(sym_it->second.reference_files.isEmpty());
}
|
|
|
|
TEST_CASE(CrossFileQuery) {
    // A symbol declared in a header and used in the main file must be
    // resolvable from the main-file shard, and its Declaration relation must
    // be discoverable through find_relations().
    reset();

    add_file("header.h", R"(
#pragma once
int $(hdr_decl)helper();
)");
    add_main("main.cpp", R"(
#include "header.h"

int main() {
return $(use_helper)helper();
}
)");
    ASSERT_TRUE(compile());

    auto tu_index = index::TUIndex::build(*unit);
    auto file_ids_map = project_index.merge(tu_index);

    // Merge main file.
    // The main file is taken to be the last entry of the TU's path table.
    auto main_tu_path_id = static_cast<std::uint32_t>(tu_index.graph.paths.size() - 1);
    auto main_global_id = file_ids_map[main_tu_path_id];

    // Translate each include location's TU-local path id through the map
    // returned by project_index.merge().
    std::vector<index::IncludeLocation> include_locs;
    for(auto& loc: tu_index.graph.locations) {
        index::IncludeLocation remapped = loc;
        remapped.path_id = file_ids_map[loc.path_id];
        include_locs.push_back(remapped);
    }
    merged_indices[main_global_id].merge(main_global_id,
                                         tu_index.built_at,
                                         std::move(include_locs),
                                         tu_index.main_file_index,
                                         {});

    // Merge header file indices.
    for(auto& [fid, file_idx]: tu_index.file_indices) {
        auto tu_pid = tu_index.graph.path_id(fid);
        auto global_pid = file_ids_map[tu_pid];
        auto include_id = tu_index.graph.include_location_id(fid);
        merged_indices[global_pid].merge(global_pid, include_id, file_idx, {});
    }

    // Query: from usage in main.cpp, find the symbol via merged index.
    auto use_offset = point("use_helper");
    index::SymbolHash helper_hash = 0;
    merged_indices[main_global_id].lookup(use_offset, [&](const index::Occurrence& o) {
        if(o.range.contains(use_offset)) {
            helper_hash = o.target;
            return false;
        }
        return true;
    });
    ASSERT_NE(helper_hash, 0UL);

    // Find declaration across all shards -- should find it in header shard.
    auto decls = find_relations(helper_hash, RelationKind::Declaration);
    ASSERT_FALSE(decls.empty());
}
|
|
|
|
}; // TEST_SUITE(IndexQuery)
|
|
} // namespace
|
|
} // namespace clice::testing
|