## Summary

Implement the complete index system for cross-file LSP features. This adds persistent two-tier indexing (ProjectIndex + per-file MergedIndex shards), background indexing triggered on idle, and index-based query handlers for major LSP requests.

### Index Data Layer (`src/index/`)

- **TUIndex**: Add binary serialization/deserialization via FlatBuffers, enabling IPC between stateless worker and master server
- **ProjectIndex**: Add symbol name/kind storage, `PathPool` path normalization (backslash -> forward slash), and binary persistence
- **MergedIndex**: Add `content` field to store file content for reliable offset<->position mapping; add `removed` bitmap for garbage collection of deleted entries; filter removed IDs in `lookup()` queries
- **schema.fbs**: Add TUIndex tables, `Symbol.name` field, `MergedIndex.removed` bitmap and `MergedIndex.content` string

### Server (`src/server/`)

- **Background indexing**: Idle-triggered coroutine dequeues files from CDB, dispatches `IndexParams` to stateless workers, merges returned `TUIndex` into ProjectIndex/MergedIndex, and persists to `.clice/index/`
- **Index persistence**: `save_index()` / `load_index()` for startup restoration; only rewrites shards flagged `need_rewrite()`
- **LSP handlers**:
  - `textDocument/definition` -- index-first lookup with stateful worker fallback
  - `textDocument/references` -- cross-file reference query via index
  - `callHierarchy/prepare`, `incomingCalls`, `outgoingCalls` -- Caller/Callee relation traversal
  - `typeHierarchy/prepare`, `supertypes`, `subtypes` -- Base/Derived relation traversal
  - `workspace/symbol` -- case-insensitive substring search over ProjectIndex symbols
- **Stateless worker**: Add `Index` request handler that builds `TUIndex` from compiled AST and returns serialized data
- **Config**: Add `enable_indexing` (default true) and `idle_timeout_ms` (default 3000ms)

### Fixes and Cross-platform

- **ElaboratedType handling** in `decl_of()` for correct Base/Derived relation emission
- **Windows path normalization** in `PathPool::intern()` and `ProjectIndex::from()` (backslash -> forward slash)
- **`.gitattributes`**: Force LF in `tests/data/**` to prevent CRLF byte-offset mismatches on Windows CI
- **Test fixture**: Clean `.clice/` before each test for hermetic index state

### Tests

- **370-line** `index_query_tests.cpp`: unit tests for occurrence lookup, relation queries, content retrieval, removed bitmap filtering
- **282-line** `test_index.py`: E2E integration tests for GoToDefinition, FindReferences, CallHierarchy (prepare/incoming/outgoing), TypeHierarchy (prepare/supertypes/subtypes), WorkspaceSymbol
- Updated existing MergedIndex and ProjectIndex tests for new schema fields

## Test plan

- [x] 414 C++ unit tests pass (including new IndexQuery, MergedIndex, ProjectIndex tests)
- [x] 69 Python integration tests pass (including 10 new index feature tests)
- [x] CI green on Linux, macOS, Windows
- [ ] Manual smoke test with VSCode extension

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
206 lines
5.8 KiB
C++
206 lines
5.8 KiB
C++
#include "test/test.h"
|
|
#include "test/tester.h"
|
|
#include "index/project_index.h"
|
|
|
|
namespace clice::testing {
|
|
namespace {
|
|
|
|
TEST_SUITE(ProjectIndex, Tester) {
|
|
|
|
/// Registers `code` as "main.cpp", compiles it and, on success, stores the
/// index built from the compiled unit in `out`.
///
/// @param code  Source text handed to `add_main` under the name "main.cpp".
/// @param out   Receives `index::TUIndex::build(*unit)`; left untouched when
///              compilation fails.
/// @return true if `compile()` succeeded, false otherwise.
bool build_and_index(llvm::StringRef code, index::TUIndex& out) {
    add_main("main.cpp", code);

    if(compile()) {
        out = index::TUIndex::build(*unit);
        return true;
    }

    return false;
}
|
|
|
|
/// Merging a single TU into an empty ProjectIndex must populate the path pool
/// and carry over every symbol hash found in that TU.
TEST_CASE(MergeSingleTU) {
    index::TUIndex tu;
    ASSERT_TRUE(build_and_index(R"(
int foo() { return 42; }
int bar() { return foo() + 1; }
)",
                                tu));

    index::ProjectIndex project;
    // The value returned by merge() is not examined in this test, so it is
    // deliberately not bound to a local (avoids an unused-variable warning).
    project.merge(tu);

    // Path pool should have entries for the TU's files.
    ASSERT_FALSE(project.path_pool.paths.empty());

    // Symbols from the TU should be merged into the project.
    ASSERT_FALSE(project.symbols.empty());

    // Every symbol from the TU should be in the project.
    for(auto& [hash, symbol]: tu.symbols) {
        ASSERT_TRUE(project.symbols.contains(hash));
    }
}
|
|
|
|
/// Merging two separately built TUs into one ProjectIndex must keep the
/// symbol hashes of both.
TEST_CASE(MergeMultipleTUs) {
    index::TUIndex first_tu;
    index::TUIndex second_tu;

    ASSERT_TRUE(build_and_index(R"(
int foo() { return 42; }
)",
                                first_tu));
    ASSERT_TRUE(build_and_index(R"(
int bar() { return 99; }
)",
                                second_tu));

    index::ProjectIndex merged;
    merged.merge(first_tu);
    merged.merge(second_tu);

    // All symbols from both TUs should be present: first TU's hashes are
    // checked first, then the second's.
    for(auto* source: {&first_tu, &second_tu}) {
        for(auto& [hash, symbol]: source->symbols) {
            ASSERT_TRUE(merged.symbols.contains(hash));
        }
    }
}
|
|
|
|
/// Two TUs that include the same header are merged; the header's inline
/// function must appear once in the project with a reference_files bitmap of
/// cardinality at least 2.
TEST_CASE(MergeDuplicateSymbol) {
    // Header text registered identically before each of the two compilations.
    const char* const shared_header = R"(
#pragma once
inline int shared_func() { return 1; }
)";

    // First TU: a.cpp uses shared_func through the header.
    add_file("shared.h", shared_header);
    add_main("a.cpp", R"(
#include "shared.h"
int use_a() { return shared_func(); }
)");
    ASSERT_TRUE(compile());
    auto first_tu = index::TUIndex::build(*unit);

    // Second TU: b.cpp uses the same function through the same header.
    add_file("shared.h", shared_header);
    add_main("b.cpp", R"(
#include "shared.h"
int use_b() { return shared_func(); }
)");
    ASSERT_TRUE(compile());
    auto second_tu = index::TUIndex::build(*unit);

    index::ProjectIndex project;
    project.merge(first_tu);
    project.merge(second_tu);

    // Look up shared_func's hash in the first TU's symbol table; 0 doubles as
    // the "not found" marker.
    index::SymbolHash shared_hash = 0;
    for(auto& [hash, symbol]: first_tu.symbols) {
        if(symbol.name == "shared_func") {
            shared_hash = hash;
            break;
        }
    }
    ASSERT_FALSE(shared_hash == 0);

    // The same hash should exist in project symbols.
    ASSERT_TRUE(project.symbols.contains(shared_hash));

    // reference_files bitmap should contain entries from both TUs.
    ASSERT_TRUE(project.symbols[shared_hash].reference_files.cardinality() >= 2U);
}
|
|
|
|
/// serialize() followed by ProjectIndex::from() must preserve the path-pool
/// size, the symbol count, and each symbol's reference_files cardinality.
TEST_CASE(SerializationRoundTrip) {
    index::TUIndex tu;
    ASSERT_TRUE(build_and_index(R"(
struct Foo { int x; };
void bar(Foo f) { f.x = 42; }
)",
                                tu));

    index::ProjectIndex original;
    original.merge(tu);

    // Write the index into an in-memory buffer.
    llvm::SmallString<4096> bytes;
    llvm::raw_svector_ostream sink(bytes);
    original.serialize(sink);

    // Rebuild an index from the buffer's bytes.
    auto reloaded = index::ProjectIndex::from(bytes.data());

    // Path pools and symbol tables must have matching sizes.
    ASSERT_EQ(original.path_pool.paths.size(), reloaded.path_pool.paths.size());
    ASSERT_EQ(original.symbols.size(), reloaded.symbols.size());

    // Each symbol should be present in the reloaded index with the same
    // reference count.
    for(auto& [hash, symbol]: original.symbols) {
        ASSERT_TRUE(reloaded.symbols.contains(hash));

        const auto expected_refs = symbol.reference_files.cardinality();
        const auto actual_refs = reloaded.symbols[hash].reference_files.cardinality();
        ASSERT_EQ(expected_refs, actual_refs);
    }
}
|
|
|
|
/// The map returned by merge() must have one entry per path in the TU's
/// include graph, and every entry must index into the project path pool.
TEST_CASE(FileIdsMapCorrectness) {
    index::TUIndex tu;
    ASSERT_TRUE(build_and_index(R"(
int x = 1;
)",
                                tu));

    index::ProjectIndex project;
    auto remap = project.merge(tu);

    // One mapped ID per path in the TU's include graph.
    ASSERT_EQ(remap.size(), tu.graph.paths.size());

    // Each mapped ID should be valid in the project path pool.
    const auto pool_size = project.path_pool.paths.size();
    for(auto project_id: remap) {
        ASSERT_TRUE(project_id < pool_size);
    }
}
|
|
|
|
/// Symbol names must be present after merge(), and both name and kind must be
/// unchanged after a serialize()/from() round trip.
TEST_CASE(NameSurvivesRoundTrip) {
    index::TUIndex tu;
    ASSERT_TRUE(build_and_index(R"(
int my_variable = 42;
void my_function() {}
)",
                                tu));

    index::ProjectIndex project;
    project.merge(tu);

    // Verify names are populated after merge.
    bool saw_variable = false;
    bool saw_function = false;
    for(auto& [hash, symbol]: project.symbols) {
        saw_variable = saw_variable || symbol.name == "my_variable";
        saw_function = saw_function || symbol.name == "my_function";
    }
    ASSERT_TRUE(saw_variable);
    ASSERT_TRUE(saw_function);

    // Serialize into an in-memory buffer and read it back.
    llvm::SmallString<4096> bytes;
    llvm::raw_svector_ostream sink(bytes);
    project.serialize(sink);
    auto restored = index::ProjectIndex::from(bytes.data());

    // Verify names (and kinds) survive the round trip.
    for(auto& [hash, symbol]: project.symbols) {
        ASSERT_TRUE(restored.symbols.contains(hash));

        auto& round_tripped = restored.symbols[hash];
        ASSERT_EQ(round_tripped.name, symbol.name);
        ASSERT_EQ(round_tripped.kind.value(), symbol.kind.value());
    }
}
|
|
|
|
}; // TEST_SUITE(ProjectIndex)
|
|
} // namespace
|
|
} // namespace clice::testing
|