Files
clice/tests/unit/index/merged_index_tests.cpp
ykiko e43bb14998 feat: implement index system with LSP query handlers (#382)
## Summary

Implement the complete index system for cross-file LSP features. This
adds persistent two-tier indexing (ProjectIndex + per-file MergedIndex
shards), background indexing triggered on idle, and index-based query
handlers for major LSP requests.

### Index Data Layer (`src/index/`)

- **TUIndex**: Add binary serialization/deserialization via FlatBuffers,
enabling IPC between stateless worker and master server
- **ProjectIndex**: Add symbol name/kind storage, `PathPool` path
normalization (backslash -> forward slash), and binary persistence
- **MergedIndex**: Add `content` field to store file content for
reliable offset<->position mapping; add `removed` bitmap for garbage
collection of deleted entries; filter removed IDs in `lookup()` queries
- **schema.fbs**: Add TUIndex tables, `Symbol.name` field,
`MergedIndex.removed` bitmap and `MergedIndex.content` string

### Server (`src/server/`)

- **Background indexing**: Idle-triggered coroutine dequeues files from
CDB, dispatches `IndexParams` to stateless workers, merges returned
`TUIndex` into ProjectIndex/MergedIndex, and persists to `.clice/index/`
- **Index persistence**: `save_index()` / `load_index()` for startup
restoration; only rewrites shards flagged `need_rewrite()`
- **LSP handlers**:
- `textDocument/definition` -- index-first lookup with stateful worker
fallback
  - `textDocument/references` -- cross-file reference query via index
- `callHierarchy/prepare`, `incomingCalls`, `outgoingCalls` --
Caller/Callee relation traversal
- `typeHierarchy/prepare`, `supertypes`, `subtypes` -- Base/Derived
relation traversal
- `workspace/symbol` -- case-insensitive substring search over
ProjectIndex symbols
- **Stateless worker**: Add `Index` request handler that builds
`TUIndex` from compiled AST and returns serialized data
- **Config**: Add `enable_indexing` (default true) and `idle_timeout_ms`
(default 3000ms)

### Fixes and Cross-platform

- **ElaboratedType handling** in `decl_of()` for correct Base/Derived
relation emission
- **Windows path normalization** in `PathPool::intern()` and
`ProjectIndex::from()` (backslash -> forward slash)
- **`.gitattributes`**: Force LF in `tests/data/**` to prevent CRLF
byte-offset mismatches on Windows CI
- **Test fixture**: Clean `.clice/` before each test for hermetic index
state

### Tests

- **370-line** `index_query_tests.cpp`: unit tests for occurrence
lookup, relation queries, content retrieval, removed bitmap filtering
- **282-line** `test_index.py`: E2E integration tests for
GoToDefinition, FindReferences, CallHierarchy
(prepare/incoming/outgoing), TypeHierarchy
(prepare/supertypes/subtypes), WorkspaceSymbol
- Updated existing MergedIndex and ProjectIndex tests for new schema
fields

## Test plan

- [x] 414 C++ unit tests pass (including new IndexQuery, MergedIndex,
ProjectIndex tests)
- [x] 69 Python integration tests pass (including 10 new index feature
tests)
- [x] CI green on Linux, macOS, Windows
- [ ] Manual smoke test with VSCode extension

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-02 00:20:41 +08:00

363 lines
11 KiB
C++

#include "test/test.h"
#include "test/tester.h"
#include "index/merged_index.h"
namespace clice::testing {
namespace {
TEST_SUITE(MergedIndex, Tester) {
index::TUIndex tu_index;
void build_index(llvm::StringRef code,
std::source_location location = std::source_location::current()) {
add_main("main.cpp", code);
ASSERT_TRUE(compile());
tu_index = index::TUIndex::build(*unit);
};
void EXPECT_SELECT(llvm::StringRef pos,
llvm::StringRef expect_range,
llvm::StringRef file = "",
std::source_location location = std::source_location::current()) {
auto offset = point(pos, file);
auto expected = range(expect_range, file);
auto fid = file.empty() ? unit->interested_file() : unit->file_id(file);
auto& index = tu_index.file_indices[fid];
auto it =
std::ranges::lower_bound(index.occurrences, offset, {}, [](index::Occurrence& occurrence) {
return occurrence.range.end;
});
auto err = std::format("Fail to find symbol for offset: {}, expected range: {}",
offset,
dump(expected));
ASSERT_TRUE(it != index.occurrences.end());
/// FIXME: Make eq pretty print reflectable struct.
ASSERT_EQ(dump(it->range), dump(expected));
}
TEST_CASE(Serialization) {
build_index(R"(
struct Foo { int x; int y; };
Foo make_foo() { return Foo{1, 2}; }
int use_foo() { return make_foo().x; }
)");
llvm::StringMap<index::MergedIndex> merged_indices;
auto& graph = tu_index.graph;
for(auto& [fid, index]: tu_index.file_indices) {
llvm::StringRef path = graph.paths[graph.path_id(fid)];
merged_indices[path].merge(0, graph.include_location_id(fid), index, {});
}
for(auto& [path, merged]: merged_indices) {
llvm::SmallString<1024> s;
llvm::raw_svector_ostream os(s);
merged.serialize(os);
auto view = index::MergedIndex(s);
ASSERT_TRUE(merged == view);
}
}
TEST_CASE(LookupByOffset) {
build_index(R"(
int @func[$(func)foo]() { return 42; }
int bar() { return @ref[$(ref)foo](); }
)");
// Merge the main file index into a MergedIndex.
index::MergedIndex merged;
auto fid = unit->interested_file();
merged.merge(0, tu_index.graph.include_location_id(fid), tu_index.main_file_index, {});
// Lookup at the reference offset should find an occurrence.
auto ref_offset = point("ref");
bool found = false;
merged.lookup(ref_offset, [&](const index::Occurrence& occ) {
if(occ.range.contains(ref_offset)) {
found = true;
}
return true;
});
ASSERT_TRUE(found);
}
TEST_CASE(LookupBySymbolAndKind) {
build_index(R"(
void $(target)target_func() {}
void caller() { $(call)target_func(); }
)");
index::MergedIndex merged;
auto fid = unit->interested_file();
merged.merge(0, tu_index.graph.include_location_id(fid), tu_index.main_file_index, {});
// Find the target_func symbol hash via occurrence lookup.
auto target_offset = point("target");
index::SymbolHash target_hash = 0;
merged.lookup(target_offset, [&](const index::Occurrence& occ) {
if(occ.range.contains(target_offset)) {
target_hash = occ.target;
return false;
}
return true;
});
ASSERT_TRUE(target_hash != 0);
// Lookup Definition relation for the symbol.
bool found_def = false;
merged.lookup(target_hash, RelationKind::Definition, [&](const index::Relation& rel) {
found_def = true;
return true;
});
ASSERT_TRUE(found_def);
}
TEST_CASE(MultipleMergesDedup) {
add_file("header.h", R"(
#pragma once
inline int shared() { return 1; }
)");
add_main("a.cpp", R"(
#include "header.h"
int use_a() { return shared(); }
)");
ASSERT_TRUE(compile());
auto tu_a = index::TUIndex::build(*unit);
add_file("header.h", R"(
#pragma once
inline int shared() { return 1; }
)");
add_main("b.cpp", R"(
#include "header.h"
int use_b() { return shared(); }
)");
ASSERT_TRUE(compile());
auto tu_b = index::TUIndex::build(*unit);
// Merge header indices from both TUs into same MergedIndex.
index::MergedIndex merged_header;
for(auto& [fid, file_index]: tu_a.file_indices) {
merged_header.merge(0, tu_a.graph.include_location_id(fid), file_index, {});
}
for(auto& [fid, file_index]: tu_b.file_indices) {
merged_header.merge(1, tu_b.graph.include_location_id(fid), file_index, {});
}
// Serialize and deserialize to verify dedup survives round-trip.
llvm::SmallString<4096> buf;
llvm::raw_svector_ostream os(buf);
merged_header.serialize(os);
auto restored = index::MergedIndex(buf);
ASSERT_TRUE(merged_header == restored);
}
TEST_CASE(SerializationRoundTripInMemory) {
build_index(R"(
struct Foo { int x; };
Foo make() { return Foo{42}; }
)");
// Merge using the include_id overload (same as existing Serialization test).
index::MergedIndex merged;
auto fid = unit->interested_file();
auto include_id = tu_index.graph.include_location_id(fid);
merged.merge(0, include_id, tu_index.main_file_index, {});
// Serialize.
llvm::SmallString<4096> buf;
llvm::raw_svector_ostream os(buf);
merged.serialize(os);
// Deserialize and compare.
auto restored = index::MergedIndex(buf);
ASSERT_TRUE(merged == restored);
// Lookup should work on the deserialized version too.
bool found = false;
for(auto& occ: tu_index.main_file_index.occurrences) {
restored.lookup(occ.range.begin, [&](const index::Occurrence& o) {
if(o.range.begin == occ.range.begin) {
found = true;
}
return true;
});
if(found)
break;
}
ASSERT_TRUE(found);
}
TEST_CASE(RemoveCompilationContext) {
build_index(R"(
int foo() { return 42; }
int bar() { return foo(); }
)");
// Merge as a compilation context (using the build_at overload).
index::MergedIndex merged;
auto fid = unit->interested_file();
std::vector<index::IncludeLocation> locations;
merged.merge(0, tu_index.built_at, std::move(locations), tu_index.main_file_index, {});
// Verify occurrence lookup works before remove.
bool found_before = false;
for(auto& occ: tu_index.main_file_index.occurrences) {
merged.lookup(occ.range.begin, [&](const index::Occurrence& o) {
found_before = true;
return false;
});
if(found_before)
break;
}
ASSERT_TRUE(found_before);
// Remove the compilation context.
merged.remove(0);
// Serialize and verify the removed data round-trips.
llvm::SmallString<4096> buf;
llvm::raw_svector_ostream os(buf);
merged.serialize(os);
// Should not crash.
auto restored = index::MergedIndex(buf);
}
TEST_CASE(RemoveHeaderContext) {
add_file("header.h", R"(
#pragma once
inline int shared() { return 1; }
)");
add_main("main.cpp", R"(
#include "header.h"
int use() { return shared(); }
)");
ASSERT_TRUE(compile());
tu_index = index::TUIndex::build(*unit);
// Merge header index as header context.
index::MergedIndex merged_header;
for(auto& [fid, file_index]: tu_index.file_indices) {
merged_header.merge(0, tu_index.graph.include_location_id(fid), file_index, {});
}
// Remove should not crash.
merged_header.remove(0);
// Serialize after remove should work.
llvm::SmallString<4096> buf;
llvm::raw_svector_ostream os(buf);
merged_header.serialize(os);
}
TEST_CASE(RemovedBitmapRoundTrip) {
build_index(R"(
int foo() { return 42; }
)");
// Merge as compilation context.
index::MergedIndex merged;
std::vector<index::IncludeLocation> locations;
merged.merge(0, tu_index.built_at, std::move(locations), tu_index.main_file_index, {});
// Remove to populate the removed bitmap.
merged.remove(0);
// Serialize.
llvm::SmallString<4096> buf;
llvm::raw_svector_ostream os(buf);
merged.serialize(os);
// Deserialize and compare.
auto restored = index::MergedIndex(buf);
ASSERT_TRUE(merged == restored);
}
TEST_CASE(LookupFiltersRemoved) {
build_index(R"(
int $(target)foo() { return 42; }
)");
// Merge as compilation context.
index::MergedIndex merged;
std::vector<index::IncludeLocation> locations;
merged.merge(0, tu_index.built_at, std::move(locations), tu_index.main_file_index, {});
// Verify lookup finds something before removal.
auto offset = point("target");
bool found_before = false;
merged.lookup(offset, [&](const index::Occurrence& occ) {
if(occ.range.contains(offset))
found_before = true;
return true;
});
ASSERT_TRUE(found_before);
// Remove the compilation context.
merged.remove(0);
// Verify lookup finds nothing after removal.
bool found_after = false;
merged.lookup(offset, [&](const index::Occurrence& occ) {
if(occ.range.contains(offset))
found_after = true;
return true;
});
ASSERT_FALSE(found_after);
}
TEST_CASE(CacheInvalidatedAfterMerge) {
build_index(R"(
int $(first)foo() { return 42; }
)");
// Merge first TU as header context.
index::MergedIndex merged;
auto fid = unit->interested_file();
merged.merge(0, tu_index.graph.include_location_id(fid), tu_index.main_file_index, {});
// Trigger cache build by doing a lookup.
auto first_offset = point("first");
bool found_first = false;
merged.lookup(first_offset, [&](const index::Occurrence& occ) {
if(occ.range.contains(first_offset))
found_first = true;
return true;
});
ASSERT_TRUE(found_first);
// Build a second TU with different content.
build_index(R"(
int $(second)bar() { return 99; }
)");
// Merge second TU.
auto fid2 = unit->interested_file();
merged.merge(1, tu_index.graph.include_location_id(fid2), tu_index.main_file_index, {});
// Verify lookup finds the new occurrence (cache was invalidated).
auto second_offset = point("second");
bool found_second = false;
merged.lookup(second_offset, [&](const index::Occurrence& occ) {
if(occ.range.contains(second_offset))
found_second = true;
return true;
});
ASSERT_TRUE(found_second);
}
}; // TEST_SUITE(MergedIndex)
} // namespace
} // namespace clice::testing