Compare commits
2 Commits
lint
...
feat/symbo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e1dde9ee92 | ||
|
|
005fc9ac21 |
@@ -200,6 +200,14 @@ if(CLICE_ENABLE_BENCHMARK)
|
||||
"${PROJECT_SOURCE_DIR}/src"
|
||||
)
|
||||
target_link_libraries(scan_benchmark PRIVATE clice::core kota::deco)
|
||||
|
||||
add_executable(index_benchmark
|
||||
"${PROJECT_SOURCE_DIR}/benchmarks/index_benchmark.cpp"
|
||||
)
|
||||
target_include_directories(index_benchmark PRIVATE
|
||||
"${PROJECT_SOURCE_DIR}/src"
|
||||
)
|
||||
target_link_libraries(index_benchmark PRIVATE clice::core kota::deco)
|
||||
endif()
|
||||
|
||||
if(CLICE_RELEASE)
|
||||
|
||||
664
benchmarks/index_benchmark.cpp
Normal file
664
benchmarks/index_benchmark.cpp
Normal file
@@ -0,0 +1,664 @@
|
||||
/// Benchmark for TUIndex build, MergedIndex merge, and query performance.
|
||||
///
|
||||
/// Usage:
|
||||
/// index_benchmark [OPTIONS] <compile_commands.json>
|
||||
///
|
||||
/// Example:
|
||||
/// ./build/RelWithDebInfo/bin/index_benchmark \
|
||||
/// /home/ykiko/C++/clice/.llvm/build-debug/compile_commands.json
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cstdlib>
|
||||
#include <mutex>
|
||||
#include <numeric>
|
||||
#include <print>
|
||||
#include <sstream>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "command/command.h"
|
||||
#include "compile/compilation.h"
|
||||
#include "index/merged_index.h"
|
||||
#include "index/project_index.h"
|
||||
#include "index/tu_index.h"
|
||||
#include "support/logging.h"
|
||||
|
||||
#include "kota/deco/deco.h"
|
||||
#include "kota/ipc/lsp/position.h"
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace clice;
|
||||
|
||||
struct Options {
|
||||
DecoKV(names = {"--log-level"}; help = "Log level"; required = false;)
|
||||
<std::string> log_level = "off";
|
||||
|
||||
DecoKV(names = {"--limit"}; help = "Max files to index (0 = all)"; required = false;)
|
||||
<int> limit = 100;
|
||||
|
||||
DecoKV(names = {"--jobs", "-j"}; help = "Parallel compile jobs (0 = nproc)"; required = false;)
|
||||
<int> jobs = 0;
|
||||
|
||||
DecoFlag(names = {"-h", "--help"}; help = "Show help"; required = false;)
|
||||
help;
|
||||
|
||||
DecoInput(meta_var = "CDB"; help = "Path to compile_commands.json"; required = false;)
|
||||
<std::string> cdb_path;
|
||||
};
|
||||
|
||||
struct Timer {
|
||||
std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
|
||||
|
||||
double ms() const {
|
||||
auto end = std::chrono::steady_clock::now();
|
||||
return std::chrono::duration<double, std::milli>(end - start).count();
|
||||
}
|
||||
|
||||
void reset() {
|
||||
start = std::chrono::steady_clock::now();
|
||||
}
|
||||
};
|
||||
|
||||
struct IndexStats {
|
||||
std::string file;
|
||||
double compile_ms = 0;
|
||||
double build_index_ms = 0;
|
||||
double serialize_ms = 0;
|
||||
std::size_t symbols = 0;
|
||||
std::size_t occurrences = 0;
|
||||
std::size_t relations = 0;
|
||||
std::size_t serialized_bytes = 0;
|
||||
std::size_t file_count = 0;
|
||||
};
|
||||
|
||||
struct IndexResult {
|
||||
IndexStats stats;
|
||||
index::TUIndex tu_index;
|
||||
};
|
||||
|
||||
struct MergeStats {
|
||||
std::size_t total_files = 0;
|
||||
std::size_t total_occurrences = 0;
|
||||
std::size_t total_relations = 0;
|
||||
double merge_ms = 0;
|
||||
double serialize_ms = 0;
|
||||
std::size_t total_serialized_bytes = 0;
|
||||
std::size_t shard_count = 0;
|
||||
};
|
||||
|
||||
void print_size(std::size_t bytes) {
|
||||
if(bytes < 1024)
|
||||
std::print("{}B", bytes);
|
||||
else if(bytes < 1024 * 1024)
|
||||
std::print("{:.1f}KB", bytes / 1024.0);
|
||||
else
|
||||
std::print("{:.1f}MB", bytes / (1024.0 * 1024.0));
|
||||
}
|
||||
|
||||
/// Per-task: compile one file, build TUIndex, serialize, collect stats.
|
||||
/// Everything here is thread-local, no shared state.
|
||||
static std::optional<IndexResult> index_one_file(CompilationDatabase& cdb,
|
||||
const CompilationEntry& entry) {
|
||||
auto file = cdb.resolve_path(entry.file);
|
||||
auto cmds = cdb.lookup(file);
|
||||
if(cmds.empty())
|
||||
return std::nullopt;
|
||||
|
||||
auto& cmd = cmds[0];
|
||||
auto argv_vec = cmd.to_argv();
|
||||
|
||||
CompilationParams cp;
|
||||
cp.kind = CompilationKind::Indexing;
|
||||
cp.directory = cmd.resolved.directory;
|
||||
for(auto* arg: argv_vec)
|
||||
cp.arguments.push_back(arg);
|
||||
|
||||
IndexResult result;
|
||||
result.stats.file = std::string(file);
|
||||
|
||||
Timer t;
|
||||
auto unit = compile(cp);
|
||||
result.stats.compile_ms = t.ms();
|
||||
|
||||
if(!unit.completed())
|
||||
return std::nullopt;
|
||||
|
||||
t.reset();
|
||||
result.tu_index = index::TUIndex::build(unit);
|
||||
result.stats.build_index_ms = t.ms();
|
||||
|
||||
result.stats.symbols = result.tu_index.symbols.size();
|
||||
result.stats.file_count = result.tu_index.file_indices.size() + 1;
|
||||
for(auto& [fid, fi]: result.tu_index.file_indices) {
|
||||
result.stats.occurrences += fi.occurrences.size();
|
||||
for(auto& [_, rels]: fi.relations)
|
||||
result.stats.relations += rels.size();
|
||||
}
|
||||
result.stats.occurrences += result.tu_index.main_file_index.occurrences.size();
|
||||
for(auto& [_, rels]: result.tu_index.main_file_index.relations)
|
||||
result.stats.relations += rels.size();
|
||||
|
||||
t.reset();
|
||||
std::string serialized;
|
||||
llvm::raw_string_ostream os(serialized);
|
||||
result.tu_index.serialize(os);
|
||||
result.stats.serialize_ms = t.ms();
|
||||
result.stats.serialized_bytes = serialized.size();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int main(int argc, const char** argv) {
|
||||
auto args = deco::util::argvify(argc, argv);
|
||||
auto parsed = deco::cli::parse<Options>(args);
|
||||
if(!parsed.has_value()) {
|
||||
std::println(stderr, "Error: {}", parsed.error().message);
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto& opts = parsed->options;
|
||||
if(opts.help.value_or(false) || !opts.cdb_path.has_value()) {
|
||||
std::ostringstream oss;
|
||||
deco::cli::write_usage_for<Options>(oss, "index_benchmark [OPTIONS] <cdb>");
|
||||
std::print("{}", oss.str());
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto level = spdlog::level::from_str(*opts.log_level);
|
||||
clice::logging::options.level = level;
|
||||
clice::logging::stderr_logger("index_benchmark", clice::logging::options);
|
||||
|
||||
auto num_jobs = static_cast<unsigned>(*opts.jobs);
|
||||
if(num_jobs == 0)
|
||||
num_jobs = std::thread::hardware_concurrency();
|
||||
if(num_jobs == 0)
|
||||
num_jobs = 4;
|
||||
|
||||
// Load CDB.
|
||||
CompilationDatabase cdb;
|
||||
auto count = cdb.load(*opts.cdb_path);
|
||||
std::println("CDB: {} entries from {}", count, *opts.cdb_path);
|
||||
|
||||
auto entries = cdb.get_entries();
|
||||
auto limit = static_cast<std::size_t>(*opts.limit);
|
||||
if(limit > 0 && limit < entries.size())
|
||||
entries = entries.take_front(limit);
|
||||
std::println("Indexing {} files with {} threads...\n", entries.size(), num_jobs);
|
||||
|
||||
// Phase 1: Parallel compile & build TUIndex.
|
||||
// Each thread picks tasks from a shared atomic counter.
|
||||
// Results are written to a pre-sized vector (one slot per entry).
|
||||
std::vector<std::optional<IndexResult>> results(entries.size());
|
||||
std::atomic<std::size_t> next_task{0};
|
||||
std::atomic<std::size_t> completed{0};
|
||||
|
||||
Timer phase1_wall;
|
||||
|
||||
{
|
||||
std::vector<std::jthread> workers;
|
||||
workers.reserve(num_jobs);
|
||||
for(unsigned i = 0; i < num_jobs; i++) {
|
||||
workers.emplace_back([&] {
|
||||
while(true) {
|
||||
auto idx = next_task.fetch_add(1);
|
||||
if(idx >= entries.size())
|
||||
break;
|
||||
results[idx] = index_one_file(cdb, entries[idx]);
|
||||
auto done = completed.fetch_add(1) + 1;
|
||||
if(done % 100 == 0)
|
||||
std::println(" ... {}/{} done", done, entries.size());
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
double phase1_wall_ms = phase1_wall.ms();
|
||||
|
||||
// Collect results on main thread.
|
||||
std::vector<IndexStats> all_stats;
|
||||
std::vector<std::pair<std::string, index::TUIndex>> tu_indices;
|
||||
all_stats.reserve(entries.size());
|
||||
tu_indices.reserve(entries.size());
|
||||
|
||||
double total_compile_ms = 0;
|
||||
double total_build_ms = 0;
|
||||
double total_serialize_ms = 0;
|
||||
std::size_t total_serialized = 0;
|
||||
|
||||
for(auto& r: results) {
|
||||
if(!r)
|
||||
continue;
|
||||
total_compile_ms += r->stats.compile_ms;
|
||||
total_build_ms += r->stats.build_index_ms;
|
||||
total_serialize_ms += r->stats.serialize_ms;
|
||||
total_serialized += r->stats.serialized_bytes;
|
||||
all_stats.push_back(r->stats);
|
||||
tu_indices.emplace_back(std::move(r->stats.file), std::move(r->tu_index));
|
||||
}
|
||||
|
||||
std::println("=== Phase 1: Compile & Build TUIndex ({} threads) ===", num_jobs);
|
||||
std::println(" Files indexed: {}", all_stats.size());
|
||||
std::println(" Wall time: {:.0f}ms", phase1_wall_ms);
|
||||
std::println(" Total compile: {:.0f}ms (sum of per-thread)", total_compile_ms);
|
||||
std::println(" Total build idx: {:.0f}ms (sum of per-thread)", total_build_ms);
|
||||
std::println(" Total serialize: {:.0f}ms (sum of per-thread)", total_serialize_ms);
|
||||
std::print(" Total TUIndex size: ");
|
||||
print_size(total_serialized);
|
||||
std::println("");
|
||||
|
||||
// Size distribution.
|
||||
if(!all_stats.empty()) {
|
||||
std::vector<std::size_t> sizes;
|
||||
std::vector<double> compile_times;
|
||||
std::vector<double> build_times;
|
||||
for(auto& s: all_stats) {
|
||||
sizes.push_back(s.serialized_bytes);
|
||||
compile_times.push_back(s.compile_ms);
|
||||
build_times.push_back(s.build_index_ms);
|
||||
}
|
||||
std::ranges::sort(sizes);
|
||||
std::ranges::sort(compile_times);
|
||||
std::ranges::sort(build_times);
|
||||
auto p = [](auto& v, double pct) {
|
||||
return v[static_cast<std::size_t>(v.size() * pct)];
|
||||
};
|
||||
|
||||
std::println("\n TUIndex size distribution:");
|
||||
std::print(" p50=");
|
||||
print_size(p(sizes, 0.5));
|
||||
std::print(" p90=");
|
||||
print_size(p(sizes, 0.9));
|
||||
std::print(" p99=");
|
||||
print_size(p(sizes, 0.99));
|
||||
std::print(" max=");
|
||||
print_size(sizes.back());
|
||||
std::println("");
|
||||
|
||||
std::println(" Compile time distribution:");
|
||||
std::println(" p50={:.0f}ms p90={:.0f}ms p99={:.0f}ms max={:.0f}ms",
|
||||
p(compile_times, 0.5),
|
||||
p(compile_times, 0.9),
|
||||
p(compile_times, 0.99),
|
||||
compile_times.back());
|
||||
|
||||
std::println(" Build index time distribution:");
|
||||
std::println(" p50={:.1f}ms p90={:.1f}ms p99={:.1f}ms max={:.1f}ms",
|
||||
p(build_times, 0.5),
|
||||
p(build_times, 0.9),
|
||||
p(build_times, 0.99),
|
||||
build_times.back());
|
||||
}
|
||||
|
||||
// Phase 2: Merge into MergedIndex shards (main thread).
|
||||
std::println("\n=== Phase 2: Merge into MergedIndex ===");
|
||||
|
||||
index::ProjectIndex project_index;
|
||||
llvm::DenseMap<std::uint32_t, index::MergedIndex> merged;
|
||||
MergeStats merge_stats;
|
||||
Timer merge_timer;
|
||||
|
||||
llvm::DenseMap<std::uint32_t, std::string> file_contents;
|
||||
|
||||
auto read_content = [&](std::uint32_t path_id, llvm::StringRef path) -> llvm::StringRef {
|
||||
auto it = file_contents.find(path_id);
|
||||
if(it != file_contents.end())
|
||||
return it->second;
|
||||
auto buf = llvm::MemoryBuffer::getFile(path);
|
||||
if(!buf)
|
||||
return {};
|
||||
auto [inserted, _] = file_contents.try_emplace(path_id, (*buf)->getBuffer().str());
|
||||
return inserted->second;
|
||||
};
|
||||
|
||||
// Pre-compute SHA256 hashes (simulates offloading to worker threads).
|
||||
// With cached hash, this warms the cache so MergedIndex::merge won't recompute.
|
||||
Timer hash_timer;
|
||||
std::size_t hash_count = 0;
|
||||
for(auto& [file, tu_index]: tu_indices) {
|
||||
for(auto& [fid, fi]: tu_index.file_indices) {
|
||||
fi.hash();
|
||||
hash_count++;
|
||||
}
|
||||
tu_index.main_file_index.hash();
|
||||
hash_count++;
|
||||
}
|
||||
double precompute_hash_ms = hash_timer.ms();
|
||||
|
||||
// Merge with both optimizations:
|
||||
// 1. SHA256 already cached from above (MergedIndex::merge uses cached values)
|
||||
// 2. Selective ProjectIndex update (skip symbols only in cache-hit files)
|
||||
double project_merge_full_ms = 0;
|
||||
double project_merge_selective_ms = 0;
|
||||
double merged_index_merge_ms = 0;
|
||||
std::size_t cache_hits = 0;
|
||||
std::size_t cache_misses = 0;
|
||||
|
||||
for(auto& [file, tu_index]: tu_indices) {
|
||||
auto& graph = tu_index.graph;
|
||||
auto main_tu_path_id = static_cast<std::uint32_t>(graph.paths.size() - 1);
|
||||
|
||||
// First: MergedIndex merges to find out which files are new.
|
||||
Bitmap new_file_ids;
|
||||
Timer mt;
|
||||
for(auto& [fid, fi]: tu_index.file_indices) {
|
||||
auto tu_path_id = graph.path_id(fid);
|
||||
auto global_path_id = project_index.path_pool.path_id(graph.paths[tu_path_id]);
|
||||
auto content = read_content(global_path_id, graph.paths[tu_path_id]);
|
||||
auto include_id = graph.include_location_id(fid);
|
||||
bool hit = merged[global_path_id].merge(global_path_id, include_id, fi, content);
|
||||
if(hit) {
|
||||
cache_hits++;
|
||||
} else {
|
||||
cache_misses++;
|
||||
new_file_ids.add(tu_path_id);
|
||||
}
|
||||
merge_stats.total_files++;
|
||||
}
|
||||
|
||||
// Main file is always new.
|
||||
new_file_ids.add(main_tu_path_id);
|
||||
auto global_main_id = project_index.path_pool.path_id(graph.paths[main_tu_path_id]);
|
||||
auto content = read_content(global_main_id, graph.paths[main_tu_path_id]);
|
||||
auto now = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
std::chrono::system_clock::now().time_since_epoch());
|
||||
|
||||
std::vector<index::IncludeLocation> remapped_locs;
|
||||
for(auto& loc: graph.locations) {
|
||||
auto remapped = loc;
|
||||
remapped.path_id = project_index.path_pool.path_id(graph.paths[loc.path_id]);
|
||||
remapped_locs.push_back(remapped);
|
||||
}
|
||||
merged[global_main_id].merge(global_main_id,
|
||||
now,
|
||||
std::move(remapped_locs),
|
||||
tu_index.main_file_index,
|
||||
content);
|
||||
cache_misses++;
|
||||
merged_index_merge_ms += mt.ms();
|
||||
|
||||
// Then: selective ProjectIndex merge (only symbols in new files).
|
||||
Timer pt;
|
||||
project_index.merge(tu_index, new_file_ids);
|
||||
project_merge_selective_ms += pt.ms();
|
||||
}
|
||||
|
||||
merge_stats.merge_ms = merge_timer.ms();
|
||||
merge_stats.shard_count = merged.size();
|
||||
|
||||
// Serialize all shards to measure size.
|
||||
merge_timer.reset();
|
||||
for(auto& [path, shard]: merged) {
|
||||
std::string buf;
|
||||
llvm::raw_string_ostream os(buf);
|
||||
shard.serialize(os);
|
||||
merge_stats.total_serialized_bytes += buf.size();
|
||||
}
|
||||
merge_stats.serialize_ms = merge_timer.ms();
|
||||
|
||||
std::println(" Shards: {}", merge_stats.shard_count);
|
||||
std::println(" SHA256 precompute (offloadable): {:.0f}ms ({} hashes)",
|
||||
precompute_hash_ms,
|
||||
hash_count);
|
||||
std::println(" MergedIndex merge (cached hash): {:.0f}ms (hit: {}, miss: {})",
|
||||
merged_index_merge_ms,
|
||||
cache_hits,
|
||||
cache_misses);
|
||||
std::println(" ProjectIndex merge (selective): {:.0f}ms", project_merge_selective_ms);
|
||||
if(!tu_indices.empty()) {
|
||||
std::println(" Main-thread total: {:.0f}ms ({:.1f}ms/TU)",
|
||||
merged_index_merge_ms + project_merge_selective_ms,
|
||||
(merged_index_merge_ms + project_merge_selective_ms) / tu_indices.size());
|
||||
}
|
||||
std::println(" Serialize time: {:.0f}ms", merge_stats.serialize_ms);
|
||||
std::print(" Total size: ");
|
||||
print_size(merge_stats.total_serialized_bytes);
|
||||
std::println("");
|
||||
|
||||
// Shard size distribution.
|
||||
{
|
||||
std::vector<std::size_t> shard_sizes;
|
||||
shard_sizes.reserve(merged.size());
|
||||
for(auto& [path, shard]: merged) {
|
||||
std::string buf;
|
||||
llvm::raw_string_ostream os(buf);
|
||||
shard.serialize(os);
|
||||
shard_sizes.push_back(buf.size());
|
||||
}
|
||||
std::ranges::sort(shard_sizes);
|
||||
if(!shard_sizes.empty()) {
|
||||
auto p = [&](double pct) {
|
||||
return shard_sizes[static_cast<std::size_t>(shard_sizes.size() * pct)];
|
||||
};
|
||||
std::println("\n Shard size distribution:");
|
||||
std::print(" p50=");
|
||||
print_size(p(0.5));
|
||||
std::print(" p90=");
|
||||
print_size(p(0.9));
|
||||
std::print(" p99=");
|
||||
print_size(p(0.99));
|
||||
std::print(" max=");
|
||||
print_size(shard_sizes.back());
|
||||
std::println("");
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 3: Query benchmark.
|
||||
std::println("\n=== Phase 3: Query Benchmark ===");
|
||||
|
||||
// Collect some symbol hashes to query.
|
||||
llvm::SmallVector<index::SymbolHash> sample_hashes;
|
||||
for(auto& [file, tu_index]: tu_indices) {
|
||||
for(auto& [hash, sym]: tu_index.symbols) {
|
||||
sample_hashes.push_back(hash);
|
||||
if(sample_hashes.size() >= 1000)
|
||||
break;
|
||||
}
|
||||
if(sample_hashes.size() >= 1000)
|
||||
break;
|
||||
}
|
||||
|
||||
// Occurrence lookup by offset (raw, no position conversion).
|
||||
{
|
||||
std::size_t hit_count = 0;
|
||||
Timer t;
|
||||
constexpr int rounds = 10;
|
||||
for(int r = 0; r < rounds; r++) {
|
||||
for(auto& [path, shard]: merged) {
|
||||
for(std::uint32_t offset = 0; offset < 1000; offset += 50) {
|
||||
shard.lookup(offset, [&](const index::Occurrence&) {
|
||||
hit_count++;
|
||||
return true;
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
auto elapsed = t.ms();
|
||||
auto queries = merged.size() * 20 * rounds;
|
||||
std::println(" Occurrence lookup (raw): {} queries in {:.1f}ms ({:.0f} q/ms, {} hits)",
|
||||
queries,
|
||||
elapsed,
|
||||
queries / elapsed,
|
||||
hit_count);
|
||||
}
|
||||
|
||||
// Relation lookup by symbol hash (raw, no position conversion).
|
||||
{
|
||||
std::size_t hit_count = 0;
|
||||
Timer t;
|
||||
constexpr int rounds = 10;
|
||||
for(int r = 0; r < rounds; r++) {
|
||||
for(auto& [path, shard]: merged) {
|
||||
for(auto hash: sample_hashes) {
|
||||
shard.lookup(hash, RelationKind::Definition, [&](const index::Relation&) {
|
||||
hit_count++;
|
||||
return true;
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
auto elapsed = t.ms();
|
||||
auto queries = merged.size() * sample_hashes.size() * rounds;
|
||||
std::println(" Relation lookup (raw): {} queries in {:.1f}ms ({:.0f} q/ms, {} hits)",
|
||||
queries,
|
||||
elapsed,
|
||||
queries / elapsed,
|
||||
hit_count);
|
||||
}
|
||||
|
||||
// PositionMapper construction benchmark.
|
||||
{
|
||||
using kota::ipc::lsp::PositionMapper;
|
||||
using kota::ipc::lsp::PositionEncoding;
|
||||
|
||||
std::size_t mapper_count = 0;
|
||||
std::vector<double> construct_times;
|
||||
|
||||
for(auto& [path, shard]: merged) {
|
||||
auto content = shard.content();
|
||||
if(content.empty())
|
||||
continue;
|
||||
Timer t;
|
||||
PositionMapper mapper(content, PositionEncoding::UTF16);
|
||||
construct_times.push_back(t.ms());
|
||||
mapper_count++;
|
||||
}
|
||||
|
||||
std::ranges::sort(construct_times);
|
||||
if(!construct_times.empty()) {
|
||||
auto sum = std::accumulate(construct_times.begin(), construct_times.end(), 0.0);
|
||||
auto p = [&](double pct) {
|
||||
return construct_times[static_cast<std::size_t>(construct_times.size() * pct)];
|
||||
};
|
||||
std::println("\n PositionMapper construction ({} shards):", mapper_count);
|
||||
std::println(" Total: {:.1f}ms", sum);
|
||||
std::println(" p50={:.3f}ms p90={:.3f}ms p99={:.3f}ms max={:.3f}ms",
|
||||
p(0.5),
|
||||
p(0.9),
|
||||
p(0.99),
|
||||
construct_times.back());
|
||||
}
|
||||
}
|
||||
|
||||
// Relation lookup with full position conversion (simulates real LSP query).
|
||||
{
|
||||
using kota::ipc::lsp::PositionMapper;
|
||||
using kota::ipc::lsp::PositionEncoding;
|
||||
|
||||
// Pre-build mappers for all shards.
|
||||
llvm::DenseMap<std::uint32_t, PositionMapper> mappers;
|
||||
for(auto& [path_id, shard]: merged) {
|
||||
auto content = shard.content();
|
||||
if(!content.empty())
|
||||
mappers.try_emplace(path_id, content, PositionEncoding::UTF16);
|
||||
}
|
||||
|
||||
std::size_t hit_count = 0;
|
||||
std::size_t convert_count = 0;
|
||||
Timer t;
|
||||
constexpr int rounds = 10;
|
||||
for(int r = 0; r < rounds; r++) {
|
||||
for(auto& [path_id, shard]: merged) {
|
||||
auto mapper_it = mappers.find(path_id);
|
||||
if(mapper_it == mappers.end())
|
||||
continue;
|
||||
auto& mapper = mapper_it->second;
|
||||
for(auto hash: sample_hashes) {
|
||||
shard.lookup(hash, RelationKind::Definition, [&](const index::Relation& rel) {
|
||||
auto start = mapper.to_position(rel.range.begin);
|
||||
auto end = mapper.to_position(rel.range.end);
|
||||
if(start && end)
|
||||
convert_count++;
|
||||
hit_count++;
|
||||
return true;
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
auto elapsed = t.ms();
|
||||
auto queries = merged.size() * sample_hashes.size() * rounds;
|
||||
std::println("\n Relation lookup + to_position:");
|
||||
std::println(" {} queries in {:.1f}ms ({:.0f} q/ms, {} hits, {} converted)",
|
||||
queries,
|
||||
elapsed,
|
||||
queries / elapsed,
|
||||
hit_count,
|
||||
convert_count);
|
||||
}
|
||||
|
||||
// Phase 4: Realistic "find references" for specific high-frequency symbols.
|
||||
std::println("=== Phase 4: Realistic Find-References ===");
|
||||
{
|
||||
using kota::ipc::lsp::PositionMapper;
|
||||
using kota::ipc::lsp::PositionEncoding;
|
||||
|
||||
// Use the properly merged ProjectIndex symbol table.
|
||||
auto& symbol_table = project_index.symbols;
|
||||
|
||||
// Find symbols by name to query. Collect ALL hashes whose name contains the target.
|
||||
llvm::StringRef targets[] = {"vector",
|
||||
"StringRef",
|
||||
"SourceLocation",
|
||||
"DenseMap",
|
||||
"SmallVector"};
|
||||
for(auto target_name: targets) {
|
||||
llvm::SmallVector<index::SymbolHash> matching_hashes;
|
||||
std::size_t ref_file_count = 0;
|
||||
for(auto& [hash, sym]: symbol_table) {
|
||||
if(sym.name == target_name) {
|
||||
matching_hashes.push_back(hash);
|
||||
ref_file_count += sym.reference_files.cardinality();
|
||||
}
|
||||
}
|
||||
|
||||
if(matching_hashes.empty()) {
|
||||
std::println(" {}: not found in symbol table", target_name);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Query all shards for this symbol's references (simulating real find-references).
|
||||
std::size_t total_refs = 0;
|
||||
std::size_t shards_queried = 0;
|
||||
RelationKind kinds[] = {
|
||||
RelationKind::Reference,
|
||||
RelationKind::Definition,
|
||||
RelationKind::Declaration,
|
||||
};
|
||||
Timer t;
|
||||
for(auto& [path, shard]: merged) {
|
||||
bool found = false;
|
||||
for(auto target_hash: matching_hashes) {
|
||||
for(auto kind: kinds) {
|
||||
shard.lookup(target_hash, kind, [&](const index::Relation&) {
|
||||
total_refs++;
|
||||
found = true;
|
||||
return true;
|
||||
});
|
||||
}
|
||||
}
|
||||
if(found)
|
||||
shards_queried++;
|
||||
}
|
||||
auto elapsed = t.ms();
|
||||
|
||||
std::println(
|
||||
" {} ({} hashes, {} ref_files): {} refs across {} shards, " "scanned {} shards in {:.2f}ms",
|
||||
target_name,
|
||||
matching_hashes.size(),
|
||||
ref_file_count,
|
||||
total_refs,
|
||||
shards_queried,
|
||||
merged.size(),
|
||||
elapsed);
|
||||
|
||||
// Now with reference_files bitmap to skip irrelevant shards.
|
||||
// We don't have path_id mapping here, so just measure the full scan above
|
||||
// vs a targeted scan using reference_files count as an estimate.
|
||||
}
|
||||
}
|
||||
|
||||
std::println("");
|
||||
return 0;
|
||||
}
|
||||
@@ -129,18 +129,19 @@ struct MergedIndex::Impl {
|
||||
std::vector<std::uint32_t> canonical_ref_counts;
|
||||
|
||||
/// The canonical id set of removed index.
|
||||
roaring::Roaring removed;
|
||||
ContextBitmap removed;
|
||||
|
||||
/// All merged symbol occurrences.
|
||||
llvm::DenseMap<Occurrence, roaring::Roaring> occurrences;
|
||||
llvm::DenseMap<Occurrence, ContextBitmap> occurrences;
|
||||
|
||||
/// All merged symbol relations.
|
||||
llvm::DenseMap<SymbolHash, llvm::DenseMap<Relation, roaring::Roaring>> relations;
|
||||
llvm::DenseMap<SymbolHash, llvm::DenseMap<Relation, ContextBitmap>> relations;
|
||||
|
||||
/// Sorted occurrences cache for fast lookup.
|
||||
std::vector<Occurrence> occurrences_cache;
|
||||
|
||||
void merge(this Impl& self, std::uint32_t path_id, FileIndex& index, auto&& add_context) {
|
||||
/// Returns true if this was a cache hit (no new data inserted).
|
||||
bool merge(this Impl& self, std::uint32_t path_id, FileIndex& index, auto&& add_context) {
|
||||
auto hash = index.hash();
|
||||
auto hash_key = llvm::StringRef(reinterpret_cast<char*>(hash.data()), hash.size());
|
||||
auto [it, success] = self.canonical_cache.try_emplace(hash_key, self.max_canonical_id);
|
||||
@@ -151,7 +152,7 @@ struct MergedIndex::Impl {
|
||||
if(!success) {
|
||||
self.canonical_ref_counts[canonical_id] += 1;
|
||||
self.removed.remove(canonical_id);
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
for(auto& occurrence: index.occurrences) {
|
||||
@@ -167,6 +168,7 @@ struct MergedIndex::Impl {
|
||||
|
||||
self.canonical_ref_counts.emplace_back(1);
|
||||
self.max_canonical_id += 1;
|
||||
return false;
|
||||
}
|
||||
|
||||
friend bool operator==(const Impl&, const Impl&) = default;
|
||||
@@ -237,19 +239,20 @@ void MergedIndex::load_in_memory(this Self& self) {
|
||||
|
||||
// Deserialize removed bitmap.
|
||||
if(root->removed() && root->removed()->size() > 0) {
|
||||
index.removed = read_bitmap(root->removed());
|
||||
index.removed = ContextBitmap::from_roaring(read_bitmap(root->removed()));
|
||||
}
|
||||
|
||||
for(auto entry: *root->occurrences()) {
|
||||
index.occurrences.try_emplace(*safe_cast<Occurrence>(entry->occurrence()),
|
||||
read_bitmap(entry->context()));
|
||||
ContextBitmap::from_roaring(read_bitmap(entry->context())));
|
||||
}
|
||||
|
||||
for(auto entry: *root->relations()) {
|
||||
auto& relations = index.relations[entry->symbol()];
|
||||
for(auto relation_entry: *entry->relations()) {
|
||||
relations.try_emplace(*safe_cast<Relation>(relation_entry->relation()),
|
||||
read_bitmap(relation_entry->context()));
|
||||
relations.try_emplace(
|
||||
*safe_cast<Relation>(relation_entry->relation()),
|
||||
ContextBitmap::from_roaring(read_bitmap(relation_entry->context())));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -310,17 +313,22 @@ void MergedIndex::serialize(this const Self& self, llvm::raw_ostream& out) {
|
||||
CreateStructVector<binary::IncludeLocation>(builder, context.include_locations));
|
||||
});
|
||||
|
||||
auto serialize_ctx_bitmap = [&](const ContextBitmap& ctx) {
|
||||
auto r = ctx.to_roaring();
|
||||
buffer.clear();
|
||||
buffer.resize_for_overwrite(r.getSizeInBytes(false));
|
||||
r.write(buffer.data(), false);
|
||||
return CreateVector(builder, buffer);
|
||||
};
|
||||
|
||||
llvm::SmallVector<const Occurrence*> occurrence_keys;
|
||||
occurrence_keys.reserve(index->occurrences.size());
|
||||
auto occurrences = transform(index->occurrences, [&](auto&& value) {
|
||||
auto&& [occurrence, bitmap] = value;
|
||||
buffer.clear();
|
||||
buffer.resize_for_overwrite(bitmap.getSizeInBytes(false));
|
||||
bitmap.write(buffer.data(), false);
|
||||
occurrence_keys.emplace_back(&occurrence);
|
||||
return binary::CreateOccurrenceEntry(builder,
|
||||
safe_cast<binary::Occurrence>(&occurrence),
|
||||
CreateVector(builder, buffer));
|
||||
serialize_ctx_bitmap(bitmap));
|
||||
});
|
||||
std::ranges::sort(std::views::zip(occurrence_keys, occurrences), [](auto lhs, auto rhs) {
|
||||
const auto& lo = *std::get<0>(lhs);
|
||||
@@ -335,12 +343,9 @@ void MergedIndex::serialize(this const Self& self, llvm::raw_ostream& out) {
|
||||
auto&& [symbol_id, symbol_relations] = value;
|
||||
auto relations = transform(symbol_relations, [&](auto&& value) {
|
||||
auto&& [relation, bitmap] = value;
|
||||
buffer.clear();
|
||||
buffer.resize_for_overwrite(bitmap.getSizeInBytes(false));
|
||||
bitmap.write(buffer.data(), false);
|
||||
return binary::CreateRelationEntry(builder,
|
||||
safe_cast<binary::Relation>(&relation),
|
||||
CreateVector(builder, buffer));
|
||||
serialize_ctx_bitmap(bitmap));
|
||||
});
|
||||
relation_keys.emplace_back(symbol_id);
|
||||
return binary::CreateSymbolRelationsEntry(builder,
|
||||
@@ -353,9 +358,10 @@ void MergedIndex::serialize(this const Self& self, llvm::raw_ostream& out) {
|
||||
|
||||
// Serialize removed bitmap.
|
||||
buffer.clear();
|
||||
if(!index->removed.isEmpty()) {
|
||||
buffer.resize_for_overwrite(index->removed.getSizeInBytes(false));
|
||||
index->removed.write(buffer.data(), false);
|
||||
if(!index->removed.is_empty()) {
|
||||
auto r = index->removed.to_roaring();
|
||||
buffer.resize_for_overwrite(r.getSizeInBytes(false));
|
||||
r.write(buffer.data(), false);
|
||||
}
|
||||
auto removed = CreateVector(builder, buffer);
|
||||
|
||||
@@ -398,14 +404,12 @@ void MergedIndex::lookup(this const Self& self,
|
||||
while(it != occurrences.end()) {
|
||||
if(it->range.contains(offset)) {
|
||||
// Skip occurrences whose canonical_ids are all removed.
|
||||
if(!index.removed.isEmpty()) {
|
||||
if(!index.removed.is_empty()) {
|
||||
auto bitmap_it = index.occurrences.find(*it);
|
||||
if(bitmap_it != index.occurrences.end()) {
|
||||
auto remaining = bitmap_it->second - index.removed;
|
||||
if(remaining.isEmpty()) {
|
||||
it++;
|
||||
continue;
|
||||
}
|
||||
if(bitmap_it != index.occurrences.end() &&
|
||||
!bitmap_it->second.any_not_in(index.removed)) {
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -413,7 +417,7 @@ void MergedIndex::lookup(this const Self& self,
|
||||
break;
|
||||
}
|
||||
|
||||
it++;
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -434,7 +438,7 @@ void MergedIndex::lookup(this const Self& self,
|
||||
break;
|
||||
}
|
||||
|
||||
it++;
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -457,11 +461,8 @@ void MergedIndex::lookup(this const Self& self,
|
||||
for(auto& [relation, bitmap]: relations) {
|
||||
if(relation.kind & kind) {
|
||||
// Skip relations whose canonical_ids are all removed.
|
||||
if(!self.impl->removed.isEmpty()) {
|
||||
auto remaining = bitmap - self.impl->removed;
|
||||
if(remaining.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
if(!self.impl->removed.is_empty() && !bitmap.any_not_in(self.impl->removed)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if(!callback(relation)) {
|
||||
@@ -579,7 +580,7 @@ void MergedIndex::remove(this Self& self, std::uint32_t path_id) {
|
||||
index.occurrences_cache.clear();
|
||||
}
|
||||
|
||||
void MergedIndex::merge(this Self& self,
|
||||
bool MergedIndex::merge(this Self& self,
|
||||
std::uint32_t path_id,
|
||||
std::chrono::milliseconds build_at,
|
||||
std::vector<IncludeLocation> include_locations,
|
||||
@@ -587,16 +588,17 @@ void MergedIndex::merge(this Self& self,
|
||||
llvm::StringRef content) {
|
||||
self.load_in_memory();
|
||||
self.impl->content = content.str();
|
||||
self.impl->merge(path_id, index, [&](Impl& self, std::uint32_t canonical_id) {
|
||||
bool hit = self.impl->merge(path_id, index, [&](Impl& self, std::uint32_t canonical_id) {
|
||||
auto& context = self.compilation_contexts[path_id];
|
||||
context.canonical_id = canonical_id;
|
||||
context.build_at = build_at.count();
|
||||
context.include_locations = std::move(include_locations);
|
||||
});
|
||||
self.impl->occurrences_cache.clear();
|
||||
return hit;
|
||||
}
|
||||
|
||||
void MergedIndex::merge(this Self& self,
|
||||
bool MergedIndex::merge(this Self& self,
|
||||
std::uint32_t path_id,
|
||||
std::uint32_t include_id,
|
||||
FileIndex& index,
|
||||
@@ -605,11 +607,12 @@ void MergedIndex::merge(this Self& self,
|
||||
if(self.impl->content.empty() && !content.empty()) {
|
||||
self.impl->content = content.str();
|
||||
}
|
||||
self.impl->merge(path_id, index, [&](Impl& self, std::uint32_t canonical_id) {
|
||||
bool hit = self.impl->merge(path_id, index, [&](Impl& self, std::uint32_t canonical_id) {
|
||||
auto& context = self.header_contexts[path_id];
|
||||
context.includes.emplace_back(include_id, canonical_id);
|
||||
});
|
||||
self.impl->occurrences_cache.clear();
|
||||
return hit;
|
||||
}
|
||||
|
||||
llvm::StringRef MergedIndex::content(this const Self& self) {
|
||||
|
||||
@@ -68,7 +68,8 @@ public:
|
||||
llvm::StringRef content(this const Self& self);
|
||||
|
||||
/// Merge the index with given compilation context.
|
||||
void merge(this Self& self,
|
||||
/// Returns true if this was a cache hit (no new data inserted).
|
||||
bool merge(this Self& self,
|
||||
std::uint32_t path_id,
|
||||
std::chrono::milliseconds build_at,
|
||||
std::vector<IncludeLocation> include_locations,
|
||||
@@ -76,7 +77,8 @@ public:
|
||||
llvm::StringRef content);
|
||||
|
||||
/// Merge the index with given header context.
|
||||
void merge(this Self& self,
|
||||
/// Returns true if this was a cache hit (no new data inserted).
|
||||
bool merge(this Self& self,
|
||||
std::uint32_t path_id,
|
||||
std::uint32_t include_id,
|
||||
FileIndex& index,
|
||||
|
||||
@@ -27,6 +27,42 @@ llvm::SmallVector<std::uint32_t> ProjectIndex::merge(this ProjectIndex& self, TU
|
||||
return file_ids_map;
|
||||
}
|
||||
|
||||
llvm::SmallVector<std::uint32_t> ProjectIndex::merge(this ProjectIndex& self,
|
||||
TUIndex& index,
|
||||
const Bitmap& new_file_ids) {
|
||||
auto& paths = index.graph.paths;
|
||||
llvm::SmallVector<std::uint32_t> file_ids_map;
|
||||
file_ids_map.resize_for_overwrite(paths.size());
|
||||
|
||||
for(std::uint32_t i = 0; i < paths.size(); i++) {
|
||||
file_ids_map[i] = self.path_pool.path_id(paths[i]);
|
||||
}
|
||||
|
||||
for(auto& [symbol_id, symbol]: index.symbols) {
|
||||
// Skip symbols that don't reference any new file.
|
||||
bool references_new = false;
|
||||
for(auto ref: symbol.reference_files) {
|
||||
if(new_file_ids.contains(ref)) {
|
||||
references_new = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(!references_new)
|
||||
continue;
|
||||
|
||||
auto& target_symbol = self.symbols[symbol_id];
|
||||
if(target_symbol.name.empty()) {
|
||||
target_symbol.name = symbol.name;
|
||||
target_symbol.kind = symbol.kind;
|
||||
}
|
||||
for(auto ref: symbol.reference_files) {
|
||||
target_symbol.reference_files.add(file_ids_map[ref]);
|
||||
}
|
||||
}
|
||||
|
||||
return file_ids_map;
|
||||
}
|
||||
|
||||
void ProjectIndex::serialize(this ProjectIndex& self, llvm::raw_ostream& os) {
|
||||
fbs::FlatBufferBuilder builder(1024);
|
||||
|
||||
|
||||
@@ -82,6 +82,12 @@ struct ProjectIndex {
|
||||
|
||||
llvm::SmallVector<std::uint32_t> merge(this ProjectIndex& self, TUIndex& index);
|
||||
|
||||
/// Selective merge: only update symbols referenced by files in new_file_ids.
|
||||
/// new_file_ids contains TU-local path indices for cache-miss FileIndices.
|
||||
llvm::SmallVector<std::uint32_t> merge(this ProjectIndex& self,
|
||||
TUIndex& index,
|
||||
const Bitmap& new_file_ids);
|
||||
|
||||
void serialize(this ProjectIndex& self, llvm::raw_ostream& os);
|
||||
|
||||
static ProjectIndex from(const void* data);
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include "index/tu_index.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <tuple>
|
||||
|
||||
#include "index/serialization.h"
|
||||
@@ -160,6 +161,9 @@ private:
|
||||
} // namespace
|
||||
|
||||
std::array<std::uint8_t, 32> FileIndex::hash() {
|
||||
if(cached_hash)
|
||||
return *cached_hash;
|
||||
|
||||
llvm::SHA256 hasher;
|
||||
|
||||
using u8 = std::uint8_t;
|
||||
@@ -172,20 +176,36 @@ std::array<std::uint8_t, 32> FileIndex::hash() {
|
||||
hasher.update(llvm::ArrayRef(data, size));
|
||||
}
|
||||
|
||||
for(auto& [symbol_id, relations]: relations) {
|
||||
// Sort keys and relations to ensure deterministic hashing regardless of
|
||||
// DenseMap iteration order. This mutates the relations in-place intentionally.
|
||||
llvm::SmallVector<SymbolHash> sorted_keys;
|
||||
sorted_keys.reserve(relations.size());
|
||||
for(auto& [symbol_id, _]: relations) {
|
||||
sorted_keys.push_back(symbol_id);
|
||||
}
|
||||
std::ranges::sort(sorted_keys);
|
||||
|
||||
for(auto symbol_id: sorted_keys) {
|
||||
hasher.update(std::bit_cast<std::array<u8, sizeof(symbol_id)>>(symbol_id));
|
||||
static_assert(sizeof(Relation) ==
|
||||
sizeof(RelationKind) + 4 + sizeof(Range) + sizeof(SymbolHash));
|
||||
static_assert(sizeof(Relation) % 8 == 0);
|
||||
|
||||
if(!relations.empty()) {
|
||||
auto data = reinterpret_cast<u8*>(relations.data());
|
||||
auto size = relations.size() * sizeof(Relation);
|
||||
auto& rels = relations[symbol_id];
|
||||
std::ranges::sort(rels, [](const Relation& lhs, const Relation& rhs) {
|
||||
return std::tuple(lhs.kind.value(), lhs.range.begin, lhs.range.end, lhs.target_symbol) <
|
||||
std::tuple(rhs.kind.value(), rhs.range.begin, rhs.range.end, rhs.target_symbol);
|
||||
});
|
||||
|
||||
if(!rels.empty()) {
|
||||
auto data = reinterpret_cast<u8*>(rels.data());
|
||||
auto size = rels.size() * sizeof(Relation);
|
||||
hasher.update(llvm::ArrayRef(data, size));
|
||||
}
|
||||
}
|
||||
|
||||
return hasher.final();
|
||||
cached_hash = hasher.final();
|
||||
return *cached_hash;
|
||||
}
|
||||
|
||||
TUIndex TUIndex::build(CompilationUnitRef unit, bool interested_only) {
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <bit>
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@@ -52,7 +53,12 @@ struct FileIndex {
|
||||
|
||||
std::vector<Occurrence> occurrences;
|
||||
|
||||
/// Compute (and cache) SHA256 hash of this index's content.
|
||||
/// Each FileIndex instance must only be hashed by one thread at a time.
|
||||
std::array<std::uint8_t, 32> hash();
|
||||
|
||||
private:
|
||||
std::optional<std::array<std::uint8_t, 32>> cached_hash;
|
||||
};
|
||||
|
||||
struct Symbol {
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include <bit>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <utility>
|
||||
|
||||
#define ROARING_EXCEPTIONS 0
|
||||
#define ROARING_TERMINATE(message) std::abort()
|
||||
@@ -10,4 +13,162 @@ namespace clice {
|
||||
|
||||
using Bitmap = roaring::Roaring;
|
||||
|
||||
/// Compact bitmap optimized for small canonical-id sets.
|
||||
///
|
||||
/// Uses a tagged pointer scheme in a single uint64_t:
|
||||
/// - Low bit 1: inline mode, bits 1-63 store the bitmap (ids 0-62)
|
||||
/// - Low bit 0: heap mode, the value is a roaring::Roaring* (aligned, so low bit is 0)
|
||||
///
|
||||
/// Default state is inline-empty (data == 1). Upgrades to heap when an id >= 63
|
||||
/// is added. The common case (< 63 canonical ids per MergedIndex) stays entirely
|
||||
/// inline with zero heap allocation and single-instruction bitwise operations.
|
||||
class ContextBitmap {
|
||||
constexpr static std::uint32_t inline_capacity = 63;
|
||||
constexpr static std::uint64_t inline_tag = 1;
|
||||
|
||||
std::uint64_t data = inline_tag;
|
||||
|
||||
bool is_inline() const {
|
||||
return data & inline_tag;
|
||||
}
|
||||
|
||||
roaring::Roaring* as_heap() const {
|
||||
return reinterpret_cast<roaring::Roaring*>(data);
|
||||
}
|
||||
|
||||
std::uint64_t bits() const {
|
||||
return data >> 1;
|
||||
}
|
||||
|
||||
void free_heap() {
|
||||
if(!is_inline())
|
||||
delete as_heap();
|
||||
}
|
||||
|
||||
void upgrade(std::uint32_t new_id) {
|
||||
auto* r = new roaring::Roaring();
|
||||
auto b = bits();
|
||||
while(b) {
|
||||
r->add(static_cast<std::uint32_t>(std::countr_zero(b)));
|
||||
b &= b - 1;
|
||||
}
|
||||
r->add(new_id);
|
||||
data = reinterpret_cast<std::uint64_t>(r);
|
||||
}
|
||||
|
||||
public:
|
||||
ContextBitmap() = default;
|
||||
|
||||
~ContextBitmap() {
|
||||
free_heap();
|
||||
}
|
||||
|
||||
ContextBitmap(ContextBitmap&& other) noexcept : data(std::exchange(other.data, inline_tag)) {}
|
||||
|
||||
ContextBitmap& operator=(ContextBitmap&& other) noexcept {
|
||||
if(this != &other) {
|
||||
free_heap();
|
||||
data = std::exchange(other.data, inline_tag);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
ContextBitmap(const ContextBitmap&) = delete;
|
||||
ContextBitmap& operator=(const ContextBitmap&) = delete;
|
||||
|
||||
void add(std::uint32_t id) {
|
||||
if(is_inline()) {
|
||||
if(id < inline_capacity) {
|
||||
data |= (1ULL << (id + 1));
|
||||
} else {
|
||||
upgrade(id);
|
||||
}
|
||||
} else {
|
||||
as_heap()->add(id);
|
||||
}
|
||||
}
|
||||
|
||||
void remove(std::uint32_t id) {
|
||||
if(is_inline()) {
|
||||
if(id < inline_capacity)
|
||||
data &= ~(1ULL << (id + 1));
|
||||
} else {
|
||||
as_heap()->remove(id);
|
||||
}
|
||||
}
|
||||
|
||||
bool is_empty() const {
|
||||
if(is_inline())
|
||||
return bits() == 0;
|
||||
return as_heap()->isEmpty();
|
||||
}
|
||||
|
||||
/// Check if (this - other) has any bits set, without allocating in the common inline case.
|
||||
bool any_not_in(const ContextBitmap& other) const {
|
||||
if(is_inline() && other.is_inline())
|
||||
return (bits() & ~other.bits()) != 0;
|
||||
|
||||
// Rare: at least one side is heap. Use roaring operations.
|
||||
if(is_inline()) {
|
||||
auto b = bits();
|
||||
while(b) {
|
||||
auto i = static_cast<std::uint32_t>(std::countr_zero(b));
|
||||
if(!other.as_heap()->contains(i))
|
||||
return true;
|
||||
b &= b - 1;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if(other.is_inline()) {
|
||||
for(auto v: *as_heap()) {
|
||||
if(v >= inline_capacity || !(other.bits() & (1ULL << v)))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
return !(*as_heap() - *other.as_heap()).isEmpty();
|
||||
}
|
||||
|
||||
friend bool operator==(const ContextBitmap& lhs, const ContextBitmap& rhs) {
|
||||
if(lhs.is_inline() && rhs.is_inline())
|
||||
return lhs.data == rhs.data;
|
||||
// Different modes implies different sets: a heap bitmap always contains
|
||||
// at least one id >= inline_capacity that an inline bitmap cannot hold.
|
||||
if(lhs.is_inline() != rhs.is_inline())
|
||||
return false;
|
||||
return *lhs.as_heap() == *rhs.as_heap();
|
||||
}
|
||||
|
||||
/// Convert to roaring::Roaring (for serialization).
|
||||
roaring::Roaring to_roaring() const {
|
||||
roaring::Roaring r;
|
||||
if(is_inline()) {
|
||||
auto b = bits();
|
||||
while(b) {
|
||||
r.add(static_cast<std::uint32_t>(std::countr_zero(b)));
|
||||
b &= b - 1;
|
||||
}
|
||||
} else {
|
||||
r = *as_heap();
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
/// Construct from roaring::Roaring (for deserialization).
|
||||
static ContextBitmap from_roaring(const roaring::Roaring& r) {
|
||||
ContextBitmap result;
|
||||
if(r.isEmpty())
|
||||
return result;
|
||||
if(r.maximum() < inline_capacity) {
|
||||
for(auto v: r)
|
||||
result.data |= (1ULL << (v + 1));
|
||||
} else {
|
||||
result.data = reinterpret_cast<std::uint64_t>(new roaring::Roaring(r));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace clice
|
||||
|
||||
@@ -357,6 +357,68 @@ TEST_CASE(CacheInvalidatedAfterMerge) {
|
||||
ASSERT_TRUE(found_second);
|
||||
}
|
||||
|
||||
TEST_CASE(MergeCacheHit) {
|
||||
add_file("header.h", R"(
|
||||
#pragma once
|
||||
inline int shared() { return 1; }
|
||||
)");
|
||||
add_main("a.cpp", R"(
|
||||
#include "header.h"
|
||||
int use_a() { return shared(); }
|
||||
)");
|
||||
ASSERT_TRUE(compile());
|
||||
auto tu_a = index::TUIndex::build(*unit);
|
||||
|
||||
add_file("header.h", R"(
|
||||
#pragma once
|
||||
inline int shared() { return 1; }
|
||||
)");
|
||||
add_main("b.cpp", R"(
|
||||
#include "header.h"
|
||||
int use_b() { return shared(); }
|
||||
)");
|
||||
ASSERT_TRUE(compile());
|
||||
auto tu_b = index::TUIndex::build(*unit);
|
||||
|
||||
// First merge of a FileIndex should return false (new data).
|
||||
index::MergedIndex merged;
|
||||
bool first_hit = false;
|
||||
for(auto& [fid, file_index]: tu_a.file_indices) {
|
||||
first_hit = merged.merge(0, tu_a.graph.include_location_id(fid), file_index, {});
|
||||
}
|
||||
ASSERT_FALSE(first_hit);
|
||||
|
||||
// Second merge with identical content should return true (cache hit).
|
||||
bool second_hit = false;
|
||||
for(auto& [fid, file_index]: tu_b.file_indices) {
|
||||
second_hit = merged.merge(1, tu_b.graph.include_location_id(fid), file_index, {});
|
||||
}
|
||||
ASSERT_TRUE(second_hit);
|
||||
}
|
||||
|
||||
TEST_CASE(MergeCacheHitCompilation) {
|
||||
build_index(R"(
|
||||
int foo() { return 42; }
|
||||
)");
|
||||
auto tu_a = tu_index;
|
||||
|
||||
build_index(R"(
|
||||
int foo() { return 42; }
|
||||
)");
|
||||
auto tu_b = tu_index;
|
||||
|
||||
index::MergedIndex merged;
|
||||
std::vector<index::IncludeLocation> locations_a;
|
||||
bool first_hit =
|
||||
merged.merge(0, tu_a.built_at, std::move(locations_a), tu_a.main_file_index, {});
|
||||
ASSERT_FALSE(first_hit);
|
||||
|
||||
std::vector<index::IncludeLocation> locations_b;
|
||||
bool second_hit =
|
||||
merged.merge(1, tu_b.built_at, std::move(locations_b), tu_b.main_file_index, {});
|
||||
ASSERT_TRUE(second_hit);
|
||||
}
|
||||
|
||||
}; // TEST_SUITE(MergedIndex)
|
||||
} // namespace
|
||||
} // namespace clice::testing
|
||||
|
||||
@@ -200,6 +200,58 @@ TEST_CASE(NameSurvivesRoundTrip) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(SelectiveMergeFilters) {
|
||||
add_file("header.h", R"(
|
||||
#pragma once
|
||||
inline int shared() { return 1; }
|
||||
)");
|
||||
add_main("main.cpp", R"(
|
||||
#include "header.h"
|
||||
int use() { return shared(); }
|
||||
)");
|
||||
ASSERT_TRUE(compile());
|
||||
auto tu = index::TUIndex::build(*unit);
|
||||
|
||||
// Build a bitmap containing only the main file's path index.
|
||||
auto main_path_id = static_cast<std::uint32_t>(tu.graph.paths.size() - 1);
|
||||
Bitmap new_file_ids;
|
||||
new_file_ids.add(main_path_id);
|
||||
|
||||
index::ProjectIndex project;
|
||||
project.merge(tu, new_file_ids);
|
||||
|
||||
// Symbols that reference the main file should be present.
|
||||
bool has_main_symbols = false;
|
||||
for(auto& [hash, symbol]: tu.symbols) {
|
||||
for(auto ref: symbol.reference_files) {
|
||||
if(ref == main_path_id && project.symbols.contains(hash)) {
|
||||
has_main_symbols = true;
|
||||
// ALL reference_files should be added, not just the new ones.
|
||||
auto& proj_sym = project.symbols[hash];
|
||||
ASSERT_TRUE(proj_sym.reference_files.cardinality() >=
|
||||
symbol.reference_files.cardinality());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
ASSERT_TRUE(has_main_symbols);
|
||||
}
|
||||
|
||||
TEST_CASE(SelectiveMergeEmpty) {
|
||||
index::TUIndex tu;
|
||||
ASSERT_TRUE(build_and_index(R"(
|
||||
int foo() { return 42; }
|
||||
)",
|
||||
tu));
|
||||
|
||||
Bitmap empty_bitmap;
|
||||
index::ProjectIndex project;
|
||||
project.merge(tu, empty_bitmap);
|
||||
|
||||
// No symbols should be added when the bitmap is empty.
|
||||
ASSERT_TRUE(project.symbols.empty());
|
||||
}
|
||||
|
||||
}; // TEST_SUITE(ProjectIndex)
|
||||
} // namespace
|
||||
} // namespace clice::testing
|
||||
|
||||
@@ -500,6 +500,36 @@ TEST_CASE(SymbolKinds) {
|
||||
check_kind("ns", SymbolKind::Namespace);
|
||||
}
|
||||
|
||||
TEST_CASE(HashCaching) {
|
||||
build_index(R"(
|
||||
int foo() { return 42; }
|
||||
int bar() { return foo() + 1; }
|
||||
)");
|
||||
|
||||
auto& index = tu_index.main_file_index;
|
||||
auto hash1 = index.hash();
|
||||
auto hash2 = index.hash();
|
||||
ASSERT_EQ(hash1, hash2);
|
||||
}
|
||||
|
||||
TEST_CASE(HashDeterminism) {
|
||||
add_main("main.cpp", R"(
|
||||
int foo() { return 42; }
|
||||
int bar() { return foo() + 1; }
|
||||
)");
|
||||
ASSERT_TRUE(compile());
|
||||
auto index_a = index::TUIndex::build(*unit);
|
||||
|
||||
add_main("main.cpp", R"(
|
||||
int foo() { return 42; }
|
||||
int bar() { return foo() + 1; }
|
||||
)");
|
||||
ASSERT_TRUE(compile());
|
||||
auto index_b = index::TUIndex::build(*unit);
|
||||
|
||||
ASSERT_EQ(index_a.main_file_index.hash(), index_b.main_file_index.hash());
|
||||
}
|
||||
|
||||
}; // TEST_SUITE(TUIndex)
|
||||
} // namespace
|
||||
} // namespace clice::testing
|
||||
|
||||
297
tests/unit/support/context_bitmap_tests.cpp
Normal file
297
tests/unit/support/context_bitmap_tests.cpp
Normal file
@@ -0,0 +1,297 @@
|
||||
#include "test/test.h"
|
||||
#include "support/bitmap.h"
|
||||
|
||||
namespace clice::testing {
|
||||
namespace {
|
||||
|
||||
TEST_SUITE(ContextBitmap) {
|
||||
|
||||
TEST_CASE(EmptyByDefault) {
|
||||
ContextBitmap bm;
|
||||
EXPECT_TRUE(bm.is_empty());
|
||||
}
|
||||
|
||||
TEST_CASE(AddInline) {
|
||||
ContextBitmap bm;
|
||||
bm.add(0);
|
||||
EXPECT_FALSE(bm.is_empty());
|
||||
bm.add(62);
|
||||
EXPECT_FALSE(bm.is_empty());
|
||||
|
||||
auto r = bm.to_roaring();
|
||||
EXPECT_TRUE(r.contains(0));
|
||||
EXPECT_TRUE(r.contains(62));
|
||||
EXPECT_FALSE(r.contains(1));
|
||||
EXPECT_EQ(r.cardinality(), 2U);
|
||||
}
|
||||
|
||||
TEST_CASE(RemoveInline) {
|
||||
ContextBitmap bm;
|
||||
bm.add(5);
|
||||
bm.add(10);
|
||||
bm.remove(5);
|
||||
|
||||
auto r = bm.to_roaring();
|
||||
EXPECT_FALSE(r.contains(5));
|
||||
EXPECT_TRUE(r.contains(10));
|
||||
EXPECT_EQ(r.cardinality(), 1U);
|
||||
|
||||
bm.remove(10);
|
||||
EXPECT_TRUE(bm.is_empty());
|
||||
}
|
||||
|
||||
TEST_CASE(UpgradeToHeap) {
|
||||
ContextBitmap bm;
|
||||
bm.add(5);
|
||||
bm.add(63);
|
||||
|
||||
EXPECT_FALSE(bm.is_empty());
|
||||
auto r = bm.to_roaring();
|
||||
EXPECT_TRUE(r.contains(5));
|
||||
EXPECT_TRUE(r.contains(63));
|
||||
EXPECT_EQ(r.cardinality(), 2U);
|
||||
}
|
||||
|
||||
TEST_CASE(HeapAddRemove) {
|
||||
ContextBitmap bm;
|
||||
bm.add(100);
|
||||
bm.add(200);
|
||||
bm.remove(100);
|
||||
|
||||
auto r = bm.to_roaring();
|
||||
EXPECT_FALSE(r.contains(100));
|
||||
EXPECT_TRUE(r.contains(200));
|
||||
}
|
||||
|
||||
TEST_CASE(AnyNotInBothInline) {
|
||||
ContextBitmap a, b;
|
||||
a.add(1);
|
||||
a.add(2);
|
||||
b.add(1);
|
||||
b.add(2);
|
||||
EXPECT_FALSE(a.any_not_in(b));
|
||||
|
||||
a.add(3);
|
||||
EXPECT_TRUE(a.any_not_in(b));
|
||||
}
|
||||
|
||||
TEST_CASE(AnyNotInInlineVsHeap) {
|
||||
ContextBitmap a, b;
|
||||
a.add(5);
|
||||
b.add(5);
|
||||
b.add(100);
|
||||
EXPECT_FALSE(a.any_not_in(b));
|
||||
|
||||
a.add(10);
|
||||
EXPECT_TRUE(a.any_not_in(b));
|
||||
}
|
||||
|
||||
TEST_CASE(AnyNotInHeapVsInline) {
|
||||
ContextBitmap a, b;
|
||||
a.add(100);
|
||||
b.add(5);
|
||||
// 100 >= 63 not in inline b
|
||||
EXPECT_TRUE(a.any_not_in(b));
|
||||
|
||||
ContextBitmap c, d;
|
||||
c.add(5);
|
||||
c.add(100);
|
||||
d.add(5);
|
||||
// c has 100 which d doesn't have
|
||||
EXPECT_TRUE(c.any_not_in(d));
|
||||
}
|
||||
|
||||
TEST_CASE(AnyNotInBothHeap) {
|
||||
ContextBitmap a, b;
|
||||
a.add(100);
|
||||
a.add(200);
|
||||
b.add(100);
|
||||
b.add(200);
|
||||
EXPECT_FALSE(a.any_not_in(b));
|
||||
|
||||
a.add(300);
|
||||
EXPECT_TRUE(a.any_not_in(b));
|
||||
}
|
||||
|
||||
TEST_CASE(MoveConstruct) {
|
||||
ContextBitmap a;
|
||||
a.add(5);
|
||||
a.add(10);
|
||||
ContextBitmap b(std::move(a));
|
||||
|
||||
EXPECT_TRUE(a.is_empty());
|
||||
EXPECT_FALSE(b.is_empty());
|
||||
auto r = b.to_roaring();
|
||||
EXPECT_TRUE(r.contains(5));
|
||||
EXPECT_TRUE(r.contains(10));
|
||||
}
|
||||
|
||||
TEST_CASE(MoveConstructHeap) {
|
||||
ContextBitmap a;
|
||||
a.add(100);
|
||||
ContextBitmap b(std::move(a));
|
||||
|
||||
EXPECT_TRUE(a.is_empty());
|
||||
auto r = b.to_roaring();
|
||||
EXPECT_TRUE(r.contains(100));
|
||||
}
|
||||
|
||||
TEST_CASE(MoveAssign) {
|
||||
ContextBitmap a, b;
|
||||
a.add(3);
|
||||
b.add(7);
|
||||
b = std::move(a);
|
||||
|
||||
EXPECT_TRUE(a.is_empty());
|
||||
auto r = b.to_roaring();
|
||||
EXPECT_TRUE(r.contains(3));
|
||||
EXPECT_FALSE(r.contains(7));
|
||||
}
|
||||
|
||||
TEST_CASE(FromRoaringInline) {
|
||||
roaring::Roaring r;
|
||||
r.add(0);
|
||||
r.add(30);
|
||||
r.add(62);
|
||||
auto bm = ContextBitmap::from_roaring(r);
|
||||
|
||||
EXPECT_FALSE(bm.is_empty());
|
||||
auto out = bm.to_roaring();
|
||||
EXPECT_EQ(out.cardinality(), 3U);
|
||||
EXPECT_TRUE(out.contains(0));
|
||||
EXPECT_TRUE(out.contains(30));
|
||||
EXPECT_TRUE(out.contains(62));
|
||||
}
|
||||
|
||||
TEST_CASE(FromRoaringHeap) {
|
||||
roaring::Roaring r;
|
||||
r.add(5);
|
||||
r.add(100);
|
||||
auto bm = ContextBitmap::from_roaring(r);
|
||||
|
||||
auto out = bm.to_roaring();
|
||||
EXPECT_EQ(out.cardinality(), 2U);
|
||||
EXPECT_TRUE(out.contains(5));
|
||||
EXPECT_TRUE(out.contains(100));
|
||||
}
|
||||
|
||||
TEST_CASE(FromRoaringEmpty) {
|
||||
roaring::Roaring r;
|
||||
auto bm = ContextBitmap::from_roaring(r);
|
||||
EXPECT_TRUE(bm.is_empty());
|
||||
}
|
||||
|
||||
TEST_CASE(Equality) {
|
||||
ContextBitmap a, b;
|
||||
a.add(1);
|
||||
a.add(2);
|
||||
b.add(1);
|
||||
b.add(2);
|
||||
EXPECT_TRUE(a == b);
|
||||
|
||||
b.add(3);
|
||||
EXPECT_FALSE(a == b);
|
||||
}
|
||||
|
||||
TEST_CASE(BoundaryId62) {
|
||||
ContextBitmap bm;
|
||||
bm.add(62);
|
||||
auto r = bm.to_roaring();
|
||||
EXPECT_TRUE(r.contains(62));
|
||||
EXPECT_EQ(r.cardinality(), 1U);
|
||||
}
|
||||
|
||||
TEST_CASE(BoundaryId63) {
|
||||
ContextBitmap bm;
|
||||
bm.add(63);
|
||||
auto r = bm.to_roaring();
|
||||
EXPECT_TRUE(r.contains(63));
|
||||
EXPECT_EQ(r.cardinality(), 1U);
|
||||
}
|
||||
|
||||
TEST_CASE(MoveAssignHeap) {
|
||||
ContextBitmap a, b;
|
||||
a.add(100);
|
||||
a.add(200);
|
||||
b.add(300);
|
||||
b.add(400);
|
||||
b = std::move(a);
|
||||
|
||||
EXPECT_TRUE(a.is_empty());
|
||||
auto r = b.to_roaring();
|
||||
EXPECT_TRUE(r.contains(100));
|
||||
EXPECT_TRUE(r.contains(200));
|
||||
EXPECT_FALSE(r.contains(300));
|
||||
EXPECT_FALSE(r.contains(400));
|
||||
}
|
||||
|
||||
TEST_CASE(EqualityHeap) {
|
||||
ContextBitmap a, b;
|
||||
a.add(100);
|
||||
a.add(200);
|
||||
b.add(100);
|
||||
b.add(200);
|
||||
EXPECT_TRUE(a == b);
|
||||
|
||||
b.add(300);
|
||||
EXPECT_FALSE(a == b);
|
||||
}
|
||||
|
||||
TEST_CASE(AddSmallAfterUpgrade) {
|
||||
ContextBitmap bm;
|
||||
bm.add(100);
|
||||
bm.add(5);
|
||||
|
||||
auto r = bm.to_roaring();
|
||||
EXPECT_TRUE(r.contains(100));
|
||||
EXPECT_TRUE(r.contains(5));
|
||||
EXPECT_EQ(r.cardinality(), 2U);
|
||||
}
|
||||
|
||||
TEST_CASE(AnyNotInEmpty) {
|
||||
ContextBitmap empty_a, empty_b, non_empty;
|
||||
non_empty.add(5);
|
||||
|
||||
EXPECT_FALSE(empty_a.any_not_in(non_empty));
|
||||
EXPECT_TRUE(non_empty.any_not_in(empty_b));
|
||||
EXPECT_FALSE(empty_a.any_not_in(empty_b));
|
||||
}
|
||||
|
||||
TEST_CASE(RemoveNonexistent) {
|
||||
// Inline mode: remove id that was never added.
|
||||
ContextBitmap inline_bm;
|
||||
inline_bm.add(3);
|
||||
inline_bm.remove(10);
|
||||
EXPECT_FALSE(inline_bm.is_empty());
|
||||
auto r1 = inline_bm.to_roaring();
|
||||
EXPECT_TRUE(r1.contains(3));
|
||||
EXPECT_EQ(r1.cardinality(), 1U);
|
||||
|
||||
// Heap mode: remove id that was never added.
|
||||
ContextBitmap heap_bm;
|
||||
heap_bm.add(100);
|
||||
heap_bm.remove(200);
|
||||
EXPECT_FALSE(heap_bm.is_empty());
|
||||
auto r2 = heap_bm.to_roaring();
|
||||
EXPECT_TRUE(r2.contains(100));
|
||||
EXPECT_EQ(r2.cardinality(), 1U);
|
||||
}
|
||||
|
||||
TEST_CASE(RemoveLargeInInline) {
|
||||
ContextBitmap bm;
|
||||
bm.add(5);
|
||||
bm.add(10);
|
||||
// Remove id >= 63 while in inline mode; should be a no-op.
|
||||
bm.remove(63);
|
||||
bm.remove(100);
|
||||
EXPECT_FALSE(bm.is_empty());
|
||||
auto r = bm.to_roaring();
|
||||
EXPECT_TRUE(r.contains(5));
|
||||
EXPECT_TRUE(r.contains(10));
|
||||
EXPECT_EQ(r.cardinality(), 2U);
|
||||
}
|
||||
|
||||
}; // TEST_SUITE(ContextBitmap)
|
||||
|
||||
} // namespace
|
||||
} // namespace clice::testing
|
||||
Reference in New Issue
Block a user