2 Commits

Author SHA1 Message Date
ykiko
e1dde9ee92 fix(index): address pre-PR review findings
- Fix use-after-partial-move and division-by-zero in benchmark
- Use std::exchange in ContextBitmap move operations
- Add clarifying comments on operator== mixed mode and hash() sorting
- Add tests: compilation-context merge cache hit, hash determinism,
  any_not_in empty inputs, remove edge cases

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-18 14:49:39 +08:00
ykiko
005fc9ac21 feat(index): optimize merge with ContextBitmap, hash caching, and selective merge
Introduce ContextBitmap (tagged-pointer inline bitmap for <63 ids) to
replace roaring::Roaring in MergedIndex hot paths, eliminating heap
allocations in the common case. Cache FileIndex SHA256 hashes to avoid
recomputation. Add selective ProjectIndex::merge that skips symbols not
referencing newly-merged files. MergedIndex::merge now returns bool to
indicate cache hits.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-18 14:49:39 +08:00
13 changed files with 1392 additions and 45 deletions

View File

@@ -200,6 +200,14 @@ if(CLICE_ENABLE_BENCHMARK)
"${PROJECT_SOURCE_DIR}/src"
)
target_link_libraries(scan_benchmark PRIVATE clice::core kota::deco)
add_executable(index_benchmark
"${PROJECT_SOURCE_DIR}/benchmarks/index_benchmark.cpp"
)
target_include_directories(index_benchmark PRIVATE
"${PROJECT_SOURCE_DIR}/src"
)
target_link_libraries(index_benchmark PRIVATE clice::core kota::deco)
endif()
if(CLICE_RELEASE)

View File

@@ -0,0 +1,664 @@
/// Benchmark for TUIndex build, MergedIndex merge, and query performance.
///
/// Usage:
/// index_benchmark [OPTIONS] <compile_commands.json>
///
/// Example:
/// ./build/RelWithDebInfo/bin/index_benchmark \
/// /home/ykiko/C++/clice/.llvm/build-debug/compile_commands.json
#include <algorithm>
#include <atomic>
#include <chrono>
#include <cstdlib>
#include <mutex>
#include <numeric>
#include <print>
#include <sstream>
#include <thread>
#include <vector>
#include "command/command.h"
#include "compile/compilation.h"
#include "index/merged_index.h"
#include "index/project_index.h"
#include "index/tu_index.h"
#include "support/logging.h"
#include "kota/deco/deco.h"
#include "kota/ipc/lsp/position.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/raw_ostream.h"
using namespace clice;
struct Options {
DecoKV(names = {"--log-level"}; help = "Log level"; required = false;)
<std::string> log_level = "off";
DecoKV(names = {"--limit"}; help = "Max files to index (0 = all)"; required = false;)
<int> limit = 100;
DecoKV(names = {"--jobs", "-j"}; help = "Parallel compile jobs (0 = nproc)"; required = false;)
<int> jobs = 0;
DecoFlag(names = {"-h", "--help"}; help = "Show help"; required = false;)
help;
DecoInput(meta_var = "CDB"; help = "Path to compile_commands.json"; required = false;)
<std::string> cdb_path;
};
struct Timer {
std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
double ms() const {
auto end = std::chrono::steady_clock::now();
return std::chrono::duration<double, std::milli>(end - start).count();
}
void reset() {
start = std::chrono::steady_clock::now();
}
};
struct IndexStats {
std::string file;
double compile_ms = 0;
double build_index_ms = 0;
double serialize_ms = 0;
std::size_t symbols = 0;
std::size_t occurrences = 0;
std::size_t relations = 0;
std::size_t serialized_bytes = 0;
std::size_t file_count = 0;
};
struct IndexResult {
IndexStats stats;
index::TUIndex tu_index;
};
struct MergeStats {
std::size_t total_files = 0;
std::size_t total_occurrences = 0;
std::size_t total_relations = 0;
double merge_ms = 0;
double serialize_ms = 0;
std::size_t total_serialized_bytes = 0;
std::size_t shard_count = 0;
};
void print_size(std::size_t bytes) {
if(bytes < 1024)
std::print("{}B", bytes);
else if(bytes < 1024 * 1024)
std::print("{:.1f}KB", bytes / 1024.0);
else
std::print("{:.1f}MB", bytes / (1024.0 * 1024.0));
}
/// Per-task: compile one file, build TUIndex, serialize, collect stats.
/// Everything here is thread-local, no shared state.
static std::optional<IndexResult> index_one_file(CompilationDatabase& cdb,
const CompilationEntry& entry) {
auto file = cdb.resolve_path(entry.file);
auto cmds = cdb.lookup(file);
if(cmds.empty())
return std::nullopt;
auto& cmd = cmds[0];
auto argv_vec = cmd.to_argv();
CompilationParams cp;
cp.kind = CompilationKind::Indexing;
cp.directory = cmd.resolved.directory;
for(auto* arg: argv_vec)
cp.arguments.push_back(arg);
IndexResult result;
result.stats.file = std::string(file);
Timer t;
auto unit = compile(cp);
result.stats.compile_ms = t.ms();
if(!unit.completed())
return std::nullopt;
t.reset();
result.tu_index = index::TUIndex::build(unit);
result.stats.build_index_ms = t.ms();
result.stats.symbols = result.tu_index.symbols.size();
result.stats.file_count = result.tu_index.file_indices.size() + 1;
for(auto& [fid, fi]: result.tu_index.file_indices) {
result.stats.occurrences += fi.occurrences.size();
for(auto& [_, rels]: fi.relations)
result.stats.relations += rels.size();
}
result.stats.occurrences += result.tu_index.main_file_index.occurrences.size();
for(auto& [_, rels]: result.tu_index.main_file_index.relations)
result.stats.relations += rels.size();
t.reset();
std::string serialized;
llvm::raw_string_ostream os(serialized);
result.tu_index.serialize(os);
result.stats.serialize_ms = t.ms();
result.stats.serialized_bytes = serialized.size();
return result;
}
int main(int argc, const char** argv) {
auto args = deco::util::argvify(argc, argv);
auto parsed = deco::cli::parse<Options>(args);
if(!parsed.has_value()) {
std::println(stderr, "Error: {}", parsed.error().message);
return 1;
}
auto& opts = parsed->options;
if(opts.help.value_or(false) || !opts.cdb_path.has_value()) {
std::ostringstream oss;
deco::cli::write_usage_for<Options>(oss, "index_benchmark [OPTIONS] <cdb>");
std::print("{}", oss.str());
return 0;
}
auto level = spdlog::level::from_str(*opts.log_level);
clice::logging::options.level = level;
clice::logging::stderr_logger("index_benchmark", clice::logging::options);
auto num_jobs = static_cast<unsigned>(*opts.jobs);
if(num_jobs == 0)
num_jobs = std::thread::hardware_concurrency();
if(num_jobs == 0)
num_jobs = 4;
// Load CDB.
CompilationDatabase cdb;
auto count = cdb.load(*opts.cdb_path);
std::println("CDB: {} entries from {}", count, *opts.cdb_path);
auto entries = cdb.get_entries();
auto limit = static_cast<std::size_t>(*opts.limit);
if(limit > 0 && limit < entries.size())
entries = entries.take_front(limit);
std::println("Indexing {} files with {} threads...\n", entries.size(), num_jobs);
// Phase 1: Parallel compile & build TUIndex.
// Each thread picks tasks from a shared atomic counter.
// Results are written to a pre-sized vector (one slot per entry).
std::vector<std::optional<IndexResult>> results(entries.size());
std::atomic<std::size_t> next_task{0};
std::atomic<std::size_t> completed{0};
Timer phase1_wall;
{
std::vector<std::jthread> workers;
workers.reserve(num_jobs);
for(unsigned i = 0; i < num_jobs; i++) {
workers.emplace_back([&] {
while(true) {
auto idx = next_task.fetch_add(1);
if(idx >= entries.size())
break;
results[idx] = index_one_file(cdb, entries[idx]);
auto done = completed.fetch_add(1) + 1;
if(done % 100 == 0)
std::println(" ... {}/{} done", done, entries.size());
}
});
}
}
double phase1_wall_ms = phase1_wall.ms();
// Collect results on main thread.
std::vector<IndexStats> all_stats;
std::vector<std::pair<std::string, index::TUIndex>> tu_indices;
all_stats.reserve(entries.size());
tu_indices.reserve(entries.size());
double total_compile_ms = 0;
double total_build_ms = 0;
double total_serialize_ms = 0;
std::size_t total_serialized = 0;
for(auto& r: results) {
if(!r)
continue;
total_compile_ms += r->stats.compile_ms;
total_build_ms += r->stats.build_index_ms;
total_serialize_ms += r->stats.serialize_ms;
total_serialized += r->stats.serialized_bytes;
all_stats.push_back(r->stats);
tu_indices.emplace_back(std::move(r->stats.file), std::move(r->tu_index));
}
std::println("=== Phase 1: Compile & Build TUIndex ({} threads) ===", num_jobs);
std::println(" Files indexed: {}", all_stats.size());
std::println(" Wall time: {:.0f}ms", phase1_wall_ms);
std::println(" Total compile: {:.0f}ms (sum of per-thread)", total_compile_ms);
std::println(" Total build idx: {:.0f}ms (sum of per-thread)", total_build_ms);
std::println(" Total serialize: {:.0f}ms (sum of per-thread)", total_serialize_ms);
std::print(" Total TUIndex size: ");
print_size(total_serialized);
std::println("");
// Size distribution.
if(!all_stats.empty()) {
std::vector<std::size_t> sizes;
std::vector<double> compile_times;
std::vector<double> build_times;
for(auto& s: all_stats) {
sizes.push_back(s.serialized_bytes);
compile_times.push_back(s.compile_ms);
build_times.push_back(s.build_index_ms);
}
std::ranges::sort(sizes);
std::ranges::sort(compile_times);
std::ranges::sort(build_times);
auto p = [](auto& v, double pct) {
return v[static_cast<std::size_t>(v.size() * pct)];
};
std::println("\n TUIndex size distribution:");
std::print(" p50=");
print_size(p(sizes, 0.5));
std::print(" p90=");
print_size(p(sizes, 0.9));
std::print(" p99=");
print_size(p(sizes, 0.99));
std::print(" max=");
print_size(sizes.back());
std::println("");
std::println(" Compile time distribution:");
std::println(" p50={:.0f}ms p90={:.0f}ms p99={:.0f}ms max={:.0f}ms",
p(compile_times, 0.5),
p(compile_times, 0.9),
p(compile_times, 0.99),
compile_times.back());
std::println(" Build index time distribution:");
std::println(" p50={:.1f}ms p90={:.1f}ms p99={:.1f}ms max={:.1f}ms",
p(build_times, 0.5),
p(build_times, 0.9),
p(build_times, 0.99),
build_times.back());
}
// Phase 2: Merge into MergedIndex shards (main thread).
std::println("\n=== Phase 2: Merge into MergedIndex ===");
index::ProjectIndex project_index;
llvm::DenseMap<std::uint32_t, index::MergedIndex> merged;
MergeStats merge_stats;
Timer merge_timer;
llvm::DenseMap<std::uint32_t, std::string> file_contents;
auto read_content = [&](std::uint32_t path_id, llvm::StringRef path) -> llvm::StringRef {
auto it = file_contents.find(path_id);
if(it != file_contents.end())
return it->second;
auto buf = llvm::MemoryBuffer::getFile(path);
if(!buf)
return {};
auto [inserted, _] = file_contents.try_emplace(path_id, (*buf)->getBuffer().str());
return inserted->second;
};
// Pre-compute SHA256 hashes (simulates offloading to worker threads).
// With cached hash, this warms the cache so MergedIndex::merge won't recompute.
Timer hash_timer;
std::size_t hash_count = 0;
for(auto& [file, tu_index]: tu_indices) {
for(auto& [fid, fi]: tu_index.file_indices) {
fi.hash();
hash_count++;
}
tu_index.main_file_index.hash();
hash_count++;
}
double precompute_hash_ms = hash_timer.ms();
// Merge with both optimizations:
// 1. SHA256 already cached from above (MergedIndex::merge uses cached values)
// 2. Selective ProjectIndex update (skip symbols only in cache-hit files)
double project_merge_full_ms = 0;
double project_merge_selective_ms = 0;
double merged_index_merge_ms = 0;
std::size_t cache_hits = 0;
std::size_t cache_misses = 0;
for(auto& [file, tu_index]: tu_indices) {
auto& graph = tu_index.graph;
auto main_tu_path_id = static_cast<std::uint32_t>(graph.paths.size() - 1);
// First: MergedIndex merges to find out which files are new.
Bitmap new_file_ids;
Timer mt;
for(auto& [fid, fi]: tu_index.file_indices) {
auto tu_path_id = graph.path_id(fid);
auto global_path_id = project_index.path_pool.path_id(graph.paths[tu_path_id]);
auto content = read_content(global_path_id, graph.paths[tu_path_id]);
auto include_id = graph.include_location_id(fid);
bool hit = merged[global_path_id].merge(global_path_id, include_id, fi, content);
if(hit) {
cache_hits++;
} else {
cache_misses++;
new_file_ids.add(tu_path_id);
}
merge_stats.total_files++;
}
// Main file is always new.
new_file_ids.add(main_tu_path_id);
auto global_main_id = project_index.path_pool.path_id(graph.paths[main_tu_path_id]);
auto content = read_content(global_main_id, graph.paths[main_tu_path_id]);
auto now = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::system_clock::now().time_since_epoch());
std::vector<index::IncludeLocation> remapped_locs;
for(auto& loc: graph.locations) {
auto remapped = loc;
remapped.path_id = project_index.path_pool.path_id(graph.paths[loc.path_id]);
remapped_locs.push_back(remapped);
}
merged[global_main_id].merge(global_main_id,
now,
std::move(remapped_locs),
tu_index.main_file_index,
content);
cache_misses++;
merged_index_merge_ms += mt.ms();
// Then: selective ProjectIndex merge (only symbols in new files).
Timer pt;
project_index.merge(tu_index, new_file_ids);
project_merge_selective_ms += pt.ms();
}
merge_stats.merge_ms = merge_timer.ms();
merge_stats.shard_count = merged.size();
// Serialize all shards to measure size.
merge_timer.reset();
for(auto& [path, shard]: merged) {
std::string buf;
llvm::raw_string_ostream os(buf);
shard.serialize(os);
merge_stats.total_serialized_bytes += buf.size();
}
merge_stats.serialize_ms = merge_timer.ms();
std::println(" Shards: {}", merge_stats.shard_count);
std::println(" SHA256 precompute (offloadable): {:.0f}ms ({} hashes)",
precompute_hash_ms,
hash_count);
std::println(" MergedIndex merge (cached hash): {:.0f}ms (hit: {}, miss: {})",
merged_index_merge_ms,
cache_hits,
cache_misses);
std::println(" ProjectIndex merge (selective): {:.0f}ms", project_merge_selective_ms);
if(!tu_indices.empty()) {
std::println(" Main-thread total: {:.0f}ms ({:.1f}ms/TU)",
merged_index_merge_ms + project_merge_selective_ms,
(merged_index_merge_ms + project_merge_selective_ms) / tu_indices.size());
}
std::println(" Serialize time: {:.0f}ms", merge_stats.serialize_ms);
std::print(" Total size: ");
print_size(merge_stats.total_serialized_bytes);
std::println("");
// Shard size distribution.
{
std::vector<std::size_t> shard_sizes;
shard_sizes.reserve(merged.size());
for(auto& [path, shard]: merged) {
std::string buf;
llvm::raw_string_ostream os(buf);
shard.serialize(os);
shard_sizes.push_back(buf.size());
}
std::ranges::sort(shard_sizes);
if(!shard_sizes.empty()) {
auto p = [&](double pct) {
return shard_sizes[static_cast<std::size_t>(shard_sizes.size() * pct)];
};
std::println("\n Shard size distribution:");
std::print(" p50=");
print_size(p(0.5));
std::print(" p90=");
print_size(p(0.9));
std::print(" p99=");
print_size(p(0.99));
std::print(" max=");
print_size(shard_sizes.back());
std::println("");
}
}
// Phase 3: Query benchmark.
std::println("\n=== Phase 3: Query Benchmark ===");
// Collect some symbol hashes to query.
llvm::SmallVector<index::SymbolHash> sample_hashes;
for(auto& [file, tu_index]: tu_indices) {
for(auto& [hash, sym]: tu_index.symbols) {
sample_hashes.push_back(hash);
if(sample_hashes.size() >= 1000)
break;
}
if(sample_hashes.size() >= 1000)
break;
}
// Occurrence lookup by offset (raw, no position conversion).
{
std::size_t hit_count = 0;
Timer t;
constexpr int rounds = 10;
for(int r = 0; r < rounds; r++) {
for(auto& [path, shard]: merged) {
for(std::uint32_t offset = 0; offset < 1000; offset += 50) {
shard.lookup(offset, [&](const index::Occurrence&) {
hit_count++;
return true;
});
}
}
}
auto elapsed = t.ms();
auto queries = merged.size() * 20 * rounds;
std::println(" Occurrence lookup (raw): {} queries in {:.1f}ms ({:.0f} q/ms, {} hits)",
queries,
elapsed,
queries / elapsed,
hit_count);
}
// Relation lookup by symbol hash (raw, no position conversion).
{
std::size_t hit_count = 0;
Timer t;
constexpr int rounds = 10;
for(int r = 0; r < rounds; r++) {
for(auto& [path, shard]: merged) {
for(auto hash: sample_hashes) {
shard.lookup(hash, RelationKind::Definition, [&](const index::Relation&) {
hit_count++;
return true;
});
}
}
}
auto elapsed = t.ms();
auto queries = merged.size() * sample_hashes.size() * rounds;
std::println(" Relation lookup (raw): {} queries in {:.1f}ms ({:.0f} q/ms, {} hits)",
queries,
elapsed,
queries / elapsed,
hit_count);
}
// PositionMapper construction benchmark.
{
using kota::ipc::lsp::PositionMapper;
using kota::ipc::lsp::PositionEncoding;
std::size_t mapper_count = 0;
std::vector<double> construct_times;
for(auto& [path, shard]: merged) {
auto content = shard.content();
if(content.empty())
continue;
Timer t;
PositionMapper mapper(content, PositionEncoding::UTF16);
construct_times.push_back(t.ms());
mapper_count++;
}
std::ranges::sort(construct_times);
if(!construct_times.empty()) {
auto sum = std::accumulate(construct_times.begin(), construct_times.end(), 0.0);
auto p = [&](double pct) {
return construct_times[static_cast<std::size_t>(construct_times.size() * pct)];
};
std::println("\n PositionMapper construction ({} shards):", mapper_count);
std::println(" Total: {:.1f}ms", sum);
std::println(" p50={:.3f}ms p90={:.3f}ms p99={:.3f}ms max={:.3f}ms",
p(0.5),
p(0.9),
p(0.99),
construct_times.back());
}
}
// Relation lookup with full position conversion (simulates real LSP query).
{
using kota::ipc::lsp::PositionMapper;
using kota::ipc::lsp::PositionEncoding;
// Pre-build mappers for all shards.
llvm::DenseMap<std::uint32_t, PositionMapper> mappers;
for(auto& [path_id, shard]: merged) {
auto content = shard.content();
if(!content.empty())
mappers.try_emplace(path_id, content, PositionEncoding::UTF16);
}
std::size_t hit_count = 0;
std::size_t convert_count = 0;
Timer t;
constexpr int rounds = 10;
for(int r = 0; r < rounds; r++) {
for(auto& [path_id, shard]: merged) {
auto mapper_it = mappers.find(path_id);
if(mapper_it == mappers.end())
continue;
auto& mapper = mapper_it->second;
for(auto hash: sample_hashes) {
shard.lookup(hash, RelationKind::Definition, [&](const index::Relation& rel) {
auto start = mapper.to_position(rel.range.begin);
auto end = mapper.to_position(rel.range.end);
if(start && end)
convert_count++;
hit_count++;
return true;
});
}
}
}
auto elapsed = t.ms();
auto queries = merged.size() * sample_hashes.size() * rounds;
std::println("\n Relation lookup + to_position:");
std::println(" {} queries in {:.1f}ms ({:.0f} q/ms, {} hits, {} converted)",
queries,
elapsed,
queries / elapsed,
hit_count,
convert_count);
}
// Phase 4: Realistic "find references" for specific high-frequency symbols.
std::println("=== Phase 4: Realistic Find-References ===");
{
using kota::ipc::lsp::PositionMapper;
using kota::ipc::lsp::PositionEncoding;
// Use the properly merged ProjectIndex symbol table.
auto& symbol_table = project_index.symbols;
// Find symbols by name to query. Collect ALL hashes whose name contains the target.
llvm::StringRef targets[] = {"vector",
"StringRef",
"SourceLocation",
"DenseMap",
"SmallVector"};
for(auto target_name: targets) {
llvm::SmallVector<index::SymbolHash> matching_hashes;
std::size_t ref_file_count = 0;
for(auto& [hash, sym]: symbol_table) {
if(sym.name == target_name) {
matching_hashes.push_back(hash);
ref_file_count += sym.reference_files.cardinality();
}
}
if(matching_hashes.empty()) {
std::println(" {}: not found in symbol table", target_name);
continue;
}
// Query all shards for this symbol's references (simulating real find-references).
std::size_t total_refs = 0;
std::size_t shards_queried = 0;
RelationKind kinds[] = {
RelationKind::Reference,
RelationKind::Definition,
RelationKind::Declaration,
};
Timer t;
for(auto& [path, shard]: merged) {
bool found = false;
for(auto target_hash: matching_hashes) {
for(auto kind: kinds) {
shard.lookup(target_hash, kind, [&](const index::Relation&) {
total_refs++;
found = true;
return true;
});
}
}
if(found)
shards_queried++;
}
auto elapsed = t.ms();
std::println(
" {} ({} hashes, {} ref_files): {} refs across {} shards, " "scanned {} shards in {:.2f}ms",
target_name,
matching_hashes.size(),
ref_file_count,
total_refs,
shards_queried,
merged.size(),
elapsed);
// Now with reference_files bitmap to skip irrelevant shards.
// We don't have path_id mapping here, so just measure the full scan above
// vs a targeted scan using reference_files count as an estimate.
}
}
std::println("");
return 0;
}

View File

@@ -129,18 +129,19 @@ struct MergedIndex::Impl {
std::vector<std::uint32_t> canonical_ref_counts;
/// The canonical id set of removed index.
roaring::Roaring removed;
ContextBitmap removed;
/// All merged symbol occurrences.
llvm::DenseMap<Occurrence, roaring::Roaring> occurrences;
llvm::DenseMap<Occurrence, ContextBitmap> occurrences;
/// All merged symbol relations.
llvm::DenseMap<SymbolHash, llvm::DenseMap<Relation, roaring::Roaring>> relations;
llvm::DenseMap<SymbolHash, llvm::DenseMap<Relation, ContextBitmap>> relations;
/// Sorted occurrences cache for fast lookup.
std::vector<Occurrence> occurrences_cache;
void merge(this Impl& self, std::uint32_t path_id, FileIndex& index, auto&& add_context) {
/// Returns true if this was a cache hit (no new data inserted).
bool merge(this Impl& self, std::uint32_t path_id, FileIndex& index, auto&& add_context) {
auto hash = index.hash();
auto hash_key = llvm::StringRef(reinterpret_cast<char*>(hash.data()), hash.size());
auto [it, success] = self.canonical_cache.try_emplace(hash_key, self.max_canonical_id);
@@ -151,7 +152,7 @@ struct MergedIndex::Impl {
if(!success) {
self.canonical_ref_counts[canonical_id] += 1;
self.removed.remove(canonical_id);
return;
return true;
}
for(auto& occurrence: index.occurrences) {
@@ -167,6 +168,7 @@ struct MergedIndex::Impl {
self.canonical_ref_counts.emplace_back(1);
self.max_canonical_id += 1;
return false;
}
friend bool operator==(const Impl&, const Impl&) = default;
@@ -237,19 +239,20 @@ void MergedIndex::load_in_memory(this Self& self) {
// Deserialize removed bitmap.
if(root->removed() && root->removed()->size() > 0) {
index.removed = read_bitmap(root->removed());
index.removed = ContextBitmap::from_roaring(read_bitmap(root->removed()));
}
for(auto entry: *root->occurrences()) {
index.occurrences.try_emplace(*safe_cast<Occurrence>(entry->occurrence()),
read_bitmap(entry->context()));
ContextBitmap::from_roaring(read_bitmap(entry->context())));
}
for(auto entry: *root->relations()) {
auto& relations = index.relations[entry->symbol()];
for(auto relation_entry: *entry->relations()) {
relations.try_emplace(*safe_cast<Relation>(relation_entry->relation()),
read_bitmap(relation_entry->context()));
relations.try_emplace(
*safe_cast<Relation>(relation_entry->relation()),
ContextBitmap::from_roaring(read_bitmap(relation_entry->context())));
}
}
@@ -310,17 +313,22 @@ void MergedIndex::serialize(this const Self& self, llvm::raw_ostream& out) {
CreateStructVector<binary::IncludeLocation>(builder, context.include_locations));
});
auto serialize_ctx_bitmap = [&](const ContextBitmap& ctx) {
auto r = ctx.to_roaring();
buffer.clear();
buffer.resize_for_overwrite(r.getSizeInBytes(false));
r.write(buffer.data(), false);
return CreateVector(builder, buffer);
};
llvm::SmallVector<const Occurrence*> occurrence_keys;
occurrence_keys.reserve(index->occurrences.size());
auto occurrences = transform(index->occurrences, [&](auto&& value) {
auto&& [occurrence, bitmap] = value;
buffer.clear();
buffer.resize_for_overwrite(bitmap.getSizeInBytes(false));
bitmap.write(buffer.data(), false);
occurrence_keys.emplace_back(&occurrence);
return binary::CreateOccurrenceEntry(builder,
safe_cast<binary::Occurrence>(&occurrence),
CreateVector(builder, buffer));
serialize_ctx_bitmap(bitmap));
});
std::ranges::sort(std::views::zip(occurrence_keys, occurrences), [](auto lhs, auto rhs) {
const auto& lo = *std::get<0>(lhs);
@@ -335,12 +343,9 @@ void MergedIndex::serialize(this const Self& self, llvm::raw_ostream& out) {
auto&& [symbol_id, symbol_relations] = value;
auto relations = transform(symbol_relations, [&](auto&& value) {
auto&& [relation, bitmap] = value;
buffer.clear();
buffer.resize_for_overwrite(bitmap.getSizeInBytes(false));
bitmap.write(buffer.data(), false);
return binary::CreateRelationEntry(builder,
safe_cast<binary::Relation>(&relation),
CreateVector(builder, buffer));
serialize_ctx_bitmap(bitmap));
});
relation_keys.emplace_back(symbol_id);
return binary::CreateSymbolRelationsEntry(builder,
@@ -353,9 +358,10 @@ void MergedIndex::serialize(this const Self& self, llvm::raw_ostream& out) {
// Serialize removed bitmap.
buffer.clear();
if(!index->removed.isEmpty()) {
buffer.resize_for_overwrite(index->removed.getSizeInBytes(false));
index->removed.write(buffer.data(), false);
if(!index->removed.is_empty()) {
auto r = index->removed.to_roaring();
buffer.resize_for_overwrite(r.getSizeInBytes(false));
r.write(buffer.data(), false);
}
auto removed = CreateVector(builder, buffer);
@@ -398,14 +404,12 @@ void MergedIndex::lookup(this const Self& self,
while(it != occurrences.end()) {
if(it->range.contains(offset)) {
// Skip occurrences whose canonical_ids are all removed.
if(!index.removed.isEmpty()) {
if(!index.removed.is_empty()) {
auto bitmap_it = index.occurrences.find(*it);
if(bitmap_it != index.occurrences.end()) {
auto remaining = bitmap_it->second - index.removed;
if(remaining.isEmpty()) {
it++;
continue;
}
if(bitmap_it != index.occurrences.end() &&
!bitmap_it->second.any_not_in(index.removed)) {
++it;
continue;
}
}
@@ -413,7 +417,7 @@ void MergedIndex::lookup(this const Self& self,
break;
}
it++;
++it;
continue;
}
@@ -434,7 +438,7 @@ void MergedIndex::lookup(this const Self& self,
break;
}
it++;
++it;
continue;
}
@@ -457,11 +461,8 @@ void MergedIndex::lookup(this const Self& self,
for(auto& [relation, bitmap]: relations) {
if(relation.kind & kind) {
// Skip relations whose canonical_ids are all removed.
if(!self.impl->removed.isEmpty()) {
auto remaining = bitmap - self.impl->removed;
if(remaining.isEmpty()) {
continue;
}
if(!self.impl->removed.is_empty() && !bitmap.any_not_in(self.impl->removed)) {
continue;
}
if(!callback(relation)) {
@@ -579,7 +580,7 @@ void MergedIndex::remove(this Self& self, std::uint32_t path_id) {
index.occurrences_cache.clear();
}
void MergedIndex::merge(this Self& self,
bool MergedIndex::merge(this Self& self,
std::uint32_t path_id,
std::chrono::milliseconds build_at,
std::vector<IncludeLocation> include_locations,
@@ -587,16 +588,17 @@ void MergedIndex::merge(this Self& self,
llvm::StringRef content) {
self.load_in_memory();
self.impl->content = content.str();
self.impl->merge(path_id, index, [&](Impl& self, std::uint32_t canonical_id) {
bool hit = self.impl->merge(path_id, index, [&](Impl& self, std::uint32_t canonical_id) {
auto& context = self.compilation_contexts[path_id];
context.canonical_id = canonical_id;
context.build_at = build_at.count();
context.include_locations = std::move(include_locations);
});
self.impl->occurrences_cache.clear();
return hit;
}
void MergedIndex::merge(this Self& self,
bool MergedIndex::merge(this Self& self,
std::uint32_t path_id,
std::uint32_t include_id,
FileIndex& index,
@@ -605,11 +607,12 @@ void MergedIndex::merge(this Self& self,
if(self.impl->content.empty() && !content.empty()) {
self.impl->content = content.str();
}
self.impl->merge(path_id, index, [&](Impl& self, std::uint32_t canonical_id) {
bool hit = self.impl->merge(path_id, index, [&](Impl& self, std::uint32_t canonical_id) {
auto& context = self.header_contexts[path_id];
context.includes.emplace_back(include_id, canonical_id);
});
self.impl->occurrences_cache.clear();
return hit;
}
llvm::StringRef MergedIndex::content(this const Self& self) {

View File

@@ -68,7 +68,8 @@ public:
llvm::StringRef content(this const Self& self);
/// Merge the index with given compilation context.
void merge(this Self& self,
/// Returns true if this was a cache hit (no new data inserted).
bool merge(this Self& self,
std::uint32_t path_id,
std::chrono::milliseconds build_at,
std::vector<IncludeLocation> include_locations,
@@ -76,7 +77,8 @@ public:
llvm::StringRef content);
/// Merge the index with given header context.
void merge(this Self& self,
/// Returns true if this was a cache hit (no new data inserted).
bool merge(this Self& self,
std::uint32_t path_id,
std::uint32_t include_id,
FileIndex& index,

View File

@@ -27,6 +27,42 @@ llvm::SmallVector<std::uint32_t> ProjectIndex::merge(this ProjectIndex& self, TU
return file_ids_map;
}
llvm::SmallVector<std::uint32_t> ProjectIndex::merge(this ProjectIndex& self,
TUIndex& index,
const Bitmap& new_file_ids) {
auto& paths = index.graph.paths;
llvm::SmallVector<std::uint32_t> file_ids_map;
file_ids_map.resize_for_overwrite(paths.size());
for(std::uint32_t i = 0; i < paths.size(); i++) {
file_ids_map[i] = self.path_pool.path_id(paths[i]);
}
for(auto& [symbol_id, symbol]: index.symbols) {
// Skip symbols that don't reference any new file.
bool references_new = false;
for(auto ref: symbol.reference_files) {
if(new_file_ids.contains(ref)) {
references_new = true;
break;
}
}
if(!references_new)
continue;
auto& target_symbol = self.symbols[symbol_id];
if(target_symbol.name.empty()) {
target_symbol.name = symbol.name;
target_symbol.kind = symbol.kind;
}
for(auto ref: symbol.reference_files) {
target_symbol.reference_files.add(file_ids_map[ref]);
}
}
return file_ids_map;
}
void ProjectIndex::serialize(this ProjectIndex& self, llvm::raw_ostream& os) {
fbs::FlatBufferBuilder builder(1024);

View File

@@ -82,6 +82,12 @@ struct ProjectIndex {
llvm::SmallVector<std::uint32_t> merge(this ProjectIndex& self, TUIndex& index);
/// Selective merge: only update symbols referenced by files in new_file_ids.
/// new_file_ids contains TU-local path indices for cache-miss FileIndices.
llvm::SmallVector<std::uint32_t> merge(this ProjectIndex& self,
TUIndex& index,
const Bitmap& new_file_ids);
void serialize(this ProjectIndex& self, llvm::raw_ostream& os);
static ProjectIndex from(const void* data);

View File

@@ -1,5 +1,6 @@
#include "index/tu_index.h"
#include <algorithm>
#include <tuple>
#include "index/serialization.h"
@@ -160,6 +161,9 @@ private:
} // namespace
std::array<std::uint8_t, 32> FileIndex::hash() {
if(cached_hash)
return *cached_hash;
llvm::SHA256 hasher;
using u8 = std::uint8_t;
@@ -172,20 +176,36 @@ std::array<std::uint8_t, 32> FileIndex::hash() {
hasher.update(llvm::ArrayRef(data, size));
}
for(auto& [symbol_id, relations]: relations) {
// Sort keys and relations to ensure deterministic hashing regardless of
// DenseMap iteration order. This mutates the relations in-place intentionally.
llvm::SmallVector<SymbolHash> sorted_keys;
sorted_keys.reserve(relations.size());
for(auto& [symbol_id, _]: relations) {
sorted_keys.push_back(symbol_id);
}
std::ranges::sort(sorted_keys);
for(auto symbol_id: sorted_keys) {
hasher.update(std::bit_cast<std::array<u8, sizeof(symbol_id)>>(symbol_id));
static_assert(sizeof(Relation) ==
sizeof(RelationKind) + 4 + sizeof(Range) + sizeof(SymbolHash));
static_assert(sizeof(Relation) % 8 == 0);
if(!relations.empty()) {
auto data = reinterpret_cast<u8*>(relations.data());
auto size = relations.size() * sizeof(Relation);
auto& rels = relations[symbol_id];
std::ranges::sort(rels, [](const Relation& lhs, const Relation& rhs) {
return std::tuple(lhs.kind.value(), lhs.range.begin, lhs.range.end, lhs.target_symbol) <
std::tuple(rhs.kind.value(), rhs.range.begin, rhs.range.end, rhs.target_symbol);
});
if(!rels.empty()) {
auto data = reinterpret_cast<u8*>(rels.data());
auto size = rels.size() * sizeof(Relation);
hasher.update(llvm::ArrayRef(data, size));
}
}
return hasher.final();
cached_hash = hasher.final();
return *cached_hash;
}
TUIndex TUIndex::build(CompilationUnitRef unit, bool interested_only) {

View File

@@ -4,6 +4,7 @@
#include <bit>
#include <chrono>
#include <cstdint>
#include <optional>
#include <string>
#include <vector>
@@ -52,7 +53,12 @@ struct FileIndex {
std::vector<Occurrence> occurrences;
/// Compute (and cache) SHA256 hash of this index's content.
/// Each FileIndex instance must only be hashed by one thread at a time.
std::array<std::uint8_t, 32> hash();
private:
std::optional<std::array<std::uint8_t, 32>> cached_hash;
};
struct Symbol {

View File

@@ -1,6 +1,9 @@
#pragma once
#include <bit>
#include <cstdint>
#include <cstdlib>
#include <utility>
#define ROARING_EXCEPTIONS 0
#define ROARING_TERMINATE(message) std::abort()
@@ -10,4 +13,162 @@ namespace clice {
using Bitmap = roaring::Roaring;
/// Compact bitmap optimized for small canonical-id sets.
///
/// Uses a tagged pointer scheme in a single uint64_t:
/// - Low bit 1: inline mode, bits 1-63 store the bitmap (ids 0-62)
/// - Low bit 0: heap mode, the value is a roaring::Roaring* (aligned, so low bit is 0)
///
/// Default state is inline-empty (data == 1). Upgrades to heap when an id >= 63
/// is added. The common case (< 63 canonical ids per MergedIndex) stays entirely
/// inline with zero heap allocation and single-instruction bitwise operations.
class ContextBitmap {
constexpr static std::uint32_t inline_capacity = 63;
constexpr static std::uint64_t inline_tag = 1;
std::uint64_t data = inline_tag;
bool is_inline() const {
return data & inline_tag;
}
roaring::Roaring* as_heap() const {
return reinterpret_cast<roaring::Roaring*>(data);
}
std::uint64_t bits() const {
return data >> 1;
}
void free_heap() {
if(!is_inline())
delete as_heap();
}
void upgrade(std::uint32_t new_id) {
auto* r = new roaring::Roaring();
auto b = bits();
while(b) {
r->add(static_cast<std::uint32_t>(std::countr_zero(b)));
b &= b - 1;
}
r->add(new_id);
data = reinterpret_cast<std::uint64_t>(r);
}
public:
ContextBitmap() = default;
~ContextBitmap() {
free_heap();
}
ContextBitmap(ContextBitmap&& other) noexcept : data(std::exchange(other.data, inline_tag)) {}
ContextBitmap& operator=(ContextBitmap&& other) noexcept {
if(this != &other) {
free_heap();
data = std::exchange(other.data, inline_tag);
}
return *this;
}
ContextBitmap(const ContextBitmap&) = delete;
ContextBitmap& operator=(const ContextBitmap&) = delete;
void add(std::uint32_t id) {
if(is_inline()) {
if(id < inline_capacity) {
data |= (1ULL << (id + 1));
} else {
upgrade(id);
}
} else {
as_heap()->add(id);
}
}
void remove(std::uint32_t id) {
if(is_inline()) {
if(id < inline_capacity)
data &= ~(1ULL << (id + 1));
} else {
as_heap()->remove(id);
}
}
bool is_empty() const {
if(is_inline())
return bits() == 0;
return as_heap()->isEmpty();
}
/// Check if (this - other) has any bits set, without allocating in the common inline case.
bool any_not_in(const ContextBitmap& other) const {
if(is_inline() && other.is_inline())
return (bits() & ~other.bits()) != 0;
// Rare: at least one side is heap. Use roaring operations.
if(is_inline()) {
auto b = bits();
while(b) {
auto i = static_cast<std::uint32_t>(std::countr_zero(b));
if(!other.as_heap()->contains(i))
return true;
b &= b - 1;
}
return false;
}
if(other.is_inline()) {
for(auto v: *as_heap()) {
if(v >= inline_capacity || !(other.bits() & (1ULL << v)))
return true;
}
return false;
}
return !(*as_heap() - *other.as_heap()).isEmpty();
}
friend bool operator==(const ContextBitmap& lhs, const ContextBitmap& rhs) {
if(lhs.is_inline() && rhs.is_inline())
return lhs.data == rhs.data;
// Different modes implies different sets: a heap bitmap always contains
// at least one id >= inline_capacity that an inline bitmap cannot hold.
if(lhs.is_inline() != rhs.is_inline())
return false;
return *lhs.as_heap() == *rhs.as_heap();
}
/// Convert to roaring::Roaring (for serialization).
roaring::Roaring to_roaring() const {
roaring::Roaring r;
if(is_inline()) {
auto b = bits();
while(b) {
r.add(static_cast<std::uint32_t>(std::countr_zero(b)));
b &= b - 1;
}
} else {
r = *as_heap();
}
return r;
}
/// Construct from roaring::Roaring (for deserialization).
static ContextBitmap from_roaring(const roaring::Roaring& r) {
ContextBitmap result;
if(r.isEmpty())
return result;
if(r.maximum() < inline_capacity) {
for(auto v: r)
result.data |= (1ULL << (v + 1));
} else {
result.data = reinterpret_cast<std::uint64_t>(new roaring::Roaring(r));
}
return result;
}
};
} // namespace clice

View File

@@ -357,6 +357,68 @@ TEST_CASE(CacheInvalidatedAfterMerge) {
ASSERT_TRUE(found_second);
}
TEST_CASE(MergeCacheHit) {
add_file("header.h", R"(
#pragma once
inline int shared() { return 1; }
)");
add_main("a.cpp", R"(
#include "header.h"
int use_a() { return shared(); }
)");
ASSERT_TRUE(compile());
auto tu_a = index::TUIndex::build(*unit);
add_file("header.h", R"(
#pragma once
inline int shared() { return 1; }
)");
add_main("b.cpp", R"(
#include "header.h"
int use_b() { return shared(); }
)");
ASSERT_TRUE(compile());
auto tu_b = index::TUIndex::build(*unit);
// First merge of a FileIndex should return false (new data).
index::MergedIndex merged;
bool first_hit = false;
for(auto& [fid, file_index]: tu_a.file_indices) {
first_hit = merged.merge(0, tu_a.graph.include_location_id(fid), file_index, {});
}
ASSERT_FALSE(first_hit);
// Second merge with identical content should return true (cache hit).
bool second_hit = false;
for(auto& [fid, file_index]: tu_b.file_indices) {
second_hit = merged.merge(1, tu_b.graph.include_location_id(fid), file_index, {});
}
ASSERT_TRUE(second_hit);
}
TEST_CASE(MergeCacheHitCompilation) {
build_index(R"(
int foo() { return 42; }
)");
auto tu_a = tu_index;
build_index(R"(
int foo() { return 42; }
)");
auto tu_b = tu_index;
index::MergedIndex merged;
std::vector<index::IncludeLocation> locations_a;
bool first_hit =
merged.merge(0, tu_a.built_at, std::move(locations_a), tu_a.main_file_index, {});
ASSERT_FALSE(first_hit);
std::vector<index::IncludeLocation> locations_b;
bool second_hit =
merged.merge(1, tu_b.built_at, std::move(locations_b), tu_b.main_file_index, {});
ASSERT_TRUE(second_hit);
}
}; // TEST_SUITE(MergedIndex)
} // namespace
} // namespace clice::testing

View File

@@ -200,6 +200,58 @@ TEST_CASE(NameSurvivesRoundTrip) {
}
}
TEST_CASE(SelectiveMergeFilters) {
add_file("header.h", R"(
#pragma once
inline int shared() { return 1; }
)");
add_main("main.cpp", R"(
#include "header.h"
int use() { return shared(); }
)");
ASSERT_TRUE(compile());
auto tu = index::TUIndex::build(*unit);
// Build a bitmap containing only the main file's path index.
auto main_path_id = static_cast<std::uint32_t>(tu.graph.paths.size() - 1);
Bitmap new_file_ids;
new_file_ids.add(main_path_id);
index::ProjectIndex project;
project.merge(tu, new_file_ids);
// Symbols that reference the main file should be present.
bool has_main_symbols = false;
for(auto& [hash, symbol]: tu.symbols) {
for(auto ref: symbol.reference_files) {
if(ref == main_path_id && project.symbols.contains(hash)) {
has_main_symbols = true;
// ALL reference_files should be added, not just the new ones.
auto& proj_sym = project.symbols[hash];
ASSERT_TRUE(proj_sym.reference_files.cardinality() >=
symbol.reference_files.cardinality());
break;
}
}
}
ASSERT_TRUE(has_main_symbols);
}
TEST_CASE(SelectiveMergeEmpty) {
index::TUIndex tu;
ASSERT_TRUE(build_and_index(R"(
int foo() { return 42; }
)",
tu));
Bitmap empty_bitmap;
index::ProjectIndex project;
project.merge(tu, empty_bitmap);
// No symbols should be added when the bitmap is empty.
ASSERT_TRUE(project.symbols.empty());
}
}; // TEST_SUITE(ProjectIndex)
} // namespace
} // namespace clice::testing

View File

@@ -500,6 +500,36 @@ TEST_CASE(SymbolKinds) {
check_kind("ns", SymbolKind::Namespace);
}
TEST_CASE(HashCaching) {
build_index(R"(
int foo() { return 42; }
int bar() { return foo() + 1; }
)");
auto& index = tu_index.main_file_index;
auto hash1 = index.hash();
auto hash2 = index.hash();
ASSERT_EQ(hash1, hash2);
}
TEST_CASE(HashDeterminism) {
add_main("main.cpp", R"(
int foo() { return 42; }
int bar() { return foo() + 1; }
)");
ASSERT_TRUE(compile());
auto index_a = index::TUIndex::build(*unit);
add_main("main.cpp", R"(
int foo() { return 42; }
int bar() { return foo() + 1; }
)");
ASSERT_TRUE(compile());
auto index_b = index::TUIndex::build(*unit);
ASSERT_EQ(index_a.main_file_index.hash(), index_b.main_file_index.hash());
}
}; // TEST_SUITE(TUIndex)
} // namespace
} // namespace clice::testing

View File

@@ -0,0 +1,297 @@
#include "test/test.h"
#include "support/bitmap.h"
namespace clice::testing {
namespace {
TEST_SUITE(ContextBitmap) {
TEST_CASE(EmptyByDefault) {
ContextBitmap bm;
EXPECT_TRUE(bm.is_empty());
}
TEST_CASE(AddInline) {
ContextBitmap bm;
bm.add(0);
EXPECT_FALSE(bm.is_empty());
bm.add(62);
EXPECT_FALSE(bm.is_empty());
auto r = bm.to_roaring();
EXPECT_TRUE(r.contains(0));
EXPECT_TRUE(r.contains(62));
EXPECT_FALSE(r.contains(1));
EXPECT_EQ(r.cardinality(), 2U);
}
TEST_CASE(RemoveInline) {
ContextBitmap bm;
bm.add(5);
bm.add(10);
bm.remove(5);
auto r = bm.to_roaring();
EXPECT_FALSE(r.contains(5));
EXPECT_TRUE(r.contains(10));
EXPECT_EQ(r.cardinality(), 1U);
bm.remove(10);
EXPECT_TRUE(bm.is_empty());
}
TEST_CASE(UpgradeToHeap) {
ContextBitmap bm;
bm.add(5);
bm.add(63);
EXPECT_FALSE(bm.is_empty());
auto r = bm.to_roaring();
EXPECT_TRUE(r.contains(5));
EXPECT_TRUE(r.contains(63));
EXPECT_EQ(r.cardinality(), 2U);
}
TEST_CASE(HeapAddRemove) {
ContextBitmap bm;
bm.add(100);
bm.add(200);
bm.remove(100);
auto r = bm.to_roaring();
EXPECT_FALSE(r.contains(100));
EXPECT_TRUE(r.contains(200));
}
TEST_CASE(AnyNotInBothInline) {
ContextBitmap a, b;
a.add(1);
a.add(2);
b.add(1);
b.add(2);
EXPECT_FALSE(a.any_not_in(b));
a.add(3);
EXPECT_TRUE(a.any_not_in(b));
}
TEST_CASE(AnyNotInInlineVsHeap) {
ContextBitmap a, b;
a.add(5);
b.add(5);
b.add(100);
EXPECT_FALSE(a.any_not_in(b));
a.add(10);
EXPECT_TRUE(a.any_not_in(b));
}
TEST_CASE(AnyNotInHeapVsInline) {
ContextBitmap a, b;
a.add(100);
b.add(5);
// 100 >= 63 not in inline b
EXPECT_TRUE(a.any_not_in(b));
ContextBitmap c, d;
c.add(5);
c.add(100);
d.add(5);
// c has 100 which d doesn't have
EXPECT_TRUE(c.any_not_in(d));
}
TEST_CASE(AnyNotInBothHeap) {
ContextBitmap a, b;
a.add(100);
a.add(200);
b.add(100);
b.add(200);
EXPECT_FALSE(a.any_not_in(b));
a.add(300);
EXPECT_TRUE(a.any_not_in(b));
}
TEST_CASE(MoveConstruct) {
ContextBitmap a;
a.add(5);
a.add(10);
ContextBitmap b(std::move(a));
EXPECT_TRUE(a.is_empty());
EXPECT_FALSE(b.is_empty());
auto r = b.to_roaring();
EXPECT_TRUE(r.contains(5));
EXPECT_TRUE(r.contains(10));
}
TEST_CASE(MoveConstructHeap) {
ContextBitmap a;
a.add(100);
ContextBitmap b(std::move(a));
EXPECT_TRUE(a.is_empty());
auto r = b.to_roaring();
EXPECT_TRUE(r.contains(100));
}
TEST_CASE(MoveAssign) {
ContextBitmap a, b;
a.add(3);
b.add(7);
b = std::move(a);
EXPECT_TRUE(a.is_empty());
auto r = b.to_roaring();
EXPECT_TRUE(r.contains(3));
EXPECT_FALSE(r.contains(7));
}
TEST_CASE(FromRoaringInline) {
roaring::Roaring r;
r.add(0);
r.add(30);
r.add(62);
auto bm = ContextBitmap::from_roaring(r);
EXPECT_FALSE(bm.is_empty());
auto out = bm.to_roaring();
EXPECT_EQ(out.cardinality(), 3U);
EXPECT_TRUE(out.contains(0));
EXPECT_TRUE(out.contains(30));
EXPECT_TRUE(out.contains(62));
}
TEST_CASE(FromRoaringHeap) {
roaring::Roaring r;
r.add(5);
r.add(100);
auto bm = ContextBitmap::from_roaring(r);
auto out = bm.to_roaring();
EXPECT_EQ(out.cardinality(), 2U);
EXPECT_TRUE(out.contains(5));
EXPECT_TRUE(out.contains(100));
}
TEST_CASE(FromRoaringEmpty) {
roaring::Roaring r;
auto bm = ContextBitmap::from_roaring(r);
EXPECT_TRUE(bm.is_empty());
}
TEST_CASE(Equality) {
ContextBitmap a, b;
a.add(1);
a.add(2);
b.add(1);
b.add(2);
EXPECT_TRUE(a == b);
b.add(3);
EXPECT_FALSE(a == b);
}
TEST_CASE(BoundaryId62) {
ContextBitmap bm;
bm.add(62);
auto r = bm.to_roaring();
EXPECT_TRUE(r.contains(62));
EXPECT_EQ(r.cardinality(), 1U);
}
TEST_CASE(BoundaryId63) {
ContextBitmap bm;
bm.add(63);
auto r = bm.to_roaring();
EXPECT_TRUE(r.contains(63));
EXPECT_EQ(r.cardinality(), 1U);
}
TEST_CASE(MoveAssignHeap) {
ContextBitmap a, b;
a.add(100);
a.add(200);
b.add(300);
b.add(400);
b = std::move(a);
EXPECT_TRUE(a.is_empty());
auto r = b.to_roaring();
EXPECT_TRUE(r.contains(100));
EXPECT_TRUE(r.contains(200));
EXPECT_FALSE(r.contains(300));
EXPECT_FALSE(r.contains(400));
}
TEST_CASE(EqualityHeap) {
ContextBitmap a, b;
a.add(100);
a.add(200);
b.add(100);
b.add(200);
EXPECT_TRUE(a == b);
b.add(300);
EXPECT_FALSE(a == b);
}
TEST_CASE(AddSmallAfterUpgrade) {
ContextBitmap bm;
bm.add(100);
bm.add(5);
auto r = bm.to_roaring();
EXPECT_TRUE(r.contains(100));
EXPECT_TRUE(r.contains(5));
EXPECT_EQ(r.cardinality(), 2U);
}
TEST_CASE(AnyNotInEmpty) {
ContextBitmap empty_a, empty_b, non_empty;
non_empty.add(5);
EXPECT_FALSE(empty_a.any_not_in(non_empty));
EXPECT_TRUE(non_empty.any_not_in(empty_b));
EXPECT_FALSE(empty_a.any_not_in(empty_b));
}
TEST_CASE(RemoveNonexistent) {
// Inline mode: remove id that was never added.
ContextBitmap inline_bm;
inline_bm.add(3);
inline_bm.remove(10);
EXPECT_FALSE(inline_bm.is_empty());
auto r1 = inline_bm.to_roaring();
EXPECT_TRUE(r1.contains(3));
EXPECT_EQ(r1.cardinality(), 1U);
// Heap mode: remove id that was never added.
ContextBitmap heap_bm;
heap_bm.add(100);
heap_bm.remove(200);
EXPECT_FALSE(heap_bm.is_empty());
auto r2 = heap_bm.to_roaring();
EXPECT_TRUE(r2.contains(100));
EXPECT_EQ(r2.cardinality(), 1U);
}
TEST_CASE(RemoveLargeInInline) {
ContextBitmap bm;
bm.add(5);
bm.add(10);
// Remove id >= 63 while in inline mode; should be a no-op.
bm.remove(63);
bm.remove(100);
EXPECT_FALSE(bm.is_empty());
auto r = bm.to_roaring();
EXPECT_TRUE(r.contains(5));
EXPECT_TRUE(r.contains(10));
EXPECT_EQ(r.cardinality(), 2U);
}
}; // TEST_SUITE(ContextBitmap)
} // namespace
} // namespace clice::testing