Compare commits
1 Commits
folding-ra
...
migrate/fl
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d241ea8492 |
@@ -124,42 +124,21 @@ if(CLICE_CI_ENVIRONMENT)
|
||||
target_compile_definitions(clice_options INTERFACE CLICE_CI_ENVIRONMENT=1)
|
||||
endif()
|
||||
|
||||
set(FBS_SCHEMA_FILE "${PROJECT_SOURCE_DIR}/src/index/schema.fbs")
|
||||
set(GENERATED_HEADER "${PROJECT_BINARY_DIR}/generated/schema_generated.h")
|
||||
|
||||
if(CMAKE_CROSSCOMPILING)
|
||||
find_program(FLATC_EXECUTABLE flatc REQUIRED)
|
||||
set(FLATC_CMD "${FLATC_EXECUTABLE}")
|
||||
else()
|
||||
set(FLATC_CMD "$<TARGET_FILE:flatc>")
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT "${GENERATED_HEADER}"
|
||||
COMMAND ${FLATC_CMD} --cpp -o "${PROJECT_BINARY_DIR}/generated" "${FBS_SCHEMA_FILE}"
|
||||
DEPENDS "${FBS_SCHEMA_FILE}"
|
||||
COMMENT "Generating C++ header from ${FBS_SCHEMA_FILE}"
|
||||
)
|
||||
|
||||
add_custom_target(generate_flatbuffers_schema DEPENDS "${GENERATED_HEADER}")
|
||||
|
||||
file(GLOB_RECURSE CLICE_CORE_SOURCES CONFIGURE_DEPENDS "${PROJECT_SOURCE_DIR}/src/*.cpp")
|
||||
add_library(clice-core STATIC ${CLICE_CORE_SOURCES})
|
||||
add_library(clice::core ALIAS clice-core)
|
||||
add_dependencies(clice-core generate_flatbuffers_schema)
|
||||
|
||||
target_include_directories(clice-core PUBLIC
|
||||
"${PROJECT_SOURCE_DIR}/src"
|
||||
"${PROJECT_BINARY_DIR}/generated"
|
||||
)
|
||||
target_link_libraries(clice-core PUBLIC
|
||||
clice_options
|
||||
llvm-libs
|
||||
spdlog::spdlog
|
||||
roaring::roaring
|
||||
flatbuffers
|
||||
kota::ipc::lsp
|
||||
kota::codec::toml
|
||||
kota::codec::flatbuffers
|
||||
simdjson::simdjson
|
||||
)
|
||||
|
||||
|
||||
@@ -27,21 +27,10 @@ FetchContent_Declare(
|
||||
set(ENABLE_ROARING_TESTS OFF CACHE INTERNAL "" FORCE)
|
||||
set(ENABLE_ROARING_MICROBENCHMARKS OFF CACHE INTERNAL "" FORCE)
|
||||
|
||||
# flatbuffers
|
||||
FetchContent_Declare(
|
||||
flatbuffers
|
||||
GIT_REPOSITORY https://github.com/google/flatbuffers.git
|
||||
GIT_TAG v25.9.23
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
set(FLATBUFFERS_BUILD_GRPC OFF CACHE BOOL "" FORCE)
|
||||
set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "" FORCE)
|
||||
set(FLATBUFFERS_BUILD_FLATHASH OFF CACHE BOOL "" FORCE)
|
||||
|
||||
FetchContent_Declare(
|
||||
kotatsu
|
||||
GIT_REPOSITORY https://github.com/clice-io/kotatsu
|
||||
GIT_TAG main
|
||||
GIT_TAG refactor/flatbuffers-schema-driven
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
|
||||
@@ -50,7 +39,8 @@ set(KOTA_ENABLE_TEST OFF)
|
||||
set(KOTA_CODEC_ENABLE_SIMDJSON ON)
|
||||
set(KOTA_CODEC_ENABLE_YYJSON ON)
|
||||
set(KOTA_CODEC_ENABLE_TOML ON)
|
||||
set(KOTA_CODEC_ENABLE_FLATBUFFERS ON)
|
||||
set(KOTA_ENABLE_EXCEPTIONS OFF)
|
||||
set(KOTA_ENABLE_RTTI OFF)
|
||||
|
||||
FetchContent_MakeAvailable(kotatsu spdlog croaring flatbuffers)
|
||||
FetchContent_MakeAvailable(kotatsu spdlog croaring)
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "syntax/token.h"
|
||||
|
||||
#include "kota/meta/annotation.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
|
||||
namespace clice {
|
||||
@@ -42,7 +43,10 @@ struct IncludeGraph {
|
||||
/// Each `FileID` represents a new header context and is introduced
|
||||
/// by a new include directive. So a include directive is a new header
|
||||
/// context. A map between FileID and its include location.
|
||||
llvm::DenseMap<clang::FileID, std::uint32_t> file_table;
|
||||
///
|
||||
/// Runtime-only: `clang::FileID` is an AST-scoped handle; on-disk the
|
||||
/// include graph is fully described by `paths` + `locations`.
|
||||
kota::meta::skip<llvm::DenseMap<clang::FileID, std::uint32_t>> file_table;
|
||||
|
||||
static IncludeGraph from(CompilationUnitRef unit);
|
||||
|
||||
|
||||
121
src/index/kotatsu_adapters.h
Normal file
121
src/index/kotatsu_adapters.h
Normal file
@@ -0,0 +1,121 @@
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "semantic/relation_kind.h"
|
||||
#include "semantic/symbol_kind.h"
|
||||
#include "support/bitmap.h"
|
||||
|
||||
#include "kota/codec/arena/traits.h"
|
||||
#include "kota/codec/detail/fwd.h"
|
||||
|
||||
/// Type-level wire traits for clice index types.
|
||||
///
|
||||
/// These partially specialize the primary
|
||||
/// `kota::codec::serialize_traits<S, T>` / `deserialize_traits<D, T>`
|
||||
/// templates, constrained so only arena backends pick them up. They
|
||||
/// declare the wire representation for `T` and propagate through map
|
||||
/// values, sequence elements, and nested containers — no per-field
|
||||
/// `annotation<T, with<...>>` required.
|
||||
|
||||
namespace kota::codec {
|
||||
|
||||
/// `std::chrono::milliseconds` ⇄ `int64` tick count.
|
||||
template <typename S>
|
||||
requires arena::arena_serializer_like<S>
|
||||
struct serialize_traits<S, std::chrono::milliseconds> {
|
||||
using wire_type = std::int64_t;
|
||||
|
||||
static std::int64_t serialize(S&, std::chrono::milliseconds value) noexcept {
|
||||
return value.count();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename D>
|
||||
requires arena::arena_deserializer_like<D>
|
||||
struct deserialize_traits<D, std::chrono::milliseconds> {
|
||||
using wire_type = std::int64_t;
|
||||
|
||||
static std::chrono::milliseconds deserialize(const D&, std::int64_t value) noexcept {
|
||||
return std::chrono::milliseconds(value);
|
||||
}
|
||||
};
|
||||
|
||||
/// `RelationKind` ⇄ underlying `uint32` bitflags.
|
||||
template <typename S>
|
||||
requires arena::arena_serializer_like<S>
|
||||
struct serialize_traits<S, clice::RelationKind> {
|
||||
using wire_type = std::uint32_t;
|
||||
|
||||
static std::uint32_t serialize(S&, const clice::RelationKind& k) noexcept {
|
||||
return k.value();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename D>
|
||||
requires arena::arena_deserializer_like<D>
|
||||
struct deserialize_traits<D, clice::RelationKind> {
|
||||
using wire_type = std::uint32_t;
|
||||
|
||||
static clice::RelationKind deserialize(const D&, std::uint32_t v) noexcept {
|
||||
return clice::RelationKind(static_cast<clice::RelationKind::Kind>(v));
|
||||
}
|
||||
};
|
||||
|
||||
/// `SymbolKind` ⇄ underlying `uint8`.
|
||||
template <typename S>
|
||||
requires arena::arena_serializer_like<S>
|
||||
struct serialize_traits<S, clice::SymbolKind> {
|
||||
using wire_type = std::uint8_t;
|
||||
|
||||
static std::uint8_t serialize(S&, const clice::SymbolKind& k) noexcept {
|
||||
return k.value();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename D>
|
||||
requires arena::arena_deserializer_like<D>
|
||||
struct deserialize_traits<D, clice::SymbolKind> {
|
||||
using wire_type = std::uint8_t;
|
||||
|
||||
static clice::SymbolKind deserialize(const D&, std::uint8_t v) noexcept {
|
||||
return clice::SymbolKind(v);
|
||||
}
|
||||
};
|
||||
|
||||
/// `clice::Bitmap` (= `roaring::Roaring`) ⇄ opaque byte blob produced by
|
||||
/// Roaring's non-portable serialization (matches the legacy wire format).
|
||||
template <typename S>
|
||||
requires arena::arena_serializer_like<S>
|
||||
struct serialize_traits<S, clice::Bitmap> {
|
||||
using wire_type = std::vector<std::byte>;
|
||||
|
||||
static std::vector<std::byte> serialize(S&, const clice::Bitmap& bitmap) {
|
||||
std::vector<std::byte> buffer;
|
||||
if(bitmap.isEmpty()) {
|
||||
return buffer;
|
||||
}
|
||||
buffer.resize(bitmap.getSizeInBytes(false));
|
||||
bitmap.write(reinterpret_cast<char*>(buffer.data()), false);
|
||||
return buffer;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename D>
|
||||
requires arena::arena_deserializer_like<D>
|
||||
struct deserialize_traits<D, clice::Bitmap> {
|
||||
using wire_type = std::vector<std::byte>;
|
||||
|
||||
static clice::Bitmap deserialize(const D&, std::vector<std::byte> bytes) {
|
||||
if(bytes.empty()) {
|
||||
return clice::Bitmap();
|
||||
}
|
||||
return clice::Bitmap::read(reinterpret_cast<const char*>(bytes.data()), false);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace kota::codec
|
||||
@@ -1,11 +1,18 @@
|
||||
#include "index/merged_index.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <ranges>
|
||||
#include <span>
|
||||
#include <tuple>
|
||||
|
||||
#include "index/serialization.h"
|
||||
#include "index/kotatsu_adapters.h" // type_adapter specializations
|
||||
#include "support/filesystem.h"
|
||||
|
||||
#include "kota/codec/flatbuffers/deserializer.h"
|
||||
#include "kota/codec/flatbuffers/proxy.h"
|
||||
#include "kota/codec/flatbuffers/serializer.h"
|
||||
#include "kota/meta/annotation.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/Support/raw_os_ostream.h"
|
||||
|
||||
@@ -97,7 +104,7 @@ struct CompilationContext {
|
||||
|
||||
std::uint32_t canonical_id = 0;
|
||||
|
||||
std::uint64_t build_at;
|
||||
std::uint64_t build_at = 0;
|
||||
|
||||
std::vector<IncludeLocation> include_locations;
|
||||
|
||||
@@ -125,8 +132,9 @@ struct MergedIndex::Impl {
|
||||
/// The max canonical id we have allocated.
|
||||
std::uint32_t max_canonical_id = 0;
|
||||
|
||||
/// The reference count of each canonical id.
|
||||
std::vector<std::uint32_t> canonical_ref_counts;
|
||||
/// Reference counts per canonical id — derivable from header/compilation
|
||||
/// contexts at load time, so it doesn't need to live on the wire.
|
||||
kota::meta::skip<std::vector<std::uint32_t>> canonical_ref_counts;
|
||||
|
||||
/// The canonical id set of removed index.
|
||||
roaring::Roaring removed;
|
||||
@@ -137,8 +145,8 @@ struct MergedIndex::Impl {
|
||||
/// All merged symbol relations.
|
||||
llvm::DenseMap<SymbolHash, llvm::DenseMap<Relation, roaring::Roaring>> relations;
|
||||
|
||||
/// Sorted occurrences cache for fast lookup.
|
||||
std::vector<Occurrence> occurrences_cache;
|
||||
/// Sorted occurrences cache for fast lookup — rebuilt on demand.
|
||||
kota::meta::skip<std::vector<Occurrence>> occurrences_cache;
|
||||
|
||||
void merge(this Impl& self, std::uint32_t path_id, FileIndex& index, auto&& add_context) {
|
||||
auto hash = index.hash();
|
||||
@@ -172,6 +180,18 @@ struct MergedIndex::Impl {
|
||||
friend bool operator==(const Impl&, const Impl&) = default;
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
namespace kfb = kota::codec::flatbuffers;
|
||||
|
||||
std::span<const std::uint8_t> buffer_bytes(const llvm::MemoryBuffer& buffer) {
|
||||
return std::span<const std::uint8_t>(
|
||||
reinterpret_cast<const std::uint8_t*>(buffer.getBufferStart()),
|
||||
buffer.getBufferSize());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
MergedIndex::MergedIndex(std::unique_ptr<llvm::MemoryBuffer> buffer, std::unique_ptr<Impl> impl) :
|
||||
buffer(std::move(buffer)), impl(std::move(impl)) {}
|
||||
|
||||
@@ -196,65 +216,24 @@ void MergedIndex::load_in_memory(this Self& self) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto bytes = buffer_bytes(*self.buffer);
|
||||
auto result = kfb::from_flatbuffer(bytes, *self.impl);
|
||||
if(!result) {
|
||||
self.buffer.reset();
|
||||
return;
|
||||
}
|
||||
|
||||
// Rebuild the ref count table from the already-loaded contexts.
|
||||
auto& index = *self.impl;
|
||||
auto root = fbs::GetRoot<binary::MergedIndex>(self.buffer->getBufferStart());
|
||||
|
||||
index.max_canonical_id = root->max_canonical_id();
|
||||
|
||||
for(auto entry: *root->canonical_cache()) {
|
||||
index.canonical_cache.try_emplace(entry->sha256()->string_view(), entry->canonical_id());
|
||||
}
|
||||
|
||||
index.canonical_ref_counts.clear();
|
||||
index.canonical_ref_counts.resize(index.max_canonical_id, 0);
|
||||
|
||||
for(auto entry: *root->header_contexts()) {
|
||||
HeaderContext context;
|
||||
auto path = entry->path_id();
|
||||
context.version = entry->version();
|
||||
for(auto include: *entry->includes()) {
|
||||
index.canonical_ref_counts[include->canonical_id()] += 1;
|
||||
context.includes.emplace_back(*safe_cast<IncludeContext>(include));
|
||||
}
|
||||
index.header_contexts.try_emplace(path, std::move(context));
|
||||
}
|
||||
|
||||
for(auto entry: *root->compilation_contexts()) {
|
||||
CompilationContext context;
|
||||
auto path = entry->path_id();
|
||||
context.version = entry->version();
|
||||
context.canonical_id = entry->canonical_id();
|
||||
context.build_at = entry->build_at();
|
||||
for(auto include: *entry->include_locations()) {
|
||||
context.include_locations.emplace_back(*safe_cast<IncludeLocation>(include));
|
||||
}
|
||||
index.compilation_contexts.try_emplace(path, std::move(context));
|
||||
}
|
||||
|
||||
// Count ref counts from compilation contexts.
|
||||
for(auto entry: *root->compilation_contexts()) {
|
||||
index.canonical_ref_counts[entry->canonical_id()] += 1;
|
||||
}
|
||||
|
||||
// Deserialize removed bitmap.
|
||||
if(root->removed() && root->removed()->size() > 0) {
|
||||
index.removed = read_bitmap(root->removed());
|
||||
}
|
||||
|
||||
for(auto entry: *root->occurrences()) {
|
||||
index.occurrences.try_emplace(*safe_cast<Occurrence>(entry->occurrence()),
|
||||
read_bitmap(entry->context()));
|
||||
}
|
||||
|
||||
for(auto entry: *root->relations()) {
|
||||
auto& relations = index.relations[entry->symbol()];
|
||||
for(auto relation_entry: *entry->relations()) {
|
||||
relations.try_emplace(*safe_cast<Relation>(relation_entry->relation()),
|
||||
read_bitmap(relation_entry->context()));
|
||||
for(auto& [_, ctx]: index.header_contexts) {
|
||||
for(auto& inc: ctx.includes) {
|
||||
index.canonical_ref_counts[inc.canonical_id] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if(root->content()) {
|
||||
index.content = root->content()->str();
|
||||
for(auto& [_, ctx]: index.compilation_contexts) {
|
||||
index.canonical_ref_counts[ctx.canonical_id] += 1;
|
||||
}
|
||||
|
||||
self.buffer.reset();
|
||||
@@ -279,100 +258,9 @@ void MergedIndex::serialize(this const Self& self, llvm::raw_ostream& out) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto& index = self.impl;
|
||||
|
||||
fbs::FlatBufferBuilder builder(1024);
|
||||
|
||||
llvm::SmallVector<char, 1024> buffer;
|
||||
|
||||
auto canonical_cache = transform(index->canonical_cache, [&](auto&& value) {
|
||||
auto&& [hash, canonical_id] = value;
|
||||
return binary::CreateCacheEntry(builder, CreateString(builder, hash), canonical_id);
|
||||
});
|
||||
|
||||
auto header_contexts = transform(index->header_contexts, [&](auto&& value) {
|
||||
auto& [path_id, context] = value;
|
||||
return binary::CreateHeaderContextEntry(
|
||||
builder,
|
||||
path_id,
|
||||
context.version,
|
||||
CreateStructVector<binary::IncludeContext>(builder, context.includes));
|
||||
});
|
||||
|
||||
auto compilation_contexts = transform(index->compilation_contexts, [&](auto&& value) {
|
||||
auto& [path_id, context] = value;
|
||||
return binary::CreateCompilationContextEntry(
|
||||
builder,
|
||||
path_id,
|
||||
context.version,
|
||||
context.canonical_id,
|
||||
context.build_at,
|
||||
CreateStructVector<binary::IncludeLocation>(builder, context.include_locations));
|
||||
});
|
||||
|
||||
llvm::SmallVector<const Occurrence*> occurrence_keys;
|
||||
occurrence_keys.reserve(index->occurrences.size());
|
||||
auto occurrences = transform(index->occurrences, [&](auto&& value) {
|
||||
auto&& [occurrence, bitmap] = value;
|
||||
buffer.clear();
|
||||
buffer.resize_for_overwrite(bitmap.getSizeInBytes(false));
|
||||
bitmap.write(buffer.data(), false);
|
||||
occurrence_keys.emplace_back(&occurrence);
|
||||
return binary::CreateOccurrenceEntry(builder,
|
||||
safe_cast<binary::Occurrence>(&occurrence),
|
||||
CreateVector(builder, buffer));
|
||||
});
|
||||
std::ranges::sort(std::views::zip(occurrence_keys, occurrences), [](auto lhs, auto rhs) {
|
||||
const auto& lo = *std::get<0>(lhs);
|
||||
const auto& ro = *std::get<0>(rhs);
|
||||
return std::tuple(lo.range.begin, lo.range.end, lo.target) <
|
||||
std::tuple(ro.range.begin, ro.range.end, ro.target);
|
||||
});
|
||||
|
||||
llvm::SmallVector<std::uint64_t> relation_keys;
|
||||
relation_keys.reserve(index->relations.size());
|
||||
auto relations = transform(index->relations, [&](auto&& value) {
|
||||
auto&& [symbol_id, symbol_relations] = value;
|
||||
auto relations = transform(symbol_relations, [&](auto&& value) {
|
||||
auto&& [relation, bitmap] = value;
|
||||
buffer.clear();
|
||||
buffer.resize_for_overwrite(bitmap.getSizeInBytes(false));
|
||||
bitmap.write(buffer.data(), false);
|
||||
return binary::CreateRelationEntry(builder,
|
||||
safe_cast<binary::Relation>(&relation),
|
||||
CreateVector(builder, buffer));
|
||||
});
|
||||
relation_keys.emplace_back(symbol_id);
|
||||
return binary::CreateSymbolRelationsEntry(builder,
|
||||
symbol_id,
|
||||
CreateVector(builder, relations));
|
||||
});
|
||||
std::ranges::sort(std::views::zip(relation_keys, relations), {}, [](auto e) {
|
||||
return std::get<0>(e);
|
||||
});
|
||||
|
||||
// Serialize removed bitmap.
|
||||
buffer.clear();
|
||||
if(!index->removed.isEmpty()) {
|
||||
buffer.resize_for_overwrite(index->removed.getSizeInBytes(false));
|
||||
index->removed.write(buffer.data(), false);
|
||||
}
|
||||
auto removed = CreateVector(builder, buffer);
|
||||
|
||||
auto content_offset = CreateString(builder, index->content);
|
||||
|
||||
auto merged_index = binary::CreateMergedIndex(builder,
|
||||
index->max_canonical_id,
|
||||
CreateVector(builder, canonical_cache),
|
||||
CreateVector(builder, header_contexts),
|
||||
CreateVector(builder, compilation_contexts),
|
||||
CreateVector(builder, occurrences),
|
||||
CreateVector(builder, relations),
|
||||
removed,
|
||||
content_offset);
|
||||
builder.Finish(merged_index);
|
||||
|
||||
out.write(safe_cast<char>(builder.GetBufferPointer()), builder.GetSize());
|
||||
auto bytes = kfb::to_flatbuffer(*self.impl);
|
||||
assert(bytes && "MergedIndex flatbuffer serialization failed");
|
||||
out.write(reinterpret_cast<const char*>(bytes->data()), bytes->size());
|
||||
}
|
||||
|
||||
void MergedIndex::lookup(this const Self& self,
|
||||
@@ -420,25 +308,43 @@ void MergedIndex::lookup(this const Self& self,
|
||||
break;
|
||||
}
|
||||
} else if(self.buffer) {
|
||||
auto index = fbs::GetRoot<binary::MergedIndex>(self.buffer->getBufferStart());
|
||||
auto& occurrences = *index->occurrences();
|
||||
// Lazy path: binary-search the sorted occurrences array directly in
|
||||
// the flatbuffer without materializing the in-memory Impl.
|
||||
auto root = kfb::table_view<Impl>::from_bytes(buffer_bytes(*self.buffer));
|
||||
auto entries = root[&Impl::occurrences];
|
||||
|
||||
auto it = std::ranges::lower_bound(occurrences, offset, {}, [](auto o) {
|
||||
return o->occurrence()->range().end();
|
||||
});
|
||||
auto read_occurrence = [](auto occ_view) -> Occurrence {
|
||||
auto range_view = occ_view[&Occurrence::range];
|
||||
return Occurrence{
|
||||
LocalSourceRange{range_view[&LocalSourceRange::begin],
|
||||
range_view[&LocalSourceRange::end]},
|
||||
occ_view[&Occurrence::target],
|
||||
};
|
||||
};
|
||||
|
||||
while(it != occurrences.end()) {
|
||||
auto o = safe_cast<Occurrence>(it->occurrence());
|
||||
if(o->range.contains(offset)) {
|
||||
if(!callback(*o)) {
|
||||
break;
|
||||
}
|
||||
|
||||
it++;
|
||||
continue;
|
||||
const std::size_t count = entries.size();
|
||||
std::size_t lo = 0;
|
||||
std::size_t hi = count;
|
||||
while(lo < hi) {
|
||||
auto mid = lo + (hi - lo) / 2;
|
||||
auto entry = entries.at(mid);
|
||||
auto range_view = entry.template get<0>()[&Occurrence::range];
|
||||
if(range_view[&LocalSourceRange::end] < offset) {
|
||||
lo = mid + 1;
|
||||
} else {
|
||||
hi = mid;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
for(; lo < count; ++lo) {
|
||||
auto entry = entries.at(lo);
|
||||
auto occurrence = read_occurrence(entry.template get<0>());
|
||||
if(!occurrence.range.contains(offset)) {
|
||||
break;
|
||||
}
|
||||
if(!callback(occurrence)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -470,18 +376,31 @@ void MergedIndex::lookup(this const Self& self,
|
||||
}
|
||||
}
|
||||
} else if(self.buffer) {
|
||||
auto index = fbs::GetRoot<binary::MergedIndex>(self.buffer->getBufferStart());
|
||||
auto& entries = *index->relations();
|
||||
|
||||
auto it = std::ranges::lower_bound(entries, symbol, {}, [](auto e) { return e->symbol(); });
|
||||
if(it == entries.end() || it->symbol() != symbol) [[unlikely]] {
|
||||
// Lazy path: binary-search the outer relations map and iterate the
|
||||
// inner map without materializing Impl.
|
||||
auto root = kfb::table_view<Impl>::from_bytes(buffer_bytes(*self.buffer));
|
||||
auto outer = root[&Impl::relations];
|
||||
auto entry = outer.find(symbol);
|
||||
if(!entry) {
|
||||
return;
|
||||
}
|
||||
|
||||
for(auto entry: *it->relations()) {
|
||||
auto r = safe_cast<Relation>(entry->relation());
|
||||
if(r->kind & kind) {
|
||||
if(!callback(*r)) {
|
||||
auto inner = entry->template get<1>();
|
||||
const std::size_t count = inner.size();
|
||||
for(std::size_t i = 0; i < count; ++i) {
|
||||
auto rel_view = inner.at(i).template get<0>();
|
||||
// Kind comes back as the wire uint32 via the type_adapter; rewrap it.
|
||||
auto relation_kind =
|
||||
RelationKind(static_cast<RelationKind::Kind>(rel_view[&Relation::kind]));
|
||||
if(relation_kind & kind) {
|
||||
auto range_view = rel_view[&Relation::range];
|
||||
Relation relation{
|
||||
.kind = relation_kind,
|
||||
.padding = rel_view[&Relation::padding],
|
||||
.range = LocalSourceRange{range_view[&LocalSourceRange::begin],
|
||||
range_view[&LocalSourceRange::end]},
|
||||
.target_symbol = rel_view[&Relation::target_symbol],
|
||||
};
|
||||
if(!callback(relation)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -516,25 +435,31 @@ bool MergedIndex::need_update(this const Self& self, llvm::ArrayRef<llvm::String
|
||||
|
||||
return false;
|
||||
} else if(self.buffer) {
|
||||
auto index = fbs::GetRoot<binary::MergedIndex>(self.buffer->getBufferStart());
|
||||
if(index->compilation_contexts()->empty()) {
|
||||
auto root = kfb::table_view<Impl>::from_bytes(buffer_bytes(*self.buffer));
|
||||
auto contexts = root[&Impl::compilation_contexts];
|
||||
if(contexts.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
auto context = *index->compilation_contexts()->begin();
|
||||
auto context = contexts.at(0).template get<1>();
|
||||
auto build_at = context[&CompilationContext::build_at];
|
||||
auto include_locations = context[&CompilationContext::include_locations];
|
||||
|
||||
llvm::DenseSet<std::uint32_t> deps;
|
||||
for(auto location: *context->include_locations()) {
|
||||
auto [_, success] = deps.insert(location->path_id());
|
||||
const std::size_t count = include_locations.size();
|
||||
for(std::size_t i = 0; i < count; ++i) {
|
||||
auto location = include_locations.at(i);
|
||||
auto path_id = location[&IncludeLocation::path_id];
|
||||
auto [_, success] = deps.insert(path_id);
|
||||
if(success) {
|
||||
fs::file_status status;
|
||||
if(auto err = fs::status(path_mapping[location->path_id()], status)) {
|
||||
if(auto err = fs::status(path_mapping[path_id], status)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
auto time = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
status.getLastModificationTime().time_since_epoch());
|
||||
if(time.count() > context->build_at()) {
|
||||
if(time.count() > build_at) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -616,10 +541,9 @@ llvm::StringRef MergedIndex::content(this const Self& self) {
|
||||
if(self.impl) {
|
||||
return self.impl->content;
|
||||
} else if(self.buffer) {
|
||||
auto root = fbs::GetRoot<binary::MergedIndex>(self.buffer->getBufferStart());
|
||||
if(root->content()) {
|
||||
return root->content()->string_view();
|
||||
}
|
||||
auto root = kfb::table_view<Impl>::from_bytes(buffer_bytes(*self.buffer));
|
||||
auto view = root[&Impl::content];
|
||||
return llvm::StringRef(view.data(), view.size());
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -1,9 +1,22 @@
|
||||
#include "index/project_index.h"
|
||||
|
||||
#include "index/serialization.h"
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <span>
|
||||
|
||||
#include "index/kotatsu_adapters.h" // type_adapter specializations
|
||||
|
||||
#include "kota/codec/flatbuffers/deserializer.h"
|
||||
#include "kota/codec/flatbuffers/serializer.h"
|
||||
|
||||
namespace clice::index {
|
||||
|
||||
namespace {
|
||||
|
||||
namespace kfb = kota::codec::flatbuffers;
|
||||
|
||||
} // namespace
|
||||
|
||||
llvm::SmallVector<std::uint32_t> ProjectIndex::merge(this ProjectIndex& self, TUIndex& index) {
|
||||
auto& paths = index.graph.paths;
|
||||
llvm::SmallVector<std::uint32_t> file_ids_map;
|
||||
@@ -28,79 +41,22 @@ llvm::SmallVector<std::uint32_t> ProjectIndex::merge(this ProjectIndex& self, TU
|
||||
}
|
||||
|
||||
void ProjectIndex::serialize(this ProjectIndex& self, llvm::raw_ostream& os) {
|
||||
fbs::FlatBufferBuilder builder(1024);
|
||||
|
||||
llvm::SmallVector<char, 1024> buffer;
|
||||
|
||||
auto i = 0;
|
||||
auto paths = transform(self.path_pool.paths, [&](llvm::StringRef path) {
|
||||
auto entry =
|
||||
binary::CreatePathEntry(builder, CreateString(builder, self.path_pool.paths[i]), i);
|
||||
i += 1;
|
||||
return entry;
|
||||
});
|
||||
|
||||
auto indices = transform(self.indices, [&](auto&& value) {
|
||||
auto&& [source, index] = value;
|
||||
return binary::PathMapEntry(source, index);
|
||||
});
|
||||
|
||||
auto symbols = transform(self.symbols, [&](auto&& value) {
|
||||
auto& [symbol_id, symbol] = value;
|
||||
|
||||
buffer.clear();
|
||||
buffer.resize_for_overwrite(symbol.reference_files.getSizeInBytes(false));
|
||||
symbol.reference_files.write(buffer.data(), false);
|
||||
|
||||
return binary::CreateSymbolEntry(builder,
|
||||
symbol_id,
|
||||
binary::CreateSymbol(builder,
|
||||
CreateString(builder, symbol.name),
|
||||
symbol.kind.value(),
|
||||
CreateVector(builder, buffer)));
|
||||
});
|
||||
|
||||
auto project_index =
|
||||
binary::CreateProjectIndex(builder,
|
||||
CreateVector(builder, paths),
|
||||
CreateStructVector<binary::PathMapEntry>(builder, indices),
|
||||
CreateVector(builder, symbols));
|
||||
|
||||
builder.Finish(project_index);
|
||||
os.write(safe_cast<const char>(builder.GetBufferPointer()), builder.GetSize());
|
||||
auto bytes = kfb::to_flatbuffer(self);
|
||||
assert(bytes && "ProjectIndex flatbuffer serialization failed");
|
||||
os.write(reinterpret_cast<const char*>(bytes->data()), bytes->size());
|
||||
}
|
||||
|
||||
ProjectIndex ProjectIndex::from(const void* data) {
|
||||
auto root = fbs::GetRoot<binary::ProjectIndex>(data);
|
||||
|
||||
ProjectIndex ProjectIndex::from(const void* data, std::size_t size) {
|
||||
ProjectIndex index;
|
||||
|
||||
auto& pool = index.path_pool;
|
||||
pool.paths.resize(root->paths()->size());
|
||||
for(auto entry: *root->paths()) {
|
||||
// Normalize backslashes to forward slashes for cross-platform consistency
|
||||
// (persisted index may contain native-separator paths from Windows).
|
||||
llvm::SmallString<256> normalized(entry->path()->string_view());
|
||||
std::replace(normalized.begin(), normalized.end(), '\\', '/');
|
||||
auto k = pool.save(normalized.str());
|
||||
pool.paths[entry->id()] = k;
|
||||
pool.cache.try_emplace(k, entry->id());
|
||||
if(data == nullptr || size == 0) {
|
||||
return index;
|
||||
}
|
||||
|
||||
for(auto entry: *root->indices()) {
|
||||
index.indices.try_emplace(entry->source(), entry->index());
|
||||
std::span<const std::uint8_t> bytes(static_cast<const std::uint8_t*>(data), size);
|
||||
auto result = kfb::from_flatbuffer(bytes, index);
|
||||
if(!result) {
|
||||
return ProjectIndex();
|
||||
}
|
||||
|
||||
for(auto entry: *root->symbols()) {
|
||||
auto& symbol = index.symbols[entry->symbol_id()];
|
||||
auto* fb_symbol = entry->symbol();
|
||||
if(auto* name = fb_symbol->name()) {
|
||||
symbol.name = name->str();
|
||||
}
|
||||
symbol.kind = SymbolKind(static_cast<std::uint8_t>(fb_symbol->kind()));
|
||||
symbol.reference_files = read_bitmap(fb_symbol->refs());
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
|
||||
@@ -2,10 +2,14 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "index/tu_index.h"
|
||||
|
||||
#include "kota/codec/arena/traits.h"
|
||||
#include "kota/codec/detail/fwd.h"
|
||||
#include "kota/support/expected_try.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
@@ -84,7 +88,71 @@ struct ProjectIndex {
|
||||
|
||||
void serialize(this ProjectIndex& self, llvm::raw_ostream& os);
|
||||
|
||||
static ProjectIndex from(const void* data);
|
||||
static ProjectIndex from(const void* data, std::size_t size);
|
||||
};
|
||||
|
||||
} // namespace clice::index
|
||||
|
||||
namespace kota::codec {
|
||||
|
||||
/// `PathPool` on the wire is a flat list of absolute paths; `id` is the
|
||||
/// position in the vector. The allocator and reverse cache are runtime-only.
|
||||
///
|
||||
/// Streaming serialize: iterate `pool.paths` and allocate strings directly
|
||||
/// into the builder, avoiding the double-copy that a value-mode
|
||||
/// `wire_type = std::vector<std::string>` conversion would introduce.
|
||||
template <typename S>
    requires arena::arena_serializer_like<S>
struct serialize_traits<S, clice::index::PathPool> {
    // Declared wire shape: a list of strings. The flatbuffers proxy uses it
    // to view a `PathPool` field as an `array_view<std::string>`.
    using wire_type = std::vector<std::string>;

    /// Write every entry of `pool.paths` into the builder as a string, in
    /// order, then bundle the resulting references into one string vector.
    /// The first failed string allocation aborts and its error is returned.
    static auto serialize(S& s, const clice::index::PathPool& pool)
        -> std::expected<typename S::vector_ref, typename S::error_type> {
        using string_ref = typename S::string_ref;

        std::vector<string_ref> refs;
        refs.reserve(pool.paths.size());

        for(const auto& entry: pool.paths) {
            auto allocated = s.alloc_string(std::string_view(entry.data(), entry.size()));
            if(!allocated) {
                return std::unexpected(allocated.error());
            }
            refs.push_back(*allocated);
        }

        std::span<const string_ref> all_refs(refs.data(), refs.size());
        return s.alloc_string_vector(all_refs);
    }
};
|
||||
|
||||
/// Streaming deserialize: read each path out of the flatbuffer's
|
||||
/// string-vector view directly, interning it into the pool's allocator
|
||||
/// in-place. Avoids the transient `std::vector<std::string>` the
|
||||
/// value-mode form would materialize.
|
||||
template <typename D>
    requires arena::arena_deserializer_like<D>
struct deserialize_traits<D, clice::index::PathPool> {
    using wire_type = std::vector<std::string>;

    /// Fill `out` from the string vector stored in slot `sid` of `view`.
    ///
    /// A missing slot leaves `out` untouched and succeeds. Otherwise each
    /// stored path has its backslashes replaced by forward slashes, is
    /// interned through `out.save`, and is recorded at its position in
    /// `out.paths` and in `out.cache` (path -> position).
    static auto deserialize(const D& d,
                            typename D::TableView view,
                            typename D::slot_id sid,
                            clice::index::PathPool& out)
        -> std::expected<void, typename D::error_type> {
        if(!view.has(sid)) {
            return {};
        }

        KOTA_EXPECTED_TRY_V(auto stored, d.get_string_vector(view, sid));

        const std::size_t total = stored.size();
        out.paths.resize(total);

        for(std::size_t slot = 0; slot != total; ++slot) {
            auto raw = stored[slot];

            // Normalize separators on a scratch copy before interning.
            llvm::SmallString<256> unified(llvm::StringRef(raw.data(), raw.size()));
            std::replace(unified.begin(), unified.end(), '\\', '/');

            auto kept = out.save(unified.str());
            out.paths[slot] = kept;
            out.cache.try_emplace(kept, static_cast<std::uint32_t>(slot));
        }

        return {};
    }
};
|
||||
|
||||
} // namespace kota::codec
|
||||
|
||||
@@ -1,173 +0,0 @@
|
||||
// FlatBuffers schema for the clice index binary format.
//
// NOTE: table field ids are assigned in declaration order and struct fields
// are laid out in declaration order, so fields must not be reordered.

namespace clice.index.binary;

/// A pair of unsigned `begin` / `end` values.
struct Range {
    begin : uint;
    end : uint;
}

/// A `Range` together with a 64-bit `target`.
struct Occurrence {
    range : Range;
    target : ulong;
}

/// A relation record; `padding` is an explicit 32-bit filler field.
struct Relation {
    kind : uint;
    padding : uint;
    range : Range;
    target_symbol : ulong;
}

table CacheEntry {
    sha256 : string;
    canonical_id : uint;
}

struct IncludeContext {
    include_id : uint;
    canonical_id : uint;
}

table HeaderContextEntry {
    path_id : uint;
    version : uint;
    includes : [IncludeContext];
}

struct IncludeLocation {
    path_id : uint;
    line : uint;
    include_id : uint;
}

table CompilationContextEntry {
    path_id : uint;
    version : uint;
    canonical_id : uint;
    build_at : ulong;
    include_locations : [IncludeLocation];
}

/// An occurrence plus an opaque byte blob `context`.
table OccurrenceEntry {
    occurrence : Occurrence;
    context : [ubyte];
}

/// A relation plus an opaque byte blob `context`.
table RelationEntry {
    relation : Relation;
    context : [ubyte];
}

table SymbolRelationsEntry {
    symbol : ulong;
    relations : [RelationEntry];
}

table Symbol {
    name : string;
    kind : ubyte;
    refs : [ubyte];
}

table SymbolEntry {
    symbol_id : ulong;
    symbol : Symbol;
}

table MergedIndex {
    max_canonical_id : uint;

    canonical_cache : [CacheEntry];

    header_contexts : [HeaderContextEntry];

    compilation_contexts : [CompilationContextEntry];

    occurrences : [OccurrenceEntry];

    relations : [SymbolRelationsEntry];

    removed : [ubyte];

    content : string;
}

table TUFileRelationsEntry {
    symbol : ulong;
    relations : [Relation];
}

table TUFileIndexEntry {
    file_id : uint;
    occurrences : [Occurrence];
    relations : [TUFileRelationsEntry];
}

table TUIndex {
    built_at : ulong;
    paths : [string];
    locations : [IncludeLocation];
    symbols : [SymbolEntry];
    file_indices : [TUFileIndexEntry];
    main_file_index : TUFileIndexEntry;
}

table PathEntry {
    path : string;
    id : uint;
}

struct PathMapEntry {
    source : uint;
    index : uint;
}

table ProjectIndex {
    paths : [PathEntry];
    indices : [PathMapEntry];
    symbols : [SymbolEntry];
}
|
||||
@@ -1,79 +0,0 @@
|
||||
#include <cstdint>
|
||||
#include <ranges>
|
||||
#include <type_traits>
|
||||
|
||||
#include "schema_generated.h"
|
||||
#include "support/bitmap.h"
|
||||
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
|
||||
namespace clice::index {
|
||||
|
||||
namespace fbs = flatbuffers;
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename Range>
|
||||
concept sequence_range = std::ranges::input_range<Range> &&
|
||||
!requires { typename Range::key_type; } && requires(const Range& r) {
|
||||
r.data();
|
||||
r.size();
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using Offsets = llvm::SmallVector<fbs::Offset<T>, 0>;
|
||||
|
||||
template <typename U, typename V>
|
||||
const U* safe_cast(const V* v) {
|
||||
static_assert(sizeof(U) == sizeof(V), "size mismatch");
|
||||
static_assert(alignof(U) == alignof(V), "alignment mismatch");
|
||||
static_assert(std::is_trivially_copyable_v<U> && std::is_trivially_copyable_v<V>,
|
||||
"requires trivially copyable");
|
||||
/// If aliasing issues arise, prefer copying into a temporary SmallVector<U>.
|
||||
return reinterpret_cast<const U*>(v);
|
||||
}
|
||||
|
||||
auto CreateString(fbs::FlatBufferBuilder& builder, llvm::StringRef string) {
|
||||
return builder.CreateString(string.data(), string.size());
|
||||
}
|
||||
|
||||
template <sequence_range Range>
|
||||
auto CreateVector(fbs::FlatBufferBuilder& builder, const Range& range) {
|
||||
return builder.CreateVector(range.data(), range.size());
|
||||
}
|
||||
|
||||
auto CreateVector(fbs::FlatBufferBuilder& builder, const llvm::SmallVector<char, 1024>& range) {
|
||||
return builder.CreateVector(reinterpret_cast<const std::uint8_t*>(range.data()), range.size());
|
||||
}
|
||||
|
||||
template <typename U, sequence_range Range>
|
||||
auto CreateStructVector(fbs::FlatBufferBuilder& builder, const Range& range) {
|
||||
using V = std::ranges::range_value_t<Range>;
|
||||
(void)sizeof(V);
|
||||
return builder.CreateVectorOfStructs(safe_cast<U>(range.data()), range.size());
|
||||
}
|
||||
|
||||
template <typename Range, typename Functor>
|
||||
auto transform(const Range& range, const Functor& functor) {
|
||||
using V = std::ranges::range_value_t<Range>;
|
||||
using R = std::invoke_result_t<Functor, V>;
|
||||
|
||||
llvm::SmallVector<R, 0> result;
|
||||
result.resize_for_overwrite(std::ranges::size(range));
|
||||
|
||||
auto i = 0;
|
||||
for(auto&& v: range) {
|
||||
result[i] = functor(v);
|
||||
i += 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
Bitmap read_bitmap(const fbs::Vector<uint8_t>* buffer) {
|
||||
return Bitmap::read(reinterpret_cast<const char*>(buffer->data()), false);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace clice::index
|
||||
@@ -1,17 +1,24 @@
|
||||
#include "index/tu_index.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <span>
|
||||
#include <tuple>
|
||||
|
||||
#include "index/serialization.h"
|
||||
#include "index/kotatsu_adapters.h" // type_adapter specializations
|
||||
#include "semantic/ast_utility.h"
|
||||
#include "semantic/semantic_visitor.h"
|
||||
|
||||
#include "kota/codec/flatbuffers/deserializer.h"
|
||||
#include "kota/codec/flatbuffers/serializer.h"
|
||||
#include "llvm/Support/SHA256.h"
|
||||
|
||||
namespace clice::index {
|
||||
|
||||
namespace {
|
||||
|
||||
namespace kfb = kota::codec::flatbuffers;
|
||||
|
||||
class Builder : public SemanticVisitor<Builder> {
|
||||
public:
|
||||
Builder(TUIndex& result, CompilationUnitRef unit, bool interested_only) :
|
||||
@@ -114,6 +121,8 @@ public:
|
||||
void build() {
|
||||
run();
|
||||
|
||||
auto interested = unit.interested_file();
|
||||
|
||||
for(auto& [fid, index]: result.file_indices) {
|
||||
for(auto& [symbol_id, relations]: index.relations) {
|
||||
std::ranges::sort(relations, [](const Relation& lhs, const Relation& rhs) {
|
||||
@@ -144,13 +153,19 @@ public:
|
||||
return lhs.range == rhs.range && lhs.target == rhs.target;
|
||||
});
|
||||
index.occurrences.erase(range.begin(), range.end());
|
||||
|
||||
if(fid == unit.interested_file()) {
|
||||
result.main_file_index = std::move(index);
|
||||
}
|
||||
}
|
||||
|
||||
result.file_indices.erase(unit.interested_file());
|
||||
// Populate main_file_index (interested file) and path_file_indices
|
||||
// (keyed by path_id) for serialization. `file_indices` itself is
|
||||
// `skip`-marked (runtime-only, keyed by clang::FileID) and retained
|
||||
// for in-memory consumers/tests that need FileID access.
|
||||
for(auto& [fid, index]: result.file_indices) {
|
||||
if(fid == interested) {
|
||||
result.main_file_index = index;
|
||||
} else {
|
||||
result.path_file_indices[result.graph.path_id(fid)] = index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
@@ -198,119 +213,23 @@ TUIndex TUIndex::build(CompilationUnitRef unit, bool interested_only) {
|
||||
return index;
|
||||
}
|
||||
|
||||
void TUIndex::serialize(llvm::raw_ostream& os) const {
|
||||
fbs::FlatBufferBuilder builder(4096);
|
||||
|
||||
llvm::SmallVector<char, 1024> buffer;
|
||||
|
||||
auto paths =
|
||||
transform(graph.paths, [&](const std::string& p) { return builder.CreateString(p); });
|
||||
|
||||
auto syms = transform(symbols, [&](auto&& value) {
|
||||
auto& [symbol_id, symbol] = value;
|
||||
buffer.clear();
|
||||
buffer.resize_for_overwrite(symbol.reference_files.getSizeInBytes(false));
|
||||
symbol.reference_files.write(buffer.data(), false);
|
||||
return binary::CreateSymbolEntry(builder,
|
||||
symbol_id,
|
||||
binary::CreateSymbol(builder,
|
||||
CreateString(builder, symbol.name),
|
||||
symbol.kind.value(),
|
||||
CreateVector(builder, buffer)));
|
||||
});
|
||||
|
||||
/// Serialize a single FileIndex into a TUFileIndexEntry.
|
||||
auto serialize_file_index = [&](std::uint32_t fid, const FileIndex& index) {
|
||||
auto occs = CreateStructVector<binary::Occurrence>(builder, index.occurrences);
|
||||
auto rels = transform(index.relations, [&](auto&& value) {
|
||||
auto& [symbol_id, relations] = value;
|
||||
return binary::CreateTUFileRelationsEntry(
|
||||
builder,
|
||||
symbol_id,
|
||||
CreateStructVector<binary::Relation>(builder, relations));
|
||||
});
|
||||
return binary::CreateTUFileIndexEntry(builder, fid, occs, CreateVector(builder, rels));
|
||||
};
|
||||
|
||||
/// Convert FileID-keyed file_indices to path_id-keyed entries.
|
||||
llvm::SmallVector<fbs::Offset<binary::TUFileIndexEntry>> file_idx_vec;
|
||||
for(auto& [fid, index]: file_indices) {
|
||||
auto pid = graph.path_id(fid);
|
||||
file_idx_vec.push_back(serialize_file_index(pid, index));
|
||||
}
|
||||
|
||||
/// Main file is the last path in graph.paths (convention from IncludeGraph).
|
||||
auto main_idx =
|
||||
serialize_file_index(static_cast<std::uint32_t>(graph.paths.size() - 1), main_file_index);
|
||||
|
||||
auto tu_index =
|
||||
binary::CreateTUIndex(builder,
|
||||
static_cast<std::uint64_t>(built_at.count()),
|
||||
CreateVector(builder, paths),
|
||||
CreateStructVector<binary::IncludeLocation>(builder, graph.locations),
|
||||
CreateVector(builder, syms),
|
||||
builder.CreateVector(file_idx_vec.data(), file_idx_vec.size()),
|
||||
main_idx);
|
||||
|
||||
builder.Finish(tu_index);
|
||||
os.write(safe_cast<const char>(builder.GetBufferPointer()), builder.GetSize());
|
||||
/// Encodes this index with `kfb::to_flatbuffer` and writes the resulting
/// bytes to `os`.
///
/// A failed encode trips the assertion in builds with assertions enabled.
/// When assertions are compiled out, the function returns without writing
/// anything rather than dereferencing the failed result.
void TUIndex::serialize(llvm::raw_ostream& os) {
    auto bytes = kfb::to_flatbuffer(*this);
    assert(bytes && "TUIndex flatbuffer serialization failed");
    if(!bytes) {
        return;
    }
    os.write(reinterpret_cast<const char*>(bytes->data()), bytes->size());
}
|
||||
|
||||
TUIndex TUIndex::from(const void* data) {
|
||||
auto root = fbs::GetRoot<binary::TUIndex>(data);
|
||||
|
||||
TUIndex TUIndex::from(const void* data, std::size_t size) {
|
||||
TUIndex index;
|
||||
index.built_at = std::chrono::milliseconds(root->built_at());
|
||||
|
||||
for(auto p: *root->paths()) {
|
||||
index.graph.paths.emplace_back(p->str());
|
||||
if(data == nullptr || size == 0) {
|
||||
return index;
|
||||
}
|
||||
|
||||
for(auto loc: *root->locations()) {
|
||||
index.graph.locations.emplace_back(*safe_cast<IncludeLocation>(loc));
|
||||
std::span<const std::uint8_t> bytes(static_cast<const std::uint8_t*>(data), size);
|
||||
auto result = kfb::from_flatbuffer(bytes, index);
|
||||
if(!result) {
|
||||
return TUIndex();
|
||||
}
|
||||
|
||||
for(auto entry: *root->symbols()) {
|
||||
auto& symbol = index.symbols[entry->symbol_id()];
|
||||
symbol.name = entry->symbol()->name()->str();
|
||||
symbol.kind = SymbolKind(static_cast<std::uint8_t>(entry->symbol()->kind()));
|
||||
symbol.reference_files = read_bitmap(entry->symbol()->refs());
|
||||
}
|
||||
|
||||
/// Helper to deserialize a TUFileIndexEntry into a FileIndex.
|
||||
auto deserialize_file_index = [](const binary::TUFileIndexEntry* entry) -> FileIndex {
|
||||
FileIndex fi;
|
||||
if(entry->occurrences()) {
|
||||
fi.occurrences.reserve(entry->occurrences()->size());
|
||||
for(auto o: *entry->occurrences()) {
|
||||
fi.occurrences.emplace_back(*safe_cast<Occurrence>(o));
|
||||
}
|
||||
}
|
||||
if(entry->relations()) {
|
||||
for(auto rel_entry: *entry->relations()) {
|
||||
auto& rels = fi.relations[rel_entry->symbol()];
|
||||
if(rel_entry->relations()) {
|
||||
rels.reserve(rel_entry->relations()->size());
|
||||
for(auto r: *rel_entry->relations()) {
|
||||
rels.emplace_back(*safe_cast<Relation>(r));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return fi;
|
||||
};
|
||||
|
||||
/// Populate path_file_indices keyed by path_id (no clang::FileID needed).
|
||||
if(root->file_indices()) {
|
||||
for(auto entry: *root->file_indices()) {
|
||||
index.path_file_indices[entry->file_id()] = deserialize_file_index(entry);
|
||||
}
|
||||
}
|
||||
|
||||
if(root->main_file_index()) {
|
||||
index.main_file_index = deserialize_file_index(root->main_file_index());
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "semantic/symbol_kind.h"
|
||||
#include "support/bitmap.h"
|
||||
|
||||
#include "kota/meta/annotation.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
namespace clice::index {
|
||||
@@ -35,6 +36,10 @@ struct Relation {
|
||||
constexpr auto definition_range() {
|
||||
return std::bit_cast<LocalSourceRange>(target_symbol);
|
||||
}
|
||||
|
||||
friend bool operator==(const Relation&, const Relation&) = default;
|
||||
|
||||
friend auto operator<=>(const Relation&, const Relation&) = default;
|
||||
};
|
||||
|
||||
struct Occurrence {
|
||||
@@ -45,6 +50,8 @@ struct Occurrence {
|
||||
SymbolHash target;
|
||||
|
||||
friend bool operator==(const Occurrence&, const Occurrence&) = default;
|
||||
|
||||
friend auto operator<=>(const Occurrence&, const Occurrence&) = default;
|
||||
};
|
||||
|
||||
struct FileIndex {
|
||||
@@ -77,19 +84,21 @@ struct TUIndex {
|
||||
|
||||
SymbolTable symbols;
|
||||
|
||||
llvm::DenseMap<clang::FileID, FileIndex> file_indices;
|
||||
/// Runtime-only: keyed by AST-scoped `clang::FileID` during build; flushed
|
||||
/// into `path_file_indices` (keyed by path id) before serialization.
|
||||
kota::meta::skip<llvm::DenseMap<clang::FileID, FileIndex>> file_indices;
|
||||
|
||||
/// File indices keyed by path_id, populated by from() for deserialized data.
|
||||
/// When built from AST, this is empty and file_indices (keyed by FileID) is used.
|
||||
/// File indices keyed by path_id. Populated from `file_indices` when the
|
||||
/// index is built, and directly from the wire on deserialize.
|
||||
llvm::DenseMap<std::uint32_t, FileIndex> path_file_indices;
|
||||
|
||||
FileIndex main_file_index;
|
||||
|
||||
static TUIndex build(CompilationUnitRef unit, bool interested_only = false);
|
||||
|
||||
void serialize(llvm::raw_ostream& os) const;
|
||||
void serialize(llvm::raw_ostream& os);
|
||||
|
||||
static TUIndex from(const void* data);
|
||||
static TUIndex from(const void* data, std::size_t size);
|
||||
};
|
||||
|
||||
} // namespace clice::index
|
||||
|
||||
@@ -71,6 +71,10 @@ constexpr bool operator==(RelationKind lhs, RelationKind rhs) {
|
||||
return lhs.value() == rhs.value();
|
||||
}
|
||||
|
||||
/// Three-way comparison of two `RelationKind`s by their `value()`.
constexpr auto operator<=>(RelationKind lhs, RelationKind rhs) {
|
||||
return lhs.value() <=> rhs.value();
|
||||
}
|
||||
|
||||
/// Returns true when both kinds have the same `value()`.
// NOTE(review): despite the `&` spelling this is an equality test, the same
// comparison `operator==` performs; confirm that a bitwise intersection test
// was not intended before relying on it for combined kinds.
constexpr bool operator&(RelationKind lhs, RelationKind rhs) {
|
||||
return lhs.value() == rhs.value();
|
||||
}
|
||||
|
||||
@@ -763,7 +763,8 @@ kota::task<bool> Compiler::ensure_compiled(Session& session) {
|
||||
|
||||
// Store open file index from the stateful worker's TUIndex.
|
||||
if(!result.value().tu_index_data.empty()) {
|
||||
auto tu_index = index::TUIndex::from(result.value().tu_index_data.data());
|
||||
auto tu_index = index::TUIndex::from(result.value().tu_index_data.data(),
|
||||
result.value().tu_index_data.size());
|
||||
OpenFileIndex ofi;
|
||||
ofi.file_index = std::move(tu_index.main_file_index);
|
||||
ofi.symbols = std::move(tu_index.symbols);
|
||||
|
||||
@@ -25,7 +25,7 @@ namespace clice {
|
||||
namespace lsp = kota::ipc::lsp;
|
||||
|
||||
void Indexer::merge(const void* tu_index_data, std::size_t size) {
|
||||
auto tu_index = index::TUIndex::from(tu_index_data);
|
||||
auto tu_index = index::TUIndex::from(tu_index_data, size);
|
||||
if(tu_index.graph.paths.empty()) {
|
||||
LOG_WARN("Ignoring TUIndex with empty path graph");
|
||||
return;
|
||||
@@ -144,7 +144,8 @@ void Indexer::load(llvm::StringRef index_dir) {
|
||||
auto project_path = path::join(index_dir, "project.idx");
|
||||
auto buf = llvm::MemoryBuffer::getFile(project_path);
|
||||
if(buf) {
|
||||
workspace.project_index = index::ProjectIndex::from((*buf)->getBufferStart());
|
||||
workspace.project_index =
|
||||
index::ProjectIndex::from((*buf)->getBufferStart(), (*buf)->getBufferSize());
|
||||
LOG_INFO("Loaded ProjectIndex: {} symbols", workspace.project_index.symbols.size());
|
||||
}
|
||||
|
||||
|
||||
@@ -46,6 +46,8 @@ struct LocalSourceRange {
|
||||
|
||||
constexpr bool operator==(const LocalSourceRange& other) const = default;
|
||||
|
||||
constexpr auto operator<=>(const LocalSourceRange& other) const = default;
|
||||
|
||||
constexpr std::uint32_t length() const {
|
||||
return end - begin;
|
||||
}
|
||||
|
||||
@@ -128,7 +128,7 @@ TEST_CASE(SerializationRoundTrip) {
|
||||
project.serialize(os);
|
||||
|
||||
// Deserialize.
|
||||
auto restored = index::ProjectIndex::from(buf.data());
|
||||
auto restored = index::ProjectIndex::from(buf.data(), buf.size());
|
||||
|
||||
// Path pools should match.
|
||||
ASSERT_EQ(project.path_pool.paths.size(), restored.path_pool.paths.size());
|
||||
@@ -190,7 +190,7 @@ TEST_CASE(NameSurvivesRoundTrip) {
|
||||
llvm::SmallString<4096> buf;
|
||||
llvm::raw_svector_ostream os(buf);
|
||||
project.serialize(os);
|
||||
auto restored = index::ProjectIndex::from(buf.data());
|
||||
auto restored = index::ProjectIndex::from(buf.data(), buf.size());
|
||||
|
||||
// Verify names survive round-trip.
|
||||
for(auto& [hash, symbol]: project.symbols) {
|
||||
|
||||
Reference in New Issue
Block a user