Files
clice/src/server/worker/stateful_worker.cpp
ykiko cc5b25d5c3 refactor: public feature types and snapshot testing infrastructure (#442)
## Summary

- **Public feature types**: Move `SemanticToken`, `FoldingRange`,
`DocumentSymbol`, `InlayHint`, and `HintCategory` from internal `.cpp`
files to `feature.h` as public API types. Each feature now exposes two
overloads: a raw overload returning offset-based types and a protocol
overload that converts to LSP wire-format with explicit
`PositionEncoding`.
- **Snapshot testing**: Add corpus-driven snapshot tests using
`ASSERT_SNAPSHOT_GLOB` for semantic tokens, folding ranges, inlay hints,
document symbols, and TU index. Tests compile real C++ corpus files,
format output as YAML flow mappings, and diff against `.snap.yml`
baselines.
- **Test infrastructure**: Add `compile_file()` to `Tester`,
`yaml_str()` utility, `--corpus-dir` / `--snapshot-dir` CLI options, and
`--verbose` flag for unit tests. Migrate to kotatsu's unified
`kota::zest::Options` API.
- **Toolchain robustness**: Filter unknown cc1 args via
`clang::driver::getDriverOptTable()` to handle system compilers newer
than embedded LLVM.
- **Dependency bump**: Update kotatsu to 7381404 (unified zest Options,
out-param `from_json` API).

## Details

### Feature type changes
All five feature modules (`semantic_tokens`, `folding_ranges`,
`document_symbols`, `inlay_hints`, `document_links`) now follow the same
two-overload pattern. The raw overload returns offset-based structs
suitable for indexing and testing; the protocol overload adds
`PositionEncoding` conversion for LSP responses. `stateful_worker.cpp`
explicitly passes `PositionEncoding::UTF16` at every call site.

### Snapshot tests
Corpus files live in `tests/corpus/` (organized by language construct).
Snapshot baselines live in `tests/snapshots/<feature>/`. Format lambdas
are inlined directly in test bodies — no separate format functions for
single-use formatters. YAML output uses flow mappings (`- { key: value
}`) for compact, diffable baselines.

### cc1 arg filtering
`src/command/toolchain.cpp` now parses the cc1 argument list through
LLVM's driver option table and drops any args classified as
`UnknownClass`. This prevents compilation failures when the system
compiler emits flags that the embedded LLVM version doesn't recognize.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-24 19:36:27 +08:00

315 lines
12 KiB
C++

#include "server/worker/stateful_worker.h"
#include <atomic>
#include <cstdint>
#include <list>
#include <memory>
#include <string>
#include <vector>
#include "compile/compilation.h"
#include "feature/feature.h"
#include "index/tu_index.h"
#include "server/protocol/worker.h"
#include "server/worker/worker_common.h"
#include "support/logging.h"
#include "kota/async/async.h"
#include "kota/ipc/codec/bincode.h"
#include "kota/ipc/peer.h"
#include "kota/ipc/transport.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/raw_ostream.h"
namespace clice {
using kota::ipc::RequestResult;
using RequestContext = kota::ipc::BincodePeer::RequestContext;
struct DocumentEntry {
int version = 0;
std::string text;
bool has_ast = false;
CompilationUnit unit{nullptr};
std::atomic<bool> dirty{false};
// Signaled when the first compilation completes (has_ast becomes true).
// Feature handlers co_await this before accessing the AST.
kota::event ast_ready{false};
// Compilation context (from CompileParams)
std::string directory;
std::vector<std::string> arguments;
std::pair<std::string, uint32_t> pch;
llvm::StringMap<std::string> pcms;
// Per-document serialization mutex
kota::mutex strand;
};
class StatefulWorker {
kota::ipc::BincodePeer& peer;
std::uint64_t memory_limit;
llvm::StringMap<std::shared_ptr<DocumentEntry>> documents;
// LRU tracking — owns keys so they don't dangle after request handler returns
std::list<std::string> lru;
llvm::StringMap<std::list<std::string>::iterator> lru_index;
void touch_lru(llvm::StringRef path) {
auto it = lru_index.find(path);
if(it != lru_index.end()) {
lru.erase(it->second);
}
lru.emplace_front(path.str());
lru_index[path] = lru.begin();
}
void shrink_if_over_limit() {
// TODO: Implement memory-based eviction using memory_limit.
// For now, cap at a fixed number of documents.
while(documents.size() > 16 && !lru.empty()) {
auto path = lru.back();
lru.pop_back();
lru_index.erase(path);
LOG_DEBUG("Evicting document: {}", path);
peer.send_notification(worker::EvictedParams{std::string(path)});
documents.erase(path);
}
}
std::shared_ptr<DocumentEntry> get_or_create(llvm::StringRef path) {
auto [it, inserted] = documents.try_emplace(path, nullptr);
if(inserted) {
it->second = std::make_shared<DocumentEntry>();
LOG_DEBUG("Created new document entry: {}", path.str());
}
return it->second;
}
/// Look up document, wait for AST, lock strand, run fn(doc) on thread pool, unlock.
/// Returns "null" if document not found or AST not usable.
template <typename F>
kota::task<kota::codec::RawValue> with_ast(llvm::StringRef path, F&& fn) {
auto it = documents.find(path);
if(it == documents.end()) {
co_return kota::codec::RawValue{"null"};
}
// Hold shared_ptr so Evict can't destroy the entry mid-request.
auto doc = it->second;
touch_lru(path);
co_await doc->ast_ready.wait();
co_await doc->strand.lock();
auto result = co_await kota::queue([&]() -> kota::codec::RawValue {
if(!doc->has_ast || (!doc->unit.completed() && !doc->unit.fatal_error()))
return kota::codec::RawValue{"null"};
return fn(*doc);
});
doc->strand.unlock();
co_return result.value();
}
public:
StatefulWorker(kota::ipc::BincodePeer& peer, std::uint64_t memory_limit) :
peer(peer), memory_limit(memory_limit) {}
void register_handlers();
};
void StatefulWorker::register_handlers() {
// === Compile ===
peer.on_request(
[this](RequestContext& ctx,
const worker::CompileParams& params) -> RequestResult<worker::CompileParams> {
LOG_INFO("Compile request: path={}, version={}", params.path, params.version);
// Hold shared_ptr so Evict can't destroy the entry mid-compile.
auto doc = get_or_create(params.path);
touch_lru(params.path);
co_await doc->strand.lock();
// Copy params to doc AFTER acquiring the strand lock, so that
// concurrent Compile requests waiting on the strand don't
// overwrite our fields before we use them.
doc->version = params.version;
doc->text = params.text;
doc->directory = params.directory;
doc->arguments = params.arguments;
doc->pch = params.pch;
doc->pcms.clear();
for(auto& [name, pcm_path]: params.pcms) {
doc->pcms.try_emplace(name, pcm_path);
}
auto compile_result = co_await kota::queue([&]() -> worker::CompileResult {
ScopedTimer timer;
CompilationParams cp;
cp.kind = CompilationKind::Content;
fill_args(cp, doc->directory, doc->arguments);
if(!doc->pch.first.empty()) {
cp.pch = doc->pch;
}
cp.add_remapped_file(params.path, doc->text);
for(auto& entry: doc->pcms) {
cp.pcms.try_emplace(entry.getKey(), entry.getValue());
}
doc->unit = compile(cp);
doc->has_ast = true;
doc->dirty.store(false, std::memory_order_release);
worker::CompileResult result;
result.version = doc->version;
if(doc->unit.completed() || doc->unit.fatal_error()) {
auto diags = feature::diagnostics(doc->unit);
auto json = kota::codec::json::to_json<kota::ipc::lsp_config>(diags);
result.diagnostics = kota::codec::RawValue{json ? std::move(*json) : "[]"};
LOG_INFO("Compile done: path={}, {}ms, {} diags, fatal={}",
params.path,
timer.ms(),
diags.size(),
doc->unit.fatal_error());
} else {
result.diagnostics = kota::codec::RawValue{"[]"};
LOG_WARN("Compile incomplete: path={}, {}ms", params.path, timer.ms());
}
result.memory_usage = 0; // TODO: query actual memory
if(doc->unit.completed()) {
result.deps = doc->unit.deps();
// Build index for main file only (interested_only=true).
auto tu_index = index::TUIndex::build(doc->unit, true);
llvm::raw_string_ostream os(result.tu_index_data);
tu_index.serialize(os);
}
return result;
});
doc->strand.unlock();
doc->ast_ready.set();
shrink_if_over_limit();
co_return compile_result.value();
});
// === DocumentUpdate ===
// Only mark the document dirty — do NOT update doc.text or doc.version
// here. The kota::queue compilation work may be reading doc.text on the
// thread pool concurrently, so writing it from the event loop would be
// a data race. The next Compile request will bring the correct text
// and update it inside the strand lock.
peer.on_notification([this](const worker::DocumentUpdateParams& params) {
LOG_TRACE("DocumentUpdate: path={}, version={}", params.path, params.version);
auto it = documents.find(params.path);
if(it == documents.end()) {
LOG_TRACE("DocumentUpdate ignored (not tracked): path={}", params.path);
return;
}
it->second->dirty.store(true, std::memory_order_release);
});
// === Evict ===
peer.on_notification([this](const worker::EvictParams& params) {
LOG_DEBUG("Evict notification: path={}", params.path);
auto it = lru_index.find(params.path);
if(it != lru_index.end()) {
lru.erase(it->second);
lru_index.erase(it);
}
documents.erase(params.path);
});
// === Query (hover, definition, semantic tokens, etc.) ===
peer.on_request(
[this](RequestContext& ctx,
const worker::QueryParams& params) -> RequestResult<worker::QueryParams> {
using K = worker::QueryKind;
switch(params.kind) {
case K::Hover:
co_return co_await with_ast(params.path, [&](DocumentEntry& doc) {
auto result = feature::hover(doc.unit, params.offset);
return result ? to_raw(*result) : kota::codec::RawValue{"null"};
});
case K::GoToDefinition:
// TODO: Implement go-to-definition
co_return kota::codec::RawValue{"[]"};
case K::SemanticTokens:
co_return co_await with_ast(params.path, [&](DocumentEntry& doc) {
return to_raw(
feature::semantic_tokens(doc.unit, feature::PositionEncoding::UTF16));
});
case K::InlayHints:
co_return co_await with_ast(params.path, [&](DocumentEntry& doc) {
auto range = params.range;
if(range.begin == static_cast<uint32_t>(-1))
range = LocalSourceRange{0, static_cast<uint32_t>(doc.text.size())};
return to_raw(feature::inlay_hints(doc.unit,
range,
{},
feature::PositionEncoding::UTF16));
});
case K::FoldingRange:
co_return co_await with_ast(params.path, [&](DocumentEntry& doc) {
return to_raw(
feature::folding_ranges(doc.unit, feature::PositionEncoding::UTF16));
});
case K::DocumentSymbol:
co_return co_await with_ast(params.path, [&](DocumentEntry& doc) {
return to_raw(
feature::document_symbols(doc.unit, feature::PositionEncoding::UTF16));
});
case K::DocumentLink:
co_return co_await with_ast(params.path, [&](DocumentEntry& doc) {
return to_raw(
feature::document_links(doc.unit, feature::PositionEncoding::UTF16));
});
case K::CodeAction:
// TODO: Implement code actions
co_return kota::codec::RawValue{"[]"};
}
co_return kota::codec::RawValue{"null"};
});
}
int run_stateful_worker_mode(std::uint64_t memory_limit,
const std::string& worker_name,
const std::string& log_dir) {
logging::stderr_logger(worker_name, logging::options);
if(!log_dir.empty()) {
logging::file_logger(worker_name, log_dir, logging::options);
}
LOG_INFO("Starting stateful worker, memory_limit={}MB", memory_limit / (1024 * 1024));
kota::event_loop loop;
auto transport_result = kota::ipc::StreamTransport::open_stdio(loop);
if(!transport_result) {
LOG_ERROR("Failed to open stdio transport");
return 1;
}
kota::ipc::BincodePeer peer(loop, std::move(*transport_result));
StatefulWorker worker(peer, memory_limit);
worker.register_handlers();
LOG_INFO("Stateful worker ready, waiting for requests");
loop.schedule(peer.run());
auto ret = loop.run();
LOG_INFO("Stateful worker exiting with code {}", ret);
return ret;
}
} // namespace clice