9 Commits

Author SHA1 Message Date
ykiko
8f714c3b4a refactor(document links): use Lexer for unified directive argument scanning
Replace hand-written character scanning with the project's Lexer class
to find filename arguments in preprocessor directives. Extend the Lexer
to activate header_name mode for #embed and expose set_header_name_mode()
for __has_include/__has_embed contexts. Remove unused Include::filename_range
field which had a latent assert crash on macro-expanded includes.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 21:02:22 +08:00
ykiko
ccc805d0c3 Merge branch 'main' of https://github.com/clice-io/clice into feat/document-links-pch-embed 2026-04-09 19:44:34 +08:00
ykiko
d48236de9c refactor: unify include handling with add_link_by_location
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 17:12:14 +08:00
ykiko
b691ed1d06 refactor: extract add_link_by_location to deduplicate has_include/embed/has_embed
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 17:08:27 +08:00
ykiko
02e4f74347 style: remove verbose comments from integration tests
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 17:06:13 +08:00
ykiko
8af2704723 refactor: reuse find_filename_range for has_include scanning
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 17:01:00 +08:00
ykiko
4d8c335c0d fix: re-lookup session after co_await to avoid invalidated iterator
The sessions DenseMap iterator may be invalidated during co_await
(other coroutines can modify the map). Re-lookup by path_id after
the await completes.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 16:56:08 +08:00
ykiko
4926b4ac32 test(document links): add __has_embed integration tests
Cover both existing-file (produces link) and missing-file (no link)
cases for __has_embed directives.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 16:05:54 +08:00
ykiko
13527b7084 feat(feature): preserve PCH document links and add #embed/__has_embed support
PCH compilation now serializes document links and stores them in PCHState.
The master server merges PCH links with main-file links on DocumentLink
requests, fixing missing links for includes inside the preamble.

Also adds document link support for #embed and __has_embed directives.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 15:56:19 +08:00
18 changed files with 329 additions and 53 deletions

View File

@@ -94,7 +94,7 @@ public:
const clang::Token& include_tok,
llvm::StringRef,
bool,
clang::CharSourceRange filename_range,
clang::CharSourceRange,
clang::OptionalFileEntryRef,
llvm::StringRef,
llvm::StringRef,
@@ -108,7 +108,6 @@ public:
unit->directives[prev_fid].includes.emplace_back(Include{
.fid = {},
.location = include_tok.getLocation(),
.filename_range = filename_range.getAsRange(),
});
}

View File

@@ -20,11 +20,8 @@ struct Include {
/// The file id of included file.
clang::FileID fid;
/// Location of the `include`.
/// Location of the `include` keyword.
clang::SourceLocation location;
/// The range of filename(includes `""` or `<>`).
clang::SourceRange filename_range;
};
/// Information about `__has_include` directive.

View File

@@ -1,13 +1,20 @@
#include <algorithm>
#include <cstdint>
#include <string>
#include <vector>
#include "feature/feature.h"
#include "syntax/lexer.h"
namespace clice::feature {
namespace {} // namespace
namespace {
/// Returns true when `word` is exactly one of the include/embed-style
/// keywords listed in the table below; any other spelling returns false.
bool is_directive_keyword(llvm::StringRef word) {
    static constexpr const char* keywords[] = {
        "include",
        "include_next",
        "import",
        "embed",
        "__has_include",
        "__has_include_next",
        "__has_embed",
    };

    for(llvm::StringRef keyword: keywords) {
        if(word == keyword) {
            return true;
        }
    }
    return false;
}
} // namespace
auto document_links(CompilationUnitRef unit, PositionEncoding encoding)
-> std::vector<protocol::DocumentLink> {
@@ -23,49 +30,92 @@ auto document_links(CompilationUnitRef unit, PositionEncoding encoding)
PositionMapper converter(content, encoding);
auto& directives = directives_it->second;
links.reserve(directives.includes.size() + directives.has_includes.size());
for(const auto& include: directives.includes) {
auto [fid, range] = unit.decompose_range(include.filename_range);
if(fid != interested || !range.valid()) {
continue;
// Find the filename argument of a preprocessor directive starting from `offset`.
// Creates a Lexer from the line start so that # at start-of-line is detected,
// which enables header_name mode for #include and #embed automatically.
// For __has_include/__has_embed, manually enables header_name mode after (.
auto find_argument_range = [&](std::uint32_t offset) -> std::optional<LocalSourceRange> {
std::uint32_t line_start = 0;
if(offset > 0) {
if(auto nl = content.rfind('\n', offset - 1); nl != llvm::StringRef::npos)
line_start = static_cast<std::uint32_t>(nl + 1);
}
protocol::DocumentLink link{
.range = to_range(converter, range),
};
link.target = std::string(unit.file_path(include.fid));
auto line = content.substr(line_start);
Lexer lexer(line);
bool after_has_keyword = false;
while(true) {
auto tok = lexer.advance();
if(tok.is_eof() || tok.is_eod())
break;
auto abs_begin = line_start + tok.range.begin;
auto abs_end = line_start + tok.range.end;
// Detect __has_include/__has_embed to enable header_name mode after (.
if(tok.is_identifier()) {
auto text = tok.text(line);
if(text == "__has_include" || text == "__has_include_next" ||
text == "__has_embed") {
after_has_keyword = true;
continue;
}
}
if(tok.kind == clang::tok::l_paren && after_has_keyword) {
after_has_keyword = false;
lexer.set_header_name_mode();
continue;
}
// Only return tokens at or after the directive's starting offset.
if(abs_begin < offset)
continue;
if(tok.is_header_name() || tok.kind == clang::tok::string_literal)
return LocalSourceRange(abs_begin, abs_end);
if(tok.is_identifier() && !is_directive_keyword(tok.text(line)))
return LocalSourceRange(abs_begin, abs_end);
}
return std::nullopt;
};
// Appends a document link for the directive found at `loc`, using `target`
// as the link destination. Nothing is appended when `loc` does not decompose
// into the file of interest, when its offset lies at or past the end of
// `content`, or when no filename argument can be located from that offset.
auto add_link = [&](clang::SourceLocation loc, llvm::StringRef target) {
    auto [fid, offset] = unit.decompose_location(loc);
    const bool in_bounds = fid == interested && offset < content.size();
    if(!in_bounds) {
        return;
    }

    if(auto argument = find_argument_range(offset)) {
        protocol::DocumentLink link{.range = to_range(converter, *argument)};
        link.target = target.str();
        links.push_back(std::move(link));
    }
};
for(const auto& include: directives.includes) {
if(include.fid.isValid()) {
add_link(include.location, unit.file_path(include.fid));
}
}
for(const auto& has_include: directives.has_includes) {
if(has_include.fid.isInvalid()) {
continue;
if(has_include.fid.isValid()) {
add_link(has_include.location, unit.file_path(has_include.fid));
}
}
auto [fid, offset] = unit.decompose_location(has_include.location);
if(fid != interested || offset >= content.size()) {
continue;
for(const auto& embed: directives.embeds) {
if(embed.file) {
add_link(embed.loc, embed.file->getName());
}
}
auto tail = content.substr(offset);
char open = tail.front();
if(open != '<' && open != '"') {
continue;
for(const auto& has_embed: directives.has_embeds) {
if(has_embed.file) {
add_link(has_embed.loc, has_embed.file->getName());
}
char close = open == '<' ? '>' : '"';
auto close_index = tail.find(close, 1);
if(close_index == llvm::StringRef::npos) {
continue;
}
LocalSourceRange range(offset, offset + static_cast<std::uint32_t>(close_index + 1));
protocol::DocumentLink link{
.range = to_range(converter, range),
};
link.target = std::string(unit.file_path(has_include.fid));
links.push_back(std::move(link));
}
return links;

View File

@@ -502,6 +502,7 @@ et::task<bool> Compiler::ensure_pch(Session& session,
st.bound = bound;
st.hash = preamble_hash;
st.deps = capture_deps_snapshot(workspace.path_pool, result.value().deps);
st.document_links_json = std::move(result.value().pch_links_json);
st.building.reset();
session.pch_ref = Session::PCHRef{path_id, preamble_hash, bound};

View File

@@ -478,15 +478,38 @@ void MasterServer::register_handlers() {
co_return co_await compiler.forward_query(worker::QueryKind::DocumentSymbol, sit->second);
});
peer.on_request(
[this](RequestContext& ctx, const protocol::DocumentLinkParams& params) -> RawResult {
auto path = uri_to_path(params.text_document.uri);
auto path_id = workspace.path_pool.intern(path);
auto sit = sessions.find(path_id);
if(sit == sessions.end())
co_return serde_raw{"null"};
co_return co_await compiler.forward_query(worker::QueryKind::DocumentLink, sit->second);
});
// textDocument/documentLink: forwards the query for the open session and
// then merges in the pre-serialized links stored with the session's PCH, so
// that links for includes inside the preamble are reported as well.
// Replies "null" when there is no session for the document or when the
// forwarded query yields no value.
peer.on_request([this](RequestContext& ctx,
                       const protocol::DocumentLinkParams& params) -> RawResult {
    auto path = uri_to_path(params.text_document.uri);
    auto path_id = workspace.path_pool.intern(path);
    auto sit = sessions.find(path_id);
    if(sit == sessions.end())
        co_return serde_raw{"null"};

    auto& session = sit->second;
    auto result = co_await compiler.forward_query(worker::QueryKind::DocumentLink, session);
    if(!result.has_value())
        co_return serde_raw{"null"};

    auto& links = result.value();

    // True only for a serialized JSON array holding at least one element,
    // i.e. "[...]" that is longer than the empty array "[]".
    auto is_nonempty_array = [](const auto& json) {
        return json.size() > 2 && json.front() == '[' && json.back() == ']';
    };

    // Re-lookup session after co_await since iterators may be invalidated.
    auto sit2 = sessions.find(path_id);
    if(sit2 != sessions.end() && sit2->second.pch_ref) {
        auto pch_it = workspace.pch_cache.find(sit2->second.pch_ref->path_id);
        if(pch_it != workspace.pch_cache.end() && !pch_it->second.document_links_json.empty()) {
            auto& pch_json = pch_it->second.document_links_json;
            if(is_nonempty_array(links.data)) {
                // Splice only when the PCH side also has elements. Splicing
                // "[]" would leave a trailing comma ("[a,b,]") and splicing
                // "null" would produce garbage, both of which are invalid JSON.
                if(is_nonempty_array(pch_json)) {
                    // "[a,b]" + "[c,d]" -> "[a,b,c,d]"
                    links.data.pop_back();  // remove trailing ']'
                    links.data += ',';
                    links.data.append(pch_json.begin() + 1, pch_json.end());  // skip '['
                }
            } else {
                // No main-file links to keep: report the PCH links as-is.
                links.data = pch_json;
            }
        }
    }

    co_return std::move(links);
});
peer.on_request(
[this](RequestContext& ctx, const protocol::CodeActionParams& params) -> RawResult {

View File

@@ -102,6 +102,7 @@ struct BuildResult {
std::string output_path; ///< PCH or PCM path
std::vector<std::string> deps;
std::string tu_index_data;
std::string pch_links_json; ///< Pre-serialized DocumentLink[] from PCH
eventide::serde::RawValue result_json; ///< Completion/SignatureHelp result
};

View File

@@ -96,8 +96,13 @@ static worker::BuildResult handle_build_pch(const worker::BuildParams& params) {
errors = collect_errors(unit);
std::string tu_index_data;
if(success)
std::string pch_links_json;
if(success) {
tu_index_data = serialize_tu_index(unit);
auto links = feature::document_links(unit);
auto raw = to_raw(links);
pch_links_json = std::move(raw.data);
}
// Destroy CompilationUnit to flush PCH to disk.
unit = CompilationUnit(nullptr);
@@ -110,6 +115,7 @@ static worker::BuildResult handle_build_pch(const worker::BuildParams& params) {
result.output_path = std::move(final_path);
result.deps = pch_info.deps;
result.tu_index_data = std::move(tu_index_data);
result.pch_links_json = std::move(pch_links_json);
return result;
} else {
LOG_WARN("BuildPCH failed: file={}, {}ms, errors=[{}]", params.file, timer.ms(), errors);

View File

@@ -140,6 +140,7 @@ struct PCHState {
std::uint32_t bound = 0;
std::uint64_t hash = 0;
DepsSnapshot deps;
std::string document_links_json; ///< Pre-serialized DocumentLink[] from PCH build
std::shared_ptr<eventide::event> building;
};

View File

@@ -53,7 +53,8 @@ void Lexer::lex(Token& token) {
}
} else if(parse_pp_keyword) {
parse_pp_keyword = false;
parse_header_name = token.text(content) == "include";
auto kw = token.text(content);
parse_header_name = kw == "include" || kw == "include_next" || kw == "embed";
}
}

View File

@@ -51,6 +51,15 @@ public:
Token advance_until(TokenKind kind);
/// Force the lexer into header-name mode so the next token is lexed
/// via LexIncludeFilename (correctly handling both "..." and <...>).
/// The lexer turns this mode on by itself after the `include`,
/// `include_next` and `embed` directive keywords; call this before lexing
/// filename arguments in contexts like __has_include() or __has_embed()
/// where the lexer cannot detect the mode automatically.
void set_header_name_mode() {
parse_header_name = true;
}
private:
bool ignore_end_of_directive = true;
bool parse_pp_keyword = false;

View File

@@ -231,6 +231,14 @@ def _generate_test_data_cdbs(data_dir: Path) -> None:
if ic_main.exists():
_write(ic_dir, [_entry(ic_dir, ic_main, ["-I."])])
# document_links
dl_dir = data_dir / "document_links"
dl_main = dl_dir / "main.cpp"
if dl_main.exists():
_write(
dl_dir, [_entry(dl_dir, dl_main, [f"-I{dl_dir.as_posix()}", "-std=c++23"])]
)
# pch_test
pt_dir = data_dir / "pch_test"
if pt_dir.exists():

View File

@@ -0,0 +1 @@
0123456789

View File

@@ -0,0 +1,3 @@
#pragma once
int a = 1;

View File

@@ -0,0 +1,3 @@
#pragma once
int b = 2;

View File

@@ -0,0 +1,3 @@
#pragma once
int c = 3;

View File

@@ -0,0 +1,20 @@
#include "header_a.h"
#include "header_b.h"
int x = 1;
#include "header_c.h"
const char data[] = {
#embed "data.bin"
};
#if __has_embed("data.bin")
int has_embed_found = 1;
#endif
#if __has_embed("no_such_file.bin")
int has_embed_not_found = 1;
#endif
int main() {
return a + b + c;
}

View File

@@ -0,0 +1,103 @@
from pathlib import Path
import pytest
@pytest.mark.workspace("document_links")
async def test_document_links_with_pch(client, workspace):
    """All links reported for main.cpp resolve to the expected file names."""
    expected = [
        "data.bin",
        "data.bin",
        "header_a.h",
        "header_b.h",
        "header_c.h",
    ]

    uri, _ = await client.open_and_wait(workspace / "main.cpp")
    links = await client.document_links(uri)
    assert links is not None, "document_links returned None"

    # Compare by base name only, in sorted order.
    targets = sorted(Path(link.target).name for link in links)
    assert targets == expected, f"Unexpected targets: {targets}"

    client.close(uri)
@pytest.mark.workspace("document_links")
async def test_document_links_pch_portion(client, workspace):
    """Links starting on lines 0-1 are exactly header_a.h and header_b.h."""
    uri, _ = await client.open_and_wait(workspace / "main.cpp")
    links = await client.document_links(uri)

    # Keep only the links whose range starts before line 2.
    pch_links = list(filter(lambda link: link.range.start.line < 2, links))
    count = len(pch_links)
    assert count == 2, f"Expected 2 PCH links (lines 0-1), got {count}"

    pch_targets = sorted(Path(link.target).name for link in pch_links)
    assert pch_targets == ["header_a.h", "header_b.h"]

    client.close(uri)
@pytest.mark.workspace("document_links")
async def test_document_links_main_portion(client, workspace):
    """Links starting on line 2 or later are header_c.h and data.bin twice."""
    uri, _ = await client.open_and_wait(workspace / "main.cpp")
    links = await client.document_links(uri)

    # Keep only the links whose range starts at line 2 or later.
    main_links = list(filter(lambda link: link.range.start.line >= 2, links))
    count = len(main_links)
    assert count == 3, f"Expected 3 main-file links (lines 3, 6, 9), got {count}"

    main_targets = sorted(Path(link.target).name for link in main_links)
    assert main_targets == ["data.bin", "data.bin", "header_c.h"]

    client.close(uri)
@pytest.mark.workspace("document_links")
async def test_document_links_embed(client, workspace):
    """Exactly one link to data.bin starts on line 6."""

    def is_embed_link(link):
        return Path(link.target).name == "data.bin" and link.range.start.line == 6

    uri, _ = await client.open_and_wait(workspace / "main.cpp")
    links = await client.document_links(uri)

    embed_links = [link for link in links if is_embed_link(link)]
    count = len(embed_links)
    assert count == 1, f"Expected 1 embed link at line 6, got {count}"

    client.close(uri)
@pytest.mark.workspace("document_links")
async def test_document_links_has_embed_exists(client, workspace):
    """Exactly one link to data.bin starts on line 9."""

    def is_has_embed_link(link):
        return Path(link.target).name == "data.bin" and link.range.start.line == 9

    uri, _ = await client.open_and_wait(workspace / "main.cpp")
    links = await client.document_links(uri)

    has_embed_links = [link for link in links if is_has_embed_link(link)]
    count = len(has_embed_links)
    assert count == 1, f"Expected 1 has_embed link at line 9, got {count}"

    client.close(uri)
@pytest.mark.workspace("document_links")
async def test_document_links_has_embed_missing(client, workspace):
    """No reported link has no_such_file.bin as its target file name."""
    uri, _ = await client.open_and_wait(workspace / "main.cpp")
    links = await client.document_links(uri)

    missing_links = []
    for link in links:
        if Path(link.target).name == "no_such_file.bin":
            missing_links.append(link)

    count = len(missing_links)
    assert count == 0, f"Expected 0 links for non-existent file, got {count}"

    client.close(uri)

View File

@@ -15,9 +15,9 @@ TEST_SUITE(DocumentLink, Tester) {
std::vector<protocol::DocumentLink> links;
void run(llvm::StringRef source) {
void run(llvm::StringRef source, llvm::StringRef standard = "-std=c++17") {
add_files("main.cpp", source);
ASSERT_TRUE(compile());
ASSERT_TRUE(compile(standard));
links = feature::document_links(*unit, feature::PositionEncoding::UTF8);
}
@@ -89,6 +89,53 @@ TEST_CASE(HasInclude) {
EXPECT_LINK(1, "1", TestVFS::path("test.h"));
}
// An #include whose filename comes from a macro (`#include HEADER`) must
// still produce exactly one link, and that link targets test.h.
// NOTE(review): `@0[...$]` presumably marks the expected link range "0" in
// the fixture, here the macro name; confirm against the Tester annotations.
TEST_CASE(MacroInclude) {
run(R"cpp(
#[test.h]
#[main.cpp]
#define HEADER "test.h"
#include @0[HEADER$]
)cpp");
ASSERT_EQ(links.size(), 1U);
EXPECT_LINK(0, "0", TestVFS::path("test.h"));
}
// A `#embed "bytes.bin"` directive must produce exactly one link targeting
// bytes.bin. The fixture is compiled with -std=c++23 instead of the default
// standard used by run().
TEST_CASE(Embed) {
run(R"cpp(
#[bytes.bin]
0123456789
#[main.cpp]
const char e[] = {
#embed @0["bytes.bin"$]
};
)cpp",
"-std=c++23");
ASSERT_EQ(links.size(), 1U);
EXPECT_LINK(0, "0", TestVFS::path("bytes.bin"));
}
// `__has_embed` on a file that exists in the fixture (data.bin) must produce
// a link, while `__has_embed` on a file that the fixture does not define
// (non_existent.bin) must not: only one link is expected in total.
TEST_CASE(HasEmbed) {
run(R"cpp(
#[data.bin]
ABCDE
#[main.cpp]
#if __has_embed(@0["data.bin"$])
#endif
#if __has_embed("non_existent.bin")
#endif
)cpp",
"-std=c++23");
ASSERT_EQ(links.size(), 1U);
EXPECT_LINK(0, "0", TestVFS::path("data.bin"));
}
}; // TEST_SUITE(DocumentLink)
} // namespace