510 lines
16 KiB
C++
510 lines
16 KiB
C++
#include "Server/LSPConverter.h"
|
|
#include "Support/FileSystem.h"
|
|
|
|
namespace clice {
|
|
|
|
namespace {
|
|
|
|
/// @brief Iterates over Unicode codepoints in a UTF-8 encoded string and invokes a callback for
|
|
/// each codepoint.
|
|
///
|
|
/// Processes the input UTF-8 string, calculating the length of each Unicode codepoint in both
|
|
/// UTF-8 (bytes) and UTF-16 (code units), and passes these lengths to the callback.
|
|
/// Iteration stops early if the callback returns `false`.
|
|
///
|
|
/// ASCII characters are treated as 1-byte UTF-8 codepoints with a UTF-16 length of 1.
|
|
/// Non-ASCII characters are processed based on their leading byte to determine UTF-8 length:
|
|
/// - Valid lengths are 2 to 4 bytes.
|
|
/// - Astral codepoints (UTF-8 length of 4) have a UTF-16 length of 2 code units.
|
|
/// Invalid UTF-8 sequences are treated as single-byte ASCII characters.
|
|
///
|
|
/// Returns `false` if the callback stops the iteration.
|
|
template <typename Callback>
|
|
bool iterateCodepoints(llvm::StringRef content, const Callback& callback) {
|
|
// Iterate over the input string, processing each codepoint.
|
|
for(size_t index = 0; index < content.size();) {
|
|
unsigned char c = static_cast<unsigned char>(content[index]);
|
|
|
|
// Handle ASCII characters (1-byte UTF-8, 1-code-unit UTF-16).
|
|
if(!(c & 0x80)) [[likely]] {
|
|
if(!callback(1, 1)) {
|
|
return true;
|
|
}
|
|
|
|
++index;
|
|
continue;
|
|
}
|
|
|
|
// Determine the length of the codepoint in UTF-8 by counting the leading 1s.
|
|
size_t length = llvm::countl_one(c);
|
|
|
|
// Validate UTF-8 encoding: length must be between 2 and 4.
|
|
if(length < 2 || length > 4) [[unlikely]] {
|
|
assert(false && "Invalid UTF-8 sequence");
|
|
|
|
// Treat the byte as an ASCII character.
|
|
if(!callback(1, 1)) {
|
|
return true;
|
|
}
|
|
|
|
++index;
|
|
continue;
|
|
}
|
|
|
|
// Advance the index by the length of the current UTF-8 codepoint.
|
|
index += length;
|
|
|
|
// Calculate the UTF-16 length: astral codepoints (4-byte UTF-8) take 2 code units.
|
|
if(!callback(length, length == 4 ? 2 : 1)) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/// Convert a proto::Position to a file offset in the content with the specified encoding kind.
|
|
std::uint32_t toOffset(llvm::StringRef content,
|
|
PositionEncodingKind kind,
|
|
proto::Position position) {
|
|
std::uint32_t offset = 0;
|
|
for(auto i = 0; i < position.line; i++) {
|
|
auto pos = content.find('\n');
|
|
assert(pos != llvm::StringRef::npos && "Line value is out of range");
|
|
|
|
offset += pos + 1;
|
|
content = content.substr(pos + 1);
|
|
}
|
|
|
|
/// Drop the content after the line.
|
|
content = content.take_until([](char c) { return c == '\n'; });
|
|
assert(position.character <= content.size() && "Character value is out of range");
|
|
|
|
if(kind == PositionEncodingKind::UTF8) {
|
|
offset += position.character;
|
|
return offset;
|
|
}
|
|
|
|
if(kind == PositionEncodingKind::UTF16) {
|
|
iterateCodepoints(content, [&](std::uint32_t utf8Length, std::uint32_t utf16Length) {
|
|
assert(position.character >= utf16Length && "Character value is out of range");
|
|
position.character -= utf16Length;
|
|
offset += utf8Length;
|
|
return position.character != 0;
|
|
});
|
|
return offset;
|
|
}
|
|
|
|
if(kind == PositionEncodingKind::UTF32) {
|
|
iterateCodepoints(content, [&](std::uint32_t utf8Length, std::uint32_t) {
|
|
assert(position.character >= 1 && "Character value is out of range");
|
|
position.character -= 1;
|
|
offset += utf8Length;
|
|
return position.character != 0;
|
|
});
|
|
return offset;
|
|
}
|
|
|
|
std::unreachable();
|
|
}
|
|
|
|
/// Remeasure the length (character count) of the content with the specified encoding kind.
|
|
std::uint32_t remeasure(llvm::StringRef content, PositionEncodingKind kind) {
|
|
if(kind == PositionEncodingKind::UTF8) {
|
|
return content.size();
|
|
}
|
|
|
|
if(kind == PositionEncodingKind::UTF16) {
|
|
std::uint32_t length = 0;
|
|
iterateCodepoints(content, [&](std::uint32_t, std::uint32_t utf16Length) {
|
|
length += utf16Length;
|
|
return true;
|
|
});
|
|
return length;
|
|
}
|
|
|
|
if(kind == PositionEncodingKind::UTF32) {
|
|
std::uint32_t length = 0;
|
|
iterateCodepoints(content, [&](std::uint32_t, std::uint32_t) {
|
|
length += 1;
|
|
return true;
|
|
});
|
|
return length;
|
|
}
|
|
|
|
std::unreachable();
|
|
}
|
|
|
|
class PositionConverter {
|
|
public:
|
|
PositionConverter(llvm::StringRef content, PositionEncodingKind encoding) :
|
|
content(content), encoding(encoding) {}
|
|
|
|
/// Convert a offset to a proto::Position with given encoding.
|
|
/// The input offset must be UTF-8 encoded and in order.
|
|
proto::Position toPosition(uint32_t offset) {
|
|
assert(offset <= content.size() && "Offset is out of range");
|
|
assert(offset >= lastInput && "Offset must be in order");
|
|
|
|
/// Fast path: return the last output.
|
|
if(offset == lastInput) [[unlikely]] {
|
|
return lastOutput;
|
|
}
|
|
|
|
/// The length of the current line.
|
|
std::uint32_t lineLength = 0;
|
|
|
|
/// Move the line offset to the current line.
|
|
for(std::uint32_t i = lastLineOffset; i < offset; i++) {
|
|
lineLength += 1;
|
|
if(content[i] == '\n') {
|
|
line += 1;
|
|
lastLineOffset += lineLength;
|
|
lineLength = 0;
|
|
}
|
|
}
|
|
|
|
/// Get the content of the current line.
|
|
auto lineContent = content.substr(lastLineOffset, lineLength);
|
|
auto position = proto::Position{
|
|
.line = line,
|
|
.character = remeasure(lineContent, encoding),
|
|
};
|
|
|
|
/// Cache the result.
|
|
lastInput = offset;
|
|
lastOutput = position;
|
|
|
|
return position;
|
|
}
|
|
|
|
template <typename Range, typename Proj>
|
|
void toPositions(Range&& range, Proj&& proj) {
|
|
std::vector<uint32_t> offsets;
|
|
for(auto&& item: range) {
|
|
auto [begin, end] = proj(item);
|
|
offsets.emplace_back(begin);
|
|
offsets.emplace_back(end);
|
|
}
|
|
|
|
ranges::sort(offsets);
|
|
|
|
for(auto&& offset: offsets) {
|
|
if(auto it = cache.find(offset); it == cache.end()) {
|
|
cache.try_emplace(offset, toPosition(offset));
|
|
}
|
|
}
|
|
}
|
|
|
|
proto::Position lookup(uint32_t offset) {
|
|
auto it = cache.find(offset);
|
|
assert(it != cache.end() && "Offset is not cached");
|
|
return it->second;
|
|
}
|
|
|
|
proto::Range lookup(LocalSourceRange range) {
|
|
auto it = cache.find(range.begin);
|
|
assert(it != cache.end() && "Offset is not cached");
|
|
auto begin = it->second;
|
|
it = cache.find(range.end);
|
|
assert(it != cache.end() && "Offset is not cached");
|
|
auto end = it->second;
|
|
return proto::Range{begin, end};
|
|
}
|
|
|
|
private:
|
|
std::uint32_t line = 0;
|
|
/// The offset of the last line end.
|
|
std::uint32_t lastLineOffset = 0;
|
|
|
|
/// The input offset of last call.
|
|
std::uint32_t lastInput = 0;
|
|
proto::Position lastOutput = {0, 0};
|
|
|
|
llvm::DenseMap<std::uint32_t, proto::Position> cache;
|
|
|
|
llvm::StringRef content;
|
|
PositionEncodingKind encoding;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
/// Convert a position expressed in the negotiated encoding into a UTF-8 byte offset.
std::uint32_t LSPConverter::convert(llvm::StringRef content, proto::Position position) {
    const std::uint32_t offset = toOffset(content, encoding(), position);
    return offset;
}
|
|
|
|
/// Convert a UTF-8 byte offset into a position expressed in the negotiated encoding.
/// A fresh converter is used, so the scan always starts at the beginning of `content`.
proto::Position LSPConverter::convert(llvm::StringRef content, std::uint32_t offset) {
    return PositionConverter(content, encoding()).toPosition(offset);
}
|
|
|
|
/// Convert a URI into a file system path; the work is delegated to `fs::toPath`.
std::string LSPConverter::convert(llvm::StringRef URI) {
    std::string path = fs::toPath(URI);
    return path;
}
|
|
|
|
/// Convert a hover result to JSON. Not implemented yet: the result is always JSON `null`.
json::Value LSPConverter::convert(llvm::StringRef content, const feature::Hover& hover) {
    json::Value nothing(nullptr);
    return nothing;
}
|
|
|
|
/// Convert inlay hints to JSON. Not implemented yet: the result is always JSON `null`.
json::Value LSPConverter::convert(llvm::StringRef content, const feature::InlayHints& hints) {
    json::Value nothing(nullptr);
    return nothing;
}
|
|
|
|
/// Convert folding ranges to a JSON array with one object per range.
///
/// Every object carries the start and the end of the range as line/character pairs in the
/// negotiated encoding. The end character is emitted together with the start character so the
/// client receives the exact end of the range instead of falling back to the end of the line.
json::Value LSPConverter::convert(llvm::StringRef content, const feature::FoldingRanges& foldings) {
    /// Convert all range boundaries up front; the converter needs its offsets in order.
    PositionConverter converter(content, encoding());
    converter.toPositions(foldings, [](auto&& folding) { return folding.range; });

    json::Array result;
    for(auto&& folding: foldings) {
        auto [beginOffset, endOffset] = folding.range;
        auto [beginLine, beginChar] = converter.lookup(beginOffset);
        auto [endLine, endChar] = converter.lookup(endOffset);

        auto object = json::Object{
            {"startLine",      beginLine},
            {"startCharacter", beginChar},
            {"endLine",        endLine  },
            {"endCharacter",   endChar  },
            {"kind",           "region" },
        };

        result.push_back(std::move(object));
    }
    return result;
}
|
|
|
|
/// Convert document links to a JSON array of `{range, target}` objects.
///
/// The offsets are fed to the converter directly, begin before end and link after link, so
/// they have to arrive in non-decreasing order.
json::Value LSPConverter::convert(llvm::StringRef content, const feature::DocumentLinks& links) {
    PositionConverter converter(content, encoding());

    json::Array result;
    for(auto& link: links) {
        /// The begin must be converted before the end.
        auto start = converter.toPosition(link.range.begin);
        auto finish = converter.toPosition(link.range.end);
        proto::Range range{start, finish};

        json::Object entry{
            /// The range of document link.
            {"range",  json::serialize(range)},
            /// Target file URI.
            {"target", fs::toURI(link.file)  },
        };

        result.emplace_back(std::move(entry));
    }

    return result;
}
|
|
|
|
/// Convert document symbols to JSON. Not implemented yet: the result is always an empty array.
json::Value LSPConverter::convert(llvm::StringRef content,
                                  const feature::DocumentSymbols& symbols) {
    /// Shape of one symbol node in the output; nothing populates it yet.
    struct DocumentSymbol {
        std::string name;
        std::string detail;
        SymbolKind kind;
        proto::Range range;
        proto::Range selectionRange;
        std::vector<DocumentSymbol> children;
    };

    PositionConverter converter(content, encoding());

    /// TODO: Implementation.
    json::Array converted;
    return converted;
}
|
|
|
|
/// Convert semantic tokens to a JSON object whose `data` member holds the flat token array.
///
/// Every token contributes five integers: line delta, start character delta, length, token
/// type and modifier bits. Line and character are relative to the start of the previously
/// emitted group; the character is only relative when both groups are on the same line.
/// A token that spans several lines is split into one group per line.
json::Value LSPConverter::convert(llvm::StringRef content, const feature::SemanticTokens& tokens) {
    std::vector<std::uint32_t> groups;

    /// Absolute start of the most recently emitted group. It is updated on every emission, so
    /// the deltas stay correct after a token was split across lines.
    std::uint32_t lastLine = 0;
    std::uint32_t lastChar = 0;

    /// Append one group; `line` and `character` are absolute and turned into deltas here.
    auto addGroup = [&](uint32_t line,
                        uint32_t character,
                        uint32_t length,
                        SymbolKind kind,
                        SymbolModifiers modifiers) {
        const std::uint32_t deltaLine = line - lastLine;
        const std::uint32_t deltaChar = (deltaLine == 0 ? character - lastChar : character);

        groups.emplace_back(deltaLine);
        groups.emplace_back(deltaChar);
        groups.emplace_back(length);
        groups.emplace_back(kind.value());
        /// Modifiers are not encoded yet, the bit set is always empty.
        groups.emplace_back(0);

        lastLine = line;
        lastChar = character;
    };

    PositionConverter converter(content, encoding());

    for(auto& token: tokens) {
        auto [beginOffset, endOffset] = token.range;
        auto [beginLine, beginChar] = converter.toPosition(beginOffset);
        auto [endLine, endChar] = converter.toPosition(endOffset);

        if(beginLine == endLine) [[likely]] {
            addGroup(beginLine, beginChar, endChar - beginChar, token.kind, token.modifiers);
            continue;
        }

        /// If the token spans multiple lines, split it into multiple tokens.
        auto subContent = content.substr(beginOffset, endOffset - beginOffset);

        /// Absolute start of the piece currently being collected. The first piece starts
        /// where the token starts, every following piece at the beginning of its line.
        std::uint32_t pieceLine = beginLine;
        std::uint32_t pieceChar = beginChar;
        /// The offset of the last line end.
        std::uint32_t lastLineOffset = 0;
        /// The length of the current line.
        std::uint32_t lineLength = 0;

        for(auto c: subContent) {
            lineLength += 1;
            if(c != '\n') {
                continue;
            }

            /// The measured piece includes the terminating newline.
            std::uint32_t length =
                remeasure(subContent.substr(lastLineOffset, lineLength), encoding());
            addGroup(pieceLine, pieceChar, length, token.kind, token.modifiers);

            pieceLine += 1;
            pieceChar = 0;
            lastLineOffset += lineLength;
            lineLength = 0;
        }

        /// Process the last line if it's not empty.
        if(lineLength > 0) {
            std::uint32_t length = remeasure(subContent.substr(lastLineOffset), encoding());
            addGroup(pieceLine, pieceChar, length, token.kind, token.modifiers);
        }
    }

    return json::Object{
        /// The actual tokens.
        {"data", json::serialize(groups)},
    };
}
|
|
|
|
/// Convert completion items to a JSON array of `{label, kind, textEdit}` objects, where
/// `textEdit` holds the replacement text and its range in the negotiated encoding.
json::Value LSPConverter::convert(llvm::StringRef content,
                                  const std::vector<feature::CompletionItem>& items) {
    /// Convert all edit range boundaries up front; the converter needs its offsets in order.
    PositionConverter converter(content, encoding());
    converter.toPositions(items, [](auto& item) { return item.edit.range; });

    json::Array result;
    for(auto& item: items) {
        json::Object textEdit{
            {"newText", item.edit.text},
            {"range", json::serialize(converter.lookup(item.edit.range))},
        };

        json::Object entry{
            {"label", item.label},
            {"kind", static_cast<int>(item.kind)},
            {"textEdit", std::move(textEdit)},
        };

        result.emplace_back(std::move(entry));
    }
    return result;
}
|
|
|
|
namespace proto {
|
|
|
|
struct InitializeParams {
|
|
struct ClientInfo {
|
|
std::string name;
|
|
std::string version;
|
|
} clientInfo;
|
|
|
|
struct ClientCapabilities {
|
|
struct General {
|
|
std::vector<std::string> positionEncodings;
|
|
} general;
|
|
} capabilities;
|
|
|
|
std::vector<WorkspaceFolder> workspaceFolders;
|
|
};
|
|
|
|
struct InitializeResult {
|
|
struct ServerInfo {
|
|
std::string name;
|
|
std::string version;
|
|
} serverInfo;
|
|
|
|
struct ServerCapabilities {
|
|
std::string positionEncoding;
|
|
TextDocumentSyncKind textDocumentSync = TextDocumentSyncKind::Full;
|
|
|
|
bool declarationProvider = true;
|
|
bool definitionProvider = true;
|
|
bool typeDefinitionProvider = true;
|
|
bool implementationProvider = true;
|
|
bool callHierarchyProvider = true;
|
|
bool typeHierarchyProvider = true;
|
|
|
|
bool hoverProvider = true;
|
|
ResolveProvider inlayHintProvider = {true};
|
|
bool foldingRangeProvider = true;
|
|
ResolveProvider documentLinkProvider = {false};
|
|
bool documentSymbolProvider = true;
|
|
SemanticTokenOptions semanticTokensProvider;
|
|
|
|
/// TODO:
|
|
CompletionOptions completionProvider;
|
|
/// signatureHelpProvider
|
|
/// codeLensProvider
|
|
/// codeActionProvider
|
|
/// documentFormattingProvider
|
|
/// documentRangeFormattingProvider
|
|
/// renameProvider
|
|
/// diagnosticProvider
|
|
} capabilities;
|
|
};
|
|
|
|
} // namespace proto
|
|
|
|
/// Handle the initialize request: negotiate the position encoding, record the workspace path
/// and build the serialized initialize result.
///
/// The client's encodings are examined in the order given and the first one that is supported
/// ("utf-8", "utf-16" or "utf-32") is selected. When the list is empty or holds no supported
/// entry, "utf-16" is used. The response always names the encoding that was selected.
///
/// The workspace path is taken from the first workspace folder and is left untouched when the
/// client sent none.
json::Value LSPConverter::initialize(json::Value value) {
    auto params = json::deserialize<proto::InitializeParams>(value);

    /// Start from the fallback and replace it with the first supported client preference.
    kind = PositionEncodingKind::UTF16;
    std::string positionEncoding = "utf-16";
    for(const auto& candidate: params.capabilities.general.positionEncodings) {
        if(candidate == "utf-8") {
            kind = PositionEncodingKind::UTF8;
        } else if(candidate == "utf-16") {
            kind = PositionEncodingKind::UTF16;
        } else if(candidate == "utf-32") {
            kind = PositionEncodingKind::UTF32;
        } else {
            /// Unknown encoding name, try the next preference.
            continue;
        }

        positionEncoding = candidate;
        break;
    }

    /// The list may be empty, so the first element must not be accessed unconditionally.
    if(!params.workspaceFolders.empty()) {
        workspacePath = fs::toPath(params.workspaceFolders[0].uri);
    }

    proto::InitializeResult result{
        .serverInfo = {"clice", "0.0.1"},
        .capabilities = {
            .positionEncoding = positionEncoding,
        },
    };

    /// Announce every symbol kind as a token type, with its first letter in lower case.
    auto& semanticTokensProvider = result.capabilities.semanticTokensProvider;
    for(auto name: SymbolKind::all()) {
        std::string type{name};
        if(!type.empty()) {
            /// `std::tolower` requires a value representable as `unsigned char`.
            type[0] = static_cast<char>(std::tolower(static_cast<unsigned char>(type[0])));
        }
        semanticTokensProvider.legend.tokenTypes.emplace_back(std::move(type));
    }

    return json::serialize(result);
}
|
|
|
|
} // namespace clice
|