From 97d29bb96ba15b01be0c0320f683614524acee9e Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 27 Dec 2024 11:20:11 +0800 Subject: [PATCH] Some small changes. (#20) --- docs/clice.toml | 112 ++++++++++++------- docs/configuration.md | 179 +++++++++++++++++++++++++++++++ include/Basic/Basic.h | 22 +--- include/Basic/Location.h | 16 +-- include/Basic/SourceCode.h | 28 ++--- include/Feature/SemanticTokens.h | 8 +- include/Server/Config.h | 42 +++++--- include/Support/Enum.h | 26 +++++ src/Basic/SourceCode.cpp | 162 ++++++++++++++++++++++++++++ src/Server/Config.cpp | 129 +++++++++++++--------- src/Server/Lifestyle.cpp | 4 +- src/Server/Scheduler.cpp | 8 +- src/Server/Server.cpp | 2 +- unittests/Basic/SourceCode.cpp | 22 ++++ 14 files changed, 610 insertions(+), 150 deletions(-) create mode 100644 docs/configuration.md create mode 100644 src/Basic/SourceCode.cpp create mode 100644 unittests/Basic/SourceCode.cpp diff --git a/docs/clice.toml b/docs/clice.toml index 360b9cfc..a7328a92 100644 --- a/docs/clice.toml +++ b/docs/clice.toml @@ -1,48 +1,88 @@ +### clice configuration + +# This section outlines the supported built-in variables for clice. +# These variables can be referenced in strings using the syntax `${var}`. + +# Supported variables: +# - `${version}`: The version of clice. +# - `${binary}`: The path of the clice binary. +# - `${llvm_version}`: The LLVM version used by clice. +# - `${workspace}`: The workspace directory provided by the client. + [server] -# "pipe" or "socket" -mode = "socket" -port = 50051 -address = "127.0.0.1" + # Compile commands directories to search for compile_commands.json files. + compile_commands_dirs = ["${workspace}/build"] -[frontend] -# the commands that will be appended to the command line -append = [] -# the commands that will be removed from the command line -remove = [] +# Cache configuration for storing precompiled headers and modules. +[cache] + # Directory for storing PCH and PCM files. 
+ dir = "${workspace}/.clice/cache" -# the -compile-commands-directory = "" + # Maximum number of cache files to keep. If the total exceeds this limit, clice + # deletes the oldest files automatically. Set to 0 to disable the limit. + limit = 0 -# the path of builtin headers that clang uses -resource-directory = "${executable}/../lib/clang/${version}" +# Index configuration for symbol and feature indexing. +[index] + # Directory for storing index files. + dir = "${workspace}/.clice/index" -[diagnostic] -# the maximum number of diagnostics that will be displayed -max-diagnostic = 100 + # Whether to index entities in implicit template instantiations. + implicitInstantiation = true -[completion.keyword] -# whether to insert snippets when completing keywords -# if enabled, keywords will be expanded with placeholders for typical usage -# e.g., "if" will expand to "if (condition) { }" -snippet = false +# Control the behavior for specific files. Note that Clice matches rules +# in order. If you want to add your own rules, either delete this rule +# or insert your rule before it. +[[rules]] + # Files matching the specified pattern will have this rule applied. + # + # Patterns can use the following syntax: + # - `*`: Matches one or more characters in a path segment. + # - `?`: Matches a single character in a path segment. + # - `**`: Matches any number of path segments, including none. + # - `{}`: Groups conditions (e.g., `**/*.{ts,js}` matches all TypeScript + # and JavaScript files). + # - `[]`: Declares a range of characters to match in a path segment + # (e.g., `example.[0-9]` matches `example.0`, `example.1`, etc.). + # - `[!...]`: Negates a range of characters to match in a path segment + # (e.g., `example.[!0-9]` matches `example.a`, `example.b`, but not `example.0`). 
+ pattern = "**/*" -[completion.function] -# whether to insert function arguments as placeholders when completing a function -# if enabled, arguments will be inserted as placeholders, e.g., fo^ -> foo(int a, int b) -arguments = false + # Commands to append to the original command list (e.g., ["-std=c++17"]). + append = [] -# whether to insert parentheses when completing a function -# only applicable when `arguments` is set to false -# if disabled, only the function name will be inserted, e.g., fo^ -> foo -parens = false + # Commands to remove from the original command list. + remove = [] -[inlay-hint] -# the maximum length of the inlay hint text. -max-length = 20 + # Controls whether the file is treated as readonly. + # Possible values: ["auto", "always", "never"] + # + # - "auto": Treats the file as readonly until you edit it. + # - "always": Always treats the file as readonly. + # - "never": Always treats the file as non-readonly. + # + # Readonly means the file is not editable, and LSP requests such as + # code actions or completions will not be sent to the server. This avoids + # dynamic computation and allows pre-indexed results to be loaded directly, + # improving performance. + readonly = "auto" -# If array initializer elements are more than this, no inlay hints will be shown. -max-array-elements = 3 + # Controls how header files are treated. + # Possible values: ["auto", "always", "never"] + # + # - "auto": Attempts to infer the header context first. If no header context + # is found, the file will be treated as a normal source file. + # - "always": Always treats the file as a header file. If no header context + # is found, errors will be reported. + # - "never": Always treats the file as a source file. + # + # Header context refers to the related source files or additional metadata + # linked to the header file. 
+ header = "auto" -# Show implicit cast as hint like `1 as int` -implicit-cast = true \ No newline at end of file + # Specifies extra header contexts (file paths) for the file. + # Normally, header contexts are inferred automatically once the file is indexed. + # However, if you need immediate context before indexing completes, you can + # provide it manually using this field. + contexts = [] diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 00000000..eb49e9bc --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,179 @@ +# Configuration + +This document describes the configuration options of clice. + +## Server + +| Name | Type | Default | +| ---------------------- | --------- | ------- | +| `server.moduleSupport` | `boolean` | `false` | + +Whether to enable module support. +
+ +| Name | Type | Default | +| ------------------- | --------- | ------- | +| `server.overSearch` | `boolean` | `true` | + +- `false`: Limits the symbol search scope to files connected through the **include graph**, which is efficient but does not handle symbols defined independently in other files. + +For example: + +```cpp +/// a.h +struct Foo {}; + +/// b.cpp +#include "a.h" +Foo foo1; + +/// c.cpp +#include "a.h" +Foo foo2; +``` + +If you look up the symbol `Foo` in `b.cpp`, the include graph guides the search path as follows: `b.cpp` -> `a.h` -> `c.cpp`. All other files are ignored. When you have a really large project, this can save a lot of time. + +- `true`: Expands the search to all index files, ignoring the include graph. This is less efficient but ensures all references to a symbol can be found, even if they are not linked through `#include`. + +For example, consider the following files: + +```cpp +/// a.cpp +struct Foo {}; +Foo foo1; + +/// b.cpp +void foo(struct Foo foo2); +``` + +In such a case, the symbol `Foo` is independently declared in multiple files, so finding all references to `Foo` requires searching all index files. +
+ +## Rule + +`[rules]` is an array of objects, each with the following properties. Note that the order of rules matters: clice applies the first matching rule to the file. +
+ +| Name | Type | +| ----------------- | -------- | +| `[rules].pattern` | `string` | + +Glob pattern for matching files. If the pattern matches the file path, clice will apply the rule to the file. + +Normally, the pattern is a file path. However, you can also use the following syntax to match multiple files: + +- `*`: Matches one or more characters in a path segment. +- `?`: Matches a single character in a path segment. +- `**`: Matches any number of path segments, including none. +- `{}`: Groups conditions (e.g., `**/*.{ts,js}` matches all TypeScript and JavaScript files). +- `[]`: Declares a range of characters to match in a path segment (e.g., `example.[0-9]` matches `example.0`, `example.1`, etc.). +- `[!...]`: Negates a range of characters to match in a path segment (e.g., `example.[!0-9]` matches `example.a`, `example.b`, but not `example.0`). +
+ +| Name | Type | Default | +| ---------------- | ------------------- | ------- | +| `[rules].append` | `array` of `string` | `[]` | + +Commands to append to the original command list. For example, `append = ["-std=c++17"]`. +
+ +| Name | Type | Default | +| ---------------- | ------------------- | ------- | +| `[rules].remove` | `array` of `string` | `[]` | + +Commands to remove from the original command list. For example, `remove = ["-std=c++11"]`. +
+ +| Name | Type | Default | +| ------------------ | -------- | -------- | +| `[rules].readonly` | `string` | `"auto"` | + +Controls whether the file is treated as readonly. Value could be one of `"auto"`, `"always"` and `"never"`. + +- `"auto"`: Treats the file as readonly until you edit it. +- `"always"`: Always treats the file as readonly. +- `"never"`: Always treats the file as non-readonly. + +Readonly means the file is not editable, and LSP requests such as code actions or completions will not be triggered on it. This avoids dynamic computation and allows pre-indexed results to be loaded directly, improving performance. +
+ +| Name | Type | Default | +| ---------------- | -------- | -------- | +| `[rules].header` | `string` | `"auto"` | + +Controls how header files are treated. Value could be one of `"auto"`, `"always"` and `"never"`. + +- `"auto"`: Attempts to infer the header context first. If no header context is found, the file will be treated as a normal source file. +- `"always"`: Always treats the file as a header file. If no header context is found, errors will be reported. +- `"never"`: Always treats the file as a source file. + +Header context refers to the related source files or additional metadata linked to the header file. +
+ +| Name | Type | Default | +| ------------------ | ------------------- | ------- | +| `[rules].contexts` | `array` of `string` | `[]` | + +Specifies extra header contexts (file paths) for the file. + +Normally, header contexts are inferred automatically once the file is indexed. However, if you need immediate context before indexing completes, you can provide it manually using this field. + +## Cache + +| Name | Type | Default | +| ----------- | -------- | ----------------------------- | +| `cache.dir` | `string` | `"${workspace}/.clice/cache"` | + +Directory for storing PCH and PCM files. +
+ +| Name | Type | Default | +| ------------- | -------- | ------- | +| `cache.limit` | `number` | `0` | + +Maximum number of cache files to keep. If the total exceeds this limit, clice deletes the oldest files automatically. Set to `0` to disable the limit. +
+ +## Index + +| Name | Type | Default | +| ----------- | -------- | ----------------------------- | +| `index.dir` | `string` | `"${workspace}/.clice/index"` | + +Directory for storing index files. +
+ +| Name | Type | Default | +| ------------------ | --------- | ------- | +| `index.background` | `boolean` | `true` | + +Whether to index files in the background. If `true`, clice will index files in the background when the server is idle. If `false`, you need to send an index request to index files. +
+ +| Name | Type | Default | +| --------------------- | --------- | ------- | +| `index.instantiation` | `boolean` | `true` | + +Whether to index entities inside template instantiations. For example: + +```cpp +struct X { static void foo(); }; +struct Y { static void foo(); }; + +template <typename T> +void foo() { + T::foo(); +} + +template void foo<X>(); + +int main() { + foo<Y>(); +} +``` + +If `index.instantiation` is `true`, clice will traverse declarations in template instantiations, such as `foo<X>` and `foo<Y>`, and index them. As a result, if you trigger `go-to-definition` on `foo` in `T::foo()`, clice will return the locations of `X::foo` and `Y::foo`. + +If `index.instantiation` is `false`, clice will not index entities inside template instantiations, and `go-to-definition` will return no results. +
diff --git a/include/Basic/Basic.h b/include/Basic/Basic.h index 4a849ed4..8476104d 100644 --- a/include/Basic/Basic.h +++ b/include/Basic/Basic.h @@ -1,13 +1,8 @@ #pragma once -#include -#include +#include -namespace clice { - -class ASTInfo; - -} +#include "llvm/ADT/StringRef.h" namespace clice::proto { @@ -29,17 +24,4 @@ using DocumentUri = std::string; // TODO: figure out URI. using URI = std::string; -/// Beacuse C++ does support string enum, so define `enum_type` for -/// tag when serialize/deserialize. -template -struct enum_type { - T value; - - using underlying_type = T; - - constexpr enum_type(T value) : value(value) {} - - friend bool operator== (const enum_type& lhs, const enum_type& rhs) = default; -}; - } // namespace clice::proto diff --git a/include/Basic/Location.h b/include/Basic/Location.h index d28f6cb9..45bd4f65 100644 --- a/include/Basic/Location.h +++ b/include/Basic/Location.h @@ -1,15 +1,19 @@ #pragma once -#include +#include "Basic.h" +#include "Support/Enum.h" namespace clice::proto { /// A set of predefined position encoding kinds. -struct PositionEncodingKind : enum_type { - using enum_type::enum_type; - constexpr inline static string_literal UTF8 = "utf-8"; - constexpr inline static string_literal UTF16 = "utf-16"; - constexpr inline static string_literal UTF32 = "utf-32"; +struct PositionEncodingKind : refl::Enum { + using Enum::Enum; + + constexpr inline static std::string_view UTF8 = "utf-8"; + constexpr inline static std::string_view UTF16 = "utf-16"; + constexpr inline static std::string_view UTF32 = "utf-32"; + + constexpr inline static std::array All = {UTF8, UTF16, UTF32}; }; struct Position { diff --git a/include/Basic/SourceCode.h b/include/Basic/SourceCode.h index debf8865..e19c0ded 100644 --- a/include/Basic/SourceCode.h +++ b/include/Basic/SourceCode.h @@ -5,26 +5,26 @@ namespace clice { +/// Measure the length of the content with the specified encoding kind. 
+std::size_t remeasure(llvm::StringRef content, proto::PositionEncodingKind kind); + /// Convert a clang::SourceLocation to a proto::Position according to the -/// specified encoding kind. Note that `SourceLocation` in clang is one-based and +/// specified encoding kind. Note that `SourceLocation` in clang is 1-based and /// is always encoded in UTF-8. proto::Position toPosition(llvm::StringRef content, clang::SourceLocation location, proto::PositionEncodingKind kind, - const clang::SourceManager& srcMgr); + const clang::SourceManager& SM); -/// Same as above, but for a group of locations. It is more efficient than calling -/// `toLocation` multiple times. Note that the locations must be sorted. -std::vector toPosition(llvm::StringRef content, - llvm::ArrayRef locations, - proto::PositionEncodingKind kind, - const clang::SourceManager& srcMgr); +/// Same as above, but content is retrieved from the `SourceManager`. +proto::Position toPosition(clang::SourceLocation location, + proto::PositionEncodingKind kind, + const clang::SourceManager& SM); -/// Convert a proto::Position to a clang::SourceLocation according to the -/// specified encoding kind. If any error occurs, return an invalid location. -clang::SourceLocation toSourceLocation(llvm::StringRef content, - proto::Position position, - proto::PositionEncodingKind kind, - const clang::SourceManager& srcMgr); +/// Convert a proto::Position to a file offset in the content with the specified +/// encoding kind. 
+std::size_t toOffset(llvm::StringRef content, + proto::Position position, + proto::PositionEncodingKind kind); } // namespace clice diff --git a/include/Feature/SemanticTokens.h b/include/Feature/SemanticTokens.h index 1de97eb5..a82a92da 100644 --- a/include/Feature/SemanticTokens.h +++ b/include/Feature/SemanticTokens.h @@ -56,10 +56,16 @@ struct SemanticTokens { } // namespace clice::proto +namespace clice { + +class ASTInfo; + +} + namespace clice::feature { /// FIXME: -proto::SemanticTokens semanticTokens(ASTInfo& compiler, llvm::StringRef filename); +proto::SemanticTokens semanticTokens(class ASTInfo& info, llvm::StringRef filename); } // namespace clice::feature diff --git a/include/Server/Config.h b/include/Server/Config.h index d996ffed..1cff2a1f 100644 --- a/include/Server/Config.h +++ b/include/Server/Config.h @@ -5,32 +5,44 @@ namespace clice::config { /// Read the config file, call when the program starts. -void parse(llvm::StringRef execute, llvm::StringRef filepath); +void load(llvm::StringRef execute, llvm::StringRef filename); -/// initialize the config, replace all predefined variables in the config file. +/// Initialize the config, replace all predefined variables in the config file. /// called in `Server::initialize`. 
void init(std::string_view workplace); -struct ServerOption { - std::string mode = "socket"; - unsigned int port; - std::string address; +struct ServerOptions { + std::vector compile_commands_dirs; }; -struct FrontendOption { +struct CacheOptions { + std::string dir; + uint32_t limit = 0; +}; + +struct IndexOptions { + std::string dir; + bool implicitInstantiation = true; +}; + +struct Rule { + std::string pattern; std::vector append; std::vector remove; - std::string index_directory = "${workplace}/.clice/index"; - std::string cache_directory = "${workplace}/.clice/cache"; - std::string resource_dictionary = "${binary}/../../lib/clang/${llvm_version}"; - std::vector compile_commands_directorys = {"${workplace}/build"}; + std::string readonly; + std::string header; + std::vector context; }; -llvm::StringRef workplace(); +extern llvm::StringRef version; +extern llvm::StringRef binary; +extern llvm::StringRef llvm_version; +extern llvm::StringRef workspace; -const ServerOption& server(); - -const FrontendOption& frontend(); +extern const ServerOptions& server; +extern const CacheOptions& cache; +extern const IndexOptions& index; +extern llvm::ArrayRef rules; }; // namespace clice::config diff --git a/include/Support/Enum.h b/include/Support/Enum.h index d1387019..3b24bd07 100644 --- a/include/Support/Enum.h +++ b/include/Support/Enum.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -240,6 +241,31 @@ private: underlying m_Value; }; +template + requires (!integral) +class Enum { +public: + constexpr Enum(underlying value) : m_Value(value) { + static_assert( + requires { Derived::All; }, + "Derived enum must define all possible enum values."); + + assert(std::ranges::any_of(Derived::All, [&](auto v) { return v == value; }) && + "Invalid enum value."); + } + + constexpr Enum(const Enum&) = default; + + constexpr friend bool operator== (Enum lhs, Enum rhs) = default; + + constexpr underlying value() const { + return m_Value; + } + +private: + 
underlying m_Value; +}; + template concept special_enum = requires { T::is_special_enum_v; diff --git a/src/Basic/SourceCode.cpp b/src/Basic/SourceCode.cpp new file mode 100644 index 00000000..22ef4bf3 --- /dev/null +++ b/src/Basic/SourceCode.cpp @@ -0,0 +1,162 @@ +#include "Basic/SourceCode.h" + +namespace clice { + +/// @brief Iterates over Unicode codepoints in a UTF-8 encoded string and invokes a callback for +/// each codepoint. +/// +/// Processes the input UTF-8 string, calculating the length of each Unicode codepoint in both +/// UTF-8 (bytes) and UTF-16 (code units), and passes these lengths to the callback. +/// Iteration stops early if the callback returns `false`. +/// +/// ASCII characters are treated as 1-byte UTF-8 codepoints with a UTF-16 length of 1. +/// Non-ASCII characters are processed based on their leading byte to determine UTF-8 length: +/// - Valid lengths are 2 to 4 bytes. +/// - Astral codepoints (UTF-8 length of 4) have a UTF-16 length of 2 code units. +/// Invalid UTF-8 sequences are treated as single-byte ASCII characters. +/// +/// Returns `true` if the callback stopped the iteration early, `false` if the whole string was processed. +template +static bool iterateCodepoints(llvm::StringRef content, const Callback& callback) { + // Iterate over the input string, processing each codepoint. + for(size_t index = 0; index < content.size();) { + unsigned char c = static_cast(content[index]); + + // Handle ASCII characters (1-byte UTF-8, 1-code-unit UTF-16). + if(!(c & 0x80)) [[likely]] { + if(!callback(1, 1)) { + return true; + } + + ++index; + continue; + } + + // Determine the length of the codepoint in UTF-8 by counting the leading 1s. + size_t length = llvm::countl_one(c); + + // Validate UTF-8 encoding: length must be between 2 and 4. + if(length < 2 || length > 4) [[unlikely]] { + assert(false && "Invalid UTF-8 sequence"); + + // Treat the byte as an ASCII character. 
+ if(!callback(1, 1)) { + return true; + } + + ++index; + continue; + } + + // Advance the index by the length of the current UTF-8 codepoint. + index += length; + + // Calculate the UTF-16 length: astral codepoints (4-byte UTF-8) take 2 code units. + if(!callback(length, length == 4 ? 2 : 1)) { + return true; + } + } + + return false; +} + +std::size_t remeasure(llvm::StringRef content, proto::PositionEncodingKind kind) { + if(kind == proto::PositionEncodingKind::UTF8) { + return content.size(); + } + + if(kind == proto::PositionEncodingKind::UTF16) { + std::size_t length = 0; + iterateCodepoints(content, [&](size_t, size_t utf16Length) { + length += utf16Length; + return true; + }); + return length; + } + + if(kind == proto::PositionEncodingKind::UTF32) { + std::size_t length = 0; + iterateCodepoints(content, [&](size_t, size_t) { + length += 1; + return true; + }); + return length; + } + + std::unreachable(); +} + +proto::Position toPosition(llvm::StringRef content, + clang::SourceLocation location, + proto::PositionEncodingKind kind, + const clang::SourceManager& SM) { + assert(location.isValid() && location.isFileID() && + "SourceLocation must be valid and not a macro location"); + auto [fileID, offset] = SM.getDecomposedSpellingLoc(location); + + /// Line and column in LSP are 0-based but clang's SourceLocation is 1-based. + auto line = SM.getLineNumber(fileID, offset) - 1; + auto column = SM.getColumnNumber(fileID, offset) - 1; + + proto::Position position; + /// Line doesn't need to be adjusted. It is encoding-independent. + position.line = line; + /// Column needs to be adjusted based on the encoding. 
+ position.character = remeasure(content.substr(offset - column, column), kind); + return position; +} + +proto::Position toPosition(clang::SourceLocation location, + proto::PositionEncodingKind kind, + const clang::SourceManager& SM) { + bool isInvalid = false; + llvm::StringRef content = SM.getCharacterData(location, &isInvalid); + assert(!isInvalid && "Invalid SourceLocation"); + return toPosition(content, location, kind, SM); +} + +std::size_t toOffset(llvm::StringRef content, + proto::Position position, + proto::PositionEncodingKind kind) { + std::size_t offset = 0; + for(auto i = 0; i < position.line; i++) { + auto pos = content.find('\n'); + assert(pos != llvm::StringRef::npos && "Line value is out of range"); + + offset += pos + 1; + content = content.substr(pos + 1); + } + + /// Drop the content after the line. + content = content.take_until([](char c) { return c == '\n'; }); + assert(position.character <= content.size() && "Character value is out of range"); + + if(kind == proto::PositionEncodingKind::UTF8) { + offset += position.character; + return offset; + } + + if(kind == proto::PositionEncodingKind::UTF16) { + iterateCodepoints(content, [&](size_t utf8Length, size_t utf16Length) { + assert(position.character >= utf16Length && "Character value is out of range"); + position.character -= utf16Length; + offset += utf8Length; + return position.character != 0; + }); + return offset; + } + + if(kind == proto::PositionEncodingKind::UTF32) { + iterateCodepoints(content, [&](size_t utf8Length, size_t) { + assert(position.character >= 1 && "Character value is out of range"); + position.character -= 1; + offset += utf8Length; + return position.character != 0; + }); + return offset; + } + + std::unreachable(); +} + +} // namespace clice diff --git a/src/Server/Config.cpp b/src/Server/Config.cpp index 6e013348..a7008574 100644 --- a/src/Server/Config.cpp +++ b/src/Server/Config.cpp @@ -7,10 +7,7 @@ namespace clice::config { -namespace { - -/// predefined 
variables. -llvm::StringMap predefined = { +static llvm::StringMap predefined = { /// the directory of the workplace. {"workplace", "" }, /// the directory of the executable. @@ -21,16 +18,78 @@ llvm::StringMap predefined = { {"llvm_version", "20" }, }; +/// predefined variables. +llvm::StringRef version = predefined["version"]; +llvm::StringRef binary = predefined["binary"]; +llvm::StringRef llvm_version = predefined["llvm_version"]; +llvm::StringRef workspace = predefined["workplace"]; + struct Config { - ServerOption server; - FrontendOption frontend; + ServerOptions server; + CacheOptions cache; + IndexOptions index; + std::vector rules; }; /// global config instance. -Config config = {}; +static Config config = {}; + +const ServerOptions& server = config.server; +const CacheOptions& cache = config.cache; +const IndexOptions& index = config.index; +llvm::ArrayRef rules = config.rules; + +template +static void parse(Object& object, auto&& value) { + if constexpr(std::is_same_v) { + if(auto v = value.as_boolean()) { + object = v->get(); + } + } else if constexpr(clice::integral) { + if(auto v = value.as_integer()) { + object = v->get(); + } + } else if constexpr(std::is_same_v) { + if(auto v = value.as_string()) { + object = v->get(); + } + } else if constexpr(clice::is_specialization_of) { + if(auto v = value.as_array()) { + for(auto& item: *v) { + object.emplace_back(); + parse(object.back(), item); + } + } + } else if constexpr(refl::reflectable) { + if(auto table = value.as_table()) { + refl::foreach(object, [&](std::string_view key, auto& member) { + if(auto v = (*table)[key]) { + parse(member, v); + } + }); + } + } else { + static_assert(dependent_false, "Unsupported type"); + } +} + +void load(llvm::StringRef execute, llvm::StringRef filename) { + predefined["version"] = "0.0.1"; + predefined["binary"] = execute; + predefined["llvm_version"] = "20"; + + auto toml = toml::parse_file(filename); + if(toml.failed()) { + log::fatal("Failed to parse config file: 
{0}. Because: {1}", + filename, + toml.error().description()); + } + + parse(config, toml.table()); +} /// replace all predefined variables in the text. -void resolve(std::string& input) { +static void resolve(std::string& input) { std::string_view text = input; llvm::SmallString<128> path; std::size_t pos = 0; @@ -60,58 +119,26 @@ void resolve(std::string& input) { input = path.str(); } -} // namespace - -void parse(llvm::StringRef execute, llvm::StringRef filepath) { - predefined["binary"] = execute; - - auto toml = toml::parse_file(filepath); - if(toml.failed()) { - log::fatal("Failed to parse config file: {0}. Because: {1}", - filepath, - toml.error().description()); - } - - auto table = toml["server"]; - if(table) { - if(auto mode = table["mode"]) { - config.server.mode = mode.as_string()->get(); - } - - if(auto port = table["port"]) { - config.server.port = port.as_integer()->get(); - } - - if(auto address = table["address"]) { - config.server.address = address.as_string()->get(); +template +static void replace(Object& object) { + if constexpr(std::is_same_v) { + resolve(object); + } else if constexpr(clice::is_specialization_of) { + for(auto& item: object) { + replace(item); } + } else if constexpr(refl::reflectable) { + refl::foreach(object, [&](auto, auto& member) { replace(member); }); } } void init(std::string_view workplace) { - predefined["workplace"] = workplace; + predefined["workspace"] = workplace; - resolve(config.frontend.index_directory); - resolve(config.frontend.cache_directory); - resolve(config.frontend.resource_dictionary); - for(auto& directory: config.frontend.compile_commands_directorys) { - resolve(directory); - } + replace(config); log::info("Config initialized successfully: {0}", json::serialize(config)); return; } -llvm::StringRef workplace() { - return predefined["workplace"]; -} - -const ServerOption& server() { - return config.server; -} - -const FrontendOption& frontend() { - return config.frontend; -} - } // namespace 
clice::config diff --git a/src/Server/Lifestyle.cpp b/src/Server/Lifestyle.cpp index 8eb7f8aa..0264cbb5 100644 --- a/src/Server/Lifestyle.cpp +++ b/src/Server/Lifestyle.cpp @@ -15,7 +15,7 @@ async::promise Server::onInitialize(json::Value id, const proto::Initializ async::response(std::move(id), json::serialize(result)); /// Load the compile commands from the workspace. - for(auto dir: config::frontend().compile_commands_directorys) { + for(auto dir: config::server.compile_commands_dirs) { synchronizer.sync(dir + "/compile_commands.json"); } @@ -24,7 +24,7 @@ async::promise Server::onInitialize(json::Value id, const proto::Initializ async::promise Server::onInitialized(const proto::InitializedParams& params) { proto::DidChangeWatchedFilesRegistrationOptions options; - for(auto& dir: config::frontend().compile_commands_directorys) { + for(auto& dir: config::server.compile_commands_dirs) { options.watchers.emplace_back(proto::FileSystemWatcher{ dir + "/compile_commands.json", }); diff --git a/src/Server/Scheduler.cpp b/src/Server/Scheduler.cpp index d4b1c7e9..d1b8be14 100644 --- a/src/Server/Scheduler.cpp +++ b/src/Server/Scheduler.cpp @@ -6,7 +6,7 @@ namespace clice { static std::string getPCHOutPath(llvm::StringRef srcPath) { llvm::SmallString<128> outPath = srcPath; - path::replace_path_prefix(outPath, config::workplace(), config::frontend().cache_directory); + path::replace_path_prefix(outPath, config::workspace, config::cache.dir); path::replace_extension(outPath, ".pch"); if(auto dir = path::parent_path(outPath); !fs::exists(dir)) { @@ -20,7 +20,7 @@ static std::string getPCHOutPath(llvm::StringRef srcPath) { static std::string getPCMOutPath(llvm::StringRef srcPath) { llvm::SmallString<128> outPath = srcPath; - path::replace_path_prefix(outPath, config::workplace(), config::frontend().cache_directory); + path::replace_path_prefix(outPath, config::workspace, config::cache.dir); path::replace_extension(outPath, ".pcm"); if(auto dir = path::parent_path(outPath); 
!fs::exists(dir)) { @@ -295,7 +295,7 @@ async::promise<> Scheduler::updateAST(llvm::StringRef filename, void Scheduler::loadCache() { llvm::SmallString<128> fileName; - path::append(fileName, config::frontend().cache_directory, "cache.json"); + path::append(fileName, config::cache.dir, "cache.json"); auto buffer = llvm::MemoryBuffer::getFile(fileName); if(!buffer) { @@ -348,7 +348,7 @@ void Scheduler::saveCache() const { result.try_emplace("PCM", std::move(pcmArray)); llvm::SmallString<128> fileName; - path::append(fileName, config::frontend().cache_directory, "cache.json"); + path::append(fileName, config::cache.dir, "cache.json"); std::error_code EC; llvm::raw_fd_ostream stream(fileName, EC, llvm::sys::fs::OF_Text); diff --git a/src/Server/Server.cpp b/src/Server/Server.cpp index a08a16be..222059fe 100644 --- a/src/Server/Server.cpp +++ b/src/Server/Server.cpp @@ -18,7 +18,7 @@ int Server::run(int argc, const char** argv) { if(cl::config.empty()) { log::warn("No config file specified; using default configuration."); } else { - config::parse(argv[0], cl::config.getValue()); + config::load(argv[0], cl::config.getValue()); log::info("Successfully loaded configuration file from {0}.", cl::config.getValue()); } diff --git a/unittests/Basic/SourceCode.cpp b/unittests/Basic/SourceCode.cpp new file mode 100644 index 00000000..ee2151f3 --- /dev/null +++ b/unittests/Basic/SourceCode.cpp @@ -0,0 +1,22 @@ +#include "../Test.h" +#include "Basic/SourceCode.h" + +namespace clice { + +namespace { + +TEST(SourceCode, Remeasure) { + EXPECT_EQ(remeasure("", proto::PositionEncodingKind::UTF8), 0); + EXPECT_EQ(remeasure("ascii", proto::PositionEncodingKind::UTF8), 5); + + EXPECT_EQ(remeasure("↓", proto::PositionEncodingKind::UTF16), 1); + EXPECT_EQ(remeasure("¥", proto::PositionEncodingKind::UTF16), 1); + + EXPECT_EQ(remeasure("😂", proto::PositionEncodingKind::UTF16), 2); + EXPECT_EQ(remeasure("😂", proto::PositionEncodingKind::UTF32), 1); +} + +} // namespace + +} // namespace clice +