From 46ba1e4db67bd163608dd48eb74479a00a5770f8 Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 27 Mar 2026 13:15:49 +0800 Subject: [PATCH] refactor: simplify CompilationDatabase, extract ArgumentParser, remove pimpl (#371) Co-authored-by: Claude Opus 4.6 --- CMakeLists.txt | 3 +- src/command/argument_parser.cpp | 224 +++ src/command/argument_parser.h | 114 ++ src/command/command.cpp | 1565 ++++++----------- src/command/command.h | 296 +++- src/command/driver.h | 146 -- src/command/search_config.cpp | 3 +- src/command/toolchain.cpp | 14 +- src/command/toolchain_provider.cpp | 209 --- src/command/toolchain_provider.h | 75 - src/server/master_server.cpp | 7 +- src/support/object_pool.h | 20 +- tests/unit/command/argument_parser_tests.cpp | 91 + tests/unit/command/command_tests.cpp | 655 ++++--- .../unit/command/toolchain_provider_tests.cpp | 91 +- tests/unit/command/toolchain_tests.cpp | 10 +- tests/unit/server/stateless_worker_tests.cpp | 10 +- tests/unit/server/worker_test_helpers.h | 11 +- tests/unit/test/tester.cpp | 4 +- 19 files changed, 1724 insertions(+), 1824 deletions(-) create mode 100644 src/command/argument_parser.cpp create mode 100644 src/command/argument_parser.h delete mode 100644 src/command/driver.h delete mode 100644 src/command/toolchain_provider.cpp delete mode 100644 src/command/toolchain_provider.h create mode 100644 tests/unit/command/argument_parser_tests.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 3e711b18..0905ecbd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,10 +131,10 @@ add_custom_target(generate_flatbuffers_schema DEPENDS "${GENERATED_HEADER}") # Temporary migration-only build graph. add_library(clice-core STATIC + "${PROJECT_SOURCE_DIR}/src/command/argument_parser.cpp" "${PROJECT_SOURCE_DIR}/src/command/command.cpp" "${PROJECT_SOURCE_DIR}/src/command/search_config.cpp" "${PROJECT_SOURCE_DIR}/src/command/toolchain.cpp" - "${PROJECT_SOURCE_DIR}/src/command/toolchain_provider.cpp" "${PROJECT_SOURCE_DIR}/src/compile/compilation.cpp" "${PROJECT_SOURCE_DIR}/src/compile/compilation_unit.cpp" "${PROJECT_SOURCE_DIR}/src/compile/diagnostic.cpp" @@ -187,6 +187,7 @@ target_link_libraries(clice-core PUBLIC flatbuffers eventide::ipc::lsp eventide::serde::toml + simdjson::simdjson ) add_executable(clice "${PROJECT_SOURCE_DIR}/src/clice.cc") diff --git a/src/command/argument_parser.cpp b/src/command/argument_parser.cpp new file mode 100644 index 00000000..3fa3fc3f --- /dev/null +++ b/src/command/argument_parser.cpp @@ -0,0 +1,224 @@ +#include "command/argument_parser.h" + +#include + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" +#include "clang/Driver/Driver.h" +#include "clang/Driver/Options.h" + +namespace clice { + +namespace { + +namespace opt = llvm::opt; +namespace driver = clang::driver; + +/// Access private members of OptTable via the Thief pattern. +bool enable_dash_dash_parsing(const opt::OptTable& table); +bool enable_grouped_short_options(const opt::OptTable& table); + +template +struct Thief { + friend bool enable_dash_dash_parsing(const opt::OptTable& table) { + return table.*MP1; + } + + friend bool enable_grouped_short_options(const opt::OptTable& table) { + return table.*MP2; + } +}; + +template struct Thief<&opt::OptTable::DashDashParsing, &opt::OptTable::GroupedShortOptions>; + +auto& option_table = driver::getDriverOptTable(); + +} // namespace + +std::unique_ptr ArgumentParser::parse_one(unsigned& index) { + assert(!enable_dash_dash_parsing(option_table)); + assert(!enable_grouped_short_options(option_table)); + return option_table.ParseOneArg(*this, index); +} + +using ID = clang::driver::options::ID; + +bool is_discarded_option(unsigned id) { + switch(id) { + /// Input file and output — we manage these ourselves. + case ID::OPT_INPUT: + case ID::OPT_c: + case ID::OPT_o: + case ID::OPT_dxc_Fc: + case ID::OPT_dxc_Fo: + + /// PCH building. + case ID::OPT_emit_pch: + case ID::OPT_include_pch: + case ID::OPT__SLASH_Yu: + case ID::OPT__SLASH_Fp: + + /// Dependency scan. + case ID::OPT_E: + case ID::OPT_M: + case ID::OPT_MM: + case ID::OPT_MD: + case ID::OPT_MMD: + case ID::OPT_MF: + case ID::OPT_MT: + case ID::OPT_MQ: + case ID::OPT_MG: + case ID::OPT_MP: + case ID::OPT_show_inst: + case ID::OPT_show_encoding: + case ID::OPT_show_includes: + case ID::OPT__SLASH_showFilenames: + case ID::OPT__SLASH_showFilenames_: + case ID::OPT__SLASH_showIncludes: + case ID::OPT__SLASH_showIncludes_user: + + /// C++ modules — we handle these ourselves. + case ID::OPT_fmodule_file: + case ID::OPT_fmodule_output: + case ID::OPT_fprebuilt_module_path: return true; + + default: return false; + } +} + +bool is_user_content_option(unsigned id) { + switch(id) { + case ID::OPT_I: + case ID::OPT_isystem: + case ID::OPT_iquote: + case ID::OPT_idirafter: + case ID::OPT_D: + case ID::OPT_U: + case ID::OPT_include: return true; + default: return false; + } +} + +bool is_include_path_option(unsigned id) { + switch(id) { + case ID::OPT_I: + case ID::OPT_isystem: + case ID::OPT_iquote: + case ID::OPT_idirafter: return true; + default: return false; + } +} + +bool is_xclang_option(unsigned id) { + return id == ID::OPT_Xclang; +} + +std::optional get_option_id(llvm::StringRef argument) { + llvm::SmallString<64> buffer = argument; + + if(argument.ends_with("=")) { + buffer += "placeholder"; + } + + unsigned index = 1; + std::array arguments = {"clang++", buffer.c_str(), "placeholder"}; + llvm::opt::InputArgList arg_list(arguments.data(), arguments.data() + arguments.size()); + + if(auto arg = option_table.ParseOneArg(arg_list, index)) { + return arg->getOption().getID(); + } else { + return {}; + } +} + +llvm::StringRef resource_dir() { + static std::string dir = [] { + // Use address of this lambda to locate our binary via dladdr/proc. + static int anchor; + auto exe = llvm::sys::fs::getMainExecutable("", &anchor); + if(exe.empty()) { + return std::string{}; + } + return clang::driver::Driver::GetResourcesPath(exe); + }(); + return dir; +} + +bool is_codegen_option(unsigned id, const llvm::opt::Option& opt) { + /// Debug info options form a group (-g, -gdwarf-*, -gsplit-dwarf, etc.). + if(opt.matches(ID::OPT_DebugInfo_Group)) { + return true; + } + + switch(id) { + /// Position-independent code — pure codegen, no macro or semantic effect. + case ID::OPT_fPIC: + case ID::OPT_fno_PIC: + case ID::OPT_fpic: + case ID::OPT_fno_pic: + case ID::OPT_fPIE: + case ID::OPT_fno_PIE: + case ID::OPT_fpie: + case ID::OPT_fno_pie: + + /// Frame pointer and unwind tables — pure codegen. + case ID::OPT_fomit_frame_pointer: + case ID::OPT_fno_omit_frame_pointer: + case ID::OPT_funwind_tables: + case ID::OPT_fno_unwind_tables: + case ID::OPT_fasynchronous_unwind_tables: + case ID::OPT_fno_asynchronous_unwind_tables: + + /// Stack protection — pure codegen. + case ID::OPT_fstack_protector: + case ID::OPT_fstack_protector_strong: + case ID::OPT_fstack_protector_all: + case ID::OPT_fno_stack_protector: + + /// Section splitting, LTO, semantic interposition — pure codegen/linker. + case ID::OPT_fdata_sections: + case ID::OPT_fno_data_sections: + case ID::OPT_ffunction_sections: + case ID::OPT_fno_function_sections: + case ID::OPT_flto: + case ID::OPT_flto_EQ: + case ID::OPT_fno_lto: + case ID::OPT_fsemantic_interposition: + case ID::OPT_fno_semantic_interposition: + case ID::OPT_fvisibility_inlines_hidden: + + /// Diagnostics output formatting — doesn't affect analysis. + case ID::OPT_fcolor_diagnostics: + case ID::OPT_fno_color_diagnostics: + + /// Floating-point codegen — doesn't define macros (unlike -ffast-math). + case ID::OPT_ftrapping_math: + case ID::OPT_fno_trapping_math: return true; + + default: return false; + } +} + +std::string print_argv(llvm::ArrayRef args) { + std::string buf; + llvm::raw_string_ostream os(buf); + bool sep = false; + for(llvm::StringRef arg: args) { + if(sep) + os << ' '; + sep = true; + if(llvm::all_of(arg, llvm::isPrint) && + arg.find_first_of(" \t\n\"\\") == llvm::StringRef::npos) { + os << arg; + continue; + } + os << '"'; + os.write_escaped(arg, /*UseHexEscapes=*/true); + os << '"'; + } + return std::move(os.str()); +} + +} // namespace clice diff --git a/src/command/argument_parser.h b/src/command/argument_parser.h new file mode 100644 index 00000000..0d1f5174 --- /dev/null +++ b/src/command/argument_parser.h @@ -0,0 +1,114 @@ +#pragma once + +#include +#include +#include +#include + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Support/Allocator.h" + +namespace clice { + +class ArgumentParser final : public llvm::opt::ArgList { +public: + ArgumentParser(llvm::BumpPtrAllocator* allocator) : allocator(allocator) {} + + ~ArgumentParser() { + /// We never use the private `Args` field, so make sure it's empty. + if(getArgs().size() != 0) { + std::abort(); + } + } + + const char* getArgString(unsigned index) const override { + return arguments[index]; + } + + unsigned getNumInputArgStrings() const override { + return arguments.size(); + } + + const char* MakeArgStringRef(llvm::StringRef s) const override { + auto p = allocator->Allocate(s.size() + 1); + std::ranges::copy(s, p); + p[s.size()] = '\0'; + return p; + } + + /// Parse a single argument at the given index. Defined out-of-line in + /// argument_parser.cpp to isolate the heavy clang driver option table include. + std::unique_ptr parse_one(unsigned& index); + + void parse(llvm::ArrayRef arguments, const auto& on_parse, const auto& on_error) { + this->arguments = arguments; + + unsigned it = 0; + while(it != arguments.size()) { + llvm::StringRef s = arguments[it]; + + if(s.empty()) [[unlikely]] { + it += 1; + continue; + } + + auto prev = it; + auto arg = parse_one(it); + assert(it > prev && "parser failed to consume argument"); + + if(!arg) [[unlikely]] { + assert(it >= arguments.size() && "unexpected parser error!"); + assert(it - prev - 1 && "no missing arguments!"); + + on_error(prev, it - prev - 1); + break; + } + + on_parse(std::move(arg)); + } + } + +private: + llvm::BumpPtrAllocator* allocator; + + llvm::ArrayRef arguments; +}; + +/// Check if an option is a codegen-only flag that doesn't affect frontend +/// semantics (parsing, diagnostics, code completion). These are pure +/// backend/linker concerns irrelevant to an LSP server. +/// +/// Note: options that DO affect semantics are intentionally kept: +/// -fno-exceptions, -fno-rtti, -std=*, -march=*, -fsanitize=*, -O*, -W* +/// +/// Defined out-of-line in argument_parser.cpp (needs clang driver option IDs). +bool is_codegen_option(unsigned id, const llvm::opt::Option& opt); + +/// Options that are completely irrelevant to an LSP and should be discarded +/// (input/output, PCH building, dependency scan, C++ modules). +bool is_discarded_option(unsigned id); + +/// User-content options that go into the per-file patch rather than the +/// shared canonical command: -I, -D, -U, -include, -isystem, -iquote, -idirafter. +bool is_user_content_option(unsigned id); + +/// Subset of user-content options that are include-path flags +/// (-I, -isystem, -iquote, -idirafter) — used for path absolutization. +bool is_include_path_option(unsigned id); + +/// Check if this is the -Xclang pass-through option. +bool is_xclang_option(unsigned id); + +/// Get the option ID for a specific argument string. +std::optional get_option_id(llvm::StringRef argument); + +/// Get the resource directory for clang builtin headers. Computed once +/// from the current executable path using Driver::GetResourcesPath. +llvm::StringRef resource_dir(); + +/// Format an argument list as a human-readable string: "[arg1 arg2 ...]". +std::string print_argv(llvm::ArrayRef args); + +} // namespace clice diff --git a/src/command/command.cpp b/src/command/command.cpp index ca158dc2..3e91a8a6 100644 --- a/src/command/command.cpp +++ b/src/command/command.cpp @@ -1,23 +1,18 @@ #include "command/command.h" +#include #include +#include #include #include #include -#include -#include "command/driver.h" +#include "simdjson.h" #include "command/toolchain.h" #include "support/filesystem.h" #include "support/logging.h" -#include "support/object_pool.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/JSON.h" -#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/StringSaver.h" namespace clice { @@ -25,1081 +20,641 @@ namespace clice { namespace { namespace ranges = std::ranges; -namespace json = llvm::json; - -using StringID = StringSet::ID; - -struct CompilationInfo { - /// The working directory of the compilation. - StringID directory = 0; - - /// The canonical compilation arguments(input file and output file are removed). - llvm::ArrayRef arguments; - - friend bool operator==(const CompilationInfo&, const CompilationInfo&) = default; -}; - -/// An item in the compilation database. -struct JSONItem { - /// The path of the source json file, so that we can know where this - /// json item from. - StringID json_src_path = 0; - - /// The file path of this json item. - StringID file_path = 0; - - /// The canonical compilation info of this item. - object_ptr info = {nullptr}; - - /// A file may have multiple compilation commands, we use - /// a chain to connect them. Note that this field does't - /// get involved in equality judgement or hash computing. - object_ptr next = {nullptr}; - - friend bool operator==(const JSONItem& lhs, const JSONItem& rhs) { - return lhs.json_src_path == rhs.json_src_path && lhs.file_path == rhs.file_path && - lhs.info == rhs.info; - } - - friend bool operator<(const JSONItem& lhs, const JSONItem& rhs) { - return std::tie(lhs.file_path, lhs.info) < std::tie(rhs.file_path, rhs.info); - } -}; - -struct JSONSource { - /// The path of the source json file. - StringID src_path; - - /// All json items in the json file, used for increment update. - std::vector> items; -}; - -using ID = clang::driver::options::ID; } // namespace -} // namespace clice - -namespace llvm { - -template <> -struct DenseMapInfo { - using T = clice::CompilationInfo; - - inline static T getEmptyKey() { - return T(llvm::DenseMapInfo::getEmptyKey()); - } - - inline static T getTombstoneKey() { - return T(llvm::DenseMapInfo::getTombstoneKey()); - } - - static unsigned getHashValue(const T& info) { - return llvm::hash_combine(info.directory, llvm::hash_combine_range(info.arguments)); - } - - static bool isEqual(const T& lhs, const T& rhs) { - return lhs == rhs; - } -}; - -template <> -struct DenseMapInfo { - using T = clice::JSONItem; - - inline static T getEmptyKey() { - return T(0, llvm::DenseMapInfo::getEmptyKey()); - } - - inline static T getTombstoneKey() { - return T(0, llvm::DenseMapInfo::getTombstoneKey()); - } - - static unsigned getHashValue(const T& value) { - return llvm::hash_combine(value.json_src_path, value.file_path, value.info.ptr); - } - - static bool isEqual(const T& lhs, const T& rhs) { - return lhs == rhs; - } -}; - -} // namespace llvm - -namespace clice { - -struct CompilationDatabase::Impl { - /// The memory pool which holds all elements of compilation database. - /// We never try to release the memory until it destructs. So don't - /// worry about the lifetime of allocated elements. - llvm::BumpPtrAllocator allocator; - - /// Keep all strings. - StringSet strings{allocator}; - - /// Keep all items in the `compile_commands.json`. - ObjectSet items{allocator}; - - /// Keep all canonical command infos, most of file actually - /// have the same canonical command. - ObjectSet infos{allocator}; - - /// All json source file. - llvm::SmallVector sources; - - /// All source files in the compilation database. - llvm::DenseMap> files; - - /// Pluggable toolchain provider: manages toolchain queries and caching. - ToolchainProvider toolchain; - - /// Cache of SearchConfig keyed by (CompilationInfo*, options_bits). - /// options_bits encodes the CommandOptions fields that affect the result, - /// so different option combinations don't pollute each other's cache entries. - using ConfigCacheKey = std::pair; - llvm::DenseMap search_config_cache; - - static std::uint8_t options_bits(const CommandOptions& options) { - return options.query_toolchain ? 1u : 0u; - } - - /// The clang options we want to filter in all cases, like -c and -o. - llvm::DenseSet filtered_options; - - ArgumentParser parser{&allocator}; - - /// Check if an argument matches the source file path, handling - /// Windows path separator differences (backslash vs forward slash). - static bool is_same_file(llvm::StringRef argument, llvm::StringRef file) { - if(argument == file) { - return true; - } - -#ifdef _WIN32 - // On Windows, cmake may use backslashes in `arguments` but forward - // slashes in `file`. Normalize and compare. - if(argument.size() == file.size()) { - for(std::size_t i = 0; i < argument.size(); i++) { - char a = argument[i] == '\\' ? '/' : argument[i]; - char b = file[i] == '\\' ? '/' : file[i]; - if(std::tolower(static_cast(a)) != - std::tolower(static_cast(b))) { - return false; - } - } - return true; - } -#endif - - return false; - } - - object_ptr save_compilation_info(this Impl& self, - llvm::StringRef file, - llvm::StringRef directory, - llvm::ArrayRef arguments) { - llvm::SmallVector stored_arguments; - - self.parser.set_arguments(arguments); - /// We don't want to parse all arguments here, it is time-consuming. But we - /// want to remove output and input file from arguments. They are main reasons - /// causing different file have different commands. - for(unsigned it = 0; it != arguments.size(); it++) { - llvm::StringRef argument = arguments[it]; - - if(is_same_file(argument, file)) { - continue; - } - - /// All possible output options prefix. - constexpr static std::string_view output_options[] = { - "-o", - "--output", - "/o", - "/Fo", - "/Fe", - "/Fd", - }; - - /// FIXME: This is a heuristic approach that covers the vast majority of cases, but - /// theoretical corner cases exist. For example, `-oxx` might be an argument for another - /// command, and processing it this way would lead to its incorrect removal. To fix - /// these corner cases, it's necessary to parse the command line fully. Additionally, - /// detailed benchmarks should be conducted to determine the time required for parsing - /// command-line arguments in order to decide if it's worth doing so. - if(ranges::any_of(output_options, [&](llvm::StringRef option) { - return argument.starts_with(option); - })) { - auto prev = it; - auto arg = self.parser.parse_one(it); - - /// FIXME: How to handle parse error here? - if(!arg) { - it = prev; - continue; - } - - auto id = arg->getOption().getID(); - if(id == ID::OPT_o || id == ID::OPT_dxc_Fo || id == ID::OPT__SLASH_o || - id == ID::OPT__SLASH_Fo || id == ID::OPT__SLASH_Fe) { - /// It will point to the next argument start but it also increases - /// in the next loop. So decrease it for not skipping next argument. - it -= 1; - continue; - } - - /// This argument doesn't represent output file, just recovery it. - it = prev; - } - - /// FIXME: Handle response file. - if(argument.starts_with("@")) { - LOG_WARN( - "clice currently supports only one response file in the command, when loads {}", - file); - continue; - } - - stored_arguments.emplace_back(self.strings.get(argument)); - } - - auto info_id = self.infos.get({ - self.strings.get(directory), - stored_arguments, - }); - - /// Note: check whether the arguments data are same as stored arguments, - /// if so, we need allocate buffer for it to avoid dangling reference. - auto info = self.infos.get(info_id); - if(info->arguments.data() == stored_arguments.data()) { - auto result = self.allocator.Allocate(info->arguments.size()); - std::ranges::copy(info->arguments, result); - info->arguments = {result, info->arguments.size()}; - } - - return info; - } - - object_ptr save_compilation_info(this Impl& self, - llvm::StringRef file, - llvm::StringRef directory, - llvm::StringRef command) { - llvm::BumpPtrAllocator local; - llvm::StringSaver saver(local); - - llvm::SmallVector arguments; - - /// On Windows, always use the Windows tokenizer regardless of the compiler - /// (MSVC, clang-cl, MinGW, etc.), because all programs are invoked through - /// the Windows API and paths use backslashes. The GNU tokenizer treats '\' - /// as an escape character, which corrupts Windows paths like C:\Users into - /// C:Users. - /// - /// Note: this does NOT affect toolchain.cpp's query_clang_toolchain(), which - /// parses clang's -### output. That output uses shell-style escaping (\\), - /// so the GNU tokenizer is correct there. -#ifdef _WIN32 - llvm::cl::TokenizeWindowsCommandLineFull(command, saver, arguments); -#else - llvm::cl::TokenizeGNUCommandLine(command, saver, arguments); -#endif - - return self.save_compilation_info(file, directory, arguments); - } - - void insert_item(this Impl& self, object_ptr item) { - auto [it, success] = self.files.try_emplace(item->file_path, item); - if(success) { - return; - } - - if(!it->second) { - it->second = item; - return; - } - - auto cur = it->second; - while(cur->next) { - cur = cur->next; - } - cur->next = item; - } - - void delete_item(this Impl& self, object_ptr item) { - auto it = self.files.find(item->file_path); - if(it == self.files.end()) { - return; - } - - if(it->second == item) { - it->second = item->next; - return; - } - - auto cur = it->second; - while(cur->next) { - if(cur->next == item) { - cur->next = item->next; - break; - } - cur = cur->next; - } - } - - auto update_source(this Impl& self, JSONSource& source) { - std::vector updates; - - /// We only need to sort the input source items, so that sources in self - /// are already sorted. - ranges::sort(source.items, [](object_ptr lhs, object_ptr rhs) { - return *lhs < *rhs; - }); - - auto it = ranges::find(self.sources, source.src_path, &JSONSource::src_path); - if(it == self.sources.end()) { - for(auto& item: source.items) { - self.insert_item(item); - updates.emplace_back(UpdateKind::Inserted, item->file_path, item->info.ptr); - } - - self.sources.emplace_back(std::move(source)); - } else { - auto& new_items = source.items; - auto& old_items = it->items; - - auto it_new = new_items.begin(); - auto it_old = old_items.begin(); - - while(it_new != new_items.end() && it_old != old_items.end()) { - const auto& new_item = **it_new; - const auto& old_item = **it_old; - - if(new_item == old_item) { - updates.emplace_back(UpdateKind::Unchanged, - new_item.file_path, - new_item.info.ptr); - ++it_new; - ++it_old; - } else if(new_item < old_item) { - self.insert_item(*it_new); - updates.emplace_back(UpdateKind::Inserted, - new_item.file_path, - new_item.info.ptr); - ++it_new; - } else { - self.delete_item(*it_old); - updates.emplace_back(UpdateKind::Deleted, - old_item.file_path, - old_item.info.ptr); - ++it_old; - } - } - - while(it_new != new_items.end()) { - self.insert_item(*it_new); - updates.emplace_back(UpdateKind::Inserted, - (*it_new)->file_path, - (*it_new)->info.ptr); - ++it_new; - } - - while(it_old != old_items.end()) { - self.delete_item(*it_old); - updates.emplace_back(UpdateKind::Deleted, - (*it_old)->file_path, - (*it_old)->info.ptr); - ++it_old; - } - - it->items = std::move(source.items); - } - - return updates; - } - - auto mangle_command(this Impl& self, - llvm::StringRef file, - const CompilationInfo& info, - const CommandOptions& options) { - llvm::StringRef directory = self.strings.get(info.directory); - llvm::SmallVector arguments; - for(auto arg: info.arguments) { - arguments.emplace_back(self.strings.get(arg).data()); - } - - /// Store the final result arguments. - llvm::SmallVector final_arguments; - - auto add_string = [&](llvm::StringRef argument) { - auto saved = self.strings.save(argument); - final_arguments.emplace_back(saved.data()); - }; - - /// Rewrite the argument to filter arguments, we basically reimplement - /// the logic of `Arg::render` to use our allocator to allocate memory. - auto add_argument = [&](llvm::opt::Arg& arg) { - switch(arg.getOption().getRenderStyle()) { - case llvm::opt::Option::RenderValuesStyle: { - for(auto value: arg.getValues()) { - add_string(value); - } - break; - } - - case llvm::opt::Option::RenderSeparateStyle: { - add_string(arg.getSpelling()); - for(auto value: arg.getValues()) { - add_string(value); - } - break; - } - - case llvm::opt::Option::RenderJoinedStyle: { - llvm::SmallString<256> first = {arg.getSpelling(), arg.getValue(0)}; - add_string(first); - for(auto value: llvm::ArrayRef(arg.getValues()).drop_front()) { - add_string(value); - } - break; - } - - case llvm::opt::Option::RenderCommaJoinedStyle: { - llvm::SmallString<256> buffer = arg.getSpelling(); - for(auto i = 0; i < arg.getNumValues(); i++) { - if(i) { - buffer += ','; - } - buffer += arg.getValue(i); - } - add_string(buffer); - break; - } - } - }; - - /// Append driver sperately - add_string(arguments.front()); - - using Arg = std::unique_ptr; - auto on_error = [&](int index, int count) { - LOG_WARN("missing argument index: {}, count: {} when parse: {}", index, count, file); - }; - - /// Prepare for removing arguments. - llvm::SmallVector remove; - for(auto& arg: options.remove) { - remove.push_back(self.strings.save(arg).data()); - } - - /// FIXME: Handle unknow remove arguments. - llvm::SmallVector known_remove_args; - self.parser.parse( - remove, - [&known_remove_args](Arg arg) { known_remove_args.emplace_back(std::move(arg)); }, - on_error); - auto get_id = [](const Arg& arg) { - return arg->getOption().getID(); - }; - ranges::sort(known_remove_args, {}, get_id); - - bool remove_pch = false; - - /// FIXME: Append the commands from response file. - self.parser.parse( - llvm::ArrayRef(arguments).drop_front(), - [&](Arg arg) { - auto& opt = arg->getOption(); - auto id = opt.getID(); - - /// Filter options we don't need. - if(self.filtered_options.contains(id)) { - return; - } - - /// Filter debug info options by group (-g, -gdwarf-*, -gsplit-dwarf, etc.). - /// These only affect debug info generation, not frontend semantics. - if(opt.matches(ID::OPT_DebugInfo_Group)) { - return; - } - - /// Remove arguments in the remove list. - auto range = ranges::equal_range(known_remove_args, id, {}, get_id); - for(auto& remove: range) { - /// Match the -I*. - if(remove->getNumValues() == 1 && remove->getValue(0) == llvm::StringRef("*")) { - return; - } - - /// Compare each value, convert `const char*` to `llvm::StringRef` for - /// comparing. - if(ranges::equal( - arg->getValues(), - remove->getValues(), - [](llvm::StringRef lhs, llvm::StringRef rhs) { return lhs == rhs; })) { - return; - } - } - - /// For arguments -I, convert directory to absolute path. - /// i.e xmake will generate commands in this style. - if(id == ID::OPT_I && arg->getNumValues() == 1) { - add_string("-I"); - llvm::StringRef value = arg->getValue(0); - if(!value.empty() && !path::is_absolute(value)) { - add_string(path::join(directory, value)); - } else { - add_string(value); - } - return; - } - - /// A workaround to remove extra PCH when cmake generate PCH flags for clang. - if(id == ID::OPT_Xclang && arg->getNumValues() == 1) { - if(remove_pch) { - remove_pch = false; - return; - } - - llvm::StringRef value = arg->getValue(0); - if(value == "-include-pch") { - remove_pch = true; - return; - } - } - - add_argument(*arg); - }, - on_error); - - /// FIXME: Do we want to parse append arguments also? - for(auto& arg: options.append) { - add_string(arg); - } - - return llvm::ArrayRef(final_arguments).vec(); - } -}; - -CompilationDatabase::CompilationDatabase() : self(std::make_unique()) { - constexpr static std::array filtered_options = { - /// Remove the input file, we will add input file ourselves. - ID::OPT_INPUT, - - /// -c and -o are meaningless for frontend. - ID::OPT_c, - ID::OPT_o, - ID::OPT_dxc_Fc, - ID::OPT_dxc_Fo, - - /// Remove all ID related to PCH building. - ID::OPT_emit_pch, - ID::OPT_include_pch, - ID::OPT__SLASH_Yu, - ID::OPT__SLASH_Fp, - - /// Remove all ID related to dependency scan. - ID::OPT_E, - ID::OPT_M, - ID::OPT_MM, - ID::OPT_MD, - ID::OPT_MMD, - ID::OPT_MF, - ID::OPT_MT, - ID::OPT_MQ, - ID::OPT_MG, - ID::OPT_MP, - ID::OPT_show_inst, - ID::OPT_show_encoding, - ID::OPT_show_includes, - ID::OPT__SLASH_showFilenames, - ID::OPT__SLASH_showFilenames_, - ID::OPT__SLASH_showIncludes, - ID::OPT__SLASH_showIncludes_user, - - /// Remove all ID related to C++ module, we will - /// build module and set deps ourselves. - ID::OPT_fmodule_file, - ID::OPT_fmodule_output, - ID::OPT_fprebuilt_module_path, - - /// Remove codegen-only options that don't affect frontend semantics - /// (parsing, diagnostics, code completion). These are pure backend/linker - /// concerns irrelevant to an LSP server. - /// - /// Note: -fno-exceptions, -fno-rtti, -std=*, -march=*, -fsanitize=*, -O* - /// are NOT filtered here — they affect predefined macros or language semantics. - - /// Position-independent code — pure codegen, no macro or semantic effect. - ID::OPT_fPIC, - ID::OPT_fno_PIC, - ID::OPT_fpic, - ID::OPT_fno_pic, - ID::OPT_fPIE, - ID::OPT_fno_PIE, - ID::OPT_fpie, - ID::OPT_fno_pie, - - /// Frame pointer and unwind tables — pure codegen. - ID::OPT_fomit_frame_pointer, - ID::OPT_fno_omit_frame_pointer, - ID::OPT_funwind_tables, - ID::OPT_fno_unwind_tables, - ID::OPT_fasynchronous_unwind_tables, - ID::OPT_fno_asynchronous_unwind_tables, - - /// Stack protection — pure codegen. - ID::OPT_fstack_protector, - ID::OPT_fstack_protector_strong, - ID::OPT_fstack_protector_all, - ID::OPT_fno_stack_protector, - - /// Section splitting, LTO, semantic interposition — pure codegen/linker. - ID::OPT_fdata_sections, - ID::OPT_fno_data_sections, - ID::OPT_ffunction_sections, - ID::OPT_fno_function_sections, - ID::OPT_flto, - ID::OPT_flto_EQ, - ID::OPT_fno_lto, - ID::OPT_fsemantic_interposition, - ID::OPT_fno_semantic_interposition, - ID::OPT_fvisibility_inlines_hidden, - - /// Diagnostics output formatting — doesn't affect analysis. - ID::OPT_fcolor_diagnostics, - ID::OPT_fno_color_diagnostics, - - /// Floating-point codegen — doesn't define macros (unlike -ffast-math). - ID::OPT_ftrapping_math, - ID::OPT_fno_trapping_math, - }; - - for(auto opt: filtered_options) { - self->filtered_options.insert(opt); - } -} - -CompilationDatabase::CompilationDatabase(CompilationDatabase&& other) = default; - -CompilationDatabase& CompilationDatabase::operator=(CompilationDatabase&& other) = default; +CompilationDatabase::CompilationDatabase() = default; CompilationDatabase::~CompilationDatabase() = default; -std::vector CompilationDatabase::load_compile_database(llvm::StringRef path) { - auto content = llvm::MemoryBuffer::getFile(path); - if(!content) { - LOG_ERROR("Failed to read compilation database from {}. Reason: {}", - path, - content.getError()); +llvm::ArrayRef CompilationDatabase::find_entries(std::uint32_t path_id) const { + auto [first, last] = ranges::equal_range(entries, path_id, {}, &CompilationEntry::file); + if(first == last) return {}; - } - - auto json = json::parse(content.get()->getBuffer()); - if(!json) { - LOG_ERROR("Failed to parse compilation database from {}. Reason: {}", - path, - json.takeError()); - return {}; - } - - if(json->kind() != json::Value::Array) { - LOG_ERROR( - "Invalid compilation database format in {}. Reason: Root element must be an array.", - path); - return {}; - } - - JSONSource source; - source.src_path = self->strings.get(path); - - for(size_t i = 0; i < json->getAsArray()->size(); ++i) { - const auto& value = (*json->getAsArray())[i]; - if(value.kind() != json::Value::Object) { - LOG_ERROR( - "Invalid compilation database in {}. Skipping item at index {}. Reason: item is not an object.", - path, - i); - continue; - } - - auto& object = *value.getAsObject(); - auto directory = object.getString("directory"); - if(!directory) { - LOG_ERROR( - "Invalid compilation database in {}. Skipping item at index {}. Reason: 'directory' key is missing.", - path, - i); - continue; - } - - auto file = object.getString("file"); - if(!file) { - LOG_ERROR( - "Invalid compilation database in {}. Skipping item at index {}. Reason: 'file' key is missing.", - path, - i); - continue; - } - - auto arguments = object.getArray("arguments"); - auto command = object.getString("command"); - if(!arguments && !command) { - LOG_ERROR( - "Invalid compilation database in {}. Skipping item at index {}. Reason: neither 'arguments' nor 'command' key is present.", - path, - i); - continue; - } - - JSONItem item; - item.json_src_path = source.src_path; - item.file_path = self->strings.get(*file); - if(arguments) { - llvm::BumpPtrAllocator local; - llvm::StringSaver saver(local); - llvm::SmallVector agrs; - for(auto& argument: *arguments) { - if(argument.kind() == json::Value::String) { - agrs.emplace_back(saver.save(*argument.getAsString()).data()); - } - } - item.info = self->save_compilation_info(*file, *directory, agrs); - } else if(command) { - item.info = self->save_compilation_info(*file, *directory, *command); - } - source.items.emplace_back(self->items.save(item)); - } - - return self->update_source(source); + return {&*first, static_cast(last - first)}; } -CompilationContext CompilationDatabase::lookup(llvm::StringRef file, - const CommandOptions& options, - const void* context) { - object_ptr info = nullptr; +namespace { - auto path_id = self->strings.get(file); - file = self->strings.get(path_id); - - auto it = self->files.find(path_id); - if(it != self->files.end()) [[unlikely]] { - if(!context) { - /// If context is not provided, we just use the first. - info = it->second->info; - } else { - /// Otherwise find the corresponding one. - auto cur = it->second; - while(cur) { - if(cur->info.ptr == context) { - info = cur->info; - break; - } - cur = cur->next; +/// Shared render logic for a parsed argument. Calls `emit(StringRef)` for each +/// output token, handling all four render styles. +template +void render_arg_to(Emit&& emit, llvm::opt::Arg& arg) { + switch(arg.getOption().getRenderStyle()) { + case llvm::opt::Option::RenderValuesStyle: + for(auto value: arg.getValues()) { + emit(llvm::StringRef(value)); } + break; + + case llvm::opt::Option::RenderSeparateStyle: + emit(arg.getSpelling()); + for(auto value: arg.getValues()) { + emit(llvm::StringRef(value)); + } + break; + + case llvm::opt::Option::RenderJoinedStyle: { + llvm::SmallString<256> first = {arg.getSpelling(), arg.getValue(0)}; + emit(llvm::StringRef(first)); + for(auto value: llvm::ArrayRef(arg.getValues()).drop_front()) { + emit(llvm::StringRef(value)); + } + break; + } + + case llvm::opt::Option::RenderCommaJoinedStyle: { + llvm::SmallString<256> buffer = arg.getSpelling(); + for(unsigned i = 0; i < arg.getNumValues(); i++) { + if(i) + buffer += ','; + buffer += arg.getValue(i); + } + emit(llvm::StringRef(buffer)); + break; } } +} - llvm::StringRef directory; - std::vector arguments; +} // namespace - if(info) { - directory = self->strings.get(info->directory); - arguments = self->mangle_command(file, *info, options); - // TODO: other c++ suffixes - } else if(file.ends_with(".cpp") || file.ends_with(".hpp") || file.ends_with(".cc")) { - arguments = {"clang++", "-std=c++20"}; - } else { - arguments = {"clang"}; - } +llvm::ArrayRef CompilationDatabase::persist_args(llvm::ArrayRef args) { + if(args.empty()) + return {}; + auto* buf = allocator->Allocate(args.size()); + std::ranges::copy(args, buf); + return {buf, args.size()}; +} - auto append_arg = [&](llvm::StringRef s) { - arguments.emplace_back(self->strings.save(s).data()); +object_ptr + CompilationDatabase::save_compilation_info(llvm::StringRef file, + llvm::StringRef directory, + llvm::ArrayRef arguments) { + assert(!arguments.empty() && "arguments must contain at least the driver"); + + auto render_arg = [&](auto& out, llvm::opt::Arg& arg) { + render_arg_to([&](llvm::StringRef s) { out.push_back(strings.save(s).data()); }, arg); }; - if(info && options.query_toolchain) { - // Save user args before replacing with cc1 result. The toolchain - // query includes all flags except user-content options (-I/-D/-U/etc.), - // so the cc1 result has correct semantics. Only user-content options - // need to be replayed afterward. - auto user_args = std::move(arguments); + llvm::SmallVector canonical_args; + llvm::SmallVector patch_args; - auto cached = self->toolchain.query_cached(file, directory, user_args); + /// Driver goes into canonical. + canonical_args.push_back(strings.save(arguments[0]).data()); - if(cached.empty()) { - LOG_WARN("failed to query toolchain: {}", file); - arguments = std::move(user_args); - } else { - // Start with cc1 result (has system paths, driver flags, etc.). - arguments.assign(cached.begin(), cached.end()); + bool remove_pch = false; - // Remove the temp source file that was appended during query. - arguments.pop_back(); + auto on_error = [&](int index, int count) { + LOG_WARN("missing argument index: {}, count: {} when parse: {}", index, count, file); + }; - // The toolchain query derives the resource dir from the system - // compiler's executable path. If that compiler is a different clang - // version, its builtin headers may not match ours. Replace the - // queried resource dir with ours so the headers are consistent. - // (See clangd's CommandMangler for precedent.) - if(!resource_dir().empty()) { - llvm::StringRef old_resource_dir; - for(std::size_t i = 0; i + 1 < arguments.size(); ++i) { - if(arguments[i] == llvm::StringRef("-resource-dir")) { - old_resource_dir = arguments[i + 1]; - break; - } + parser->parse( + llvm::ArrayRef(arguments).drop_front(), + [&](std::unique_ptr arg) { + auto& opt = arg->getOption(); + auto id = opt.getID(); + + /// Discard options irrelevant to frontend. + if(is_discarded_option(id)) { + return; + } + + /// Discard codegen-only options. + if(is_codegen_option(id, opt)) { + return; + } + + /// Handle CMake's Xclang PCH workaround: + /// -Xclang -include-pch -Xclang → discard both pairs. + if(is_xclang_option(id) && arg->getNumValues() == 1) { + if(remove_pch) { + remove_pch = false; + return; } - if(!old_resource_dir.empty() && old_resource_dir != resource_dir()) { - for(auto& arg: arguments) { - llvm::StringRef s(arg); - if(s.starts_with(old_resource_dir)) { - auto replaced = - resource_dir().str() + s.substr(old_resource_dir.size()).str(); - arg = self->strings.save(replaced).data(); + llvm::StringRef value = arg->getValue(0); + if(value == "-include-pch") { + remove_pch = true; + return; + } + } + + /// User-content options go into per-file patch. + if(is_user_content_option(id)) { + /// Absolutize relative paths for include-path options. + if(is_include_path_option(id) && arg->getNumValues() == 1) { + patch_args.push_back(strings.save(arg->getSpelling()).data()); + llvm::StringRef value = arg->getValue(0); + if(!value.empty() && !path::is_absolute(value)) { + patch_args.push_back(strings.save(path::join(directory, value)).data()); + } else { + patch_args.push_back(strings.save(value).data()); + } + return; + } + render_arg(patch_args, *arg); + return; + } + + /// Everything else goes into canonical. + render_arg(canonical_args, *arg); + }, + on_error); + + /// Dedup canonical command. + auto canonical_id = canonicals.get(CanonicalCommand{canonical_args}); + auto canonical = canonicals.get(canonical_id); + if(canonical->arguments.data() == canonical_args.data()) { + canonical->arguments = persist_args(canonical_args); + } + + /// Build and dedup CompilationInfo. + auto dir = strings.save(directory).data(); + auto info_id = infos.get(CompilationInfo{dir, canonical, patch_args}); + auto info = infos.get(info_id); + if(info->patch.data() == patch_args.data()) { + info->patch = persist_args(patch_args); + } + + return info; +} + +object_ptr CompilationDatabase::save_compilation_info(llvm::StringRef file, + llvm::StringRef directory, + llvm::StringRef command) { + llvm::BumpPtrAllocator local; + llvm::StringSaver saver(local); + + llvm::SmallVector arguments; + +#ifdef _WIN32 + llvm::cl::TokenizeWindowsCommandLineFull(command, saver, arguments); +#else + llvm::cl::TokenizeGNUCommandLine(command, saver, arguments); +#endif + + if(arguments.empty()) { + return {nullptr}; + } + + return save_compilation_info(file, directory, arguments); +} + +std::size_t CompilationDatabase::load(llvm::StringRef path) { + // Clear old entries and caches (but keep allocator/strings/canonicals/infos/toolchain). + entries.clear(); + search_config_cache.clear(); + + simdjson::padded_string json_buf; + if(auto error = simdjson::padded_string::load(std::string(path)).get(json_buf)) { + LOG_ERROR("Failed to read compilation database from {}: {}", + path, + simdjson::error_message(error)); + return 0; + } + + simdjson::ondemand::parser json_parser; + simdjson::ondemand::document doc; + if(auto error = json_parser.iterate(json_buf).get(doc)) { + LOG_ERROR("Failed to parse compilation database from {}: {}", + path, + simdjson::error_message(error)); + return 0; + } + + simdjson::ondemand::array arr; + if(auto error = doc.get_array().get(arr)) { + LOG_ERROR("Invalid compilation database format in {}: root element must be an array.", + path); + return 0; + } + + std::size_t index = 0; + for(auto element: arr) { + simdjson::ondemand::object obj; + if(element.get_object().get(obj)) { + LOG_ERROR( + "Invalid compilation database in {}. Skipping item at index {}: " "item is not an object.", + path, + index); + ++index; + continue; + } + + std::string_view dir_sv, file_sv; + if(obj["directory"].get_string().get(dir_sv)) { + LOG_ERROR( + "Invalid compilation database in {}. Skipping item at index {}: " "'directory' key is missing.", + path, + index); + ++index; + continue; + } + + if(obj["file"].get_string().get(file_sv)) { + LOG_ERROR( + "Invalid compilation database in {}. Skipping item at index {}: " "'file' key is missing.", + path, + index); + ++index; + continue; + } + + llvm::StringRef dir_ref(dir_sv.data(), dir_sv.size()); + llvm::StringRef file_ref(file_sv.data(), file_sv.size()); + + // Resolve relative file paths against the directory so that entries + // from different directories don't collide in the PathPool. + std::string file_abs; + if(!path::is_absolute(file_ref)) { + file_abs = path::join(dir_ref, file_ref); + file_ref = file_abs; + } + + simdjson::ondemand::array args_arr; + if(!obj["arguments"].get_array().get(args_arr)) { + llvm::BumpPtrAllocator local; + llvm::StringSaver saver(local); + llvm::SmallVector args; + bool malformed = false; + for(auto arg_val: args_arr) { + std::string_view sv; + if(arg_val.get_string().get(sv)) { + malformed = true; + break; + } + args.push_back(saver.save(llvm::StringRef(sv.data(), sv.size())).data()); + } + if(!malformed && !args.empty()) { + auto info = save_compilation_info(file_ref, dir_ref, args); + assert(info && "save_compilation_info must succeed with non-empty args"); + auto path_id = paths.intern(file_ref); + entries.push_back({path_id, info}); + } + } else { + std::string_view cmd_sv; + if(obj["command"].get_string().get(cmd_sv)) { + LOG_ERROR( + "Invalid compilation database in {}. Skipping item at index {}: " "neither 'arguments' nor 'command' key is present.", + path, + index); + ++index; + continue; + } + auto info = save_compilation_info(file_ref, + dir_ref, + llvm::StringRef(cmd_sv.data(), cmd_sv.size())); + if(!info) { + ++index; + continue; + } + auto path_id = paths.intern(file_ref); + entries.push_back({path_id, info}); + } + + ++index; + } + + // Sort by file path_id for binary search. + ranges::sort(entries, {}, &CompilationEntry::file); + + return entries.size(); +} + +llvm::SmallVector CompilationDatabase::lookup(llvm::StringRef file, + const CommandOptions& options) { + auto path_id = paths.intern(file); + auto matched = find_entries(path_id); + + auto render_arg = [&](auto& out, llvm::opt::Arg& arg) { + render_arg_to([&](llvm::StringRef s) { out.push_back(strings.save(s).data()); }, arg); + }; + + /// Build one CompilationContext from a single CompilationInfo. + auto build_context = [&](object_ptr info) -> CompilationContext { + llvm::StringRef directory = info->directory; + std::vector arguments; + + auto append_arg = [&](llvm::StringRef s) { + arguments.emplace_back(strings.save(s).data()); + }; + + auto append_args = [&](llvm::ArrayRef args) { + arguments.insert(arguments.end(), args.begin(), args.end()); + }; + + if(options.query_toolchain) { + auto cached = query_toolchain_cached(file, directory, info->canonical->arguments); + + if(cached.empty()) { + if(!options.suppress_logging) { + LOG_WARN("failed to query toolchain: {}", file); + } + append_args(info->canonical->arguments); + append_args(info->patch); + } else { + arguments.assign(cached.begin(), cached.end()); + arguments.pop_back(); // remove temp source file + + // Replace resource dir if needed. + if(!resource_dir().empty()) { + llvm::StringRef old_resource_dir; + for(std::size_t i = 0; i + 1 < arguments.size(); ++i) { + if(arguments[i] == llvm::StringRef("-resource-dir")) { + old_resource_dir = arguments[i + 1]; + break; + } + } + if(!old_resource_dir.empty() && old_resource_dir != resource_dir()) { + for(auto& arg: arguments) { + llvm::StringRef s(arg); + if(s.starts_with(old_resource_dir)) { + auto replaced = + resource_dir().str() + s.substr(old_resource_dir.size()).str(); + arg = strings.save(replaced).data(); + } } } } + + append_args(info->patch); + + // Fix -main-file-name to match the actual file. + bool next_main_file = false; + for(auto& arg: arguments) { + if(arg == llvm::StringRef("-main-file-name")) { + next_main_file = true; + continue; + } + if(next_main_file) { + arg = strings.save(path::filename(file)).data(); + next_main_file = false; + } + } } - // Replay user-content options (-I/-D/-U/-include/-idirafter) from - // the original mangled args. These were excluded from the toolchain - // query since they don't affect compiler semantics or system paths. - self->parser.parse( - llvm::ArrayRef(user_args).drop_front(), - [&](std::unique_ptr arg) { - auto id = arg->getOption().getID(); - switch(id) { - case ID::OPT_I: - case ID::OPT_isystem: - case ID::OPT_iquote: - case ID::OPT_idirafter: - case ID::OPT_D: - case ID::OPT_U: - case ID::OPT_include: - append_arg(arg->getSpelling()); - for(auto value: arg->getValues()) { - append_arg(value); - } - break; - default: break; + // Inject our resource dir if not already present. + if(!resource_dir().empty()) { + bool has_resource_dir = false; + for(auto& arg: arguments) { + if(arg == llvm::StringRef("-resource-dir")) { + has_resource_dir = true; + break; } + } + if(!has_resource_dir) { + append_arg("-resource-dir"); + append_arg(resource_dir()); + } + } + } else { + append_args(info->canonical->arguments); + append_args(info->patch); + } + + // Apply remove filter. + if(!options.remove.empty()) { + using Arg = std::unique_ptr; + llvm::SmallVector remove_strs; + for(auto& s: options.remove) { + remove_strs.push_back(strings.save(s).data()); + } + llvm::SmallVector remove_args; + parser->parse( + remove_strs, + [&remove_args](Arg arg) { remove_args.emplace_back(std::move(arg)); }, + [](int, int) {}); + auto get_id = [](const Arg& arg) { + return arg->getOption().getID(); + }; + std::ranges::sort(remove_args, {}, get_id); + + auto saved_args = std::move(arguments); + arguments.clear(); + arguments.push_back(saved_args.front()); + + parser->parse( + llvm::ArrayRef(saved_args).drop_front(), + [&](Arg arg) { + auto id = arg->getOption().getID(); + auto range = std::ranges::equal_range(remove_args, id, {}, get_id); + for(auto& remove: range) { + if(remove->getNumValues() == 1 && + remove->getValue(0) == llvm::StringRef("*")) { + return; + } + if(std::ranges::equal( + arg->getValues(), + remove->getValues(), + [](llvm::StringRef l, llvm::StringRef r) { return l == r; })) { + return; + } + } + render_arg(arguments, *arg); }, [](int, int) {}); - - // Fix -main-file-name to match the actual file. - bool next_main_file = false; - for(auto& arg: arguments) { - if(arg == llvm::StringRef("-main-file-name")) { - next_main_file = true; - continue; - } - - if(next_main_file) { - arg = self->strings.save(path::filename(file)).data(); - next_main_file = false; - } - } } - // Inject our resource dir if not already present in the arguments. - // On success, the cc1 output already has -resource-dir (possibly - // replaced above). On failure, the original user_args won't have it. - if(!resource_dir().empty()) { - bool has_resource_dir = false; - for(auto& arg: arguments) { - if(arg == llvm::StringRef("-resource-dir")) { - has_resource_dir = true; - break; - } - } - if(!has_resource_dir) { - append_arg("-resource-dir"); - append_arg(resource_dir()); - } + for(auto& arg: options.append) { + append_arg(arg); } + + arguments.emplace_back(paths.resolve(path_id).data()); + return CompilationContext(directory, std::move(arguments)); + }; + + llvm::SmallVector results; + + if(!matched.empty()) { + for(auto& entry: matched) { + results.push_back(build_context(entry.info)); + } + } else { + // No matching entry — synthesize a default command. + std::vector arguments; + if(file.ends_with(".cpp") || file.ends_with(".hpp") || file.ends_with(".cc")) { + arguments = {"clang++", "-std=c++20"}; + } else { + arguments = {"clang"}; + } + arguments.emplace_back(paths.resolve(path_id).data()); + results.push_back(CompilationContext({}, std::move(arguments))); } - arguments.emplace_back(file.data()); - - return CompilationContext(directory, std::move(arguments)); + return results; } SearchConfig CompilationDatabase::lookup_search_config(llvm::StringRef file, - const CommandOptions& options, - const void* context) { - // Resolve to the internal CompilationInfo pointer for cache lookup. - auto path_id = self->strings.get(file); - auto it = self->files.find(path_id); - const CompilationInfo* info_ptr = nullptr; - if(it != self->files.end()) { - if(!context) { - info_ptr = it->second->info.ptr; - } else { - auto cur = it->second; - while(cur) { - if(cur->info.ptr == context) { - info_ptr = cur->info.ptr; - break; - } - cur = cur->next; - } - } - } + const CommandOptions& options) { + auto path_id = paths.intern(file); + auto matched = find_entries(path_id); - if(info_ptr) { - auto key = Impl::ConfigCacheKey{info_ptr, Impl::options_bits(options)}; - auto cache_it = self->search_config_cache.find(key); - if(cache_it != self->search_config_cache.end()) { + // Only cache when remove/append are empty — custom options produce + // per-call results that shouldn't pollute the shared cache. + bool cacheable = !matched.empty() && options.remove.empty() && options.append.empty(); + + if(cacheable) { + auto key = ConfigCacheKey{matched.front().info.ptr, options_bits(options)}; + auto cache_it = search_config_cache.find(key); + if(cache_it != search_config_cache.end()) { return cache_it->second; } } - auto ctx = lookup(file, options, context); + auto results = lookup(file, options); + auto& ctx = results.front(); auto config = extract_search_config(ctx.arguments, ctx.directory); - if(info_ptr) { - auto key = Impl::ConfigCacheKey{info_ptr, Impl::options_bits(options)}; - self->search_config_cache.try_emplace(key, config); + if(cacheable) { + auto key = ConfigCacheKey{matched.front().info.ptr, options_bits(options)}; + search_config_cache.try_emplace(key, config); } return config; } bool CompilationDatabase::has_cached_configs() const { - return !self->search_config_cache.empty(); + return !search_config_cache.empty(); } -std::optional CompilationDatabase::get_option_id(llvm::StringRef argument) { - auto& table = clang::driver::getDriverOptTable(); +CompilationDatabase::ToolchainExtract + CompilationDatabase::extract_toolchain_flags(llvm::StringRef file, + llvm::ArrayRef arguments) { + ToolchainExtract result; - llvm::SmallString<64> buffer = argument; + // Driver binary (first arg) — e.g. "clang++" vs "clang" affects language mode. + result.key += arguments[0]; + result.key += '\0'; - if(argument.ends_with("=")) { - buffer += "placeholder"; - } + // File extension affects language mode (C vs C++). + result.key += path::extension(file); + result.key += '\0'; - unsigned index = 1; - std::array arguments = {"clang++", buffer.c_str(), "placeholder"}; - llvm::opt::InputArgList arg_list(arguments.data(), arguments.data() + arguments.size()); + result.query_args.push_back(arguments[0]); - if(auto arg = table.ParseOneArg(arg_list, index)) { - return arg->getOption().getID(); - } else { - return {}; - } -} - -llvm::StringRef CompilationDatabase::resource_dir() { - static std::string dir = [] { - // Use address of this lambda to locate our binary via dladdr/proc. - static int anchor; - auto exe = llvm::sys::fs::getMainExecutable("", &anchor); - if(exe.empty()) { - return std::string{}; - } - return clang::driver::Driver::GetResourcesPath(exe); - }(); - return dir; -} - -ToolchainProvider& CompilationDatabase::toolchain() { - return self->toolchain; -} - -std::vector CompilationDatabase::resolve_toolchain_entries( - llvm::ArrayRef> files) { - std::vector entries; - entries.reserve(files.size()); - - for(auto& [file, context]: files) { - auto path_id = self->strings.get(file); - auto stored_file = self->strings.get(path_id); - - object_ptr info = nullptr; - auto it = self->files.find(path_id); - if(it != self->files.end()) { - if(!context) { - info = it->second->info; - } else { - auto cur = it->second; - while(cur) { - if(cur->info.ptr == context) { - info = cur->info; - break; - } - cur = cur->next; - } + parser->parse( + llvm::ArrayRef(arguments).drop_front(), + [&](std::unique_ptr arg) { + auto& opt = arg->getOption(); + auto id = opt.getID(); + if(is_discarded_option(id) || is_user_content_option(id) || + is_codegen_option(id, opt)) { + return; } - } - if(!info || info->arguments.empty()) { - continue; - } + // Add option ID and all its values to the cache key. + result.key += std::to_string(id); + result.key += '\0'; + for(auto value: arg->getValues()) { + result.key += value; + result.key += '\0'; + } - ToolchainProvider::PendingEntry entry; - entry.file = stored_file; - entry.directory = self->strings.get(info->directory); - entry.arguments.reserve(info->arguments.size()); - for(auto arg_id: info->arguments) { - entry.arguments.push_back(self->strings.get(arg_id).data()); - } + // Render the argument back to query args. + render_arg_to( + [&](llvm::StringRef s) { result.query_args.push_back(strings.save(s).data()); }, + *arg); + }, + [](int, int) {}); - entries.push_back(std::move(entry)); - } - - return entries; -} - -llvm::StringRef CompilationDatabase::resolve_path(std::uint32_t path_id) { - return self->strings.get(path_id); -} - -std::vector CompilationDatabase::files() { - std::vector result; - for(auto& [file, _]: self->files) { - result.emplace_back(self->strings.get(file).data()); - } return result; } -llvm::StringRef CompilationDatabase::save_string(llvm::StringRef string) { - return self->strings.save(string); +llvm::ArrayRef + CompilationDatabase::query_toolchain_cached(llvm::StringRef file, + llvm::StringRef directory, + llvm::ArrayRef arguments) { + auto [key, query_args] = extract_toolchain_flags(file, arguments); + auto it = toolchain_cache.find(key); + if(it != toolchain_cache.end()) { + return it->second; + } + + LOG_WARN("Toolchain cache miss (spawning process): file={}, cache_size={}, key_len={}", + file, + toolchain_cache.size(), + key.size()); + + auto callback = [&](const char* s) -> const char* { + return strings.save(s).data(); + }; + toolchain::QueryParams params = {file, directory, query_args, callback}; + auto result = toolchain::query_toolchain(params); + + auto [entry, _] = toolchain_cache.try_emplace(std::move(key), std::move(result)); + return entry->second; +} + +std::vector + CompilationDatabase::get_pending_queries(llvm::ArrayRef entries) { + llvm::StringMap seen_keys; + std::vector queries; + + for(auto& entry: entries) { + if(entry.arguments.empty()) { + continue; + } + + auto [key, query_args] = extract_toolchain_flags(entry.file, entry.arguments); + + // Skip if already cached or already queued. + if(toolchain_cache.count(key) || !seen_keys.try_emplace(key, true).second) { + continue; + } + + LOG_DEBUG("Pre-warm: new toolchain key (len={}) for file={}", key.size(), entry.file); + queries.push_back( + {std::move(key), std::move(query_args), entry.file.str(), entry.directory.str()}); + } + + LOG_INFO("Pre-warm: {} unique keys from {} entries, {} queries needed", + seen_keys.size(), + entries.size(), + queries.size()); + return queries; +} + +void CompilationDatabase::inject_results(llvm::ArrayRef results) { + for(auto& result: results) { + if(toolchain_cache.count(result.key)) { + continue; + } + std::vector saved; + saved.reserve(result.cc1_args.size()); + for(auto& arg: result.cc1_args) { + saved.push_back(strings.save(arg).data()); + } + toolchain_cache.try_emplace(result.key, std::move(saved)); + } +} + +bool CompilationDatabase::has_cached_toolchain() const { + return !toolchain_cache.empty(); +} + +llvm::StringRef CompilationDatabase::resolve_path(std::uint32_t path_id) { + return paths.resolve(path_id); } #ifdef CLICE_ENABLE_TEST void CompilationDatabase::add_command(llvm::StringRef directory, llvm::StringRef file, - llvm::ArrayRef arguments) { - JSONItem item; - item.json_src_path = self->strings.get("fake"); - item.file_path = self->strings.get(file); - item.info = self->save_compilation_info(file, directory, arguments); - self->insert_item(self->items.save(item)); + auto path_id = paths.intern(file); + auto info = save_compilation_info(file, directory, arguments); + // Insert in sorted position to maintain sort invariant. + auto it = ranges::lower_bound(entries, path_id, {}, &CompilationEntry::file); + entries.insert(it, {path_id, info}); } void CompilationDatabase::add_command(llvm::StringRef directory, llvm::StringRef file, llvm::StringRef command) { - JSONItem item; - item.json_src_path = self->strings.get("fake"); - item.file_path = self->strings.get(file); - item.info = self->save_compilation_info(file, directory, command); - self->insert_item(self->items.save(item)); + auto path_id = paths.intern(file); + auto info = save_compilation_info(file, directory, command); + auto it = ranges::lower_bound(entries, path_id, {}, &CompilationEntry::file); + entries.insert(it, {path_id, info}); } #endif -std::string print_argv(llvm::ArrayRef args) { - std::string s = "["; - if(!args.empty()) { - s += args.consume_front(); - for(auto arg: args) { - s += " "; - s += arg; - } - } - s += "]"; - return s; -} - } // namespace clice diff --git a/src/command/command.h b/src/command/command.h index d6f78031..75f26571 100644 --- a/src/command/command.h +++ b/src/command/command.h @@ -1,25 +1,25 @@ #pragma once #include -#include #include -#include #include #include +#include "command/argument_parser.h" #include "command/search_config.h" -#include "command/toolchain_provider.h" -#include "support/format.h" +#include "support/object_pool.h" +#include "support/path_pool.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" namespace clice { struct CommandOptions { - /// Ignore unknown commands arguments. - bool ignore_unknown = true; - /// Query the compiler driver for additional information, such as system includes and target. /// When enabled, also replaces the queried resource dir with our own (clang tools must use /// builtin headers matching their parser version — see clangd's CommandMangler for precedent). @@ -29,32 +29,13 @@ struct CommandOptions { /// Set true in unittests to avoid cluttering test output. bool suppress_logging = false; - /// The commands that you want to remove from original commands list. + /// Extra arguments to remove from the original command line. llvm::ArrayRef remove; - /// The commands that you want to add to original commands list. + /// Extra arguments to append to the original command line. llvm::ArrayRef append; }; -enum class UpdateKind : std::uint8_t { - Unchanged, - Inserted, - Deleted, -}; - -struct UpdateInfo { - /// The kind of update. - UpdateKind kind; - - /// The updated file. - std::uint32_t path_id; - - /// The compilation context of this file command, which could - /// be used to identity the same file with different compilation - /// contexts. - const void* context; -}; - struct CompilationContext { /// The working directory of compilation. llvm::StringRef directory; @@ -63,73 +44,167 @@ struct CompilationContext { std::vector arguments; }; -std::string print_argv(llvm::ArrayRef args); +/// Shared compiler identity — driver + all semantics-affecting flags. +/// Deduped via ObjectSet so most files share one instance. This directly +/// serves as the toolchain cache key (no re-parsing needed at query time). +struct CanonicalCommand { + /// Driver path followed by semantics-affecting flags (e.g. -std=, -target, -W*). + /// All pointers are interned in StringSet and pointer-stable. + llvm::ArrayRef arguments; + + friend bool operator==(const CanonicalCommand&, const CanonicalCommand&) = default; +}; + +/// Per-file compilation entry = shared canonical + per-file user-content patch. +/// Parsed and classified once at CDB load time; no further parsing needed. +struct CompilationInfo { + /// Working directory (interned in StringSet, pointer-stable). + const char* directory = nullptr; + + /// Shared canonical command (driver + semantic flags). + object_ptr canonical = {nullptr}; + + /// Per-file user-content options: -I, -D, -U, -include, -isystem, -iquote, + /// -idirafter. Pre-rendered as flat arg list with -I paths already absolutized. + llvm::ArrayRef patch; + + friend bool operator==(const CompilationInfo&, const CompilationInfo&) = default; +}; + +/// A single entry in the compilation database, stored in a flat sorted vector. +struct CompilationEntry { + /// Interned path ID for the source file (from PathPool). + std::uint32_t file; + + /// Parsed compilation info (directory + canonical + patch). + object_ptr info; +}; + +/// A pending toolchain query, ready to be executed (possibly in parallel). +struct ToolchainQuery { + std::string key; + std::vector query_args; + std::string file; + std::string directory; +}; + +/// Result of a toolchain query, to be injected back into the cache. +struct ToolchainResult { + std::string key; + std::vector cc1_args; +}; + +} // namespace clice + +namespace llvm { + +template <> +struct DenseMapInfo { + using T = clice::CanonicalCommand; + + inline static T getEmptyKey() { + return T{ + llvm::ArrayRef(reinterpret_cast(~uintptr_t(0)), size_t(0))}; + } + + inline static T getTombstoneKey() { + return T{llvm::ArrayRef(reinterpret_cast(~uintptr_t(0) - 1), + size_t(0))}; + } + + static unsigned getHashValue(const T& cmd) { + return llvm::hash_combine_range(cmd.arguments); + } + + static bool isEqual(const T& lhs, const T& rhs) { + // Sentinels have distinct data pointers but both have size 0, + // and ArrayRef equality is content-based — so we must compare + // data pointers first to keep sentinels distinguishable. + if(lhs.arguments.data() == rhs.arguments.data()) + return lhs.arguments.size() == rhs.arguments.size(); + if(lhs.arguments.data() == getEmptyKey().arguments.data() || + lhs.arguments.data() == getTombstoneKey().arguments.data() || + rhs.arguments.data() == getEmptyKey().arguments.data() || + rhs.arguments.data() == getTombstoneKey().arguments.data()) + return false; + return lhs == rhs; + } +}; + +template <> +struct DenseMapInfo { + using T = clice::CompilationInfo; + + inline static T getEmptyKey() { + return T{llvm::DenseMapInfo::getEmptyKey()}; + } + + inline static T getTombstoneKey() { + return T{llvm::DenseMapInfo::getTombstoneKey()}; + } + + static unsigned getHashValue(const T& info) { + return llvm::hash_combine(info.directory, + info.canonical.ptr, + llvm::hash_combine_range(info.patch)); + } + + static bool isEqual(const T& lhs, const T& rhs) { + return lhs == rhs; + } +}; + +} // namespace llvm + +namespace clice { class CompilationDatabase { public: CompilationDatabase(); - - CompilationDatabase(const CompilationDatabase&) = delete; - - CompilationDatabase(CompilationDatabase&& other); - - CompilationDatabase& operator=(const CompilationDatabase&) = delete; - - CompilationDatabase& operator=(CompilationDatabase&& other); - ~CompilationDatabase(); + CompilationDatabase(const CompilationDatabase&) = delete; + CompilationDatabase& operator=(const CompilationDatabase&) = delete; + CompilationDatabase(CompilationDatabase&&) = default; + CompilationDatabase& operator=(CompilationDatabase&&) = default; + public: - /// Read the compilation database on the give file and return the - /// incremental update infos. - std::vector load_compile_database(llvm::StringRef file); + /// Load (or reload) the compilation database from the given file. + /// Full reload: old entries are replaced, SearchConfig cache is cleared, + /// but toolchain cache survives. Returns the number of entries loaded. + std::size_t load(llvm::StringRef path); - /// Lookup the compilation context of specific file. If the context - /// param is provided, we will return the compilation context corresponding - /// to the handle. Otherwise we just return the first one(if the file have) - /// multiple compilation contexts. - CompilationContext lookup(llvm::StringRef file, - const CommandOptions& options = {}, - const void* context = nullptr); - - /// TODO: list all compilation context of the file, this is useful to show - /// all contexts and let user choose one. - /// std::vector fetch_all(llvm::StringRef file); + /// Lookup the compilation contexts for a file. A file may have multiple + /// compilation commands (e.g. different build configurations); all are returned. + llvm::SmallVector lookup(llvm::StringRef file, + const CommandOptions& options = {}); /// Combined lookup + extract_search_config with internal caching. - /// Results are cached by CompilationInfo pointer, avoiding repeated - /// argument parsing across multiple calls with the same context. - SearchConfig lookup_search_config(llvm::StringRef file, - const CommandOptions& options = {}, - const void* context = nullptr); + SearchConfig lookup_search_config(llvm::StringRef file, const CommandOptions& options = {}); /// Check if SearchConfig cache is populated (non-empty). bool has_cached_configs() const; - /// Get an the option for specific argument. - static std::optional get_option_id(llvm::StringRef argument); - - /// Get the resource directory for clang builtin headers. Computed once - /// from the current executable path using Driver::GetResourcesPath. - static llvm::StringRef resource_dir(); - - /// Resolve a path_id (from UpdateInfo) back to the file path string. + /// Resolve a path_id back to the file path string. llvm::StringRef resolve_path(std::uint32_t path_id); - /// Access the toolchain provider for batch pre-warming and direct queries. - ToolchainProvider& toolchain(); + /// Entry for batch pre-warming: file + directory + raw compilation arguments. + struct PendingEntry { + llvm::StringRef file; + llvm::StringRef directory; + llvm::SmallVector arguments; + }; - /// Resolve (file, context) pairs to PendingEntry tuples for toolchain queries. - /// Converts CDB-internal context pointers to raw (file, directory, arguments) - /// that the ToolchainProvider can consume. - std::vector - resolve_toolchain_entries(llvm::ArrayRef> files); + /// Get pending toolchain queries for a batch of compilation entries. + /// Returns queries only for cache-miss keys (deduplicated). + std::vector get_pending_queries(llvm::ArrayRef entries); - /// FIXME: bad interface design ... - std::vector files(); + /// Inject pre-computed toolchain results into the cache. Strings are copied + /// into the internal string pool. + void inject_results(llvm::ArrayRef results); - /// FIXME: remove this api? - auto save_string(llvm::StringRef string) -> llvm::StringRef; + /// Check if toolchain cache has any entries. + bool has_cached_toolchain() const; #ifdef CLICE_ENABLE_TEST @@ -139,15 +214,72 @@ public: void add_command(llvm::StringRef directory, llvm::StringRef file, llvm::StringRef command); - /// FIXME: remove this - /// Update commands from json file and return all updated file. - std::expected, std::string> load_commands(llvm::StringRef json_content, - llvm::StringRef workspace); #endif private: - struct Impl; - std::unique_ptr self; + /// Find all CompilationEntry items for a file by path_id (binary search). + /// Returns a sub-range of `entries`; may be empty. + llvm::ArrayRef find_entries(std::uint32_t path_id) const; + + /// Allocate a persistent copy of a const char* array on the bump allocator. + llvm::ArrayRef persist_args(llvm::ArrayRef args); + + /// Parse and classify a compilation command into canonical + patch. + object_ptr save_compilation_info(llvm::StringRef file, + llvm::StringRef directory, + llvm::ArrayRef arguments); + + object_ptr save_compilation_info(llvm::StringRef file, + llvm::StringRef directory, + llvm::StringRef command); + + static std::uint8_t options_bits(const CommandOptions& options) { + return options.query_toolchain ? 1u : 0u; + } + + struct ToolchainExtract { + std::string key; + std::vector query_args; + }; + + /// Extract toolchain-relevant flags and build a cache key. + ToolchainExtract extract_toolchain_flags(llvm::StringRef file, + llvm::ArrayRef arguments); + + /// Query toolchain with caching. Returns cached cc1 args, running the + /// expensive compiler query only on cache miss. + llvm::ArrayRef query_toolchain_cached(llvm::StringRef file, + llvm::StringRef directory, + llvm::ArrayRef arguments); + + /// The memory pool which holds all elements of compilation database. + /// Heap-allocated so its address is stable across moves. + std::unique_ptr allocator = std::make_unique(); + + /// Keep all strings (arguments, directories, etc.). + StringSet strings{allocator.get()}; + + /// Shared canonical commands — most files share one instance. + ObjectSet canonicals{allocator.get()}; + + /// Per-file compilation infos (canonical + patch + directory). + ObjectSet infos{allocator.get()}; + + /// Intern pool for file paths → compact uint32_t IDs. + PathPool paths; + + /// All compilation entries, sorted by file path_id. + /// Multiple entries for the same file are adjacent. + std::vector entries; + + /// Cache of SearchConfig keyed by (CompilationInfo*, options_bits). + using ConfigCacheKey = std::pair; + llvm::DenseMap search_config_cache; + + /// Cache of toolchain query results, keyed by canonical toolchain key. + llvm::StringMap> toolchain_cache; + + std::unique_ptr parser = std::make_unique(allocator.get()); }; } // namespace clice diff --git a/src/command/driver.h b/src/command/driver.h deleted file mode 100644 index faee234d..00000000 --- a/src/command/driver.h +++ /dev/null @@ -1,146 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "command/command.h" - -#include "llvm/Support/Allocator.h" -#include "clang/Driver/Driver.h" - -namespace clice { - -namespace { - -namespace opt = llvm::opt; -namespace driver = clang::driver; - -/// Checks if dash-dash (`--`) parsing is enabled. If enabled, all arguments -/// after a standalone `--` are treated as positional arguments (e.g., input files). -bool enable_dash_dash_parsing(const opt::OptTable& table); - -/// Checks if grouped short options are enabled. If enabled, a short option group -/// like `-ab` is parsed as separate options `-a` and `-b`. -bool enable_grouped_short_options(const opt::OptTable& table); - -/// Get the specific toolchain of given target, we mainly use it to get msvc toolchain. -const driver::ToolChain& get_toolchain(driver::Driver& driver, - const opt::ArgList& Args, - const llvm::Triple& Target); - -template -struct Thief { - friend bool enable_dash_dash_parsing(const opt::OptTable& table) { - return table.*MP1; - } - - friend bool enable_grouped_short_options(const opt::OptTable& table) { - return table.*MP2; - } - - friend const driver::ToolChain& get_toolchain(driver::Driver& driver, - const opt::ArgList& args, - const llvm::Triple& target) { - return (driver.*MP3)(args, target); - } -}; - -template struct Thief<&opt::OptTable::DashDashParsing, - &opt::OptTable::GroupedShortOptions, - &driver::Driver::getToolChain>; - -class ArgumentParser final : public llvm::opt::ArgList { -public: - ArgumentParser(llvm::BumpPtrAllocator* allocator) : allocator(allocator) {} - - ~ArgumentParser() { - /// We never use the private `Args` field, so make sure it's empty. - if(getArgs().size() != 0) { - std::abort(); - } - } - - const char* getArgString(unsigned index) const override { - return arguments[index]; - } - - unsigned getNumInputArgStrings() const override { - return arguments.size(); - } - - const char* MakeArgStringRef(llvm::StringRef s) const override { - auto p = allocator->Allocate(s.size() + 1); - std::ranges::copy(s, p); - p[s.size()] = '\0'; - return p; - } - - inline static auto& option_table = clang::driver::getDriverOptTable(); - - void set_arguments(llvm::ArrayRef arguments) { - if(getArgs().size() != 0) { - std::abort(); - } - - this->arguments = arguments; - } - - std::unique_ptr parse_one(unsigned& index) { - /// Make sure we are not using - assert(!enable_dash_dash_parsing(option_table)); - assert(!enable_grouped_short_options(option_table)); - return option_table.ParseOneArg(*this, index); - } - - void parse(llvm::ArrayRef arguments, const auto& on_parse, const auto& on_error) { - this->arguments = arguments; - - unsigned it = 0; - while(it != arguments.size()) { - llvm::StringRef s = arguments[it]; - - if(s.empty()) [[unlikely]] { - it += 1; - continue; - } - - auto prev = it; - auto arg = parse_one(it); - assert(it > prev && "parser failed to consume argument"); - - if(!arg) [[unlikely]] { - assert(it >= arguments.size() && "unexpected parser error!"); - assert(it - prev - 1 && "no missing arguments!"); - - /// FIXME: When parsing fails, the parser may have encountered unknown - /// arguments (e.g., options for a different compiler like nvcc). - /// We should allow the user to provide a custom option registry - /// (mainly for these pass-through arguments). - /// - /// This would let us ignore them correctly. For example, when - /// parsing `nvcc --option-dir x.txt main.cpp`, our parser fails - /// because it discards `--option-dir` but doesn't know it also - /// consumes the next argument (`x.txt`). - /// - /// With a custom registry, we could register that `--option-dir` - /// takes one argument, allowing us to skip both and continue - /// parsing from `main.cpp`. - on_error(prev, it - prev - 1); - break; - } - - on_parse(std::move(arg)); - } - } - -private: - llvm::BumpPtrAllocator* allocator; - - llvm::ArrayRef arguments; -}; - -} // namespace - -} // namespace clice diff --git a/src/command/search_config.cpp b/src/command/search_config.cpp index 63f0221b..497d2bf7 100644 --- a/src/command/search_config.cpp +++ b/src/command/search_config.cpp @@ -1,11 +1,12 @@ #include "command/search_config.h" -#include "command/driver.h" +#include "command/argument_parser.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" +#include "clang/Driver/Options.h" namespace clice { diff --git a/src/command/toolchain.cpp b/src/command/toolchain.cpp index 3f6a1ba3..75dfb890 100644 --- a/src/command/toolchain.cpp +++ b/src/command/toolchain.cpp @@ -4,6 +4,7 @@ #include #include +#include "command/argument_parser.h" #include "eventide/reflection/enum.h" #include "support/filesystem.h" #include "support/logging.h" @@ -62,19 +63,6 @@ namespace clice::toolchain { namespace { -std::string print_argv(llvm::ArrayRef args) { - std::string s = "["; - if(!args.empty()) { - s += args.consume_front(); - for(auto arg: args) { - s += " "; - s += arg; - } - } - s += "]"; - return s; -} - std::optional execute_command(llvm::ArrayRef arguments, bool capture_stdout = false) { LOG_INFO("Execute command: {}", print_argv(arguments)); diff --git a/src/command/toolchain_provider.cpp b/src/command/toolchain_provider.cpp deleted file mode 100644 index 63d51481..00000000 --- a/src/command/toolchain_provider.cpp +++ /dev/null @@ -1,209 +0,0 @@ -#include "command/toolchain_provider.h" - -#include "command/driver.h" -#include "command/toolchain.h" -#include "support/filesystem.h" -#include "support/logging.h" -#include "support/object_pool.h" - -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" - -namespace clice { - -using ID = clang::driver::options::ID; - -struct ToolchainProvider::Impl { - llvm::BumpPtrAllocator allocator; - StringSet strings{allocator}; - ArgumentParser parser{&allocator}; - - /// Cache of toolchain query results, keyed by canonical toolchain key. - /// The key includes all flags except user-content options (-I/-D/-U/etc.), - /// so the cc1 result reflects the correct compiler semantics (-f/-W/-O/etc.) - /// and only user-content options need to be replayed after cache lookup. - llvm::StringMap> toolchain_cache; - - /// Options excluded from the cache key and toolchain query. These are - /// per-file user content (include paths, defines, forced includes) or - /// input files. They don't affect compiler semantics or system path - /// discovery, and are replayed into the cc1 result afterward. - static bool is_excluded_option(unsigned id) { - switch(id) { - case ID::OPT_I: - case ID::OPT_isystem: - case ID::OPT_iquote: - case ID::OPT_idirafter: - case ID::OPT_D: - case ID::OPT_U: - case ID::OPT_include: - case ID::OPT_INPUT: return true; - default: return false; - } - } - - /// Extract flags for the toolchain query. All options except user-content - /// options (-I/-D/-U/etc.) are included in both the cache key and query args, - /// so the cc1 result correctly reflects compiler semantics (-f/-W/-O/etc.). - struct ToolchainExtract { - std::string key; - std::vector query_args; - }; - - ToolchainExtract extract_toolchain_flags(this Impl& self, - llvm::StringRef file, - llvm::ArrayRef arguments) { - ToolchainExtract result; - - // Driver binary (first arg) — e.g. "clang++" vs "clang" affects language mode. - result.key += arguments[0]; - result.key += '\0'; - - // File extension affects language mode (C vs C++). - result.key += path::extension(file); - result.key += '\0'; - - result.query_args.push_back(arguments[0]); - - self.parser.parse( - llvm::ArrayRef(arguments).drop_front(), - [&](std::unique_ptr arg) { - auto id = arg->getOption().getID(); - if(is_excluded_option(id)) { - return; - } - - // Add option ID and all its values to the cache key. - result.key += std::to_string(id); - result.key += '\0'; - for(auto value: arg->getValues()) { - result.key += value; - result.key += '\0'; - } - - // Render the argument back to query args, respecting the option's - // render style (joined vs separate). - switch(arg->getOption().getRenderStyle()) { - case llvm::opt::Option::RenderJoinedStyle: { - // e.g. -std=c++17, --target=x86_64-linux-gnu - llvm::SmallString<64> joined(arg->getSpelling()); - if(arg->getNumValues() > 0) { - joined += arg->getValue(0); - } - result.query_args.push_back(self.strings.save(joined).data()); - break; - } - case llvm::opt::Option::RenderSeparateStyle: { - // e.g. -target x86_64-linux-gnu, -isysroot /path - result.query_args.push_back(self.strings.save(arg->getSpelling()).data()); - for(auto value: arg->getValues()) { - result.query_args.push_back(self.strings.save(value).data()); - } - break; - } - default: { - // Flags (no value): -nostdinc, -nostdinc++ - result.query_args.push_back(self.strings.save(arg->getSpelling()).data()); - break; - } - } - }, - [](int, int) { - // Unknown arguments are silently dropped — they can't be - // reliably parsed, so we skip them rather than corrupting - // the cache key. - }); - - return result; - } - - /// Query toolchain with caching. Returns the cached cc1 args for the given - /// toolchain key, running the expensive query only on cache miss. - llvm::ArrayRef query_toolchain_cached(this Impl& self, - llvm::StringRef file, - llvm::StringRef directory, - llvm::ArrayRef arguments) { - auto [key, query_args] = self.extract_toolchain_flags(file, arguments); - auto it = self.toolchain_cache.find(key); - if(it != self.toolchain_cache.end()) { - return it->second; - } - - LOG_WARN("Toolchain cache miss (spawning process): file={}, cache_size={}, key_len={}", - file, - self.toolchain_cache.size(), - key.size()); - - auto callback = [&](const char* s) -> const char* { - return self.strings.save(s).data(); - }; - toolchain::QueryParams params = {file, directory, query_args, callback}; - auto result = toolchain::query_toolchain(params); - - auto [entry, _] = self.toolchain_cache.try_emplace(std::move(key), std::move(result)); - return entry->second; - } -}; - -ToolchainProvider::ToolchainProvider() : self(std::make_unique()) {} - -ToolchainProvider::~ToolchainProvider() = default; - -ToolchainProvider::ToolchainProvider(ToolchainProvider&&) noexcept = default; - -ToolchainProvider& ToolchainProvider::operator=(ToolchainProvider&&) noexcept = default; - -llvm::ArrayRef ToolchainProvider::query_cached(llvm::StringRef file, - llvm::StringRef directory, - llvm::ArrayRef arguments) { - return self->query_toolchain_cached(file, directory, arguments); -} - -std::vector - ToolchainProvider::get_pending_queries(llvm::ArrayRef entries) { - llvm::StringMap seen_keys; - std::vector queries; - - for(auto& entry: entries) { - if(entry.arguments.empty()) { - continue; - } - - auto [key, query_args] = self->extract_toolchain_flags(entry.file, entry.arguments); - - // Skip if already cached or already queued. - if(self->toolchain_cache.count(key) || !seen_keys.try_emplace(key, true).second) { - continue; - } - - LOG_DEBUG("Pre-warm: new toolchain key (len={}) for file={}", key.size(), entry.file); - queries.push_back( - {std::move(key), std::move(query_args), entry.file.str(), entry.directory.str()}); - } - - LOG_INFO("Pre-warm: {} unique keys from {} entries, {} queries needed", - seen_keys.size(), - entries.size(), - queries.size()); - return queries; -} - -void ToolchainProvider::inject_results(llvm::ArrayRef results) { - for(auto& result: results) { - if(self->toolchain_cache.count(result.key)) { - continue; - } - std::vector saved; - saved.reserve(result.cc1_args.size()); - for(auto& arg: result.cc1_args) { - saved.push_back(self->strings.save(arg).data()); - } - self->toolchain_cache.try_emplace(result.key, std::move(saved)); - } -} - -bool ToolchainProvider::has_cached_entries() const { - return !self->toolchain_cache.empty(); -} - -} // namespace clice diff --git a/src/command/toolchain_provider.h b/src/command/toolchain_provider.h deleted file mode 100644 index 19de2920..00000000 --- a/src/command/toolchain_provider.h +++ /dev/null @@ -1,75 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" - -namespace clice { - -/// A pending toolchain query, ready to be executed (possibly in parallel). -struct ToolchainQuery { - std::string key; - std::vector query_args; - std::string file; - std::string directory; -}; - -/// Result of a toolchain query, to be injected back into the cache. -struct ToolchainResult { - std::string key; - std::vector cc1_args; -}; - -/// Manages toolchain queries and caching, separated from CompilationDatabase. -/// -/// Given compilation arguments, this component: -/// 1. Extracts toolchain-relevant flags (driver, target, sysroot, stdlib, etc.) -/// 2. Builds a canonical cache key from those flags -/// 3. Queries the compiler driver for system include paths (expensive: spawns a process) -/// 4. Caches results so identical toolchain configurations share one query -/// -/// Designed to be pluggable: CompilationDatabase holds a ToolchainProvider by -/// composition and delegates all toolchain operations to it. -class ToolchainProvider { -public: - ToolchainProvider(); - ~ToolchainProvider(); - ToolchainProvider(ToolchainProvider&&) noexcept; - ToolchainProvider& operator=(ToolchainProvider&&) noexcept; - - /// Query toolchain with caching. Returns cached cc1 args for the given - /// compilation arguments, running the expensive compiler query only on - /// cache miss. The returned ArrayRef is valid for the provider's lifetime. - llvm::ArrayRef query_cached(llvm::StringRef file, - llvm::StringRef directory, - llvm::ArrayRef arguments); - - /// Entry for batch pre-warming: file + directory + raw compilation arguments. - struct PendingEntry { - llvm::StringRef file; - llvm::StringRef directory; - llvm::SmallVector arguments; - }; - - /// Get pending queries for a batch of compilation entries. - /// Returns queries only for cache-miss keys (deduplicated). - std::vector get_pending_queries(llvm::ArrayRef entries); - - /// Inject pre-computed results into the cache. Strings are copied into - /// the provider's internal string pool. - void inject_results(llvm::ArrayRef results); - - /// Check if the cache has any entries. - bool has_cached_entries() const; - -private: - struct Impl; - std::unique_ptr self; -}; - -} // namespace clice diff --git a/src/server/master_server.cpp b/src/server/master_server.cpp index aabc8fa1..63fec8d9 100644 --- a/src/server/master_server.cpp +++ b/src/server/master_server.cpp @@ -192,14 +192,15 @@ et::task<> MasterServer::load_workspace() { co_return; } - auto updates = cdb.load_compile_database(cdb_path); - LOG_INFO("Loaded CDB from {} with {} entries", cdb_path, updates.size()); + auto count = cdb.load(cdb_path); + LOG_INFO("Loaded CDB from {} with {} entries", cdb_path, count); } void MasterServer::fill_compile_args(llvm::StringRef path, std::string& directory, std::vector& arguments) { - auto ctx = cdb.lookup(path, {.query_toolchain = true}); + auto results = cdb.lookup(path, {.query_toolchain = true}); + auto& ctx = results.front(); directory = ctx.directory.str(); arguments.clear(); for(auto* arg: ctx.arguments) { diff --git a/src/support/object_pool.h b/src/support/object_pool.h index 9afe67d7..d4e478a1 100644 --- a/src/support/object_pool.h +++ b/src/support/object_pool.h @@ -19,14 +19,14 @@ class StringSet { public: using ID = std::uint32_t; - explicit StringSet(llvm::BumpPtrAllocator& allocator) : allocator(allocator) { + explicit StringSet(llvm::BumpPtrAllocator* allocator) : allocator(allocator) { strings.emplace_back(); } StringSet(const StringSet&) = delete; - StringSet(StringSet&&) = delete; StringSet& operator=(const StringSet&) = delete; - StringSet& operator=(StringSet&&) = delete; + StringSet(StringSet&&) = default; + StringSet& operator=(StringSet&&) = default; ~StringSet() = default; ID get(llvm::StringRef s) { @@ -40,7 +40,7 @@ public: } const auto size = s.size(); - auto* p = allocator.Allocate(size + 1); + auto* p = allocator->Allocate(size + 1); std::memcpy(p, s.data(), size); p[size] = '\0'; @@ -60,7 +60,7 @@ public: } private: - llvm::BumpPtrAllocator& allocator; + llvm::BumpPtrAllocator* allocator; std::vector strings; llvm::DenseMap cache; }; @@ -98,12 +98,12 @@ class ObjectSet { public: using ID = std::uint32_t; - explicit ObjectSet(llvm::BumpPtrAllocator& allocator) : allocator(allocator) {} + explicit ObjectSet(llvm::BumpPtrAllocator* allocator) : allocator(allocator) {} ObjectSet(const ObjectSet&) = delete; - ObjectSet(ObjectSet&&) = delete; ObjectSet& operator=(const ObjectSet&) = delete; - ObjectSet& operator=(ObjectSet&&) = delete; + ObjectSet(ObjectSet&&) = default; + ObjectSet& operator=(ObjectSet&&) = default; ~ObjectSet() { if constexpr(!std::is_trivially_destructible_v) { @@ -137,7 +137,7 @@ public: it->second = id; objects[id] = o; } else { - auto* p = allocator.Allocate(1); + auto* p = allocator->Allocate(1); p = new (p) T(object); it->first = object_ptr{p}; @@ -170,7 +170,7 @@ public: } private: - llvm::BumpPtrAllocator& allocator; + llvm::BumpPtrAllocator* allocator; std::vector> objects; llvm::SmallVector, ID>> removed; llvm::DenseMap, ID> cache; diff --git a/tests/unit/command/argument_parser_tests.cpp b/tests/unit/command/argument_parser_tests.cpp new file mode 100644 index 00000000..32d2c5dd --- /dev/null +++ b/tests/unit/command/argument_parser_tests.cpp @@ -0,0 +1,91 @@ +#include "test/test.h" +#include "command/argument_parser.h" + +#include "clang/Driver/Options.h" + +namespace clice::testing { + +namespace { + +TEST_SUITE(ArgumentParser) { + +using option = clang::driver::options::ID; + +void expect_id(llvm::StringRef command, option opt) { + auto id = get_option_id(command); + ASSERT_TRUE(id.has_value()); + ASSERT_EQ(*id, int(opt)); +} + +TEST_CASE(GetOptionID) { + /// GroupClass + expect_id("-g", option::OPT_g_Flag); + + /// InputClass + expect_id("main.cpp", option::OPT_INPUT); + + /// UnknownClass + expect_id("--clice", option::OPT_UNKNOWN); + + /// FlagClass + expect_id("-v", option::OPT_v); + expect_id("-c", option::OPT_c); + expect_id("-pedantic", option::OPT_pedantic); + expect_id("--pedantic", option::OPT_pedantic); + + /// JoinedClass + expect_id("-Wno-unused-variable", option::OPT_W_Joined); + expect_id("-W*", option::OPT_W_Joined); + expect_id("-W", option::OPT_W_Joined); + + /// ValuesClass + + /// SeparateClass + expect_id("-Xclang", option::OPT_Xclang); + /// expect_id(GET_ID("-Xclang -ast-dump") , option::OPT_Xclang); + + /// RemainingArgsClass + + /// RemainingArgsJoinedClass + + /// CommaJoinedClass + expect_id("-Wl,", option::OPT_Wl_COMMA); + + /// MultiArgClass + + /// JoinedOrSeparateClass + expect_id("-o", option::OPT_o); + expect_id("-omain.o", option::OPT_o); + expect_id("-I", option::OPT_I); + expect_id("--include-directory=", option::OPT_I); + expect_id("-x", option::OPT_x); + expect_id("--language=", option::OPT_x); + + /// JoinedAndSeparateClass +}; + +TEST_CASE(PrintArgv) { + /// Normal args. + std::vector args = {"clang++", "-std=c++20", "main.cpp"}; + ASSERT_EQ(print_argv(args), "clang++ -std=c++20 main.cpp"); + + /// Empty args. + std::vector empty = {}; + ASSERT_EQ(print_argv(empty), ""); + + /// Args with spaces get quoted. + std::vector spaced = {"clang++", "-DFOO=hello world"}; + auto result = print_argv(spaced); + EXPECT_TRUE(llvm::StringRef(result).contains("\"")); + + /// Args with backslash get quoted/escaped. + std::vector escaped = {"clang++", "-DPATH=C:\\foo"}; + auto result2 = print_argv(escaped); + EXPECT_TRUE(llvm::StringRef(result2).contains("\"")); +}; + +}; // TEST_SUITE(ArgumentParser) + +} // namespace + +} // namespace clice::testing diff --git a/tests/unit/command/command_tests.cpp b/tests/unit/command/command_tests.cpp index 66be728d..aebf5181 100644 --- a/tests/unit/command/command_tests.cpp +++ b/tests/unit/command/command_tests.cpp @@ -1,102 +1,33 @@ #include "test/test.h" +#include "command/argument_parser.h" #include "command/command.h" -#include "compile/compilation.h" +#include "support/filesystem.h" -#include "llvm/ADT/ScopeExit.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Program.h" -#include "clang/Driver/Driver.h" +#include "llvm/Support/raw_ostream.h" namespace clice::testing { namespace { -std::string print_argv(llvm::ArrayRef args) { - std::string buf; - llvm::raw_string_ostream os(buf); - bool Sep = false; - for(llvm::StringRef arg: args) { - if(Sep) - os << ' '; - Sep = true; - if(llvm::all_of(arg, llvm::isPrint) && - arg.find_first_of(" \t\n\"\\") == llvm::StringRef::npos) { - os << arg; - continue; - } - os << '"'; - os.write_escaped(arg, /*UseHexEscapes=*/true); - os << '"'; - } - return std::move(os.str()); +using namespace std::literals; + +CommandOptions quiet_options() { + CommandOptions options; + options.suppress_logging = true; + return options; } +#define EXPECT_CONTAINS(haystack, needle) EXPECT_TRUE(llvm::StringRef(haystack).contains(needle)) +#define EXPECT_NOT_CONTAINS(haystack, needle) \ + EXPECT_FALSE(llvm::StringRef(haystack).contains(needle)) + TEST_SUITE(Command) { -using option = clang::driver::options::ID; - -void expect_id(llvm::StringRef command, option opt) { - auto id = CompilationDatabase::get_option_id(command); - ASSERT_TRUE(id.has_value()); - ASSERT_EQ(*id, int(opt)); -} - -TEST_CASE(GetOptionID) { - /// GroupClass - expect_id("-g", option::OPT_g_Flag); - - /// InputClass - expect_id("main.cpp", option::OPT_INPUT); - - /// UnknownClass - expect_id("--clice", option::OPT_UNKNOWN); - - /// FlagClass - expect_id("-v", option::OPT_v); - expect_id("-c", option::OPT_c); - expect_id("-pedantic", option::OPT_pedantic); - expect_id("--pedantic", option::OPT_pedantic); - - /// JoinedClass - expect_id("-Wno-unused-variable", option::OPT_W_Joined); - expect_id("-W*", option::OPT_W_Joined); - expect_id("-W", option::OPT_W_Joined); - - /// ValuesClass - - /// SeparateClass - expect_id("-Xclang", option::OPT_Xclang); - /// expect_id(GET_ID("-Xclang -ast-dump") , option::OPT_Xclang); - - /// RemainingArgsClass - - /// RemainingArgsJoinedClass - - /// CommaJoinedClass - expect_id("-Wl,", option::OPT_Wl_COMMA); - - /// MultiArgClass - - /// JoinedOrSeparateClass - expect_id("-o", option::OPT_o); - expect_id("-omain.o", option::OPT_o); - expect_id("-I", option::OPT_I); - expect_id("--include-directory=", option::OPT_I); - expect_id("-x", option::OPT_x); - expect_id("--language=", option::OPT_x); - - /// JoinedAndSeparateClass -}; - void expect_strip(llvm::StringRef argv, llvm::StringRef result) { CompilationDatabase database; llvm::StringRef file = "main.cpp"; database.add_command("fake/", file, argv); - - CommandOptions options; - options.suppress_logging = true; - ASSERT_EQ(result, print_argv(database.lookup(file, options).arguments)); + ASSERT_EQ(result, print_argv(database.lookup(file, quiet_options()).front().arguments)); }; TEST_CASE(DefaultFilters) { @@ -122,16 +53,13 @@ TEST_CASE(DefaultFilters) { }; TEST_CASE(Reuse) { - using namespace std::literals; - CompilationDatabase database; database.add_command("fake", "test.cpp", "clang++ -std=c++23 test.cpp"sv); database.add_command("fake", "test2.cpp", "clang++ -std=c++23 test2.cpp"sv); - CommandOptions options; - options.suppress_logging = true; - auto command1 = database.lookup("test.cpp", options).arguments; - auto command2 = database.lookup("test2.cpp", options).arguments; + auto options = quiet_options(); + auto command1 = database.lookup("test.cpp", options).front().arguments; + auto command2 = database.lookup("test2.cpp", options).front().arguments; ASSERT_EQ(command1.size(), 3U); ASSERT_EQ(command2.size(), 3U); @@ -158,42 +86,459 @@ TEST_CASE(RemoveAppend) { CompilationDatabase database; database.add_command("/fake", "main.cpp", args); - CommandOptions options; + auto options = quiet_options(); llvm::SmallVector remove; llvm::SmallVector append; remove = {"-DA"}; options.remove = remove; - auto result = database.lookup("main.cpp", options).arguments; + auto result = database.lookup("main.cpp", options).front().arguments; ASSERT_EQ(print_argv(result), "clang++ -D B=0 main.cpp"); remove = {"-D", "A"}; options.remove = remove; - result = database.lookup("main.cpp", options).arguments; + result = database.lookup("main.cpp", options).front().arguments; ASSERT_EQ(print_argv(result), "clang++ -D B=0 main.cpp"); remove = {"-DA", "-D", "B=0"}; options.remove = remove; - result = database.lookup("main.cpp", options).arguments; + result = database.lookup("main.cpp", options).front().arguments; ASSERT_EQ(print_argv(result), "clang++ main.cpp"); remove = {"-D*"}; options.remove = remove; - result = database.lookup("main.cpp", options).arguments; + result = database.lookup("main.cpp", options).front().arguments; ASSERT_EQ(print_argv(result), "clang++ main.cpp"); remove = {"-D", "*"}; options.remove = remove; - result = database.lookup("main.cpp", options).arguments; + result = database.lookup("main.cpp", options).front().arguments; ASSERT_EQ(print_argv(result), "clang++ main.cpp"); append = {"-D", "C"}; options.append = append; - result = database.lookup("main.cpp", options).arguments; + result = database.lookup("main.cpp", options).front().arguments; ASSERT_EQ(print_argv(result), "clang++ -D C main.cpp"); }; +TEST_CASE(DefaultFallback) { + /// Lookup for a file not in the CDB should synthesize a default command. + CompilationDatabase database; + + /// C++ files get "clang++ -std=c++20 ". + auto cpp_results = database.lookup("unknown.cpp"); + ASSERT_EQ(cpp_results.size(), 1U); + auto& cpp_ctx = cpp_results.front(); + ASSERT_EQ(cpp_ctx.arguments.size(), 3U); + ASSERT_EQ(cpp_ctx.arguments[0], "clang++"sv); + ASSERT_EQ(cpp_ctx.arguments[1], "-std=c++20"sv); + ASSERT_EQ(cpp_ctx.arguments[2], "unknown.cpp"sv); + + /// .hpp files also get C++ default. + auto hpp_results = database.lookup("header.hpp"); + ASSERT_EQ(hpp_results.front().arguments.size(), 3U); + ASSERT_EQ(hpp_results.front().arguments[0], "clang++"sv); + + /// .cc files also get C++ default. + auto cc_results = database.lookup("file.cc"); + ASSERT_EQ(cc_results.front().arguments.size(), 3U); + ASSERT_EQ(cc_results.front().arguments[0], "clang++"sv); + + /// C files get "clang ". + auto c_results = database.lookup("unknown.c"); + ASSERT_EQ(c_results.size(), 1U); + auto& c_ctx = c_results.front(); + ASSERT_EQ(c_ctx.arguments.size(), 2U); + ASSERT_EQ(c_ctx.arguments[0], "clang"sv); + ASSERT_EQ(c_ctx.arguments[1], "unknown.c"sv); + + /// Other extensions also get plain clang. + auto h_results = database.lookup("foo.h"); + ASSERT_EQ(h_results.front().arguments.size(), 2U); + ASSERT_EQ(h_results.front().arguments[0], "clang"sv); +}; + +TEST_CASE(MultiCommand) { + /// A file can have multiple compilation commands (e.g. different configs). + CompilationDatabase database; + database.add_command("fake", "main.cpp", "clang++ -std=c++17 main.cpp"sv); + database.add_command("fake", "main.cpp", "clang++ -std=c++20 main.cpp"sv); + database.add_command("fake", "other.cpp", "clang++ -std=c++23 other.cpp"sv); + + auto options = quiet_options(); + + auto results = database.lookup("main.cpp", options); + ASSERT_EQ(results.size(), 2U); + + /// Both commands are present (order depends on insert position). + bool has_17 = false, has_20 = false; + for(auto& ctx: results) { + auto argv = print_argv(ctx.arguments); + if(llvm::StringRef(argv).contains("-std=c++17")) + has_17 = true; + if(llvm::StringRef(argv).contains("-std=c++20")) + has_20 = true; + } + EXPECT_TRUE(has_17); + EXPECT_TRUE(has_20); + + /// other.cpp has only one. + auto other = database.lookup("other.cpp", options); + ASSERT_EQ(other.size(), 1U); +}; + +TEST_CASE(CodegenFilter) { + /// Codegen-only options should be stripped from the canonical command. + CompilationDatabase database; + database.add_command( + "fake", + "main.cpp", + "clang++ -std=c++20 -fPIC -fno-omit-frame-pointer -fstack-protector-strong " "-fdata-sections -ffunction-sections -flto -fcolor-diagnostics -g main.cpp"sv); + + auto result = database.lookup("main.cpp", quiet_options()).front().arguments; + auto argv = print_argv(result); + + /// -std=c++20 must survive (semantic). + EXPECT_CONTAINS(argv, "-std=c++20"); + + /// All codegen flags must be stripped. + EXPECT_NOT_CONTAINS(argv, "-fPIC"); + EXPECT_NOT_CONTAINS(argv, "-fno-omit-frame-pointer"); + EXPECT_NOT_CONTAINS(argv, "-fstack-protector"); + EXPECT_NOT_CONTAINS(argv, "-fdata-sections"); + EXPECT_NOT_CONTAINS(argv, "-ffunction-sections"); + EXPECT_NOT_CONTAINS(argv, "-flto"); + EXPECT_NOT_CONTAINS(argv, "-fcolor-diagnostics"); + EXPECT_NOT_CONTAINS(argv, "-g"); +}; + +TEST_CASE(DependencyScanFilter) { + /// Dependency scan options should be stripped. + CompilationDatabase database; + database.add_command("fake", + "main.cpp", + "clang++ -std=c++20 -MD -MF main.d -MT main.o main.cpp"sv); + + auto result = database.lookup("main.cpp", quiet_options()).front().arguments; + auto argv = print_argv(result); + + EXPECT_CONTAINS(argv, "-std=c++20"); + EXPECT_NOT_CONTAINS(argv, "-MD"); + EXPECT_NOT_CONTAINS(argv, "-MF"); + EXPECT_NOT_CONTAINS(argv, "-MT"); + EXPECT_NOT_CONTAINS(argv, "main.d"); +}; + +TEST_CASE(ModuleFilter) { + /// Module-related options should be stripped. + expect_strip("clang++ -std=c++20 -fmodule-file=mod.pcm main.cpp", + "clang++ -std=c++20 main.cpp"); + expect_strip("clang++ -std=c++20 -fprebuilt-module-path=/tmp main.cpp", + "clang++ -std=c++20 main.cpp"); +}; + +TEST_CASE(UserContentClassification) { + /// -D, -U, -include go to per-file patch; -std=, -W go to canonical. + /// Files with different -D but same -std/-W share canonical. + CompilationDatabase database; + database.add_command("fake", "a.cpp", "clang++ -std=c++20 -Wall -DA=1 -DFOO a.cpp"sv); + database.add_command("fake", "b.cpp", "clang++ -std=c++20 -Wall -DB=2 b.cpp"sv); + + auto options = quiet_options(); + + auto a_argv = print_argv(database.lookup("a.cpp", options).front().arguments); + auto b_argv = print_argv(database.lookup("b.cpp", options).front().arguments); + + /// Both must contain canonical flags. + EXPECT_CONTAINS(a_argv, "-std=c++20"); + EXPECT_CONTAINS(a_argv, "-Wall"); + EXPECT_CONTAINS(b_argv, "-std=c++20"); + EXPECT_CONTAINS(b_argv, "-Wall"); + + /// a.cpp has its own defines. + EXPECT_CONTAINS(a_argv, "-D"); + EXPECT_CONTAINS(a_argv, "A=1"); + EXPECT_CONTAINS(a_argv, "FOO"); + + /// b.cpp has its own defines. + EXPECT_CONTAINS(b_argv, "-D"); + EXPECT_CONTAINS(b_argv, "B=2"); + + /// Cross check: a.cpp should not have B=2, b.cpp should not have A=1. + EXPECT_NOT_CONTAINS(a_argv, "B=2"); + EXPECT_NOT_CONTAINS(b_argv, "A=1"); +}; + +TEST_CASE(IncludePathAbsolutize) { + /// Relative include paths should be absolutized against the directory. + CompilationDatabase database; + database.add_command("/project/build", + "main.cpp", + "clang++ -Iinclude -isystem sys/inc -iquote ../src main.cpp"sv); + + auto result = database.lookup("main.cpp", quiet_options()).front().arguments; + + /// Check each argument individually with separator normalization + /// (print_argv escapes backslashes, breaking convert_to_slash on Windows). + auto has_path = [](llvm::ArrayRef args, llvm::StringRef needle) { + for(auto* arg: args) { + if(path::convert_to_slash(arg).find(needle.str()) != std::string::npos) + return true; + } + return false; + }; + + /// Relative paths must be resolved against /project/build. + EXPECT_TRUE(has_path(result, "/project/build/include")); + EXPECT_TRUE(has_path(result, "/project/build/sys/inc")); + /// ../src relative to /project/build → /project/src (or /project/build/../src) + EXPECT_TRUE(has_path(result, "/project/")); + + /// Absolute paths should be kept as-is. + CompilationDatabase database2; + database2.add_command("/project/build", "main.cpp", "clang++ -I/usr/include main.cpp"sv); + + auto result2 = database2.lookup("main.cpp", quiet_options()).front().arguments; + EXPECT_TRUE(has_path(result2, "/usr/include")); +}; + +TEST_CASE(SemanticOptionsPreserved) { + /// Flags that affect semantics must survive. + expect_strip("clang++ -std=c++20 -fno-exceptions -fno-rtti -pedantic main.cpp", + "clang++ -std=c++20 -fno-exceptions -fno-rtti -pedantic main.cpp"); + expect_strip("clang++ -std=c++20 -Wall -Werror main.cpp", + "clang++ -std=c++20 -Wall -Werror main.cpp"); +}; + +TEST_CASE(LookupSearchConfig) { + CompilationDatabase database; + database.add_command( + "/project", + "main.cpp", + "clang++ -std=c++20 -I/usr/include -isystem /usr/local/include main.cpp"sv); + + ASSERT_FALSE(database.has_cached_configs()); + + auto options = quiet_options(); + auto config = database.lookup_search_config("main.cpp", options); + + /// Should have search dirs from the command. + EXPECT_FALSE(config.dirs.empty()); + + /// Second call should hit cache. + EXPECT_TRUE(database.has_cached_configs()); + auto config2 = database.lookup_search_config("main.cpp", options); + ASSERT_EQ(config.dirs.size(), config2.dirs.size()); +}; + +TEST_CASE(ResolvePath) { + CompilationDatabase database; + database.add_command("fake", "test/main.cpp", "clang++ test/main.cpp"sv); + + /// After add_command, lookup should work and resolve_path via the file in arguments. + auto result = database.lookup("test/main.cpp", quiet_options()).front().arguments; + /// The last argument is the file, resolved from PathPool. + ASSERT_EQ(result.back(), "test/main.cpp"sv); +}; + +TEST_CASE(MoveSemantics) { + CompilationDatabase db1; + db1.add_command("fake", "main.cpp", "clang++ -std=c++23 main.cpp"sv); + + /// Move construct. + CompilationDatabase db2 = std::move(db1); + + auto options = quiet_options(); + auto result = db2.lookup("main.cpp", options).front().arguments; + ASSERT_EQ(result.size(), 3U); + ASSERT_EQ(result[1], "-std=c++23"sv); + + /// Move assign. + CompilationDatabase db3; + db3 = std::move(db2); + result = db3.lookup("main.cpp", options).front().arguments; + ASSERT_EQ(result.size(), 3U); + ASSERT_EQ(result[1], "-std=c++23"sv); +}; + +/// Write JSON to a temp file, load into a CDB, remove the file. +/// Returns the number of entries loaded. +std::size_t load_json(CompilationDatabase& database, llvm::StringRef json) { + auto path = fs::createTemporaryFile("cdb", "json"); + if(!path) + return 0; + { + std::error_code ec; + llvm::raw_fd_ostream out(*path, ec); + if(ec) + return 0; + out << json; + } + auto count = database.load(*path); + llvm::sys::fs::remove(*path); + return count; +} + +TEST_CASE(LoadMixedFormats) { + /// "arguments" array and "command" string can coexist in the same CDB. + /// Use relative file paths so that the test works on both Linux and Windows + /// (paths like "/src/a.cpp" are not absolute on Windows — no drive letter). + CompilationDatabase database; + auto count = load_json(database, R"([ + {"directory": "/build", "file": "a.cpp", + "arguments": ["clang++", "-std=c++20", "a.cpp"]}, + {"directory": "/build", "file": "b.cpp", + "command": "clang++ -std=c++23 b.cpp"} + ])"); + + ASSERT_EQ(count, 2U); + + auto options = quiet_options(); + + auto a = database.lookup(path::join("/build", "a.cpp"), options); + ASSERT_EQ(a.size(), 1U); + EXPECT_CONTAINS(print_argv(a.front().arguments), "-std=c++20"); + + auto b = database.lookup(path::join("/build", "b.cpp"), options); + ASSERT_EQ(b.size(), 1U); + EXPECT_CONTAINS(print_argv(b.front().arguments), "-std=c++23"); +}; + +TEST_CASE(LoadErrorRecovery) { + /// Bad entries should be skipped; good entries still load. + CompilationDatabase database; + auto count = load_json(database, R"([ + {"file": "no_dir.cpp", + "arguments": ["clang++", "no_dir.cpp"]}, + {"directory": "/build", + "arguments": ["clang++", "no_file.cpp"]}, + {"directory": "/build", "file": "no_args.cpp"}, + {"directory": "/build", "file": "good.cpp", + "arguments": ["clang++", "-std=c++20", "good.cpp"]}, + 42, + {"directory": "/build", "file": "also_good.cpp", + "command": "clang++ -Wall also_good.cpp"} + ])"); + + /// Only the two valid entries should survive. + ASSERT_EQ(count, 2U); + + auto options = quiet_options(); + + auto good = database.lookup(path::join("/build", "good.cpp"), options); + ASSERT_EQ(good.size(), 1U); + EXPECT_CONTAINS(print_argv(good.front().arguments), "-std=c++20"); + + auto also = database.lookup(path::join("/build", "also_good.cpp"), options); + ASSERT_EQ(also.size(), 1U); + EXPECT_CONTAINS(print_argv(also.front().arguments), "-Wall"); +}; + +TEST_CASE(LoadEmptyCommand) { + /// Whitespace-only or empty "command" should not crash. + CompilationDatabase database; + auto count = load_json(database, R"([ + {"directory": "/build", "file": "empty.cpp", "command": ""}, + {"directory": "/build", "file": "spaces.cpp", "command": " "}, + {"directory": "/build", "file": "ok.cpp", + "command": "clang++ -std=c++20 ok.cpp"} + ])"); + + /// Only the valid entry survives. + ASSERT_EQ(count, 1U); + + auto ok = database.lookup(path::join("/build", "ok.cpp"), quiet_options()); + ASSERT_EQ(ok.size(), 1U); + EXPECT_CONTAINS(print_argv(ok.front().arguments), "-std=c++20"); +}; + +TEST_CASE(LoadReload) { + /// Second load() replaces all entries from the first. + CompilationDatabase database; + + auto file_a = path::join("/build", "a.cpp"); + auto file_b = path::join("/build", "b.cpp"); + + load_json(database, R"([ + {"directory": "/build", "file": "a.cpp", + "arguments": ["clang++", "-std=c++17", "a.cpp"]} + ])"); + + auto options = quiet_options(); + + auto a = database.lookup(file_a, options); + ASSERT_EQ(a.size(), 1U); + EXPECT_CONTAINS(print_argv(a.front().arguments), "-std=c++17"); + + /// Reload with different content. + auto count = load_json(database, R"([ + {"directory": "/build", "file": "b.cpp", + "arguments": ["clang++", "-std=c++23", "b.cpp"]} + ])"); + + ASSERT_EQ(count, 1U); + + /// Old entry gone (falls back to default). + auto a2 = database.lookup(file_a, options); + ASSERT_EQ(a2.size(), 1U); + EXPECT_NOT_CONTAINS(print_argv(a2.front().arguments), "-std=c++17"); + + /// New entry present. + auto b = database.lookup(file_b, options); + ASSERT_EQ(b.size(), 1U); + EXPECT_CONTAINS(print_argv(b.front().arguments), "-std=c++23"); +}; + +TEST_CASE(LoadCommandQuoting) { + /// "command" string with spaces in paths and quoted defines. + CompilationDatabase database; + auto count = load_json(database, R"([ + {"directory": "/build", "file": "main.cpp", + "command": "clang++ -std=c++20 \"-DMSG=hello world\" -I\"/path with spaces\" main.cpp"} + ])"); + + ASSERT_EQ(count, 1U); + + auto result = database.lookup(path::join("/build", "main.cpp"), quiet_options()); + ASSERT_EQ(result.size(), 1U); + auto argv = print_argv(result.front().arguments); + + /// The define and include path should be present after shell tokenization. + EXPECT_CONTAINS(argv, "hello world"); + EXPECT_CONTAINS(argv, "/path with spaces"); +}; + +TEST_CASE(LoadRelativePath) { + /// load() should resolve relative file paths against directory. + CompilationDatabase database; + auto count = load_json(database, R"([ + {"directory": "/project/build", "file": "src/main.cpp", + "arguments": ["clang++", "-std=c++20", "src/main.cpp"]}, + {"directory": "/other/build", "file": "src/main.cpp", + "arguments": ["clang++", "-std=c++17", "src/main.cpp"]} + ])"); + + ASSERT_EQ(count, 2U); + + auto options = quiet_options(); + + /// Lookup by the resolved absolute path (use path::join for correct separator). + auto results = database.lookup(path::join("/project/build", "src/main.cpp"), options); + ASSERT_EQ(results.size(), 1U); + EXPECT_CONTAINS(print_argv(results.front().arguments), "-std=c++20"); + + auto results2 = database.lookup(path::join("/other/build", "src/main.cpp"), options); + ASSERT_EQ(results2.size(), 1U); + EXPECT_CONTAINS(print_argv(results2.front().arguments), "-std=c++17"); + + /// Relative path lookup should not match (different path_id). + auto results3 = database.lookup("src/main.cpp", options); + ASSERT_EQ(results3.size(), 1U); + /// Falls back to default command since no match. + EXPECT_CONTAINS(print_argv(results3.front().arguments), "clang"); +}; + TEST_CASE(Module) { // TODO: revisit module command handling. } @@ -201,134 +546,32 @@ TEST_CASE(Module) { TEST_CASE(ResourceDir) { // When query_toolchain is enabled, resource dir is injected automatically. CompilationDatabase database; - using namespace std::literals; database.add_command("/fake", "main.cpp", "clang++ -std=c++23 test.cpp"sv); // Without query_toolchain, no resource dir injection. - auto args_no_tc = database.lookup("main.cpp").arguments; + auto args_no_tc = database.lookup("main.cpp").front().arguments; ASSERT_EQ(args_no_tc.size(), 3U); ASSERT_EQ(args_no_tc[0], "clang++"sv); ASSERT_EQ(args_no_tc[1], "-std=c++23"sv); ASSERT_EQ(args_no_tc[2], "main.cpp"sv); // With query_toolchain, resource dir is present in the result. - auto args_tc = database.lookup("main.cpp", {.query_toolchain = true}).arguments; + auto args_tc = database.lookup("main.cpp", {.query_toolchain = true}).front().arguments; bool has_resource_dir = false; for(size_t i = 0; i + 1 < args_tc.size(); ++i) { - if(args_tc[i] == llvm::StringRef("-resource-dir")) { - EXPECT_EQ(llvm::StringRef(args_tc[i + 1]), CompilationDatabase::resource_dir()); + if(args_tc[i] == "-resource-dir"sv) { + EXPECT_EQ(llvm::StringRef(args_tc[i + 1]), resource_dir()); has_resource_dir = true; break; } } - EXPECT_TRUE(has_resource_dir); -}; - -void expect_load(llvm::StringRef content, - llvm::StringRef workspace, - llvm::StringRef file, - llvm::StringRef directory, - llvm::ArrayRef arguments) { - CompilationDatabase database; - auto loaded = database.load_commands(content, workspace); - ASSERT_TRUE(loaded.has_value()); - - CommandOptions options; - options.suppress_logging = true; - auto info = database.lookup(file, options); - - ASSERT_EQ(info.directory, directory); - ASSERT_EQ(info.arguments.size(), arguments.size()); - for(size_t i = 0; i < arguments.size(); i++) { - llvm::StringRef arg = info.arguments[i]; - llvm::StringRef expect_arg = arguments[i]; - ASSERT_EQ(arg, expect_arg); + if(resource_dir().empty()) { + EXPECT_FALSE(has_resource_dir); + } else { + EXPECT_TRUE(has_resource_dir); } }; -/// TODO: add windows path testcase -// skip_unless(Linux || macOS) / test("LoadAbsoluteUnixStyle") = [expect_load] { -// constexpr const char* cmake = R"([ -// { -// "directory": "/home/developer/clice/build", -// "command": "/usr/bin/c++ -I/home/developer/clice/include -// -I/home/developer/clice/build/_deps/libuv-src/include -isystem -// /home/developer/clice/build/_deps/tomlplusplus-src/include -std=gnu++23 -fno-rtti -// -fno-exceptions -Wno-deprecated-declarations -Wno-undefined-inline -O3 -o -// CMakeFiles/clice-core.dir/src/Driver/clice.cpp.o -c -// /home/developer/clice/src/Driver/clice.cpp", "file": -// "/home/developer/clice/src/Driver/clice.cpp", "output": -// "CMakeFiles/clice-core.dir/src/Driver/clice.cpp.o" -// } -// ])"; -// -// expect_load(cmake, -// "/home/developer/clice", -// "/home/developer/clice/src/Driver/clice.cpp", -// "/home/developer/clice/build", -// { -// "/usr/bin/c++", -// "-I", -// "/home/developer/clice/include", -// "-I", -// "/home/developer/clice/build/_deps/libuv-src/include", -// "-isystem", -// "/home/developer/clice/build/_deps/tomlplusplus-src/include", -// "-std=gnu++23", -// "-fno-rtti", -// "-fno-exceptions", -// "-Wno-deprecated-declarations", -// "-Wno-undefined-inline", -// "-O3", -// "/home/developer/clice/src/Driver/clice.cpp", -// }); -// }; - -// skip_unless(Linux || macOS) / test("LoadRelativeUnixStyle") = [expect_load] { -// constexpr const char* xmake = R"([ -// { -// "directory": "/home/developer/clice", -// "arguments": ["/usr/bin/clang", "-c", "-Qunused-arguments", "-m64", "-g", "-O0", -// "-std=c++23", "-Iinclude", "-I/home/developer/clice/include", "-fno-exceptions", -// "-fno-cxx-exceptions", "-isystem", -// "/home/developer/.xmake/packages/l/libuv/v1.51.0/3ca1562e6c5d485f9ccafec8e0c50b6f/include", -// "-isystem", -// "/home/developer/.xmake/packages/t/toml++/v3.4.0/bde7344d843e41928b1d325fe55450e0/include", -// "-fsanitize=address", "-fno-rtti", "-o", -// "build/.objs/clice/linux/x86_64/debug/src/Driver/clice.cc.o", "src/Driver/clice.cc"], -// "file": "src/Driver/clice.cc" -// } -// ])"; -// -// expect_load( -// xmake, -// "/home/developer/clice", -// "/home/developer/clice/src/Driver/clice.cc", -// "/home/developer/clice", -// { -// "/usr/bin/clang", -// "-Qunused-arguments", -// "-m64", -// "-g", -// "-O0", -// "-std=c++23", -// // parameter "-Iinclude" in CDB, should be convert to absolute path -// "-I", -// "/home/developer/clice/include", -// "-I", -// "/home/developer/clice/include", -// "-fno-exceptions", -// "-fno-cxx-exceptions", -// "-isystem", -// "/home/developer/.xmake/packages/l/libuv/v1.51.0/3ca1562e6c5d485f9ccafec8e0c50b6f/include", -// "-isystem", -// "/home/developer/.xmake/packages/t/toml++/v3.4.0/bde7344d843e41928b1d325fe55450e0/include", -// "-fsanitize=address", -// "-fno-rtti", -// "/home/developer/clice/src/Driver/clice.cc", -// }); -//}; - }; // TEST_SUITE(Command) } // namespace diff --git a/tests/unit/command/toolchain_provider_tests.cpp b/tests/unit/command/toolchain_provider_tests.cpp index d1bbfcf8..768c656d 100644 --- a/tests/unit/command/toolchain_provider_tests.cpp +++ b/tests/unit/command/toolchain_provider_tests.cpp @@ -1,5 +1,5 @@ #include "test/test.h" -#include "command/toolchain_provider.h" +#include "command/command.h" namespace clice::testing { @@ -8,25 +8,25 @@ namespace { TEST_SUITE(ToolchainProvider) { TEST_CASE(InitiallyEmpty) { - ToolchainProvider provider; - EXPECT_FALSE(provider.has_cached_entries()); + CompilationDatabase cdb; + EXPECT_FALSE(cdb.has_cached_toolchain()); } TEST_CASE(InjectResultsPopulatesCache) { - ToolchainProvider provider; + CompilationDatabase cdb; std::vector results; results.push_back({ "key1", {"-cc1", "-triple", "x86_64-linux-gnu"} }); - provider.inject_results(results); + cdb.inject_results(results); - EXPECT_TRUE(provider.has_cached_entries()); + EXPECT_TRUE(cdb.has_cached_toolchain()); } TEST_CASE(InjectResultsSkipsDuplicateKeys) { - ToolchainProvider provider; + CompilationDatabase cdb; std::vector results; results.push_back({ @@ -37,141 +37,140 @@ TEST_CASE(InjectResultsSkipsDuplicateKeys) { "key1", {"-cc1", "-triple", "aarch64"} }); - provider.inject_results(results); + cdb.inject_results(results); - // After injection, query_cached with same key should return the first result. - // We verify indirectly: inject twice, cache should still work. - EXPECT_TRUE(provider.has_cached_entries()); + // After injection, cache should still work. + EXPECT_TRUE(cdb.has_cached_toolchain()); } TEST_CASE(GetPendingQueriesReturnsUncachedOnly) { - ToolchainProvider provider; + CompilationDatabase cdb; // Two entries with same flags but different user-content options. // They share the same cache key, so only one query is needed. - ToolchainProvider::PendingEntry entry1; + CompilationDatabase::PendingEntry entry1; entry1.file = "a.cpp"; entry1.directory = "/tmp"; entry1.arguments = {"clang++", "-std=c++17", "-DFOO", "a.cpp"}; - ToolchainProvider::PendingEntry entry2; + CompilationDatabase::PendingEntry entry2; entry2.file = "b.cpp"; entry2.directory = "/tmp"; entry2.arguments = {"clang++", "-std=c++17", "-DBAR", "b.cpp"}; - auto queries = provider.get_pending_queries({entry1, entry2}); + auto queries = cdb.get_pending_queries({entry1, entry2}); // Same driver, same extension, same non-content flags → one query. EXPECT_EQ(queries.size(), 1u); } TEST_CASE(GetPendingQueriesDeduplicatesSameKey) { - ToolchainProvider provider; + CompilationDatabase cdb; // Three entries with same driver and same flags (only -I/-D differ, // which are user-content options excluded from the cache key). - ToolchainProvider::PendingEntry entry1; + CompilationDatabase::PendingEntry entry1; entry1.file = "x.cpp"; entry1.directory = "/project"; entry1.arguments = {"clang++", "-Wall", "-O2", "-DFOO=1", "-I/inc/a", "x.cpp"}; - ToolchainProvider::PendingEntry entry2; + CompilationDatabase::PendingEntry entry2; entry2.file = "y.cpp"; entry2.directory = "/project"; entry2.arguments = {"clang++", "-Wall", "-O2", "-DBAR=2", "-I/inc/b", "y.cpp"}; - ToolchainProvider::PendingEntry entry3; + CompilationDatabase::PendingEntry entry3; entry3.file = "z.cpp"; entry3.directory = "/project"; entry3.arguments = {"clang++", "-Wall", "-O2", "-Uhello", "z.cpp"}; - auto queries = provider.get_pending_queries({entry1, entry2, entry3}); + auto queries = cdb.get_pending_queries({entry1, entry2, entry3}); // Same driver, same extension, same non-content flags → same key. EXPECT_EQ(queries.size(), 1u); } TEST_CASE(GetPendingQueriesDifferentDrivers) { - ToolchainProvider provider; + CompilationDatabase cdb; - ToolchainProvider::PendingEntry entry1; + CompilationDatabase::PendingEntry entry1; entry1.file = "a.cpp"; entry1.directory = "/tmp"; entry1.arguments = {"clang++", "a.cpp"}; - ToolchainProvider::PendingEntry entry2; + CompilationDatabase::PendingEntry entry2; entry2.file = "b.cpp"; entry2.directory = "/tmp"; entry2.arguments = {"g++", "b.cpp"}; - auto queries = provider.get_pending_queries({entry1, entry2}); + auto queries = cdb.get_pending_queries({entry1, entry2}); // Different drivers → different keys → two queries. EXPECT_EQ(queries.size(), 2u); } TEST_CASE(GetPendingQueriesDifferentTargets) { - ToolchainProvider provider; + CompilationDatabase cdb; - ToolchainProvider::PendingEntry entry1; + CompilationDatabase::PendingEntry entry1; entry1.file = "a.cpp"; entry1.directory = "/tmp"; entry1.arguments = {"clang++", "--target=x86_64-linux-gnu", "a.cpp"}; - ToolchainProvider::PendingEntry entry2; + CompilationDatabase::PendingEntry entry2; entry2.file = "b.cpp"; entry2.directory = "/tmp"; entry2.arguments = {"clang++", "--target=aarch64-linux-gnu", "b.cpp"}; - auto queries = provider.get_pending_queries({entry1, entry2}); + auto queries = cdb.get_pending_queries({entry1, entry2}); // Different targets → different keys → two queries. EXPECT_EQ(queries.size(), 2u); } TEST_CASE(GetPendingQueriesDifferentLanguageMode) { - ToolchainProvider provider; + CompilationDatabase cdb; // clang foo.h (default: c-header) vs clang -x c++ foo.h (c++) // produce different system include paths, so they must have different keys. - ToolchainProvider::PendingEntry entry1; + CompilationDatabase::PendingEntry entry1; entry1.file = "foo.h"; entry1.directory = "/tmp"; entry1.arguments = {"clang", "foo.h"}; - ToolchainProvider::PendingEntry entry2; + CompilationDatabase::PendingEntry entry2; entry2.file = "foo.h"; entry2.directory = "/tmp"; entry2.arguments = {"clang", "-x", "c++", "foo.h"}; - auto queries = provider.get_pending_queries({entry1, entry2}); + auto queries = cdb.get_pending_queries({entry1, entry2}); // -x c++ changes language mode → different keys → two queries. EXPECT_EQ(queries.size(), 2u); } TEST_CASE(GetPendingQueriesSkipsEmptyArgs) { - ToolchainProvider provider; + CompilationDatabase cdb; - ToolchainProvider::PendingEntry empty; + CompilationDatabase::PendingEntry empty; empty.file = "empty.cpp"; empty.directory = "/tmp"; // arguments is empty - ToolchainProvider::PendingEntry valid; + CompilationDatabase::PendingEntry valid; valid.file = "valid.cpp"; valid.directory = "/tmp"; valid.arguments = {"clang++", "valid.cpp"}; - auto queries = provider.get_pending_queries({empty, valid}); + auto queries = cdb.get_pending_queries({empty, valid}); EXPECT_EQ(queries.size(), 1u); } TEST_CASE(InjectThenGetPendingSkipsCached) { - ToolchainProvider provider; + CompilationDatabase cdb; // First, get pending queries to learn what key is generated. - ToolchainProvider::PendingEntry entry; + CompilationDatabase::PendingEntry entry; entry.file = "test.cpp"; entry.directory = "/tmp"; entry.arguments = {"clang++", "test.cpp"}; - auto queries = provider.get_pending_queries({entry}); + auto queries = cdb.get_pending_queries({entry}); ASSERT_EQ(queries.size(), 1u); // Inject a result for that key. @@ -180,23 +179,13 @@ TEST_CASE(InjectThenGetPendingSkipsCached) { queries[0].key, {"-cc1", "-triple", "x86_64-linux-gnu"} }); - provider.inject_results(results); + cdb.inject_results(results); // Now the same entry should produce no pending queries. - auto queries2 = provider.get_pending_queries({entry}); + auto queries2 = cdb.get_pending_queries({entry}); EXPECT_EQ(queries2.size(), 0u); } -TEST_CASE(MoveConstruction) { - ToolchainProvider provider; - std::vector results; - results.push_back({"key1", {"-cc1"}}); - provider.inject_results(results); - - ToolchainProvider moved(std::move(provider)); - EXPECT_TRUE(moved.has_cached_entries()); -} - }; // TEST_SUITE(ToolchainProvider) } // namespace diff --git a/tests/unit/command/toolchain_tests.cpp b/tests/unit/command/toolchain_tests.cpp index 1de5c47a..b31277a6 100644 --- a/tests/unit/command/toolchain_tests.cpp +++ b/tests/unit/command/toolchain_tests.cpp @@ -1,4 +1,5 @@ #include "test/test.h" +#include "command/argument_parser.h" #include "command/command.h" #include "command/toolchain.h" #include "compile/compilation.h" @@ -6,7 +7,6 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/StringSaver.h" -#include "clang/Driver/Driver.h" namespace clice::testing { namespace { @@ -52,10 +52,8 @@ TEST_CASE(GCC, {.skip = !(CIEnvironment && (Windows || Linux))}) { llvm::BumpPtrAllocator a; llvm::StringSaver s(a); auto arguments = toolchain::query_toolchain({ - .arguments = {"g++", - "-std=c++23", "-resource-dir", - CompilationDatabase::resource_dir().data(), - "-xc++", file->c_str()}, + .arguments = + {"g++", "-std=c++23", "-resource-dir", resource_dir().data(), "-xc++", file->c_str()}, .callback = [&](const char* str) { return s.save(str).data(); } }); @@ -93,7 +91,7 @@ TEST_CASE(Clang, {.skip = !CIEnvironment}) { auto arguments = toolchain::query_toolchain({ .arguments = {"clang++", "-std=c++23", "-resource-dir", - CompilationDatabase::resource_dir().data(), + resource_dir().data(), "-xc++", file->c_str()}, .callback = [&](const char* str) { return s.save(str).data(); } }); diff --git a/tests/unit/server/stateless_worker_tests.cpp b/tests/unit/server/stateless_worker_tests.cpp index c9a57e56..bd2f826a 100644 --- a/tests/unit/server/stateless_worker_tests.cpp +++ b/tests/unit/server/stateless_worker_tests.cpp @@ -94,12 +94,8 @@ TEST_CASE(BuildPCHRequest) { worker::BuildPCHParams params; params.file = hdr.path; params.directory = "/tmp"; - params.arguments = {"clang++", - "-resource-dir", - std::string(CompilationDatabase::resource_dir()), - "-x", - "c++-header", - hdr.path}; + params.arguments = + {"clang++", "-resource-dir", std::string(resource_dir()), "-x", "c++-header", hdr.path}; params.content = "#pragma once\nint pch_global = 42;\n"; auto result = co_await w.peer->send_request(params); @@ -157,7 +153,7 @@ TEST_CASE(BuildPCMRequest) { params.directory = "/tmp"; params.arguments = {"clang++", "-resource-dir", - std::string(CompilationDatabase::resource_dir()), + std::string(resource_dir()), "-std=c++20", "--precompile", src.path}; diff --git a/tests/unit/server/worker_test_helpers.h b/tests/unit/server/worker_test_helpers.h index 34bef875..bb2954c8 100644 --- a/tests/unit/server/worker_test_helpers.h +++ b/tests/unit/server/worker_test_helpers.h @@ -10,6 +10,7 @@ #include #endif +#include "command/argument_parser.h" #include "command/command.h" #include "eventide/async/async.h" #include "eventide/ipc/peer.h" @@ -36,7 +37,7 @@ namespace et = eventide; /// Resolve path to the clice binary for spawning workers. inline std::string clice_binary() { - auto res_dir = CompilationDatabase::resource_dir(); + auto res_dir = resource_dir(); // res_dir is /lib/clang/... // clice binary is at /bin/clice auto build_dir = llvm::sys::path::parent_path( @@ -74,12 +75,8 @@ struct TempFile { /// Build compile arguments for a source file, including -resource-dir. inline std::vector make_args(const std::string& file_path, const std::string& extra = "") { - std::vector args = {"clang++", - "-fsyntax-only", - "-resource-dir", - std::string(CompilationDatabase::resource_dir()), - "-c", - file_path}; + std::vector args = + {"clang++", "-fsyntax-only", "-resource-dir", std::string(resource_dir()), "-c", file_path}; if(!extra.empty()) { args.insert(args.begin() + 1, extra); } diff --git a/tests/unit/test/tester.cpp b/tests/unit/test/tester.cpp index 3509429c..63a83027 100644 --- a/tests/unit/test/tester.cpp +++ b/tests/unit/test/tester.cpp @@ -17,7 +17,7 @@ void Tester::prepare(llvm::StringRef standard) { options.query_toolchain = true; options.suppress_logging = true; - params.arguments = database.lookup(src_path, options).arguments; + params.arguments = database.lookup(src_path, options).front().arguments; for(auto& [file, source]: sources.all_files) { if(file == src_path) { @@ -54,7 +54,7 @@ bool Tester::compile_with_pch(llvm::StringRef standard) { options.query_toolchain = true; options.suppress_logging = true; - params.arguments = database.lookup(src_path, options).arguments; + params.arguments = database.lookup(src_path, options).front().arguments; auto pch_path = fs::createTemporaryFile("clice", "pch"); if(!pch_path) {