diff --git a/include/Compiler/Command.h b/include/Compiler/Command.h index 23d565a9..3f26731f 100644 --- a/include/Compiler/Command.h +++ b/include/Compiler/Command.h @@ -1,59 +1,52 @@ #pragma once #include +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Support/Allocator.h" namespace clice { -/// Processes and adjusts a raw compile command from compile_commands.json. -/// -/// This function tokenizes the input command, removes unnecessary arguments, -/// and ensures the resulting format is suitable for execution. -/// -/// @param command The raw shell-escaped compile command. -/// @param out A vector to hold pointers to the processed arguments. -/// @param buffer A storage buffer for the actual argument strings. -std::expected mangle_command(llvm::StringRef command, - llvm::SmallVectorImpl& out, - llvm::SmallVectorImpl& buffer); - /// `CompilationDatabase` is responsible for managing the compile commands. /// /// FIXME: currently we assume that a file only occurs once in the CDB. /// This is not always correct, but it is enough for now. class CompilationDatabase { public: - /// Update the compile commands with the given file. - void updateCommands(llvm::StringRef file); + using Self = CompilationDatabase; - /// Update the compile commands with the given file and compile command. - void updateCommand(llvm::StringRef file, llvm::StringRef command); + /// Update the compile commands with the given file. + void update_commands(this Self& self, llvm::StringRef file); /// Update the module map with the given file and module name. - void updateModule(llvm::StringRef file, llvm::StringRef name); - - /// Lookup the compile commands of the given file. - llvm::StringRef getCommand(llvm::StringRef file); + void update_module(llvm::StringRef file, llvm::StringRef name); /// Lookup the module interface unit file path of the given module name. - llvm::StringRef getModuleFile(llvm::StringRef name); + llvm::StringRef get_module_file(llvm::StringRef name); auto size() const { return commands.size(); } - auto begin() { - return commands.begin(); - } + enum class Style { + GNU = 0, + MSVC, + }; - auto end() { - return commands.end(); - } + void add_command(this Self& self, + llvm::StringRef path, + llvm::StringRef command, + Style style = Style::GNU); + + llvm::ArrayRef get_command(this Self& self, llvm::StringRef path); private: - /// A map between file path and compile commands. - llvm::StringMap commands; + /// Save a string into memory pool. Make sure end with `\0`. + llvm::StringRef save_string(this Self& self, llvm::StringRef string); + std::vector save_args(this Self& self, llvm::ArrayRef args); + +private: /// For C++20 module, we only can got dependent module name /// in source context. But we need dependent module file path /// to build PCM. So we will scan(preprocess) all project files @@ -61,6 +54,17 @@ private: /// **Note that** this only includes module interface unit, for module /// implementation unit, the scan could be delayed until compiling it. llvm::StringMap moduleMap; + + /// Memory pool for command arguments. + llvm::BumpPtrAllocator memory_pool; + + /// For lookup whether we already have the key. + llvm::DenseSet unique; + + // A map between file path and compile commands. + /// TODO: Path cannot represent unique file, we should use better, like inode ... + llvm::DenseMap>> commands; }; } // namespace clice + diff --git a/include/Compiler/Compilation.h b/include/Compiler/Compilation.h index 817ac4a8..b0f69b45 100644 --- a/include/Compiler/Compilation.h +++ b/include/Compiler/Compilation.h @@ -16,7 +16,7 @@ struct CompilationParams { llvm::SmallString<128> outPath; /// Responsible for storing the arguments. - llvm::SmallString<1024> command; + llvm::ArrayRef arguments; llvm::IntrusiveRefCntPtr vfs = new ThreadSafeFS(); @@ -32,6 +32,7 @@ struct CompilationParams { /// The memory buffers for all remapped file. llvm::StringMap> buffers; + void add_remapped_file(llvm::StringRef path, llvm::StringRef content, std::uint32_t bound = -1) { diff --git a/include/Test/CTest.h b/include/Test/CTest.h index 3044cce6..d87414e2 100644 --- a/include/Test/CTest.h +++ b/include/Test/CTest.h @@ -3,12 +3,14 @@ #include "Test.h" #include "Annotation.h" #include "Server/Protocol.h" +#include "Compiler/Command.h" #include "Compiler/Compilation.h" namespace clice::testing { struct Tester { CompilationParams params; + CompilationDatabase database; std::optional unit; std::string src_path; @@ -77,7 +79,10 @@ public: } Tester& compile(llvm::StringRef standard = "-std=c++20") { - params.command = std::format("clang++ {} {} -fms-extensions", standard, src_path); + auto command = std::format("clang++ {} {} -fms-extensions", standard, src_path); + database.add_command(src_path, command); + params.arguments = database.get_command(src_path); + auto info = clice::compile(params); ASSERT_TRUE(info); this->unit.emplace(std::move(*info)); diff --git a/src/Compiler/Command.cpp b/src/Compiler/Command.cpp index 1f643527..67efedd1 100644 --- a/src/Compiler/Command.cpp +++ b/src/Compiler/Command.cpp @@ -2,77 +2,12 @@ #include "Compiler/Command.h" #include "Compiler/Compilation.h" #include "Support/FileSystem.h" +#include "llvm/Support/CommandLine.h" namespace clice { -std::expected mangle_command(llvm::StringRef command, - llvm::SmallVectorImpl& out, - llvm::SmallVectorImpl& buffer) { - llvm::SmallString<128> current; - llvm::SmallVector indices; - bool inSingleQuote = false; - bool inDoubleQuote = false; - - for(size_t i = 0; i < command.size(); ++i) { - char c = command[i]; - if(c == ' ' && !inSingleQuote && !inDoubleQuote) { - if(!current.empty()) { - indices.push_back(buffer.size()); - buffer.append(current); - buffer.push_back('\0'); - current.clear(); - } - } else if(c == '\'' && !inDoubleQuote) { - inSingleQuote = !inSingleQuote; - } else if(c == '"' && !inSingleQuote) { - inDoubleQuote = !inDoubleQuote; - } else { - current.push_back(c); - } - } - - if(!current.empty()) { - indices.push_back(buffer.size()); - buffer.append(current); - buffer.push_back('\0'); - } - - /// Add resource directory. - indices.push_back(buffer.size()); - current = std::format("-resource-dir={}", fs::resource_dir); - buffer.append(current); - buffer.push_back('\0'); - - /// FIXME: use better way to remove args. - for(size_t i = 0; i < indices.size(); ++i) { - llvm::StringRef arg(buffer.data() + indices[i]); - - /// Skip `-c` and `-o` arguments. - if(arg == "-c") { - continue; - } - - if(arg.starts_with("-o")) { - if(arg == "-o") { - ++i; - } - continue; - } - - if(arg.starts_with("@CMakeFiles")) { - continue; - } - - /// TODO: remove PCH. - - out.push_back(arg.data()); - } - - return {}; -} - /// Update the compile commands with the given file. -void CompilationDatabase::updateCommands(llvm::StringRef filename) { +void CompilationDatabase::update_commands(this Self& self, llvm::StringRef filename) { auto path = path::real_path(filename); filename = path; @@ -140,12 +75,12 @@ void CompilationDatabase::updateCommands(llvm::StringRef filename) { continue; } - commands[path] = *command; + self.add_command(path, *command); } log::info("Successfully loaded compile commands from {0}, total {1} commands", filename, - commands.size()); + self.commands.size()); /// Scan all files to build module map. // CompilationParams params; @@ -159,29 +94,16 @@ void CompilationDatabase::updateCommands(llvm::StringRef filename) { // } //} - log::info("Successfully built module map, total {0} modules", moduleMap.size()); -} - -void CompilationDatabase::updateCommand(llvm::StringRef file, llvm::StringRef command) { - commands[path::real_path(file)] = command; + log::info("Successfully built module map, total {0} modules", self.moduleMap.size()); } /// Update the module map with the given file and module name. -void CompilationDatabase::updateModule(llvm::StringRef file, llvm::StringRef name) { +void CompilationDatabase::update_module(llvm::StringRef file, llvm::StringRef name) { moduleMap[path::real_path(file)] = file; } -/// Lookup the compile commands of the given file. -llvm::StringRef CompilationDatabase::getCommand(llvm::StringRef file) { - auto iter = commands.find(file); - if(iter == commands.end()) { - return ""; - } - return iter->second; -} - /// Lookup the module interface unit file path of the given module name. -llvm::StringRef CompilationDatabase::getModuleFile(llvm::StringRef name) { +llvm::StringRef CompilationDatabase::get_module_file(llvm::StringRef name) { auto iter = moduleMap.find(name); if(iter == moduleMap.end()) { return ""; @@ -189,4 +111,81 @@ llvm::StringRef CompilationDatabase::getModuleFile(llvm::StringRef name) { return iter->second; } +llvm::StringRef CompilationDatabase::save_string(this Self& self, llvm::StringRef string) { + auto it = self.unique.find(string); + + /// FIXME: arg may be empty? + + /// If we already store the argument, reuse it. + if(it != self.unique.end()) { + return *it; + } + + /// Allocate new argument. + const auto size = string.size(); + auto ptr = self.memory_pool.Allocate(size + 1); + std::memcpy(ptr, string.data(), size); + ptr[size] = '\0'; + + /// Insert new argument. + auto result = llvm::StringRef(ptr, size); + self.unique.insert(result); + return result; +} + +std::vector CompilationDatabase::save_args(this Self& self, + llvm::ArrayRef args) { + std::vector result; + result.reserve(args.size()); + + for(auto i = 0; i < args.size(); i++) { + result.emplace_back(self.save_string(args[i]).data()); + } + + return result; +} + +void CompilationDatabase::add_command(this Self& self, + llvm::StringRef path, + llvm::StringRef command, + Style style) { + llvm::SmallVector args; + + /// temporary allocator to meet the argument requirements of tokenize. + llvm::BumpPtrAllocator allocator; + llvm::StringSaver saver(allocator); + + /// FIXME: we may want to check the first argument of command to + /// make sure its mode. + if(style == Style::GNU) { + llvm::cl::TokenizeGNUCommandLine(command, saver, args); + } else if(style == Style::MSVC) { + llvm::cl::TokenizeWindowsCommandLineFull(command, saver, args); + } else { + std::abort(); + } + + auto path_ = self.save_string(path); + auto new_args = self.save_args(args); + + auto it = self.commands.find(path_.data()); + if(it == self.commands.end()) { + self.commands.try_emplace(path_.data(), + std::make_unique>(std::move(new_args))); + } else { + *it->second = std::move(new_args); + } +} + +llvm::ArrayRef CompilationDatabase::get_command(this Self& self, + llvm::StringRef path) { + auto path_ = self.save_string(path); + auto it = self.commands.find(path_.data()); + if(it != self.commands.end()) { + return *it->second; + } else { + return {}; + } +} + } // namespace clice diff --git a/src/Compiler/Compilation.cpp b/src/Compiler/Compilation.cpp index c006d86a..7c8769c3 100644 --- a/src/Compiler/Compilation.cpp +++ b/src/Compiler/Compilation.cpp @@ -42,18 +42,13 @@ auto create_invocation(CompilationParams& params, llvm::IntrusiveRefCntPtr& diagnostic_engine) -> std::expected, std::string> { - /// Split orgin command into c-style command arguments for creating invocation. - llvm::SmallString<1024> buffer; - llvm::SmallVector args; - TRY_OR_RETURN(mangle_command(params.command, args, buffer)); - /// Create clang invocation. clang::CreateInvocationOptions options = { .Diags = diagnostic_engine, .VFS = params.vfs, }; - auto invocation = clang::createInvocation(args, options); + auto invocation = clang::createInvocation(params.arguments, options); if(!invocation) { return report_diagnostics("fail to create compiler invocation", *diagnostics); } @@ -221,7 +216,7 @@ std::expected compile(CompilationParams& params, P out.path = params.outPath.str(); /// out.preamble = params.content.substr(0, *params.bound); - out.command = params.command.str(); + /// out.command = params.arguments.str(); /// FIXME: out.deps = info->deps(); return clang_compile(params, [&](clang::CompilerInstance& instance) { diff --git a/src/Server/Indexer.cpp b/src/Server/Indexer.cpp index 16110f5a..8303d242 100644 --- a/src/Server/Indexer.cpp +++ b/src/Server/Indexer.cpp @@ -47,7 +47,7 @@ async::Task<> Indexer::index(CompilationUnit& unit) { async::Task<> Indexer::index(llvm::StringRef file) { CompilationParams params; - params.command = database.getCommand(file); + params.arguments = database.get_command(file); auto AST = co_await async::submit([&] { return compile(params); }); diff --git a/src/Server/Scheduler.cpp b/src/Server/Scheduler.cpp index 5c388f36..35cb1dda 100644 --- a/src/Server/Scheduler.cpp +++ b/src/Server/Scheduler.cpp @@ -54,7 +54,7 @@ async::Task Scheduler::completion(std::string path, std::uint32_t o /// Set compilation params ... . CompilationParams params; - params.command = database.getCommand(path); + params.arguments = database.get_command(path); params.add_remapped_file(path, openFile->content); params.pch = {PCH->path, PCH->preamble.size()}; params.completion = {path, offset}; @@ -74,8 +74,9 @@ async::Task Scheduler::isPCHOutdated(llvm::StringRef path, llvm::StringRef } /// Check command and preamble matchs. - auto command = database.getCommand(path); - if(openFile->PCH->command != command || openFile->PCH->preamble != preamble) { + auto command = database.get_command(path); + /// FIXME: check command. openFile->PCH->command != command + if(openFile->PCH->preamble != preamble) { co_return true; } @@ -104,7 +105,7 @@ async::Task<> Scheduler::buildPCH(std::string path, std::string content) { std::uint32_t bound, std::string content) -> async::Task<> { CompilationParams params; - params.command = scheduler.database.getCommand(path); + params.arguments = scheduler.database.get_command(path); params.outPath = path::join(config::index.dir, path::filename(path) + ".pch"); params.add_remapped_file(path, content, bound); @@ -161,7 +162,7 @@ async::Task<> Scheduler::buildAST(std::string path, std::string content) { } CompilationParams params; - params.command = database.getCommand(path); + params.arguments = database.get_command(path); params.add_remapped_file(path, content); params.pch = {PCH->path, PCH->preamble.size()}; diff --git a/src/Server/Server.cpp b/src/Server/Server.cpp index d898ffbd..168001d0 100644 --- a/src/Server/Server.cpp +++ b/src/Server/Server.cpp @@ -118,7 +118,7 @@ async::Task Server::onInitialize(json::Value value) { config::init(converter.workspace()); for(auto& dir: config::server.compile_commands_dirs) { - database.updateCommands(dir + "/compile_commands.json"); + database.update_commands(dir + "/compile_commands.json"); } co_return result; diff --git a/unittests/Compiler/Command.cpp b/unittests/Compiler/Command.cpp index 160be781..e72cb6c9 100644 --- a/unittests/Compiler/Command.cpp +++ b/unittests/Compiler/Command.cpp @@ -5,9 +5,45 @@ namespace clice::testing { namespace { -TEST(CompilationDatabase, Command) {} +TEST(Command, SimpleAddGet) { + CompilationDatabase database; + database.add_command("test.cpp", "clang++ -std=c++23 test.cpp"); + auto command = database.get_command("test.cpp"); + ASSERT_EQ(command.size(), 3); -TEST(CompilationDatabase, Module) {} + using namespace std::literals; + EXPECT_EQ(command[0], "clang++"sv); + EXPECT_EQ(command[1], "-std=c++23"sv); + EXPECT_EQ(command[2], "test.cpp"sv); +} + +TEST(Command, Reuse) { + CompilationDatabase database; + database.add_command("test.cpp", "clang++ -std=c++23 test.cpp"); + database.add_command("test2.cpp", "clang++ -std=c++23 test2.cpp"); + + auto command1 = database.get_command("test.cpp"); + auto command2 = database.get_command("test2.cpp"); + ASSERT_EQ(command1.size(), 3); + ASSERT_EQ(command2.size(), 3); + + using namespace std::literals; + EXPECT_EQ(command1[0], "clang++"sv); + EXPECT_EQ(command1[1], "-std=c++23"sv); + EXPECT_EQ(command1[2], "test.cpp"sv); + + EXPECT_EQ(command1[0], command2[0]); + EXPECT_EQ(command1[1], command2[1]); + EXPECT_EQ(command2[2], "test2.cpp"sv); +} + +TEST(Command, Merge) {} + +TEST(Command, Filter) {} + +TEST(Command, QueryDriver) {} + +TEST(Command, Module) {} } // namespace diff --git a/unittests/Compiler/Diagnostic.cpp b/unittests/Compiler/Diagnostic.cpp index f8801002..788de2b3 100644 --- a/unittests/Compiler/Diagnostic.cpp +++ b/unittests/Compiler/Diagnostic.cpp @@ -9,17 +9,26 @@ namespace { using namespace clice; TEST(Diagnostic, CommandError) { + std::vector arguments = { + "clang++", + }; + CompilationParams params; /// miss input file. - params.command = "clang++"; + params.arguments = arguments; params.add_remapped_file("main.cpp", "int main() { return 0; }"); auto unit = compile(params); ASSERT_FALSE(unit); } TEST(Diagnostic, Error) { + std::vector arguments = { + "clang++", + "main.cpp", + }; + CompilationParams params; - params.command = "clang++ main.cpp"; + params.arguments = arguments; params.add_remapped_file("main.cpp", "int main() { return 0 }"); auto unit = compile(params); ASSERT_TRUE(unit); diff --git a/unittests/Compiler/Module.cpp b/unittests/Compiler/Module.cpp index 5eb03adc..8c332d21 100644 --- a/unittests/Compiler/Module.cpp +++ b/unittests/Compiler/Module.cpp @@ -10,9 +10,17 @@ PCMInfo buildPCM(llvm::StringRef file, llvm::StringRef code) { llvm::SmallString<128> outPath; fs::createUniquePath(llvm::Twine(file) + "%%%%%%.pcm", outPath, true); + std::string path = file.str(); + std::vector arguments = { + "clang++", + "-std=c++20", + "-xc++", + path.c_str(), + }; + CompilationParams params; params.outPath = outPath; - params.command = "clang++ -std=c++20 -x c++ " + file.str(); + params.arguments = arguments; params.add_remapped_file(file, code); params.add_remapped_file("./test.h", "export int foo2();"); @@ -26,8 +34,15 @@ PCMInfo buildPCM(llvm::StringRef file, llvm::StringRef code) { } ModuleInfo scan(llvm::StringRef content) { + std::vector arguments = { + "clang++", + "-std=c++20", + "-xc++", + "main.ixx", + }; + CompilationParams params; - params.command = "clang++ -std=c++20 -x c++ main.ixx"; + params.arguments = arguments; params.add_remapped_file("main.ixx", content); params.add_remapped_file("./test.h", "export module A"); auto info = scanModule(params); diff --git a/unittests/Compiler/Preamble.cpp b/unittests/Compiler/Preamble.cpp index 3df93e51..e7c1b26c 100644 --- a/unittests/Compiler/Preamble.cpp +++ b/unittests/Compiler/Preamble.cpp @@ -79,12 +79,20 @@ void EXPECT_BUILD_PCH(llvm::StringRef main_file, auto bound = computePreambleBound(content); params.add_remapped_file(main_file, content, bound); + std::vector arguments = { + "clang++", + "-xc++", + "-std=c++20", + }; + if(!preamble.empty()) { - params.command = std::format("clang++ -xc++ -std=c++20 --include=preamble.h {}", main_file); - } else { - params.command = std::format("clang++ -xc++ -std=c++20 {}", main_file); + arguments.emplace_back("--include=preamble.h"); } + std::string buffer = main_file.str(); + arguments.emplace_back(buffer.c_str()); + params.arguments = arguments; + for(auto& [path, content]: files) { params.add_remapped_file(path::join(".", path), content); } @@ -97,7 +105,7 @@ void EXPECT_BUILD_PCH(llvm::StringRef main_file, ASSERT_TRUE(AST, chain); EXPECT_EQ(info.path, outPath, chain); - EXPECT_EQ(info.command, params.command, chain); + /// EXPECT_EQ(info.command, params.arguments, chain); /// TODO: EXPECT_EQ(info.deps, deps); } @@ -202,7 +210,9 @@ int foo(); CompilationParams params; params.add_remapped_file("main.cpp", content); - params.command = "clang++ -std=c++20 main.cpp"; + + std::vector arguments = {"clang++", "-std=c++20", "main.cpp"}; + params.arguments = arguments; for(auto& [path, file]: files) { params.add_remapped_file(path::join(".", path), file); @@ -253,7 +263,9 @@ int y = foo(); auto bounds = computePreambleBounds(content); CompilationParams params; - params.command = "clang++ -std=c++20 main.cpp"; + + std::vector arguments = {"clang++", "-std=c++20", "main.cpp"}; + params.arguments = arguments; PCHInfo info; std::uint32_t last_bound = 0; @@ -279,7 +291,7 @@ int y = foo(); ASSERT_TRUE(AST); EXPECT_EQ(info.path, outPath); - EXPECT_EQ(info.command, params.command); + /// EXPECT_EQ(info.command, params.arguments); } } diff --git a/unittests/Feature/CodeCompletion.cpp b/unittests/Feature/CodeCompletion.cpp index 909eff2e..5ba32b37 100644 --- a/unittests/Feature/CodeCompletion.cpp +++ b/unittests/Feature/CodeCompletion.cpp @@ -22,7 +22,9 @@ int main() { Annotation annotation = {code}; CompilationParams params; - params.command = "clang++ -std=c++20 main.cpp"; + std::vector arguments = {"clang++", "-std=c++20", "main.cpp"}; + params.arguments = arguments; + params.completion = {"main.cpp", annotation.offset("pos")}; params.add_remapped_file("main.cpp", annotation.source()); diff --git a/unittests/Feature/SignatureHelp.cpp b/unittests/Feature/SignatureHelp.cpp index 7560da48..9dfcc3f3 100644 --- a/unittests/Feature/SignatureHelp.cpp +++ b/unittests/Feature/SignatureHelp.cpp @@ -19,7 +19,9 @@ int main() { )cpp"; CompilationParams params; - params.command = "clang++ -std=c++20 main.cpp"; + std::vector arguments = {"clang++", "-std=c++20", "main.cpp"}; + params.arguments = arguments; + params.add_remapped_file("main.cpp", code); /// params.completion = {"main.cpp", 9, 10}; diff --git a/xmake.lua b/xmake.lua index 4ba1d945..96eb08d5 100644 --- a/xmake.lua +++ b/xmake.lua @@ -193,15 +193,9 @@ package("llvm") package:add("defines", "CLANG_BUILD_STATIC") end - if has_config("llvm") then - os.vcp("bin", package:installdir()) - os.vcp("lib", package:installdir()) - os.vcp("include", package:installdir()) - else - os.mv("bin", package:installdir()) - os.mv("lib", package:installdir()) - os.mv("include", package:installdir()) - end + os.vcp("bin", package:installdir()) + os.vcp("lib", package:installdir()) + os.vcp("include", package:installdir()) end) if has_config("release") then