From b46ff1782060db90e94af940bfb22a73631cc368 Mon Sep 17 00:00:00 2001 From: ykiko Date: Thu, 11 Jul 2024 12:04:09 +0800 Subject: [PATCH] add docs. --- .vscode/launch.json | 2 +- docs/clang/Preprocessor.md | 55 ++++++++++ docs/clang/README.md | 194 +++++++++++++++++++++++++++++++++++ include/Support/Filesystem.h | 15 +++ samples/CMakeLists.txt | 7 ++ samples/main.cpp | 13 +++ src/Clang/Preamble.cpp | 5 +- tests/Clang/Preamble.cpp | 78 +++++++++++++- 8 files changed, 364 insertions(+), 5 deletions(-) create mode 100644 docs/clang/Preprocessor.md create mode 100644 docs/clang/README.md create mode 100644 samples/CMakeLists.txt create mode 100644 samples/main.cpp diff --git a/.vscode/launch.json b/.vscode/launch.json index 1bbdc0e5..3dbf4bdb 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,7 +8,7 @@ "type": "lldb", "request": "launch", "name": "Launch", - "program": "${workspaceFolder}/build/clice", + "program": "${workspaceFolder}/build/clice_test", "args": [], "cwd": "${workspaceFolder}" }, diff --git a/docs/clang/Preprocessor.md b/docs/clang/Preprocessor.md new file mode 100644 index 00000000..3a996cad --- /dev/null +++ b/docs/clang/Preprocessor.md @@ -0,0 +1,55 @@ +本小节会详细介绍`Preprocessor`提供的一些给用户的接口 + +```cpp +using namespace clang; +class Callback : public clang::PPCallbacks { +public: + Preprocessor& pp; + SourceRange last; + Callback(Preprocessor& pp) : pp(pp) {} + /// Called by Preprocessor::HandleMacroExpandedIdentifier when a + /// macro invocation is found. 
+ void MacroExpands(const Token& MacroNameTok, + const MacroDefinition& MD, + SourceRange Range, + const MacroArgs* Args) override { + auto name = MacroNameTok.getIdentifierInfo()->getName(); + if(name.starts_with("__")) + return; + llvm::outs() << "MacroExpands: " << name; + if(MD.getMacroInfo()->isFunctionLike()) { + llvm::outs() << "("; + int len = Args->getNumMacroArguments(); + for(auto i = 0; i < len; i++) { + auto arg = Args->getUnexpArgument(i); + auto len2 = Args->getArgLength(arg); + for(auto j = 0; j < len2; j++) { + llvm::outs() << pp.getSpelling(*(arg + j)); + } + if(i < len - 1) + llvm::outs() << ", "; + } + llvm::outs() << ")"; + } + llvm::outs() << "\n"; + auto& m = pp.getSourceManager(); + auto x = m.getExpansionRange(Range); + // auto z = m.getImmediateExpansionRange(x.getBegin()); + auto text = Lexer::getSourceText(x, m, pp.getLangOpts()); + llvm::outs() << text << "\n"; + } + /// Hook called whenever a macro definition is seen. + void MacroDefined(const Token& MacroNameTok, const MacroDirective* MD) override { + auto name = MacroNameTok.getIdentifierInfo()->getName(); + if(name.starts_with("__")) + return; + // llvm::outs() << "MacroDefined: " << name << "\n"; + } +} +``` + +```cpp +// must be after BeginSourceFile +auto& preprocessor = instance->getPreprocessor(); +preprocessor.addPPCallbacks(std::make_unique(preprocessor)); +``` \ No newline at end of file diff --git a/docs/clang/README.md b/docs/clang/README.md new file mode 100644 index 00000000..fcc13ba8 --- /dev/null +++ b/docs/clang/README.md @@ -0,0 +1,194 @@ +在编写 clice 这个项目的时候,很大一个挑战就是和 clang 的源码进行交互。尽管 clang 一开始就被设计为模块化的项目,但是由于文档的匮乏,以及其本身就是一个生命周期非常长的项目了,不可避免的导致不同模块间的耦合程度加重,导致基于它编写相关的代码的时候较为困难。本文的旨在为 clice 项目中使用到的 clang 源码部分提供详细的介绍,方便阅读。 + +# Overview + +TODO: + +我们的目标是,基于 clang 的代码,自己编译出一个编译器前端程序出来,可以产生 AST 以便于我们使用,中端和后端这里就省略了。 + +# CompilerInstance + +```cpp +class CompilerInstance { /* ... 
*/ } +``` + +这个类其实就代表一个 C++ 编译器实例,通过它我们就能完成实际代码的编译工作。它是个可默认构造的类型 + +```cpp +auto instance = std::make_unique<clang::CompilerInstance>(); +``` + +但是不要被表象迷惑了,这样默认构造出来的`instance`其实是不能直接用的,如果你在 Debug 模式下构建,你会得到一大堆断言失败的错误。`CompilerInstance`有非常多的`set*`方法,只有在这些方法都正确地调用之后,才能执行最后的编译。下面就一步步让我们看看有哪些成员要被正确设置。 + +# Diagnostic + +编译器如何处理错误?各种错误,比如解析命令行可能出错,预处理阶段可能出错,语法分析语义分析阶段也可能出错,如何呈现报错信息呢?这就是本小节要讨论的问题。 + +核心的类型主要有四个 + +`DiagnosticsEngine`用于管理所有和诊断相关的对象。 + +```cpp +class DiagnosticOptions{ /* ... */ } +``` + +`DiagnosticOptions`用于设置诊断选项。 + +```cpp +class DiagnosticConsumer{ /* ... */ } +``` + +`DiagnosticConsumer`用于处理诊断信息。可以重写这个类的方法来自定义诊断信息的处理方式。有一个默认的实现`TextDiagnosticPrinter`,它会将诊断信息输出到指定的流中。 + +```cpp +class DiagnosticIDs { /* ... */ } +``` + +`DiagnosticIDs` 负责管理诊断消息的唯一标识符。每个诊断消息都有一个唯一的 ID,用于在代码中引用特定的诊断消息。 + +```cpp +class DiagnosticsEngine{ /* ... */ } +``` + +`DiagnosticsEngine`是一个诊断引擎,用于生成和管理诊断消息。 + +创建 + +```cpp +clang::DiagnosticIDs* ids = new clang::DiagnosticIDs(); +clang::DiagnosticOptions* diag_opts = new clang::DiagnosticOptions(); +clang::DiagnosticConsumer* consumer = new clang::TextDiagnosticPrinter(llvm::errs(), diag_opts); +clang::DiagnosticsEngine* engine = new clang::DiagnosticsEngine(ids, diag_opts, consumer); +``` + +准备好`DiagnosticsEngine`之后,就可以设置给`instance`了,注意参数是一个裸指针,`instance`会获取它的所有权。 + +```cpp +instance->setDiagnostics(engine); +``` + +# CompilerInvocation + +```cpp +class CompilerInvocation { /* ...
*/ } +``` + +这个类型用于向编译器传递一些信息,比如编译选项,输入文件等等,它同样是一个可默认构造的类型 + +```cpp +auto invocation = std::make_shared<clang::CompilerInvocation>(); +``` + +同样,这样构造出来的对象是不能直接用的。可以使用`CompilerInvocation::CreateFromArgs`从一组命令行选项来初始化它。 + +```cpp +std::vector<const char*> args = {"-Xclang", "-c", "main.cpp"}; +clang::CompilerInvocation::CreateFromArgs(*invocation, args, instance->getDiagnostics()); +``` + +通过它的`getFrontendOpts`方法,我们可以获取到解析过的编译选项。 + +```cpp +auto& opts = invocation->getFrontendOpts(); +``` + +clang 提供了代码补全的接口,如果想使用的话需要通过`getFrontendOpts`设置相应的选项 + +```cpp +auto& codeCompletionAt = opts.CodeCompletionAt; +codeCompletionAt.FileName = "main.cpp"; +codeCompletionAt.Line = 10; +codeCompletionAt.Column = 4; +``` + +效果上和使用这个编译选项是类似的 + +```shell +clang++ -cc1 -fsyntax-only -code-completion-at main.cpp:10:4 main.cpp +``` + +准备好`invocation`之后就可以设置给`instance`了 + +```cpp +instance->setInvocation(std::move(invocation)); +``` + +# Target + +target 也就是我们常说的目标,这会影响最终生成的代码,例如不同平台的类型大小和对齐等等因素不同,那么`sizeof`等运算符求值得到的结果也就不同。不过由于往往在编译选项中就会默认指定 target 了,我们不需要再去自己创建,只需要 + +```cpp +if(!instance->createTarget()) { + llvm::errs() << "Failed to create target\n"; + std::terminate(); +} +``` + +就会自动根据当前的编译选项来创建对应的 target 了。 + +# FileManager and SourceManager + +```cpp +if(auto manager = instance->createFileManager()) { + instance->createSourceManager(*manager); +} else { + llvm::errs() << "Failed to create file manager\n"; + std::terminate(); +} +``` + +# Preprocessor + +```cpp +class Preprocessor { /* ...
/// Reads the whole file at `path` and returns its contents as raw bytes.
///
/// @param path Path of the file to read. It does not need to be
///             null-terminated.
/// @return The bytes that were read. The result is empty when the file cannot
///         be opened, when its size cannot be determined, or when the file is
///         empty. If fewer bytes than the reported size can be read, only the
///         bytes actually read are returned.
inline std::string readAll(std::string_view path) {
    std::string result;

    // `std::string_view::data()` is not guaranteed to be null-terminated, so
    // copy the path into an owning string before handing it to the C API.
    const std::string filename(path);

    // Binary mode keeps the size reported by `ftell` consistent with the
    // number of bytes delivered by `fread`.
    std::FILE* file = std::fopen(filename.c_str(), "rb");
    if(!file) {
        return result;
    }

    if(std::fseek(file, 0, SEEK_END) == 0) {
        // `ftell` returns -1 on failure; never feed that into `resize`.
        const long size = std::ftell(file);
        if(size > 0 && std::fseek(file, 0, SEEK_SET) == 0) {
            result.resize(static_cast<std::size_t>(size));
            const std::size_t count = std::fread(result.data(), 1, result.size(), file);
            // Drop the unread tail instead of returning padding bytes.
            result.resize(count);
        }
    }

    std::fclose(file);
    return result;
}
// Macro fixture: the macros below extract the parenthesized type from an
// argument of the form `(type)name`.

// Yields its arguments unchanged.
#define EXPAND(...) __VA_ARGS__
// Yields its arguments followed by a trailing comma.
#define GET_PARAM(...) __VA_ARGS__,
// Places GET_PARAM in front of the arguments. When they start with a
// parenthesized group, e.g. `(int)y`, this forms the call `GET_PARAM(int)`
// followed by `y`, which expands to `int, y`.
#define GET_TYPE_IMPL(...) GET_PARAM __VA_ARGS__

// Yields the first argument.
#define GET_FST(_1, ...) _1
// Forwards its arguments to GET_FST.
#define GET_WRAP(...) GET_FST(__VA_ARGS__)
// `GET_TYPE((T)name)` expands to `T`.
#define GET_TYPE(x) EXPAND(GET_WRAP(GET_TYPE_IMPL(x)))

// Declares two locals whose types are produced by GET_TYPE, then returns 0.
int main() {
    // Expands to `int x = 1;`.
    GET_TYPE((int)y) x = 1;
    // Expands to `double y = 1;`.
    GET_TYPE((double)y) y = 1;
    return 0;
}
"/home/ykiko/Project/C++/clice/samples/main.cpp"; +// auto content = readAll(path); +// CompilerInvocation invocation; +// auto preamble = Preamble::build(path, content, invocation); +// } + +TEST(clice, clang) { + auto instance = std::make_unique(); + + clang::DiagnosticIDs* ids = new clang::DiagnosticIDs(); + clang::DiagnosticOptions* diag_opts = new clang::DiagnosticOptions(); + clang::DiagnosticConsumer* consumer = new clang::TextDiagnosticPrinter(llvm::errs(), diag_opts); + clang::DiagnosticsEngine* engine = new clang::DiagnosticsEngine(ids, diag_opts, consumer); + instance->setDiagnostics(engine); + + auto invocation = std::make_shared(); + std::vector args = { + "-x", + "c++", + "-no-round-trip-args", + "-std=gnu++20", + "/home/ykiko/Project/C++/clice/samples/main.cpp", + }; + // invocation = clang::createInvocation(args, {}); + clang::CompilerInvocation::CreateFromArgs(*invocation, args, instance->getDiagnostics()); + instance->setInvocation(std::move(invocation)); + + /// if need code completion + // auto& opts = invocation->getFrontendOpts(); + // auto& codeCompletionAt = opts.CodeCompletionAt; + // codeCompletionAt.FileName = "main.cpp"; + // codeCompletionAt.Line = 10; + // codeCompletionAt.Column = 4; + + if(!instance->createTarget()) { + llvm::errs() << "Failed to create target\n"; + std::terminate(); + } + + if(auto manager = instance->createFileManager()) { + instance->createSourceManager(*manager); + } else { + llvm::errs() << "Failed to create file manager\n"; + std::terminate(); + } + + instance->createPreprocessor(clang::TranslationUnitKind::TU_Complete); + + instance->createASTContext(); + + /// if code completion + // instance->setCodeCompletionConsumer(consumer); + clang::SyntaxOnlyAction action; + + if(!action.BeginSourceFile(*instance, instance->getFrontendOpts().Inputs[0])) { + llvm::errs() << "Failed to begin source file\n"; + std::terminate(); + } + + if(auto error = action.Execute()) { + llvm::errs() << "Failed to execute action: " << 
error << "\n"; + std::terminate(); + } + + // instance->getASTContext().getTranslationUnitDecl()->dump(); + + action.EndSourceFile(); }