From 75cb74ca785c00cd21893c2354ef39bb6540fb4c Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 9 Aug 2024 13:43:54 +0800 Subject: [PATCH] some update. --- .clang-format | 4 +- .vscode/launch.json | 21 +- docs/clang/SourceLocation.md | 0 docs/examples/ASTVisitor.cpp | 169 ++++++++++++-- docs/examples/Preprocessor.cpp | 110 +++++---- include/Clang/ParsedAST.h | 5 +- include/Feature/SemanticToken.h | 6 + include/Protocol/Language/SemanticToken.h | 132 +++++++---- main.cpp | 82 +++++-- src/Feature/SemanticToken.cpp | 26 ++- src/Feature/SemanticToken2.cpp | 269 ++++++++++++++++++++++ test.cpp | 9 +- 12 files changed, 680 insertions(+), 153 deletions(-) create mode 100644 docs/clang/SourceLocation.md create mode 100644 src/Feature/SemanticToken2.cpp diff --git a/.clang-format b/.clang-format index 9d8ba58e..d384dc18 100644 --- a/.clang-format +++ b/.clang-format @@ -2,7 +2,7 @@ # compatible with clang-format 18 UseTab: Never -ColumnLimit: 110 +ColumnLimit: 100 # Indent IndentWidth: 4 @@ -96,7 +96,7 @@ SpacesInSquareBrackets: false QualifierAlignment: Custom QualifierOrder: ["constexpr", "const", "inline", "static", "type"] SortIncludes: Never -SortUsingDeclarations: LexicographicNumeric +SortUsingDeclarations: Never IncludeBlocks: Merge WhitespaceSensitiveMacros: ["PK_PROTECTED", "LUA_PROTECTED"] diff --git a/.vscode/launch.json b/.vscode/launch.json index 0df4d8a7..0f554026 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -15,8 +15,18 @@ { "type": "lldb", "request": "launch", - "name": "Launch", + "name": "ASTVisitor", "program": "${workspaceFolder}/build/ASTVisitor", + "args": [ + "/home/ykiko/C++/clice/test.cpp", + ], + "cwd": "${workspaceFolder}" + }, + { + "type": "lldb", + "request": "launch", + "name": "Preprocessor", + "program": "${workspaceFolder}/build/Preprocessor", "args": [ "/home/ykiko/C++/clice/main.cpp", ], @@ -28,5 +38,14 @@ // "name": "Attach", // "program": "${workspaceFolder}/build/clice" //}, + ], + "compounds": [ + { + "name": "Select and Launch", + "configurations": [ + "ASTVisitor", + "Preprocessor" + ] + } ] } \ No newline at end of file diff --git a/docs/clang/SourceLocation.md b/docs/clang/SourceLocation.md new file mode 100644 index 00000000..e69de29b diff --git a/docs/examples/ASTVisitor.cpp b/docs/examples/ASTVisitor.cpp index 2b29d84d..f34337c4 100644 --- a/docs/examples/ASTVisitor.cpp +++ b/docs/examples/ASTVisitor.cpp @@ -79,8 +79,9 @@ public: return result.front(); } - /// for a complex dependent type: `X<...>::name::name2::...::nameN`, we can resolve it recursively. - /// so we only need to handle the `X<...>::name`, whose prefix is a template specialization type. + /// for a complex dependent type: `X<...>::name::name2::...::nameN`, we can resolve it + /// recursively. so we only need to handle the `X<...>::name`, whose prefix is a template + /// specialization type. clang::QualType simplify(const clang::TemplateSpecializationType* templateType, const clang::IdentifierInfo* identifier) { // X is a class template or a type alias template @@ -97,7 +98,8 @@ public: namedDecl->dump(); } - } else if(auto aliasTemplateDecl = llvm::dyn_cast(templateDecl)) { + } else if(auto aliasTemplateDecl = + llvm::dyn_cast(templateDecl)) { // TODO: } else { templateDecl->dump(); @@ -242,7 +244,8 @@ public: case NestedNameSpecifier::SpecifierKind::TypeSpecWithTemplate: { llvm::outs() << "------------------ TypeSpecWithTemplate -----------------------\n"; - // when the kind of TST is TypeSpecWithTemplate, e.g. std::vector::template name:: + // when the kind of TST is TypeSpecWithTemplate, e.g. std::vector::template + // name:: TST->dump(); return resolve(QualType(TST->getAsType(), 0), II, arguments); } @@ -312,7 +315,8 @@ public: } } - QualType resolve(QualType T, const IdentifierInfo* II, ArrayRef arguments = {}) { + QualType + resolve(QualType T, const IdentifierInfo* II, ArrayRef arguments = {}) { if(!T->isDependentType() && arguments.size() == 0) { // TODO: } @@ -420,9 +424,9 @@ public: std::vector result; for(auto arg: arguments) { if(arg.getKind() == TemplateArgument::ArgKind::Type) { + // check whether it is a TemplateTypeParmType. if(auto type = llvm::dyn_cast(arg.getAsType())) { const TemplateTypeParmDecl* param = type->getDecl(); - if(param && param->hasDefaultArgument()) { result.push_back(param->getDefaultArgument().getArgument()); continue; @@ -443,7 +447,6 @@ public: nullptr, false, dealias(QualType(DTST->getQualifier()->getAsType(), 0)).getTypePtr()); - return Ctx.getDependentTemplateSpecializationType(DTST->getKeyword(), NNS, DTST->getIdentifier(), @@ -456,13 +459,15 @@ public: QualType resolve(QualType type) { while(true) { - // llvm::outs() << "--------------------------------------------------------------------\n"; + // llvm::outs() << + // "--------------------------------------------------------------------\n"; // type.dump(); MultiLevelTemplateArgumentList list; if(auto DNT = type->getAs()) { type = resolve(resolve(DNT->getQualifier(), DNT->getIdentifier())); - for(auto begin = arguments.rbegin(), end = arguments.rend(); begin != end; ++begin) { + for(auto begin = arguments.rbegin(), end = arguments.rend(); begin != end; + ++begin) { list.addOuterTemplateArguments((*begin)->first, (*begin)->second, true); } type = S.SubstType(dealias(type), list, {}, {}); @@ -480,7 +485,8 @@ public: S.pushCodeSynthesisContext(context); list.addOuterTemplateArguments(TATD, args, true); - for(auto begin = arguments.rbegin(), end = arguments.rend(); begin != end; ++begin) { + for(auto begin = arguments.rbegin(), end = arguments.rend(); begin != end; + ++begin) { list.addOuterTemplateArguments((*begin)->first, (*begin)->second, true); } @@ -489,7 +495,8 @@ public: // TATD->getTemplatedDecl()->getUnderlyingType().dump(); type = dealias(TATD->getTemplatedDecl()->getUnderlyingType()); // llvm::outs() << "arguments: - // -------------------------------------------------------------\n"; list.dump(); + // -------------------------------------------------------------\n"; + // list.dump(); type = S.SubstType(type, list, {}, {}); // type.dump(); arguments.clear(); @@ -544,7 +551,7 @@ public: llvm::outs() << "--------------------------------------------------------------------\n"; Type.dump(); - + Type->getCanonicalTypeInternal(); if(auto TTPT = Type->getAs()) { Type->dump(); std::terminate(); @@ -574,14 +581,16 @@ public: S.pushCodeSynthesisContext(context); if(auto CTD = llvm::dyn_cast(TemplateDecl)) { - llvm::outs() << "--------------------------------------------------------------------\n"; + llvm::outs() + << "--------------------------------------------------------------------\n"; llvm::SmallVector paritals; CTD->getPartialSpecializations(paritals); for(auto partial: paritals) { partial->getInjectedSpecializationType().dump(); } - llvm::outs() << "--------------------------------------------------------------------\n"; + llvm::outs() + << "--------------------------------------------------------------------\n"; // CTD->findPartialSpecialization() auto partial = CTD->findPartialSpecialization(Type); if(partial) { @@ -606,6 +615,10 @@ public: } // namespace clang +#define Traverse(NAME) bool Traverse##NAME(clang::NAME* node) +#define WalkUpFrom(NAME) bool WalkUpFrom##NAME(clang::NAME* node) +#define VISIT(NAME) bool Visit##NAME(clang::NAME* node) + class ASTVistor : public clang::RecursiveASTVisitor { private: clang::Preprocessor& preprocessor; @@ -613,14 +626,16 @@ private: clang::syntax::TokenBuffer& buffer; clang::ASTContext& context; clang::Sema& sema; + clang::syntax::TokenBuffer& TB; public: ASTVistor(clang::Preprocessor& preprocessor, clang::syntax::TokenBuffer& buffer, clang::ASTContext& context, - clang::Sema& sema) : + clang::Sema& sema, + clang::syntax::TokenBuffer& TB) : preprocessor(preprocessor), sourceManager(preprocessor.getSourceManager()), buffer(buffer), - context(context), sema(sema) {} + context(context), sema(sema), TB(TB) {} bool TraverseTranslationUnitDecl(clang::TranslationUnitDecl* decl) { for(auto it = decl->decls_begin(), end = decl->decls_end(); it != end; ++it) { @@ -637,15 +652,122 @@ public: return true; } - bool FieldDeclecl(clang::FieldDecl* decl) { - llvm::outs() << "Visiting FieldDeclecl: " << decl->getDeclKindName() << "\n"; + // bool VisitTypeAliasDecl(clang::TypeAliasDecl* decl) {} + + // bool FieldDeclecl(clang::FieldDecl* decl) { + // llvm::outs() << "Visiting FieldDeclecl: " << decl->getDeclKindName() << "\n"; + // return true; + // } + // + // bool WalkUpFromCXXRecordDecl(clang::CXXRecordDecl* decl) { + // llvm::outs() << "Visiting CXXRecordDecl\n"; + // // clang::TemplateSpecializationType t; + // clang::TemplateSpecializationTypeLoc loc; + // + // return true; + //} + // + + bool VisitCXXStaticCastExpr(clang::CXXStaticCastExpr* expr) { + expr->getAngleBrackets().getBegin().dump(sourceManager); + expr->getAngleBrackets().getEnd().dump(sourceManager); return true; } - bool WalkUpFromCXXRecordDecl(clang::CXXRecordDecl* decl) { - llvm::outs() << "Visiting CXXRecordDecl\n"; + bool VisitTypeLoc(clang::TypeLoc loc) { + // loc.dump(); return true; } + + VISIT(DeclaratorDecl) { + for(unsigned i = 0; i < node->getNumTemplateParameterLists(); ++i) { + if(auto* TPL = node->getTemplateParameterList(i)) { + TPL->getLAngleLoc().dump(sourceManager); + TPL->getRAngleLoc().dump(sourceManager); + } + } + return true; + } + + VISIT(TagDecl) { + for(unsigned i = 0; i < node->getNumTemplateParameterLists(); ++i) { + if(auto* TPL = node->getTemplateParameterList(i)) { + TPL->getLAngleLoc().dump(sourceManager); + TPL->getRAngleLoc().dump(sourceManager); + } + } + return true; + } + + VISIT(TemplateDecl) { + if(clang::TemplateParameterList* params = node->getTemplateParameters()) { + auto langle = params->getLAngleLoc(); + auto rangle = params->getRAngleLoc(); + langle.dump(sourceManager); + rangle.dump(sourceManager); + } + return true; + } + + VISIT(FunctionDecl) { + if(auto* args = node->getTemplateSpecializationArgsAsWritten()) { + auto langle = args->LAngleLoc; + auto rangle = args->RAngleLoc; + langle.dump(sourceManager); + rangle.dump(sourceManager); + } + return true; + } + + VISIT(ClassTemplateSpecializationDecl) { + if(const clang::ASTTemplateArgumentListInfo* args = node->getTemplateArgsAsWritten()) { + auto langle = args->getLAngleLoc(); + auto rangle = args->getRAngleLoc(); + langle.dump(sourceManager); + rangle.dump(sourceManager); + } + return true; + } + + VISIT(ClassTemplatePartialSpecializationDecl) { + if(clang::TemplateParameterList* params = node->getTemplateParameters()) { + auto langle = params->getLAngleLoc(); + auto rangle = params->getRAngleLoc(); + langle.dump(sourceManager); + rangle.dump(sourceManager); + } + return true; + } + + // VISIT(FunctionTemplateSpecializationInfo) { + // if(clang::TemplateParameterList* params = node->getTemplateParameters()) { + // auto langle = params->getLAngleLoc(); + // auto rangle = params->getRAngleLoc(); + // } + // return true; + // } + + // bool VisitTemplateSpecializationTypeLoc(clang::TemplateSpecializationTypeLoc loc) { + // // loc.dump(); + // auto l_angle = loc.getLAngleLoc(); + // l_angle.dump(sourceManager); + // auto r_angle = sourceManager.getFileLoc(loc.getRAngleLoc()); + // r_angle.dump(sourceManager); + // + // // llvm::outs() << sourceManager.getSpellingColumnNumber(r_angle) << "\n"; + // // llvm::outs() << sourceManager.getExpansionColumnNumber(r_angle) << "\n"; + // + // llvm::outs() << sourceManager.isInMainFile(r_angle) << "\n"; + // + // auto ID = sourceManager.getFileID(r_angle); + // llvm::outs() << sourceManager.getFilename(r_angle) << "\n"; + // + // auto tokens = buffer.spelledTokens(sourceManager.getFileID(l_angle)); + // for(auto& token: tokens) { + // token.location().dump(sourceManager); + // } + // return true; + //} }; int main(int argc, const char** argv) { @@ -693,9 +815,14 @@ int main(int argc, const char** argv) { } clang::syntax::TokenBuffer buffer = std::move(collector).consume(); + buffer.indexExpandedTokens(); auto tu = instance->getASTContext().getTranslationUnitDecl(); - ASTVistor visitor{instance->getPreprocessor(), buffer, instance->getASTContext(), instance->getSema()}; + ASTVistor visitor{instance->getPreprocessor(), + buffer, + instance->getASTContext(), + instance->getSema(), + buffer}; visitor.TraverseDecl(tu); action.EndSourceFile(); diff --git a/docs/examples/Preprocessor.cpp b/docs/examples/Preprocessor.cpp index e1f3425a..cc546d90 100644 --- a/docs/examples/Preprocessor.cpp +++ b/docs/examples/Preprocessor.cpp @@ -2,6 +2,8 @@ #include #include +using namespace clang; + class PPCallback : public clang::PPCallbacks { private: clang::Preprocessor& pp; @@ -10,44 +12,57 @@ private: public: PPCallback(clang::Preprocessor& pp) : pp(pp), sm(pp.getSourceManager()) {} - void MacroExpands(const clang::Token& token, - const clang::MacroDefinition& macro, - clang::SourceRange range, - const clang::MacroArgs* args) override { - std::string name = pp.getSpelling(token); - if(name.starts_with("_")) - return; - - clang::MacroInfo* info = macro.getMacroInfo(); - // info->isBuiltinMacro(); - // info->isFunctionLike(); - // info->isObjectLike(); - // info->isVariadic(); - // info->getNumParams(); - // info->params(); - - const int size = args->getNumMacroArguments(); // Expanding macro arguments - for(auto i = 0; i < size; ++i) { - // get first token of first argument of expanding macro - const clang::Token* first = args->getUnexpArgument(i); - // iterate over tokens of first argument of expanding macro - for(auto j = 0; j < args->getArgLength(first); ++j) { - const clang::Token& tok = *(first + j); - // llvm::outs() << "Arg: " << pp.getSpelling(tok) << "\n"; - } + void InclusionDirective(SourceLocation HashLoc, + const Token& IncludeTok, + StringRef FileName, + bool IsAngled, + CharSourceRange FilenameRange, + OptionalFileEntryRef File, + StringRef SearchPath, + StringRef RelativePath, + const Module* SuggestedModule, + bool ModuleImported, + SrcMgr::CharacteristicKind FileType) override { + if(sm.isInMainFile(HashLoc)) { + HashLoc.dump(sm); + IncludeTok.getLocation().dump(sm); + IncludeTok.getEndLoc().dump(sm); } - - auto expandingRange = sm.getExpansionRange(range); - auto text = clang::Lexer::getSourceText(expandingRange, sm, pp.getLangOpts()); - llvm::outs() << text << "\n"; } - void MacroDefined(const clang::Token& token, const clang::MacroDirective* directive) override { - std::string name = pp.getSpelling(token); - if(name.starts_with("_")) - return; - llvm::outs() << "MacroDefined: " << name << "\n"; - } + // void MacroExpands(const clang::Token& token, + // const clang::MacroDefinition& macro, + // clang::SourceRange range, + // const clang::MacroArgs* args) override { + // std::string name = pp.getSpelling(token); + // if(name.starts_with("_")) + // return; + // + // clang::MacroInfo* info = macro.getMacroInfo(); + // // info->isBuiltinMacro(); + // // info->isFunctionLike(); + // // info->isObjectLike(); + // // info->isVariadic(); + // // info->getNumParams(); + // // info->params(); + // + // const int size = args->getNumMacroArguments(); // Expanding macro arguments + // for(auto i = 0; i < size; ++i) { + // // get first token of first argument of expanding macro + // const clang::Token* first = args->getUnexpArgument(i); + // // iterate over tokens of first argument of expanding macro + // for(auto j = 0; j < args->getArgLength(first); ++j) { + // const clang::Token& tok = *(first + j); + // // llvm::outs() << "Arg: " << pp.getSpelling(tok) << "\n"; + // } + // } + // + // auto expandingRange = sm.getExpansionRange(range); + // auto text = clang::Lexer::getSourceText(expandingRange, sm, pp.getLangOpts()); + // llvm::outs() << text << "\n"; + //} + + void MacroDefined(const clang::Token& token, const clang::MacroDirective* directive) override {} }; int main(int argc, const char** argv) { @@ -64,7 +79,7 @@ int main(int argc, const char** argv) { auto invocation = std::make_shared(); std::vector args = { - "clang++", + "/usr/local/bin/clang++", "-Xclang", "-no-round-trip-args", "-std=c++20", @@ -107,7 +122,7 @@ int main(int argc, const char** argv) { // // // TODO: split annoated token // // } // }); - // pp.addPPCallbacks(std::make_unique(pp)); + pp.addPPCallbacks(std::make_unique(pp)); clang::syntax::TokenCollector collector{pp}; if(auto error = action.Execute()) { @@ -119,15 +134,18 @@ int main(int argc, const char** argv) { buffer.dumpForTests(); auto tokens = buffer.spelledTokens(sm.getMainFileID()); for(auto& token: tokens) { - llvm::outs() << "Token: " << token.text(sm) << "\n"; + + llvm::outs() << "Token: " << token.text(sm) << " " << clang::tok::getTokenName(token.kind()) + << "\n"; } - auto tokens2 = buffer.expandedTokens(); - for(auto& token: tokens2) { - llvm::outs() << "Token: " << token.text(sm) << "\n"; - } - // auto buffer = sm.getBufferData(sm.getMainFileID()); - // llvm::outs() << buffer << "\n"; - // buffer.spelledTokenContaining() - // all operations should before action end + buffer.expansionStartingAt(tokens[0].location()); + // auto tokens2 = buffer.expandedTokens(); + // for(auto& token: tokens2) { + // token.dumpForTests(sm); + // } + // auto buffer = sm.getBufferData(sm.getMainFileID()); + // llvm::outs() << buffer << "\n"; + // buffer.spelledTokenContaining() + // all operations should before action end action.EndSourceFile(); } diff --git a/include/Clang/ParsedAST.h b/include/Clang/ParsedAST.h index 79909658..49cd24c0 100644 --- a/include/Clang/ParsedAST.h +++ b/include/Clang/ParsedAST.h @@ -9,7 +9,6 @@ namespace clice { class ParsedAST { private: using Decl = clang::Decl*; - using TokenBuffer = clang::syntax::TokenBuffer; using ASTConsumer = std::unique_ptr; struct FrontendAction : public clang::ASTFrontendAction { @@ -43,10 +42,12 @@ public: auto& ASTContext() { return instance.getASTContext(); } - auto& TokensBuffer() { return buffer; } + auto& Preprocessor() { return instance.getPreprocessor(); } auto& SourceManager() { return instance.getSourceManager(); } + auto& TokenBuffer() { return buffer; } + auto& TranslationUnit() { return *instance.getASTContext().getTranslationUnitDecl(); } }; diff --git a/include/Feature/SemanticToken.h b/include/Feature/SemanticToken.h index cafc20f5..030862ee 100644 --- a/include/Feature/SemanticToken.h +++ b/include/Feature/SemanticToken.h @@ -6,7 +6,13 @@ namespace clice { class ParsedAST; +namespace feature { + protocol::SemanticTokens semanticTokens(const ParsedAST& ast); + + +} + } // namespace clice diff --git a/include/Protocol/Language/SemanticToken.h b/include/Protocol/Language/SemanticToken.h index 72973109..0bc2307f 100644 --- a/include/Protocol/Language/SemanticToken.h +++ b/include/Protocol/Language/SemanticToken.h @@ -4,72 +4,114 @@ namespace clice::protocol { -enum class SemanticTokenTypes : uint8_t { - Number, - /// extension: represent a character literal, - Char, - String, - Operator, - Keyword, +enum class SemanticTokenType : uint8_t { + /// Represents a comment. Comment, - /// extension: represent a preprocessor directive, - /// e.g. `#include`, `#define` + /// Represents a number literal. + Number, + /// Represents a character literal. + Char, + /// Represents a string literal. + String, + /// Represents a C/C++ keyword (e.g., `int`, `class`, `struct`). + Keyword, + /// Represents a compiler built-in macro, function, or keyword (e.g., `__stdcall`, + /// `__attribute__`, `__FUNCSIG__`). + Builtin, + /// Represents a preprocessor directive (e.g., `#include`, `#define`, `#if`). Directive, - /// extension: represent a header file path, - /// e.g. `` + /// Represents a header file path (e.g., ``). HeaderPath, + /// Represents a C/C++ macro name, both in definition and invocation. Macro, - /// extension: represent a macro parameter, - /// e.g. `X` in `#define FOO(X) X` + /// Represents a C/C++ macro parameter, both in definition and invocation. MacroParameter, + /// Represents a C++ namespace name. Namespace, + /// Represents a C/C++ type name. Type, + /// Represents a C/C++ struct name. Struct, - /// extension: represent a union, + /// Represents a C/C++ union name. Union, + /// Represents a C/C++ class name. Class, - /// extension: represent a field, + /// Represents a C/C++ field name. Field, + /// Represents a C/C++ enum name. Enum, + /// Represents a C/C++ enum field (member) name. EnumMember, + /// Represents a C/C++ variable name. Variable, + /// Represents a C/C++ function name. Function, + /// Represents a C++ method name. Method, + /// Represents a C/C++ function/method parameter name. Parameter, - /// extension, represent a type template parameter, - /// e.g. `T` in `template class Foo {};` - TypeTemplateParameter, - /// extension, represent a non-type template parameter, - /// e.g. `N` in `template class Foo {};` - NonTypeTemplateParameter, - /// extension, represent a template template parameter, - /// e.g. `T` in `template