diff --git a/include/AST/Selection.h b/include/AST/Selection.h index a88e5b7d..0430e0e4 100644 --- a/include/AST/Selection.h +++ b/include/AST/Selection.h @@ -1,181 +1,151 @@ #pragma once +#include +#include "SourceCode.h" #include "clang/AST/ASTTypeTraits.h" +#include "clang/AST/PrettyPrinter.h" #include "clang/Tooling/Syntax/Tokens.h" - -#include +#include "llvm/ADT/SmallVector.h" namespace clice { -// Code Action: -// add implementation in cpp file(important). -// extract implementation to cpp file(important). -// generate virtual function declaration(full qualified?). -// generate c++20 coroutine and awaiter interface. -// expand macro(one step by step). -// invert if. - class CompilationUnit; -namespace { -class SelectionBuilder; -} - +/// A selection can partially or completely cover several AST nodes. +/// The SelectionTree contains nodes that are covered, and their parents. +/// SelectionTree does not contain all AST nodes, rather only: +/// Decl, Stmt, TypeLoc, NestedNamespaceSpecifierLoc, CXXCtorInitializer. +/// (These are the nodes with source ranges that fit in DynTypedNode). +/// +/// Usually commonAncestor() is the place to start: +/// - it's the simplest answer to "what node is under the cursor" +/// - the selected Expr (for example) can be found by walking up the parent +/// chain and checking Node->ASTNode. +/// - if you want to traverse the selected nodes, they are all under +/// commonAncestor() in the tree. +/// +/// SelectionTree tries to behave sensibly in the presence of macros, but does +/// not model any preprocessor concepts: the output is a subset of the AST. +/// When a macro argument is specifically selected, only its first expansion is +/// selected in the AST. (Returning a selection forest is unreasonably difficult +/// for callers to handle correctly.) +/// +/// Comments, directives and whitespace are completely ignored. +/// Semicolons are also ignored, as the AST generally does not model them well. 
+/// +/// The SelectionTree owns the Node structures, but the ASTNode attributes +/// point back into the AST it was constructed with. class SelectionTree { - friend class SelectionBuilder; - public: - /// The extent to which an selection is covered by the AST node. - enum class CoverageKind : unsigned { - /// For example, if the selection is - /// - /// void f() { - /// int x = 1; - /// ^^^ - /// } - /// - /// The FunctionDecl `f()` and VarDecl `x` would fully cover the selection. - Full, + /// Create selection trees for the given range, and pass them to Func. + /// + /// There may be multiple possible selection trees: + /// - if the range is empty and borders two tokens, a tree for the right token + /// and a tree for the left token will be yielded. + /// - Func should return true on success (stop) and false on failure (continue) + /// + /// Always yields at least one tree. If no tokens are touched, it is empty. + static bool create_each(CompilationUnit& unit, + LocalSourceRange range, + llvm::function_ref callback); - /// For example, if the selection is - /// - /// if (x == 1) { - /// ^^^^^^^^^^^^^ - /// int y = 2; - /// } - /// - /// The IfStmt would fully cover the selection while the Expr `x == 1` would partially - /// cover the selection. - Partial, - }; - - /// An AST node is involved in the selection, either selected directly or some descendant node - /// is selected. - struct Node { - /// The AST node that is selected. - clang::DynTypedNode dynNode; - - /// The extent to which the selection is covered by the AST node. - CoverageKind kind; - - /// In most cases, there is only 1 child in a selected node. Use SmallVector with stack - /// capability 1 to reduce the size of Node. - llvm::SmallVector children; - - /// The parent node in the selection tree. nullptr for root node. - Node* parent; - - template - bool isOneOf() const { - return dynNode.get() || (dynNode.get() || ...); - } - }; - - /// Construct an empty selection tree. 
- SelectionTree() = default; + /// Create a selection tree for the given range. + /// + /// Where ambiguous (range is empty and borders two tokens), prefer the token + /// on the right. + static SelectionTree create_right(CompilationUnit& unit, LocalSourceRange range); + /// Copies are no good - contain pointers to other nodes. SelectionTree(const SelectionTree&) = delete; SelectionTree& operator= (const SelectionTree&) = delete; + /// Moves are OK though - internal storage is pointer-stable when moved. SelectionTree(SelectionTree&&) = default; SelectionTree& operator= (SelectionTree&&) = default; - /// Check if there is any selection. - bool hasValue() const { - return root != nullptr; - } + // Describes to what extent an AST node is covered by the selection. + enum SelectionKind : unsigned char { + // The AST node owns no characters covered by the selection. + // Note that characters owned by children don't count: + // if (x == 0) scream(); + // ^^^^^^ + // The IfStmt would be Unselected because all the selected characters are + // associated with its children. + // (Invisible nodes like ImplicitCastExpr are always unselected). + Unselected, + // The AST node owns selected characters, but is not completely covered. + Partial, + // The AST node owns characters, and is covered by the selection. + Complete, + }; - // Return nullptr if there is no selection. - const Node* getRoot() const { - return root; - } + // An AST node that is implicated in the selection. + // (Either selected directly, or some descendant is selected). + struct Node { + /// The parent within the selection tree. nullptr for TranslationUnitDecl. + Node* parent; - std::deque& children() { - return storage; - } + /// Direct children within the selection tree. + llvm::SmallVector children; - const std::deque& children() const { - return storage; - } + /// The extent to which this node is covered by the selection. + SelectionKind selected; - /// Return true to continue the walk, false to stop. 
- using Walker = llvm::function_ref; + clang::DynTypedNode data; - /// Return true if the walk is completed, false if the walk is interrupted. - bool walkDfs(Walker ops) const { - if(!root) - return true; - - llvm::SmallVector stack; - stack.push_back(root); - while(!stack.empty()) { - auto node = stack.pop_back_val(); - - if(!ops(node)) - return false; - - for(auto child: node->children) { - stack.push_back(child); - } + template + auto get() const { + return data.get(); } - return true; + /// Get the source range of this node. + clang::SourceRange source_range() const; + + /// Printable node kind, like "CXXRecordDecl" or "AutoTypeLoc". + std::string kind() const; + + /// Walk up the AST to get the lexical DeclContext of this Node, which is not + /// the node itself. + const clang::DeclContext& decl_context() const; + + /// If this node is a wrapper with no syntax (e.g. implicit cast), return + /// its contents. (If multiple wrappers are present, unwraps all of them). + const Node& ignore_implicit() const; + + // If this node is inside a wrapper with no syntax (e.g. implicit cast), + // return that wrapper. (If multiple are present, unwraps all of them). + const Node& outer_implicit() const; + }; + + // The most specific common ancestor of all the selected nodes. + // Returns nullptr if the common ancestor is the root. + // (This is to avoid accidentally traversing the TUDecl and thus preamble). + const Node* common_ancestor() const; + + // The selection node corresponding to TranslationUnitDecl. + const Node& root() const { + return *m_root; } - /// Return true if the walk is completed, false if the walk is interrupted. 
- bool walkBfs(Walker ops) const { - if(!root) - return true; + void print(llvm::raw_ostream& os, const Node& node, int indent) const; - std::deque queue; - queue.push_back(root); - - while(!queue.empty()) { - auto node = queue.front(); - queue.pop_front(); - - if(!ops(node)) - return false; - - for(auto child: node->children) { - queue.push_front(child); - } - } - - return true; + friend llvm::raw_ostream& operator<< (llvm::raw_ostream& os, const SelectionTree& tree) { + tree.print(os, tree.root(), 1); + return os; } - explicit operator bool () const { - return hasValue(); - } - - void dump(llvm::raw_ostream& os, clang::ASTContext& context) const; - - static SelectionTree selectOffsetRange(std::uint32_t begin, - std::uint32_t end, - clang::ASTContext& context, - CompilationUnit& unit) { - return SelectionTree(begin, end, context, unit); - } - - static SelectionTree selectToken(const clang::syntax::Token& token, - clang::ASTContext& context, - CompilationUnit& unit); - private: - /// Construct a selection tree from the given source range. `start` and `end` means offset from - /// file start location, these arguments should come from function `SourceConverter::toOffset`. - SelectionTree(std::uint32_t begin, - std::uint32_t end, - clang::ASTContext& context, - CompilationUnit& unit); + // Creates a selection tree for the given range in the main file. + // The range includes bytes [Start, End). + SelectionTree(CompilationUnit& unit, LocalSourceRange range); - // The root node of selection tree. - Node* root; + // Stable-pointer storage, FIXME: use memory pool instead? + std::deque nodes; - // The AST nodes was stored in the order from root to leaf. - // Use deque as the stable pointer storage. 
- std::deque storage; + const Node* m_root; + + clang::PrintingPolicy print_policy; }; } // namespace clice + diff --git a/include/AST/SourceCode.h b/include/AST/SourceCode.h index 2a8de6f1..539407e9 100644 --- a/include/AST/SourceCode.h +++ b/include/AST/SourceCode.h @@ -1,8 +1,39 @@ #pragma once -#include "SourceLocation.h" +#include #include "clang/Lex/Token.h" -#include "llvm/ADT/FunctionExtras.h" +#include "clang/Basic/SourceLocation.h" + +namespace std { + +template <> +struct tuple_size : std::integral_constant {}; + +template <> +struct tuple_element<0, clang::SourceRange> { + using type = clang::SourceLocation; +}; + +template <> +struct tuple_element<1, clang::SourceRange> { + using type = clang::SourceLocation; +}; + +} // namespace std + +namespace clang { + +/// Through ADL, make `clang::SourceRange` could be destructured. +template +clang::SourceLocation get(clang::SourceRange range) { + if constexpr(I == 0) { + return range.getBegin(); + } else { + return range.getEnd(); + } +} + +} // namespace clang namespace clice { diff --git a/include/AST/SourceLocation.h b/include/AST/SourceLocation.h deleted file mode 100644 index cdba4b64..00000000 --- a/include/AST/SourceLocation.h +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once - -#include "clang/Basic/SourceLocation.h" - -namespace std { - -template <> -struct tuple_size : std::integral_constant {}; - -template <> -struct tuple_element<0, clang::SourceRange> { - using type = clang::SourceLocation; -}; - -template <> -struct tuple_element<1, clang::SourceRange> { - using type = clang::SourceLocation; -}; - -} // namespace std - -namespace clang { - -/// Through ADL, make `clang::SourceRange` could be destructured. 
-template -clang::SourceLocation get(clang::SourceRange range) { - if constexpr(I == 0) { - return range.getBegin(); - } else { - return range.getEnd(); - } -} - -} // namespace clang - diff --git a/include/Compiler/CompilationUnit.h b/include/Compiler/CompilationUnit.h index 7eab271e..66c8bb08 100644 --- a/include/Compiler/CompilationUnit.h +++ b/include/Compiler/CompilationUnit.h @@ -113,15 +113,31 @@ public: /// macro expansion). auto expansion_location(clang::SourceLocation location) -> clang::SourceLocation; + /// + auto file_location(clang::SourceLocation location) -> clang::SourceLocation; + /// FIXME: Do we really need this function? auto presumed_location(clang::SourceLocation location) -> clang::PresumedLoc; + /// Create a file location with given file id and offset. + auto create_location(clang::FileID fid, std::uint32_t offset) -> clang::SourceLocation; + /// Get the spelled tokens(raw token) of the file id. auto spelled_tokens(clang::FileID fid) -> llvm::ArrayRef; + /// The spelled tokens that overlap or touch a spelling location Loc. + /// This always returns 0-2 tokens. + auto spelled_tokens_touch(clang::SourceLocation location) + -> llvm::ArrayRef; + + auto expanded_tokens() -> llvm::ArrayRef; + /// Get the expanded tokens(after preprocessing) of the file id. auto expanded_tokens(clang::SourceRange range) -> llvm::ArrayRef; + auto expansions_overlapping(llvm::ArrayRef) + -> std::vector; + /// Get the token length. 
auto token_length(clang::SourceLocation location) -> std::uint32_t; @@ -143,6 +159,8 @@ public: clang::ASTContext& context(); + clang::syntax::TokenBuffer& token_buffer(); + TemplateResolver& resolver(); llvm::DenseMap& directives(); diff --git a/include/Compiler/Directive.h b/include/Compiler/Directive.h index 84b67253..4b857db0 100644 --- a/include/Compiler/Directive.h +++ b/include/Compiler/Directive.h @@ -1,6 +1,6 @@ #pragma once -#include "AST/SourceLocation.h" +#include "AST/SourceCode.h" #include "clang/Lex/MacroInfo.h" #include "llvm/ADT/DenseMap.h" diff --git a/include/Feature/Hover.h b/include/Feature/Hover.h index 53c1372f..a73c3e7f 100644 --- a/include/Feature/Hover.h +++ b/include/Feature/Hover.h @@ -41,6 +41,7 @@ struct HoverItem { struct Hover { /// Title SymbolKind kind; + std::string name; /// Extra information. @@ -56,28 +57,11 @@ struct Hover { std::string source; }; -/// Hover information for all symbols in the file. -struct Hovers { - struct Occurrence { - LocalSourceRange range; - uint32_t index; - }; - - /// Hover information for all symbols in the file. - std::vector hovers; - - /// A map between the file offset and the index of the hover. - std::vector occurrences; -}; - /// Generate the hover information for the given declaration(for test). Hover hover(CompilationUnit& unit, const clang::NamedDecl* decl); /// Generate the hover information for the symbol at the given offset. -Hover hover(CompilationUnit& unit, uint32_t offset); - -/// Generate the hover information for all files in the given unit. -index::Shared indexHover(CompilationUnit& unit); +Hover hover(CompilationUnit& unit, std::uint32_t offset); } // namespace clice::feature diff --git a/include/Protocol/Basic.h b/include/Protocol/Basic.h index db9427fc..06072f70 100644 --- a/include/Protocol/Basic.h +++ b/include/Protocol/Basic.h @@ -123,4 +123,12 @@ struct TextDocumentPositionParams { Position position; }; +struct MarkupContent { + /// The type of the Markup. 
+ string kind; + + /// The content itself. + string value; +}; + } // namespace clice::proto diff --git a/include/Protocol/Feature/Hover.h b/include/Protocol/Feature/Hover.h index b758cb01..c722be1c 100644 --- a/include/Protocol/Feature/Hover.h +++ b/include/Protocol/Feature/Hover.h @@ -8,4 +8,15 @@ struct HoverClientCapabilities {}; using HoverOptions = bool; +using HoverParams = TextDocumentPositionParams; + +struct Hover { + /// The hover's content + MarkupContent contents; + + /// An optional range is a range inside a text document + /// that is used to visualize a hover, e.g. by changing the background color. + /// FIXME: Range range; +}; + } // namespace clice::proto diff --git a/include/Protocol/Initialize.h b/include/Protocol/Initialize.h index 486255a8..2e5488ad 100644 --- a/include/Protocol/Initialize.h +++ b/include/Protocol/Initialize.h @@ -85,58 +85,58 @@ struct ServerCapabilities { HoverOptions hoverProvider; /// The server provides signature help support. - SignatureHelpOptions signatureHelpProvider; + /// FIXME: SignatureHelpOptions signatureHelpProvider; /// The server provides go to declaration support. - DeclarationOptions declarationProvider; + /// FIXME: DeclarationOptions declarationProvider; /// The server provides goto definition support. - DefinitionOptions definitionProvider; + /// FIXME: DefinitionOptions definitionProvider; /// The server provides goto type definition support. - TypeDefinitionOptions typeDefinitionProvider; + /// FIXME: TypeDefinitionOptions typeDefinitionProvider; /// The server provides goto implementation support. - ImplementationOptions implementationProvider; + /// FIXME: ImplementationOptions implementationProvider; /// The server provides find references support. - ReferenceOptions referencesProvider; + /// FIXME: ReferenceOptions referencesProvider; /// The server provides document highlight support. 
- DocumentHighlightOptions documentHighlightProvider; + /// FIXME: DocumentHighlightOptions documentHighlightProvider; /// The server provides document symbol support. - DocumentSymbolOptions documentSymbolProvider; + /// FIXME: DocumentSymbolOptions documentSymbolProvider; /// The server provides code actions. The `CodeActionOptions` return type is /// only valid if the client signals code action literal support via the /// property `textDocument.codeAction.codeActionLiteralSupport`. - CodeActionOptions codeActionProvider; + /// FIXME: CodeActionOptions codeActionProvider; /// The server provides code lens. - CodeLensOptions codeLensProvider; + /// FIXME: CodeLensOptions codeLensProvider; /// The server provides document link support. - DocumentLinkOptions documentLinkProvider; + /// FIXME: DocumentLinkOptions documentLinkProvider; /// The server provides color provider support. /// FIXME: DocumentColorOptions colorProvider; /// The server provides document formatting. - DocumentFormattingOptions documentFormattingProvider; + /// FIXME: DocumentFormattingOptions documentFormattingProvider; /// The server provides document range formatting. - DocumentRangeFormattingOptions documentRangeFormattingProvider; + /// FIXME: DocumentRangeFormattingOptions documentRangeFormattingProvider; /// The server provides document formatting on typing. - DocumentOnTypeFormattingOptions documentOnTypeFormattingProvider; + /// FIXME: DocumentOnTypeFormattingOptions documentOnTypeFormattingProvider; /// The server provides rename support. RenameOptions may only be specified if the client /// states that it supports `prepareSupport` in its initial `initialize` request. - RenameOptions renameProvider; + /// FIXME: RenameOptions renameProvider; /// The server provides folding provider support. - FoldingRangeOptions foldingRangeProvider; + /// FIXME: FoldingRangeOptions foldingRangeProvider; /// The server provides execute command support. 
/// FIXME: ExecuteCommandOptions executeCommandProvider; @@ -148,7 +148,7 @@ struct ServerCapabilities { /// FIXME: LinkedEditingRangeOptions linkedEditingRangeProvider; /// The server provides call hierarchy support. - CallHierarchyOptions callHierarchyProvider; + /// FIXME: CallHierarchyOptions callHierarchyProvider; /// The server provides semantic tokens support. SemanticTokensOptions semanticTokensProvider; @@ -157,13 +157,13 @@ struct ServerCapabilities { /// FIXME: MonikerOptions monikerProvider; /// The server provides type hierarchy support. - TypeHierarchyOptions typeHierarchyProvider; + /// FIXME: TypeHierarchyOptions typeHierarchyProvider; /// The server provides inline values. /// FIXME: InlineValueOptions inlineValueProvider; /// The server provides inlay hints. - InlayHintOptions inlayHintProvider; + /// FIXME: InlayHintOptions inlayHintProvider; /// The server has support for pull model diagnostics. /// FIXME: DiagnosticOptions diagnosticProvider; diff --git a/include/Server/Server.h b/include/Server/Server.h index f93ac1bf..91c64535 100644 --- a/include/Server/Server.h +++ b/include/Server/Server.h @@ -103,6 +103,8 @@ private: async::Task<> on_did_close(proto::DidCloseTextDocumentParams params); private: + async::Task on_hover(proto::HoverParams params); + async::Task on_semantic_token(proto::SemanticTokensParams params); async::Task on_completion(proto::CompletionParams params); diff --git a/include/Support/Logger.h b/include/Support/Logger.h index 70c6d883..8daefe8a 100644 --- a/include/Support/Logger.h +++ b/include/Support/Logger.h @@ -43,7 +43,9 @@ void warn(std::format_string fmt, Args&&... args) { template void debug(std::format_string fmt, Args&&... 
args) { +#ifndef NDEBUG log::log(Level::DEBUG, fmt.get(), std::forward(args)...); +#endif } template diff --git a/include/Test/Annotation.h b/include/Test/Annotation.h index 8ba3863a..ab44b33d 100644 --- a/include/Test/Annotation.h +++ b/include/Test/Annotation.h @@ -1,5 +1,6 @@ #pragma once +#include "AST/SourceCode.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringMap.h" @@ -7,40 +8,137 @@ namespace clice::testing { struct AnnotatedSource { std::string content; + /// All named offsets llvm::StringMap offsets; + llvm::StringMap ranges; + + std::vector nameless_offsets; + + /// Point Annotation: + /// - $(key): Marks a single point. + /// + /// Range Annotation: + /// - @key[...content...]: Marks a range. + /// + /// A range annotation for 'key' creates both a `ranges["key"]` and an `offsets["key"]` + /// (pointing to the start). static AnnotatedSource from(llvm::StringRef content) { std::string source; - llvm::StringMap offsets; - source.reserve(content.size()); + llvm::StringMap offsets; + llvm::StringMap ranges; + std::vector nameless_offsets; + std::uint32_t offset = 0; - for(uint32_t i = 0; i < content.size();) { - auto c = content[i]; + std::uint32_t i = 0; + // Helper lambda to parse a point annotation $(key). + // It captures all necessary variables by reference. + // Returns true if a point was successfully parsed, false otherwise. + auto try_parse_point_annotation = [&]() -> bool { + if(content[i] != '$') { + return false; + } + + // Peek ahead to see if it's "$(key)" or just "$" + if(i + 1 < content.size() && content[i + 1] == '(') { + // It's the full "$(key)" syntax + uint32_t key_start = i + 2; + size_t key_end = content.find(')', key_start); + + if(key_end == llvm::StringRef::npos) { + return false; + } // Malformed + + llvm::StringRef key = content.slice(key_start, key_end); + /// empty key is regarded as a nameless, and `()` is not consumed. 
+ if(key.empty()) { + // It's the shorthand "$" syntax for an nameless key + nameless_offsets.emplace_back(offset); + i += 1; // Advance cursor past the single '$' + } else { + offsets.try_emplace(key, offset); + i = key_end + 1; // Advance cursor past the entire "$(key)" + } + return true; + } else { + // It's the shorthand "$" syntax for an nameless key + nameless_offsets.emplace_back(offset); + i += 1; // Advance cursor past the single '$' + return true; + } + }; + + while(i < content.size()) { + // Check for a point annotation first. + if(try_parse_point_annotation()) { + continue; + } + + char c = content[i]; + + // Handle Range: @key[...] if(c == '@') { + // Skip '@' i += 1; - auto key = content.substr(i).take_until([](char c) { return c == ' '; }); - offsets.try_emplace(key, offset); + + const char open_bracket = '['; + const char close_bracket = ']'; + + llvm::StringRef key = content.substr(i).take_until( + [&](char c) { return isspace(c) || c == open_bracket; }); + i += key.size(); + + while(i < content.size() && isspace(content[i])) { + i++; + } + + assert(i < content.size() && content[i] == open_bracket && + "Expect @key[...] for ranges."); + i += 1; // Skip '[' + + uint32_t begin_offset = offset; + int bracket_level = 1; + + while(i < content.size() && bracket_level > 0) { + // Inside a range, we can still have nested point annotations. 
+ if(try_parse_point_annotation()) { + continue; + } + + char inner_c = content[i]; + if(inner_c == open_bracket) + bracket_level++; + else if(inner_c == close_bracket) + bracket_level--; + + if(bracket_level > 0) { + source += inner_c; + offset += 1; + i += 1; + } else { + i += 1; // Skip the final ']' + } + } + + ranges.try_emplace(key, LocalSourceRange{begin_offset, offset}); continue; } - if(c == '$') { - assert(i + 1 < content.size() && content[i + 1] == '(' && "expect $(name)"); - i += 2; - auto key = content.substr(i).take_until([](char c) { return c == ')'; }); - i += key.size() + 1; - offsets.try_emplace(key, offset); - continue; - } - - i += 1; - offset += 1; + // If nothing else matched, it's a regular character. source += c; + offset += 1; + i += 1; } - return AnnotatedSource{std::move(source), std::move(offsets)}; + return AnnotatedSource{ + std::move(source), + std::move(offsets), + std::move(ranges), + std::move(nameless_offsets), + }; } }; @@ -81,7 +179,7 @@ struct AnnotatedSources { while(!content.empty()) { llvm::StringRef line = content.take_front(content.find_first_of("\r\n")); content = content.drop_front(line.size()); - if(content.starts_with("\n")) { + if(content.starts_with("\r\n")) { content = content.drop_front(2); } else if(content.starts_with("\n")) { content = content.drop_front(1); diff --git a/include/Test/LocationChain.h b/include/Test/LocationChain.h index adb97cc2..5a59b3c7 100644 --- a/include/Test/LocationChain.h +++ b/include/Test/LocationChain.h @@ -16,10 +16,12 @@ struct LocationChain { LocationChain(LocationChain& outer, std::source_location current = std::source_location::current()) : - locations{std::move(outer.locations)} { + locations{outer.locations} { locations.emplace_back(current); } + LocationChain(const LocationChain&) = delete; + /// Dump all locations. 
void backtrace() { for(auto location: locations) { diff --git a/include/Test/Test.h b/include/Test/Test.h index 3968ec4e..e2874a7e 100644 --- a/include/Test/Test.h +++ b/include/Test/Test.h @@ -47,6 +47,7 @@ inline void EXPECT_FAILURE(std::string message, LocationChain chain = LocationCh inline void ASSERT_FAILURE(std::string message, LocationChain chain = LocationChain()) { chain.backtrace(); GTEST_MESSAGE_AT_("", 0, message.c_str(), ::testing::TestPartResult::kFatalFailure); + std::abort(); } inline void EXPECT_TRUE(auto&& value, LocationChain chain = LocationChain()) { diff --git a/include/Test/Tester.h b/include/Test/Tester.h index f854573d..40703543 100644 --- a/include/Test/Tester.h +++ b/include/Test/Tester.h @@ -26,20 +26,34 @@ struct Tester { sources.add_source(name, content); } - Tester& compile(llvm::StringRef standard = "-std=c++20") { + void add_files(llvm::StringRef main_file, llvm::StringRef content) { + src_path = main_file; + sources.add_sources(content); + } + + bool compile(llvm::StringRef standard = "-std=c++20") { auto command = std::format("clang++ {} {} -fms-extensions", standard, src_path); database.update_command("fake", src_path, command); - params.arguments = database.get_command(src_path).arguments; + params.arguments = database.get_command(src_path, true, true).arguments; for(auto& [file, source]: sources.all_files) { - params.add_remapped_file(file, source.content); + if(file == src_path) { + params.add_remapped_file(file, source.content); + } else { + /// FIXME: This is a workaround. + std::string path = path::is_absolute(file) ? 
file.str() : path::join(".", file); + params.add_remapped_file(path, source.content); + } } auto info = clice::compile(params); - ASSERT_TRUE(info); + if(!info) { + return false; + } + this->unit.emplace(std::move(*info)); - return *this; + return true; } bool compile_with_pch(llvm::StringRef standard = "-std=c++20") { @@ -93,6 +107,44 @@ struct Tester { std::uint32_t operator[] (llvm::StringRef file, llvm::StringRef pos) { return sources.all_files.lookup(file).offsets.lookup(pos); } + + std::uint32_t point(llvm::StringRef name = "", llvm::StringRef file = "") { + if(file.empty()) { + file = src_path; + } + + auto& offsets = sources.all_files[file].offsets; + if(name.empty()) { + assert(offsets.size() == 1); + return offsets.begin()->second; + } else { + assert(offsets.contains(name)); + return offsets.lookup(name); + } + } + + llvm::ArrayRef nameless_points(llvm::StringRef file = "") { + if(file.empty()) { + file = src_path; + } + + return sources.all_files[file].nameless_offsets; + } + + LocalSourceRange range(llvm::StringRef name = "", llvm::StringRef file = "") { + if(file.empty()) { + file = src_path; + } + + auto& ranges = sources.all_files[file].ranges; + if(name.empty()) { + assert(ranges.size() == 1); + return ranges.begin()->second; + } else { + assert(ranges.contains(name)); + return ranges.lookup(name); + } + } }; struct TestFixture : ::testing::Test, Tester {}; diff --git a/src/AST/Selection.cpp b/src/AST/Selection.cpp index 206c7d0f..ad65b8db 100644 --- a/src/AST/Selection.cpp +++ b/src/AST/Selection.cpp @@ -1,295 +1,1208 @@ +#include +#include +#include +#include #include "AST/Selection.h" #include "Compiler/CompilationUnit.h" - +#include "Support/Logger.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "clang/AST/ASTConcept.h" +#include "clang/AST/ASTTypeTraits.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/Expr.h" +#include 
"clang/AST/ExprCXX.h" +#include "clang/AST/PrettyPrinter.h" #include "clang/AST/RecursiveASTVisitor.h" - -#include +#include "clang/AST/TypeLoc.h" +#include "clang/Basic/OperatorKinds.h" +#include "clang/Basic/TokenKinds.h" +#include "clang/Tooling/Syntax/Tokens.h" namespace clice { +using namespace clang; + namespace { -struct SelectionBuilder { - using Token = clang::syntax::Token; - using OffsetPair = std::pair; +using Node = SelectionTree::Node; - SelectionBuilder(std::uint32_t begin, - std::uint32_t end, - clang::ASTContext& context, - CompilationUnit& unit) : context(context), unit(unit) { - assert(end >= begin && "End offset should be greater than or equal to begin offset."); +#ifndef NDEBUG +#define nlog(...) log::debug(__VA_ARGS__) +#endif - // The location in clang AST is token-based, of course. Because the parser - // processes tokens from the lexer. So we need to find boundary tokens at first. - // FIXME: support other file. - auto& src = context.getSourceManager(); - auto tokens = unit.spelled_tokens(src.getMainFileID()); - auto bound = selectionBound(tokens, {begin, end}, unit); +std::vector get_attributes(const DynTypedNode& N) { + std::vector result; - left = bound.first, right = bound.second; + if(const auto* TL = N.get()) { + for(auto ATL = TL->getAs(); !ATL.isNull(); + ATL = ATL.getModifiedLoc().getAs()) { + if(const Attr* A = ATL.getAttr()) { + result.push_back(A); + } + assert(!ATL.getModifiedLoc().isNull()); + } } - /// Construct a selection builder from two boundary tokens. the `left` and `right` should come - /// from `fixSelectionBound`. - /// The constructor is used for unittest. - SelectionBuilder(const Token* left, - const Token* right, - clang::ASTContext& context, - CompilationUnit& unit) : - left(left), right(right), context(context), unit(unit) {} - - /// Compute 2 boundary tokens by given pair of offset as the selection range, the `end` of - /// pair should be greater than `begin`. 
- static auto selectionBound(llvm::ArrayRef tokens, - OffsetPair offsets, - CompilationUnit& unit) -> std::pair { - auto [begin, end] = offsets; - assert(end >= begin && "Can not build a selection range for a invalid OffsetPair"); - - // int xxxx = 3; - // ^^^^^^ - // expect to find the first token whose end location is greater than `begin`. - auto left = std::partition_point(tokens.begin(), tokens.end(), [&](const auto& token) { - return unit.file_offset(token.endLocation()) <= begin; - }); - - // int xxxx = 3; - // ^^^^^^ - // expect to find the last token whose start location is less than to `end`. - auto right = std::partition_point(left, tokens.end(), [&](const auto& token) { - return unit.file_offset(token.location()) < end; - }); - - // right - 1: the right is the first token whose start location is greater than `end`. - return {left, right - 1}; + if(const auto* S = N.get()) { + for(; S != nullptr; S = dyn_cast(S->getSubStmt())) { + for(const Attr* A: S->getAttrs()) { + if(A) { + result.push_back(A); + } + } + } } - bool isValidOffsetRange() const { - const auto tokens = unit.spelled_tokens(context.getSourceManager().getMainFileID()); - return left != tokens.end() && right != tokens.end(); + if(const auto* D = N.get()) { + for(const Attr* A: D->attrs()) { + if(A) { + result.push_back(A); + } + } + } + return result; +} + +// Measure the fraction of selections that were enabled by recovery AST. +void recordMetrics(const SelectionTree& S, const LangOptions& Lang) { + /// if(!trace::enabled()) + /// return; + /// const char* LanguageLabel = Lang.CPlusPlus ? "C++" : Lang.ObjC ? 
"ObjC" : "C"; + /// constexpr static trace::Metric SelectionUsedRecovery("selection_recovery", + /// trace::Metric::Distribution, + /// "language"); + /// constexpr static trace::Metric RecoveryType("selection_recovery_type", + /// trace::Metric::Distribution, + /// "language"); + /// const auto* Common = S.commonAncestor(); + /// for(const auto* N = Common; N; N = N->Parent) { + /// if(const auto* RE = N->ASTNode.get()) { + /// SelectionUsedRecovery.record(1, LanguageLabel); // used recovery ast. + /// RecoveryType.record(RE->isTypeDependent() ? 0 : 1, LanguageLabel); + /// return; + /// } + /// } + /// if(Common) + /// SelectionUsedRecovery.record(0, LanguageLabel); // unused. +} + +// Return the range covering a node and all its children. +SourceRange getSourceRange(const DynTypedNode& N) { + // MemberExprs to implicitly access anonymous fields should not claim any + // tokens for themselves. Given: + // struct A { struct { int b; }; }; + // The clang AST reports the following nodes for an access to b: + // A().b; + // [----] MemberExpr, base = A()., member = b + // [----] MemberExpr: base = A(), member = + // [-] CXXConstructExpr + // For our purposes, we don't want the second MemberExpr to own any tokens, + // so we reduce its range to match the CXXConstructExpr. + // (It's not clear that changing the clang AST would be correct in general). + if(const auto* ME = N.get()) { + if(!ME->getMemberDecl()->getDeclName()) + return ME->getBase() ? getSourceRange(DynTypedNode::create(*ME->getBase())) + : SourceRange(); + } + return N.getSourceRange(); +} + +// An IntervalSet maintains a set of disjoint subranges of an array. +// +// Initially, it contains the entire array. +// [-----------------------------------------------------------] +// +// When a range is erased(), it will typically split the array in two. +// Claim: [--------------------] +// after: [----------------] [-------------------] +// +// erase() returns the segments actually erased. 
Given the state above: +// Claim: [---------------------------------------] +// Out: [---------] [------] +// After: [-----] [-----------] +// +// It is used to track (expanded) tokens not yet associated with an AST node. +// On traversing an AST node, its token range is erased from the unclaimed set. +// The tokens actually removed are associated with that node, and hit-tested +// against the selection to determine whether the node is selected. +template +class IntervalSet { +public: + IntervalSet(llvm::ArrayRef Range) { + UnclaimedRanges.insert(Range); } - template - clang::SourceRange getSourceRange(const Node* node) { - if constexpr(std::is_base_of_v) - return node->getRange(); - else - return node->getSourceRange(); - } + // Removes the elements of Claim from the set, modifying or removing ranges + // that overlap it. + // Returns the continuous subranges of Claim that were actually removed. + llvm::SmallVector> erase(llvm::ArrayRef Claim) { + llvm::SmallVector> Out; + if(Claim.empty()) + return Out; - template - bool hook(const Node* node, const Callback& callback) { + // General case: + // Claim: [-----------------] + // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-] + // Overlap: ^first ^second + // Ranges C and D are fully included. Ranges B and E must be trimmed. + auto Overlap = + std::make_pair(UnclaimedRanges.lower_bound({Claim.begin(), Claim.begin()}), // C + UnclaimedRanges.lower_bound({Claim.end(), Claim.end()})); // F + // Rewind to cover B. + if(Overlap.first != UnclaimedRanges.begin()) { + --Overlap.first; + // ...unless B isn't selected at all. + if(Overlap.first->end() <= Claim.begin()) + ++Overlap.first; + } + if(Overlap.first == Overlap.second) + return Out; - if(!node) { - return true; + // First, copy all overlapping ranges into the output. 
+ auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second); + // If any of the overlapping ranges were sliced by the claim, split them: + // - restrict the returned range to the claimed part + // - save the unclaimed part so it can be reinserted + llvm::ArrayRef RemainingHead, RemainingTail; + if(Claim.begin() > OutFirst->begin()) { + RemainingHead = {OutFirst->begin(), Claim.begin()}; + *OutFirst = {Claim.begin(), OutFirst->end()}; + } + if(Claim.end() < Out.back().end()) { + RemainingTail = {Claim.end(), Out.back().end()}; + Out.back() = {Out.back().begin(), Claim.end()}; } - if constexpr(requires { node->isImplicit(); }) - if(node->isImplicit()) - return true; + // Erase all the overlapping ranges (invalidating all iterators). + UnclaimedRanges.erase(Overlap.first, Overlap.second); + // Reinsert ranges that were merely trimmed. + if(!RemainingHead.empty()) + UnclaimedRanges.insert(RemainingHead); + if(!RemainingTail.empty()) + UnclaimedRanges.insert(RemainingTail); - clang::SourceRange range = getSourceRange(node); - if(range.isInvalid()) + return Out; + } + +private: + using TokenRange = llvm::ArrayRef; + + struct RangeLess { + bool operator() (llvm::ArrayRef L, llvm::ArrayRef R) const { + return L.begin() < R.begin(); + } + }; + + // Disjoint sorted unclaimed ranges of expanded tokens. + std::set, RangeLess> UnclaimedRanges; +}; + +// Sentinel value for the selectedness of a node where we've seen no tokens yet. +// This resolves to Unselected if no tokens are ever seen. +// But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete. +// This value is never exposed publicly. +constexpr SelectionTree::SelectionKind NoTokens = static_cast( + static_cast(SelectionTree::Complete + 1)); + +// Nodes start with NoTokens, and then use this function to aggregate the +// selectedness as more tokens are found. 
+void update(SelectionTree::SelectionKind& Result, SelectionTree::SelectionKind New) { + if(New == NoTokens) + return; + if(Result == NoTokens) + Result = New; + else if(Result != New) + // Can only be completely selected (or unselected) if all tokens are. + Result = SelectionTree::Partial; +} + +// As well as comments, don't count semicolons as real tokens. +// They're not properly claimed as expr-statement is missing from the AST. +bool should_ignore(const syntax::Token& token) { + switch(token.kind()) { + // Even "attached" comments are not considered part of a node's range. + case tok::comment: + // The AST doesn't directly store locations for terminating semicolons. + case tok::semi: + // We don't have locations for cvr-qualifiers: see QualifiedTypeLoc. + case tok::kw_const: + case tok::kw_volatile: + case tok::kw_restrict: return true; + default: return false; + } +} + +// Determine whether 'Target' is the first expansion of the macro +// argument whose top-level spelling location is 'SpellingLoc'. +bool is_first_expansion(FileID Target, SourceLocation SpellingLoc, const SourceManager& SM) { + SourceLocation Prev = SpellingLoc; + while(true) { + // If the arg is expanded multiple times, getMacroArgExpandedLocation() + // returns the first expansion. + SourceLocation Next = SM.getMacroArgExpandedLocation(Prev); + // So if we reach the target, target is the first-expansion of the + // first-expansion ... + if(SM.getFileID(Next) == Target) return true; - // No overlap, the node is not selected. - if(range.getEnd() < left->location() || range.getBegin() > right->endLocation()) - return true; + // Otherwise, if the FileID stops changing, we've reached the innermost + // macro expansion, and Target was on a different branch. + if(SM.getFileID(Next) == SM.getFileID(Prev)) + return false; - // There is overlap between source range of node and selection, by default it is partial. 
- auto coverage = SelectionTree::CoverageKind::Partial; + Prev = Next; + } + return false; +} - // The source range of current node contains the boundary tokens. it' a full coverage. - if(range.getBegin() <= left->location() && range.getEnd() >= right->location()) - coverage = SelectionTree::CoverageKind::Full; +// SelectionTester can determine whether a range of tokens from the PP-expanded +// stream (corresponding to an AST node) is considered selected. +// +// When the tokens result from macro expansions, the appropriate tokens in the +// main file are examined (macro invocation or args). Similarly for #includes. +// However, only the first expansion of a given spelled token is considered +// selected. +// +// It tests each token in the range (not just the endpoints) as contiguous +// expanded tokens may not have contiguous spellings (with macros). +// +// Non-token text, and tokens not modeled in the AST (comments, semicolons) +// are ignored when determining selectedness. +class SelectionTester { +public: + // The selection is offsets [selected_range.begin, selected_range.end) in selected_file. + SelectionTester(CompilationUnit& unit, + FileID selected_file, + LocalSourceRange selected_range, + const SourceManager& SM) : + selected_file(selected_file), selected_file_range(SM.getLocForStartOfFile(selected_file), + SM.getLocForEndOfFile(selected_file)), + SM(SM) { + // Find all tokens (partially) selected in the file. + auto spelled_tokens = unit.spelled_tokens(selected_file); - SelectionTree::Node selected{ - .dynNode = clang::DynTypedNode::create(*node), - .kind = coverage, - .parent = stack.empty() ?
nullptr : stack.top(), + const syntax::Token* first = + llvm::partition_point(spelled_tokens, [&](const syntax::Token& token) { + return unit.file_offset(token.endLocation()) <= selected_range.begin; + }); + + const syntax::Token* last = + std::partition_point(first, spelled_tokens.end(), [&](const syntax::Token& token) { + return unit.file_offset(token.location()) < selected_range.end; + }); + + auto selected_tokens = llvm::ArrayRef(first, last); + + // Find which of these are preprocessed to nothing and should be ignored. + llvm::BitVector PPIgnored(selected_tokens.size(), false); + + for(const syntax::TokenBuffer::Expansion& expansion: + unit.expansions_overlapping(selected_tokens)) { + if(expansion.Expanded.empty()) { + for(const syntax::Token& token: expansion.Spelled) { + if(&token >= first && &token < last) { + PPIgnored[&token - first] = true; + } + } + } + } + + // Precompute selectedness and offset for selected spelled tokens. + for(unsigned I = 0; I < selected_tokens.size(); ++I) { + if(should_ignore(selected_tokens[I]) || PPIgnored[I]) { + continue; + } + + selected_spelled.emplace_back(); + Tok& token = selected_spelled.back(); + token.offset = unit.file_offset(selected_tokens[I].location()); + + if(token.offset >= selected_range.begin && + token.offset + selected_tokens[I].length() <= selected_range.end) { + token.selected = SelectionTree::Complete; + } else { + token.selected = SelectionTree::Partial; + } + } + + maybe_selected_expanded = computeMaybeSelectedExpandedTokens(unit.token_buffer()); + } + + // Test whether a consecutive range of tokens is selected. + // The tokens are taken from the expanded token stream. + SelectionTree::SelectionKind test(llvm::ArrayRef ExpandedTokens) const { + if(ExpandedTokens.empty()) + return NoTokens; + + if(selected_spelled.empty()) + return SelectionTree::Unselected; + + // Cheap (pointer) check whether any of the tokens could touch selection. 
+ // In most cases, the node's overall source range touches ExpandedTokens, + // or we would have failed mayHit(). However now we're only considering + // the *unclaimed* spans of expanded tokens. + // This is a significant performance improvement when a lot of nodes + // surround the selection, including when generated by macros. + if(maybe_selected_expanded.empty() || + &ExpandedTokens.front() > &maybe_selected_expanded.back() || + &ExpandedTokens.back() < &maybe_selected_expanded.front()) { + return SelectionTree::Unselected; + } + + // The eof token is used as a sentinel. + // In general, source range from an AST node should not claim the eof token, + // but it could occur for unmatched-bracket cases. + // FIXME: fix it in TokenBuffer, expandedTokens(SourceRange) should not + // return the eof token. + if(ExpandedTokens.back().kind() == tok::eof) + ExpandedTokens = ExpandedTokens.drop_back(); + + SelectionTree::SelectionKind result = NoTokens; + + while(!ExpandedTokens.empty()) { + // Take consecutive tokens from the same context together for efficiency. + SourceLocation Start = ExpandedTokens.front().location(); + FileID FID = SM.getFileID(Start); + // Comparing SourceLocations against bounds is cheaper than getFileID(). + SourceLocation Limit = SM.getComposedLoc(FID, SM.getFileIDSize(FID)); + auto Batch = ExpandedTokens.take_while([&](const syntax::Token& T) { + return T.location() >= Start && T.location() < Limit; + }); + assert(!Batch.empty()); + ExpandedTokens = ExpandedTokens.drop_front(Batch.size()); + + update(result, testChunk(FID, Batch)); + } + + return result; + } + + // Cheap check whether any of the tokens in R might be selected. + // If it returns false, test() will return NoTokens or Unselected. + // If it returns true, test() may return any value. + bool mayHit(SourceRange R) const { + if(selected_spelled.empty() || maybe_selected_expanded.empty()) + return false; + // If the node starts after the selection ends, it is not selected. 
+ // Tokens a macro location might claim are >= its expansion start. + // So if the expansion start > last selected token, we can prune it. + // (This is particularly helpful for GTest's TEST macro). + if(auto B = offsetInSelFile(getExpansionStart(R.getBegin()))) + if(*B > selected_spelled.back().offset) + return false; + // If the node ends before the selection begins, it is not selected. + SourceLocation EndLoc = R.getEnd(); + while(EndLoc.isMacroID()) + EndLoc = SM.getImmediateExpansionRange(EndLoc).getEnd(); + // In the rare case that the expansion range is a char range, EndLoc is + // ~one token too far to the right. We may fail to prune, that's OK. + if(auto E = offsetInSelFile(EndLoc)) + if(*E < selected_spelled.front().offset) + return false; + return true; + } + +private: + // Plausible expanded tokens that might be affected by the selection. + // This is an overestimate, it may contain tokens that are not selected. + // The point is to allow cheap pruning in test() + llvm::ArrayRef + computeMaybeSelectedExpandedTokens(const syntax::TokenBuffer& Toks) { + if(selected_spelled.empty()) + return {}; + + auto LastAffectedToken = [&](SourceLocation Loc) { + auto Offset = offsetInSelFile(Loc); + while(Loc.isValid() && !Offset) { + Loc = Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).getEnd() + : SM.getIncludeLoc(SM.getFileID(Loc)); + Offset = offsetInSelFile(Loc); + } + return Offset; }; - // Store the selected node and link it to its father node. - storage.push_back(std::move(selected)); - if(!stack.empty()) - stack.top()->children.push_back(&storage.back()); + auto FirstAffectedToken = [&](SourceLocation Loc) { + auto Offset = offsetInSelFile(Loc); + while(Loc.isValid() && !Offset) { + Loc = Loc.isMacroID() ? 
SM.getImmediateExpansionRange(Loc).getBegin() + : SM.getIncludeLoc(SM.getFileID(Loc)); + Offset = offsetInSelFile(Loc); + } + return Offset; + }; - SelectionTree::Node& current = storage.back(); + const syntax::Token* Start = llvm::partition_point( + Toks.expandedTokens(), + [&, First = selected_spelled.front().offset](const syntax::Token& Tok) { + if(Tok.kind() == tok::eof) + return false; + // Implausible if upperbound(Tok) < First. + if(auto Offset = LastAffectedToken(Tok.location())) + return *Offset < First; + // A prefix of the expanded tokens may be from an implicit + // inclusion (e.g. preamble patch, or command-line -include). + return true; + }); - // For a full coverage case, node's children may also full coverage the selection range. so - // traverse them recursively until the node cover the selection range partially. - if(coverage == SelectionTree::CoverageKind::Full) { - stack.emplace(&storage.back()); - bool ret = callback(); - stack.pop(); - return ret; - } + bool EndInvalid = false; + const syntax::Token* End = std::partition_point( + Start, + Toks.expandedTokens().end(), + [&, Last = selected_spelled.back().offset](const syntax::Token& Tok) { + if(Tok.kind() == tok::eof) + return false; + // Plausible if lowerbound(Tok) <= Last. + if(auto Offset = FirstAffectedToken(Tok.location())) + return *Offset <= Last; + // Shouldn't happen: once we've seen tokens traceable to the main + // file, there shouldn't be any more implicit inclusions. 
+ assert(false && "Expanded token could not be resolved to main file!"); + EndInvalid = true; + return true; // conservatively assume this token can overlap + }); + if(EndInvalid) + End = Toks.expandedTokens().end(); - /// For the given selection of a clang::TagDecl: - /// class X {/* something */}; - /// ^^^^^^^^^^^^^^^^^^^^^^^^^^ - /// we correct the selection to full source range of class X without semi: - /// class X {/* something */}; - /// ^^^^^^^^^^^^^^^^^^^^^^^^^ - if constexpr(std::derived_from) { - if(right->kind() == clang::tok::semi) - current.kind = SelectionTree::CoverageKind::Full; - } - - return true; + return llvm::ArrayRef(Start, End); } - template - void dump(const Node* node) { - if constexpr(requires { node->dump(); }) { - node->dump(); + // Hit-test a consecutive range of tokens from a single file ID. + SelectionTree::SelectionKind testChunk(FileID FID, llvm::ArrayRef Batch) const { + assert(!Batch.empty()); + SourceLocation StartLoc = Batch.front().location(); + // There are several possible categories of FileID depending on how the + // preprocessor was used to generate these tokens: + // main file, #included file, macro args, macro bodies. + // We need to identify the main-file tokens that represent Batch, and + // determine whether we want to exclusively claim them. Regular tokens + // represent one AST construct, but a macro invocation can represent many. + + // Handle tokens written directly in the main file. + if(FID == selected_file) { + return testTokenRange(*offsetInSelFile(Batch.front().location()), + *offsetInSelFile(Batch.back().location())); } - if constexpr(std::is_same_v) { - const clang::NestedNameSpecifierLoc& NNSL = *node; - NNSL.getNestedNameSpecifier()->dump(); - llvm::outs() << "\n"; + // Handle tokens in another file #included into the main file. + // Check if the #include is selected, but don't claim it exclusively. 
+ if(StartLoc.isFileID()) { + for(SourceLocation Loc = Batch.front().location(); Loc.isValid(); + Loc = SM.getIncludeLoc(SM.getFileID(Loc))) { + if(auto Offset = offsetInSelFile(Loc)) + // FIXME: use whole #include directive, not just the filename string. + return testToken(*Offset); + } + return NoTokens; } - if constexpr(std::is_same_v) { - const clang::Attr& attr = *node; - attr.getScopeLoc().dump(context.getSourceManager()); - attr.printPretty(llvm::outs(), context.getPrintingPolicy()); - llvm::outs() << "\n"; + assert(StartLoc.isMacroID()); + // Handle tokens that were passed as a macro argument. + SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc); + if(auto ArgOffset = offsetInSelFile(ArgStart)) { + if(is_first_expansion(FID, ArgStart, SM)) { + SourceLocation ArgEnd = SM.getTopMacroCallerLoc(Batch.back().location()); + return testTokenRange(*ArgOffset, *offsetInSelFile(ArgEnd)); + } else { // NOLINT(llvm-else-after-return) + /* fall through and treat as part of the macro body */ + } } + + // Handle tokens produced by non-argument macro expansion. + // Check if the macro name is selected, don't claim it exclusively. + if(auto ExpansionOffset = offsetInSelFile(getExpansionStart(StartLoc))) + // FIXME: also check ( and ) for function-like macros? + return testToken(*ExpansionOffset); + return NoTokens; } - using Node = SelectionTree::Node; + // Is the closed token range [Begin, End] selected? + SelectionTree::SelectionKind testTokenRange(unsigned Begin, unsigned End) const { + assert(Begin <= End); + // Outside the selection entirely? + if(End < selected_spelled.front().offset || Begin > selected_spelled.back().offset) + return SelectionTree::Unselected; - SelectionTree build(); + // Compute range of tokens. + auto B = + llvm::partition_point(selected_spelled, [&](const Tok& T) { return T.offset < Begin; }); + auto E = std::partition_point(B, selected_spelled.end(), [&](const Tok& T) { + return T.offset <= End; + }); - /// the two boundary tokens. 
- const clang::syntax::Token* left; - const clang::syntax::Token* right; + // Aggregate selectedness of tokens in range. + bool ExtendsOutsideSelection = + Begin < selected_spelled.front().offset || End > selected_spelled.back().offset; + SelectionTree::SelectionKind Result = + ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens; + for(auto It = B; It != E; ++It) + update(Result, It->selected); + return Result; + } - clang::ASTContext& context; - CompilationUnit& unit; - /// father nodes stack. - std::stack stack; - std::deque storage; + // Is the token at `Offset` selected? + SelectionTree::SelectionKind testToken(unsigned Offset) const { + // Outside the selection entirely? + if(Offset < selected_spelled.front().offset || Offset > selected_spelled.back().offset) + return SelectionTree::Unselected; + // Find the token, if it exists. + auto It = llvm::partition_point(selected_spelled, + [&](const Tok& T) { return T.offset < Offset; }); + if(It != selected_spelled.end() && It->offset == Offset) + return It->selected; + return NoTokens; + } + + // Decomposes Loc and returns the offset if the file ID is SelFile. + std::optional offsetInSelFile(SourceLocation Loc) const { + // Decoding Loc with SM.getDecomposedLoc is relatively expensive. + // But SourceLocations for a file are numerically contiguous, so we + // can use cheap integer operations instead. + if(Loc < selected_file_range.getBegin() || Loc >= selected_file_range.getEnd()) + return std::nullopt; + // FIXME: subtracting getRawEncoding() is dubious, move this logic into SM. 
+ return Loc.getRawEncoding() - selected_file_range.getBegin().getRawEncoding(); + } + + SourceLocation getExpansionStart(SourceLocation Loc) const { + while(Loc.isMacroID()) + Loc = SM.getImmediateExpansionRange(Loc).getBegin(); + return Loc; + } + + struct Tok { + unsigned offset; + SelectionTree::SelectionKind selected; + }; + + std::vector selected_spelled; + llvm::ArrayRef maybe_selected_expanded; + FileID selected_file; + SourceRange selected_file_range; + const SourceManager& SM; }; -struct SelectionCollector : public clang::RecursiveASTVisitor { - using Base = clang::RecursiveASTVisitor; +// Show the type of a node for debugging. +void printNodeKind(llvm::raw_ostream& OS, const DynTypedNode& N) { + if(const TypeLoc* TL = N.get()) { + // TypeLoc is a hierarchy, but has only a single ASTNodeKind. + // Synthesize the name from the Type subclass (except for QualifiedTypeLoc). + if(TL->getTypeLocClass() == TypeLoc::Qualified) + OS << "QualifiedTypeLoc"; + else + OS << TL->getType()->getTypeClassName() << "TypeLoc"; + } else { + OS << N.getNodeKind().asStringRef(); + } +} - SelectionBuilder& builder; +/// FIXME: Remove in release mode? +std::string printNodeToString(const DynTypedNode& N, const PrintingPolicy& PP) { + std::string S; + llvm::raw_string_ostream OS(S); + printNodeKind(OS, N); + return std::move(OS.str()); +} - SelectionCollector(SelectionBuilder& builder) : builder(builder) {} - - bool TraverseDecl(clang::Decl* decl) { - if(!decl) +bool isImplicit(const Stmt* S) { + // Some Stmts are implicit and shouldn't be traversed, but there's no + // "implicit" attribute on Stmt/Expr. + // Unwrap implicit casts first if present (other nodes too?). + if(auto* ICE = llvm::dyn_cast(S)) + S = ICE->getSubExprAsWritten(); + // Implicit this in a MemberExpr is not filtered out by RecursiveASTVisitor. + // It would be nice if RAV handled this (!shouldTraverseImplicitCode()). 
+ if(auto* CTI = llvm::dyn_cast(S)) + if(CTI->isImplicit()) return true; + // Make sure implicit access of anonymous structs don't end up owning tokens. + if(auto* ME = llvm::dyn_cast(S)) { + if(auto* FD = llvm::dyn_cast(ME->getMemberDecl())) + if(FD->isAnonymousStructOrUnion()) + // If Base is an implicit CXXThis, then the whole MemberExpr has no + // tokens. If it's a normal e.g. DeclRef, we treat the MemberExpr like + // an implicit cast. + return isImplicit(ME->getBase()); + } + // Refs to operator() and [] are (almost?) always implicit as part of calls. + if(auto* DRE = llvm::dyn_cast(S)) { + if(auto* FD = llvm::dyn_cast(DRE->getDecl())) { + switch(FD->getOverloadedOperator()) { + case OO_Call: + case OO_Subscript: return true; + default: break; + } + } + } + return false; +} - /// `TranslationUnitDecl` has invalid location information. - /// So we process it separately. - if(llvm::isa(decl)) { - return Base::TraverseDecl(decl); +// We find the selection by visiting written nodes in the AST, looking for nodes +// that intersect with the selected character range. +// +// While traversing, we maintain a parent stack. As nodes pop off the stack, +// we decide whether to keep them or not. To be kept, they must either be +// selected or contain some nodes that are. +// +// For simple cases (not inside macros) we prune subtrees that don't intersect. +class SelectionVisitor : public RecursiveASTVisitor { +public: + // Runs the visitor to gather selected nodes and their ancestors. + // If there is any selection, the root (TUDecl) is the first node. 
+ static std::deque collect(CompilationUnit& unit, + const PrintingPolicy& PP, + LocalSourceRange range, + FileID fid) { + SelectionVisitor V(unit, PP, range, fid); + V.TraverseAST(unit.context()); + assert(V.stack.size() == 1 && "Unpaired push/pop?"); + assert(V.stack.top() == &V.nodes.front()); + return std::move(V.nodes); + } + + // We traverse all "well-behaved" nodes the same way: + // - push the node onto the stack + // - traverse its children recursively + // - pop it from the stack + // - hit testing: is intersection(node, selection) - union(children) empty? + // - attach it to the tree if it or any children hit the selection + // + // Two categories of nodes are not "well-behaved": + // - those without source range information, we don't record those + // - those that can't be stored in DynTypedNode. + bool TraverseDecl(Decl* X) { + if(llvm::isa_and_nonnull(X)) { + // Already pushed by constructor. + return Base::TraverseDecl(X); } - return builder.hook(decl, [&] { return Base::TraverseDecl(decl); }); - } - - bool TraverseStmt(clang::Stmt* stmt) { - return builder.hook(stmt, [&] { return Base::TraverseStmt(stmt); }); - } - - bool TraverseAttr(clang::Attr* attr) { - return builder.hook(attr, [&] { return Base::TraverseAttr(attr); }); - } - - /// we don't care about the node without location information, so skip them. - // bool shouldWalkTypesOfTypeLocs() { - // return false; - // } - - bool TraverseType(clang::QualType) { - return true; - } - - bool TraverseNestedNameSpecifier(clang::NestedNameSpecifier*) { - return true; - } - - bool TraverseTypeLoc(clang::TypeLoc loc) { - /// clang currently doesn't record any information for `QualifiedTypeLoc`. - /// It has same location with its inner type. So we just ignore it. - if(auto QTL = loc.getAs()) { - return TraverseTypeLoc(QTL.getUnqualifiedLoc()); + // Base::TraverseDecl will suppress children, but not this node itself. 
+ if(X && X->isImplicit()) { + // Most implicit nodes have only implicit children and can be skipped. + // However there are exceptions (`void foo(Concept auto x)`), and + // the base implementation knows how to find them. + return Base::TraverseDecl(X); } - return builder.hook(&loc, [&] { return Base::TraverseTypeLoc(loc); }); + return traverse_node(X, [&] { return Base::TraverseDecl(X); }); } - bool TraverseNestedNameSpecifierLoc(const clang::NestedNameSpecifierLoc& NNS) { - return builder.hook(&NNS, [&] { return Base::TraverseNestedNameSpecifierLoc(NNS); }); + bool TraverseTypeLoc(TypeLoc X) { + return traverse_node(&X, [&] { return Base::TraverseTypeLoc(X); }); } - bool TraverseTemplateArgumentLoc(const clang::TemplateArgumentLoc& A) { - return builder.hook(&A, [&] { return Base::TraverseTemplateArgumentLoc(A); }); + bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc& X) { + return traverse_node(&X, [&] { return Base::TraverseTemplateArgumentLoc(X); }); } - bool TraverseCXXBaseSpecifier(const clang::CXXBaseSpecifier& BS) { - return builder.hook(&BS, [&] { return Base::TraverseCXXBaseSpecifier(BS); }); + bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X) { + return traverse_node(&X, [&] { return Base::TraverseNestedNameSpecifierLoc(X); }); } - bool TraverseConstructorInitializer(clang::CXXCtorInitializer* I) { - return builder.hook(I, [&] { return Base::TraverseConstructorInitializer(I); }); + bool TraverseConstructorInitializer(CXXCtorInitializer* X) { + return traverse_node(X, [&] { return Base::TraverseConstructorInitializer(X); }); } - /// FIXME: figure out concept in clang unit. 
- bool TraverseConceptReference(clang::ConceptReference* concept_) { + bool TraverseCXXBaseSpecifier(const CXXBaseSpecifier& X) { + return traverse_node(&X, [&] { return Base::TraverseCXXBaseSpecifier(X); }); + } + + bool TraverseAttr(Attr* X) { + return traverse_node(X, [&] { return Base::TraverseAttr(X); }); + } + + bool TraverseConceptReference(ConceptReference* X) { + return traverse_node(X, [&] { return Base::TraverseConceptReference(X); }); + } + + // Stmt is the same, but this form allows the data recursion optimization. + bool dataTraverseStmtPre(Stmt* X) { + if(!X || isImplicit(X)) + return false; + auto N = DynTypedNode::create(*X); + if(safely_skipable(N)) + return false; + push(std::move(N)); + if(shouldSkipChildren(X)) { + pop(); + return false; + } return true; } + + bool dataTraverseStmtPost(Stmt* X) { + pop(); + return true; + } + + // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived + // TraverseTypeLoc is not called for the inner UnqualTypeLoc. + // This means we'd never see 'int' in 'const int'! Work around that here. + // (The reason for the behavior is to avoid traversing the nested Type twice, + // but we ignore TraverseType anyway). + bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QX) { + return traverse_node(&QX, [&] { return TraverseTypeLoc(QX.getUnqualifiedLoc()); }); + } + + bool TraverseObjCProtocolLoc(ObjCProtocolLoc PL) { + return traverse_node(&PL, [&] { return Base::TraverseObjCProtocolLoc(PL); }); + } + + // Uninteresting parts of the AST that don't have locations within them. + bool TraverseNestedNameSpecifier(NestedNameSpecifier*) { + return true; + } + + bool TraverseType(QualType) { + return true; + } + + // The DeclStmt for the loop variable claims to cover the whole range + // inside the parens, this causes the range-init expression to not be hit. + // Traverse the loop VarDecl instead, which has the right source range. 
+ bool TraverseCXXForRangeStmt(CXXForRangeStmt* S) { + return traverse_node(S, [&] { + return TraverseStmt(S->getInit()) && TraverseDecl(S->getLoopVariable()) && + TraverseStmt(S->getRangeInit()) && TraverseStmt(S->getBody()); + }); + } + + // OpaqueValueExpr blocks traversal, we must explicitly traverse it. + bool TraverseOpaqueValueExpr(OpaqueValueExpr* E) { + return traverse_node(E, [&] { return TraverseStmt(E->getSourceExpr()); }); + } + + // We only want to traverse the *syntactic form* to understand the selection. + bool TraversePseudoObjectExpr(PseudoObjectExpr* E) { + return traverse_node(E, [&] { return TraverseStmt(E->getSyntacticForm()); }); + } + + bool TraverseTypeConstraint(const TypeConstraint* C) { + if(auto* E = C->getImmediatelyDeclaredConstraint()) { + // Technically this expression is 'implicit' and not traversed by the RAV. + // However, the range is correct, so we visit expression to avoid adding + // an extra kind to 'DynTypedNode' that holds 'TypeConstraint'. + return TraverseStmt(E); + } + return Base::TraverseTypeConstraint(C); + } + + // Override child traversal for certain node types. + using RecursiveASTVisitor::getStmtChildren; + + // PredefinedExpr like __func__ has a StringLiteral child for its value. + // It's not written, so don't traverse it. + Stmt::child_range getStmtChildren(PredefinedExpr*) { + return {StmtIterator{}, StmtIterator{}}; + } + +private: + using Base = RecursiveASTVisitor; + + SelectionVisitor(CompilationUnit& unit, + const PrintingPolicy& PP, + LocalSourceRange range, + FileID SelFile) : + unit(unit), SM(unit.context().getSourceManager()), lang_opts(unit.context().getLangOpts()), + print_policy(PP), checker(unit, SelFile, range, SM), + unclaimed_expanded_tokens(unit.expanded_tokens()) { + // Ensure we have a node for the TU decl, regardless of traversal scope.
+ nodes.emplace_back(); + nodes.back().data = DynTypedNode::create(*unit.context().getTranslationUnitDecl()); + nodes.back().parent = nullptr; + nodes.back().selected = SelectionTree::Unselected; + stack.push(&nodes.back()); + } + + // Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo. + // Node is always a pointer so the generic code can handle any null checks. + template + bool traverse_node(T* Node, const Func& Body) { + if(Node == nullptr) + return true; + auto N = DynTypedNode::create(*Node); + if(safely_skipable(N)) + return true; + push(DynTypedNode::create(*Node)); + bool Ret = Body(); + pop(); + return Ret; + } + + // HIT TESTING + // + // We do rough hit testing on the way down the tree to avoid traversing + // subtrees that don't touch the selection (canSafelySkipNode), but + // fine-grained hit-testing is mostly done on the way back up (in pop()). + // This means children get to claim parts of the selection first, and parents + // are only selected if they own tokens that no child owned. + // + // Nodes *usually* nest nicely: a child's getSourceRange() lies within the + // parent's, and a node (transitively) owns all tokens in its range. + // + // Exception 1: when declarators nest, *inner* declarator is the *outer* type. + // e.g. void foo[5](int) is an array of functions. + // To handle this case, declarators are careful to only claim the tokens they + // own, rather than claim a range and rely on claim ordering. + // + // Exception 2: siblings both claim the same node. + // e.g. `int x, y;` produces two sibling VarDecls. + // ~~~~~ x + // ~~~~~~~~ y + // Here the first ("leftmost") sibling claims the tokens it wants, and the + // other sibling gets what's left. So selecting "int" only includes the left + // VarDecl in the selection tree. + + // An optimization for a common case: nodes outside macro expansions that + // don't intersect the selection may be recursively skipped. 
+ bool safely_skipable(const DynTypedNode& N) { + SourceRange S = getSourceRange(N); + if(auto* TL = N.get()) { + // FIXME: TypeLoc::getBeginLoc()/getEndLoc() are pretty fragile + // heuristics. We should consider only pruning critical TypeLoc nodes, to + // be more robust. + + // AttributedTypeLoc may point to the attribute's range, NOT the modified + // type's range. + if(auto AT = TL->getAs()) + S = AT.getModifiedLoc().getSourceRange(); + } + // SourceRange often doesn't manage to accurately cover attributes. + // Fortunately, attributes are rare. + if(llvm::any_of(get_attributes(N), [](const Attr* A) { return !A->isImplicit(); })) { + return false; + } + + if(!checker.mayHit(S)) { + log::debug("{2}skip: {0} {1}", + printNodeToString(N, print_policy), + S.printToString(SM), + indent()); + return true; + } + return false; + } + + // There are certain nodes we want to treat as leaves in the SelectionTree, + // although they do have children. + bool shouldSkipChildren(const Stmt* X) const { + // UserDefinedLiteral (e.g. 12_i) has two children (12 and _i). + // Unfortunately TokenBuffer sees 12_i as one token and can't split it. + // So we treat UserDefinedLiteral as a leaf node, owning the token. + return llvm::isa(X); + } + + // Pushes a node onto the ancestor stack. Pairs with pop(). + // Performs early hit detection for some nodes (on the earlySourceRange). + void push(DynTypedNode node) { + SourceRange Early = earlySourceRange(node); + log::debug("{2}push: {0} {1}", + printNodeToString(node, print_policy), + node.getSourceRange().printToString(SM), + indent()); + nodes.emplace_back(); + nodes.back().data = std::move(node); + nodes.back().parent = stack.top(); + nodes.back().selected = NoTokens; + stack.push(&nodes.back()); + claimRange(Early, nodes.back().selected); + } + + // Pops a node off the ancestor stack, and finalizes it. Pairs with push(). + // Performs primary hit detection. 
+ void pop() { + Node& N = *stack.top(); + log::debug("{1}pop: {0}", printNodeToString(N.data, print_policy), indent(-1)); + claimTokensFor(N.data, N.selected); + if(N.selected == NoTokens) + N.selected = SelectionTree::Unselected; + if(N.selected || !N.children.empty()) { + // Attach to the tree. + N.parent->children.push_back(&N); + } else { + // Neither N nor any of its children are selected, so it doesn't belong in the tree. + assert(&N == &nodes.back()); + nodes.pop_back(); + } + stack.pop(); + } + + // Returns the range of tokens that this node will claim directly, and + // that is not available to the node's children. + // Usually empty, but sometimes children cover tokens but shouldn't own them. + SourceRange earlySourceRange(const DynTypedNode& N) { + if(const Decl* VD = N.get<VarDecl>()) { + // We want the name in the var-decl to be claimed by the decl itself and + // not by any children. Usually, we don't need this, because source + // ranges of children are not overlapped with their parent's. + // An exception is lambda captured var decl, where AutoTypeLoc is + // overlapped with the name loc. + // auto fun = [bar = foo]() { ... } + // ~~~~~~~~~ VarDecl + // ~~~ |- AutoTypeLoc + return VD->getLocation(); + } + + // When referring to a destructor ~Foo(), attribute Foo to the destructor + // rather than the TypeLoc nested inside it. + // We still traverse the TypeLoc, because it may contain other targeted + // things like the T in ~Foo<T>(). + if(const auto* CDD = N.get<CXXDestructorDecl>()) + return CDD->getNameInfo().getNamedTypeInfo()->getTypeLoc().getBeginLoc(); + + if(const auto* ME = N.get<MemberExpr>()) { + auto NameInfo = ME->getMemberNameInfo(); + if(NameInfo.getName().getNameKind() == DeclarationName::CXXDestructorName) + return NameInfo.getNamedTypeInfo()->getTypeLoc().getBeginLoc(); + } + + return SourceRange(); + } + + // Claim tokens for N, after processing its children. + // By default this claims all unclaimed tokens in getSourceRange(). + // We override this if we want to claim fewer tokens (e.g.
there are gaps). + void claimTokensFor(const DynTypedNode& N, SelectionTree::SelectionKind& Result) { + // CXXConstructExpr often shows implicit construction, like `string s;`. + // Don't associate any tokens with it unless there's some syntax like {}. + // This prevents it from claiming 's', its primary location. + if(const auto* CCE = N.get()) { + claimRange(CCE->getParenOrBraceRange(), Result); + return; + } + // ExprWithCleanups is always implicit. It often wraps CXXConstructExpr. + // Prevent it claiming 's' in the case above. + if(N.get()) + return; + + // Declarators nest "inside out", with parent types inside child ones. + // Instead of claiming the whole range (clobbering parent tokens), carefully + // claim the tokens owned by this node and non-declarator children. + // (We could manipulate traversal order instead, but this is easier). + // + // Non-declarator types nest normally, and are handled like other nodes. + // + // Example: + // Vec(*[2])(A)> is a Vec of arrays of pointers to functions, + // which accept A and return R. + // The TypeLoc hierarchy: + // Vec(*[2])(A)> m; + // Vec<#####################> TemplateSpecialization Vec + // --------[2]---------- `-Array + // -------*------------- `-Pointer + // ------(----)--------- `-Paren + // ------------(#######) `-Function + // R<###> |-TemplateSpecialization R + // int | `-Builtin int + // A<####> `-TemplateSpecialization A + // char `-Builtin char + // + // In each row + // --- represents unclaimed parts of the SourceRange. + // ### represents parts that children already claimed. 
+ if(const auto* TL = N.get()) { + if(auto PTL = TL->getAs()) { + claimRange(PTL.getLParenLoc(), Result); + claimRange(PTL.getRParenLoc(), Result); + return; + } + if(auto ATL = TL->getAs()) { + claimRange(ATL.getBracketsRange(), Result); + return; + } + if(auto PTL = TL->getAs()) { + claimRange(PTL.getStarLoc(), Result); + return; + } + if(auto FTL = TL->getAs()) { + claimRange(SourceRange(FTL.getLParenLoc(), FTL.getEndLoc()), Result); + return; + } + } + + claimRange(getSourceRange(N), Result); + } + + // Perform hit-testing of a complete Node against the selection. + // This runs for every node in the AST, and must be fast in common cases. + // This is usually called from pop(), so we can take children into account. + // The existing state of Result is relevant. + void claimRange(SourceRange S, SelectionTree::SelectionKind& Result) { + for(const auto& ClaimedRange: unclaimed_expanded_tokens.erase(unit.expanded_tokens(S))) + update(Result, checker.test(ClaimedRange)); + + if(Result && Result != NoTokens) + log::debug("{1}hit selection: {0}", S.printToString(SM), indent()); + } + + std::string indent(int Offset = 0) { + // Cast for signed arithmetic. + int Amount = int(stack.size()) + Offset; + assert(Amount >= 0); + return std::string(Amount, ' '); + } + + SourceManager& SM; + const LangOptions& lang_opts; + const PrintingPolicy& print_policy; + CompilationUnit& unit; + std::stack stack; + SelectionTester checker; + IntervalSet unclaimed_expanded_tokens; + std::deque nodes; // Stable pointers as we add more nodes. 
}; -SelectionTree SelectionBuilder::build() { - SelectionCollector collector(*this); - - if(isValidOffsetRange()) - collector.TraverseAST(context); - - SelectionTree tree; - if(!storage.empty()) { - storage.shrink_to_fit(); - tree.storage = std::move(storage); - tree.root = &tree.storage.front(); - } - return tree; -} - -void dumpImpl(llvm::raw_ostream& os, const SelectionTree::Node* node, clang::ASTContext& context) { - if(node) { - node->dynNode.dump(os, context); - for(auto child: node->children) - dumpImpl(os, child, context); - } -} - } // namespace -SelectionTree::SelectionTree(std::uint32_t begin, - std::uint32_t end, - clang::ASTContext& context, - CompilationUnit& unit) { - SelectionBuilder builder(begin, end, context, unit); - *this = builder.build(); +llvm::SmallString<256> abbreviatedString(DynTypedNode N, const PrintingPolicy& PP) { + llvm::SmallString<256> Result; + { + llvm::raw_svector_ostream OS(Result); + N.print(OS, PP); + } + + auto Pos = Result.find('\n'); + if(Pos != llvm::StringRef::npos) { + bool MoreText = !llvm::all_of(Result.str().drop_front(Pos), llvm::isSpace); + Result.resize(Pos); + if(MoreText) { + Result.append(" …"); + } + } + return Result; } -void SelectionTree::dump(llvm::raw_ostream& os, clang::ASTContext& context) const { - if(hasValue()) - dumpImpl(os, root, context); +void SelectionTree::print(llvm::raw_ostream& OS, const SelectionTree::Node& N, int Indent) const { + if(N.selected) + OS.indent(Indent - 1) << (N.selected == SelectionTree::Complete ? 
'*' : '.'); + else + OS.indent(Indent); + printNodeKind(OS, N.data); + OS << ' ' << abbreviatedString(N.data, print_policy) << "\n"; + for(const Node* Child: N.children) + print(OS, *Child, Indent + 2); } -SelectionTree SelectionTree::selectToken(const clang::syntax::Token& token, - clang::ASTContext& context, - CompilationUnit& unit) { - auto range = token.range(context.getSourceManager()); - return SelectionTree(range.beginOffset(), range.endOffset(), context, unit); +std::string SelectionTree::Node::kind() const { + std::string S; + llvm::raw_string_ostream OS(S); + printNodeKind(OS, data); + return std::move(OS.str()); +} + +bool SelectionTree::create_each(CompilationUnit& unit, + LocalSourceRange range, + llvm::function_ref callback) { + auto [begin, end] = range; + + if(begin != end) { + return callback(SelectionTree(unit, range)); + } + + // Decide which selections emulate a "point" query in between characters. + // If it's ambiguous (the neighboring characters are selectable tokens), returns + // both possibilities in preference order. Always returns at least one range + // - if no tokens touched, and empty range. + llvm::SmallVector ranges; + + auto location = unit.create_location(unit.interested_file(), begin); + + // Prefer right token over left. + for(const syntax::Token& token: llvm::reverse(unit.spelled_tokens_touch(location))) { + if(should_ignore(token)) { + continue; + } + + auto offset = unit.file_offset(token.location()); + ranges.emplace_back(offset, offset + token.length()); + } + + /// Make sure, we have at least one range. 
+ if(ranges.empty()) { + ranges.emplace_back(begin, begin); + } + + for(auto range: ranges) { + if(callback(SelectionTree(unit, range))) { + return true; + } + } + + return false; +} + +SelectionTree SelectionTree::create_right(CompilationUnit& unit, LocalSourceRange range) { + std::optional result; + create_each(unit, range, [&](SelectionTree T) { + result = std::move(T); + return true; + }); + return std::move(*result); +} + +SelectionTree::SelectionTree(CompilationUnit& unit, LocalSourceRange range) : + print_policy(unit.context().getLangOpts()) { + // No fundamental reason the selection needs to be in the main file, + // but that's all clice has needed so far. + const SourceManager& SM = unit.context().getSourceManager(); + FileID fid = SM.getMainFileID(); + print_policy.TerseOutput = true; + print_policy.IncludeNewlines = false; + auto [begin, end] = range; + + log::debug( + "Computing selection for {0}", + SourceRange(SM.getComposedLoc(fid, begin), SM.getComposedLoc(fid, end)).printToString(SM)); + + nodes = SelectionVisitor::collect(unit, print_policy, range, fid); + m_root = nodes.empty() ? nullptr : &nodes.front(); + recordMetrics(*this, unit.context().getLangOpts()); + /// FIXME: dlog("Built selection tree\n{0}", *this); +} + +const Node* SelectionTree::common_ancestor() const { + const Node* ancestor = m_root; + while(ancestor->children.size() == 1 && !ancestor->selected) { + ancestor = ancestor->children.front(); + } + + // Returning nullptr here is a bit unprincipled, but it makes the API safer: + // the TranslationUnitDecl contains all of the preamble, so traversing it is a + // performance cliff. Callers can check for null and use root() if they want. + return ancestor != m_root ? 
ancestor : nullptr; +} + +const DeclContext& SelectionTree::Node::decl_context() const { + for(const Node* CurrentNode = this; CurrentNode != nullptr; CurrentNode = CurrentNode->parent) { + if(const Decl* Current = CurrentNode->get()) { + if(CurrentNode != this) + if(auto* DC = dyn_cast(Current)) + return *DC; + return *Current->getLexicalDeclContext(); + } + if(const auto* LE = CurrentNode->get()) + if(CurrentNode != this) + return *LE->getCallOperator(); + } + llvm_unreachable("A tree must always be rooted at TranslationUnitDecl."); +} + +clang::SourceRange SelectionTree::Node::source_range() const { + return getSourceRange(data); +} + +const SelectionTree::Node& SelectionTree::Node::ignore_implicit() const { + if(children.size() == 1 && children.front()->source_range() == source_range()) + return children.front()->ignore_implicit(); + return *this; +} + +const SelectionTree::Node& SelectionTree::Node::outer_implicit() const { + if(parent && parent->source_range() == source_range()) + return parent->outer_implicit(); + return *this; } } // namespace clice diff --git a/src/Compiler/CompilationUnit.cpp b/src/Compiler/CompilationUnit.cpp index b47423b1..1a8b7382 100644 --- a/src/Compiler/CompilationUnit.cpp +++ b/src/Compiler/CompilationUnit.cpp @@ -134,6 +134,10 @@ auto CompilationUnit::expansion_location(clang::SourceLocation location) -> clan return impl->src_mgr.getExpansionLoc(location); } +auto CompilationUnit::file_location(clang::SourceLocation location) -> clang::SourceLocation { + return impl->src_mgr.getFileLoc(location); +} + auto CompilationUnit::include_location(clang::FileID fid) -> clang::SourceLocation { return impl->src_mgr.getIncludeLoc(fid); } @@ -142,15 +146,34 @@ auto CompilationUnit::presumed_location(clang::SourceLocation location) -> clang return impl->src_mgr.getPresumedLoc(location, false); } +auto CompilationUnit::create_location(clang::FileID fid, std::uint32_t offset) + -> clang::SourceLocation { + return 
impl->src_mgr.getComposedLoc(fid, offset); +} + auto CompilationUnit::spelled_tokens(clang::FileID fid) -> llvm::ArrayRef { return impl->buffer->spelledTokens(fid); } +auto CompilationUnit::spelled_tokens_touch(clang::SourceLocation location) + -> llvm::ArrayRef { + return clang::syntax::spelledTokensTouching(location, *impl->buffer); +} + +auto CompilationUnit::expanded_tokens() -> llvm::ArrayRef { + return impl->buffer->expandedTokens(); +} + auto CompilationUnit::expanded_tokens(clang::SourceRange range) -> llvm::ArrayRef { return impl->buffer->expandedTokens(range); } +auto CompilationUnit::expansions_overlapping(llvm::ArrayRef spelled_tokens) + -> std::vector { + return impl->buffer->expansionsOverlapping(spelled_tokens); +} + auto CompilationUnit::token_length(clang::SourceLocation location) -> std::uint32_t { return clang::Lexer::MeasureTokenLength(location, impl->src_mgr, impl->instance->getLangOpts()); } @@ -273,4 +296,8 @@ clang::ASTContext& CompilationUnit::context() { return impl->instance->getASTContext(); } +clang::syntax::TokenBuffer& CompilationUnit::token_buffer() { + return *impl->buffer; +} + } // namespace clice diff --git a/src/Feature/Hover.cpp b/src/Feature/Hover.cpp index 19cc08a9..10d55321 100644 --- a/src/Feature/Hover.cpp +++ b/src/Feature/Hover.cpp @@ -60,80 +60,6 @@ std::string getSourceCode(CompilationUnit& unit, const clang::NamedDecl* decl) { return ""; } -struct HoversStorage : Hovers { - llvm::DenseMap cache; - - void add(CompilationUnit& unit, const clang::NamedDecl* decl, LocalSourceRange range) { - auto [iter, success] = cache.try_emplace(decl, hovers.size()); - if(success) { - hovers.emplace_back(hover(unit, decl)); - } - occurrences.emplace_back(range, iter->second); - } - - void sort() { - std::vector hoverMap(hovers.size()); - - { - std::vector new2old(hovers.size()); - for(uint32_t i = 0; i < hovers.size(); ++i) { - new2old[i] = i; - } - - ranges::sort(views::zip(hovers, new2old), refl::less, [](const auto& element) { - 
return std::get<0>(element); - }); - - for(uint32_t i = 0; i < hovers.size(); ++i) { - hoverMap[new2old[i]] = i; - } - } - - for(auto& occurrence: occurrences) { - occurrence.index = hoverMap[occurrence.index]; - } - - ranges::sort(occurrences, refl::less, [](const auto& item) { return item.range; }); - } -}; - -/// For index all hover information in the given unit. -class HoverCollector : public SemanticVisitor { -public: - HoverCollector(CompilationUnit& unit) : SemanticVisitor(unit, false) {} - - void handleDeclOccurrence(const clang::NamedDecl* decl, - RelationKind kind, - clang::SourceLocation location) { - /// FIXME: Currently we only handle file location. - if(location.isMacroID()) { - return; - } - - decl = normalize(decl); - - auto [fid, range] = unit.decompose_range(location); - auto& file = files[fid]; - file.add(unit, decl, range); - } - - auto build() { - index::Shared hovers; - - run(); - - for(auto& [fid, storage]: files) { - storage.sort(); - hovers[fid] = std::move(static_cast(storage)); - } - - return hovers; - } - -private: - index::Shared files; -}; - } // namespace Hover hover(CompilationUnit& unit, const clang::NamedDecl* decl) { @@ -147,9 +73,21 @@ Hover hover(CompilationUnit& unit, const clang::NamedDecl* decl) { }; } -index::Shared indexHover(CompilationUnit& unit) { - HoverCollector collector(unit); - return collector.build(); +Hover hover(CompilationUnit& unit, std::uint32_t offset) { + Hover info; + + auto tree = SelectionTree::create_right(unit, {offset, offset}); + if(auto node = tree.common_ancestor()) { + if(auto decl = node->get()) { + return hover(unit, decl); + } else if(auto ref = node->get()) { + return hover(unit, ref->getDecl()); + } + + /// TODO: add .... 
+ } + + return Hover{}; } } // namespace clice::feature diff --git a/src/Server/Feature.cpp b/src/Server/Feature.cpp index b92455dc..b6b5891c 100644 --- a/src/Server/Feature.cpp +++ b/src/Server/Feature.cpp @@ -1,20 +1,47 @@ #include "Server/Server.h" #include "Server/Convert.h" #include "Compiler/Compilation.h" +#include "Feature/Hover.h" namespace clice { +/// Handles `textDocument/hover`: replies with markdown "kind: name" for what feature::hover finds at the cursor offset, or null when the file has no AST. +async::Task Server::on_hover(proto::HoverParams params) { + auto path = mapping.to_path(params.textDocument.uri); + + auto opening_file = &opening_files[path]; + auto guard = co_await opening_file->ast_built_lock.try_lock(); + + // Look the entry up again before reading through it: the pointer taken before the suspension point may be stale by now. + opening_file = &opening_files[path]; + auto offset = to_offset(kind, opening_file->content, params.position); + auto ast = opening_file->ast; + if(!ast) { + co_return json::Value(nullptr); + } + + co_return co_await async::submit([kind = this->kind, offset, &ast] { + auto hover = feature::hover(*ast, offset); + + proto::Hover result; + result.contents.kind = "markdown"; + result.contents.value = std::format("{}: {}", hover.kind.name(), hover.name); + + return json::serialize(result); + }); +} + async::Task Server::on_semantic_token(proto::SemanticTokensParams params) { auto path = mapping.to_path(params.textDocument.uri); - auto openFile = &opening_files[path]; - auto guard = co_await openFile->ast_built_lock.try_lock(); + auto opening_file = &opening_files[path]; + auto guard = co_await opening_file->ast_built_lock.try_lock(); - openFile = &opening_files[path]; - auto content = openFile->content; - auto ast = openFile->ast; + opening_file = &opening_files[path]; + auto content = opening_file->content; + auto ast = opening_file->ast; if(!ast) { - co_return ""; + co_return json::Value(nullptr); } co_return co_await async::submit([kind = this->kind, &ast] { diff --git a/src/Server/Lifecycle.cpp b/src/Server/Lifecycle.cpp index 3e211356..513571ee 100644 --- a/src/Server/Lifecycle.cpp +++ b/src/Server/Lifecycle.cpp @@ -39,6 +39,9 @@ async::Task
Server::on_initialize(proto::InitializeParams params) { capabilities.textDocumentSync.change = proto::TextDocumentSyncKind::Full; capabilities.textDocumentSync.save = true; + /// Hover + capabilities.hoverProvider = true; + /// Completion capabilities.completionProvider.triggerCharacters = {".", "<", ">", ":", "\"", "/", "*"}; capabilities.completionProvider.resolveProvider = false; diff --git a/src/Server/Server.cpp b/src/Server/Server.cpp index b7afb644..8e6575ac 100644 --- a/src/Server/Server.cpp +++ b/src/Server/Server.cpp @@ -63,6 +63,7 @@ Server::Server() { register_callback<&Server::on_did_save>("textDocument/didSave"); register_callback<&Server::on_did_close>("textDocument/didClose"); + register_callback<&Server::on_hover>("textDocument/hover"); register_callback<&Server::on_completion>("textDocument/completion"); register_callback<&Server::on_semantic_token>("textDocument/semanticTokens/full"); } diff --git a/tests/unit/AST/Selection.cpp b/tests/unit/AST/Selection.cpp index 675c46dd..102e80fb 100644 --- a/tests/unit/AST/Selection.cpp +++ b/tests/unit/AST/Selection.cpp @@ -1,515 +1,757 @@ -#include "src/AST/Selection.cpp" - #include "Test/Tester.h" +#include "AST/Selection.h" +#include "clang/Lex/Lexer.h" -namespace clice { +namespace clice::testing { -namespace testing { namespace { -using OffsetRange = std::pair; - -OffsetRange takeWholeFile(CompilationUnit& unit) { - auto fileID = unit.interested_file(); - auto begin = unit.decompose_location(unit.start_location(fileID)); - auto end = unit.decompose_location(unit.end_location(fileID)); - return {begin.second, end.second}; -} - -void debug(llvm::raw_ostream& os, - const SelectionTree::Node* node, - bool showCoverage = true, - size_t depth = 0) { - for(auto i = 0; i < depth; i++) - os << " "; - - if(auto typeLoc = node->dynNode.get()) { - if(typeLoc->getTypeLocClass() == clang::TypeLoc::TypeLocClass::Qualified) - os << "QualifiedTypeLoc"; - else - os << typeLoc->getType()->getTypeClassName() << "TypeLoc"; 
- } else - os << node->dynNode.getNodeKind().asStringRef(); - - if(showCoverage) - os << '(' << refl::enum_name(node->kind) << ')'; - - os << '\n'; - - for(auto& child: node->children) - debug(os, child, showCoverage, depth + 1); -} - -void debug(const SelectionTree& tree) { - if(tree) { - llvm::outs() << "----------------------------------------\n"; - debug(llvm::outs(), tree.getRoot()); - } -} - -struct SelectionTester : public Tester { - SelectionTester(llvm::StringRef file, llvm::StringRef content) { - add_main(file, content); - } - - void expectPreorderSequence(const SelectionTree& tree, - llvm::ArrayRef kinds) { - std::string buffer; - buffer.reserve(256); - llvm::raw_string_ostream os(buffer); - debug(os, tree.getRoot(), /*showCoverage=*/false); - - llvm::StringRef view = os.str(); - for(auto kind: kinds) { - auto strRepr = kind.asStringRef(); - auto pos = view.find(strRepr); - EXPECT_NE(pos, llvm::StringRef::npos); - view = view.ltrim().drop_front(strRepr.size()); - } - } -}; - using namespace clang; -template -std::array makeNodeSequence() { - return {ASTNodeKind::getFromNodeKind()...}; -} - -TEST(Selection, VarDeclSelectionBoundary) { - { - const char* code = R"cpp( - $(b1)int xxx$(b2)yyy$(e1) = 1$(e2);$(e3) - )cpp"; - - SelectionTester tx("main.cpp", code); - tx.compile(); - - std::vector selects; - for(int begin = 1; begin <= 2; begin++) { - for(int end = 1; end <= 3; end++) { - uint32_t bp = tx["main.cpp", std::format("b{}", begin)]; - uint32_t ep = tx["main.cpp", std::format("e{}", end)]; - selects.push_back({bp, ep}); - } - } - - auto& unit = *tx.unit; - auto tokens = unit.spelled_tokens(unit.interested_file()); - for(auto& [begin, end]: selects) { - auto [left, right] = SelectionBuilder::selectionBound(tokens, {begin, end}, unit); - - SelectionBuilder builder(left, right, unit.context(), unit); - auto tree = builder.build(); - // debug(tree); - - auto kinds = makeNodeSequence(); - tx.expectPreorderSequence(tree, kinds); - } - } - - { - const char* 
code = R"cpp( - int $(b1)x$(e1) = 114, $(b2)y$(e2) = 514, $(b3)z$(e3); - )cpp"; - - SelectionTester tx("main.cpp", code); - tx.compile(); - - std::vector selects; - for(int i = 1; i <= 3; ++i) { - uint32_t bp = tx["main.cpp", std::format("b{}", i)]; - uint32_t ep = tx["main.cpp", std::format("e{}", i)]; - selects.push_back({bp, ep}); - } - - auto& unit = *tx.unit; - auto tokens = unit.spelled_tokens(unit.interested_file()); - for(auto& [begin, end]: selects) { - auto [left, right] = SelectionBuilder::selectionBound(tokens, {begin, end}, unit); - - SelectionBuilder builder(left, right, unit.context(), unit); - auto tree = builder.build(); - - auto kinds = makeNodeSequence(); - tx.expectPreorderSequence(tree, kinds); - } - } - - { - const char* code = R"cpp( - $(b1)const$(b2) static$(b3) char $(b4)x$(e1) = 'c';$(e2) - )cpp"; - - SelectionTester tx("main.cpp", code); - tx.compile(); - - std::vector selects; - for(int i = 1; i <= 4; ++i) { - for(int j = 1; j <= 2; ++j) { - uint32_t bp = tx["main.cpp", std::format("b{}", i)]; - uint32_t ep = tx["main.cpp", std::format("e{}", j)]; - selects.push_back({bp, ep}); - } - } - - auto& unit = *tx.unit; - auto tokens = unit.spelled_tokens(unit.interested_file()); - for(auto& [begin, end]: selects) { - auto [left, right] = SelectionBuilder::selectionBound(tokens, {begin, end}, unit); - - SelectionBuilder builder(left, right, unit.context(), unit); - auto tree = builder.build(); - - auto kinds = makeNodeSequence(); - tx.expectPreorderSequence(tree, kinds); - } - } - - { - const char* code = R"cpp( - struct A { - int a; - int b; - }; - - int main(int argc, char **argv) { - $(b)auto$(e) a = A{114, 514}; - return 0; - } - )cpp"; - - SelectionTester tx("main.cpp", code); - tx.compile(); - - uint32_t bp = tx["main.cpp", "b"]; - uint32_t ep = tx["main.cpp", "e"]; - - auto& unit = *tx.unit; - auto tokens = unit.spelled_tokens(unit.interested_file()); - auto [left, right] = SelectionBuilder::selectionBound(tokens, {bp, ep}, unit); - 
SelectionBuilder builder{left, right, unit.context(), unit}; - auto tree = builder.build(); - auto kinds = makeNodeSequence(); - tx.expectPreorderSequence(tree, kinds); - } -} - -TEST(Selection, ParmVarDeclBoundary) { - { - const char* code = R"cpp( - void f($(b1)int xxx$(b2)yyy$(e1) = 1$(e2)) {} - )cpp"; - - SelectionTester tx("main.cpp", code); - tx.compile(); - - std::vector selects; - for(int begin = 1; begin <= 2; begin++) { - for(int end = 1; end <= 2; end++) { - uint32_t bp = tx["main.cpp", std::format("b{}", begin)]; - uint32_t ep = tx["main.cpp", std::format("e{}", end)]; - selects.push_back({bp, ep}); - } - } - - auto& unit = *tx.unit; - auto tokens = unit.spelled_tokens(unit.interested_file()); - for(auto& [begin, end]: selects) { - auto [left, right] = SelectionBuilder::selectionBound(tokens, {begin, end}, unit); - - SelectionBuilder builder(left, right, unit.context(), unit); - auto tree = builder.build(); - // debug(tree); - - auto kinds = makeNodeSequence(); - tx.expectPreorderSequence(tree, kinds); - } - } - - { - const char* code = R"cpp( - int foo() { return 42; }$(b1);$(e1) - )cpp"; - - SelectionTester tx("main.cpp", code); - tx.compile(); - - auto bp = tx["main.cpp", "b1"]; - auto ep = tx["main.cpp", "e1"]; - - auto& unit = *tx.unit; - auto tokens = unit.spelled_tokens(unit.interested_file()); - auto [left, right] = SelectionBuilder::selectionBound(tokens, {bp, ep}, unit); - SelectionBuilder builder{left, right, unit.context(), unit}; - auto tree = builder.build(); - - tx.expectPreorderSequence(tree, {}); - } -} - -TEST(Selection, SingleStmt) { - const char* code = R"cpp( -namespace test { - int f() { - $(stmt_begin)int x = 1;$(stmt_end) +static unsigned getTokenLengthAtLoc(SourceLocation Loc, + const SourceManager& SM, + const LangOptions& LangOpts) { + clang::Token TheTok; + if(clang::Lexer::getRawToken(Loc, TheTok, SM, LangOpts)) return 0; - } -} -)cpp"; + // FIXME: Here we check whether the token at the location is a greatergreater + // (>>) 
token and consider it as a single greater (>). This is to get it + // working for templates but it isn't correct for the right shift operator. We + // can avoid this by using half open char ranges in getFileRange() but getting + // token ending is not well supported in macroIDs. + if(TheTok.is(tok::greatergreater)) + return 1; - SelectionTester tx("main.cpp", code); - tx.compile(); - - auto& unit = *tx.unit; - - uint32_t begin = tx["main.cpp", "stmt_begin"]; - uint32_t end = tx["main.cpp", "stmt_end"]; - - auto tokens = unit.spelled_tokens(unit.interested_file()); - auto [left, right] = SelectionBuilder::selectionBound(tokens, {begin, end}, unit); - - EXPECT_EQ(left->kind(), clang::tok::kw_int); - EXPECT_EQ(right->kind(), clang::tok::semi); - - SelectionBuilder builder(left, right, unit.context(), unit); - auto tree = builder.build(); - // debug(tree); - - auto kinds = makeNodeSequence(); - tx.expectPreorderSequence(tree, kinds); + return TheTok.getLength(); } -TEST(Selection, MultiStmt) { - const char* code = R"cpp( -namespace test { - int f() { - $(multi_begin)int x = 1; - int y = x + 1; - if (y) { x -= 1; } - $(multi_end) - return 0; - } -} -)cpp"; - - SelectionTester tx("main.cpp", code); - tx.compile(); - - auto& unit = *tx.unit; - - uint32_t begin = tx["main.cpp", "multi_begin"]; - uint32_t end = tx["main.cpp", "multi_end"]; - - auto tokens = unit.spelled_tokens(unit.interested_file()); - auto [left, right] = SelectionBuilder::selectionBound(tokens, {begin, end}, unit); - - EXPECT_EQ(left->kind(), clang::tok::kw_int); - EXPECT_EQ(right->kind(), clang::tok::r_brace); - - SelectionBuilder builder(left, right, unit.context(), unit); - auto tree = builder.build(); - // debug(tree); - - auto kinds = - makeNodeSequence(); - tx.expectPreorderSequence(tree, kinds); +// Returns location of the starting of the token at a given EndLoc +static SourceLocation getLocForTokenBegin(SourceLocation EndLoc, + const SourceManager& SM, + const LangOptions& LangOpts) { + return 
EndLoc.getLocWithOffset(-(signed)getTokenLengthAtLoc(EndLoc, SM, LangOpts)); } -TEST(Selection, EntireClass) { - const char* code = R"cpp( -namespace test{ -$(class_begin)class Test { - int x; - int y; - - void f(); -};$(class_end) -} -)cpp"; - - SelectionTester tx("main.cpp", code); - tx.compile(); - - auto& unit = *tx.unit; - - uint32_t begin = tx["main.cpp", "class_begin"]; - uint32_t end = tx["main.cpp", "class_end"]; - - auto tokens = unit.spelled_tokens(unit.interested_file()); - auto [left, right] = SelectionBuilder::selectionBound(tokens, {begin, end}, unit); - - EXPECT_EQ(left->kind(), clang::tok::kw_class); - EXPECT_EQ(right->kind(), clang::tok::semi); - - SelectionBuilder builder(left, right, unit.context(), unit); - auto tree = builder.build(); - // debug(tree); - - auto kinds = makeNodeSequence(); - tx.expectPreorderSequence(tree, kinds); +// Returns location of the last character of the token at a given loc +static SourceLocation getLocForTokenEnd(SourceLocation BeginLoc, + const SourceManager& SM, + const LangOptions& LangOpts) { + unsigned Len = getTokenLengthAtLoc(BeginLoc, SM, LangOpts); + return BeginLoc.getLocWithOffset(Len ? Len - 1 : 0); } -TEST(Selection, ClassField) { - const char* code = R"cpp( -class Test { - int $(begin)x$(end); - int y; -}; -)cpp"; - - SelectionTester tx("main.cpp", code); - tx.compile(); - - auto& unit = *tx.unit; - - uint32_t begin = tx["main.cpp", "begin"]; - uint32_t end = tx["main.cpp", "end"]; - - auto tokens = unit.spelled_tokens(unit.interested_file()); - auto [left, right] = SelectionBuilder::selectionBound(tokens, {begin, end}, unit); - - EXPECT_EQ(left->kind(), clang::tok::identifier); - EXPECT_EQ(right->kind(), clang::tok::identifier); - - SelectionBuilder builder(left, right, unit.context(), unit); - auto tree = builder.build(); - // debug(tree); - - auto kinds = makeNodeSequence(); - tx.expectPreorderSequence(tree, kinds); +// Converts a char source range to a token range. 
+static SourceRange toTokenRange(CharSourceRange Range, + const SourceManager& SM, + const LangOptions& LangOpts) { + if(!Range.isTokenRange()) + Range.setEnd(getLocForTokenBegin(Range.getEnd(), SM, LangOpts)); + return Range.getAsRange(); } -TEST(Selection, IfCondExpr) { - const char* code = R"cpp( -void f(int& x){ - if ($(begin1)x $(begin2)==$(end2) 1$(end1)) {} +// Returns the union of two token ranges. +// To find the maximum of the Ends of the ranges, we compare the location of the +// last character of the token. +static SourceRange unionTokenRange(SourceRange R1, + SourceRange R2, + const SourceManager& SM, + const LangOptions& LangOpts) { + SourceLocation Begin = + SM.isBeforeInTranslationUnit(R1.getBegin(), R2.getBegin()) ? R1.getBegin() : R2.getBegin(); + SourceLocation End = SM.isBeforeInTranslationUnit(getLocForTokenEnd(R1.getEnd(), SM, LangOpts), + getLocForTokenEnd(R2.getEnd(), SM, LangOpts)) + ? R2.getEnd() + : R1.getEnd(); + return SourceRange(Begin, End); } -)cpp"; - SelectionTester tx("main.cpp", code); - tx.compile(); +bool isValidFileRange(const SourceManager& Mgr, SourceRange R) { + if(!R.getBegin().isValid() || !R.getEnd().isValid()) + return false; - auto& unit = *tx.unit; + FileID BeginFID; + size_t BeginOffset = 0; + std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin()); - { - uint32_t begin = tx["main.cpp", "begin1"]; - uint32_t end = tx["main.cpp", "end1"]; + FileID EndFID; + size_t EndOffset = 0; + std::tie(EndFID, EndOffset) = Mgr.getDecomposedLoc(R.getEnd()); - auto tokens = unit.spelled_tokens(unit.interested_file()); - auto [left, right] = SelectionBuilder::selectionBound(tokens, {begin, end}, unit); + return BeginFID.isValid() && BeginFID == EndFID && BeginOffset <= EndOffset; +} - EXPECT_EQ(left->kind(), clang::tok::identifier); - EXPECT_EQ(right->kind(), clang::tok::numeric_constant); - - SelectionBuilder builder(left, right, unit.context(), unit); - auto tree = builder.build(); - // debug(tree); - - auto kinds = 
makeNodeSequence(); - tx.expectPreorderSequence(tree, kinds); - } - - { - uint32_t begin = tx["main.cpp", "begin2"]; - uint32_t end = tx["main.cpp", "end2"]; - - auto tokens = unit.spelled_tokens(unit.interested_file()); - auto [left, right] = SelectionBuilder::selectionBound(tokens, {begin, end}, unit); - - auto lk = left->kind(); - auto rk = right->kind(); - - EXPECT_EQ(left->kind(), clang::tok::equalequal); - EXPECT_EQ(right->kind(), clang::tok::equalequal); - - SelectionBuilder builder(left, right, unit.context(), unit); - auto tree = builder.build(); - // debug(tree); - - auto kinds = makeNodeSequence(); - tx.expectPreorderSequence(tree, kinds); +SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager& SM) { + assert(SM.getLocForEndOfFile(IncludedFile).isFileID()); + FileID IncludingFile; + unsigned Offset; + std::tie(IncludingFile, Offset) = SM.getDecomposedExpansionLoc(SM.getIncludeLoc(IncludedFile)); + bool Invalid = false; + llvm::StringRef Buf = SM.getBufferData(IncludingFile, &Invalid); + if(Invalid) + return SourceLocation(); + // Now buf is "...\n#include \n..." + // and Offset points here: ^ + // Rewind to the preceding # on the line. + assert(Offset < Buf.size()); + for(;; --Offset) { + if(Buf[Offset] == '#') + return SM.getComposedLoc(IncludingFile, Offset); + if(Buf[Offset] == '\n' || Offset == 0) // no hash, what's going on? 
+ return SourceLocation(); } } -TEST(Selection, ClassMethod) { - const char* code = R"cpp( -class Test { - $(b1)void $(b2)f(int x, int y) $(b3){$(e1) - int z = x + y;$(e2) - }$(e3) -}; -)cpp"; - - SelectionTester tx("main.cpp", code); - tx.compile(); - - { // {b1, b2} X {e1, e2, e3} - std::vector b12_e123; - for(int begin = 1; begin <= 2; begin++) { - for(int end = 1; end <= 3; end++) { - uint32_t bp = tx["main.cpp", std::format("b{}", begin)]; - uint32_t ep = tx["main.cpp", std::format("e{}", end)]; - b12_e123.push_back({bp, ep}); - } - } - - auto& unit = *tx.unit; - auto tokens = unit.spelled_tokens(unit.interested_file()); - for(auto& [begin, end]: b12_e123) { - auto [left, right] = SelectionBuilder::selectionBound(tokens, {begin, end}, unit); - - SelectionBuilder builder(left, right, unit.context(), unit); - auto tree = builder.build(); - // debug(tree); - - auto kinds = makeNodeSequence(); - tx.expectPreorderSequence(tree, kinds); +// Given a range whose endpoints may be in different expansions or files, +// tries to find a range within a common file by following up the expansion and +// include location in each. +static SourceRange rangeInCommonFile(SourceRange R, + const SourceManager& SM, + const LangOptions& LangOpts) { + // Fast path for most common cases. + if(SM.isWrittenInSameFile(R.getBegin(), R.getEnd())) + return R; + // Record the stack of expansion locations for the beginning, keyed by FileID. + llvm::DenseMap BeginExpansions; + for(SourceLocation Begin = R.getBegin(); Begin.isValid(); + Begin = Begin.isFileID() ? includeHashLoc(SM.getFileID(Begin), SM) + : SM.getImmediateExpansionRange(Begin).getBegin()) { + BeginExpansions[SM.getFileID(Begin)] = Begin; + } + // Move up the stack of expansion locations for the end until we find the + // location in BeginExpansions with that has the same file id. + for(SourceLocation End = R.getEnd(); End.isValid(); + End = End.isFileID() + ? 
includeHashLoc(SM.getFileID(End), SM) + : toTokenRange(SM.getImmediateExpansionRange(End), SM, LangOpts).getEnd()) { + auto It = BeginExpansions.find(SM.getFileID(End)); + if(It != BeginExpansions.end()) { + if(SM.getFileOffset(It->second) > SM.getFileOffset(End)) + return SourceLocation(); + return {It->second, End}; } } + return SourceRange(); +} - { - // {b3} X {e1, e2, e3} - std::vector b3_e123; - for(int begin = 3; begin <= 3; begin++) { - for(int end = 1; end <= 3; end++) { - uint32_t bp = tx["main.cpp", std::format("b{}", begin)]; - uint32_t ep = tx["main.cpp", std::format("e{}", end)]; - b3_e123.push_back({bp, ep}); - } - } +// Find an expansion range (not necessarily immediate) the ends of which are in +// the same file id. +static SourceRange getExpansionTokenRangeInSameFile(SourceLocation Loc, + const SourceManager& SM, + const LangOptions& LangOpts) { + return rangeInCommonFile(toTokenRange(SM.getImmediateExpansionRange(Loc), SM, LangOpts), + SM, + LangOpts); +} - auto& unit = *tx.unit; - auto tokens = unit.spelled_tokens(unit.interested_file()); - for(auto& [begin, end]: b3_e123) { - auto [left, right] = SelectionBuilder::selectionBound(tokens, {begin, end}, unit); - - SelectionBuilder builder(left, right, unit.context(), unit); - auto tree = builder.build(); - // debug(tree); - - // for b3 X e1, only care about the method body. - auto kinds = makeNodeSequence(); - - // for b3 X e23, ther is also a partial coverage of DeclStmt, but don't check it here. - // auto kinds = makeNodeSequence(); - - tx.expectPreorderSequence(tree, kinds); +// Returns the file range for a given Location as a Token Range +// This is quite similar to getFileLoc in SourceManager as both use +// getImmediateExpansionRange and getImmediateSpellingLoc (for macro IDs). +// However: +// - We want to maintain the full range information as we move from one file to +// the next. getFileLoc only uses the BeginLoc of getImmediateExpansionRange. 
+// - We want to split '>>' tokens as the lexer parses the '>>' in nested +// template instantiations as a '>>' instead of two '>'s. +// There is also getExpansionRange but it simply calls +// getImmediateExpansionRange on the begin and ends separately which is wrong. +static SourceRange getTokenFileRange(SourceLocation Loc, + const SourceManager& SM, + const LangOptions& LangOpts) { + SourceRange FileRange = Loc; + while(!FileRange.getBegin().isFileID()) { + if(SM.isMacroArgExpansion(FileRange.getBegin())) { + FileRange = unionTokenRange(SM.getImmediateSpellingLoc(FileRange.getBegin()), + SM.getImmediateSpellingLoc(FileRange.getEnd()), + SM, + LangOpts); + assert(SM.isWrittenInSameFile(FileRange.getBegin(), FileRange.getEnd())); + } else { + SourceRange ExpansionRangeForBegin = + getExpansionTokenRangeInSameFile(FileRange.getBegin(), SM, LangOpts); + SourceRange ExpansionRangeForEnd = + getExpansionTokenRangeInSameFile(FileRange.getEnd(), SM, LangOpts); + if(ExpansionRangeForBegin.isInvalid() || ExpansionRangeForEnd.isInvalid()) + return SourceRange(); + assert(SM.isWrittenInSameFile(ExpansionRangeForBegin.getBegin(), + ExpansionRangeForEnd.getBegin()) && + "Both Expansion ranges should be in same file."); + FileRange = unionTokenRange(ExpansionRangeForBegin, ExpansionRangeForEnd, SM, LangOpts); } } + return FileRange; +} + +std::optional toHalfOpenFileRange(const SourceManager& SM, + const LangOptions& LangOpts, + SourceRange R) { + SourceRange R1 = getTokenFileRange(R.getBegin(), SM, LangOpts); + if(!isValidFileRange(SM, R1)) + return std::nullopt; + + SourceRange R2 = getTokenFileRange(R.getEnd(), SM, LangOpts); + if(!isValidFileRange(SM, R2)) + return std::nullopt; + + SourceRange Result = rangeInCommonFile(unionTokenRange(R1, R2, SM, LangOpts), SM, LangOpts); + unsigned TokLen = getTokenLengthAtLoc(Result.getEnd(), SM, LangOpts); + // Convert from closed token range to half-open (char) range + Result.setEnd(Result.getEnd().getLocWithOffset(TokLen)); + 
if(!isValidFileRange(SM, Result)) + return std::nullopt; + + return Result; } } // namespace -} // namespace testing +void dump_diagnostics() {} -} // namespace clice +void select_right(llvm::StringRef code, auto&& callback, LocationChain chain = LocationChain()) { + Tester tester; + tester.add_main("main.cpp", code); + ASSERT_TRUE(tester.compile(), chain); + /// ASSERT_TRUE(tester.unit->diagnostics().empty(), chain); + + auto points = tester.nameless_points(); + ASSERT_TRUE(points.size() >= 1, chain); + + LocalSourceRange selected_range; + selected_range.begin = points[0]; + selected_range.end = points.size() == 2 ? points[1] : points[0]; + auto tree = SelectionTree::create_right(*tester.unit, selected_range); + callback(tester, tree); +} + +void EXPECT_SELECT(llvm::StringRef code, const char* kind, LocationChain chain = LocationChain()) { + select_right( + code, + [&](Tester& tester, SelectionTree& tree) { + auto node = tree.common_ancestor(); + if(!kind) { + ASSERT_FALSE(node, chain); + } else { + ASSERT_TRUE(node, chain); + auto range2 = toHalfOpenFileRange(tester.unit->context().getSourceManager(), + tester.unit->lang_options(), + node->source_range()); + LocalSourceRange range = { + tester.unit->file_offset(range2->getBegin()), + tester.unit->file_offset(range2->getEnd()), + }; + + /// llvm::outs() << tree << "\n"; + /// tree.print(llvm::outs(), *node, 2); + + ASSERT_EQ(node->kind(), llvm::StringRef(kind), chain); + ASSERT_EQ(range, tester.range(), chain); + } + }, + chain); +} + +TEST(Selection, Expressions) { + EXPECT_SELECT(R"( + struct AAA { struct BBB { static int ccc(); };}; + int x = @[AAA::BBB::c$c$c](); + )", + "DeclRefExpr"); + + EXPECT_SELECT(R"( + struct AAA { struct BBB { static int ccc(); };}; + int x = @[AAA::BBB::ccc($)]; + )", + "CallExpr"); + + EXPECT_SELECT(R"( + struct S { + int foo() const; + int bar() { return @[f$oo](); } + }; + )", + "MemberExpr"); + + EXPECT_SELECT(R"(void foo() { @[$foo](); })", "DeclRefExpr"); + 
EXPECT_SELECT(R"(void foo() { @[f$oo](); })", "DeclRefExpr"); + EXPECT_SELECT(R"(void foo() { @[fo$o](); })", "DeclRefExpr"); + + EXPECT_SELECT(R"(void foo() { @[foo$] (); })", "DeclRefExpr"); + + EXPECT_SELECT(R"(void foo() { @[foo$()]; })", "CallExpr"); + EXPECT_SELECT(R"(void foo() { @[foo$()]; /*comment*/$})", "CallExpr"); + EXPECT_SELECT(R"(const int x = 1, y = 2; int array[ @[$x] ][10][y];)", "DeclRefExpr"); + EXPECT_SELECT(R"(const int x = 1, y = 2; int array[x][10][ @[$y] ];)", "DeclRefExpr"); + EXPECT_SELECT(R"(void func(int x) { int v_array[ @[$x] ][10]; })", "DeclRefExpr"); + EXPECT_SELECT(R"( + int a; + decltype(@[$a] + a) b; + )", + "DeclRefExpr"); + + EXPECT_SELECT(R"( + void func() { @[__$func__]; } + )", + "PredefinedExpr"); +} + +TEST(Selection, Literals) { + EXPECT_SELECT(R"( + auto lambda = [](const char*){ return 0; }; + int x = lambda(@["y$"]); + )", + "StringLiteral"); + + EXPECT_SELECT(R"(int x = @[42]$;)", "IntegerLiteral"); + EXPECT_SELECT(R"(const int x = 1, y = 2; int array[x][ @[$10] ][y];)", "IntegerLiteral"); + + EXPECT_SELECT(R"( + struct Foo{}; + Foo operator""_ud(unsigned long long); + Foo x = @[$12_ud]; + )", + "UserDefinedLiteral"); +} + +TEST(Selection, ControlFlow) { + EXPECT_SELECT(R"( + void foo() { @[if (1$11) { return; } else {$ }]} } + )", + "IfStmt"); + + EXPECT_SELECT(R"(int bar; void foo() @[{ foo (); }]$)", "CompoundStmt"); + + /// FIXME: + /// EXPECT_SELECT(R"( + /// /*error-ok*/ + /// void func() @[{^])", + /// "CompoundStmt"); + + EXPECT_SELECT(R"( + struct Str { + const char *begin(); + const char *end(); + }; + Str makeStr(const char*); + void loop() { + for (const char C : @[mak$eStr("foo"$)]) + ; + } + )", + "CallExpr"); +} + +TEST(Selection, Declarations) { + /// FIXME: how to handle this? 
+ /// EXPECT_SELECT(R"( + /// #define TARGET void foo() + /// @[TAR$GET{ return; }] + /// )", + /// "FunctionDecl"); + + EXPECT_SELECT(R"(@[$void foo$()];)", "FunctionDecl"); + EXPECT_SELECT(R"(@[void $foo()];)", "FunctionDecl"); + + EXPECT_SELECT(R"( + struct S { S(const char*); }; + @[S s $= "foo"]; + )", + "VarDecl"); + + EXPECT_SELECT(R"( + struct S { S(const char*); }; + @[S $s = "foo"]; + )", + "VarDecl"); + + EXPECT_SELECT(R"( + @[void (*$S)(int) = nullptr]; + )", + "VarDecl"); + + EXPECT_SELECT(R"(@[int $a], b;)", "VarDecl"); + EXPECT_SELECT(R"(@[int a, $b];)", "VarDecl"); + EXPECT_SELECT(R"(@[struct {int x;} $y];)", "VarDecl"); + EXPECT_SELECT(R"(struct foo { @[int has$h<:32:>]; };)", "FieldDecl"); + EXPECT_SELECT(R"(struct {@[int $x];} y;)", "FieldDecl"); + + EXPECT_SELECT(R"( + void test(int bar) { + auto l = [ $@[foo = bar] ] { }; + })", + "VarDecl"); +} + +TEST(Selection, Types) { + EXPECT_SELECT(R"( + struct AAA { struct BBB { static int ccc(); };}; + int x = AAA::@[B$B$B]::ccc(); + )", + "RecordTypeLoc"); + EXPECT_SELECT(R"( + struct AAA { struct BBB { static int ccc(); };}; + int x = AAA::@[B$BB$]::ccc(); + )", + "RecordTypeLoc"); + EXPECT_SELECT(R"( + struct Foo {}; + struct Bar : private @[Fo$o] {}; + )", + "RecordTypeLoc"); + EXPECT_SELECT(R"( + struct Foo {}; + struct Bar : @[Fo$o] {}; + )", + "RecordTypeLoc"); + EXPECT_SELECT(R"(@[$void] (*S)(int) = nullptr;)", "BuiltinTypeLoc"); + /// EXPECT_SELECT(R"(@[void (*S)$(int)] = nullptr;)", "FunctionProtoTypeLoc"); + EXPECT_SELECT(R"(@[void ($*S)(int)] = nullptr;)", "PointerTypeLoc"); + /// EXPECT_SELECT(R"(@[void $(*S)(int)] = nullptr;)", "ParenTypeLoc"); + EXPECT_SELECT(R"(@[$void] foo();)", "BuiltinTypeLoc"); + EXPECT_SELECT(R"(@[void foo$()];)", "FunctionProtoTypeLoc"); + EXPECT_SELECT(R"(const int x = 1, y = 2; @[i$nt] array[x][10][y];)", "BuiltinTypeLoc"); + EXPECT_SELECT(R"(int (*getFunc(@[do$uble]))(int);)", "BuiltinTypeLoc"); + EXPECT_SELECT(R"(class X{}; @[int X::$*]y[10];)", 
"MemberPointerTypeLoc"); + EXPECT_SELECT(R"(const @[a$uto] x = 42;)", "AutoTypeLoc"); + /// EXPECT_SELECT(R"(@[decltype$(1)] b;)", "DecltypeTypeLoc"); + EXPECT_SELECT(R"(@[de$cltype(a$uto)] a = 1;)", "AutoTypeLoc"); + EXPECT_SELECT(R"( + typedef int Foo; + enum Bar : @[Fo$o] {}; + )", + "TypedefTypeLoc"); + EXPECT_SELECT(R"( + typedef int Foo; + enum Bar : @[Fo$o]; + )", + "TypedefTypeLoc"); +} + +TEST(Selection, CXXFeatures) { + EXPECT_SELECT(R"( + template + int x = @[T::$U::]ccc(); + )", + "NestedNameSpecifierLoc"); + EXPECT_SELECT(R"( + struct Foo {}; + struct Bar : @[v$ir$tual private Foo] {}; + )", + "CXXBaseSpecifier"); + EXPECT_SELECT(R"( + struct X { X(int); }; + class Y { + X x; + Y() : @[$x(4)] {} + }; + )", + "CXXCtorInitializer"); + EXPECT_SELECT(R"(@[st$ruct {int x;}] y;)", "CXXRecordDecl"); + EXPECT_SELECT(R"(struct foo { @[op$erator int()]; };)", "CXXConversionDecl"); + EXPECT_SELECT(R"(struct foo { @[$~foo()]; };)", "CXXDestructorDecl"); + EXPECT_SELECT(R"(struct foo { @[~$foo()]; };)", "CXXDestructorDecl"); + EXPECT_SELECT(R"(struct foo { @[fo$o(){}] };)", "CXXConstructorDecl"); + EXPECT_SELECT(R"( + struct S1 { void f(); }; + struct S2 { S1 * operator->(); }; + void test(S2 s2) { + s2@[-$>]f(); + } + )", + "DeclRefExpr"); // Test for overloaded operator-> +} + +TEST(Selection, UsingEnum) { + EXPECT_SELECT(R"( + namespace ns { enum class A {}; }; + using enum ns::@[$A]; + )", + "EnumTypeLoc"); + EXPECT_SELECT(R"( + namespace ns { enum class A {}; using B = A; }; + using enum ns::@[$B]; + )", + "TypedefTypeLoc"); + EXPECT_SELECT(R"( + namespace ns { enum class A {}; }; + using enum @[$ns::]A; + )", + "NestedNameSpecifierLoc"); + EXPECT_SELECT(R"( + namespace ns { enum class A {}; }; + @[using $enum ns::A]; + )", + "UsingEnumDecl"); + EXPECT_SELECT(R"( + namespace ns { enum class A {}; }; + @[$using enum ns::A]; + )", + "UsingEnumDecl"); +} + +TEST(Selection, Templates) { + EXPECT_SELECT(R"(template void foo(@[T*$...]x);)", "PackExpansionTypeLoc"); 
+ EXPECT_SELECT(R"(template void foo(@[$T]*...x);)", "TemplateTypeParmTypeLoc"); + EXPECT_SELECT(R"(template void foo() { @[$T] t; })", "TemplateTypeParmTypeLoc"); + EXPECT_SELECT(R"( + template struct Foo {}; + template <@[template class /*cursor here*/$U]> + struct Foo*> {}; + )", + "TemplateTemplateParmDecl"); + EXPECT_SELECT(R"(template struct foo { ~foo<@[$T]>(){} };)", + "TemplateTypeParmTypeLoc"); + EXPECT_SELECT(R"( + template class Vector {}; + template