Files
clang-p2996/clang/lib/Tooling/Syntax/Tree.cpp
Logan Smith 5336befe8c [NFC][tools] Return underlying strings directly instead of OS.str()
This avoids an unnecessary copy required by 'return OS.str()', allowing
instead for NRVO or implicit move. The .str() call (which flushes the
stream) is no longer required since 65b13610a5,
which made raw_string_ostream unbuffered by default.

Differential Revision: https://reviews.llvm.org/D115374
2021-12-09 16:05:46 -08:00

471 lines
13 KiB
C++

//===- Tree.cpp -----------------------------------------------*- C++ -*-=====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Syntax/Tree.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Tooling/Syntax/Nodes.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Casting.h"
#include <cassert>
using namespace clang;
namespace {
static void traverse(const syntax::Node *N,
llvm::function_ref<void(const syntax::Node *)> Visit) {
if (auto *T = dyn_cast<syntax::Tree>(N)) {
for (const syntax::Node &C : T->getChildren())
traverse(&C, Visit);
}
Visit(N);
}
static void traverse(syntax::Node *N,
llvm::function_ref<void(syntax::Node *)> Visit) {
traverse(static_cast<const syntax::Node *>(N), [&](const syntax::Node *N) {
Visit(const_cast<syntax::Node *>(N));
});
}
} // namespace
syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
const TokenBuffer &Tokens)
: SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {}
const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const {
return Tokens;
}
std::pair<FileID, ArrayRef<syntax::Token>>
syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) {
auto FID = SourceMgr.createFileID(std::move(Input));
auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts));
assert(It.second && "duplicate FileID");
return {FID, It.first->second};
}
syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) {
assert(Tok != nullptr);
}
syntax::Node::Node(NodeKind Kind)
: Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr),
Kind(static_cast<unsigned>(Kind)), Role(0), Original(false),
CanModify(false) {
this->setRole(NodeRole::Detached);
}
bool syntax::Node::isDetached() const {
return getRole() == NodeRole::Detached;
}
void syntax::Node::setRole(NodeRole NR) {
this->Role = static_cast<unsigned>(NR);
}
void syntax::Tree::appendChildLowLevel(Node *Child, NodeRole Role) {
assert(Child->getRole() == NodeRole::Detached);
assert(Role != NodeRole::Detached);
Child->setRole(Role);
appendChildLowLevel(Child);
}
void syntax::Tree::appendChildLowLevel(Node *Child) {
assert(Child->Parent == nullptr);
assert(Child->NextSibling == nullptr);
assert(Child->PreviousSibling == nullptr);
assert(Child->getRole() != NodeRole::Detached);
Child->Parent = this;
if (this->LastChild) {
Child->PreviousSibling = this->LastChild;
this->LastChild->NextSibling = Child;
} else
this->FirstChild = Child;
this->LastChild = Child;
}
void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) {
assert(Child->getRole() == NodeRole::Detached);
assert(Role != NodeRole::Detached);
Child->setRole(Role);
prependChildLowLevel(Child);
}
void syntax::Tree::prependChildLowLevel(Node *Child) {
assert(Child->Parent == nullptr);
assert(Child->NextSibling == nullptr);
assert(Child->PreviousSibling == nullptr);
assert(Child->getRole() != NodeRole::Detached);
Child->Parent = this;
if (this->FirstChild) {
Child->NextSibling = this->FirstChild;
this->FirstChild->PreviousSibling = Child;
} else
this->LastChild = Child;
this->FirstChild = Child;
}
void syntax::Tree::replaceChildRangeLowLevel(Node *Begin, Node *End,
Node *New) {
assert((!Begin || Begin->Parent == this) &&
"`Begin` is not a child of `this`.");
assert((!End || End->Parent == this) && "`End` is not a child of `this`.");
assert(canModify() && "Cannot modify `this`.");
#ifndef NDEBUG
for (auto *N = New; N; N = N->NextSibling) {
assert(N->Parent == nullptr);
assert(N->getRole() != NodeRole::Detached && "Roles must be set");
// FIXME: validate the role.
}
auto Reachable = [](Node *From, Node *N) {
if (!N)
return true;
for (auto *It = From; It; It = It->NextSibling)
if (It == N)
return true;
return false;
};
assert(Reachable(FirstChild, Begin) && "`Begin` is not reachable.");
assert(Reachable(Begin, End) && "`End` is not after `Begin`.");
#endif
if (!New && Begin == End)
return;
// Mark modification.
for (auto *T = this; T && T->Original; T = T->Parent)
T->Original = false;
// Save the node before the range to be removed. Later we insert the `New`
// range after this node.
auto *BeforeBegin = Begin ? Begin->PreviousSibling : LastChild;
// Detach old nodes.
for (auto *N = Begin; N != End;) {
auto *Next = N->NextSibling;
N->setRole(NodeRole::Detached);
N->Parent = nullptr;
N->NextSibling = nullptr;
N->PreviousSibling = nullptr;
if (N->Original)
traverse(N, [](Node *C) { C->Original = false; });
N = Next;
}
// Attach new range.
auto *&NewFirst = BeforeBegin ? BeforeBegin->NextSibling : FirstChild;
auto *&NewLast = End ? End->PreviousSibling : LastChild;
if (!New) {
NewFirst = End;
NewLast = BeforeBegin;
return;
}
New->PreviousSibling = BeforeBegin;
NewFirst = New;
Node *LastInNew;
for (auto *N = New; N != nullptr; N = N->NextSibling) {
LastInNew = N;
N->Parent = this;
}
LastInNew->NextSibling = End;
NewLast = LastInNew;
}
namespace {
static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L,
const SourceManager &SM) {
assert(L);
const auto *Token = L->getToken();
assert(Token);
// Handle 'eof' separately, calling text() on it produces an empty string.
if (Token->kind() == tok::eof)
OS << "<eof>";
else
OS << Token->text(SM);
}
static void dumpNode(raw_ostream &OS, const syntax::Node *N,
const SourceManager &SM, std::vector<bool> IndentMask) {
auto DumpExtraInfo = [&OS](const syntax::Node *N) {
if (N->getRole() != syntax::NodeRole::Unknown)
OS << " " << N->getRole();
if (!N->isOriginal())
OS << " synthesized";
if (!N->canModify())
OS << " unmodifiable";
};
assert(N);
if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
OS << "'";
dumpLeaf(OS, L, SM);
OS << "'";
DumpExtraInfo(N);
OS << "\n";
return;
}
const auto *T = cast<syntax::Tree>(N);
OS << T->getKind();
DumpExtraInfo(N);
OS << "\n";
for (const syntax::Node &It : T->getChildren()) {
for (bool Filled : IndentMask) {
if (Filled)
OS << "| ";
else
OS << " ";
}
if (!It.getNextSibling()) {
OS << "`-";
IndentMask.push_back(false);
} else {
OS << "|-";
IndentMask.push_back(true);
}
dumpNode(OS, &It, SM, IndentMask);
IndentMask.pop_back();
}
}
} // namespace
std::string syntax::Node::dump(const SourceManager &SM) const {
std::string Str;
llvm::raw_string_ostream OS(Str);
dumpNode(OS, this, SM, /*IndentMask=*/{});
return std::move(OS.str());
}
std::string syntax::Node::dumpTokens(const SourceManager &SM) const {
std::string Storage;
llvm::raw_string_ostream OS(Storage);
traverse(this, [&](const syntax::Node *N) {
if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
dumpLeaf(OS, L, SM);
OS << " ";
}
});
return Storage;
}
void syntax::Node::assertInvariants() const {
#ifndef NDEBUG
if (isDetached())
assert(getParent() == nullptr);
else
assert(getParent() != nullptr);
const auto *T = dyn_cast<Tree>(this);
if (!T)
return;
for (const Node &C : T->getChildren()) {
if (T->isOriginal())
assert(C.isOriginal());
assert(!C.isDetached());
assert(C.getParent() == T);
const auto *Next = C.getNextSibling();
assert(!Next || &C == Next->getPreviousSibling());
if (!C.getNextSibling())
assert(&C == T->getLastChild() &&
"Last child is reachable by advancing from the first child.");
}
const auto *L = dyn_cast<List>(T);
if (!L)
return;
for (const Node &C : T->getChildren()) {
assert(C.getRole() == NodeRole::ListElement ||
C.getRole() == NodeRole::ListDelimiter);
if (C.getRole() == NodeRole::ListDelimiter) {
assert(isa<Leaf>(C));
assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind());
}
}
#endif
}
void syntax::Node::assertInvariantsRecursive() const {
#ifndef NDEBUG
traverse(this, [&](const syntax::Node *N) { N->assertInvariants(); });
#endif
}
const syntax::Leaf *syntax::Tree::findFirstLeaf() const {
for (const Node &C : getChildren()) {
if (const auto *L = dyn_cast<syntax::Leaf>(&C))
return L;
if (const auto *L = cast<syntax::Tree>(C).findFirstLeaf())
return L;
}
return nullptr;
}
const syntax::Leaf *syntax::Tree::findLastLeaf() const {
for (const auto *C = getLastChild(); C; C = C->getPreviousSibling()) {
if (const auto *L = dyn_cast<syntax::Leaf>(C))
return L;
if (const auto *L = cast<syntax::Tree>(C)->findLastLeaf())
return L;
}
return nullptr;
}
const syntax::Node *syntax::Tree::findChild(NodeRole R) const {
for (const Node &C : getChildren()) {
if (C.getRole() == R)
return &C;
}
return nullptr;
}
std::vector<syntax::List::ElementAndDelimiter<syntax::Node>>
syntax::List::getElementsAsNodesAndDelimiters() {
if (!getFirstChild())
return {};
std::vector<syntax::List::ElementAndDelimiter<Node>> Children;
syntax::Node *ElementWithoutDelimiter = nullptr;
for (Node &C : getChildren()) {
switch (C.getRole()) {
case syntax::NodeRole::ListElement: {
if (ElementWithoutDelimiter) {
Children.push_back({ElementWithoutDelimiter, nullptr});
}
ElementWithoutDelimiter = &C;
break;
}
case syntax::NodeRole::ListDelimiter: {
Children.push_back({ElementWithoutDelimiter, cast<syntax::Leaf>(&C)});
ElementWithoutDelimiter = nullptr;
break;
}
default:
llvm_unreachable(
"A list can have only elements and delimiters as children.");
}
}
switch (getTerminationKind()) {
case syntax::List::TerminationKind::Separated: {
Children.push_back({ElementWithoutDelimiter, nullptr});
break;
}
case syntax::List::TerminationKind::Terminated:
case syntax::List::TerminationKind::MaybeTerminated: {
if (ElementWithoutDelimiter) {
Children.push_back({ElementWithoutDelimiter, nullptr});
}
break;
}
}
return Children;
}
// Almost the same implementation of `getElementsAsNodesAndDelimiters` but
// ignoring delimiters
std::vector<syntax::Node *> syntax::List::getElementsAsNodes() {
if (!getFirstChild())
return {};
std::vector<syntax::Node *> Children;
syntax::Node *ElementWithoutDelimiter = nullptr;
for (Node &C : getChildren()) {
switch (C.getRole()) {
case syntax::NodeRole::ListElement: {
if (ElementWithoutDelimiter) {
Children.push_back(ElementWithoutDelimiter);
}
ElementWithoutDelimiter = &C;
break;
}
case syntax::NodeRole::ListDelimiter: {
Children.push_back(ElementWithoutDelimiter);
ElementWithoutDelimiter = nullptr;
break;
}
default:
llvm_unreachable("A list has only elements or delimiters.");
}
}
switch (getTerminationKind()) {
case syntax::List::TerminationKind::Separated: {
Children.push_back(ElementWithoutDelimiter);
break;
}
case syntax::List::TerminationKind::Terminated:
case syntax::List::TerminationKind::MaybeTerminated: {
if (ElementWithoutDelimiter) {
Children.push_back(ElementWithoutDelimiter);
}
break;
}
}
return Children;
}
clang::tok::TokenKind syntax::List::getDelimiterTokenKind() const {
switch (this->getKind()) {
case NodeKind::NestedNameSpecifier:
return clang::tok::coloncolon;
case NodeKind::CallArguments:
case NodeKind::ParameterDeclarationList:
case NodeKind::DeclaratorList:
return clang::tok::comma;
default:
llvm_unreachable("This is not a subclass of List, thus "
"getDelimiterTokenKind() cannot be called");
}
}
syntax::List::TerminationKind syntax::List::getTerminationKind() const {
switch (this->getKind()) {
case NodeKind::NestedNameSpecifier:
return TerminationKind::Terminated;
case NodeKind::CallArguments:
case NodeKind::ParameterDeclarationList:
case NodeKind::DeclaratorList:
return TerminationKind::Separated;
default:
llvm_unreachable("This is not a subclass of List, thus "
"getTerminationKind() cannot be called");
}
}
bool syntax::List::canBeEmpty() const {
switch (this->getKind()) {
case NodeKind::NestedNameSpecifier:
return false;
case NodeKind::CallArguments:
return true;
case NodeKind::ParameterDeclarationList:
return true;
case NodeKind::DeclaratorList:
return true;
default:
llvm_unreachable("This is not a subclass of List, thus canBeEmpty() "
"cannot be called");
}
}