[clang-tidy] Switch misc-confusable-identifiers check to a faster algorithm. (#130369)

Optimizations:

- Only build the skeleton for each identifier once, rather than once for
each declaration of that identifier.
- Only compute the contexts in which identifiers are declared for
identifiers that have the same skeleton as another identifier in the
translation unit.
- Only compare pairs of declarations that are declared in related
contexts, rather than comparing all pairs of declarations with the same
skeleton.

Also simplify by removing the caching of enclosing `DeclContext` sets,
because with the above changes we don't even compute the enclosing
`DeclContext` sets in common cases. Instead, we terminate the traversal
to enclosing `DeclContext`s immediately if we've already found another
declaration in that context with the same identifier. (This optimization
is not currently applied to the `forallBases` traversal, but could be
applied there too if needed.)

This also fixes two bugs that together caused the check to fail to find
some of the issues it was looking for:

- The old check skipped comparisons of declarations from different
contexts unless both declarations were type template parameters. This
caused the checker to not warn on some instances of the CVE it is
intended to detect.
- The old check skipped comparisons of declarations in all base classes
other than the first one found by the traversal. This appears to be an
oversight, incorrectly returning `false` rather than `true` from the
`forallBases` callback, which terminates traversal.

This also fixes an issue where the check would have false positives for
template parameters and function parameters in some cases, because those
parameters sometimes have a parent `DeclContext` that is the parent of
the parameterized entity, or sometimes is the translation unit. In
either case, this would cause warnings about declarations that are never
visible together in any scope.

This decreases the runtime of this check, especially in the common case
where there are few or no skeletons with two or more different
identifiers. Running this check over LLVM, clang, and clang-tidy, the
wall time for the check as reported by clang-tidy's internal profiler is
reduced from 5202.86s to 3900.90s.
This commit is contained in:
Richard Smith
2025-07-01 13:31:46 -07:00
committed by GitHub
parent a061171426
commit c56c349d39
3 changed files with 211 additions and 120 deletions

View File

@@ -1,5 +1,4 @@
//===--- ConfusableIdentifierCheck.cpp -
// clang-tidy--------------------------===//
//===--- ConfusableIdentifierCheck.cpp - clang-tidy -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -9,6 +8,7 @@
#include "ConfusableIdentifierCheck.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/ConvertUTF.h"
@@ -88,90 +88,68 @@ static llvm::SmallString<64U> skeleton(StringRef Name) {
return Skeleton;
}
static bool mayShadowImpl(const DeclContext *DC0, const DeclContext *DC1) {
return DC0 && DC0 == DC1;
}
namespace {
struct Entry {
const NamedDecl *ND;
const Decl *Parent;
bool FromDerivedClass;
};
} // namespace
static bool mayShadowImpl(const NamedDecl *ND0, const NamedDecl *ND1) {
return isa<TemplateTypeParmDecl>(ND0) || isa<TemplateTypeParmDecl>(ND1);
}
// Map from a context to the declarations in that context with the current
// skeleton. At most one entry per distinct identifier is tracked. The
// context is usually a `DeclContext`, but can also be a template declaration
// that has no corresponding context, such as an alias template or variable
// template.
using DeclsWithinContextMap =
llvm::DenseMap<const Decl *, llvm::SmallVector<Entry, 1>>;
static bool isMemberOf(const ConfusableIdentifierCheck::ContextInfo *DC0,
const ConfusableIdentifierCheck::ContextInfo *DC1) {
return llvm::is_contained(DC1->Bases, DC0->PrimaryContext);
}
static bool enclosesContext(const ConfusableIdentifierCheck::ContextInfo *DC0,
const ConfusableIdentifierCheck::ContextInfo *DC1) {
if (DC0->PrimaryContext == DC1->PrimaryContext)
return true;
return llvm::is_contained(DC0->PrimaryContexts, DC1->PrimaryContext) ||
llvm::is_contained(DC1->PrimaryContexts, DC0->PrimaryContext);
}
static bool mayShadow(const NamedDecl *ND0,
const ConfusableIdentifierCheck::ContextInfo *DC0,
const NamedDecl *ND1,
const ConfusableIdentifierCheck::ContextInfo *DC1) {
if (!DC0->Bases.empty() && !DC1->Bases.empty()) {
// if any of the declaration is a non-private member of the other
// declaration, it's shadowed by the former
if (ND1->getAccess() != AS_private && isMemberOf(DC1, DC0))
return true;
if (ND0->getAccess() != AS_private && isMemberOf(DC0, DC1))
static bool addToContext(DeclsWithinContextMap &DeclsWithinContext,
const Decl *Context, Entry E) {
auto &Decls = DeclsWithinContext[Context];
if (!Decls.empty() &&
Decls.back().ND->getIdentifier() == E.ND->getIdentifier()) {
// Already have a declaration with this identifier in this context. Don't
// track another one. This means that if an outer name is confusable with an
// inner name, we'll only diagnose the outer name once, pointing at the
// first inner declaration with that name.
if (Decls.back().FromDerivedClass && !E.FromDerivedClass) {
// Prefer the declaration that's not from the derived class, because that
// conflicts with more declarations.
Decls.back() = E;
return true;
}
if (!mayShadowImpl(DC0->NonTransparentContext, DC1->NonTransparentContext) &&
!mayShadowImpl(ND0, ND1))
return false;
return enclosesContext(DC0, DC1);
}
Decls.push_back(E);
return true;
}
const ConfusableIdentifierCheck::ContextInfo *
ConfusableIdentifierCheck::getContextInfo(const DeclContext *DC) {
const DeclContext *PrimaryContext = DC->getPrimaryContext();
auto [It, Inserted] = ContextInfos.try_emplace(PrimaryContext);
if (!Inserted)
return &It->second;
static void addToEnclosingContexts(DeclsWithinContextMap &DeclsWithinContext,
const Decl *Parent, const NamedDecl *ND) {
const Decl *Outer = Parent;
while (Outer) {
if (const auto *NS = dyn_cast<NamespaceDecl>(Outer))
Outer = NS->getCanonicalDecl();
ContextInfo &Info = It->second;
Info.PrimaryContext = PrimaryContext;
Info.NonTransparentContext = PrimaryContext;
if (!addToContext(DeclsWithinContext, Outer, {ND, Parent, false}))
return;
while (Info.NonTransparentContext->isTransparentContext()) {
Info.NonTransparentContext = Info.NonTransparentContext->getParent();
if (!Info.NonTransparentContext)
break;
}
if (Info.NonTransparentContext)
Info.NonTransparentContext =
Info.NonTransparentContext->getPrimaryContext();
while (DC) {
if (!isa<LinkageSpecDecl>(DC) && !isa<ExportDecl>(DC))
Info.PrimaryContexts.push_back(DC->getPrimaryContext());
DC = DC->getParent();
}
if (const auto *RD = dyn_cast<CXXRecordDecl>(PrimaryContext)) {
if (const auto *RD = dyn_cast<CXXRecordDecl>(Outer)) {
RD = RD->getDefinition();
if (RD) {
Info.Bases.push_back(RD);
RD->forallBases([&](const CXXRecordDecl *Base) {
Info.Bases.push_back(Base);
return false;
addToContext(DeclsWithinContext, Base, {ND, Parent, true});
return true;
});
}
}
return &Info;
auto *OuterDC = Outer->getDeclContext();
if (!OuterDC)
break;
Outer = cast_or_null<Decl>(OuterDC->getNonTransparentContext());
}
}
void ConfusableIdentifierCheck::check(
@@ -180,7 +158,28 @@ void ConfusableIdentifierCheck::check(
if (!ND)
return;
IdentifierInfo *NDII = ND->getIdentifier();
addDeclToCheck(ND,
cast<Decl>(ND->getDeclContext()->getNonTransparentContext()));
// Associate template parameters with this declaration of this template.
if (const auto *TD = dyn_cast<TemplateDecl>(ND)) {
for (const NamedDecl *Param : *TD->getTemplateParameters())
addDeclToCheck(Param, TD->getTemplatedDecl());
}
// Associate function parameters with this declaration of this function.
if (const auto *FD = dyn_cast<FunctionDecl>(ND)) {
for (const NamedDecl *Param : FD->parameters())
addDeclToCheck(Param, ND);
}
}
void ConfusableIdentifierCheck::addDeclToCheck(const NamedDecl *ND,
const Decl *Parent) {
if (!ND || !Parent)
return;
const IdentifierInfo *NDII = ND->getIdentifier();
if (!NDII)
return;
@@ -188,34 +187,78 @@ void ConfusableIdentifierCheck::check(
if (NDName.empty())
return;
const ContextInfo *Info = getContextInfo(ND->getDeclContext());
llvm::SmallVector<Entry> &Mapped = Mapper[skeleton(NDName)];
for (const Entry &E : Mapped) {
if (!mayShadow(ND, Info, E.Declaration, E.Info))
continue;
const IdentifierInfo *ONDII = E.Declaration->getIdentifier();
StringRef ONDName = ONDII->getName();
if (ONDName == NDName)
continue;
diag(ND->getLocation(), "%0 is confusable with %1") << ND << E.Declaration;
diag(E.Declaration->getLocation(), "other declaration found here",
DiagnosticIDs::Note);
}
Mapped.push_back({ND, Info});
NameToDecls[NDII].push_back({ND, Parent});
}
void ConfusableIdentifierCheck::onEndOfTranslationUnit() {
Mapper.clear();
ContextInfos.clear();
llvm::StringMap<llvm::SmallVector<const IdentifierInfo *, 1>> SkeletonToNames;
// Compute the skeleton for each identifier.
for (auto &[Ident, Decls] : NameToDecls) {
SkeletonToNames[skeleton(Ident->getName())].push_back(Ident);
}
// Visit each skeleton with more than one identifier.
for (auto &[Skel, Idents] : SkeletonToNames) {
if (Idents.size() < 2) {
continue;
}
// Find the declaration contexts that transitively contain each identifier.
DeclsWithinContextMap DeclsWithinContext;
for (const IdentifierInfo *II : Idents) {
for (auto [ND, Parent] : NameToDecls[II]) {
addToEnclosingContexts(DeclsWithinContext, Parent, ND);
}
}
// Check to see if any declaration is declared in a context that
// transitively contains another declaration with a different identifier but
// the same skeleton.
for (const IdentifierInfo *II : Idents) {
for (auto [OuterND, OuterParent] : NameToDecls[II]) {
for (Entry Inner : DeclsWithinContext[OuterParent]) {
// Don't complain if the identifiers are the same.
if (OuterND->getIdentifier() == Inner.ND->getIdentifier())
continue;
// Don't complain about a derived-class name shadowing a base class
// private member.
if (OuterND->getAccess() == AS_private && Inner.FromDerivedClass)
continue;
// If the declarations are in the same context, only diagnose the
// later one.
if (OuterParent == Inner.Parent &&
Inner.ND->getASTContext()
.getSourceManager()
.isBeforeInTranslationUnit(Inner.ND->getLocation(),
OuterND->getLocation()))
continue;
diag(Inner.ND->getLocation(), "%0 is confusable with %1")
<< Inner.ND << OuterND;
diag(OuterND->getLocation(), "other declaration found here",
DiagnosticIDs::Note);
}
}
}
}
NameToDecls.clear();
}
void ConfusableIdentifierCheck::registerMatchers(
ast_matchers::MatchFinder *Finder) {
Finder->addMatcher(ast_matchers::namedDecl().bind("nameddecl"), this);
// Parameter declarations sometimes use the translation unit or some outer
// enclosing context as their `DeclContext`, instead of their parent, so
// we handle them specially in `check`.
auto AnyParamDecl = ast_matchers::anyOf(
ast_matchers::parmVarDecl(), ast_matchers::templateTypeParmDecl(),
ast_matchers::nonTypeTemplateParmDecl(),
ast_matchers::templateTemplateParmDecl());
Finder->addMatcher(ast_matchers::namedDecl(ast_matchers::unless(AnyParamDecl))
.bind("nameddecl"),
this);
}
} // namespace clang::tidy::misc

View File

@@ -1,5 +1,4 @@
//===--- ConfusableIdentifierCheck.h - clang-tidy
//-------------------------------*- C++ -*-===//
//===--- ConfusableIdentifierCheck.h - clang-tidy ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -11,7 +10,7 @@
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_CONFUSABLE_IDENTIFIER_CHECK_H
#include "../ClangTidyCheck.h"
#include <unordered_map>
#include "llvm/ADT/DenseMap.h"
namespace clang::tidy::misc {
@@ -31,23 +30,13 @@ public:
return TK_IgnoreUnlessSpelledInSource;
}
struct ContextInfo {
const DeclContext *PrimaryContext;
const DeclContext *NonTransparentContext;
llvm::SmallVector<const DeclContext *> PrimaryContexts;
llvm::SmallVector<const CXXRecordDecl *> Bases;
};
private:
struct Entry {
const NamedDecl *Declaration;
const ContextInfo *Info;
};
void addDeclToCheck(const NamedDecl *ND, const Decl *Parent);
const ContextInfo *getContextInfo(const DeclContext *DC);
llvm::StringMap<llvm::SmallVector<Entry>> Mapper;
std::unordered_map<const DeclContext *, ContextInfo> ContextInfos;
llvm::DenseMap<
const IdentifierInfo *,
llvm::SmallVector<std::pair<const NamedDecl *, const Decl *>, 1>>
NameToDecls;
};
} // namespace clang::tidy::misc

View File

@@ -74,6 +74,47 @@ template <typename t1, typename tl>
// CHECK-MESSAGES: :[[#@LINE-2]]:20: note: other declaration found here
void f9();
namespace different_contexts {
// No warning for names in unrelated contexts.
template <typename u1> void different_templates_1();
template <typename ul> void different_templates_2();
namespace inner {
int ul;
}
}
namespace same_template {
template <typename u1, typename ul> using T = int;
// CHECK-MESSAGES: :[[#@LINE-1]]:35: warning: 'ul' is confusable with 'u1' [misc-confusable-identifiers]
// CHECK-MESSAGES: :[[#@LINE-2]]:22: note: other declaration found here
template <typename v1, typename vl> int n;
// CHECK-MESSAGES: :[[#@LINE-1]]:35: warning: 'vl' is confusable with 'v1' [misc-confusable-identifiers]
// CHECK-MESSAGES: :[[#@LINE-2]]:22: note: other declaration found here
template <typename w1> int wl;
// CHECK-MESSAGES: :[[#@LINE-1]]:22: warning: 'w1' is confusable with 'wl' [misc-confusable-identifiers]
// CHECK-MESSAGES: :[[#@LINE-2]]:30: note: other declaration found here
int xl;
template <typename x1> int x;
// CHECK-MESSAGES: :[[#@LINE-1]]:22: warning: 'x1' is confusable with 'xl' [misc-confusable-identifiers]
// CHECK-MESSAGES: :[[#@LINE-3]]:7: note: other declaration found here
}
namespace f10 {
int il;
namespace inner {
int i1;
// CHECK-MESSAGES: :[[#@LINE-1]]:7: warning: 'i1' is confusable with 'il' [misc-confusable-identifiers]
// CHECK-MESSAGES: :[[#@LINE-4]]:5: note: other declaration found here
int j1;
// CHECK-MESSAGES: :[[#@LINE-1]]:7: warning: 'j1' is confusable with 'jl' [misc-confusable-identifiers]
// CHECK-MESSAGES: :[[#@LINE+2]]:5: note: other declaration found here
}
int jl;
}
struct Base0 {
virtual void mO0();
@@ -103,3 +144,21 @@ struct Derived1 : Base1 {
long mI1(); // no warning: mII is private
};
struct Base2 {
long nO0;
private:
long nII;
};
struct Mid2 : Base0, Base1, Base2 {
};
struct Derived2 : Mid2 {
long nOO;
// CHECK-MESSAGES: :[[#@LINE-1]]:8: warning: 'nOO' is confusable with 'nO0' [misc-confusable-identifiers]
// CHECK-MESSAGES: :[[#@LINE-12]]:8: note: other declaration found here
long nI1(); // no warning: mII is private
};