From 98f7d756e334278e2e34177fa11e5a604d3b01ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A9sz=C3=A1ros=20Gergely?= Date: Fri, 27 Jun 2025 22:39:07 +0200 Subject: [PATCH] [FileCheck] Improve printing variables with escapes (#145865) Firstly fix FileCheck printing string variables double-escaped (first regex, then C-style). This is confusing because it is not clear if the printed value is the literal value or exactly how it is escaped, without looking at FileCheck's source code. Secondly, only escape when doing so makes it easier to read the value (when the string contains tabs, newlines or non-printable characters). When the variable value is escaped, make a note of it in the output too, in order to avoid confusion. The common case that is motivating this change is variables that contain windows style paths with backslashes. These were printed as `"C:\\\\Program Files\\\\MyApp\\\\file.txt"`. Now prefer to print them as `"C:\Program Files\MyApp\file.txt"`. Printing the value literally also makes it easier to search for variables in the output, since the user can just copy-paste it. --- llvm/lib/FileCheck/FileCheck.cpp | 54 +++++++++++++++++++--- llvm/lib/FileCheck/FileCheckImpl.h | 24 ++++++++-- llvm/test/FileCheck/var-escape.txt | 6 +-- llvm/unittests/FileCheck/FileCheckTest.cpp | 8 ++-- 4 files changed, 75 insertions(+), 17 deletions(-) diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp index bcca499322ae..b79f6ec5b4f0 100644 --- a/llvm/lib/FileCheck/FileCheck.cpp +++ b/llvm/lib/FileCheck/FileCheck.cpp @@ -264,7 +264,7 @@ BinaryOperation::getImplicitFormat(const SourceMgr &SM) const { : *RightFormat; } -Expected NumericSubstitution::getResult() const { +Expected NumericSubstitution::getResultRegex() const { assert(ExpressionPointer->getAST() != nullptr && "Substituting empty expression"); Expected EvaluatedValue = ExpressionPointer->getAST()->eval(); @@ -274,7 +274,18 @@ Expected NumericSubstitution::getResult() const { return Format.getMatchingString(*EvaluatedValue); } -Expected StringSubstitution::getResult() const { +Expected NumericSubstitution::getResultForDiagnostics() const { + // The "regex" returned by getResultRegex() is just a numeric value + // like '42', '0x2A', '-17', 'DEADBEEF' etc. This is already suitable for use + // in diagnostics. + Expected Literal = getResultRegex(); + if (!Literal) + return Literal; + + return "\"" + std::move(*Literal) + "\""; +} + +Expected StringSubstitution::getResultRegex() const { // Look up the value and escape it so that we can put it into the regex. Expected VarVal = Context->getPatternVarValue(FromStr); if (!VarVal) @@ -282,6 +293,36 @@ Expected StringSubstitution::getResult() const { return Regex::escape(*VarVal); } +Expected StringSubstitution::getResultForDiagnostics() const { + Expected VarVal = Context->getPatternVarValue(FromStr); + if (!VarVal) + return VarVal.takeError(); + + std::string Result; + Result.reserve(VarVal->size() + 2); + raw_string_ostream OS(Result); + + OS << '"'; + // Escape the string if it contains any characters that + // make it hard to read, such as non-printable characters (including all + // whitespace except space) and double quotes. These are the characters that + // are escaped by write_escaped(), except we do not include backslashes, + // because they are common in Windows paths and escaping them would make the + // output harder to read. However, when we do escape, backslashes are escaped + // as well, otherwise the output would be ambiguous. + const bool NeedsEscaping = + llvm::any_of(*VarVal, [](char C) { return !isPrint(C) || C == '"'; }); + if (NeedsEscaping) + OS.write_escaped(*VarVal); + else + OS << *VarVal; + OS << '"'; + if (NeedsEscaping) + OS << " (escaped value)"; + + return Result; +} + bool Pattern::isValidVarNameStart(char C) { return C == '_' || isAlpha(C); } Expected @@ -1106,7 +1147,7 @@ Pattern::MatchResult Pattern::match(StringRef Buffer, Error Errs = Error::success(); for (const auto &Substitution : Substitutions) { // Substitute and check for failure (e.g. use of undefined variable). - Expected Value = Substitution->getResult(); + Expected Value = Substitution->getResultRegex(); if (!Value) { // Convert to an ErrorDiagnostic to get location information. This is // done here rather than printMatch/printNoMatch since now we know which @@ -1210,7 +1251,8 @@ void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer, SmallString<256> Msg; raw_svector_ostream OS(Msg); - Expected MatchedValue = Substitution->getResult(); + Expected MatchedValue = + Substitution->getResultForDiagnostics(); // Substitution failures are handled in printNoMatch(). if (!MatchedValue) { consumeError(MatchedValue.takeError()); @@ -1218,8 +1260,8 @@ void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer, } OS << "with \""; - OS.write_escaped(Substitution->getFromString()) << "\" equal to \""; - OS.write_escaped(*MatchedValue) << "\""; + OS.write_escaped(Substitution->getFromString()) << "\" equal to "; + OS << *MatchedValue; // We report only the start of the match/search range to suggest we are // reporting the substitutions as set at the start of the match/search. diff --git a/llvm/lib/FileCheck/FileCheckImpl.h b/llvm/lib/FileCheck/FileCheckImpl.h index b3cd2af5d57e..5cf5548cdfde 100644 --- a/llvm/lib/FileCheck/FileCheckImpl.h +++ b/llvm/lib/FileCheck/FileCheckImpl.h @@ -366,9 +366,15 @@ public: /// \returns the index where the substitution is to be performed in RegExStr. size_t getIndex() const { return InsertIdx; } + /// \returns a regular expression string that matches the result of the + /// substitution represented by this class instance or an error if + /// substitution failed. + virtual Expected getResultRegex() const = 0; + /// \returns a string containing the result of the substitution represented - /// by this class instance or an error if substitution failed. - virtual Expected getResult() const = 0; + /// by this class instance in a form suitable for diagnostics, or an error if + /// substitution failed. + virtual Expected getResultForDiagnostics() const = 0; }; class StringSubstitution : public Substitution { @@ -379,7 +385,12 @@ public: /// \returns the text that the string variable in this substitution matched /// when defined, or an error if the variable is undefined. - Expected getResult() const override; + Expected getResultRegex() const override; + + /// \returns the text that the string variable in this substitution matched + /// when defined, in a form suitable for diagnostics, or an error if the + /// variable is undefined. + Expected getResultForDiagnostics() const override; }; class NumericSubstitution : public Substitution { @@ -397,7 +408,12 @@ public: /// \returns a string containing the result of evaluating the expression in /// this substitution, or an error if evaluation failed. - Expected getResult() const override; + Expected getResultRegex() const override; + + /// \returns a string containing the result of evaluating the expression in + /// this substitution, in a form suitable for diagnostics, or an error if + /// evaluation failed. + Expected getResultForDiagnostics() const override; }; //===----------------------------------------------------------------------===// diff --git a/llvm/test/FileCheck/var-escape.txt b/llvm/test/FileCheck/var-escape.txt index c3dc0fc7591d..b290b6671be3 100644 --- a/llvm/test/FileCheck/var-escape.txt +++ b/llvm/test/FileCheck/var-escape.txt @@ -14,9 +14,9 @@ VARS-NEXT: [[WINPATH]] [[NOT_ESCAPED]] [[ESCAPED]] [[#$NUMERIC + 0]] ; RUN: -dump-input=never --strict-whitespace --check-prefix=VARS --input-file=%t.1 %s 2>&1 \ ; RUN: | FileCheck %s -CHECK: with "WINPATH" equal to "A:\\\\windows\\\\style\\\\path" -CHECK: with "NOT_ESCAPED" equal to "shouldn't be escaped \\[a-Z\\]\\\\\\+\\$" -CHECK: with "ESCAPED" equal to "\\\\ \014\013 needs\to \"be\" escaped\\\000" +CHECK: with "WINPATH" equal to "A:\windows\style\path" +CHECK: with "NOT_ESCAPED" equal to "shouldn't be escaped [a-Z]\+$" +CHECK: with "ESCAPED" equal to "\\ \014\013 needs\to \"be\" escaped\000" (escaped value) CHECK: with "$NUMERIC + 0" equal to "DEADBEEF" ; Test escaping of the name of a numeric substitution, which might contain diff --git a/llvm/unittests/FileCheck/FileCheckTest.cpp b/llvm/unittests/FileCheck/FileCheckTest.cpp index 91fce69b9a6c..2c4130330e97 100644 --- a/llvm/unittests/FileCheck/FileCheckTest.cpp +++ b/llvm/unittests/FileCheck/FileCheckTest.cpp @@ -1439,7 +1439,7 @@ TEST_F(FileCheckTest, Substitution) { // Substitution of an undefined string variable fails and error holds that // variable's name. StringSubstitution StringSubstitution(&Context, "VAR404", 42); - Expected SubstValue = StringSubstitution.getResult(); + Expected SubstValue = StringSubstitution.getResultRegex(); expectUndefErrors({"VAR404"}, SubstValue.takeError()); // Numeric substitution blocks constituted of defined numeric variables are @@ -1452,20 +1452,20 @@ TEST_F(FileCheckTest, Substitution) { std::move(NVarUse), ExpressionFormat(ExpressionFormat::Kind::HexUpper)); NumericSubstitution SubstitutionN(&Context, "N", std::move(ExpressionN), /*InsertIdx=*/30); - SubstValue = SubstitutionN.getResult(); + SubstValue = SubstitutionN.getResultRegex(); ASSERT_THAT_EXPECTED(SubstValue, Succeeded()); EXPECT_EQ("A", *SubstValue); // Substitution of an undefined numeric variable fails, error holds name of // undefined variable. NVar.clearValue(); - SubstValue = SubstitutionN.getResult(); + SubstValue = SubstitutionN.getResultRegex(); expectUndefErrors({"N"}, SubstValue.takeError()); // Substitution of a defined string variable returns the right value. Pattern P(Check::CheckPlain, &Context, 1); StringSubstitution = llvm::StringSubstitution(&Context, "FOO", 42); - SubstValue = StringSubstitution.getResult(); + SubstValue = StringSubstitution.getResultRegex(); ASSERT_THAT_EXPECTED(SubstValue, Succeeded()); EXPECT_EQ("BAR", *SubstValue); }