Add option to dump IR to files instead of stderr (#66412)

This patch adds a flag to LLVM such that the output generated by the `-print-(before|after|all)` family of flags is written to files in a directory rather than to stderr. This new flag is `-ir-dump-directory` and is used to specify where to write the files. No other flags are added; the new flag only modifies the behavior of the existing print flags. This is a second simplified version of the changes proposed in https://github.com/llvm/llvm-project/pull/65179. This patch only adds support for the new pass manager. If this patch is accepted, similar support can be added to the legacy pass manager. Co-authored-by: Nuri Amari <nuriamari@fb.com>
2023-09-29 08:41:46 -07:00
parent 847de9c332
commit c718336c4c
4 changed files with 282 additions and 42 deletions
--- a/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -46,6 +46,18 @@ public:
  void registerCallbacks(PassInstrumentationCallbacks &PIC);

 private:
+  /// Record of a single pass invocation, captured before the pass runs so the
+  /// IR unit can still be described and printed once the pass has finished.
+  struct PassRunDescriptor {
+    /// Module the IR unit was unwrapped to when the descriptor was pushed.
+    const Module *M;
+    /// Suffix-less path of the dump file for this run; empty when the IR is
+    /// not being dumped to files.
+    const std::string DumpIRFilename;
+    /// Printable name of the IR unit the pass ran on.
+    const std::string IRName;
+    /// Identifier of the pass; compared against the finishing pass on pop.
+    /// NOTE(review): non-owning view, presumably outlives the descriptor --
+    /// confirm against the pass manager's PassID storage.
+    const StringRef PassID;
+
+    PassRunDescriptor(const Module *Mod, std::string Filename, std::string Name,
+                      const StringRef ID)
+        : M(Mod), DumpIRFilename(Filename), IRName(Name), PassID(ID) {}
+  };
+
  void printBeforePass(StringRef PassID, Any IR);
  void printAfterPass(StringRef PassID, Any IR);
  void printAfterPassInvalidated(StringRef PassID);
@@ -55,15 +67,15 @@ private:
  bool shouldPrintPassNumbers();
  bool shouldPrintAtPassNumber();

-  using PrintModuleDesc = std::tuple<const Module *, std::string, StringRef>;
-
-  void pushModuleDesc(StringRef PassID, Any IR);
-  PrintModuleDesc popModuleDesc(StringRef PassID);
+  void pushPassRunDescriptor(StringRef PassID, Any IR,
+                             std::string &DumpIRFilename);
+  PassRunDescriptor popPassRunDescriptor(StringRef PassID);
+  std::string fetchDumpFilename(StringRef PassId, Any IR);

  PassInstrumentationCallbacks *PIC;
-  /// Stack of Module description, enough to print the module after a given
+  /// Stack of Pass Run descriptions, enough to print the IR unit after a given
  /// pass.
-  SmallVector<PrintModuleDesc, 2> ModuleDescStack;
+  SmallVector<PassRunDescriptor, 2> PassRunDescriptorStack;

  /// Used for print-at-pass-number
  unsigned CurrentPassNumber = 0;
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/StableHashing.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
@@ -33,6 +34,7 @@
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/Signals.h"
@@ -120,6 +122,13 @@ static cl::opt<unsigned>
                cl::desc("Print IR at pass with this number as "
                         "reported by print-passes-names"));

+// Directory that receives the IR dumps produced by the -print-before /
+// -print-after family of options. When left empty, the dumps are written to
+// the debug stream instead of to files.
+static cl::opt<std::string> IRDumpDirectory(
+    "ir-dump-directory",
+    cl::desc("If specified, IR printed using the "
+             "-print-[before|after]{-all} options will be dumped into "
+             "files in this directory rather than written to stderr"),
+    cl::Hidden, cl::value_desc("directory"));
+
 namespace {

 // An option for specifying an executable that will be called with the IR
@@ -681,33 +690,120 @@ bool IRComparer<T>::generateFunctionData(IRDataT<T> &Data, const Function &F) {
 }

 PrintIRInstrumentation::~PrintIRInstrumentation() {
-  assert(ModuleDescStack.empty() && "ModuleDescStack is not empty at exit");
+  assert(PassRunDescriptorStack.empty() &&
+         "PassRunDescriptorStack is not empty at exit");
 }

-void PrintIRInstrumentation::pushModuleDesc(StringRef PassID, Any IR) {
+// Builds the IR-unit portion of a dump file name: a hex hash of the module
+// name, then the kind of IR unit ("module", "function", "scc" or "loop") and,
+// for anything smaller than a module, a hex hash of that unit's own name.
+// Every hash is zero-padded to the full width of a stable_hash so that the
+// components have a fixed length.
+static SmallString<32> getIRFileDisplayName(Any IR) {
+  SmallString<32> Result;
+  raw_svector_ostream ResultStream(Result);
  const Module *M = unwrapModule(IR);
-  ModuleDescStack.emplace_back(M, getIRName(IR), PassID);
+  // unwrapModule may hand back a null module when print filtering excludes
+  // this IR unit (printAfterPassInvalidated guards against exactly that), and
+  // the file name is computed before that filtering is consulted. Hash an
+  // empty name in that case instead of dereferencing a null pointer.
+  stable_hash NameHash =
+      stable_hash_combine_string(M ? M->getName() : StringRef());
+  // One hex digit encodes four bits.
+  unsigned int MaxHashWidth = sizeof(stable_hash) * 8 / 4;
+  write_hex(ResultStream, NameHash, HexPrintStyle::Lower, MaxHashWidth);
+  if (any_cast<const Module *>(&IR)) {
+    ResultStream << "-module";
+  } else if (const Function **F = any_cast<const Function *>(&IR)) {
+    ResultStream << "-function-";
+    stable_hash FunctionNameHash = stable_hash_combine_string((*F)->getName());
+    write_hex(ResultStream, FunctionNameHash, HexPrintStyle::Lower,
+              MaxHashWidth);
+  } else if (const LazyCallGraph::SCC **C =
+                 any_cast<const LazyCallGraph::SCC *>(&IR)) {
+    ResultStream << "-scc-";
+    stable_hash SCCNameHash = stable_hash_combine_string((*C)->getName());
+    write_hex(ResultStream, SCCNameHash, HexPrintStyle::Lower, MaxHashWidth);
+  } else if (const Loop **L = any_cast<const Loop *>(&IR)) {
+    ResultStream << "-loop-";
+    stable_hash LoopNameHash = stable_hash_combine_string((*L)->getName());
+    write_hex(ResultStream, LoopNameHash, HexPrintStyle::Lower, MaxHashWidth);
+  } else {
+    llvm_unreachable("Unknown wrapped IR type");
+  }
+  return Result;
 }

-PrintIRInstrumentation::PrintModuleDesc
-PrintIRInstrumentation::popModuleDesc(StringRef PassID) {
-  assert(!ModuleDescStack.empty() && "empty ModuleDescStack");
-  PrintModuleDesc ModuleDesc = ModuleDescStack.pop_back_val();
-  assert(std::get<2>(ModuleDesc).equals(PassID) && "malformed ModuleDescStack");
-  return ModuleDesc;
+// Returns the suffix-less dump path for the current pass run:
+//   <ir-dump-directory>/<pass number>-<IR unit display name>-<pass name>
+// The before/after/invalidated suffix is appended later by the printers.
+std::string PrintIRInstrumentation::fetchDumpFilename(StringRef PassName,
+                                                      Any IR) {
+  const StringRef DumpRoot = IRDumpDirectory;
+  assert(!DumpRoot.empty() &&
+         "The flag -ir-dump-directory must be passed to dump IR to files");
+
+  // Assemble the leaf file name first ...
+  SmallString<64> Leaf;
+  raw_svector_ostream LeafStream(Leaf);
+  LeafStream << CurrentPassNumber << "-" << getIRFileDisplayName(IR) << "-"
+             << PassName;
+
+  // ... then join it onto the dump directory with the platform separator.
+  SmallString<128> FullPath(DumpRoot);
+  sys::path::append(FullPath, Leaf);
+  return std::string(FullPath);
+}
+
+// Point of a pass run that a dump file captures; selects the file suffix.
+enum class IRDumpFileSuffixType {
+  Before,
+  After,
+  Invalidated,
+};
+
+// Maps a dump kind to the suffix appended to the suffix-less dump file name.
+static StringRef getFileSuffix(IRDumpFileSuffixType Type) {
+  switch (Type) {
+  case IRDumpFileSuffixType::Before:
+    return "-before.ll";
+  case IRDumpFileSuffixType::After:
+    return "-after.ll";
+  case IRDumpFileSuffixType::Invalidated:
+    return "-invalidated.ll";
+  }
+  llvm_unreachable("Unknown IRDumpFileSuffixType");
+}
+
+// Records what is needed to report on a pass after it has run: the module the
+// IR unit unwraps to, the suffix-less dump file name (may be empty), the IR
+// unit's printable name, and the pass ID used to validate the matching pop.
+void PrintIRInstrumentation::pushPassRunDescriptor(
+    StringRef PassID, Any IR, std::string &DumpIRFilename) {
+  const Module *M = unwrapModule(IR);
+  // Hand the constructor arguments straight to emplace_back so the descriptor
+  // is built in place; passing a temporary PassRunDescriptor would force an
+  // extra copy of its strings, since its const members cannot be moved from.
+  PassRunDescriptorStack.emplace_back(M, DumpIRFilename, getIRName(IR),
+                                      PassID);
+}
+
+// Removes and returns the most recently pushed descriptor. In asserts-enabled
+// builds it also checks that the stack is non-empty and that the descriptor
+// belongs to the pass identified by PassID.
+PrintIRInstrumentation::PassRunDescriptor
+PrintIRInstrumentation::popPassRunDescriptor(StringRef PassID) {
+  assert(!PassRunDescriptorStack.empty() && "empty PassRunDescriptorStack");
+  PassRunDescriptor Top = PassRunDescriptorStack.pop_back_val();
+  assert(Top.PassID == PassID && "malformed PassRunDescriptorStack");
+  return Top;
+}
+
+// Opens DumpIRFilename for writing and returns its file descriptor, creating
+// any missing parent directories first. An existing file of the same name is
+// truncated. Failure to create the directories or to open the file is
+// reported through report_fatal_error.
+// Callers are responsible for closing the returned file descriptor.
+static int prepareDumpIRFileDescriptor(const StringRef DumpIRFilename) {
+  std::error_code EC;
+  auto ParentPath = llvm::sys::path::parent_path(DumpIRFilename);
+  if (!ParentPath.empty()) {
+    // Assign to the function-level EC instead of shadowing it.
+    EC = llvm::sys::fs::create_directories(ParentPath);
+    if (EC)
+      report_fatal_error(Twine("Failed to create directory ") + ParentPath +
+                         " to support -ir-dump-directory: " + EC.message());
+  }
+  int Result = 0;
+  // CD_CreateAlways truncates a pre-existing file. CD_OpenAlways would only
+  // rewind to offset 0, so a shorter dump written over a longer file left by
+  // an earlier run would keep the old file's tail.
+  EC = sys::fs::openFile(DumpIRFilename, Result, sys::fs::CD_CreateAlways,
+                         sys::fs::FA_Write, sys::fs::OF_None);
+  if (EC)
+    report_fatal_error(Twine("Failed to open ") + DumpIRFilename +
+                       " to support -ir-dump-directory: " + EC.message());
+  return Result;
 }

 void PrintIRInstrumentation::printBeforePass(StringRef PassID, Any IR) {
  if (isIgnored(PassID))
    return;

+  std::string DumpIRFilename;
+  if (!IRDumpDirectory.empty() &&
+      (shouldPrintBeforePass(PassID) || shouldPrintAfterPass(PassID)))
+    DumpIRFilename = fetchDumpFilename(PassID, IR);
+
  // Saving Module for AfterPassInvalidated operations.
  // Note: here we rely on a fact that we do not change modules while
  // traversing the pipeline, so the latest captured module is good
  // for all print operations that has not happen yet.
  if (shouldPrintPassNumbers() || shouldPrintAtPassNumber() ||
      shouldPrintAfterPass(PassID))
-    pushModuleDesc(PassID, IR);
+    pushPassRunDescriptor(PassID, IR, DumpIRFilename);

  if (!shouldPrintIR(IR))
    return;
@@ -720,9 +816,20 @@ void PrintIRInstrumentation::printBeforePass(StringRef PassID, Any IR) {
  if (!shouldPrintBeforePass(PassID))
    return;

-  dbgs() << "*** IR Dump Before " << PassID << " on " << getIRName(IR)
-         << " ***\n";
-  unwrapAndPrint(dbgs(), IR);
+  auto WriteIRToStream = [&](raw_ostream &Stream) {
+    Stream << "; *** IR Dump Before " << PassID << " on " << getIRName(IR)
+           << " ***\n";
+    unwrapAndPrint(Stream, IR);
+  };
+
+  if (!DumpIRFilename.empty()) {
+    DumpIRFilename += getFileSuffix(IRDumpFileSuffixType::Before);
+    llvm::raw_fd_ostream DumpIRFileStream{
+        prepareDumpIRFileDescriptor(DumpIRFilename), /* shouldClose */ true};
+    WriteIRToStream(DumpIRFileStream);
+  } else {
+    WriteIRToStream(dbgs());
+  }
 }

 void PrintIRInstrumentation::printAfterPass(StringRef PassID, Any IR) {
@@ -733,21 +840,33 @@ void PrintIRInstrumentation::printAfterPass(StringRef PassID, Any IR) {
      !shouldPrintAtPassNumber())
    return;

-  const Module *M;
-  std::string IRName;
-  StringRef StoredPassID;
-  std::tie(M, IRName, StoredPassID) = popModuleDesc(PassID);
+  auto [M, DumpIRFilename, IRName, StoredPassID] = popPassRunDescriptor(PassID);
  assert(StoredPassID == PassID && "mismatched PassID");

  if (!shouldPrintIR(IR) || !shouldPrintAfterPass(PassID))
    return;

-  dbgs() << "*** IR Dump "
-         << (shouldPrintAtPassNumber()
-                 ? StringRef(formatv("At {0}-{1}", CurrentPassNumber, PassID))
-                 : StringRef(formatv("After {0}", PassID)))
-         << " on " << IRName << " ***\n";
-  unwrapAndPrint(dbgs(), IR);
+  auto WriteIRToStream = [&](raw_ostream &Stream, const StringRef IRName) {
+    Stream << "; *** IR Dump "
+           << (shouldPrintAtPassNumber()
+                   ? StringRef(formatv("At {0}-{1}", CurrentPassNumber, PassID))
+                   : StringRef(formatv("After {0}", PassID)))
+           << " on " << IRName << " ***\n";
+    unwrapAndPrint(Stream, IR);
+  };
+
+  if (!IRDumpDirectory.empty()) {
+    assert(!DumpIRFilename.empty() && "DumpIRFilename must not be empty and "
+                                      "should be set in printBeforePass");
+    const std::string DumpIRFilenameWithSuffix =
+        DumpIRFilename + getFileSuffix(IRDumpFileSuffixType::After).str();
+    llvm::raw_fd_ostream DumpIRFileStream{
+        prepareDumpIRFileDescriptor(DumpIRFilenameWithSuffix),
+        /* shouldClose */ true};
+    WriteIRToStream(DumpIRFileStream, IRName);
+  } else {
+    WriteIRToStream(dbgs(), IRName);
+  }
 }

 void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) {
@@ -758,25 +877,38 @@ void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) {
      !shouldPrintAtPassNumber())
    return;

-  const Module *M;
-  std::string IRName;
-  StringRef StoredPassID;
-  std::tie(M, IRName, StoredPassID) = popModuleDesc(PassID);
+  auto [M, DumpIRFilename, IRName, StoredPassID] = popPassRunDescriptor(PassID);
  assert(StoredPassID == PassID && "mismatched PassID");
  // Additional filtering (e.g. -filter-print-func) can lead to module
  // printing being skipped.
  if (!M || !shouldPrintAfterPass(PassID))
    return;

-  SmallString<20> Banner;
-  if (shouldPrintAtPassNumber())
-    Banner = formatv("*** IR Dump At {0}-{1} on {2} (invalidated) ***",
-                     CurrentPassNumber, PassID, IRName);
-  else 
-    Banner = formatv("*** IR Dump After {0} on {1} (invalidated) ***", 
-                     PassID, IRName);
-  dbgs() << Banner << "\n";
-  printIR(dbgs(), M);
+  auto WriteIRToStream = [&](raw_ostream &Stream, const Module *M,
+                             const StringRef IRName) {
+    SmallString<20> Banner;
+    if (shouldPrintAtPassNumber())
+      Banner = formatv("; *** IR Dump At {0}-{1} on {2} (invalidated) ***",
+                       CurrentPassNumber, PassID, IRName);
+    else
+      Banner = formatv("; *** IR Dump After {0} on {1} (invalidated) ***",
+                       PassID, IRName);
+    Stream << Banner << "\n";
+    printIR(Stream, M);
+  };
+
+  if (!IRDumpDirectory.empty()) {
+    assert(!DumpIRFilename.empty() && "DumpIRFilename must not be empty and "
+                                      "should be set in printBeforePass");
+    const std::string DumpIRFilenameWithSuffix =
+        DumpIRFilename + getFileSuffix(IRDumpFileSuffixType::Invalidated).str();
+    llvm::raw_fd_ostream DumpIRFileStream{
+        prepareDumpIRFileDescriptor(DumpIRFilenameWithSuffix),
+        /* shouldClose */ true};
+    WriteIRToStream(DumpIRFileStream, M, IRName);
+  } else {
+    WriteIRToStream(dbgs(), M, IRName);
+  }
 }

 bool PrintIRInstrumentation::shouldPrintBeforePass(StringRef PassID) {
--- a/llvm/test/Other/dump-before-after-invalidated.ll
+++ b/llvm/test/Other/dump-before-after-invalidated.ll
@@ -0,0 +1,24 @@
+; RUN: rm -rf %t/logs
+; RUN: opt %s -disable-output -passes=loop-deletion -ir-dump-directory %t/logs -print-after=loop-deletion
+
+; RUN: ls %t/logs | FileCheck %s
+; CHECK: 2-{{[a-z0-9]+}}-loop-{{[a-z0-9]+}}-LoopDeletionPass-invalidated.ll
+
+; RUN: ls %t/logs | count 1
+; RUN: cat %t/logs/* | FileCheck %s --check-prefix=CHECK-CONTENTS
+
+; CHECK-CONTENTS: ; *** IR Dump After LoopDeletionPass on bb1 (invalidated) ***
+; CHECK-CONTENTS: define void @foo() {
+; CHECK-CONTENTS:   br label %bb2
+; CHECK-CONTENTS: bb2:                                              ; preds = %0
+; CHECK-CONTENTS:   ret void
+; CHECK-CONTENTS: }
+
+
+define void @foo() {
+  br label %bb1
+bb1:
+  br i1 false, label %bb1, label %bb2
+bb2:
+  ret void
+}
--- a/llvm/test/Other/dump-before-after.ll
+++ b/llvm/test/Other/dump-before-after.ll
@@ -0,0 +1,72 @@
+; RUN: rm -rf %t/logs
+
+; Basic dump before and after a single module pass
+
+; RUN: opt %s -disable-output -passes='no-op-module' -ir-dump-directory %t/logs -print-after=no-op-module -print-before=no-op-module
+; RUN: ls %t/logs | FileCheck %s --check-prefix=SINGLE-PASS
+; RUN: ls %t/logs | count 2
+; SINGLE-PASS-DAG: 0-[[MODULE_NAME_HASH:[a-z0-9]+]]-module-NoOpModulePass-after.ll
+; SINGLE-PASS-DAG: 0-[[MODULE_NAME_HASH]]-module-NoOpModulePass-before.ll
+; RUN: cat %t/logs/*after.ll | FileCheck %s --check-prefix=SINGLE-PASS-CONTENTS
+
+; SINGLE-PASS-CONTENTS: ; *** IR Dump After NoOpModulePass on [module] ***
+; SINGLE-PASS-CONTENTS: define void @foo() {
+; SINGLE-PASS-CONTENTS:   ret void
+; SINGLE-PASS-CONTENTS: }
+; SINGLE-PASS-CONTENTS: define void @bar() {
+; SINGLE-PASS-CONTENTS: entry:
+; SINGLE-PASS-CONTENTS:   br label %my-loop
+; SINGLE-PASS-CONTENTS: my-loop:                                          ; preds = %my-loop, %entry
+; SINGLE-PASS-CONTENTS:   br label %my-loop
+; SINGLE-PASS-CONTENTS: }
+
+; RUN: rm -rf %t/logs
+
+; Dump before and after multiple runs of the same module pass
+; The integers preceding log file names represent relative pass execution order,
+; but they are not necessarily contiguous. That is, passes that are run but
+; not printed still increment the count, leading to gaps in the printed
+; integers.
+
+; RUN: opt %s -disable-output -passes='no-op-module,no-op-module,no-op-module' -ir-dump-directory %t/logs -print-after=no-op-module -print-before=no-op-module
+; RUN: ls %t/logs | FileCheck %s --check-prefix=MULTIPLE-PASSES
+; RUN: ls %t/logs | count 6
+; MULTIPLE-PASSES-DAG: 0-[[MODULE_NAME_HASH:[a-z0-9]+]]-module-NoOpModulePass-after.ll
+; MULTIPLE-PASSES-DAG: 0-[[MODULE_NAME_HASH]]-module-NoOpModulePass-before.ll
+; MULTIPLE-PASSES-DAG: 1-[[MODULE_NAME_HASH]]-module-NoOpModulePass-after.ll
+; MULTIPLE-PASSES-DAG: 1-[[MODULE_NAME_HASH]]-module-NoOpModulePass-before.ll
+; MULTIPLE-PASSES-DAG: 2-[[MODULE_NAME_HASH]]-module-NoOpModulePass-after.ll
+; MULTIPLE-PASSES-DAG: 2-[[MODULE_NAME_HASH]]-module-NoOpModulePass-before.ll
+; RUN: rm -rf %t/logs
+
+; Dump before and after multiple passes, of various levels of granularity
+
+; RUN: opt %s -disable-output -passes='no-op-module,cgscc(no-op-cgscc),function(no-op-function),function(loop(no-op-loop)),no-op-module' -ir-dump-directory %t/logs -print-after=no-op-module,no-op-cgscc,no-op-function,no-op-loop -print-before=no-op-module,no-op-cgscc,no-op-function,no-op-loop
+; RUN: ls %t/logs | FileCheck %s --check-prefix=MULTIPLE-GRANULAR-PASSES
+; RUN: ls %t/logs | count 14
+; MULTIPLE-GRANULAR-PASSES-DAG: 0-[[MODULE_NAME_HASH:[a-z0-9]+]]-module-NoOpModulePass-after.ll
+; MULTIPLE-GRANULAR-PASSES-DAG: 0-[[MODULE_NAME_HASH]]-module-NoOpModulePass-before.ll
+; MULTIPLE-GRANULAR-PASSES-DAG: 1-[[MODULE_NAME_HASH]]-scc-[[SCC_FOO_HASH:[a-z0-9]+]]-NoOpCGSCCPass-after.ll
+; MULTIPLE-GRANULAR-PASSES-DAG: 1-[[MODULE_NAME_HASH]]-scc-[[SCC_FOO_HASH]]-NoOpCGSCCPass-before.ll
+; MULTIPLE-GRANULAR-PASSES-DAG: 2-[[MODULE_NAME_HASH]]-scc-[[SCC_BAR_HASH:[a-z0-9]+]]-NoOpCGSCCPass-after.ll
+; MULTIPLE-GRANULAR-PASSES-DAG: 2-[[MODULE_NAME_HASH]]-scc-[[SCC_BAR_HASH]]-NoOpCGSCCPass-before.ll
+; MULTIPLE-GRANULAR-PASSES-DAG: 3-[[MODULE_NAME_HASH]]-function-[[FUNCTION_FOO_HASH:[a-z0-9]+]]-NoOpFunctionPass-after.ll
+; MULTIPLE-GRANULAR-PASSES-DAG: 3-[[MODULE_NAME_HASH]]-function-[[FUNCTION_FOO_HASH]]-NoOpFunctionPass-before.ll
+; MULTIPLE-GRANULAR-PASSES-DAG: 4-[[MODULE_NAME_HASH]]-function-[[FUNCTION_BAR_HASH:[a-z0-9]+]]-NoOpFunctionPass-after.ll
+; MULTIPLE-GRANULAR-PASSES-DAG: 4-[[MODULE_NAME_HASH]]-function-[[FUNCTION_BAR_HASH]]-NoOpFunctionPass-before.ll
+; MULTIPLE-GRANULAR-PASSES-DAG: 9-[[MODULE_NAME_HASH]]-loop-[[LOOP_NAME_HASH:[a-z0-9]+]]-NoOpLoopPass-after.ll
+; MULTIPLE-GRANULAR-PASSES-DAG: 9-[[MODULE_NAME_HASH]]-loop-[[LOOP_NAME_HASH]]-NoOpLoopPass-before.ll
+; MULTIPLE-GRANULAR-PASSES-DAG: 10-[[MODULE_NAME_HASH]]-module-NoOpModulePass-after.ll
+; MULTIPLE-GRANULAR-PASSES-DAG: 10-[[MODULE_NAME_HASH]]-module-NoOpModulePass-before.ll
+; RUN: rm -rf %t/logs
+
+define void @foo() {
+    ret void
+}
+
+define void @bar() {
+entry:
+    br label %my-loop
+my-loop:
+    br label %my-loop
+}