DebugInfo: Don't hash DIE offsets before they're computed

Instead of hashing DIE offsets, hash DIE references the same as they would be when used outside of a loclist - that is, deep hash the type on first use, and hash the numbering on subsequent uses. This does produce different hashes for different type references, where it did not before (because we were hashing zero all the time - so it didn't matter what type was referenced, the hash would be identical). This also allows us to enforce that the DIE offset (& size) is not queried before it is used (which came up while investigating another bug recently).
2021-12-25 15:59:29 -08:00
parent a00f480fe8
commit 2bddab25db
6 changed files with 43 additions and 15 deletions
--- a/llvm/include/llvm/CodeGen/DIE.h
+++ b/llvm/include/llvm/CodeGen/DIE.h
@@ -774,8 +774,16 @@ public:
  unsigned getAbbrevNumber() const { return AbbrevNumber; }
  dwarf::Tag getTag() const { return Tag; }
  /// Get the compile/type unit relative offset of this DIE.
-  unsigned getOffset() const { return Offset; }
-  unsigned getSize() const { return Size; }
+  unsigned getOffset() const {
+    // A real Offset can't be zero because the unit headers are at offset zero.
+    assert(Offset && "Offset being queried before it's been computed.");
+    return Offset;
+  }
+  unsigned getSize() const {
+    // A real Size can't be zero because it includes the non-empty abbrev code.
+    assert(Size && "Size being queried before it's been ocmputed.");
+    return Size;
+  }
  bool hasChildren() const { return ForceChildren || !Children.empty(); }
  void setForceChildren(bool B) { ForceChildren = B; }

--- a/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -33,6 +33,7 @@ class ByteStreamer {
  virtual void emitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
  virtual void emitULEB128(uint64_t DWord, const Twine &Comment = "",
                           unsigned PadTo = 0) = 0;
+  virtual void emitDIERef(const DIE &D) = 0;
 };

 class APByteStreamer final : public ByteStreamer {
@@ -54,15 +55,21 @@ public:
    AP.OutStreamer->AddComment(Comment);
    AP.emitULEB128(DWord, nullptr, PadTo);
  }
+  void emitDIERef(const DIE &D) override {
+    uint64_t Offset = D.getOffset();
+    static constexpr unsigned ULEB128PadSize = 4;
+    assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
+    emitULEB128(Offset, "", ULEB128PadSize);
+  }
 };

 class HashingByteStreamer final : public ByteStreamer {
 private:
  DIEHash &Hash;
 public:
- HashingByteStreamer(DIEHash &H) : Hash(H) {}
-  void emitInt8(uint8_t Byte, const Twine &Comment) override {
-    Hash.update(Byte);
+   HashingByteStreamer(DIEHash &H) : Hash(H) {}
+   void emitInt8(uint8_t Byte, const Twine &Comment) override {
+     Hash.update(Byte);
  }
  void emitSLEB128(uint64_t DWord, const Twine &Comment) override {
    Hash.addSLEB128(DWord);
@@ -71,6 +78,7 @@ class HashingByteStreamer final : public ByteStreamer {
                   unsigned PadTo) override {
    Hash.addULEB128(DWord);
  }
+  void emitDIERef(const DIE &D) override { Hash.hashRawTypeReference(D); }
 };

 class BufferByteStreamer final : public ByteStreamer {
@@ -115,9 +123,14 @@ public:
      // with each other.
      for (size_t i = 1; i < Length; ++i)
        Comments.push_back("");
-
    }
  }
+  void emitDIERef(const DIE &D) override {
+    uint64_t Offset = D.getOffset();
+    static constexpr unsigned ULEB128PadSize = 4;
+    assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
+    emitULEB128(Offset, "", ULEB128PadSize);
+  }
 };

 }
--- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -207,6 +207,18 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
  computeHash(Entry);
 }

+void DIEHash::hashRawTypeReference(const DIE &Entry) {
+  unsigned &DieNumber = Numbering[&Entry];
+  if (DieNumber) {
+    addULEB128('R');
+    addULEB128(DieNumber);
+    return;
+  }
+  DieNumber = Numbering.size();
+  addULEB128('T');
+  computeHash(Entry);
+}
+
 // Hash all of the values in a block like set of values. This assumes that
 // all of the data is going to be added as integers.
 void DIEHash::hashBlockData(const DIE::const_value_range &Values) {
--- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -62,6 +62,8 @@ public:
  /// Encodes and adds \param Value to the hash as a SLEB128.
  void addSLEB128(int64_t Value);

+  void hashRawTypeReference(const DIE &Entry);
+
 private:
  /// Adds \param Str to the hash and includes a NULL byte.
  void addString(StringRef Str);
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -2539,14 +2539,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
      if (Op.getDescription().Op[I] == Encoding::SizeNA)
        continue;
      if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) {
-        uint64_t Offset =
-            CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset();
-        assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
-        Streamer.emitULEB128(Offset, "", ULEB128PadSize);
-        // Make sure comments stay aligned.
-        for (unsigned J = 0; J < ULEB128PadSize; ++J)
-          if (Comment != End)
-            Comment++;
+        Streamer.emitDIERef(*CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die);
      } else {
        for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J)
          Streamer.emitInt8(Data.getData()[J], Comment != End ? *(Comment++) : "");
--- a/llvm/test/DebugInfo/X86/convert-loclist.ll
+++ b/llvm/test/DebugInfo/X86/convert-loclist.ll
@@ -13,7 +13,7 @@
 ; often - add another IR file with a different DW_OP_convert that's otherwise
 ; identical and demonstrate that they have different DWO IDs.

-; SPLIT: 0x00000000: Compile Unit: {{.*}} DWO_id = 0xecf2563326b0bdd3
+; SPLIT: 0x00000000: Compile Unit: {{.*}} DWO_id = 0xa6edbf487b0a7acf

 ; Regression testing a fairly quirky bug where instead of hashing (see above),
 ; extra bytes would be emitted into the output assembly in no