[HashRecognize] Rewrite arePHIsIntertwined (#144878)

The test crc8.le.tc16 is a valid CRC algorithm, but isn't recognized as
such due to a buggy arePHIsIntertwined, which is asymmetric in its
PHINode arguments. There is also a fundamental correctness issue: the
core functionality is to match an XOR that's a recurrence in both PHI
nodes, ignoring casts, but the user of the XOR is never checked. Rewrite
the function and rename it to isConditionalOnXorOfPHIs.

crc8.le.tc16 is still not recognized as a valid CRC algorithm, due to an
incorrect check for loop iterations exceeding the bitwidth of the
result: in reality, the iteration count should not exceed the bitwidth
of LHSAux, but we leave this fix to a follow-up.

Co-authored-by: Piotr Fusik <p.fusik@samsung.com>
This commit is contained in:
Ramkumar Ramachandra
2025-07-02 15:36:27 +01:00
committed by GitHub
parent 8dcdc0ff1f
commit cbfd0d68ea
3 changed files with 224 additions and 35 deletions

View File

@@ -2119,6 +2119,13 @@ m_IntToPtr(const OpTy &Op) {
return CastOperator_match<OpTy, Instruction::IntToPtr>(Op); return CastOperator_match<OpTy, Instruction::IntToPtr>(Op);
} }
/// Matches either \p Op itself or any cast of \p Op. Lets a pattern look past
/// casts without caring which kind of cast is involved.
template <typename OpTy>
inline match_combine_or<CastInst_match<OpTy, CastInst>, OpTy>
m_CastOrSelf(const OpTy &Op) {
  // Try the "any cast" alternative first, then fall back to the bare operand.
  using AnyCastMatch = CastInst_match<OpTy, CastInst>;
  return m_CombineOr(AnyCastMatch(Op), Op);
}
/// Matches Trunc. /// Matches Trunc.
template <typename OpTy> template <typename OpTy>
inline CastInst_match<OpTy, TruncInst> m_Trunc(const OpTy &Op) { inline CastInst_match<OpTy, TruncInst> m_Trunc(const OpTy &Op) {

View File

@@ -497,42 +497,46 @@ CRCTable HashRecognize::genSarwateTable(const APInt &GenPoly,
return Table; return Table;
} }
/// Checks if \p Reference is reachable from \p Needle on the use-def chain, and /// Checks that \p P1 and \p P2 are used together in an XOR in the use-def chain
/// that there are no stray PHI nodes while digging the use-def chain. \p /// of \p SI's condition, ignoring any casts. The purpose of this function is to
/// BOToMatch is a CRC peculiarity: at least one of the Users of Needle needs to /// ensure that LHSAux from the SimpleRecurrence is used correctly in the CRC
/// match this OpCode, which is XOR for CRC. /// computation. We cannot check the correctness of casts at this point, and
static bool arePHIsIntertwined( /// rely on the KnownBits propagation to check correctness of the CRC
const PHINode *Needle, const PHINode *Reference, const Loop &L, /// computation.
Instruction::BinaryOps BOToMatch = Instruction::BinaryOpsEnd) { ///
// Initialize the worklist with Users of the Needle. /// In other words, it checks for the following pattern:
///
/// loop:
/// %P1 = phi [_, %entry], [%P1.next, %loop]
/// %P2 = phi [_, %entry], [%P2.next, %loop]
/// ...
/// %xor = xor (CastOrSelf %P1), (CastOrSelf %P2)
///
/// where %xor is in the use-def chain of \p SI's condition.
static bool isConditionalOnXorOfPHIs(const SelectInst *SI, const PHINode *P1,
const PHINode *P2, const Loop &L) {
SmallVector<const Instruction *> Worklist; SmallVector<const Instruction *> Worklist;
for (const User *U : Needle->users()) {
if (auto *UI = dyn_cast<Instruction>(U))
if (L.contains(UI))
Worklist.push_back(UI);
}
// BOToMatch is usually XOR for CRC. // matchConditionalRecurrence has already ensured that the SelectInst's
if (BOToMatch != Instruction::BinaryOpsEnd) { // condition is an Instruction.
if (count_if(Worklist, [BOToMatch](const Instruction *I) { Worklist.push_back(cast<Instruction>(SI->getCondition()));
return I->getOpcode() == BOToMatch;
}) != 1)
return false;
}
while (!Worklist.empty()) { while (!Worklist.empty()) {
const Instruction *I = Worklist.pop_back_val(); const Instruction *I = Worklist.pop_back_val();
// Since Needle is never pushed onto the Worklist, I must either be the // Don't add a PHI's operands to the Worklist.
// Reference PHI node (in which case we're done), or a stray PHI node (in
// which case we abort).
if (isa<PHINode>(I)) if (isa<PHINode>(I))
return I == Reference; continue;
// If we match an XOR of the two PHIs ignoring casts, we're done.
if (match(I, m_c_Xor(m_CastOrSelf(m_Specific(P1)),
m_CastOrSelf(m_Specific(P2)))))
return true;
// Continue along the use-def chain.
for (const Use &U : I->operands()) for (const Use &U : I->operands())
if (auto *UI = dyn_cast<Instruction>(U)) if (auto *UI = dyn_cast<Instruction>(U))
// Don't push Needle back onto the Worklist. if (L.contains(UI))
if (UI != Needle && L.contains(UI))
Worklist.push_back(UI); Worklist.push_back(UI);
} }
return false; return false;
@@ -586,9 +590,19 @@ HashRecognize::recognizeCRC() const {
if (SimpleRecurrence) { if (SimpleRecurrence) {
if (isBigEndianBitShift(SimpleRecurrence.BO, SE) != ByteOrderSwapped) if (isBigEndianBitShift(SimpleRecurrence.BO, SE) != ByteOrderSwapped)
return "Loop with non-unit bitshifts"; return "Loop with non-unit bitshifts";
if (!arePHIsIntertwined(SimpleRecurrence.Phi, ConditionalRecurrence.Phi, L,
Instruction::BinaryOps::Xor)) // Ensure that the PHIs have exactly two uses:
return "Simple recurrence doesn't use conditional recurrence with XOR"; // the bit-shift, and the XOR (or a cast feeding into the XOR).
if (!ConditionalRecurrence.Phi->hasNUses(2) ||
!SimpleRecurrence.Phi->hasNUses(2))
return "Recurrences have stray uses";
// Check that the SelectInst ConditionalRecurrence.Step is conditional on
// the XOR of SimpleRecurrence.Phi and ConditionalRecurrence.Phi.
if (!isConditionalOnXorOfPHIs(cast<SelectInst>(ConditionalRecurrence.Step),
SimpleRecurrence.Phi,
ConditionalRecurrence.Phi, L))
return "Recurrences not intertwined with XOR";
} }
// Make sure that the computed value is used in the exit block: this should be // Make sure that the computed value is used in the exit block: this should be

View File

@@ -144,6 +144,34 @@ exit: ; preds = %loop
ret i16 %crc.next ret i16 %crc.next
} }
; FIXME: %crc is i8 while %data is i16, and the loop runs 16 times (%iv counts
; 0..15). The trip count is compared against the bitwidth of the i8 result, so
; the loop is rejected with the reason below.
define i8 @crc8.le.tc16(i16 %msg, i8 %checksum) {
; CHECK-LABEL: 'crc8.le.tc16'
; CHECK-NEXT: Did not find a hash algorithm
; CHECK-NEXT: Reason: Loop iterations exceed bitwidth of result
;
entry:
br label %loop
loop: ; preds = %loop, %entry
%iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
%crc = phi i8 [ %checksum, %entry ], [ %crc.next, %loop ]
%data = phi i16 [ %msg, %entry ], [ %data.next, %loop ]
%data.trunc = trunc i16 %data to i8
%xor.crc.data = xor i8 %crc, %data.trunc
%and.crc.data = and i8 %xor.crc.data, 1
%data.next = lshr i16 %data, 1
%check.sb = icmp eq i8 %and.crc.data, 0
%crc.lshr = lshr i8 %crc, 1
%crc.xor = xor i8 %crc.lshr, 29
%crc.next = select i1 %check.sb, i8 %crc.lshr, i8 %crc.xor
%iv.next = add nuw nsw i8 %iv, 1
%exit.cond = icmp samesign ult i8 %iv, 15
br i1 %exit.cond, label %loop, label %exit
exit: ; preds = %loop
ret i8 %crc.next
}
define i16 @crc16.be.tc8.crc.init.li(i16 %checksum, i8 %msg) { define i16 @crc16.be.tc8.crc.init.li(i16 %checksum, i8 %msg) {
; CHECK-LABEL: 'crc16.be.tc8.crc.init.li' ; CHECK-LABEL: 'crc16.be.tc8.crc.init.li'
; CHECK-NEXT: Found big-endian CRC-16 loop with trip count 8 ; CHECK-NEXT: Found big-endian CRC-16 loop with trip count 8
@@ -601,7 +629,7 @@ exit: ; preds = %loop
define i16 @not.crc.wrong.sb.check.const(i8 %msg, i16 %checksum) { define i16 @not.crc.wrong.sb.check.const(i8 %msg, i16 %checksum) {
; CHECK-LABEL: 'not.crc.wrong.sb.check.const' ; CHECK-LABEL: 'not.crc.wrong.sb.check.const'
; CHECK-NEXT: Did not find a hash algorithm ; CHECK-NEXT: Did not find a hash algorithm
; CHECK-NEXT: Reason: Simple recurrence doesn't use conditional recurrence with XOR ; CHECK-NEXT: Reason: Bad RHS of significant-bit-check
; ;
entry: entry:
br label %loop br label %loop
@@ -610,9 +638,8 @@ loop: ; preds = %loop, %entry
%iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
%data = phi i8 [ %msg, %entry ], [ %data.next, %loop ] %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ]
%crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ] %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
%crc.lshr = lshr i16 %crc, 8
%data.ext = zext i8 %data to i16 %data.ext = zext i8 %data to i16
%xor.crc.data = xor i16 %crc.lshr, %data.ext %xor.crc.data = xor i16 %crc, %data.ext
%check.sb = icmp samesign ult i16 %xor.crc.data, 128 %check.sb = icmp samesign ult i16 %xor.crc.data, 128
%crc.shl = shl i16 %crc, 1 %crc.shl = shl i16 %crc, 1
%crc.xor = xor i16 %crc.shl, 258 %crc.xor = xor i16 %crc.shl, 258
@@ -838,10 +865,37 @@ exit: ; preds = %loop
ret i16 %crc.next ret i16 %crc.next
} }
; %data (i8) is zero-extended to i16 before being XOR'ed with %crc, so its bits
; occupy the low byte, while the select condition tests the sign bit of the
; XOR. Both PHIs have exactly two uses and the XOR of the PHIs (through the
; cast) feeds the select condition; the expected rejection is the bit-pattern
; reason below.
define i16 @not.crc.bad.cast(i8 %msg, i16 %checksum) {
; CHECK-LABEL: 'not.crc.bad.cast'
; CHECK-NEXT: Did not find a hash algorithm
; CHECK-NEXT: Reason: Expected bottom 8 bits zero (????????00001011)
;
entry:
br label %loop
loop: ; preds = %loop, %entry
%iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
%data = phi i8 [ %msg, %entry ], [ %data.next, %loop ]
%crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
%data.ext = zext i8 %data to i16
%xor.crc.data = xor i16 %crc, %data.ext
%check.sb = icmp slt i16 %xor.crc.data, 0
%crc.shl = shl i16 %crc, 1
%crc.xor = xor i16 %crc.shl, 29
%crc.next = select i1 %check.sb, i16 %crc.shl, i16 %crc.xor
%data.next = shl i8 %data, 1
%iv.next = add nuw nsw i8 %iv, 1
%exit.cond = icmp samesign ult i8 %iv, 7
br i1 %exit.cond, label %loop, label %exit
exit: ; preds = %loop
ret i16 %crc.next
}
define i32 @not.crc.dead.msg.bad.use(i32 %checksum, i32 %msg) { define i32 @not.crc.dead.msg.bad.use(i32 %checksum, i32 %msg) {
; CHECK-LABEL: 'not.crc.dead.msg.bad.use' ; CHECK-LABEL: 'not.crc.dead.msg.bad.use'
; CHECK-NEXT: Did not find a hash algorithm ; CHECK-NEXT: Did not find a hash algorithm
; CHECK-NEXT: Reason: Simple recurrence doesn't use conditional recurrence with XOR ; CHECK-NEXT: Reason: Recurrences not intertwined with XOR
; ;
entry: entry:
br label %loop br label %loop
@@ -869,7 +923,7 @@ exit: ; preds = %loop
define i16 @not.crc.dead.msg.no.use(i8 %msg, i16 %checksum) { define i16 @not.crc.dead.msg.no.use(i8 %msg, i16 %checksum) {
; CHECK-LABEL: 'not.crc.dead.msg.no.use' ; CHECK-LABEL: 'not.crc.dead.msg.no.use'
; CHECK-NEXT: Did not find a hash algorithm ; CHECK-NEXT: Did not find a hash algorithm
; CHECK-NEXT: Reason: Simple recurrence doesn't use conditional recurrence with XOR ; CHECK-NEXT: Reason: Recurrences have stray uses
; ;
entry: entry:
br label %loop br label %loop
@@ -898,7 +952,7 @@ exit: ; preds = %loop
define i32 @not.crc.dead.msg.wrong.op(i32 %checksum, i32 %msg) { define i32 @not.crc.dead.msg.wrong.op(i32 %checksum, i32 %msg) {
; CHECK-LABEL: 'not.crc.dead.msg.wrong.op' ; CHECK-LABEL: 'not.crc.dead.msg.wrong.op'
; CHECK-NEXT: Did not find a hash algorithm ; CHECK-NEXT: Did not find a hash algorithm
; CHECK-NEXT: Reason: Simple recurrence doesn't use conditional recurrence with XOR ; CHECK-NEXT: Reason: Recurrences not intertwined with XOR
; ;
entry: entry:
br label %loop br label %loop
@@ -922,6 +976,120 @@ exit: ; preds = %loop
ret i32 %crc.next ret i32 %crc.next
} }
; The XOR of %crc and %data is computed but never used: the select condition
; is derived from an OR of the two PHIs instead. Each PHI has three uses (the
; XOR, the OR and its own shift), so the expected reason is the stray-uses
; diagnostic.
define i16 @not.crc.dead.msg.xor.notin.select.chain(i16 %msg, i16 %checksum) {
; CHECK-LABEL: 'not.crc.dead.msg.xor.notin.select.chain'
; CHECK-NEXT: Did not find a hash algorithm
; CHECK-NEXT: Reason: Recurrences have stray uses
;
entry:
br label %loop
loop: ; preds = %loop, %entry
%iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
%crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
%data = phi i16 [ %msg, %entry ], [ %data.next, %loop ]
%xor.crc.data = xor i16 %crc, %data
%or.crc.data = or i16 %crc, %data
%and.crc.data = and i16 %or.crc.data, 1
%data.next = lshr i16 %data, 1
%check.sb = icmp eq i16 %and.crc.data, 0
%crc.lshr = lshr i16 %crc, 1
%crc.xor = xor i16 %crc.lshr, -24575
%crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor
%iv.next = add nuw nsw i8 %iv, 1
%exit.cond = icmp samesign ult i8 %iv, 15
br i1 %exit.cond, label %loop, label %exit
exit: ; preds = %loop
ret i16 %crc.next
}
; The XOR of %crc and %data is multiplied by zero and the product is XOR'ed
; with %crc again, so the select condition effectively depends on %crc alone.
; %crc has three uses (both XORs and its shift), so the expected reason is the
; stray-uses diagnostic.
define i16 @not.crc.bad.xor.crc.data(i16 %msg, i16 %checksum) {
; CHECK-LABEL: 'not.crc.bad.xor.crc.data'
; CHECK-NEXT: Did not find a hash algorithm
; CHECK-NEXT: Reason: Recurrences have stray uses
;
entry:
br label %loop
loop: ; preds = %loop, %entry
%iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
%crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
%data = phi i16 [ %msg, %entry ], [ %data.next, %loop ]
%xor.crc.data = xor i16 %crc, %data
%mul.corrupt = mul i16 %xor.crc.data, 0
%xor.crc.data.corrupt = xor i16 %mul.corrupt, %crc
%and.crc.data = and i16 %xor.crc.data.corrupt, 1
%data.next = lshr i16 %data, 1
%check.sb = icmp eq i16 %and.crc.data, 0
%crc.lshr = lshr i16 %crc, 1
%crc.xor = xor i16 %crc.lshr, -24575
%crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor
%iv.next = add nuw nsw i8 %iv, 1
%exit.cond = icmp samesign ult i8 %iv, 15
br i1 %exit.cond, label %loop, label %exit
exit: ; preds = %loop
ret i16 %crc.next
}
; The XOR of %crc and %data is multiplied by zero and the product is OR'ed
; with %crc, so the select condition effectively depends on %crc alone. %crc
; has three uses (the XOR, the OR and its shift), so the expected reason is
; the stray-uses diagnostic.
define i16 @not.crc.dead.msg.or.zero(i16 %msg, i16 %checksum) {
; CHECK-LABEL: 'not.crc.dead.msg.or.zero'
; CHECK-NEXT: Did not find a hash algorithm
; CHECK-NEXT: Reason: Recurrences have stray uses
;
entry:
br label %loop
loop: ; preds = %loop, %entry
%iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
%crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
%data = phi i16 [ %msg, %entry ], [ %data.next, %loop ]
%xor.crc.data = xor i16 %crc, %data
%mul.corrupt = mul i16 %xor.crc.data, 0
%or.crc.data.corrupt = or i16 %mul.corrupt, %crc
%and.crc.data = and i16 %or.crc.data.corrupt, 1
%data.next = lshr i16 %data, 1
%check.sb = icmp eq i16 %and.crc.data, 0
%crc.lshr = lshr i16 %crc, 1
%crc.xor = xor i16 %crc.lshr, -24575
%crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor
%iv.next = add nuw nsw i8 %iv, 1
%exit.cond = icmp samesign ult i8 %iv, 15
br i1 %exit.cond, label %loop, label %exit
exit: ; preds = %loop
ret i16 %crc.next
}
; Both PHIs have exactly two uses and their XOR is in the use-def chain of the
; select condition, but the XOR is first multiplied by the function argument
; %corrupt. The expected rejection is the "Unknown Value" reason below.
define i16 @not.crc.unknown.value(i16 %msg, i16 %checksum, i16 %corrupt) {
; CHECK-LABEL: 'not.crc.unknown.value'
; CHECK-NEXT: Did not find a hash algorithm
; CHECK-NEXT: Reason: Unknown Value
;
entry:
br label %loop
loop: ; preds = %loop, %entry
%iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
%crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
%data = phi i16 [ %msg, %entry ], [ %data.next, %loop ]
%xor.crc.data = xor i16 %crc, %data
%xor.crc.data.corrupt = mul i16 %xor.crc.data, %corrupt
%and.crc.data = and i16 %xor.crc.data.corrupt, 1
%data.next = lshr i16 %data, 1
%check.sb = icmp eq i16 %and.crc.data, 0
%crc.lshr = lshr i16 %crc, 1
%crc.xor = xor i16 %crc.lshr, -24575
%crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor
%iv.next = add nuw nsw i8 %iv, 1
%exit.cond = icmp samesign ult i8 %iv, 15
br i1 %exit.cond, label %loop, label %exit
exit: ; preds = %loop
ret i16 %crc.next
}
define i16 @not.crc.float.simple.recurrence(float %msg, i16 %checksum) { define i16 @not.crc.float.simple.recurrence(float %msg, i16 %checksum) {
; CHECK-LABEL: 'not.crc.float.simple.recurrence' ; CHECK-LABEL: 'not.crc.float.simple.recurrence'
; CHECK-NEXT: Did not find a hash algorithm ; CHECK-NEXT: Did not find a hash algorithm