[PowerPC] Fix vector equality comparison for v2i64 pre-Power8

The current code assumes that a v2i64 equality
comparison can be performed with a word comparison
instruction. While this works if the entire 64-bit
result of each element is used, it does not work in
general: the low-order and high-order words of a
doubleword can produce different results, and a user
of only one of those words will get the wrong result.

This patch adds an AND of each result word with the
other result word of the same doubleword, so that
each word has the result of the comparison of the
entire doubleword that contains it.

Differential revision: https://reviews.llvm.org/D115678
This commit is contained in:
Nemanja Ivanovic
2021-12-21 14:28:41 -06:00
parent a3ea9052d6
commit 1674d9b6b2
2 changed files with 58 additions and 9 deletions

View File

@@ -3500,15 +3500,16 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (LHS.getValueType() == MVT::v2i64) {
// Equality can be handled by casting to the legal type for Altivec
// comparisons, everything else needs to be expanded.
if (CC == ISD::SETEQ || CC == ISD::SETNE) {
return DAG.getNode(
ISD::BITCAST, dl, MVT::v2i64,
DAG.getSetCC(dl, MVT::v4i32,
DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC));
}
return SDValue();
if (CC != ISD::SETEQ && CC != ISD::SETNE)
return SDValue();
SDValue SetCC32 = DAG.getSetCC(
dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
int ShuffV[] = {1, 0, 3, 2};
SDValue Shuff =
DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
return DAG.getBitcast(
MVT::v2i64, DAG.getNode(ISD::AND, dl, MVT::v4i32, Shuff, SetCC32));
}
// We handle most of these in the usual way.

View File

@@ -0,0 +1,48 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc-aix- < %s | \
; RUN: FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64le-- < %s | \
; RUN: FileCheck %s --check-prefix=CHECK_LE
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-- < %s | \
; RUN: FileCheck %s --check-prefix=CHECK_P8LE
; Regression test for v2i64 equality compares where only one lane of the
; result is consumed. The function compares %a against a shuffle of itself
; (lane 0 of the shuffle is element 1 of %a, lane 1 is undef) and returns
; lane 0 of the <2 x i1> result, i.e. whether element 0 equals element 1.
; In the pwr7 expectations below, the word compare (vcmpequw) is followed by
; a permute and a vector AND; the pwr8 expectations use vcmpequd directly.
define i1 @foo(<2 x i64> %a) #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxswapd 35, 34
; CHECK-NEXT: lwz 3, L..C0(2) # %const.0
; CHECK-NEXT: vcmpequw 2, 2, 3
; CHECK-NEXT: lxvw4x 35, 0, 3
; CHECK-NEXT: addi 3, 1, -16
; CHECK-NEXT: vperm 3, 2, 2, 3
; CHECK-NEXT: xxland 0, 35, 34
; CHECK-NEXT: stxvw4x 0, 0, 3
; CHECK-NEXT: lwz 3, -12(1)
; CHECK-NEXT: blr
;
; CHECK_LE-LABEL: foo:
; CHECK_LE: # %bb.0: # %entry
; CHECK_LE-NEXT: xxswapd 35, 34
; CHECK_LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; CHECK_LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; CHECK_LE-NEXT: vcmpequw 2, 2, 3
; CHECK_LE-NEXT: lvx 3, 0, 3
; CHECK_LE-NEXT: addi 3, 1, -16
; CHECK_LE-NEXT: vperm 3, 2, 2, 3
; CHECK_LE-NEXT: xxland 34, 35, 34
; CHECK_LE-NEXT: stvx 2, 0, 3
; CHECK_LE-NEXT: ld 3, -16(1)
; CHECK_LE-NEXT: blr
;
; CHECK_P8LE-LABEL: foo:
; CHECK_P8LE: # %bb.0: # %entry
; CHECK_P8LE-NEXT: xxswapd 35, 34
; CHECK_P8LE-NEXT: vcmpequd 2, 2, 3
; CHECK_P8LE-NEXT: xxswapd 0, 34
; CHECK_P8LE-NEXT: mffprd 3, 0
; CHECK_P8LE-NEXT: blr
entry:
; Build the comparison operand: lane 0 takes element 1 of %a, lane 1 is undef.
%0 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
; Lane-wise 64-bit equality compare of %a against the shuffled copy.
%1 = icmp eq <2 x i64> %a, %0
; Only lane 0 of the compare result is used as the return value.
%2 = extractelement <2 x i1> %1, i32 0
ret i1 %2
}