[PowerPC] Fix vector equality comparison for v2i64 pre-Power8
The current code assumes that a v2i64 equality comparison can be performed with a word comparison instruction. While this is true if the entire 64-bit results are used, it does not work in general: the low-order and high-order words of a doubleword can produce different results, so a user of only one of those words will get the wrong result. This patch ANDs each result word with the other word of the same doubleword, so that each word holds the result of the comparison of the entire doubleword that contains it. Differential revision: https://reviews.llvm.org/D115678
This commit is contained in:
@@ -3500,15 +3500,16 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (LHS.getValueType() == MVT::v2i64) {
|
||||
// Equality can be handled by casting to the legal type for Altivec
|
||||
// comparisons, everything else needs to be expanded.
|
||||
if (CC == ISD::SETEQ || CC == ISD::SETNE) {
|
||||
return DAG.getNode(
|
||||
ISD::BITCAST, dl, MVT::v2i64,
|
||||
DAG.getSetCC(dl, MVT::v4i32,
|
||||
DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
|
||||
DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC));
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
if (CC != ISD::SETEQ && CC != ISD::SETNE)
|
||||
return SDValue();
|
||||
SDValue SetCC32 = DAG.getSetCC(
|
||||
dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
|
||||
DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
|
||||
int ShuffV[] = {1, 0, 3, 2};
|
||||
SDValue Shuff =
|
||||
DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
|
||||
return DAG.getBitcast(
|
||||
MVT::v2i64, DAG.getNode(ISD::AND, dl, MVT::v4i32, Shuff, SetCC32));
|
||||
}
|
||||
|
||||
// We handle most of these in the usual way.
|
||||
|
||||
48
llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll
Normal file
48
llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll
Normal file
@@ -0,0 +1,48 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc-aix- < %s | \
|
||||
; RUN: FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64le-- < %s | \
|
||||
; RUN: FileCheck %s --check-prefix=CHECK_LE
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-- < %s | \
|
||||
; RUN: FileCheck %s --check-prefix=CHECK_P8LE
|
||||
define i1 @foo(<2 x i64> %a) #0 {
|
||||
; CHECK-LABEL: foo:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxswapd 35, 34
|
||||
; CHECK-NEXT: lwz 3, L..C0(2) # %const.0
|
||||
; CHECK-NEXT: vcmpequw 2, 2, 3
|
||||
; CHECK-NEXT: lxvw4x 35, 0, 3
|
||||
; CHECK-NEXT: addi 3, 1, -16
|
||||
; CHECK-NEXT: vperm 3, 2, 2, 3
|
||||
; CHECK-NEXT: xxland 0, 35, 34
|
||||
; CHECK-NEXT: stxvw4x 0, 0, 3
|
||||
; CHECK-NEXT: lwz 3, -12(1)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK_LE-LABEL: foo:
|
||||
; CHECK_LE: # %bb.0: # %entry
|
||||
; CHECK_LE-NEXT: xxswapd 35, 34
|
||||
; CHECK_LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
||||
; CHECK_LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
||||
; CHECK_LE-NEXT: vcmpequw 2, 2, 3
|
||||
; CHECK_LE-NEXT: lvx 3, 0, 3
|
||||
; CHECK_LE-NEXT: addi 3, 1, -16
|
||||
; CHECK_LE-NEXT: vperm 3, 2, 2, 3
|
||||
; CHECK_LE-NEXT: xxland 34, 35, 34
|
||||
; CHECK_LE-NEXT: stvx 2, 0, 3
|
||||
; CHECK_LE-NEXT: ld 3, -16(1)
|
||||
; CHECK_LE-NEXT: blr
|
||||
;
|
||||
; CHECK_P8LE-LABEL: foo:
|
||||
; CHECK_P8LE: # %bb.0: # %entry
|
||||
; CHECK_P8LE-NEXT: xxswapd 35, 34
|
||||
; CHECK_P8LE-NEXT: vcmpequd 2, 2, 3
|
||||
; CHECK_P8LE-NEXT: xxswapd 0, 34
|
||||
; CHECK_P8LE-NEXT: mffprd 3, 0
|
||||
; CHECK_P8LE-NEXT: blr
|
||||
entry:
|
||||
%0 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
|
||||
%1 = icmp eq <2 x i64> %a, %0
|
||||
%2 = extractelement <2 x i1> %1, i32 0
|
||||
ret i1 %2
|
||||
}
|
||||
Reference in New Issue
Block a user