[PowerPC] Fix vperm codegen

Commit rG934d5fa2b8672695c335deed0e19d0e777c98403 changed the vperm codegen
for cases where vperm is not replaced by xxperm; this patch reverts that change.

Reviewed By: stefanp

Differential Revision: https://reviews.llvm.org/D138736
This commit is contained in:
Maryam Moghadas
2022-11-25 15:58:00 -06:00
parent ca856fff1c
commit 7614ba0a5d
13 changed files with 140 additions and 131 deletions

View File

@@ -10186,9 +10186,6 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
}
}
bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
// that it is in input element units, not in bytes. Convert now.
@@ -10199,6 +10196,9 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
EVT EltVT = V1.getValueType().getVectorElementType();
unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
/*
Vectors will be appended like so: [ V1 | v2 ]
XXSWAPD on V1:
@@ -10219,24 +10219,27 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
if (V1HasXXSWAPD) {
if (SrcElt < 8)
SrcElt += 8;
else if (SrcElt < 16)
SrcElt -= 8;
}
if (V2HasXXSWAPD) {
if (SrcElt > 23)
SrcElt -= 8;
else if (SrcElt > 15)
SrcElt += 8;
}
if (NeedSwap) {
if (SrcElt < 16)
SrcElt += 16;
else
SrcElt -= 16;
if (Opcode == PPCISD::XXPERM) {
if (V1HasXXSWAPD) {
if (SrcElt < 8)
SrcElt += 8;
else if (SrcElt < 16)
SrcElt -= 8;
}
if (V2HasXXSWAPD) {
if (SrcElt > 23)
SrcElt -= 8;
else if (SrcElt > 15)
SrcElt += 8;
}
if (NeedSwap) {
if (SrcElt < 16)
SrcElt += 16;
else
SrcElt -= 16;
}
}
for (unsigned j = 0; j != BytesPerElement; ++j)
if (isLittleEndian)
ResultMask.push_back(
@@ -10246,16 +10249,15 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
}
if (V1HasXXSWAPD) {
dl = SDLoc(V1->getOperand(0));
V1 = V1->getOperand(0)->getOperand(1);
}
if (V2HasXXSWAPD) {
dl = SDLoc(V2->getOperand(0));
V2 = V2->getOperand(0)->getOperand(1);
}
if (V1HasXXSWAPD || V2HasXXSWAPD || Opcode == PPCISD::XXPERM) {
if (Opcode == PPCISD::XXPERM && (V1HasXXSWAPD || V2HasXXSWAPD)) {
if (V1HasXXSWAPD) {
dl = SDLoc(V1->getOperand(0));
V1 = V1->getOperand(0)->getOperand(1);
}
if (V2HasXXSWAPD) {
dl = SDLoc(V2->getOperand(0));
V2 = V2->getOperand(0)->getOperand(1);
}
if (isPPC64 && ValType != MVT::v2f64)
V1 = DAG.getBitcast(MVT::v2f64, V1);
if (isPPC64 && V2.getValueType() != MVT::v2f64)

View File

@@ -1058,14 +1058,15 @@ define <4 x i32> @fromDiffMemVarDi(ptr nocapture readonly %arr, i32 signext %ele
;
; P8LE-LABEL: fromDiffMemVarDi:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r5, r2, .LCPI9_0@toc@ha
; P8LE-NEXT: sldi r4, r4, 2
; P8LE-NEXT: addi r5, r5, .LCPI9_0@toc@l
; P8LE-NEXT: addis r5, r2, .LCPI9_0@toc@ha
; P8LE-NEXT: add r3, r3, r4
; P8LE-NEXT: lxvd2x vs0, 0, r5
; P8LE-NEXT: addi r4, r5, .LCPI9_0@toc@l
; P8LE-NEXT: addi r3, r3, -12
; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: xxswapd v3, vs0
; P8LE-NEXT: lxvd2x vs1, 0, r4
; P8LE-NEXT: lxvd2x vs0, 0, r3
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: blr
entry:
@@ -1478,10 +1479,11 @@ define <4 x i32> @fromDiffMemConsDConvftoi(ptr nocapture readonly %ptr) {
; P8LE-LABEL: fromDiffMemConsDConvftoi:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r4, r2, .LCPI18_0@toc@ha
; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: lxvd2x vs0, 0, r3
; P8LE-NEXT: addi r4, r4, .LCPI18_0@toc@l
; P8LE-NEXT: lxvd2x vs0, 0, r4
; P8LE-NEXT: xxswapd v3, vs0
; P8LE-NEXT: lxvd2x vs1, 0, r4
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: xvcvspsxws v2, v2
; P8LE-NEXT: blr
@@ -2578,14 +2580,15 @@ define <4 x i32> @fromDiffMemVarDui(ptr nocapture readonly %arr, i32 signext %el
;
; P8LE-LABEL: fromDiffMemVarDui:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r5, r2, .LCPI41_0@toc@ha
; P8LE-NEXT: sldi r4, r4, 2
; P8LE-NEXT: addi r5, r5, .LCPI41_0@toc@l
; P8LE-NEXT: addis r5, r2, .LCPI41_0@toc@ha
; P8LE-NEXT: add r3, r3, r4
; P8LE-NEXT: lxvd2x vs0, 0, r5
; P8LE-NEXT: addi r4, r5, .LCPI41_0@toc@l
; P8LE-NEXT: addi r3, r3, -12
; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: xxswapd v3, vs0
; P8LE-NEXT: lxvd2x vs1, 0, r4
; P8LE-NEXT: lxvd2x vs0, 0, r3
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: blr
entry:
@@ -2998,10 +3001,11 @@ define <4 x i32> @fromDiffMemConsDConvftoui(ptr nocapture readonly %ptr) {
; P8LE-LABEL: fromDiffMemConsDConvftoui:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r4, r2, .LCPI50_0@toc@ha
; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: lxvd2x vs0, 0, r3
; P8LE-NEXT: addi r4, r4, .LCPI50_0@toc@l
; P8LE-NEXT: lxvd2x vs0, 0, r4
; P8LE-NEXT: xxswapd v3, vs0
; P8LE-NEXT: lxvd2x vs1, 0, r4
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: xvcvspuxws v2, v2
; P8LE-NEXT: blr

View File

@@ -491,10 +491,10 @@ define dso_local <8 x i16> @testmrglb3(ptr nocapture readonly %a) local_unnamed_
; CHECK-P9-BE: # %bb.0: # %entry
; CHECK-P9-BE-NEXT: lxsd v2, 0(r3)
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI12_0@toc@ha
; CHECK-P9-BE-NEXT: xxlxor vs0, vs0, vs0
; CHECK-P9-BE-NEXT: xxlxor vs1, vs1, vs1
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI12_0@toc@l
; CHECK-P9-BE-NEXT: lxv vs1, 0(r3)
; CHECK-P9-BE-NEXT: xxperm v2, vs0, vs1
; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
; CHECK-P9-BE-NEXT: xxperm v2, vs1, vs0
; CHECK-P9-BE-NEXT: blr
;
; CHECK-NOVSX-LABEL: testmrglb3:

View File

@@ -66,11 +66,11 @@ define dso_local <4 x i16> @shufflevector_combine(<4 x i32> %0) #0 {
; BE-LABEL: shufflevector_combine:
; BE: # %bb.0: # %newFuncRoot
; BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; BE-NEXT: xxlxor vs0, vs0, vs0
; BE-NEXT: xxlxor vs1, vs1, vs1
; BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
; BE-NEXT: lxv vs1, 0(r3)
; BE-NEXT: lxv vs0, 0(r3)
; BE-NEXT: li r3, 0
; BE-NEXT: xxperm v2, vs0, vs1
; BE-NEXT: xxperm v2, vs1, vs0
; BE-NEXT: vinsw v2, r3, 8
; BE-NEXT: vpkuwum v2, v2, v2
; BE-NEXT: blr

View File

@@ -44,15 +44,15 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-NEXT: add 5, 3, 4
; P9BE-NEXT: lxsdx 2, 3, 4
; P9BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; P9BE-NEXT: xxlxor 0, 0, 0
; P9BE-NEXT: xxlxor 1, 1, 1
; P9BE-NEXT: vspltisw 4, 8
; P9BE-NEXT: lxsd 3, 4(5)
; P9BE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; P9BE-NEXT: vadduwm 4, 4, 4
; P9BE-NEXT: lxv 1, 0(3)
; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; P9BE-NEXT: addi 3, 3, .LCPI0_1@toc@l
; P9BE-NEXT: xxperm 2, 0, 1
; P9BE-NEXT: xxperm 2, 1, 0
; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: xxperm 3, 3, 0
; P9BE-NEXT: vnegw 3, 3
@@ -285,10 +285,10 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-NEXT: addis 3, 2, .LCPI2_1@toc@ha
; P9BE-NEXT: addi 3, 3, .LCPI2_1@toc@l
; P9BE-NEXT: xxperm 2, 0, 1
; P9BE-NEXT: lxv 1, 0(3)
; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: li 3, 0
; P9BE-NEXT: xxmrghw 0, 4, 2
; P9BE-NEXT: xxperm 3, 0, 1
; P9BE-NEXT: xxmrghw 2, 4, 2
; P9BE-NEXT: xxperm 3, 2, 0
; P9BE-NEXT: xxspltw 2, 3, 1
; P9BE-NEXT: vadduwm 2, 3, 2
; P9BE-NEXT: vextuwlx 3, 3, 2
@@ -312,10 +312,10 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-AIX-NEXT: lxsihzx 0, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C4(2) # %const.1
; P9BE-AIX-NEXT: xxperm 2, 0, 1
; P9BE-AIX-NEXT: lxv 1, 0(3)
; P9BE-AIX-NEXT: lxv 0, 0(3)
; P9BE-AIX-NEXT: li 3, 0
; P9BE-AIX-NEXT: xxmrghw 0, 4, 2
; P9BE-AIX-NEXT: xxperm 3, 0, 1
; P9BE-AIX-NEXT: xxmrghw 2, 4, 2
; P9BE-AIX-NEXT: xxperm 3, 2, 0
; P9BE-AIX-NEXT: xxspltw 2, 3, 1
; P9BE-AIX-NEXT: vadduwm 2, 3, 2
; P9BE-AIX-NEXT: vextuwlx 3, 3, 2
@@ -395,13 +395,13 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
; P9LE-NEXT: vmrghb 2, 3, 2
; P9LE-NEXT: addi 3, 3, .LCPI3_0@toc@l
; P9LE-NEXT: vmrglh 2, 2, 4
; P9LE-NEXT: lxv 1, 0(3)
; P9LE-NEXT: lxv 0, 0(3)
; P9LE-NEXT: li 3, 0
; P9LE-NEXT: vmrghb 3, 3, 5
; P9LE-NEXT: xxmrglw 2, 2, 4
; P9LE-NEXT: vmrglh 3, 3, 4
; P9LE-NEXT: xxmrglw 0, 4, 3
; P9LE-NEXT: xxperm 2, 0, 1
; P9LE-NEXT: xxmrglw 3, 4, 3
; P9LE-NEXT: xxperm 2, 3, 0
; P9LE-NEXT: xxspltw 3, 2, 2
; P9LE-NEXT: vadduwm 2, 2, 3
; P9LE-NEXT: vextuwrx 3, 3, 2

View File

@@ -184,11 +184,12 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-LABEL: test_none_v16i8:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha
; CHECK-LE-P8-NEXT: lxvd2x v2, 0, r4
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: mtvsrd v4, r3
; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5
; CHECK-LE-P8-NEXT: xxswapd v2, vs0
; CHECK-LE-P8-NEXT: xxswapd v3, vs1
; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT: blr
;
@@ -431,11 +432,12 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-LABEL: test_none_v8i16:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha
; CHECK-LE-P8-NEXT: lxvd2x v2, 0, r4
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: mtvsrd v4, r3
; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5
; CHECK-LE-P8-NEXT: xxswapd v2, vs0
; CHECK-LE-P8-NEXT: xxswapd v3, vs1
; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT: blr
;

View File

@@ -389,10 +389,10 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-LE-P9-NEXT: xxlxor vs1, vs1, vs1
; CHECK-LE-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l
; CHECK-LE-P9-NEXT: lxv vs2, 0(r3)
; CHECK-LE-P9-NEXT: xxperm vs0, vs1, vs2
; CHECK-LE-P9-NEXT: lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: blr
;
@@ -411,10 +411,10 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3
; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-BE-P9-NEXT: xxlxor vs1, vs1, vs1
; CHECK-BE-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l
; CHECK-BE-P9-NEXT: lxv vs2, 0(r3)
; CHECK-BE-P9-NEXT: xxperm vs0, vs1, vs2
; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT: blr
;
@@ -470,14 +470,15 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-LE-P8-LABEL: test_none_v2i64:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3
; CHECK-LE-P8-NEXT: lxvd2x v3, 0, r4
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha
; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT: xxswapd v4, vs0
; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT: xxswapd v4, vs1
; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4
@@ -544,10 +545,10 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3)
; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r4)
; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4)
; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3
; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT: xxperm v2, vs1, vs0
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2
; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
; CHECK-AIX-64-P9-NEXT: blr
@@ -603,10 +604,10 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lfd f0, 0(r3)
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha
; CHECK-LE-P9-NEXT: xxlxor vs1, vs1, vs1
; CHECK-LE-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l
; CHECK-LE-P9-NEXT: lxv vs2, 0(r3)
; CHECK-LE-P9-NEXT: xxperm vs0, vs1, vs2
; CHECK-LE-P9-NEXT: lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: blr
;
@@ -625,10 +626,10 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lfd f0, 0(r3)
; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha
; CHECK-BE-P9-NEXT: xxlxor vs1, vs1, vs1
; CHECK-BE-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l
; CHECK-BE-P9-NEXT: lxv vs2, 0(r3)
; CHECK-BE-P9-NEXT: xxperm vs0, vs1, vs2
; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT: blr
;

View File

@@ -203,13 +203,13 @@ define void @test2(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
;
; CHECK-P9-LABEL: test2:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: lxv vs0, 0(r4)
; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0
; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l
; CHECK-P9-NEXT: lxv vs2, 0(r4)
; CHECK-P9-NEXT: xxperm vs1, vs0, vs2
; CHECK-P9-NEXT: xvcvuxddp vs0, vs1
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-P9-NEXT: xvcvuxddp vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;

View File

@@ -80,13 +80,13 @@ define <4 x float> @test4elt(i64 %a.coerce) local_unnamed_addr #1 {
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtfprd f1, r3
; CHECK-BE-NEXT: mtfprd f0, r3
; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0
; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2
; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-BE-NEXT: lxv vs2, 0(r3)
; CHECK-BE-NEXT: xxperm vs1, vs0, vs2
; CHECK-BE-NEXT: xvcvuxwsp v2, vs1
; CHECK-BE-NEXT: lxv vs1, 0(r3)
; CHECK-BE-NEXT: xxperm vs0, vs2, vs1
; CHECK-BE-NEXT: xvcvuxwsp v2, vs0
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i64 %a.coerce to <4 x i16>

View File

@@ -24,24 +24,24 @@ define <2 x double> @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtfprwz f1, r3
; CHECK-P9-NEXT: mtfprwz f0, r3
; CHECK-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0
; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
; CHECK-P9-NEXT: lxv vs2, 0(r3)
; CHECK-P9-NEXT: xxperm vs1, vs0, vs2
; CHECK-P9-NEXT: xvcvuxddp v2, vs1
; CHECK-P9-NEXT: lxv vs1, 0(r3)
; CHECK-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-P9-NEXT: xvcvuxddp v2, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtfprwz f1, r3
; CHECK-BE-NEXT: mtfprwz f0, r3
; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0
; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2
; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
; CHECK-BE-NEXT: lxv vs2, 0(r3)
; CHECK-BE-NEXT: xxperm vs1, vs0, vs2
; CHECK-BE-NEXT: xvcvuxddp v2, vs1
; CHECK-BE-NEXT: lxv vs1, 0(r3)
; CHECK-BE-NEXT: xxperm vs0, vs2, vs1
; CHECK-BE-NEXT: xvcvuxddp v2, vs0
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i32 %a.coerce to <2 x i16>

View File

@@ -76,24 +76,24 @@ define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 {
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtfprwz f1, r3
; CHECK-P9-NEXT: mtfprwz f0, r3
; CHECK-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0
; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-P9-NEXT: lxv vs2, 0(r3)
; CHECK-P9-NEXT: xxperm vs1, vs0, vs2
; CHECK-P9-NEXT: xvcvuxwsp v2, vs1
; CHECK-P9-NEXT: lxv vs1, 0(r3)
; CHECK-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-P9-NEXT: xvcvuxwsp v2, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtfprwz f1, r3
; CHECK-BE-NEXT: mtfprwz f0, r3
; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0
; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2
; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-BE-NEXT: lxv vs2, 0(r3)
; CHECK-BE-NEXT: xxperm vs1, vs0, vs2
; CHECK-BE-NEXT: xvcvuxwsp v2, vs1
; CHECK-BE-NEXT: lxv vs1, 0(r3)
; CHECK-BE-NEXT: xxperm vs0, vs2, vs1
; CHECK-BE-NEXT: xvcvuxwsp v2, vs0
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i32 %a.coerce to <4 x i8>

View File

@@ -24,24 +24,24 @@ define <2 x double> @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtfprwz f1, r3
; CHECK-P9-NEXT: mtfprwz f0, r3
; CHECK-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0
; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
; CHECK-P9-NEXT: lxv vs2, 0(r3)
; CHECK-P9-NEXT: xxperm vs1, vs0, vs2
; CHECK-P9-NEXT: xvcvuxddp v2, vs1
; CHECK-P9-NEXT: lxv vs1, 0(r3)
; CHECK-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-P9-NEXT: xvcvuxddp v2, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtfprwz f1, r3
; CHECK-BE-NEXT: mtfprwz f0, r3
; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0
; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2
; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
; CHECK-BE-NEXT: lxv vs2, 0(r3)
; CHECK-BE-NEXT: xxperm vs1, vs0, vs2
; CHECK-BE-NEXT: xvcvuxddp v2, vs1
; CHECK-BE-NEXT: lxv vs1, 0(r3)
; CHECK-BE-NEXT: xxperm vs0, vs2, vs1
; CHECK-BE-NEXT: xvcvuxddp v2, vs0
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i16 %a.coerce to <2 x i8>

View File

@@ -7833,10 +7833,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i16(<2 x i16> %x) #0 {
; PC64LE9-LABEL: constrained_vector_uitofp_v2f64_v2i16:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI173_0@toc@ha
; PC64LE9-NEXT: xxlxor 0, 0, 0
; PC64LE9-NEXT: xxlxor 1, 1, 1
; PC64LE9-NEXT: addi 3, 3, .LCPI173_0@toc@l
; PC64LE9-NEXT: lxv 1, 0(3)
; PC64LE9-NEXT: xxperm 34, 0, 1
; PC64LE9-NEXT: lxv 0, 0(3)
; PC64LE9-NEXT: xxperm 34, 1, 0
; PC64LE9-NEXT: xvcvuxddp 34, 34
; PC64LE9-NEXT: blr
entry: