[X86] getFauxShuffleMask - add ISD::SHL/SRL handling
This is currently mostly the same as the VSHLI/VSRLI handling below, although I've kept them separate as I'm investigating adding non-uniform shift-amount handling as a follow-up.
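For reference, the decode relies on the fact that a whole-byte per-lane shift is exactly a byte shuffle: on a little-endian layout, shifting a 32-bit lane left by 8 bits moves each byte up one slot and zero-fills the vacated low byte. A minimal standalone sketch of that equivalence (plain C++, not LLVM code; assumes a little-endian host for the memcpy view):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // A 32-bit lane shifted left by one whole byte equals a byte shuffle that
  // moves every byte up one slot and zeroes the lowest byte.
  uint32_t Lane = 0xAABBCCDD;
  uint32_t Shifted = Lane << 8; // 0xBBCCDD00

  uint8_t Bytes[4];
  std::memcpy(Bytes, &Lane, 4); // {0xDD, 0xCC, 0xBB, 0xAA} on little-endian

  uint8_t Shuffled[4] = {0}; // vacated byte is zero-filled
  for (int j = 1; j != 4; ++j)
    Shuffled[j] = Bytes[j - 1]; // mask entry j reads source byte j - 1

  uint32_t FromShuffle;
  std::memcpy(&FromShuffle, Shuffled, 4);
  assert(FromShuffle == Shifted);
  return 0;
}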
@@ -6270,6 +6270,30 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
    Ops.push_back(Src);
    return true;
  }
  case ISD::SHL:
  case ISD::SRL: {
    // We can only decode 'whole byte' bit shifts as shuffles.
    std::optional<uint64_t> Amt = DAG.getValidShiftAmount(N, DemandedElts);
    if (!Amt || (*Amt % 8) != 0)
      return false;

    uint64_t ByteShift = *Amt / 8;
    Ops.push_back(N.getOperand(0));

    // Clear mask to all zeros and insert the shifted byte indices.
    Mask.append(NumSizeInBytes, SM_SentinelZero);

    if (ISD::SHL == Opcode) {
      for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
        for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
          Mask[i + j] = i + j - ByteShift;
    } else {
      for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
        for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
          Mask[i + j - ByteShift] = i + j;
    }
    return true;
  }
  case X86ISD::VSHLI:
  case X86ISD::VSRLI: {
    uint64_t ShiftVal = N.getConstantOperandVal(1);
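To see the masks the SHL/SRL loops above produce, here is a standalone re-creation of the mask construction (plain C++, not part of the commit; buildByteShiftMask and SM_SENTINEL_ZERO are hypothetical stand-ins for the in-tree logic and SM_SentinelZero):

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for LLVM's SM_SentinelZero ("force this byte to 0").
constexpr int SM_SENTINEL_ZERO = -1;

// Rebuilds the mask the same way the SHL/SRL loops in the patch do.
std::vector<int> buildByteShiftMask(unsigned NumSizeInBytes,
                                    unsigned NumBytesPerElt,
                                    unsigned ByteShift, bool IsShl) {
  std::vector<int> Mask(NumSizeInBytes, SM_SENTINEL_ZERO);
  for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
    for (unsigned j = ByteShift; j != NumBytesPerElt; ++j) {
      if (IsShl)
        Mask[i + j] = i + j - ByteShift; // bytes move to higher positions
      else
        Mask[i + j - ByteShift] = i + j; // bytes move to lower positions
    }
  return Mask;
}

int main() {
  // 128-bit vector of i32 (16 bytes, 4 bytes per element), shift by 8 bits.
  for (bool IsShl : {true, false}) {
    std::cout << (IsShl ? "SHL: " : "SRL: ");
    for (int M : buildByteShiftMask(16, 4, 1, IsShl))
      std::cout << (M == SM_SENTINEL_ZERO ? std::string("Z")
                                          : std::to_string(M))
                << ' ';
    std::cout << '\n';
  }
  return 0;
}

For this v4i32 case it prints "SHL: Z 0 1 2 Z 4 5 6 Z 8 9 10 Z 12 13 14" and "SRL: 1 2 3 Z 5 6 7 Z 9 10 11 Z 13 14 15 Z", i.e. each lane keeps its surviving bytes and zero-fills the vacated ones, matching the 'shifted byte indices' comment above.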
@@ -246,19 +246,19 @@ define i32 @PR43159(ptr %a0) {
; AVX2-LABEL: PR43159:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT: vpsubd %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; AVX2-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %edi
; AVX2-NEXT: vpextrd $1, %xmm0, %esi
@@ -269,19 +269,19 @@ define i32 @PR43159(ptr %a0) {
; AVX512VL-LABEL: PR43159:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
; AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512VL-NEXT: vpsubd %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; AVX512VL-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %edi
; AVX512VL-NEXT: vpextrd $1, %xmm0, %esi
@@ -292,19 +292,19 @@ define i32 @PR43159(ptr %a0) {
; AVX512DQVL-LABEL: PR43159:
; AVX512DQVL: # %bb.0: # %entry
; AVX512DQVL-NEXT: vmovdqa (%rdi), %xmm0
; AVX512DQVL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX512DQVL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512DQVL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512DQVL-NEXT: vpsubd %xmm2, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX512DQVL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; AVX512DQVL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512DQVL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; AVX512DQVL-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX512DQVL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; AVX512DQVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; AVX512DQVL-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512DQVL-NEXT: vmovd %xmm0, %edi
; AVX512DQVL-NEXT: vpextrd $1, %xmm0, %esi