[X86] collectConcatOps - handle extract_subvector(concat_subvectors(...)) patterns (#143406)
@@ -4311,6 +4311,25 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops,
    }
  }

  if (N->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    EVT VT = N->getValueType(0);
    SDValue Src = N->getOperand(0);
    uint64_t Idx = N->getConstantOperandVal(1);

    // Collect all the subvectors from the source vector and slice off the
    // extraction.
    SmallVector<SDValue, 4> SrcOps;
    if (collectConcatOps(Src.getNode(), SrcOps, DAG) &&
        VT.getSizeInBits() > SrcOps[0].getValueSizeInBits() &&
        (VT.getSizeInBits() % SrcOps[0].getValueSizeInBits()) == 0 &&
        (Idx % SrcOps[0].getValueType().getVectorNumElements()) == 0) {
      unsigned SubIdx = Idx / SrcOps[0].getValueType().getVectorNumElements();
      unsigned NumSubs = VT.getSizeInBits() / SrcOps[0].getValueSizeInBits();
      Ops.append(SrcOps.begin() + SubIdx, SrcOps.begin() + SubIdx + NumSubs);
      return true;
    }
  }

  return false;
}
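The new path only fires when the extracted type is strictly wider than one concatenated subvector, spans a whole number of subvectors, and starts on a subvector boundary; it then appends the matching slice of the already-collected operands. Below is a minimal standalone sketch of that index arithmetic in plain C++ (no LLVM types; `Sub`, `sliceConcat`, and the element counts are illustrative stand-ins, not part of the patch, and it works in element counts rather than bit sizes, which is equivalent when all pieces share one element type).

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

// Illustrative stand-in for one concatenated subvector: just its element count
// and a tag so we can see which pieces get selected.
struct Sub {
  unsigned NumElts;
  char Tag;
};

// Mirror of the patch's bailout conditions: the extraction must be wider than
// one subvector, cover a whole number of subvectors, and start on a subvector
// boundary. Returns the covered slice, or nullopt if the pattern doesn't match.
static std::optional<std::vector<Sub>>
sliceConcat(const std::vector<Sub> &SrcOps, unsigned ExtractNumElts,
            uint64_t Idx) {
  unsigned SubElts = SrcOps[0].NumElts; // all subvectors share one type
  if (ExtractNumElts <= SubElts || ExtractNumElts % SubElts != 0 ||
      Idx % SubElts != 0)
    return std::nullopt;
  unsigned SubIdx = Idx / SubElts;             // first covered subvector
  unsigned NumSubs = ExtractNumElts / SubElts; // how many it covers
  assert(SubIdx + NumSubs <= SrcOps.size() && "extract out of range");
  return std::vector<Sub>(SrcOps.begin() + SubIdx,
                          SrcOps.begin() + SubIdx + NumSubs);
}

int main() {
  // A v64i8-style source built as a concat of four v16i8-style pieces A,B,C,D.
  std::vector<Sub> SrcOps = {{16, 'A'}, {16, 'B'}, {16, 'C'}, {16, 'D'}};
  // A v32i8-style extraction at element index 32 resolves to pieces C and D,
  // the same slice Ops.append copies in the real code.
  if (auto Slice = sliceConcat(SrcOps, /*ExtractNumElts=*/32, /*Idx=*/32))
    for (const Sub &S : *Slice)
      std::cout << S.Tag << ' ';
  std::cout << '\n'; // prints: C D
}
```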
@@ -740,16 +740,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
; AVX512-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
; AVX512-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX512-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vmovdqa %xmm0, 16(%rcx)
; AVX512-NEXT: vmovdqa %xmm1, (%rcx)
; AVX512-NEXT: vmovdqa %xmm2, 32(%rcx)
; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512-FCP-LABEL: store_i8_stride3_vf16:
@@ -763,16 +762,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
; AVX512-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
; AVX512-FCP-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; AVX512-FCP-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512-FCP-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX512-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
; AVX512-FCP-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX512-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512-FCP-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512-FCP-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX512-FCP-NEXT: vmovdqa %xmm0, 16(%rcx)
; AVX512-FCP-NEXT: vmovdqa %xmm1, (%rcx)
; AVX512-FCP-NEXT: vmovdqa %xmm2, 32(%rcx)
; AVX512-FCP-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512-FCP-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX512-FCP-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512-FCP-NEXT: vzeroupper
; AVX512-FCP-NEXT: retq
;
; AVX512DQ-LABEL: store_i8_stride3_vf16:
@@ -786,16 +784,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512DQ-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
; AVX512DQ-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512DQ-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512DQ-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX512DQ-NEXT: vmovdqa %xmm0, 16(%rcx)
; AVX512DQ-NEXT: vmovdqa %xmm1, (%rcx)
; AVX512DQ-NEXT: vmovdqa %xmm2, 32(%rcx)
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512DQ-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512DQ-FCP-LABEL: store_i8_stride3_vf16:
@@ -809,16 +806,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512DQ-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
; AVX512DQ-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
; AVX512DQ-FCP-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; AVX512DQ-FCP-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512DQ-FCP-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX512DQ-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
; AVX512DQ-FCP-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX512DQ-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512DQ-FCP-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512DQ-FCP-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX512DQ-FCP-NEXT: vmovdqa %xmm0, 16(%rcx)
; AVX512DQ-FCP-NEXT: vmovdqa %xmm1, (%rcx)
; AVX512DQ-FCP-NEXT: vmovdqa %xmm2, 32(%rcx)
; AVX512DQ-FCP-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512DQ-FCP-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX512DQ-FCP-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-FCP-NEXT: vzeroupper
; AVX512DQ-FCP-NEXT: retq
;
; AVX512BW-LABEL: store_i8_stride3_vf16:
@@ -832,16 +828,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512BW-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
; AVX512BW-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512BW-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512BW-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vmovdqa %xmm0, 16(%rcx)
; AVX512BW-NEXT: vmovdqa %xmm1, (%rcx)
; AVX512BW-NEXT: vmovdqa %xmm2, 32(%rcx)
; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512BW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BW-FCP-LABEL: store_i8_stride3_vf16:
@@ -855,16 +850,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512BW-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
; AVX512BW-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
; AVX512BW-FCP-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; AVX512BW-FCP-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512BW-FCP-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX512BW-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
; AVX512BW-FCP-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX512BW-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
; AVX512BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512BW-FCP-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512BW-FCP-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX512BW-FCP-NEXT: vmovdqa %xmm0, 16(%rcx)
; AVX512BW-FCP-NEXT: vmovdqa %xmm1, (%rcx)
; AVX512BW-FCP-NEXT: vmovdqa %xmm2, 32(%rcx)
; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512BW-FCP-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX512BW-FCP-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512BW-FCP-NEXT: vzeroupper
; AVX512BW-FCP-NEXT: retq
;
; AVX512DQ-BW-LABEL: store_i8_stride3_vf16:
@@ -878,16 +872,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512DQ-BW-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
; AVX512DQ-BW-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
; AVX512DQ-BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; AVX512DQ-BW-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512DQ-BW-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX512DQ-BW-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
; AVX512DQ-BW-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX512DQ-BW-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
; AVX512DQ-BW-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512DQ-BW-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512DQ-BW-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX512DQ-BW-NEXT: vmovdqa %xmm0, 16(%rcx)
; AVX512DQ-BW-NEXT: vmovdqa %xmm1, (%rcx)
; AVX512DQ-BW-NEXT: vmovdqa %xmm2, 32(%rcx)
; AVX512DQ-BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512DQ-BW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX512DQ-BW-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-BW-NEXT: vzeroupper
; AVX512DQ-BW-NEXT: retq
;
; AVX512DQ-BW-FCP-LABEL: store_i8_stride3_vf16:
@@ -901,16 +894,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512DQ-BW-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
; AVX512DQ-BW-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
; AVX512DQ-BW-FCP-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; AVX512DQ-BW-FCP-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX512DQ-BW-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX512DQ-BW-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
; AVX512DQ-BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512DQ-BW-FCP-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX512DQ-BW-FCP-NEXT: vmovdqa %xmm0, 16(%rcx)
; AVX512DQ-BW-FCP-NEXT: vmovdqa %xmm1, (%rcx)
; AVX512DQ-BW-FCP-NEXT: vmovdqa %xmm2, 32(%rcx)
; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX512DQ-BW-FCP-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-BW-FCP-NEXT: vzeroupper
; AVX512DQ-BW-FCP-NEXT: retq
%in.vec0 = load <16 x i8>, ptr %in.vecptr0, align 64
%in.vec1 = load <16 x i8>, ptr %in.vecptr1, align 64
@@ -962,16 +962,15 @@ define void @interleaved_store_vf16_i8_stride3(<16 x i8> %a, <16 x i8> %b, <16 x
; AVX512-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
; AVX512-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
; AVX512-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX512-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX512-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vmovdqu %xmm0, 16(%rdi)
; AVX512-NEXT: vmovdqu %xmm1, (%rdi)
; AVX512-NEXT: vmovdqu %xmm2, 32(%rdi)
; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX512-NEXT: vmovdqu %ymm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%2 = shufflevector <16 x i8> %c, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>