[X86][FP16] Fix masking problem of VF[,C]MADDCSH intrinsics (#118071)
Fixes: #98306
This commit is contained in:
@@ -26265,6 +26265,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
}
|
||||
if (!NewOp)
|
||||
NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3);
|
||||
if (IntrData->Opc0 == X86ISD::VFMADDCSH ||
|
||||
IntrData->Opc0 == X86ISD::VFCMADDCSH)
|
||||
return getScalarMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG);
|
||||
return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG);
|
||||
}
|
||||
case IFMA_OP:
|
||||
|
||||
@@ -13533,17 +13533,17 @@ let Uses = [MXCSR] in {
|
||||
multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
|
||||
bit IsCommutable> {
|
||||
let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
|
||||
defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
|
||||
defm r : AVX512_maskable_3src_scalar<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
|
||||
(ins VR128X:$src2, VR128X:$src3), OpcodeStr,
|
||||
"$src3, $src2", "$src2, $src3",
|
||||
(v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
|
||||
Sched<[WriteFMAX]>;
|
||||
defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
|
||||
defm m : AVX512_maskable_3src_scalar<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
|
||||
(ins VR128X:$src2, ssmem:$src3), OpcodeStr,
|
||||
"$src3, $src2", "$src2, $src3",
|
||||
(v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
|
||||
Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
|
||||
defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
|
||||
defm rb : AVX512_maskable_3src_scalar<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
|
||||
(ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
|
||||
"$rc, $src3, $src2", "$src2, $src3, $rc",
|
||||
(v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
|
||||
|
||||
@@ -277,3 +277,15 @@ define <4 x float> @test_int_x86_avx512fp16_maskz_cfcmadd_sh(<4 x float> %x0, <4
|
||||
%res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfcmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 9)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; Regression test for issue #98306: calls the maskz.vfmadd.csh intrinsic on
; constant <4 x float> operands with an all-zero mask (i8 0).
; The CHECK lines expect the zero mask to be materialized with kxorw and a
; zero-masked ({%k1} {z}) vfmaddcsh to still be emitted.
; NOTE(review): presumably the call was previously masked as a full vector
; rather than as a scalar operation - confirm against the linked issue.
define <4 x float> @PR98306() {
|
||||
; CHECK-LABEL: PR98306:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kxorw %k0, %k0, %k1
|
||||
; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [7.8125E-3,1.050912E+6,4.203776E+6,1.6815616E+7]
|
||||
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [3.2E+1,4.03288064E+8,8.0658432E+8,1.61318502E+9]
|
||||
; CHECK-NEXT: vfmaddcsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float> <float 7.812500e-03, float 0x4130092000000000, float 0x4150094000000000, float 0x4170096000000000>, <4 x float> <float 2.000000e+00, float 0x4188098000000000, float 0x4198099000000000, float 0x41A809A000000000>, <4 x float> <float 3.200000e+01, float 0x41B809B000000000, float 0x41C809C000000000, float 0x41D809D000000000>, i8 0, i32 4)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user