AMDGPU: Directly select minimumnum/maximumnum with ieee_mode=0 (#141903)
The hardware min/max instructions follow the IR rules when IEEE mode is disabled, so we can avoid canonicalizing the inputs. We lose the quieting of a signaling NaN if both inputs are NaNs, but that is only required with strictfp.
This commit is contained in:
@@ -92,6 +92,8 @@ def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().F
|
||||
def NoFP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
|
||||
def NoFP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">;
|
||||
def NoFP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
|
||||
def IEEEModeEnabled : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().IEEE">;
|
||||
def IEEEModeDisabled : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().IEEE">;
|
||||
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
|
||||
}
|
||||
|
||||
|
||||
@@ -957,12 +957,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
||||
FPOpActions.clampMaxNumElementsStrict(0, S32, 2);
|
||||
}
|
||||
|
||||
auto &MinNumMaxNum = getActionDefinitionsBuilder({
|
||||
G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
|
||||
|
||||
// TODO: These should be custom lowered and are directly legal with IEEE=0
|
||||
auto &MinimumNumMaximumNum =
|
||||
getActionDefinitionsBuilder({G_FMINIMUMNUM, G_FMAXIMUMNUM});
|
||||
auto &MinNumMaxNum = getActionDefinitionsBuilder(
|
||||
{G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM, G_FMINNUM_IEEE,
|
||||
G_FMAXNUM_IEEE});
|
||||
|
||||
if (ST.hasVOP3PInsts()) {
|
||||
MinNumMaxNum.customFor(FPTypesPK16)
|
||||
@@ -980,8 +977,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
||||
.scalarize(0);
|
||||
}
|
||||
|
||||
MinimumNumMaximumNum.lower();
|
||||
|
||||
if (ST.hasVOP3PInsts())
|
||||
FPOpActions.clampMaxNumElementsStrict(0, S16, 2);
|
||||
|
||||
@@ -2162,6 +2157,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(
|
||||
return legalizeFPTOI(MI, MRI, B, false);
|
||||
case TargetOpcode::G_FMINNUM:
|
||||
case TargetOpcode::G_FMAXNUM:
|
||||
case TargetOpcode::G_FMINIMUMNUM:
|
||||
case TargetOpcode::G_FMAXIMUMNUM:
|
||||
case TargetOpcode::G_FMINNUM_IEEE:
|
||||
case TargetOpcode::G_FMAXNUM_IEEE:
|
||||
return legalizeMinNumMaxNum(Helper, MI);
|
||||
@@ -2741,9 +2738,17 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper,
|
||||
MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;
|
||||
|
||||
// With ieee_mode disabled, the instructions have the correct behavior
|
||||
// already for G_FMINNUM/G_FMAXNUM
|
||||
if (!MFI->getMode().IEEE)
|
||||
// already for G_FMINIMUMNUM/G_FMAXIMUMNUM.
|
||||
//
|
||||
// FIXME: G_FMINNUM/G_FMAXNUM should match the behavior with ieee_mode
|
||||
// enabled.
|
||||
if (!MFI->getMode().IEEE) {
|
||||
if (MI.getOpcode() == AMDGPU::G_FMINIMUMNUM ||
|
||||
MI.getOpcode() == AMDGPU::G_FMAXIMUMNUM)
|
||||
return true;
|
||||
|
||||
return !IsIEEEOp;
|
||||
}
|
||||
|
||||
if (IsIEEEOp)
|
||||
return true;
|
||||
|
||||
@@ -4009,6 +4009,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||
case AMDGPU::G_FMAXNUM:
|
||||
case AMDGPU::G_FMINIMUM:
|
||||
case AMDGPU::G_FMAXIMUM:
|
||||
case AMDGPU::G_FMINIMUMNUM:
|
||||
case AMDGPU::G_FMAXIMUMNUM:
|
||||
case AMDGPU::G_INTRINSIC_TRUNC:
|
||||
case AMDGPU::G_STRICT_FADD:
|
||||
case AMDGPU::G_STRICT_FSUB:
|
||||
|
||||
@@ -531,8 +531,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, {MVT::i16, MVT::i32},
|
||||
Legal);
|
||||
|
||||
setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, {MVT::f32, MVT::f64},
|
||||
Custom);
|
||||
setOperationAction(
|
||||
{ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM},
|
||||
{MVT::f32, MVT::f64}, Custom);
|
||||
|
||||
// These are really only legal for ieee_mode functions. We should be avoiding
|
||||
// them for functions that don't have ieee_mode enabled, so just say they are
|
||||
@@ -771,7 +772,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
MVT::v32f16, MVT::v32bf16},
|
||||
Custom);
|
||||
|
||||
setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, MVT::f16, Custom);
|
||||
setOperationAction(
|
||||
{ISD::FMAXNUM, ISD::FMINNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM},
|
||||
MVT::f16, Custom);
|
||||
setOperationAction({ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE}, MVT::f16, Legal);
|
||||
|
||||
setOperationAction({ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE, ISD::FMINIMUMNUM,
|
||||
@@ -825,8 +828,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FCANONICALIZE},
|
||||
VT, Custom);
|
||||
|
||||
setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, {MVT::v2f16, MVT::v4f16},
|
||||
Custom);
|
||||
setOperationAction(
|
||||
{ISD::FMAXNUM, ISD::FMINNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM},
|
||||
{MVT::v2f16, MVT::v4f16}, Custom);
|
||||
|
||||
setOperationAction(ISD::FEXP, MVT::v2f16, Custom);
|
||||
setOperationAction(ISD::SELECT, {MVT::v4i16, MVT::v4f16, MVT::v4bf16},
|
||||
@@ -6062,6 +6066,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::FMINNUM:
|
||||
case ISD::FMAXNUM:
|
||||
return lowerFMINNUM_FMAXNUM(Op, DAG);
|
||||
case ISD::FMINIMUMNUM:
|
||||
case ISD::FMAXIMUMNUM:
|
||||
return lowerFMINIMUMNUM_FMAXIMUMNUM(Op, DAG);
|
||||
case ISD::FMINIMUM:
|
||||
case ISD::FMAXIMUM:
|
||||
return lowerFMINIMUM_FMAXIMUM(Op, DAG);
|
||||
@@ -6086,8 +6093,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::FMUL:
|
||||
case ISD::FMINNUM_IEEE:
|
||||
case ISD::FMAXNUM_IEEE:
|
||||
case ISD::FMINIMUMNUM:
|
||||
case ISD::FMAXIMUMNUM:
|
||||
case ISD::UADDSAT:
|
||||
case ISD::USUBSAT:
|
||||
case ISD::SADDSAT:
|
||||
@@ -6995,6 +7000,23 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
|
||||
return Op;
|
||||
}
|
||||
|
||||
/// Custom lowering for ISD::FMINIMUMNUM / ISD::FMAXIMUMNUM nodes.
///
/// When the function runs with IEEE mode enabled, the node is handed to
/// expandFMINIMUMNUM_FMAXIMUMNUM. With IEEE mode disabled the node is kept
/// as-is, except that the vector types listed below are first broken up via
/// splitBinaryVectorOp.
///
/// \param Op  The FMINIMUMNUM/FMAXIMUMNUM node to lower.
/// \param DAG The selection DAG that owns \p Op.
/// \returns The expanded value, the split value, or \p Op unchanged.
SDValue
|
||||
SITargetLowering::lowerFMINIMUMNUM_FMAXIMUMNUM(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
EVT VT = Op.getValueType();
|
||||
const MachineFunction &MF = DAG.getMachineFunction();
|
||||
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
|
||||
// The IEEE bit of the function's FP mode decides which path is taken.
bool IsIEEEMode = Info->getMode().IEEE;
|
||||
|
||||
// IEEE mode enabled: do not keep the node, use the expansion helper.
if (IsIEEEMode)
|
||||
return expandFMINIMUMNUM_FMAXIMUMNUM(Op.getNode(), DAG);
|
||||
|
||||
// IEEE mode disabled: these vector types are split rather than returned
// whole.
if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16 ||
|
||||
VT == MVT::v16bf16)
|
||||
return splitBinaryVectorOp(Op, DAG);
|
||||
// Every other type is returned unchanged.
return Op;
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::lowerFMINIMUM_FMAXIMUM(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
@@ -147,6 +147,7 @@ private:
|
||||
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue splitFP_ROUNDVectorOp(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerFMINIMUMNUM_FMAXIMUMNUM(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerFMINIMUM_FMAXIMUM(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue promoteUniformOpToI32(SDValue Op, DAGCombinerInfo &DCI) const;
|
||||
|
||||
@@ -1390,6 +1390,55 @@ def : GCNPat<
|
||||
(S_ADD_U64_PSEUDO $src0, $src1)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// FP min/max patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
// Selection pattern for a two-operand floating-point node.
//
// Matches `node` producing type `vt` where both operands are of type `vt` and
// are matched through VOP3Mods, which yields a source value plus an i32
// modifier operand for each. The result instruction `inst` receives
// (src0_mods, src0, src1_mods, src1) followed by DSTCLAMP.NONE and
// DSTOMOD.NONE.
class FPBinOpPat <SDPatternOperator node, ValueType vt, Instruction inst>
|
||||
: GCNPat <(vt (node (vt (VOP3Mods vt:$src0, i32:$src0_mods)),
|
||||
(vt (VOP3Mods vt:$src1, i32:$src1_mods)))),
|
||||
(inst $src0_mods, $src0, $src1_mods, $src1, DSTCLAMP.NONE, DSTOMOD.NONE)
|
||||
>;
|
||||
|
||||
// Selection pattern for a two-operand packed floating-point node.
//
// Matches `node` producing type `vt` where both operands are matched through
// VOP3PMods. The operand type is written as v2f16 here rather than `vt`. The
// result instruction `inst` receives (src0_mods, src0, src1_mods, src1)
// followed by DSTCLAMP.NONE; unlike FPBinOpPat, no DSTOMOD operand is passed.
class FPPkBinOpPat <SDPatternOperator node, ValueType vt, Instruction inst>
|
||||
: GCNPat <(vt (node (VOP3PMods v2f16:$src0, i32:$src0_mods),
|
||||
(VOP3PMods v2f16:$src1, i32:$src1_mods))),
|
||||
(inst $src0_mods, $src0, $src1_mods, $src1, DSTCLAMP.NONE)
|
||||
>;
|
||||
|
||||
/// With IEEE=0, signalingness is ignored and the non-NaN input will
|
||||
/// be directly returned.
|
||||
// Every pattern in this group is guarded by the IEEEModeDisabled predicate.
let OtherPredicates = [IEEEModeDisabled] in {
|
||||
// f32 and f64: fminimumnum/fmaximumnum map to the e64 min/max instructions.
def : FPBinOpPat<fminimumnum, f32, V_MIN_F32_e64>;
|
||||
def : FPBinOpPat<fmaximumnum, f32, V_MAX_F32_e64>;
|
||||
def : FPBinOpPat<fminimumnum, f64, V_MIN_F64_e64>;
|
||||
def : FPBinOpPat<fmaximumnum, f64, V_MAX_F64_e64>;
|
||||
|
||||
// f16: the three groups below all require Has16BitInsts and differ only in
// the True16Predicate, which picks the plain, t16 or fake16 instruction.
let SubtargetPredicate = Has16BitInsts,
|
||||
True16Predicate = NotHasTrue16BitInsts in {
|
||||
def : FPBinOpPat<fminimumnum, f16, V_MIN_F16_e64>;
|
||||
def : FPBinOpPat<fmaximumnum, f16, V_MAX_F16_e64>;
|
||||
}
|
||||
|
||||
let SubtargetPredicate = Has16BitInsts,
|
||||
True16Predicate = UseRealTrue16Insts in {
|
||||
def : FPBinOpPat<fminimumnum, f16, V_MIN_F16_t16_e64>;
|
||||
def : FPBinOpPat<fmaximumnum, f16, V_MAX_F16_t16_e64>;
|
||||
}
|
||||
|
||||
let SubtargetPredicate = Has16BitInsts,
|
||||
True16Predicate = UseFakeTrue16Insts in {
|
||||
def : FPBinOpPat<fminimumnum, f16, V_MIN_F16_fake16_e64>;
|
||||
def : FPBinOpPat<fmaximumnum, f16, V_MAX_F16_fake16_e64>;
|
||||
}
|
||||
|
||||
// v2f16: packed min/max, guarded by HasVOP3PInsts.
let SubtargetPredicate = HasVOP3PInsts in {
|
||||
def : FPPkBinOpPat<fminimumnum, v2f16, V_PK_MIN_F16>;
|
||||
def : FPPkBinOpPat<fmaximumnum, v2f16, V_PK_MAX_F16>;
|
||||
}
|
||||
}
|
||||
|
||||
/********** ============================================ **********/
|
||||
/********** Extraction, Insertion, Building and Casting **********/
|
||||
/********** ============================================ **********/
|
||||
|
||||
@@ -2019,9 +2019,7 @@ define float @v_fneg_minimumnum_f32_no_ieee(float %a, float %b) #4 {
|
||||
; GCN-LABEL: v_fneg_minimumnum_f32_no_ieee:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
|
||||
; GCN-NEXT: v_max_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%min = call float @llvm.minimumnum.f32(float %a, float %b)
|
||||
%fneg = fneg float %min
|
||||
@@ -2044,8 +2042,7 @@ define float @v_fneg_self_minimumnum_f32_no_ieee(float %a) #4 {
|
||||
; GCN-LABEL: v_fneg_self_minimumnum_f32_no_ieee:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
|
||||
; GCN-NEXT: v_max_f32_e32 v0, v0, v0
|
||||
; GCN-NEXT: v_max_f32_e64 v0, -v0, -v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%min = call float @llvm.minimumnum.f32(float %a, float %a)
|
||||
%min.fneg = fneg float %min
|
||||
@@ -2068,8 +2065,7 @@ define float @v_fneg_posk_minimumnum_f32_no_ieee(float %a) #4 {
|
||||
; GCN-LABEL: v_fneg_posk_minimumnum_f32_no_ieee:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
|
||||
; GCN-NEXT: v_max_f32_e32 v0, -4.0, v0
|
||||
; GCN-NEXT: v_max_f32_e64 v0, -v0, -4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%min = call float @llvm.minimumnum.f32(float 4.0, float %a)
|
||||
%fneg = fneg float %min
|
||||
@@ -2092,8 +2088,7 @@ define float @v_fneg_negk_minimumnum_f32_no_ieee(float %a) #4 {
|
||||
; GCN-LABEL: v_fneg_negk_minimumnum_f32_no_ieee:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
|
||||
; GCN-NEXT: v_max_f32_e32 v0, 4.0, v0
|
||||
; GCN-NEXT: v_max_f32_e64 v0, -v0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%min = call float @llvm.minimumnum.f32(float -4.0, float %a)
|
||||
%fneg = fneg float %min
|
||||
@@ -2251,8 +2246,7 @@ define float @v_fneg_neg0_minimumnum_f32_no_ieee(float %a) #4 {
|
||||
; GCN-LABEL: v_fneg_neg0_minimumnum_f32_no_ieee:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
|
||||
; GCN-NEXT: v_max_f32_e32 v0, 0, v0
|
||||
; GCN-NEXT: v_max_f32_e64 v0, -v0, 0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%min = call float @llvm.minimumnum.f32(float -0.0, float %a)
|
||||
%fneg = fneg float %min
|
||||
@@ -2299,7 +2293,6 @@ define float @v_fneg_0_minimumnum_foldable_use_f32_no_ieee(float %a, float %b) #
|
||||
; GCN-LABEL: v_fneg_0_minimumnum_foldable_use_f32_no_ieee:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, 0, v0
|
||||
; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
@@ -2330,9 +2323,7 @@ define <2 x float> @v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee(float %a,
|
||||
; GCN-LABEL: v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
|
||||
; GCN-NEXT: v_max_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1
|
||||
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%min = call float @llvm.minimumnum.f32(float %a, float %b)
|
||||
@@ -2364,9 +2355,7 @@ define float @v_fneg_maximumnum_f32_no_ieee(float %a, float %b) #4 {
|
||||
; GCN-LABEL: v_fneg_maximumnum_f32_no_ieee:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%max = call float @llvm.maximumnum.f32(float %a, float %b)
|
||||
%fneg = fneg float %max
|
||||
@@ -2389,8 +2378,7 @@ define float @v_fneg_self_maximumnum_f32_no_ieee(float %a) #4 {
|
||||
; GCN-LABEL: v_fneg_self_maximumnum_f32_no_ieee:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, v0, v0
|
||||
; GCN-NEXT: v_min_f32_e64 v0, -v0, -v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%max = call float @llvm.maximumnum.f32(float %a, float %a)
|
||||
%max.fneg = fneg float %max
|
||||
@@ -2413,8 +2401,7 @@ define float @v_fneg_posk_maximumnum_f32_no_ieee(float %a) #4 {
|
||||
; GCN-LABEL: v_fneg_posk_maximumnum_f32_no_ieee:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, -4.0, v0
|
||||
; GCN-NEXT: v_min_f32_e64 v0, -v0, -4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%max = call float @llvm.maximumnum.f32(float 4.0, float %a)
|
||||
%fneg = fneg float %max
|
||||
@@ -2437,8 +2424,7 @@ define float @v_fneg_negk_maximumnum_f32_no_ieee(float %a) #4 {
|
||||
; GCN-LABEL: v_fneg_negk_maximumnum_f32_no_ieee:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, 4.0, v0
|
||||
; GCN-NEXT: v_min_f32_e64 v0, -v0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%max = call float @llvm.maximumnum.f32(float -4.0, float %a)
|
||||
%fneg = fneg float %max
|
||||
@@ -2473,8 +2459,7 @@ define float @v_fneg_neg0_maximumnum_f32_no_ieee(float %a) #4 {
|
||||
; GCN-LABEL: v_fneg_neg0_maximumnum_f32_no_ieee:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, 0, v0
|
||||
; GCN-NEXT: v_min_f32_e64 v0, -v0, 0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%max = call float @llvm.maximumnum.f32(float -0.0, float %a)
|
||||
%fneg = fneg float %max
|
||||
@@ -2499,7 +2484,6 @@ define float @v_fneg_0_maximumnum_foldable_use_f32_no_ieee(float %a, float %b) #
|
||||
; GCN-LABEL: v_fneg_0_maximumnum_foldable_use_f32_no_ieee:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
||||
; GCN-NEXT: v_max_f32_e32 v0, 0, v0
|
||||
; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
@@ -2530,9 +2514,7 @@ define <2 x float> @v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee(float %a,
|
||||
; GCN-LABEL: v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1
|
||||
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%max = call float @llvm.maximumnum.f32(float %a, float %b)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user