[SimplifyCFG] Increase budget for FoldTwoEntryPHINode() if the branch is unpredictable. (#98495)
The `!unpredictable` metadata has been present for a long time, but its use in optimizations is still limited. This patch teaches `FoldTwoEntryPHINode()` to be more aggressive with an unpredictable branch in order to reduce mispredictions. A TTI interface, `getBranchMispredictPenalty()`, is added to distinguish between different hardware so that we don't go too far on simpler cores. For simplicity, only a naive x86 implementation is included for the time being.
This commit is contained in:
@@ -419,6 +419,12 @@ public:
|
||||
/// this factor, it is very likely to be predicted correctly.
|
||||
BranchProbability getPredictableBranchThreshold() const;
|
||||
|
||||
/// Returns estimated penalty of a branch misprediction in latency. Indicates
|
||||
/// how aggressive the target wants to be in eliminating unpredictable branches. A
|
||||
/// zero return value means extra optimization applied to them should be
|
||||
/// minimal.
|
||||
InstructionCost getBranchMispredictPenalty() const;
|
||||
|
||||
/// Return true if branch divergence exists.
|
||||
///
|
||||
/// Branch divergence has a significantly negative impact on GPU performance
|
||||
@@ -1832,6 +1838,7 @@ public:
|
||||
ArrayRef<const Value *> Operands,
|
||||
TargetCostKind CostKind) = 0;
|
||||
virtual BranchProbability getPredictableBranchThreshold() = 0;
|
||||
virtual InstructionCost getBranchMispredictPenalty() = 0;
|
||||
virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
|
||||
virtual bool isSourceOfDivergence(const Value *V) = 0;
|
||||
virtual bool isAlwaysUniform(const Value *V) = 0;
|
||||
@@ -2243,6 +2250,9 @@ public:
|
||||
BranchProbability getPredictableBranchThreshold() override {
|
||||
return Impl.getPredictableBranchThreshold();
|
||||
}
|
||||
InstructionCost getBranchMispredictPenalty() override {
|
||||
return Impl.getBranchMispredictPenalty();
|
||||
}
|
||||
bool hasBranchDivergence(const Function *F = nullptr) override {
|
||||
return Impl.hasBranchDivergence(F);
|
||||
}
|
||||
|
||||
@@ -99,6 +99,8 @@ public:
|
||||
return BranchProbability(99, 100);
|
||||
}
|
||||
|
||||
InstructionCost getBranchMispredictPenalty() const { return 0; }
|
||||
|
||||
bool hasBranchDivergence(const Function *F = nullptr) const { return false; }
|
||||
|
||||
bool isSourceOfDivergence(const Value *V) const { return false; }
|
||||
|
||||
@@ -30,6 +30,7 @@ struct SimplifyCFGOptions {
|
||||
bool SinkCommonInsts = false;
|
||||
bool SimplifyCondBranch = true;
|
||||
bool SpeculateBlocks = true;
|
||||
bool SpeculateUnpredictables = false;
|
||||
|
||||
AssumptionCache *AC = nullptr;
|
||||
|
||||
@@ -75,6 +76,10 @@ struct SimplifyCFGOptions {
|
||||
SpeculateBlocks = B;
|
||||
return *this;
|
||||
}
|
||||
SimplifyCFGOptions &speculateUnpredictables(bool B) {
|
||||
SpeculateUnpredictables = B;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
@@ -279,6 +279,10 @@ BranchProbability TargetTransformInfo::getPredictableBranchThreshold() const {
|
||||
: TTIImpl->getPredictableBranchThreshold();
|
||||
}
|
||||
|
||||
InstructionCost TargetTransformInfo::getBranchMispredictPenalty() const {
|
||||
return TTIImpl->getBranchMispredictPenalty();
|
||||
}
|
||||
|
||||
bool TargetTransformInfo::hasBranchDivergence(const Function *F) const {
|
||||
return TTIImpl->hasBranchDivergence(F);
|
||||
}
|
||||
|
||||
@@ -845,6 +845,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
|
||||
Result.hoistCommonInsts(Enable);
|
||||
} else if (ParamName == "sink-common-insts") {
|
||||
Result.sinkCommonInsts(Enable);
|
||||
} else if (ParamName == "speculate-unpredictables") {
|
||||
Result.speculateUnpredictables(Enable);
|
||||
} else if (Enable && ParamName.consume_front("bonus-inst-threshold=")) {
|
||||
APInt BonusInstThreshold;
|
||||
if (ParamName.getAsInteger(0, BonusInstThreshold))
|
||||
|
||||
@@ -1515,8 +1515,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
|
||||
|
||||
// LoopSink (and other loop passes since the last simplifyCFG) might have
|
||||
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
|
||||
OptimizePM.addPass(
|
||||
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
|
||||
OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
|
||||
.convertSwitchRangeToICmp(true)
|
||||
.speculateUnpredictables(true)));
|
||||
|
||||
// Add the core optimizing pipeline.
|
||||
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
|
||||
@@ -2034,9 +2035,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
|
||||
LateFPM.addPass(DivRemPairsPass());
|
||||
|
||||
// Delete basic blocks, which optimization passes may have killed.
|
||||
LateFPM.addPass(SimplifyCFGPass(
|
||||
SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
|
||||
true)));
|
||||
LateFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
|
||||
.convertSwitchRangeToICmp(true)
|
||||
.hoistCommonInsts(true)
|
||||
.speculateUnpredictables(true)));
|
||||
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
|
||||
|
||||
// Drop bodies of available externally objects to improve GlobalDCE.
|
||||
|
||||
@@ -6756,3 +6756,8 @@ InstructionCost X86TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
|
||||
return AM.Scale != 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
InstructionCost X86TTIImpl::getBranchMispredictPenalty() const {
|
||||
// TODO: Hook MispredictPenalty of SchedMachineModel into this.
|
||||
return 14;
|
||||
}
|
||||
|
||||
@@ -294,6 +294,8 @@ public:
|
||||
bool supportsEfficientVectorElementLoadStore() const;
|
||||
bool enableInterleavedAccessVectorization();
|
||||
|
||||
InstructionCost getBranchMispredictPenalty() const;
|
||||
|
||||
private:
|
||||
bool supportsGather() const;
|
||||
InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,
|
||||
|
||||
@@ -77,6 +77,9 @@ static cl::opt<bool> UserSinkCommonInsts(
|
||||
"sink-common-insts", cl::Hidden, cl::init(false),
|
||||
cl::desc("Sink common instructions (default = false)"));
|
||||
|
||||
static cl::opt<bool> UserSpeculateUnpredictables(
|
||||
"speculate-unpredictables", cl::Hidden, cl::init(false),
|
||||
cl::desc("Speculate unpredictable branches (default = false)"));
|
||||
|
||||
STATISTIC(NumSimpl, "Number of blocks simplified");
|
||||
|
||||
@@ -325,6 +328,8 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
|
||||
Options.HoistCommonInsts = UserHoistCommonInsts;
|
||||
if (UserSinkCommonInsts.getNumOccurrences())
|
||||
Options.SinkCommonInsts = UserSinkCommonInsts;
|
||||
if (UserSpeculateUnpredictables.getNumOccurrences())
|
||||
Options.SpeculateUnpredictables = UserSpeculateUnpredictables;
|
||||
}
|
||||
|
||||
SimplifyCFGPass::SimplifyCFGPass() {
|
||||
@@ -351,7 +356,9 @@ void SimplifyCFGPass::printPipeline(
|
||||
OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;";
|
||||
OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts;";
|
||||
OS << (Options.SpeculateBlocks ? "" : "no-") << "speculate-blocks;";
|
||||
OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch";
|
||||
OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch;";
|
||||
OS << (Options.SpeculateUnpredictables ? "" : "no-")
|
||||
<< "speculate-unpredictables";
|
||||
OS << '>';
|
||||
}
|
||||
|
||||
|
||||
@@ -3476,7 +3476,8 @@ static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI,
|
||||
/// Given a BB that starts with the specified two-entry PHI node,
|
||||
/// see if we can eliminate it.
|
||||
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
|
||||
DomTreeUpdater *DTU, const DataLayout &DL) {
|
||||
DomTreeUpdater *DTU, const DataLayout &DL,
|
||||
bool SpeculateUnpredictables) {
|
||||
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
|
||||
// statement", which has a very simple dominance structure. Basically, we
|
||||
// are trying to find the condition that is being branched on, which
|
||||
@@ -3508,7 +3509,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
|
||||
// jump to one specific 'then' block (if we have two of them).
|
||||
// It isn't beneficial to speculatively execute the code
|
||||
// from the block that we know is predictably not entered.
|
||||
if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
|
||||
bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
|
||||
if (!IsUnpredictable) {
|
||||
uint64_t TWeight, FWeight;
|
||||
if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
|
||||
(TWeight + FWeight) != 0) {
|
||||
@@ -3551,6 +3553,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
|
||||
InstructionCost Cost = 0;
|
||||
InstructionCost Budget =
|
||||
TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
|
||||
if (SpeculateUnpredictables && IsUnpredictable)
|
||||
Budget += TTI.getBranchMispredictPenalty();
|
||||
|
||||
bool Changed = false;
|
||||
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
|
||||
@@ -3620,8 +3624,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
|
||||
[](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
|
||||
return Changed;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond
|
||||
<< " T: " << IfTrue->getName()
|
||||
LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
|
||||
if (IsUnpredictable) dbgs() << " (unpredictable)";
|
||||
dbgs() << " T: " << IfTrue->getName()
|
||||
<< " F: " << IfFalse->getName() << "\n");
|
||||
|
||||
// If we can still promote the PHI nodes after this gauntlet of tests,
|
||||
@@ -7814,7 +7819,8 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
|
||||
// eliminate it, do so now.
|
||||
if (auto *PN = dyn_cast<PHINode>(BB->begin()))
|
||||
if (PN->getNumIncomingValues() == 2)
|
||||
if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
|
||||
if (FoldTwoEntryPHINode(PN, TTI, DTU, DL,
|
||||
Options.SpeculateUnpredictables))
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -49,8 +49,8 @@
|
||||
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(print<stack-lifetime><may>,print<stack-lifetime><must>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-17
|
||||
; CHECK-17: function(print<stack-lifetime><may>,print<stack-lifetime><must>)
|
||||
|
||||
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
|
||||
; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch>)
|
||||
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
|
||||
; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)
|
||||
|
||||
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only>,loop-vectorize<interleave-forced-only;vectorize-forced-only>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-19
|
||||
; CHECK-19: function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,loop-vectorize<interleave-forced-only;vectorize-forced-only;>)
|
||||
|
||||
@@ -0,0 +1,100 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
|
||||
; Two-entry phi nodes with unpredictable conditions may get increased budget for folding.
|
||||
; RUN: opt < %s -S -passes=simplifycfg | FileCheck --check-prefix=CHECK-NOFOLD %s
|
||||
; RUN: opt < %s -S -passes='simplifycfg<speculate-unpredictables>' | FileCheck --check-prefix=CHECK-NOFOLD %s
|
||||
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s -S -passes=simplifycfg | FileCheck --check-prefix=CHECK-NOFOLD %s
|
||||
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s -S -passes='simplifycfg<speculate-unpredictables>' | FileCheck --check-prefix=CHECK-FOLD %s
|
||||
|
||||
define { <2 x float>, <2 x float> } @foo(float %arg, <2 x float> %arg1, <2 x float> %arg2) #0 {
|
||||
; CHECK-NOFOLD-LABEL: define { <2 x float>, <2 x float> } @foo(
|
||||
; CHECK-NOFOLD-SAME: float [[ARG:%.*]], <2 x float> [[ARG1:%.*]], <2 x float> [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NOFOLD-NEXT: [[BB:.*]]:
|
||||
; CHECK-NOFOLD-NEXT: [[I:%.*]] = fcmp fast ogt float [[ARG]], 0x3F747AE140000000
|
||||
; CHECK-NOFOLD-NEXT: br i1 [[I]], label %[[BB3:.*]], label %[[BB20:.*]], !unpredictable [[META0:![0-9]+]]
|
||||
; CHECK-NOFOLD: [[BB3]]:
|
||||
; CHECK-NOFOLD-NEXT: [[I4:%.*]] = extractelement <2 x float> [[ARG1]], i64 0
|
||||
; CHECK-NOFOLD-NEXT: [[I5:%.*]] = fmul fast float [[I4]], [[I4]]
|
||||
; CHECK-NOFOLD-NEXT: [[I6:%.*]] = extractelement <2 x float> [[ARG1]], i64 1
|
||||
; CHECK-NOFOLD-NEXT: [[I7:%.*]] = fmul fast float [[I6]], [[I6]]
|
||||
; CHECK-NOFOLD-NEXT: [[I8:%.*]] = fadd fast float [[I7]], [[I5]]
|
||||
; CHECK-NOFOLD-NEXT: [[I9:%.*]] = extractelement <2 x float> [[ARG2]], i64 0
|
||||
; CHECK-NOFOLD-NEXT: [[I10:%.*]] = fmul fast float [[I9]], [[I9]]
|
||||
; CHECK-NOFOLD-NEXT: [[I11:%.*]] = fadd fast float [[I8]], [[I10]]
|
||||
; CHECK-NOFOLD-NEXT: [[I12:%.*]] = tail call fast noundef float @llvm.sqrt.f32(float [[I11]])
|
||||
; CHECK-NOFOLD-NEXT: [[I13:%.*]] = fdiv fast float 0x3FEFD70A40000000, [[I12]]
|
||||
; CHECK-NOFOLD-NEXT: [[I14:%.*]] = fmul fast float [[I13]], [[I4]]
|
||||
; CHECK-NOFOLD-NEXT: [[I15:%.*]] = insertelement <2 x float> poison, float [[I14]], i64 0
|
||||
; CHECK-NOFOLD-NEXT: [[I16:%.*]] = fmul fast float [[I13]], [[I6]]
|
||||
; CHECK-NOFOLD-NEXT: [[I17:%.*]] = insertelement <2 x float> [[I15]], float [[I16]], i64 1
|
||||
; CHECK-NOFOLD-NEXT: [[I18:%.*]] = fmul fast float [[I13]], [[I9]]
|
||||
; CHECK-NOFOLD-NEXT: [[I19:%.*]] = insertelement <2 x float> [[ARG2]], float [[I18]], i64 0
|
||||
; CHECK-NOFOLD-NEXT: br label %[[BB20]]
|
||||
; CHECK-NOFOLD: [[BB20]]:
|
||||
; CHECK-NOFOLD-NEXT: [[I21:%.*]] = phi nsz <2 x float> [ [[I17]], %[[BB3]] ], [ zeroinitializer, %[[BB]] ]
|
||||
; CHECK-NOFOLD-NEXT: [[I22:%.*]] = phi nsz <2 x float> [ [[I19]], %[[BB3]] ], [ zeroinitializer, %[[BB]] ]
|
||||
; CHECK-NOFOLD-NEXT: [[I23:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[I21]], 0
|
||||
; CHECK-NOFOLD-NEXT: [[I24:%.*]] = insertvalue { <2 x float>, <2 x float> } [[I23]], <2 x float> [[I22]], 1
|
||||
; CHECK-NOFOLD-NEXT: ret { <2 x float>, <2 x float> } [[I24]]
|
||||
;
|
||||
; CHECK-FOLD-LABEL: define { <2 x float>, <2 x float> } @foo(
|
||||
; CHECK-FOLD-SAME: float [[ARG:%.*]], <2 x float> [[ARG1:%.*]], <2 x float> [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-FOLD-NEXT: [[BB:.*:]]
|
||||
; CHECK-FOLD-NEXT: [[I:%.*]] = fcmp fast ogt float [[ARG]], 0x3F747AE140000000
|
||||
; CHECK-FOLD-NEXT: [[I4:%.*]] = extractelement <2 x float> [[ARG1]], i64 0
|
||||
; CHECK-FOLD-NEXT: [[I5:%.*]] = fmul fast float [[I4]], [[I4]]
|
||||
; CHECK-FOLD-NEXT: [[I6:%.*]] = extractelement <2 x float> [[ARG1]], i64 1
|
||||
; CHECK-FOLD-NEXT: [[I7:%.*]] = fmul fast float [[I6]], [[I6]]
|
||||
; CHECK-FOLD-NEXT: [[I8:%.*]] = fadd fast float [[I7]], [[I5]]
|
||||
; CHECK-FOLD-NEXT: [[I9:%.*]] = extractelement <2 x float> [[ARG2]], i64 0
|
||||
; CHECK-FOLD-NEXT: [[I10:%.*]] = fmul fast float [[I9]], [[I9]]
|
||||
; CHECK-FOLD-NEXT: [[I11:%.*]] = fadd fast float [[I8]], [[I10]]
|
||||
; CHECK-FOLD-NEXT: [[I12:%.*]] = tail call fast float @llvm.sqrt.f32(float [[I11]])
|
||||
; CHECK-FOLD-NEXT: [[I13:%.*]] = fdiv fast float 0x3FEFD70A40000000, [[I12]]
|
||||
; CHECK-FOLD-NEXT: [[I14:%.*]] = fmul fast float [[I13]], [[I4]]
|
||||
; CHECK-FOLD-NEXT: [[I15:%.*]] = insertelement <2 x float> poison, float [[I14]], i64 0
|
||||
; CHECK-FOLD-NEXT: [[I16:%.*]] = fmul fast float [[I13]], [[I6]]
|
||||
; CHECK-FOLD-NEXT: [[I17:%.*]] = insertelement <2 x float> [[I15]], float [[I16]], i64 1
|
||||
; CHECK-FOLD-NEXT: [[I18:%.*]] = fmul fast float [[I13]], [[I9]]
|
||||
; CHECK-FOLD-NEXT: [[I19:%.*]] = insertelement <2 x float> [[ARG2]], float [[I18]], i64 0
|
||||
; CHECK-FOLD-NEXT: [[I21:%.*]] = select nsz i1 [[I]], <2 x float> [[I17]], <2 x float> zeroinitializer, !unpredictable [[META0:![0-9]+]]
|
||||
; CHECK-FOLD-NEXT: [[I22:%.*]] = select nsz i1 [[I]], <2 x float> [[I19]], <2 x float> zeroinitializer, !unpredictable [[META0]]
|
||||
; CHECK-FOLD-NEXT: [[I23:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[I21]], 0
|
||||
; CHECK-FOLD-NEXT: [[I24:%.*]] = insertvalue { <2 x float>, <2 x float> } [[I23]], <2 x float> [[I22]], 1
|
||||
; CHECK-FOLD-NEXT: ret { <2 x float>, <2 x float> } [[I24]]
|
||||
;
|
||||
bb:
|
||||
%i = fcmp fast ogt float %arg, 0x3F747AE140000000
|
||||
br i1 %i, label %bb3, label %bb20, !unpredictable !0
|
||||
|
||||
bb3: ; preds = %bb
|
||||
%i4 = extractelement <2 x float> %arg1, i64 0
|
||||
%i5 = fmul fast float %i4, %i4
|
||||
%i6 = extractelement <2 x float> %arg1, i64 1
|
||||
%i7 = fmul fast float %i6, %i6
|
||||
%i8 = fadd fast float %i7, %i5
|
||||
%i9 = extractelement <2 x float> %arg2, i64 0
|
||||
%i10 = fmul fast float %i9, %i9
|
||||
%i11 = fadd fast float %i8, %i10
|
||||
%i12 = tail call fast noundef float @llvm.sqrt.f32(float %i11)
|
||||
%i13 = fdiv fast float 0x3FEFD70A40000000, %i12
|
||||
%i14 = fmul fast float %i13, %i4
|
||||
%i15 = insertelement <2 x float> poison, float %i14, i64 0
|
||||
%i16 = fmul fast float %i13, %i6
|
||||
%i17 = insertelement <2 x float> %i15, float %i16, i64 1
|
||||
%i18 = fmul fast float %i13, %i9
|
||||
%i19 = insertelement <2 x float> %arg2, float %i18, i64 0
|
||||
br label %bb20
|
||||
|
||||
bb20: ; preds = %bb3, %bb
|
||||
%i21 = phi nsz <2 x float> [ %i17, %bb3 ], [ zeroinitializer, %bb ]
|
||||
%i22 = phi nsz <2 x float> [ %i19, %bb3 ], [ zeroinitializer, %bb ]
|
||||
%i23 = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> %i21, 0
|
||||
%i24 = insertvalue { <2 x float>, <2 x float> } %i23, <2 x float> %i22, 1
|
||||
ret { <2 x float>, <2 x float> } %i24
|
||||
}
|
||||
|
||||
declare float @llvm.sqrt.f32(float)
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
||||
!0 = !{}
|
||||
Reference in New Issue
Block a user