TargetInstrInfo: make getOperandLatency return optional (NFC) (#73769)

getOperandLatency has the following behavior: it returns -1 as a special
value meaning that no operand latency is available, negative numbers other
than -1 on some target-specific overrides, or a valid non-negative latency.
This behavior can be surprising, as
some callers do arithmetic on these negative values. Change the
interface of getOperandLatency to return a std::optional<unsigned> to
prevent surprises in callers. While at it, change the interface of
getInstrLatency to return unsigned instead of int.

This change was inspired by a refactoring in
TargetSchedModel::computeOperandLatency.
This commit is contained in:
Ramkumar Ramachandra
2023-12-01 11:29:19 +00:00
committed by GitHub
parent 460faa0c87
commit 9468de48fc
13 changed files with 207 additions and 204 deletions

View File

@@ -1706,9 +1706,9 @@ public:
return Opcode <= TargetOpcode::COPY;
}
virtual int getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
virtual std::optional<unsigned>
getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode,
unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const;
/// Compute and return the use operand latency of a given pair of def and use.
/// In most cases, the static scheduling itinerary was enough to determine the
@@ -1718,10 +1718,10 @@ public:
/// This is a raw interface to the itinerary that may be directly overridden
/// by a target. Use computeOperandLatency to get the best estimate of
/// latency.
virtual int getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr &DefMI, unsigned DefIdx,
const MachineInstr &UseMI,
unsigned UseIdx) const;
virtual std::optional<unsigned>
getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr &DefMI, unsigned DefIdx,
const MachineInstr &UseMI, unsigned UseIdx) const;
/// Compute the instruction latency of a given instruction.
/// If the instruction has higher cost when predicated, it's returned via
@@ -1732,8 +1732,8 @@ public:
virtual unsigned getPredicationCost(const MachineInstr &MI) const;
virtual int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const;
virtual unsigned getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const;
/// Return the default expected latency for a def based on its opcode.
unsigned defaultDefLatency(const MCSchedModel &SchedModel,

View File

@@ -17,6 +17,7 @@
#include "llvm/MC/MCSchedule.h"
#include <algorithm>
#include <optional>
namespace llvm {
@@ -162,18 +163,19 @@ public:
return Latency;
}
/// Return the cycle for the given class and operand. Return -1 if no
/// cycle is specified for the operand.
int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const {
/// Return the cycle for the given class and operand. Return std::nullopt if
/// the information is not available for the operand.
std::optional<unsigned> getOperandCycle(unsigned ItinClassIndx,
unsigned OperandIdx) const {
if (isEmpty())
return -1;
return std::nullopt;
unsigned FirstIdx = Itineraries[ItinClassIndx].FirstOperandCycle;
unsigned LastIdx = Itineraries[ItinClassIndx].LastOperandCycle;
if ((FirstIdx + OperandIdx) >= LastIdx)
return -1;
return std::nullopt;
return (int)OperandCycles[FirstIdx + OperandIdx];
return OperandCycles[FirstIdx + OperandIdx];
}
/// Return true if there is a pipeline forwarding between instructions
@@ -201,25 +203,27 @@ public:
/// Compute and return the use operand latency of a given itinerary
/// class and operand index if the value is produced by an instruction of the
/// specified itinerary class and def operand index.
int getOperandLatency(unsigned DefClass, unsigned DefIdx,
unsigned UseClass, unsigned UseIdx) const {
/// specified itinerary class and def operand index. Return std::nullopt if
/// the information is not available for the operand.
std::optional<unsigned> getOperandLatency(unsigned DefClass, unsigned DefIdx,
unsigned UseClass,
unsigned UseIdx) const {
if (isEmpty())
return -1;
return std::nullopt;
int DefCycle = getOperandCycle(DefClass, DefIdx);
if (DefCycle == -1)
return -1;
std::optional<unsigned> DefCycle = getOperandCycle(DefClass, DefIdx);
std::optional<unsigned> UseCycle = getOperandCycle(UseClass, UseIdx);
if (!DefCycle || !UseCycle)
return std::nullopt;
int UseCycle = getOperandCycle(UseClass, UseIdx);
if (UseCycle == -1)
return -1;
if (UseCycle > *DefCycle + 1)
return std::nullopt;
UseCycle = DefCycle - UseCycle + 1;
UseCycle = *DefCycle - *UseCycle + 1;
if (UseCycle > 0 &&
hasPipelineForwarding(DefClass, DefIdx, UseClass, UseIdx))
// FIXME: This assumes one cycle benefit for every pipeline forwarding.
--UseCycle;
UseCycle = *UseCycle - 1;
return UseCycle;
}

View File

@@ -659,7 +659,8 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
if (Use->isMachineOpcode())
// Adjust the use operand index by num of defs.
OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
std::optional<unsigned> Latency =
TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
!BB->succ_empty()) {
unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
@@ -667,10 +668,10 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
// This copy is a liveout value. It is likely coalesced, so reduce the
// latency so as not to penalize the def.
// FIXME: need target specific adjustment here?
Latency = Latency - 1;
Latency = *Latency - 1;
}
if (Latency >= 0)
dep.setLatency(Latency);
if (Latency)
dep.setLatency(*Latency);
}
void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const {

View File

@@ -1379,15 +1379,15 @@ bool TargetInstrInfo::getMemOperandWithOffset(
// SelectionDAG latency interface.
//===----------------------------------------------------------------------===//
int
std::optional<unsigned>
TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const {
if (!ItinData || ItinData->isEmpty())
return -1;
return std::nullopt;
if (!DefNode->isMachineOpcode())
return -1;
return std::nullopt;
unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
if (!UseNode->isMachineOpcode())
@@ -1396,8 +1396,8 @@ TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}
int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
SDNode *N) const {
unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
SDNode *N) const {
if (!ItinData || ItinData->isEmpty())
return 1;
@@ -1461,8 +1461,9 @@ bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
return false;
unsigned DefClass = DefMI.getDesc().getSchedClass();
int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
return (DefCycle != -1 && DefCycle <= 1);
std::optional<unsigned> DefCycle =
ItinData->getOperandCycle(DefClass, DefIdx);
return DefCycle <= 1;
}
bool TargetInstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
@@ -1580,11 +1581,9 @@ unsigned TargetInstrInfo::getCallFrameSizeAt(MachineInstr &MI) const {
/// Both DefMI and UseMI must be valid. By default, call directly to the
/// itinerary. This may be overridden by the target.
int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr &DefMI,
unsigned DefIdx,
const MachineInstr &UseMI,
unsigned UseIdx) const {
std::optional<unsigned> TargetInstrInfo::getOperandLatency(
const InstrItineraryData *ItinData, const MachineInstr &DefMI,
unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
unsigned DefClass = DefMI.getDesc().getSchedClass();
unsigned UseClass = UseMI.getDesc().getSchedClass();
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);

View File

@@ -168,16 +168,20 @@ static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
return UseIdx;
}
// Top-level API for clients that know the operand indices.
// Top-level API for clients that know the operand indices. This doesn't need to
// return std::optional<unsigned>, as it always returns a valid latency.
unsigned TargetSchedModel::computeOperandLatency(
const MachineInstr *DefMI, unsigned DefOperIdx,
const MachineInstr *UseMI, unsigned UseOperIdx) const {
const unsigned InstrLatency = computeInstrLatency(DefMI);
const unsigned DefaultDefLatency = TII->defaultDefLatency(SchedModel, *DefMI);
if (!hasInstrSchedModel() && !hasInstrItineraries())
return TII->defaultDefLatency(SchedModel, *DefMI);
return InstrLatency;
if (hasInstrItineraries()) {
int OperLatency = 0;
std::optional<unsigned> OperLatency;
if (UseMI) {
OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx,
*UseMI, UseOperIdx);
@@ -186,21 +190,13 @@ unsigned TargetSchedModel::computeOperandLatency(
unsigned DefClass = DefMI->getDesc().getSchedClass();
OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
}
if (OperLatency >= 0)
return OperLatency;
// No operand latency was found.
unsigned InstrLatency = TII->getInstrLatency(&InstrItins, *DefMI);
// Expected latency is the max of the stage latency and itinerary props.
// Rather than directly querying InstrItins stage latency, we call a TII
// hook to allow subtargets to specialize latency. This hook is only
// applicable to the InstrItins model. InstrSchedModel should model all
// special cases without TII hooks.
InstrLatency =
std::max(InstrLatency, TII->defaultDefLatency(SchedModel, *DefMI));
return InstrLatency;
// Expected latency is the max of InstrLatency and DefaultDefLatency, if we
// didn't find an operand latency.
return OperLatency ? *OperLatency
: std::max(InstrLatency, DefaultDefLatency);
}
// hasInstrSchedModel()
const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
@@ -237,7 +233,7 @@ unsigned TargetSchedModel::computeOperandLatency(
// FIXME: Automatically giving all implicit defs defaultDefLatency is
// undesirable. We should only do it for defs that are known to the MC
// desc like flags. Truly implicit defs should get 1 cycle latency.
return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, *DefMI);
return DefMI->isTransient() ? 0 : DefaultDefLatency;
}
unsigned

View File

@@ -180,12 +180,13 @@ static int getItineraryLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
const MCInstrDesc& Desc = DC->getInstrInfo()->get(Inst.getOpcode());
unsigned SCClass = Desc.getSchedClass();
int Latency = 0;
for (unsigned OpIdx = 0, OpIdxEnd = Inst.getNumOperands(); OpIdx != OpIdxEnd;
++OpIdx)
Latency = std::max(Latency, IID.getOperandCycle(SCClass, OpIdx));
unsigned Latency = 0;
return Latency;
for (unsigned Idx = 0, IdxEnd = Inst.getNumOperands(); Idx != IdxEnd; ++Idx)
if (std::optional<unsigned> OperCycle = IID.getOperandCycle(SCClass, Idx))
Latency = std::max(Latency, *OperCycle);
return (int)Latency;
}
/// Gets latency information for \p Inst, based on \p DC information.

View File

@@ -3872,17 +3872,16 @@ unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
llvm_unreachable("Didn't find the number of microops");
}
int
std::optional<unsigned>
ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefClass,
const MCInstrDesc &DefMCID, unsigned DefClass,
unsigned DefIdx, unsigned DefAlign) const {
int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
if (RegNo <= 0)
// Def is the address writeback.
return ItinData->getOperandCycle(DefClass, DefIdx);
int DefCycle;
unsigned DefCycle;
if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
// (regno / 2) + (regno % 2) + 1
DefCycle = RegNo / 2 + 1;
@@ -3913,17 +3912,16 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
return DefCycle;
}
int
std::optional<unsigned>
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefClass,
const MCInstrDesc &DefMCID, unsigned DefClass,
unsigned DefIdx, unsigned DefAlign) const {
int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
if (RegNo <= 0)
// Def is the address writeback.
return ItinData->getOperandCycle(DefClass, DefIdx);
int DefCycle;
unsigned DefCycle;
if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
// 4 registers would be issued: 1, 2, 1.
// 5 registers would be issued: 1, 2, 2.
@@ -3948,16 +3946,15 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
return DefCycle;
}
int
std::optional<unsigned>
ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &UseMCID,
unsigned UseClass,
const MCInstrDesc &UseMCID, unsigned UseClass,
unsigned UseIdx, unsigned UseAlign) const {
int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
if (RegNo <= 0)
return ItinData->getOperandCycle(UseClass, UseIdx);
int UseCycle;
unsigned UseCycle;
if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
// (regno / 2) + (regno % 2) + 1
UseCycle = RegNo / 2 + 1;
@@ -3988,16 +3985,15 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
return UseCycle;
}
int
std::optional<unsigned>
ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &UseMCID,
unsigned UseClass,
const MCInstrDesc &UseMCID, unsigned UseClass,
unsigned UseIdx, unsigned UseAlign) const {
int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
if (RegNo <= 0)
return ItinData->getOperandCycle(UseClass, UseIdx);
int UseCycle;
unsigned UseCycle;
if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
UseCycle = RegNo / 2;
if (UseCycle < 2)
@@ -4017,12 +4013,10 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
return UseCycle;
}
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefIdx, unsigned DefAlign,
const MCInstrDesc &UseMCID,
unsigned UseIdx, unsigned UseAlign) const {
std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
unsigned UseIdx, unsigned UseAlign) const {
unsigned DefClass = DefMCID.getSchedClass();
unsigned UseClass = UseMCID.getSchedClass();
@@ -4032,7 +4026,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// This may be a def / use of a variable_ops instruction, the operand
// latency might be determinable dynamically. Let the target try to
// figure it out.
int DefCycle = -1;
std::optional<unsigned> DefCycle;
bool LdmBypass = false;
switch (DefMCID.getOpcode()) {
default:
@@ -4070,11 +4064,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
break;
}
if (DefCycle == -1)
if (!DefCycle)
// We can't seem to determine the result latency of the def, assume it's 2.
DefCycle = 2;
int UseCycle = -1;
std::optional<unsigned> UseCycle;
switch (UseMCID.getOpcode()) {
default:
UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
@@ -4108,21 +4102,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
break;
}
if (UseCycle == -1)
if (!UseCycle)
// Assume it's read in the first stage.
UseCycle = 1;
UseCycle = DefCycle - UseCycle + 1;
if (UseCycle > *DefCycle + 1)
return std::nullopt;
UseCycle = *DefCycle - *UseCycle + 1;
if (UseCycle > 0) {
if (LdmBypass) {
// It's a variable_ops instruction so we can't use DefIdx here. Just use
// first def operand.
if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
UseClass, UseIdx))
--UseCycle;
UseCycle = *UseCycle - 1;
} else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
UseClass, UseIdx)) {
--UseCycle;
UseCycle = *UseCycle - 1;
}
}
@@ -4362,14 +4359,12 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
return Adjust;
}
int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr &DefMI,
unsigned DefIdx,
const MachineInstr &UseMI,
unsigned UseIdx) const {
std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
const InstrItineraryData *ItinData, const MachineInstr &DefMI,
unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
// No operand latency. The caller may fall back to getInstrLatency.
if (!ItinData || ItinData->isEmpty())
return -1;
return std::nullopt;
const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
Register Reg = DefMO.getReg();
@@ -4390,7 +4385,7 @@ int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
ResolvedUseMI =
getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
if (!ResolvedUseMI)
return -1;
return std::nullopt;
}
return getOperandLatencyImpl(
@@ -4398,7 +4393,7 @@ int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
}
int ARMBaseInstrInfo::getOperandLatencyImpl(
std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
const InstrItineraryData *ItinData, const MachineInstr &DefMI,
unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
@@ -4430,7 +4425,7 @@ int ARMBaseInstrInfo::getOperandLatencyImpl(
}
if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
return -1;
return std::nullopt;
unsigned DefAlign = DefMI.hasOneMemOperand()
? (*DefMI.memoperands_begin())->getAlign().value()
@@ -4440,25 +4435,25 @@ int ARMBaseInstrInfo::getOperandLatencyImpl(
: 0;
// Get the itinerary's latency if possible, and handle variable_ops.
int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
UseIdx, UseAlign);
std::optional<unsigned> Latency = getOperandLatency(
ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
// Unable to find operand latency. The caller may resort to getInstrLatency.
if (Latency < 0)
return Latency;
if (!Latency)
return std::nullopt;
// Adjust for IT block position.
int Adj = DefAdj + UseAdj;
// Adjust for dynamic def-side opcode variants not captured by the itinerary.
Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
if (Adj >= 0 || (int)Latency > -Adj) {
return Latency + Adj;
if (Adj >= 0 || (int)*Latency > -Adj) {
return *Latency + Adj;
}
// Return the itinerary latency, which may be zero but not less than zero.
return Latency;
}
int
std::optional<unsigned>
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const {
@@ -4474,10 +4469,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return DefMCID.mayLoad() ? 3 : 1;
if (!UseNode->isMachineOpcode()) {
int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
std::optional<unsigned> Latency =
ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
int Threshold = 1 + Adj;
return Latency <= Threshold ? 1 : Latency - Adj;
return !Latency || Latency <= Threshold ? 1 : *Latency - Adj;
}
const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
@@ -4489,8 +4485,10 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
unsigned UseAlign = !UseMN->memoperands_empty()
? (*UseMN->memoperands_begin())->getAlign().value()
: 0;
int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
UseMCID, UseIdx, UseAlign);
std::optional<unsigned> Latency = getOperandLatency(
ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
if (!Latency)
return std::nullopt;
if (Latency > 1 &&
(Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
@@ -4506,7 +4504,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (ShImm == 0 ||
(ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
--Latency;
Latency = *Latency - 1;
break;
}
case ARM::t2LDRs:
@@ -4517,7 +4515,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
unsigned ShAmt =
cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
if (ShAmt == 0 || ShAmt == 2)
--Latency;
Latency = *Latency - 1;
break;
}
}
@@ -4534,9 +4532,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
if (ShImm == 0 ||
((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
Latency -= 2;
Latency = *Latency - 2;
else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
--Latency;
Latency = *Latency - 1;
break;
}
case ARM::t2LDRs:
@@ -4544,7 +4542,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::t2LDRHs:
case ARM::t2LDRSHs:
// Thumb2 mode: lsl 0-3 only.
Latency -= 2;
Latency = *Latency - 2;
break;
}
}
@@ -4710,7 +4708,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4LNq32Pseudo_UPD:
// If the address is not 64-bit aligned, the latencies of these
// instructions increases by one.
++Latency;
Latency = *Latency + 1;
break;
}
@@ -4787,8 +4785,8 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return Latency;
}
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const {
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const {
if (!Node->isMachineOpcode())
return 1;
@@ -4836,8 +4834,9 @@ bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
if (DDomain == ARMII::DomainGeneral) {
unsigned DefClass = DefMI.getDesc().getSchedClass();
int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
return (DefCycle != -1 && DefCycle <= 2);
std::optional<unsigned> DefCycle =
ItinData->getOperandCycle(DefClass, DefIdx);
return DefCycle <= 2;
}
return false;
}

View File

@@ -316,13 +316,15 @@ public:
unsigned getNumMicroOps(const InstrItineraryData *ItinData,
const MachineInstr &MI) const override;
int getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr &DefMI, unsigned DefIdx,
const MachineInstr &UseMI,
unsigned UseIdx) const override;
int getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const override;
std::optional<unsigned> getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr &DefMI,
unsigned DefIdx,
const MachineInstr &UseMI,
unsigned UseIdx) const override;
std::optional<unsigned> getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode,
unsigned UseIdx) const override;
/// VFP/NEON execution domains.
std::pair<uint16_t, uint16_t>
@@ -421,34 +423,34 @@ private:
unsigned getInstBundleLength(const MachineInstr &MI) const;
int getVLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefClass,
unsigned DefIdx, unsigned DefAlign) const;
int getLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefClass,
unsigned DefIdx, unsigned DefAlign) const;
int getVSTMUseCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &UseMCID,
unsigned UseClass,
unsigned UseIdx, unsigned UseAlign) const;
int getSTMUseCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &UseMCID,
unsigned UseClass,
unsigned UseIdx, unsigned UseAlign) const;
int getOperandLatency(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefIdx, unsigned DefAlign,
const MCInstrDesc &UseMCID,
unsigned UseIdx, unsigned UseAlign) const;
std::optional<unsigned> getVLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefClass, unsigned DefIdx,
unsigned DefAlign) const;
std::optional<unsigned> getLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefClass, unsigned DefIdx,
unsigned DefAlign) const;
std::optional<unsigned> getVSTMUseCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &UseMCID,
unsigned UseClass, unsigned UseIdx,
unsigned UseAlign) const;
std::optional<unsigned> getSTMUseCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &UseMCID,
unsigned UseClass, unsigned UseIdx,
unsigned UseAlign) const;
std::optional<unsigned> getOperandLatency(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefIdx, unsigned DefAlign,
const MCInstrDesc &UseMCID,
unsigned UseIdx,
unsigned UseAlign) const;
int getOperandLatencyImpl(const InstrItineraryData *ItinData,
const MachineInstr &DefMI, unsigned DefIdx,
const MCInstrDesc &DefMCID, unsigned DefAdj,
const MachineOperand &DefMO, unsigned Reg,
const MachineInstr &UseMI, unsigned UseIdx,
const MCInstrDesc &UseMCID, unsigned UseAdj) const;
std::optional<unsigned> getOperandLatencyImpl(
const InstrItineraryData *ItinData, const MachineInstr &DefMI,
unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const;
unsigned getPredicationCost(const MachineInstr &MI) const override;
@@ -456,8 +458,8 @@ private:
const MachineInstr &MI,
unsigned *PredCost = nullptr) const override;
int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const override;
unsigned getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const override;
bool hasHighOperandLatency(const TargetSchedModel &SchedModel,
const MachineRegisterInfo *MRI,

View File

@@ -4295,11 +4295,9 @@ unsigned HexagonInstrInfo::getInstrTimingClassLatency(
///
/// This is a raw interface to the itinerary that may be directly overridden by
/// a target. Use computeOperandLatency to get the best estimate of latency.
int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr &DefMI,
unsigned DefIdx,
const MachineInstr &UseMI,
unsigned UseIdx) const {
std::optional<unsigned> HexagonInstrInfo::getOperandLatency(
const InstrItineraryData *ItinData, const MachineInstr &DefMI,
unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
// Get DefIdx and UseIdx for super registers.
@@ -4328,9 +4326,9 @@ int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
}
int Latency = TargetInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
UseMI, UseIdx);
if (!Latency)
std::optional<unsigned> Latency = TargetInstrInfo::getOperandLatency(
ItinData, DefMI, DefIdx, UseMI, UseIdx);
if (Latency == 0)
// We should never have 0 cycle latency between two instructions unless
// they can be packetized together. However, this decision can't be made
// here.

View File

@@ -309,10 +309,11 @@ public:
///
/// This is a raw interface to the itinerary that may be directly overridden by
/// a target. Use computeOperandLatency to get the best estimate of latency.
int getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr &DefMI, unsigned DefIdx,
const MachineInstr &UseMI,
unsigned UseIdx) const override;
std::optional<unsigned> getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr &DefMI,
unsigned DefIdx,
const MachineInstr &UseMI,
unsigned UseIdx) const override;
/// Decompose the machine operand's target flags into two values - the direct
/// target flag value and any of bit flags that are applied.

View File

@@ -467,7 +467,7 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx,
// default.
if ((DstInst->isRegSequence() || DstInst->isCopy())) {
Register DReg = DstInst->getOperand(0).getReg();
int DLatency = -1;
std::optional<unsigned> DLatency;
for (const auto &DDep : Dst->Succs) {
MachineInstr *DDst = DDep.getSUnit()->getInstr();
int UseIdx = -1;
@@ -482,21 +482,21 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx,
if (UseIdx == -1)
continue;
int Latency = (InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0,
*DDst, UseIdx));
std::optional<unsigned> Latency =
InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0, *DDst, UseIdx);
// Set DLatency for the first time.
DLatency = (DLatency == -1) ? Latency : DLatency;
if (!DLatency)
DLatency = Latency;
// For multiple uses, if the Latency is different across uses, reset
// DLatency.
if (DLatency != Latency) {
DLatency = -1;
DLatency = std::nullopt;
break;
}
}
DLatency = std::max(DLatency, 0);
Dep.setLatency((unsigned)DLatency);
Dep.setLatency(DLatency ? *DLatency : 0);
}
// Try to schedule uses near definitions to generate .cur.
@@ -581,15 +581,16 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) {
const MachineOperand &MO = DstI->getOperand(OpNum);
if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) {
int Latency = (InstrInfo.getOperandLatency(&InstrItins, *SrcI,
DefIdx, *DstI, OpNum));
std::optional<unsigned> Latency = InstrInfo.getOperandLatency(
&InstrItins, *SrcI, DefIdx, *DstI, OpNum);
// For some instructions (ex: COPY), we might not get a valid (non-negative)
// latency, as they don't have any Itinerary class associated with them.
Latency = std::max(Latency, 0);
if (!Latency)
Latency = 0;
bool IsArtificial = I.isArtificial();
Latency = updateLatency(*SrcI, *DstI, IsArtificial, Latency);
I.setLatency(Latency);
Latency = updateLatency(*SrcI, *DstI, IsArtificial, *Latency);
I.setLatency(*Latency);
}
}

View File

@@ -155,22 +155,21 @@ unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
continue;
int Cycle = ItinData->getOperandCycle(DefClass, i);
if (Cycle < 0)
std::optional<unsigned> Cycle = ItinData->getOperandCycle(DefClass, i);
if (!Cycle)
continue;
Latency = std::max(Latency, (unsigned) Cycle);
Latency = std::max(Latency, *Cycle);
}
return Latency;
}
int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr &DefMI, unsigned DefIdx,
const MachineInstr &UseMI,
unsigned UseIdx) const {
int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
UseMI, UseIdx);
std::optional<unsigned> PPCInstrInfo::getOperandLatency(
const InstrItineraryData *ItinData, const MachineInstr &DefMI,
unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(
ItinData, DefMI, DefIdx, UseMI, UseIdx);
if (!DefMI.getParent())
return Latency;
@@ -190,7 +189,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
if (UseMI.isBranch() && IsRegCR) {
if (Latency < 0)
if (!Latency)
Latency = getInstrLatency(ItinData, DefMI);
// On some cores, there is an additional delay between writing to a condition
@@ -210,8 +209,8 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
// FIXME: Is this needed for POWER9?
Latency += 2;
break;
Latency = *Latency + 2;
break;
}
}

View File

@@ -294,13 +294,15 @@ public:
const MachineInstr &MI,
unsigned *PredCost = nullptr) const override;
int getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr &DefMI, unsigned DefIdx,
const MachineInstr &UseMI,
unsigned UseIdx) const override;
int getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const override {
std::optional<unsigned> getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr &DefMI,
unsigned DefIdx,
const MachineInstr &UseMI,
unsigned UseIdx) const override;
std::optional<unsigned> getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode,
unsigned UseIdx) const override {
return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx,
UseNode, UseIdx);
}