[Reland][IPO] Added attributor for identifying invariant loads (#146584)
Patched and tested the `AAInvariantLoadPointer` attributor from #141800, which identifies pointers whose loads are eligible to be marked as `!invariant.load`. The bug in the attributor was due to `AAMemoryBehavior` always identifying pointers obtained from `alloca`s as having no writes. I'm not entirely sure why `AAMemoryBehavior` behaves this way, but it seems to be because it identifies the scope of an `alloca` to be limited to only that instruction (and, certainly, no memory writes occur within the `alloca` instruction). This patch just adds a check to disallow all loads from `alloca` pointers from being marked `!invariant.load` (since any well-defined program will have to write to stack pointers at some point).
This commit is contained in:
@@ -6335,6 +6335,47 @@ struct AAUnderlyingObjects : AbstractAttribute {
|
|||||||
AA::ValueScope Scope = AA::Interprocedural) const = 0;
|
AA::ValueScope Scope = AA::Interprocedural) const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// An abstract interface for identifying pointers from which loads can be
|
||||||
|
/// marked invariant.
|
||||||
|
struct AAInvariantLoadPointer : public AbstractAttribute {
|
||||||
|
AAInvariantLoadPointer(const IRPosition &IRP) : AbstractAttribute(IRP) {}
|
||||||
|
|
||||||
|
/// See AbstractAttribute::isValidIRPositionForInit
|
||||||
|
static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
|
||||||
|
if (!IRP.getAssociatedType()->isPointerTy())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return AbstractAttribute::isValidIRPositionForInit(A, IRP);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create an abstract attribute view for the position \p IRP.
|
||||||
|
static AAInvariantLoadPointer &createForPosition(const IRPosition &IRP,
|
||||||
|
Attributor &A);
|
||||||
|
|
||||||
|
/// Return true if the pointer's contents are known to remain invariant.
|
||||||
|
virtual bool isKnownInvariant() const = 0;
|
||||||
|
virtual bool isKnownLocallyInvariant() const = 0;
|
||||||
|
|
||||||
|
/// Return true if the pointer's contents are assumed to remain invariant.
|
||||||
|
virtual bool isAssumedInvariant() const = 0;
|
||||||
|
virtual bool isAssumedLocallyInvariant() const = 0;
|
||||||
|
|
||||||
|
/// See AbstractAttribute::getName().
|
||||||
|
StringRef getName() const override { return "AAInvariantLoadPointer"; }
|
||||||
|
|
||||||
|
/// See AbstractAttribute::getIdAddr().
|
||||||
|
const char *getIdAddr() const override { return &ID; }
|
||||||
|
|
||||||
|
/// This function should return true if the type of the \p AA is
|
||||||
|
/// AAInvariantLoadPointer
|
||||||
|
static bool classof(const AbstractAttribute *AA) {
|
||||||
|
return (AA->getIdAddr() == &ID);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unique ID (due to the unique address).
|
||||||
|
static const char ID;
|
||||||
|
};
|
||||||
|
|
||||||
/// An abstract interface for address space information.
|
/// An abstract interface for address space information.
|
||||||
struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
|
struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
|
||||||
AAAddressSpace(const IRPosition &IRP, Attributor &A)
|
AAAddressSpace(const IRPosition &IRP, Attributor &A)
|
||||||
|
|||||||
@@ -3612,6 +3612,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
|
|||||||
if (SimplifyAllLoads)
|
if (SimplifyAllLoads)
|
||||||
getAssumedSimplified(IRPosition::value(I), nullptr,
|
getAssumedSimplified(IRPosition::value(I), nullptr,
|
||||||
UsedAssumedInformation, AA::Intraprocedural);
|
UsedAssumedInformation, AA::Intraprocedural);
|
||||||
|
getOrCreateAAFor<AAInvariantLoadPointer>(
|
||||||
|
IRPosition::value(*LI->getPointerOperand()));
|
||||||
getOrCreateAAFor<AAAddressSpace>(
|
getOrCreateAAFor<AAAddressSpace>(
|
||||||
IRPosition::value(*LI->getPointerOperand()));
|
IRPosition::value(*LI->getPointerOperand()));
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -191,6 +191,7 @@ PIPE_OPERATOR(AAInterFnReachability)
|
|||||||
PIPE_OPERATOR(AAPointerInfo)
|
PIPE_OPERATOR(AAPointerInfo)
|
||||||
PIPE_OPERATOR(AAAssumptionInfo)
|
PIPE_OPERATOR(AAAssumptionInfo)
|
||||||
PIPE_OPERATOR(AAUnderlyingObjects)
|
PIPE_OPERATOR(AAUnderlyingObjects)
|
||||||
|
PIPE_OPERATOR(AAInvariantLoadPointer)
|
||||||
PIPE_OPERATOR(AAAddressSpace)
|
PIPE_OPERATOR(AAAddressSpace)
|
||||||
PIPE_OPERATOR(AAAllocationInfo)
|
PIPE_OPERATOR(AAAllocationInfo)
|
||||||
PIPE_OPERATOR(AAIndirectCallInfo)
|
PIPE_OPERATOR(AAIndirectCallInfo)
|
||||||
@@ -12533,6 +12534,346 @@ private:
|
|||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
/// --------------------- Invariant Load Pointer -------------------------------
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
struct AAInvariantLoadPointerImpl
|
||||||
|
: public StateWrapper<BitIntegerState<uint8_t, 15>,
|
||||||
|
AAInvariantLoadPointer> {
|
||||||
|
|
||||||
|
enum {
|
||||||
|
// pointer does not alias within the bounds of the function
|
||||||
|
IS_NOALIAS = 1 << 0,
|
||||||
|
// pointer is not involved in any effectful instructions within the bounds
|
||||||
|
// of the function
|
||||||
|
IS_NOEFFECT = 1 << 1,
|
||||||
|
// loads are invariant within the bounds of the function
|
||||||
|
IS_LOCALLY_INVARIANT = 1 << 2,
|
||||||
|
// memory lifetime is constrained within the bounds of the function
|
||||||
|
IS_LOCALLY_CONSTRAINED = 1 << 3,
|
||||||
|
|
||||||
|
IS_BEST_STATE = IS_NOALIAS | IS_NOEFFECT | IS_LOCALLY_INVARIANT |
|
||||||
|
IS_LOCALLY_CONSTRAINED,
|
||||||
|
};
|
||||||
|
static_assert(getBestState() == IS_BEST_STATE, "Unexpected best state");
|
||||||
|
|
||||||
|
using Base =
|
||||||
|
StateWrapper<BitIntegerState<uint8_t, 15>, AAInvariantLoadPointer>;
|
||||||
|
|
||||||
|
// the BitIntegerState is optimistic about IS_NOALIAS and IS_NOEFFECT, but
|
||||||
|
// pessimistic about IS_KNOWN_INVARIANT
|
||||||
|
AAInvariantLoadPointerImpl(const IRPosition &IRP, Attributor &A)
|
||||||
|
: Base(IRP) {}
|
||||||
|
|
||||||
|
bool isKnownInvariant() const final {
|
||||||
|
return isKnownLocallyInvariant() && isKnown(IS_LOCALLY_CONSTRAINED);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isKnownLocallyInvariant() const final {
|
||||||
|
if (isKnown(IS_LOCALLY_INVARIANT))
|
||||||
|
return true;
|
||||||
|
return isKnown(IS_NOALIAS | IS_NOEFFECT);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isAssumedInvariant() const final {
|
||||||
|
return isAssumedLocallyInvariant() && isAssumed(IS_LOCALLY_CONSTRAINED);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isAssumedLocallyInvariant() const final {
|
||||||
|
if (isAssumed(IS_LOCALLY_INVARIANT))
|
||||||
|
return true;
|
||||||
|
return isAssumed(IS_NOALIAS | IS_NOEFFECT);
|
||||||
|
}
|
||||||
|
|
||||||
|
ChangeStatus updateImpl(Attributor &A) override {
|
||||||
|
ChangeStatus Changed = ChangeStatus::UNCHANGED;
|
||||||
|
|
||||||
|
Changed |= updateNoAlias(A);
|
||||||
|
if (requiresNoAlias() && !isAssumed(IS_NOALIAS))
|
||||||
|
return indicatePessimisticFixpoint();
|
||||||
|
|
||||||
|
Changed |= updateNoEffect(A);
|
||||||
|
|
||||||
|
Changed |= updateLocalInvariance(A);
|
||||||
|
|
||||||
|
return Changed;
|
||||||
|
}
|
||||||
|
|
||||||
|
ChangeStatus manifest(Attributor &A) override {
|
||||||
|
if (!isKnownInvariant())
|
||||||
|
return ChangeStatus::UNCHANGED;
|
||||||
|
|
||||||
|
ChangeStatus Changed = ChangeStatus::UNCHANGED;
|
||||||
|
const Value *Ptr = &getAssociatedValue();
|
||||||
|
const auto TagInvariantLoads = [&](const Use &U, bool &) {
|
||||||
|
if (U.get() != Ptr)
|
||||||
|
return true;
|
||||||
|
auto *I = dyn_cast<Instruction>(U.getUser());
|
||||||
|
if (!I)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// Ensure that we are only changing uses from the corresponding callgraph
|
||||||
|
// SSC in the case that the AA isn't run on the entire module
|
||||||
|
if (!A.isRunOn(I->getFunction()))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (I->hasMetadata(LLVMContext::MD_invariant_load))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (auto *LI = dyn_cast<LoadInst>(I)) {
|
||||||
|
LI->setMetadata(LLVMContext::MD_invariant_load,
|
||||||
|
MDNode::get(LI->getContext(), {}));
|
||||||
|
Changed = ChangeStatus::CHANGED;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
(void)A.checkForAllUses(TagInvariantLoads, *this, *Ptr);
|
||||||
|
return Changed;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// See AbstractAttribute::getAsStr().
|
||||||
|
const std::string getAsStr(Attributor *) const override {
|
||||||
|
if (isKnownInvariant())
|
||||||
|
return "load-invariant pointer";
|
||||||
|
return "non-invariant pointer";
|
||||||
|
}
|
||||||
|
|
||||||
|
/// See AbstractAttribute::trackStatistics().
|
||||||
|
void trackStatistics() const override {}
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// Indicate that noalias is required for the pointer to be invariant.
|
||||||
|
bool requiresNoAlias() const {
|
||||||
|
switch (getPositionKind()) {
|
||||||
|
default:
|
||||||
|
// Conservatively default to require noalias.
|
||||||
|
return true;
|
||||||
|
case IRP_FLOAT:
|
||||||
|
case IRP_RETURNED:
|
||||||
|
case IRP_CALL_SITE:
|
||||||
|
return false;
|
||||||
|
case IRP_CALL_SITE_RETURNED: {
|
||||||
|
const auto &CB = cast<CallBase>(getAnchorValue());
|
||||||
|
return !isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
|
||||||
|
&CB, /*MustPreserveNullness=*/false);
|
||||||
|
}
|
||||||
|
case IRP_ARGUMENT: {
|
||||||
|
const Function *F = getAssociatedFunction();
|
||||||
|
assert(F && "no associated function for argument");
|
||||||
|
return !isCallableCC(F->getCallingConv());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isExternal() const {
|
||||||
|
const Function *F = getAssociatedFunction();
|
||||||
|
if (!F)
|
||||||
|
return true;
|
||||||
|
return isCallableCC(F->getCallingConv()) &&
|
||||||
|
getPositionKind() != IRP_CALL_SITE_RETURNED;
|
||||||
|
}
|
||||||
|
|
||||||
|
ChangeStatus updateNoAlias(Attributor &A) {
|
||||||
|
if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS))
|
||||||
|
return ChangeStatus::UNCHANGED;
|
||||||
|
|
||||||
|
// Try to use AANoAlias.
|
||||||
|
if (const auto *ANoAlias = A.getOrCreateAAFor<AANoAlias>(
|
||||||
|
getIRPosition(), this, DepClassTy::REQUIRED)) {
|
||||||
|
if (ANoAlias->isKnownNoAlias()) {
|
||||||
|
addKnownBits(IS_NOALIAS);
|
||||||
|
return ChangeStatus::CHANGED;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ANoAlias->isAssumedNoAlias()) {
|
||||||
|
removeAssumedBits(IS_NOALIAS);
|
||||||
|
return ChangeStatus::CHANGED;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ChangeStatus::UNCHANGED;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to infer noalias from argument attribute, since it is applicable for
|
||||||
|
// the duration of the function.
|
||||||
|
if (const Argument *Arg = getAssociatedArgument()) {
|
||||||
|
if (Arg->hasNoAliasAttr()) {
|
||||||
|
addKnownBits(IS_NOALIAS);
|
||||||
|
return ChangeStatus::UNCHANGED;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Noalias information is not provided, and cannot be inferred,
|
||||||
|
// so we conservatively assume the pointer aliases.
|
||||||
|
removeAssumedBits(IS_NOALIAS);
|
||||||
|
return ChangeStatus::CHANGED;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ChangeStatus::UNCHANGED;
|
||||||
|
}
|
||||||
|
|
||||||
|
ChangeStatus updateNoEffect(Attributor &A) {
|
||||||
|
if (isKnown(IS_NOEFFECT) || !isAssumed(IS_NOEFFECT))
|
||||||
|
return ChangeStatus::UNCHANGED;
|
||||||
|
|
||||||
|
if (!getAssociatedFunction())
|
||||||
|
return indicatePessimisticFixpoint();
|
||||||
|
|
||||||
|
if (isa<AllocaInst>(&getAssociatedValue()))
|
||||||
|
return indicatePessimisticFixpoint();
|
||||||
|
|
||||||
|
const auto HasNoEffectLoads = [&](const Use &U, bool &) {
|
||||||
|
const auto *LI = dyn_cast<LoadInst>(U.getUser());
|
||||||
|
return !LI || !LI->mayHaveSideEffects();
|
||||||
|
};
|
||||||
|
if (!A.checkForAllUses(HasNoEffectLoads, *this, getAssociatedValue()))
|
||||||
|
return indicatePessimisticFixpoint();
|
||||||
|
|
||||||
|
if (const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>(
|
||||||
|
getIRPosition(), this, DepClassTy::REQUIRED)) {
|
||||||
|
// For non-instructions, try to use AAMemoryBehavior to infer the readonly
|
||||||
|
// attribute
|
||||||
|
if (!AMemoryBehavior->isAssumedReadOnly())
|
||||||
|
return indicatePessimisticFixpoint();
|
||||||
|
|
||||||
|
if (AMemoryBehavior->isKnownReadOnly()) {
|
||||||
|
addKnownBits(IS_NOEFFECT);
|
||||||
|
return ChangeStatus::UNCHANGED;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ChangeStatus::UNCHANGED;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (const Argument *Arg = getAssociatedArgument()) {
|
||||||
|
if (Arg->onlyReadsMemory()) {
|
||||||
|
addKnownBits(IS_NOEFFECT);
|
||||||
|
return ChangeStatus::UNCHANGED;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Readonly information is not provided, and cannot be inferred from
|
||||||
|
// AAMemoryBehavior.
|
||||||
|
return indicatePessimisticFixpoint();
|
||||||
|
}
|
||||||
|
|
||||||
|
return ChangeStatus::UNCHANGED;
|
||||||
|
}
|
||||||
|
|
||||||
|
ChangeStatus updateLocalInvariance(Attributor &A) {
|
||||||
|
if (isKnown(IS_LOCALLY_INVARIANT) || !isAssumed(IS_LOCALLY_INVARIANT))
|
||||||
|
return ChangeStatus::UNCHANGED;
|
||||||
|
|
||||||
|
// try to infer invariance from underlying objects
|
||||||
|
const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
|
||||||
|
getIRPosition(), this, DepClassTy::REQUIRED);
|
||||||
|
if (!AUO)
|
||||||
|
return ChangeStatus::UNCHANGED;
|
||||||
|
|
||||||
|
bool UsedAssumedInformation = false;
|
||||||
|
const auto IsLocallyInvariantLoadIfPointer = [&](const Value &V) {
|
||||||
|
if (!V.getType()->isPointerTy())
|
||||||
|
return true;
|
||||||
|
const auto *IsInvariantLoadPointer =
|
||||||
|
A.getOrCreateAAFor<AAInvariantLoadPointer>(IRPosition::value(V), this,
|
||||||
|
DepClassTy::REQUIRED);
|
||||||
|
// Conservatively fail if invariance cannot be inferred.
|
||||||
|
if (!IsInvariantLoadPointer)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (IsInvariantLoadPointer->isKnownLocallyInvariant())
|
||||||
|
return true;
|
||||||
|
if (!IsInvariantLoadPointer->isAssumedLocallyInvariant())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
UsedAssumedInformation = true;
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
if (!AUO->forallUnderlyingObjects(IsLocallyInvariantLoadIfPointer))
|
||||||
|
return indicatePessimisticFixpoint();
|
||||||
|
|
||||||
|
if (const auto *CB = dyn_cast<CallBase>(&getAnchorValue())) {
|
||||||
|
if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
|
||||||
|
CB, /*MustPreserveNullness=*/false)) {
|
||||||
|
for (const Value *Arg : CB->args()) {
|
||||||
|
if (!IsLocallyInvariantLoadIfPointer(*Arg))
|
||||||
|
return indicatePessimisticFixpoint();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!UsedAssumedInformation) {
|
||||||
|
// Pointer is known and not just assumed to be locally invariant.
|
||||||
|
addKnownBits(IS_LOCALLY_INVARIANT);
|
||||||
|
return ChangeStatus::CHANGED;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ChangeStatus::UNCHANGED;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AAInvariantLoadPointerFloating final : AAInvariantLoadPointerImpl {
|
||||||
|
AAInvariantLoadPointerFloating(const IRPosition &IRP, Attributor &A)
|
||||||
|
: AAInvariantLoadPointerImpl(IRP, A) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AAInvariantLoadPointerReturned final : AAInvariantLoadPointerImpl {
|
||||||
|
AAInvariantLoadPointerReturned(const IRPosition &IRP, Attributor &A)
|
||||||
|
: AAInvariantLoadPointerImpl(IRP, A) {}
|
||||||
|
|
||||||
|
void initialize(Attributor &) override {
|
||||||
|
removeAssumedBits(IS_LOCALLY_CONSTRAINED);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AAInvariantLoadPointerCallSiteReturned final
|
||||||
|
: AAInvariantLoadPointerImpl {
|
||||||
|
AAInvariantLoadPointerCallSiteReturned(const IRPosition &IRP, Attributor &A)
|
||||||
|
: AAInvariantLoadPointerImpl(IRP, A) {}
|
||||||
|
|
||||||
|
void initialize(Attributor &A) override {
|
||||||
|
const Function *F = getAssociatedFunction();
|
||||||
|
assert(F && "no associated function for return from call");
|
||||||
|
|
||||||
|
if (!F->isDeclaration() && !F->isIntrinsic())
|
||||||
|
return AAInvariantLoadPointerImpl::initialize(A);
|
||||||
|
|
||||||
|
const auto &CB = cast<CallBase>(getAnchorValue());
|
||||||
|
if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
|
||||||
|
&CB, /*MustPreserveNullness=*/false))
|
||||||
|
return AAInvariantLoadPointerImpl::initialize(A);
|
||||||
|
|
||||||
|
if (F->onlyReadsMemory() && F->hasNoSync())
|
||||||
|
return AAInvariantLoadPointerImpl::initialize(A);
|
||||||
|
|
||||||
|
// At this point, the function is opaque, so we conservatively assume
|
||||||
|
// non-invariance.
|
||||||
|
indicatePessimisticFixpoint();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
|
||||||
|
AAInvariantLoadPointerArgument(const IRPosition &IRP, Attributor &A)
|
||||||
|
: AAInvariantLoadPointerImpl(IRP, A) {}
|
||||||
|
|
||||||
|
void initialize(Attributor &) override {
|
||||||
|
const Function *F = getAssociatedFunction();
|
||||||
|
assert(F && "no associated function for argument");
|
||||||
|
|
||||||
|
if (!isCallableCC(F->getCallingConv())) {
|
||||||
|
addKnownBits(IS_LOCALLY_CONSTRAINED);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!F->hasLocalLinkage())
|
||||||
|
removeAssumedBits(IS_LOCALLY_CONSTRAINED);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AAInvariantLoadPointerCallSiteArgument final
|
||||||
|
: AAInvariantLoadPointerImpl {
|
||||||
|
AAInvariantLoadPointerCallSiteArgument(const IRPosition &IRP, Attributor &A)
|
||||||
|
: AAInvariantLoadPointerImpl(IRP, A) {}
|
||||||
|
};
|
||||||
|
} // namespace
|
||||||
|
|
||||||
/// ------------------------ Address Space ------------------------------------
|
/// ------------------------ Address Space ------------------------------------
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@@ -13038,6 +13379,7 @@ const char AAInterFnReachability::ID = 0;
|
|||||||
const char AAPointerInfo::ID = 0;
|
const char AAPointerInfo::ID = 0;
|
||||||
const char AAAssumptionInfo::ID = 0;
|
const char AAAssumptionInfo::ID = 0;
|
||||||
const char AAUnderlyingObjects::ID = 0;
|
const char AAUnderlyingObjects::ID = 0;
|
||||||
|
const char AAInvariantLoadPointer::ID = 0;
|
||||||
const char AAAddressSpace::ID = 0;
|
const char AAAddressSpace::ID = 0;
|
||||||
const char AAAllocationInfo::ID = 0;
|
const char AAAllocationInfo::ID = 0;
|
||||||
const char AAIndirectCallInfo::ID = 0;
|
const char AAIndirectCallInfo::ID = 0;
|
||||||
@@ -13172,6 +13514,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
|
|||||||
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
|
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
|
||||||
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass)
|
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass)
|
||||||
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
|
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
|
||||||
|
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInvariantLoadPointer)
|
||||||
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace)
|
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace)
|
||||||
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo)
|
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo)
|
||||||
|
|
||||||
|
|||||||
431
llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
Normal file
431
llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
Normal file
@@ -0,0 +1,431 @@
|
|||||||
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
||||||
|
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=attributor %s -S | FileCheck %s --check-prefix=AMDGCN
|
||||||
|
|
||||||
|
@G = addrspace(1) global i32 zeroinitializer, align 4
|
||||||
|
declare void @clobber(i32) #0
|
||||||
|
declare void @clobber.p5(ptr addrspace(5)) #0
|
||||||
|
declare ptr addrspace(1) @get_ptr() #0
|
||||||
|
declare noalias ptr addrspace(1) @get_noalias_ptr() #0
|
||||||
|
declare noalias ptr addrspace(1) @get_untouched_ptr() #1
|
||||||
|
|
||||||
|
define void @test_nonkernel(ptr addrspace(1) noalias %ptr) {
|
||||||
|
; AMDGCN-LABEL: define void @test_nonkernel(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7:[0-9]+]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
;; may not be !invariant.load, as the caller may modify %ptr
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_plain(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
;; may not be !invariant.load, as %ptr may alias a pointer in @clobber
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]]
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @test_gep(ptr addrspace(1) %ptr) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_gep(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
|
||||||
|
%val = load i32, ptr addrspace(1) %gep, align 4
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @test_noalias_gep(ptr addrspace(1) noalias %ptr) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_gep(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !invariant.load [[META0]]
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
|
||||||
|
%val = load i32, ptr addrspace(1) %gep, align 4
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_swap(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||||
|
; AMDGCN-NEXT: store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
;; cannot be !invariant.load due to the write to %ptr
|
||||||
|
store i32 %swap, ptr addrspace(1) %ptr, align 4
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_volatile(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%val = load volatile i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
;; volatile loads cannot be !invariant.load
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @test_unordered(ptr addrspace(1) noalias %ptr) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_unordered(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4, !invariant.load [[META0]]
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @test_monotonic(ptr addrspace(1) noalias %ptr) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_monotonic(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] monotonic, align 4
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%val = load atomic i32, ptr addrspace(1) %ptr monotonic, align 4
|
||||||
|
;; atomic loads with ordering guarantees may have side effects
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @test_global() {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_global(
|
||||||
|
; AMDGCN-SAME: ) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) @G, align 4
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%val = load i32, ptr addrspace(1) @G, align 4
|
||||||
|
;; is not an !invariant.load as global variables may change
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @test_alloca(ptr addrspace(1) %ptr) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_alloca(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||||
|
; AMDGCN-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
|
||||||
|
; AMDGCN-NEXT: store i32 [[VAL]], ptr addrspace(5) [[ALLOCA]], align 4
|
||||||
|
; AMDGCN-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[ALLOCA]], align 4
|
||||||
|
; AMDGCN-NEXT: call void @clobber.p5(ptr addrspace(5) noundef align 4 [[ALLOCA]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[LOAD]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
%alloca = alloca i32, addrspace(5)
|
||||||
|
store i32 %val, ptr addrspace(5) %alloca
|
||||||
|
%load = load i32, ptr addrspace(5) %alloca
|
||||||
|
call void @clobber.p5(ptr addrspace(5) %alloca)
|
||||||
|
call void @clobber(i32 %load)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define internal void @copy.i32(ptr addrspace(5) %alloca, i32 %qty) {
|
||||||
|
; AMDGCN-LABEL: define internal void @copy.i32(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(5) noalias nofree noundef writeonly align 4 captures(none) dereferenceable_or_null(4) [[ALLOCA:%.*]], i32 [[QTY:%.*]]) #[[ATTR4:[0-9]+]] {
|
||||||
|
; AMDGCN-NEXT: store i32 [[QTY]], ptr addrspace(5) [[ALLOCA]], align 4
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
store i32 %qty, ptr addrspace(5) %alloca
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @test_internal_alloca(ptr addrspace(1) %ptr) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_internal_alloca(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||||
|
; AMDGCN-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
|
||||||
|
; AMDGCN-NEXT: call void @copy.i32(ptr addrspace(5) noalias nofree noundef writeonly align 4 captures(none) dereferenceable_or_null(4) [[ALLOCA]], i32 [[VAL]]) #[[ATTR8:[0-9]+]]
|
||||||
|
; AMDGCN-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[ALLOCA]], align 4
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[LOAD]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
%alloca = alloca i32, addrspace(5)
|
||||||
|
call void @copy.i32(ptr addrspace(5) %alloca, i32 %val)
|
||||||
|
%load = load i32, ptr addrspace(5) %alloca
|
||||||
|
call void @clobber(i32 %load)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) {
|
||||||
|
; AMDGCN-LABEL: define internal i32 @test_internal_noalias_load(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR5:[0-9]+]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
|
||||||
|
; AMDGCN-NEXT: ret i32 [[VAL]]
|
||||||
|
;
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
;; is an !invariant.load due to its only caller @test_call_internal_noalias
|
||||||
|
ret i32 %val
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Kernel that passes its noalias pointer argument to the internal callee
;; @test_internal_noalias_load and hands the returned value to @clobber.
define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %ptr) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_noalias(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR9:[0-9]+]]
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%val = call i32 @test_internal_noalias_load(ptr addrspace(1) %ptr)
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Internal callee whose own parameter is declared noalias. The expected
;; output does not mark the load !invariant.load.
define internal i32 @test_internal_load(ptr addrspace(1) noalias %ptr) {
|
||||||
|
; AMDGCN-LABEL: define internal i32 @test_internal_load(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR5]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||||
|
; AMDGCN-NEXT: ret i32 [[VAL]]
|
||||||
|
;
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
;; may not be an !invariant.load since the pointer in @test_call_internal may alias
|
||||||
|
ret i32 %val
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Kernel whose pointer argument is not declared noalias; it passes the
;; pointer to @test_internal_load and hands the returned value to @clobber.
define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR9]]
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%val = call i32 @test_internal_load(ptr addrspace(1) %ptr)
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Internal callee that only loads through its pointer argument. The expected
;; output does not mark the load !invariant.load.
define internal i32 @test_internal_written(ptr addrspace(1) %ptr) {
|
||||||
|
; AMDGCN-LABEL: define internal i32 @test_internal_written(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR5]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||||
|
; AMDGCN-NEXT: ret i32 [[VAL]]
|
||||||
|
;
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
;; cannot be an !invariant.load because of the write in caller @test_call_internal_written
|
||||||
|
ret i32 %val
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Kernel that calls @test_internal_written on its noalias pointer argument
;; and afterwards stores %x through that same pointer.
define amdgpu_kernel void @test_call_internal_written(ptr addrspace(1) noalias %ptr, i32 inreg %x) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_written(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) noalias nofree captures(none) [[PTR:%.*]], i32 inreg [[X:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR9]]
|
||||||
|
; AMDGCN-NEXT: store i32 [[X]], ptr addrspace(1) [[PTR]], align 4
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%val = call i32 @test_internal_written(ptr addrspace(1) %ptr)
|
||||||
|
store i32 %x, ptr addrspace(1) %ptr
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Loads through the pointer returned by @get_ptr(). The expected output
;; leaves the load without !invariant.load.
define amdgpu_kernel void @test_call_ptr() {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr(
|
||||||
|
; AMDGCN-SAME: ) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%ptr = call ptr addrspace(1) @get_ptr()
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
;; may not be an !invariant.load since %ptr may alias
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Loads through the pointer returned by @get_noalias_ptr(). The expected
;; output leaves the load without !invariant.load.
define amdgpu_kernel void @test_call_noalias_ptr() {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_noalias_ptr(
|
||||||
|
; AMDGCN-SAME: ) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_noalias_ptr() #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%ptr = call ptr addrspace(1) @get_noalias_ptr()
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
;; may not be an !invariant.load since %ptr may have been written to before returning
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Loads through the pointer returned by @get_untouched_ptr(). The expected
;; output marks the call result noalias and the load !invariant.load.
define amdgpu_kernel void @test_call_untouched_ptr() {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_untouched_ptr(
|
||||||
|
; AMDGCN-SAME: ) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[PTR:%.*]] = call noalias align 4 ptr addrspace(1) @get_untouched_ptr() #[[ATTR10:[0-9]+]]
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%ptr = call ptr addrspace(1) @get_untouched_ptr()
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Builds an addrspace(7) pointer from a kernel argument that is not declared
;; noalias, via llvm.amdgcn.make.buffer.rsrc, and loads through it. The
;; expected output leaves the load without !invariant.load.
define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i32 noundef 0, i32 noundef 0) #[[ATTR11:[0-9]+]]
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%rsrc = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 0, i32 0)
|
||||||
|
%val = load i32, ptr addrspace(7) %rsrc, align 4
|
||||||
|
;; original %ptr may alias
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Builds an addrspace(7) pointer from a noalias kernel argument via
;; llvm.amdgcn.make.buffer.rsrc and loads through it. The expected output
;; marks the load !invariant.load.
define amdgpu_kernel void @test_make_buffer_noalias(ptr addrspace(1) noalias %ptr) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer_noalias(
|
||||||
|
; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i32 noundef 0, i32 noundef 0) #[[ATTR11]]
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4, !invariant.load [[META0]]
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%rsrc = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 0, i32 0)
|
||||||
|
%val = load i32, ptr addrspace(7) %rsrc, align 4
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Loads through a select between two noalias kernel arguments. The expected
;; output marks the load !invariant.load.
define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load(
|
||||||
|
; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
;; either pointer yields an !invariant.load
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Loads through a select where only %ptr.true is declared noalias. The
;; expected output leaves the load without !invariant.load.
define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load_partial_noalias(
|
||||||
|
; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
%ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
;; %ptr.false may alias, so no !invariant.load
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Loads through a phi of two noalias kernel arguments, chosen by a branch on
;; %cond. The expected output marks the load !invariant.load.
define amdgpu_kernel void @test_branch_load(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load(
|
||||||
|
; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||||
|
; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
|
||||||
|
; AMDGCN: [[TRUE]]:
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: br label %[[FINISH:.*]]
|
||||||
|
; AMDGCN: [[FALSE]]:
|
||||||
|
; AMDGCN-NEXT: br label %[[FINISH]]
|
||||||
|
; AMDGCN: [[FINISH]]:
|
||||||
|
; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
br i1 %cond, label %true, label %false
|
||||||
|
true:
|
||||||
|
call void @clobber(i32 1)
|
||||||
|
br label %finish
|
||||||
|
false:
|
||||||
|
br label %finish
|
||||||
|
finish:
|
||||||
|
%ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ]
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
;; either pointer yields an !invariant.load
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Loads through a phi, chosen by a branch on %cond, where only %ptr.true is
;; declared noalias. The expected output leaves the load without
;; !invariant.load.
define amdgpu_kernel void @test_branch_load_partial_noalias(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
|
||||||
|
; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load_partial_noalias(
|
||||||
|
; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
|
||||||
|
; AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||||
|
; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
|
||||||
|
; AMDGCN: [[TRUE]]:
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: br label %[[FINISH:.*]]
|
||||||
|
; AMDGCN: [[FALSE]]:
|
||||||
|
; AMDGCN-NEXT: br label %[[FINISH]]
|
||||||
|
; AMDGCN: [[FINISH]]:
|
||||||
|
; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
|
||||||
|
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||||
|
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||||
|
; AMDGCN-NEXT: ret void
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
br i1 %cond, label %true, label %false
|
||||||
|
true:
|
||||||
|
call void @clobber(i32 1)
|
||||||
|
br label %finish
|
||||||
|
false:
|
||||||
|
br label %finish
|
||||||
|
finish:
|
||||||
|
%ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ]
|
||||||
|
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||||
|
;; %ptr.false may alias, so no !invariant.load
|
||||||
|
call void @clobber(i32 %val)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Attribute groups for the test input; #1 is #0 with readonly added.
attributes #0 = { nofree norecurse nosync nounwind willreturn }
|
||||||
|
attributes #1 = { nofree norecurse nosync nounwind willreturn readonly }
|
||||||
|
;.
|
||||||
|
; AMDGCN: [[META0]] = !{}
|
||||||
|
;.
|
||||||
@@ -207,7 +207,6 @@ define void @f7_1(ptr %ptr, i1 %cnd) {
|
|||||||
; CHECK-LABEL: define {{[^@]+}}@f7_1
|
; CHECK-LABEL: define {{[^@]+}}@f7_1
|
||||||
; CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[PTR:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR2]] {
|
; CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[PTR:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR2]] {
|
||||||
; CHECK-NEXT: [[A:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]]
|
; CHECK-NEXT: [[A:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]]
|
||||||
; CHECK-NEXT: [[PTR_0:%.*]] = load i32, ptr [[PTR]], align 4
|
|
||||||
; CHECK-NEXT: [[B:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]]
|
; CHECK-NEXT: [[B:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]]
|
||||||
; CHECK-NEXT: br i1 [[CND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
|
; CHECK-NEXT: br i1 [[CND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
|
||||||
; CHECK: if.true:
|
; CHECK: if.true:
|
||||||
|
|||||||
@@ -342,14 +342,6 @@ define %S.2 @t3.helper() {
|
|||||||
; CHECK-NEXT: entry:
|
; CHECK-NEXT: entry:
|
||||||
; CHECK-NEXT: [[RETVAL:%.*]] = alloca [[S_2:%.*]], align 8
|
; CHECK-NEXT: [[RETVAL:%.*]] = alloca [[S_2:%.*]], align 8
|
||||||
; CHECK-NEXT: call void @ext1(ptr noundef nonnull align 8 dereferenceable(24) [[RETVAL]])
|
; CHECK-NEXT: call void @ext1(ptr noundef nonnull align 8 dereferenceable(24) [[RETVAL]])
|
||||||
; CHECK-NEXT: [[DOTFCA_0_LOAD:%.*]] = load ptr, ptr [[RETVAL]], align 8
|
|
||||||
; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[S_2]] poison, ptr [[DOTFCA_0_LOAD]], 0
|
|
||||||
; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 1
|
|
||||||
; CHECK-NEXT: [[DOTFCA_1_LOAD:%.*]] = load i64, ptr [[DOTFCA_1_GEP]], align 8
|
|
||||||
; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_0_INSERT]], i64 [[DOTFCA_1_LOAD]], 1
|
|
||||||
; CHECK-NEXT: [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 2
|
|
||||||
; CHECK-NEXT: [[DOTFCA_2_LOAD:%.*]] = load i64, ptr [[DOTFCA_2_GEP]], align 8
|
|
||||||
; CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_1_INSERT]], i64 [[DOTFCA_2_LOAD]], 2
|
|
||||||
; CHECK-NEXT: ret [[S_2]] zeroinitializer
|
; CHECK-NEXT: ret [[S_2]] zeroinitializer
|
||||||
;
|
;
|
||||||
entry:
|
entry:
|
||||||
|
|||||||
Reference in New Issue
Block a user