[Reland][IPO] Added attributor for identifying invariant loads (#146584)
Patched and tested the `AAInvariantLoadPointer` attributor from #141800, which identifies pointers whose loads are eligible to be marked as `!invariant.load`. The bug in the attributor was due to `AAMemoryBehavior` always identifying pointers obtained from `alloca`s as having no writes. I'm not entirely sure why `AAMemoryBehavior` behaves this way, but it seems to be because it identifies the scope of an `alloca` to be limited to only that instruction (and, certainly, no memory writes occur within the `alloca` instruction itself). This patch just adds a check to disallow all loads from `alloca` pointers from being marked `!invariant.load` (since any well-defined program will have to write to stack pointers at some point).
This commit is contained in:
@@ -6335,6 +6335,47 @@ struct AAUnderlyingObjects : AbstractAttribute {
|
||||
AA::ValueScope Scope = AA::Interprocedural) const = 0;
|
||||
};
|
||||
|
||||
/// An abstract interface for identifying pointers from which loads can be
/// marked invariant.
struct AAInvariantLoadPointer : public AbstractAttribute {
  AAInvariantLoadPointer(const IRPosition &IRP) : AbstractAttribute(IRP) {}

  /// See AbstractAttribute::isValidIRPositionForInit
  static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
    // Only pointer-typed positions can carry load-invariance information.
    if (!IRP.getAssociatedType()->isPointerTy())
      return false;

    return AbstractAttribute::isValidIRPositionForInit(A, IRP);
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAInvariantLoadPointer &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// Return true if the pointer's contents are known to remain invariant.
  virtual bool isKnownInvariant() const = 0;
  /// Return true if the pointer's contents are known to remain invariant
  /// within the bounds of the associated function.
  virtual bool isKnownLocallyInvariant() const = 0;

  /// Return true if the pointer's contents are assumed to remain invariant.
  virtual bool isAssumedInvariant() const = 0;
  /// Return true if the pointer's contents are assumed to remain invariant
  /// within the bounds of the associated function.
  virtual bool isAssumedLocallyInvariant() const = 0;

  /// See AbstractAttribute::getName().
  StringRef getName() const override { return "AAInvariantLoadPointer"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAInvariantLoadPointer
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address).
  static const char ID;
};
|
||||
|
||||
/// An abstract interface for address space information.
|
||||
struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
|
||||
AAAddressSpace(const IRPosition &IRP, Attributor &A)
|
||||
|
||||
@@ -3612,6 +3612,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
|
||||
if (SimplifyAllLoads)
|
||||
getAssumedSimplified(IRPosition::value(I), nullptr,
|
||||
UsedAssumedInformation, AA::Intraprocedural);
|
||||
getOrCreateAAFor<AAInvariantLoadPointer>(
|
||||
IRPosition::value(*LI->getPointerOperand()));
|
||||
getOrCreateAAFor<AAAddressSpace>(
|
||||
IRPosition::value(*LI->getPointerOperand()));
|
||||
} else {
|
||||
|
||||
@@ -191,6 +191,7 @@ PIPE_OPERATOR(AAInterFnReachability)
|
||||
PIPE_OPERATOR(AAPointerInfo)
|
||||
PIPE_OPERATOR(AAAssumptionInfo)
|
||||
PIPE_OPERATOR(AAUnderlyingObjects)
|
||||
PIPE_OPERATOR(AAInvariantLoadPointer)
|
||||
PIPE_OPERATOR(AAAddressSpace)
|
||||
PIPE_OPERATOR(AAAllocationInfo)
|
||||
PIPE_OPERATOR(AAIndirectCallInfo)
|
||||
@@ -12533,6 +12534,346 @@ private:
|
||||
};
|
||||
} // namespace
|
||||
|
||||
/// --------------------- Invariant Load Pointer -------------------------------
|
||||
namespace {
|
||||
|
||||
struct AAInvariantLoadPointerImpl
    : public StateWrapper<BitIntegerState<uint8_t, 15>,
                          AAInvariantLoadPointer> {

  enum {
    // pointer does not alias within the bounds of the function
    IS_NOALIAS = 1 << 0,
    // pointer is not involved in any effectful instructions within the bounds
    // of the function
    IS_NOEFFECT = 1 << 1,
    // loads are invariant within the bounds of the function
    IS_LOCALLY_INVARIANT = 1 << 2,
    // memory lifetime is constrained within the bounds of the function
    IS_LOCALLY_CONSTRAINED = 1 << 3,

    IS_BEST_STATE = IS_NOALIAS | IS_NOEFFECT | IS_LOCALLY_INVARIANT |
                    IS_LOCALLY_CONSTRAINED,
  };
  static_assert(getBestState() == IS_BEST_STATE, "Unexpected best state");

  using Base =
      StateWrapper<BitIntegerState<uint8_t, 15>, AAInvariantLoadPointer>;

  // the BitIntegerState is optimistic about IS_NOALIAS and IS_NOEFFECT, but
  // pessimistic about IS_KNOWN_INVARIANT
  AAInvariantLoadPointerImpl(const IRPosition &IRP, Attributor &A)
      : Base(IRP) {}

  /// Globally invariant = invariant within the function AND the memory's
  /// lifetime is constrained to the function (IS_LOCALLY_CONSTRAINED).
  bool isKnownInvariant() const final {
    return isKnownLocallyInvariant() && isKnown(IS_LOCALLY_CONSTRAINED);
  }

  bool isKnownLocallyInvariant() const final {
    if (isKnown(IS_LOCALLY_INVARIANT))
      return true;
    // noalias together with no effectful uses also implies local invariance.
    return isKnown(IS_NOALIAS | IS_NOEFFECT);
  }

  bool isAssumedInvariant() const final {
    return isAssumedLocallyInvariant() && isAssumed(IS_LOCALLY_CONSTRAINED);
  }

  bool isAssumedLocallyInvariant() const final {
    if (isAssumed(IS_LOCALLY_INVARIANT))
      return true;
    return isAssumed(IS_NOALIAS | IS_NOEFFECT);
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Changed = ChangeStatus::UNCHANGED;

    Changed |= updateNoAlias(A);
    // For position kinds where aliasing rules out invariance, give up as soon
    // as noalias has been refuted.
    if (requiresNoAlias() && !isAssumed(IS_NOALIAS))
      return indicatePessimisticFixpoint();

    Changed |= updateNoEffect(A);

    Changed |= updateLocalInvariance(A);

    return Changed;
  }

  /// Once invariance is known, tag every load of the associated pointer with
  /// !invariant.load metadata.
  ChangeStatus manifest(Attributor &A) override {
    if (!isKnownInvariant())
      return ChangeStatus::UNCHANGED;

    ChangeStatus Changed = ChangeStatus::UNCHANGED;
    const Value *Ptr = &getAssociatedValue();
    const auto TagInvariantLoads = [&](const Use &U, bool &) {
      // Only direct uses of the pointer itself are of interest.
      if (U.get() != Ptr)
        return true;
      auto *I = dyn_cast<Instruction>(U.getUser());
      if (!I)
        return true;

      // Ensure that we are only changing uses from the corresponding callgraph
      // SSC in the case that the AA isn't run on the entire module
      if (!A.isRunOn(I->getFunction()))
        return true;

      // Already tagged; nothing to do.
      if (I->hasMetadata(LLVMContext::MD_invariant_load))
        return true;

      if (auto *LI = dyn_cast<LoadInst>(I)) {
        LI->setMetadata(LLVMContext::MD_invariant_load,
                        MDNode::get(LI->getContext(), {}));
        Changed = ChangeStatus::CHANGED;
      }
      return true;
    };

    (void)A.checkForAllUses(TagInvariantLoads, *this, *Ptr);
    return Changed;
  }

  /// See AbstractAttribute::getAsStr().
  const std::string getAsStr(Attributor *) const override {
    if (isKnownInvariant())
      return "load-invariant pointer";
    return "non-invariant pointer";
  }

  /// See AbstractAttribute::trackStatistics().
  void trackStatistics() const override {}

private:
  /// Indicate that noalias is required for the pointer to be invariant.
  bool requiresNoAlias() const {
    switch (getPositionKind()) {
    default:
      // Conservatively default to require noalias.
      return true;
    case IRP_FLOAT:
    case IRP_RETURNED:
    case IRP_CALL_SITE:
      return false;
    case IRP_CALL_SITE_RETURNED: {
      // Intrinsics that return an (uncaptured) alias of a pointer argument
      // defer aliasing questions to that argument.
      const auto &CB = cast<CallBase>(getAnchorValue());
      return !isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
          &CB, /*MustPreserveNullness=*/false);
    }
    case IRP_ARGUMENT: {
      const Function *F = getAssociatedFunction();
      assert(F && "no associated function for argument");
      // NOTE(review): non-callable calling conventions (entry points) appear
      // to be exempt from the noalias requirement here.
      return !isCallableCC(F->getCallingConv());
    }
    }
  }

  /// Return true if the associated value may be reachable from outside the
  /// function: no associated function at all, or a callable function whose
  /// position is not a call-site return.
  bool isExternal() const {
    const Function *F = getAssociatedFunction();
    if (!F)
      return true;
    return isCallableCC(F->getCallingConv()) &&
           getPositionKind() != IRP_CALL_SITE_RETURNED;
  }

  /// Refine the IS_NOALIAS bit via AANoAlias or the argument attribute.
  ChangeStatus updateNoAlias(Attributor &A) {
    if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS))
      return ChangeStatus::UNCHANGED;

    // Try to use AANoAlias.
    if (const auto *ANoAlias = A.getOrCreateAAFor<AANoAlias>(
            getIRPosition(), this, DepClassTy::REQUIRED)) {
      if (ANoAlias->isKnownNoAlias()) {
        addKnownBits(IS_NOALIAS);
        return ChangeStatus::CHANGED;
      }

      if (!ANoAlias->isAssumedNoAlias()) {
        removeAssumedBits(IS_NOALIAS);
        return ChangeStatus::CHANGED;
      }

      return ChangeStatus::UNCHANGED;
    }

    // Try to infer noalias from argument attribute, since it is applicable for
    // the duration of the function.
    if (const Argument *Arg = getAssociatedArgument()) {
      if (Arg->hasNoAliasAttr()) {
        addKnownBits(IS_NOALIAS);
        return ChangeStatus::UNCHANGED;
      }

      // Noalias information is not provided, and cannot be inferred,
      // so we conservatively assume the pointer aliases.
      removeAssumedBits(IS_NOALIAS);
      return ChangeStatus::CHANGED;
    }

    return ChangeStatus::UNCHANGED;
  }

  /// Refine the IS_NOEFFECT bit: no effectful loads of the pointer, and the
  /// associated position is readonly.
  ChangeStatus updateNoEffect(Attributor &A) {
    if (isKnown(IS_NOEFFECT) || !isAssumed(IS_NOEFFECT))
      return ChangeStatus::UNCHANGED;

    if (!getAssociatedFunction())
      return indicatePessimisticFixpoint();

    // AAMemoryBehavior reports alloca pointers as having no writes (it limits
    // an alloca's scope to the instruction itself), but any well-defined
    // program writes to its stack memory, so never treat allocas as
    // invariant-load pointers. (This is the fix relanded by this patch.)
    if (isa<AllocaInst>(&getAssociatedValue()))
      return indicatePessimisticFixpoint();

    const auto HasNoEffectLoads = [&](const Use &U, bool &) {
      // Volatile/ordered-atomic loads have side effects and block NOEFFECT.
      const auto *LI = dyn_cast<LoadInst>(U.getUser());
      return !LI || !LI->mayHaveSideEffects();
    };
    if (!A.checkForAllUses(HasNoEffectLoads, *this, getAssociatedValue()))
      return indicatePessimisticFixpoint();

    if (const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>(
            getIRPosition(), this, DepClassTy::REQUIRED)) {
      // For non-instructions, try to use AAMemoryBehavior to infer the readonly
      // attribute
      if (!AMemoryBehavior->isAssumedReadOnly())
        return indicatePessimisticFixpoint();

      if (AMemoryBehavior->isKnownReadOnly()) {
        addKnownBits(IS_NOEFFECT);
        return ChangeStatus::UNCHANGED;
      }

      return ChangeStatus::UNCHANGED;
    }

    if (const Argument *Arg = getAssociatedArgument()) {
      if (Arg->onlyReadsMemory()) {
        addKnownBits(IS_NOEFFECT);
        return ChangeStatus::UNCHANGED;
      }

      // Readonly information is not provided, and cannot be inferred from
      // AAMemoryBehavior.
      return indicatePessimisticFixpoint();
    }

    return ChangeStatus::UNCHANGED;
  }

  /// Refine IS_LOCALLY_INVARIANT from the invariance of all underlying
  /// objects (and, for aliasing intrinsics, all pointer arguments).
  ChangeStatus updateLocalInvariance(Attributor &A) {
    if (isKnown(IS_LOCALLY_INVARIANT) || !isAssumed(IS_LOCALLY_INVARIANT))
      return ChangeStatus::UNCHANGED;

    // try to infer invariance from underlying objects
    const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
        getIRPosition(), this, DepClassTy::REQUIRED);
    if (!AUO)
      return ChangeStatus::UNCHANGED;

    bool UsedAssumedInformation = false;
    const auto IsLocallyInvariantLoadIfPointer = [&](const Value &V) {
      // Non-pointer underlying objects are irrelevant here.
      if (!V.getType()->isPointerTy())
        return true;
      const auto *IsInvariantLoadPointer =
          A.getOrCreateAAFor<AAInvariantLoadPointer>(IRPosition::value(V), this,
                                                     DepClassTy::REQUIRED);
      // Conservatively fail if invariance cannot be inferred.
      if (!IsInvariantLoadPointer)
        return false;

      if (IsInvariantLoadPointer->isKnownLocallyInvariant())
        return true;
      if (!IsInvariantLoadPointer->isAssumedLocallyInvariant())
        return false;

      // Result rests on an assumption; remember so we do not promote to
      // "known" below.
      UsedAssumedInformation = true;
      return true;
    };
    if (!AUO->forallUnderlyingObjects(IsLocallyInvariantLoadIfPointer))
      return indicatePessimisticFixpoint();

    if (const auto *CB = dyn_cast<CallBase>(&getAnchorValue())) {
      if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
              CB, /*MustPreserveNullness=*/false)) {
        // The returned pointer aliases an argument, so every pointer argument
        // must itself be locally invariant.
        for (const Value *Arg : CB->args()) {
          if (!IsLocallyInvariantLoadIfPointer(*Arg))
            return indicatePessimisticFixpoint();
        }
      }
    }

    if (!UsedAssumedInformation) {
      // Pointer is known and not just assumed to be locally invariant.
      addKnownBits(IS_LOCALLY_INVARIANT);
      return ChangeStatus::CHANGED;
    }

    return ChangeStatus::UNCHANGED;
  }
};
|
||||
|
||||
/// Invariant-load-pointer attribute for floating (instruction-result)
/// positions; the generic implementation suffices.
struct AAInvariantLoadPointerFloating final : AAInvariantLoadPointerImpl {
  AAInvariantLoadPointerFloating(const IRPosition &IRP, Attributor &A)
      : AAInvariantLoadPointerImpl(IRP, A) {}
};
|
||||
|
||||
/// Invariant-load-pointer attribute for function-return positions.
struct AAInvariantLoadPointerReturned final : AAInvariantLoadPointerImpl {
  AAInvariantLoadPointerReturned(const IRPosition &IRP, Attributor &A)
      : AAInvariantLoadPointerImpl(IRP, A) {}

  void initialize(Attributor &) override {
    // A returned pointer escapes to callers, so its memory lifetime is not
    // constrained to this function.
    removeAssumedBits(IS_LOCALLY_CONSTRAINED);
  }
};
|
||||
|
||||
/// Invariant-load-pointer attribute for call-site-return positions.
struct AAInvariantLoadPointerCallSiteReturned final
    : AAInvariantLoadPointerImpl {
  AAInvariantLoadPointerCallSiteReturned(const IRPosition &IRP, Attributor &A)
      : AAInvariantLoadPointerImpl(IRP, A) {}

  void initialize(Attributor &A) override {
    const Function *F = getAssociatedFunction();
    assert(F && "no associated function for return from call");

    // Callees with a visible body are handled by the generic logic.
    if (!F->isDeclaration() && !F->isIntrinsic())
      return AAInvariantLoadPointerImpl::initialize(A);

    const auto &CB = cast<CallBase>(getAnchorValue());
    // Intrinsics returning an (uncaptured) alias of a pointer argument defer
    // to the argument's invariance.
    if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
            &CB, /*MustPreserveNullness=*/false))
      return AAInvariantLoadPointerImpl::initialize(A);

    // A readonly, nosync callee cannot have written the returned memory.
    if (F->onlyReadsMemory() && F->hasNoSync())
      return AAInvariantLoadPointerImpl::initialize(A);

    // At this point, the function is opaque, so we conservatively assume
    // non-invariance.
    indicatePessimisticFixpoint();
  }
};
|
||||
|
||||
/// Invariant-load-pointer attribute for argument positions.
struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
  AAInvariantLoadPointerArgument(const IRPosition &IRP, Attributor &A)
      : AAInvariantLoadPointerImpl(IRP, A) {}

  void initialize(Attributor &) override {
    const Function *F = getAssociatedFunction();
    assert(F && "no associated function for argument");

    if (!isCallableCC(F->getCallingConv())) {
      // Entry-point calling conventions (presumably e.g. GPU kernels — no
      // in-module callers) keep the memory lifetime locally constrained.
      addKnownBits(IS_LOCALLY_CONSTRAINED);
      return;
    }

    // Externally visible functions may be called with memory whose lifetime
    // we cannot bound, so drop the constrained assumption.
    if (!F->hasLocalLinkage())
      removeAssumedBits(IS_LOCALLY_CONSTRAINED);
  }
};
|
||||
|
||||
/// Invariant-load-pointer attribute for call-site-argument positions; the
/// generic implementation suffices.
struct AAInvariantLoadPointerCallSiteArgument final
    : AAInvariantLoadPointerImpl {
  AAInvariantLoadPointerCallSiteArgument(const IRPosition &IRP, Attributor &A)
      : AAInvariantLoadPointerImpl(IRP, A) {}
};
|
||||
} // namespace
|
||||
|
||||
/// ------------------------ Address Space ------------------------------------
|
||||
namespace {
|
||||
|
||||
@@ -13038,6 +13379,7 @@ const char AAInterFnReachability::ID = 0;
|
||||
const char AAPointerInfo::ID = 0;
|
||||
const char AAAssumptionInfo::ID = 0;
|
||||
const char AAUnderlyingObjects::ID = 0;
|
||||
const char AAInvariantLoadPointer::ID = 0;
|
||||
const char AAAddressSpace::ID = 0;
|
||||
const char AAAllocationInfo::ID = 0;
|
||||
const char AAIndirectCallInfo::ID = 0;
|
||||
@@ -13172,6 +13514,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
|
||||
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
|
||||
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass)
|
||||
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
|
||||
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInvariantLoadPointer)
|
||||
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace)
|
||||
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo)
|
||||
|
||||
|
||||
431
llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
Normal file
431
llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
Normal file
@@ -0,0 +1,431 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=attributor %s -S | FileCheck %s --check-prefix=AMDGCN
|
||||
|
||||
@G = addrspace(1) global i32 zeroinitializer, align 4
|
||||
declare void @clobber(i32) #0
|
||||
declare void @clobber.p5(ptr addrspace(5)) #0
|
||||
declare ptr addrspace(1) @get_ptr() #0
|
||||
declare noalias ptr addrspace(1) @get_noalias_ptr() #0
|
||||
declare noalias ptr addrspace(1) @get_untouched_ptr() #1
|
||||
|
||||
;; Non-kernel (callable) function: invariance cannot be derived.
define void @test_nonkernel(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define void @test_nonkernel(
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7:[0-9]+]]
; AMDGCN-NEXT: ret void
;
  %val = load i32, ptr addrspace(1) %ptr, align 4
  ;; may not be !invariant.load, as the caller may modify %ptr
  call void @clobber(i32 %val)
  ret void
}
|
||||
|
||||
;; Kernel argument without noalias: aliasing blocks invariance.
define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_plain(
; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %val = load i32, ptr addrspace(1) %ptr, align 4
  ;; may not be !invariant.load, as %ptr may alias a pointer in @clobber
  call void @clobber(i32 %val)
  ret void
}
|
||||
|
||||
;; noalias kernel argument that is never written: load is tagged invariant.
define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr(
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]]
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %val = load i32, ptr addrspace(1) %ptr, align 4
  call void @clobber(i32 %val)
  ret void
}
|
||||
|
||||
;; gep off a possibly-aliasing pointer: not invariant.
define amdgpu_kernel void @test_gep(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_gep(
; AMDGCN-SAME: ptr addrspace(1) nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
  %val = load i32, ptr addrspace(1) %gep, align 4
  call void @clobber(i32 %val)
  ret void
}
|
||||
|
||||
;; gep inherits invariance from its noalias underlying object.
define amdgpu_kernel void @test_noalias_gep(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_gep(
; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !invariant.load [[META0]]
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
  %val = load i32, ptr addrspace(1) %gep, align 4
  call void @clobber(i32 %val)
  ret void
}
|
||||
|
||||
;; A store through the pointer defeats invariance even with noalias.
define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_swap(
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %val = load i32, ptr addrspace(1) %ptr, align 4
  ;; cannot be !invariant.load due to the write to %ptr
  store i32 %swap, ptr addrspace(1) %ptr, align 4
  call void @clobber(i32 %val)
  ret void
}
|
||||
|
||||
;; Volatile load: side-effecting, so never tagged invariant.
define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_volatile(
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %val = load volatile i32, ptr addrspace(1) %ptr, align 4
  ;; volatile loads cannot be !invariant.load
  call void @clobber(i32 %val)
  ret void
}
|
||||
|
||||
;; Unordered atomic load: no ordering side effects, so it can be invariant.
define amdgpu_kernel void @test_unordered(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_unordered(
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4, !invariant.load [[META0]]
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4
  call void @clobber(i32 %val)
  ret void
}
|
||||
|
||||
;; Monotonic atomic load: ordering guarantee blocks invariance.
define amdgpu_kernel void @test_monotonic(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_monotonic(
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] monotonic, align 4
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %val = load atomic i32, ptr addrspace(1) %ptr monotonic, align 4
  ;; atomic loads with ordering guarantees may have side effects
  call void @clobber(i32 %val)
  ret void
}
|
||||
|
||||
;; Load from a mutable global variable: not invariant.
define amdgpu_kernel void @test_global() {
; AMDGCN-LABEL: define amdgpu_kernel void @test_global(
; AMDGCN-SAME: ) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) @G, align 4
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %val = load i32, ptr addrspace(1) @G, align 4
  ;; is not an !invariant.load as global variables may change
  call void @clobber(i32 %val)
  ret void
}
|
||||
|
||||
;; Loads from alloca pointers are never tagged: stack memory is written.
define amdgpu_kernel void @test_alloca(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_alloca(
; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; AMDGCN-NEXT: store i32 [[VAL]], ptr addrspace(5) [[ALLOCA]], align 4
; AMDGCN-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[ALLOCA]], align 4
; AMDGCN-NEXT: call void @clobber.p5(ptr addrspace(5) noundef align 4 [[ALLOCA]]) #[[ATTR7]]
; AMDGCN-NEXT: call void @clobber(i32 [[LOAD]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %val = load i32, ptr addrspace(1) %ptr, align 4
  %alloca = alloca i32, addrspace(5)
  store i32 %val, ptr addrspace(5) %alloca
  %load = load i32, ptr addrspace(5) %alloca
  call void @clobber.p5(ptr addrspace(5) %alloca)
  call void @clobber(i32 %load)
  ret void
}
|
||||
|
||||
;; Internal helper that writes %qty through the alloca pointer argument.
define internal void @copy.i32(ptr addrspace(5) %alloca, i32 %qty) {
; AMDGCN-LABEL: define internal void @copy.i32(
; AMDGCN-SAME: ptr addrspace(5) noalias nofree noundef writeonly align 4 captures(none) dereferenceable_or_null(4) [[ALLOCA:%.*]], i32 [[QTY:%.*]]) #[[ATTR4:[0-9]+]] {
; AMDGCN-NEXT: store i32 [[QTY]], ptr addrspace(5) [[ALLOCA]], align 4
; AMDGCN-NEXT: ret void
;
  store i32 %qty, ptr addrspace(5) %alloca
  ret void
}
|
||||
|
||||
;; Alloca written via an internal callee: load still not tagged invariant.
define amdgpu_kernel void @test_internal_alloca(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_internal_alloca(
; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; AMDGCN-NEXT: call void @copy.i32(ptr addrspace(5) noalias nofree noundef writeonly align 4 captures(none) dereferenceable_or_null(4) [[ALLOCA]], i32 [[VAL]]) #[[ATTR8:[0-9]+]]
; AMDGCN-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[ALLOCA]], align 4
; AMDGCN-NEXT: call void @clobber(i32 [[LOAD]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %val = load i32, ptr addrspace(1) %ptr, align 4
  %alloca = alloca i32, addrspace(5)
  call void @copy.i32(ptr addrspace(5) %alloca, i32 %val)
  %load = load i32, ptr addrspace(5) %alloca
  call void @clobber(i32 %load)
  ret void
}
|
||||
|
||||
;; Internal function: invariance inferred interprocedurally from its caller.
define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define internal i32 @test_internal_noalias_load(
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR5:[0-9]+]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
; AMDGCN-NEXT: ret i32 [[VAL]]
;
  %val = load i32, ptr addrspace(1) %ptr, align 4
  ;; is an !invariant.load due to its only caller @test_call_internal_noalias
  ret i32 %val
}
|
||||
|
||||
;; Passes a noalias, unwritten pointer; see @test_internal_noalias_load.
define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_noalias(
; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR9:[0-9]+]]
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %val = call i32 @test_internal_noalias_load(ptr addrspace(1) %ptr)
  call void @clobber(i32 %val)
  ret void
}
|
||||
|
||||
;; Local noalias is not enough: the caller's pointer may alias.
define internal i32 @test_internal_load(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define internal i32 @test_internal_load(
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR5]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: ret i32 [[VAL]]
;
  %val = load i32, ptr addrspace(1) %ptr, align 4
  ;; may not be an !invariant.load since the pointer in @test_call_internal may alias
  ret i32 %val
}
|
||||
|
||||
;; Passes a possibly-aliasing pointer; see @test_internal_load.
define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal(
; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR9]]
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %val = call i32 @test_internal_load(ptr addrspace(1) %ptr)
  call void @clobber(i32 %val)
  ret void
}
|
||||
|
||||
;; A write in the caller defeats invariance in the callee.
define internal i32 @test_internal_written(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define internal i32 @test_internal_written(
; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR5]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: ret i32 [[VAL]]
;
  %val = load i32, ptr addrspace(1) %ptr, align 4
  ;; cannot be an !invariant.load because of the write in caller @test_call_internal_written
  ret i32 %val
}
|
||||
|
||||
;; The store below prevents the load in @test_internal_written from being
;; tagged invariant.
define amdgpu_kernel void @test_call_internal_written(ptr addrspace(1) noalias %ptr, i32 inreg %x) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_written(
; AMDGCN-SAME: ptr addrspace(1) noalias nofree captures(none) [[PTR:%.*]], i32 inreg [[X:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR9]]
; AMDGCN-NEXT: store i32 [[X]], ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %val = call i32 @test_internal_written(ptr addrspace(1) %ptr)
  store i32 %x, ptr addrspace(1) %ptr
  call void @clobber(i32 %val)
  ret void
}
|
||||
|
||||
;; Pointer returned from an opaque call without noalias: not invariant.
define amdgpu_kernel void @test_call_ptr() {
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr(
; AMDGCN-SAME: ) #[[ATTR2]] {
; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR7]]
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %ptr = call ptr addrspace(1) @get_ptr()
  %val = load i32, ptr addrspace(1) %ptr, align 4
  ;; may not be an !invariant.load since %ptr may alias
  call void @clobber(i32 %val)
  ret void
}
|
||||
|
||||
;; noalias return from an opaque callee is still insufficient: the callee may
;; have written the memory before returning.
define amdgpu_kernel void @test_call_noalias_ptr() {
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_noalias_ptr(
; AMDGCN-SAME: ) #[[ATTR2]] {
; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_noalias_ptr() #[[ATTR7]]
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
; AMDGCN-NEXT: ret void
;
  %ptr = call ptr addrspace(1) @get_noalias_ptr()
  %val = load i32, ptr addrspace(1) %ptr, align 4
  ;; may not be an !invariant.load since %ptr may have been written to before returning
  call void @clobber(i32 %val)
  ret void
}
|
||||
|
||||
define amdgpu_kernel void @test_call_untouched_ptr() {
|
||||
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_untouched_ptr(
|
||||
; AMDGCN-SAME: ) #[[ATTR2]] {
|
||||
; AMDGCN-NEXT: [[PTR:%.*]] = call noalias align 4 ptr addrspace(1) @get_untouched_ptr() #[[ATTR10:[0-9]+]]
|
||||
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
|
||||
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||
; AMDGCN-NEXT: ret void
|
||||
;
|
||||
%ptr = call ptr addrspace(1) @get_untouched_ptr()
|
||||
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||
call void @clobber(i32 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) {
|
||||
; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer(
|
||||
; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
|
||||
; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i32 noundef 0, i32 noundef 0) #[[ATTR11:[0-9]+]]
|
||||
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4
|
||||
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||
; AMDGCN-NEXT: ret void
|
||||
;
|
||||
%rsrc = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 0, i32 0)
|
||||
%val = load i32, ptr addrspace(7) %rsrc, align 4
|
||||
;; original %ptr may alias
|
||||
call void @clobber(i32 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @test_make_buffer_noalias(ptr addrspace(1) noalias %ptr) {
|
||||
; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer_noalias(
|
||||
; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
|
||||
; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i32 noundef 0, i32 noundef 0) #[[ATTR11]]
|
||||
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4, !invariant.load [[META0]]
|
||||
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||
; AMDGCN-NEXT: ret void
|
||||
;
|
||||
%rsrc = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 0, i32 0)
|
||||
%val = load i32, ptr addrspace(7) %rsrc, align 4
|
||||
call void @clobber(i32 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
|
||||
; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load(
|
||||
; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
|
||||
; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
|
||||
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
|
||||
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||
; AMDGCN-NEXT: ret void
|
||||
;
|
||||
%ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
|
||||
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||
;; either pointer yields an !invariant.load
|
||||
call void @clobber(i32 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
|
||||
; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load_partial_noalias(
|
||||
; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
|
||||
; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
|
||||
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||
; AMDGCN-NEXT: ret void
|
||||
;
|
||||
%ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
|
||||
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||
;; %ptr.false may alias, so no !invariant.load
|
||||
call void @clobber(i32 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @test_branch_load(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
|
||||
; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load(
|
||||
; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
|
||||
; AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
|
||||
; AMDGCN: [[TRUE]]:
|
||||
; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR7]]
|
||||
; AMDGCN-NEXT: br label %[[FINISH:.*]]
|
||||
; AMDGCN: [[FALSE]]:
|
||||
; AMDGCN-NEXT: br label %[[FINISH]]
|
||||
; AMDGCN: [[FINISH]]:
|
||||
; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
|
||||
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
|
||||
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||
; AMDGCN-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
br i1 %cond, label %true, label %false
|
||||
true:
|
||||
call void @clobber(i32 1)
|
||||
br label %finish
|
||||
false:
|
||||
br label %finish
|
||||
finish:
|
||||
%ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ]
|
||||
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||
;; either pointer yields an !invariant.load
|
||||
call void @clobber(i32 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @test_branch_load_partial_noalias(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
|
||||
; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load_partial_noalias(
|
||||
; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
|
||||
; AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
|
||||
; AMDGCN: [[TRUE]]:
|
||||
; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR7]]
|
||||
; AMDGCN-NEXT: br label %[[FINISH:.*]]
|
||||
; AMDGCN: [[FALSE]]:
|
||||
; AMDGCN-NEXT: br label %[[FINISH]]
|
||||
; AMDGCN: [[FINISH]]:
|
||||
; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
|
||||
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||
; AMDGCN-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
br i1 %cond, label %true, label %false
|
||||
true:
|
||||
call void @clobber(i32 1)
|
||||
br label %finish
|
||||
false:
|
||||
br label %finish
|
||||
finish:
|
||||
%ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ]
|
||||
%val = load i32, ptr addrspace(1) %ptr, align 4
|
||||
;; ptr.false may alias, so no !invariant.load
|
||||
call void @clobber(i32 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nofree norecurse nosync nounwind willreturn }
|
||||
attributes #1 = { nofree norecurse nosync nounwind willreturn readonly }
|
||||
;.
|
||||
; AMDGCN: [[META0]] = !{}
|
||||
;.
|
||||
@@ -207,7 +207,6 @@ define void @f7_1(ptr %ptr, i1 %cnd) {
|
||||
; CHECK-LABEL: define {{[^@]+}}@f7_1
|
||||
; CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[PTR:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR2]] {
|
||||
; CHECK-NEXT: [[A:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]]
|
||||
; CHECK-NEXT: [[PTR_0:%.*]] = load i32, ptr [[PTR]], align 4
|
||||
; CHECK-NEXT: [[B:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]]
|
||||
; CHECK-NEXT: br i1 [[CND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
|
||||
; CHECK: if.true:
|
||||
|
||||
@@ -342,14 +342,6 @@ define %S.2 @t3.helper() {
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RETVAL:%.*]] = alloca [[S_2:%.*]], align 8
|
||||
; CHECK-NEXT: call void @ext1(ptr noundef nonnull align 8 dereferenceable(24) [[RETVAL]])
|
||||
; CHECK-NEXT: [[DOTFCA_0_LOAD:%.*]] = load ptr, ptr [[RETVAL]], align 8
|
||||
; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[S_2]] poison, ptr [[DOTFCA_0_LOAD]], 0
|
||||
; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 1
|
||||
; CHECK-NEXT: [[DOTFCA_1_LOAD:%.*]] = load i64, ptr [[DOTFCA_1_GEP]], align 8
|
||||
; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_0_INSERT]], i64 [[DOTFCA_1_LOAD]], 1
|
||||
; CHECK-NEXT: [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 2
|
||||
; CHECK-NEXT: [[DOTFCA_2_LOAD:%.*]] = load i64, ptr [[DOTFCA_2_GEP]], align 8
|
||||
; CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_1_INSERT]], i64 [[DOTFCA_2_LOAD]], 2
|
||||
; CHECK-NEXT: ret [[S_2]] zeroinitializer
|
||||
;
|
||||
entry:
|
||||
|
||||
Reference in New Issue
Block a user