[LAA] Factor out logic to compute dependence distance. (NFCI)
This patch refactors the logic to compute the dependence distance, stride, size and write info to a separate function. This limits the scope of various A* and B* variables, which in turn makes it easier to reason about their uses. In particular this makes it more explicit why dropping the various std::swaps as done in https://github.com/llvm/llvm-project/pull/70819/ is valid.
This commit is contained in:
@@ -61,6 +61,7 @@
|
||||
#include <cstdint>
|
||||
#include <iterator>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
using namespace llvm;
|
||||
@@ -1876,53 +1877,62 @@ isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
|
||||
});
|
||||
}
|
||||
|
||||
MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
|
||||
const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
|
||||
unsigned BIdx, const DenseMap<Value *, const SCEV *> &Strides,
|
||||
const DenseMap<Value *, SmallVector<const Value *, 16>>
|
||||
&UnderlyingObjects) {
|
||||
assert (AIdx < BIdx && "Must pass arguments in program order");
|
||||
|
||||
// Get the dependence distance, stride, type size in whether i is a write for
|
||||
// the dependence between A and B. Returns a DepType, if we can prove there's
|
||||
// no dependence or the analysis fails. Outlined to lambda to limit he scope
|
||||
// of various temporary variables, like A/BPtr, StrideA/BPtr and others.
|
||||
// Returns either the dependence result, if it could already be determined, or a
|
||||
// tuple with (Distance, Stride, TypeSize, AIsWrite, BIsWrite).
|
||||
static std::variant<MemoryDepChecker::Dependence::DepType,
|
||||
std::tuple<const SCEV *, uint64_t, uint64_t, bool, bool>>
|
||||
getDependenceDistanceStrideAndSize(
|
||||
const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
|
||||
const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
|
||||
const DenseMap<Value *, const SCEV *> &Strides,
|
||||
const DenseMap<Value *, SmallVector<const Value *, 16>> &UnderlyingObjects,
|
||||
PredicatedScalarEvolution &PSE, const Loop *InnermostLoop) {
|
||||
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
|
||||
auto &SE = *PSE.getSE();
|
||||
auto [APtr, AIsWrite] = A;
|
||||
auto [BPtr, BIsWrite] = B;
|
||||
Type *ATy = getLoadStoreType(InstMap[AIdx]);
|
||||
Type *BTy = getLoadStoreType(InstMap[BIdx]);
|
||||
|
||||
// Two reads are independent.
|
||||
if (!AIsWrite && !BIsWrite)
|
||||
return Dependence::NoDep;
|
||||
return MemoryDepChecker::Dependence::NoDep;
|
||||
|
||||
Type *ATy = getLoadStoreType(AInst);
|
||||
Type *BTy = getLoadStoreType(BInst);
|
||||
|
||||
// We cannot check pointers in different address spaces.
|
||||
if (APtr->getType()->getPointerAddressSpace() !=
|
||||
BPtr->getType()->getPointerAddressSpace())
|
||||
return Dependence::Unknown;
|
||||
return MemoryDepChecker::Dependence::Unknown;
|
||||
|
||||
int64_t StrideAPtr =
|
||||
getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true).value_or(0);
|
||||
getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true).value_or(0);
|
||||
int64_t StrideBPtr =
|
||||
getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true).value_or(0);
|
||||
getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true).value_or(0);
|
||||
|
||||
const SCEV *Src = PSE.getSCEV(APtr);
|
||||
const SCEV *Sink = PSE.getSCEV(BPtr);
|
||||
|
||||
// If the induction step is negative we have to invert source and sink of the
|
||||
// dependence.
|
||||
// If the induction step is negative we have to invert source and sink of
|
||||
// the dependence.
|
||||
if (StrideAPtr < 0) {
|
||||
std::swap(APtr, BPtr);
|
||||
std::swap(ATy, BTy);
|
||||
std::swap(Src, Sink);
|
||||
std::swap(AIsWrite, BIsWrite);
|
||||
std::swap(AIdx, BIdx);
|
||||
std::swap(AInst, BInst);
|
||||
std::swap(StrideAPtr, StrideBPtr);
|
||||
}
|
||||
|
||||
ScalarEvolution &SE = *PSE.getSE();
|
||||
const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
|
||||
|
||||
LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
|
||||
<< "(Induction step: " << StrideAPtr << ")\n");
|
||||
LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
|
||||
<< *InstMap[BIdx] << ": " << *Dist << "\n");
|
||||
LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
|
||||
<< ": " << *Dist << "\n");
|
||||
|
||||
// Needs accesses where the addresses of the accessed underlying objects do
|
||||
// not change within the loop.
|
||||
@@ -1930,22 +1940,46 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
|
||||
InnermostLoop) ||
|
||||
isLoopVariantIndirectAddress(UnderlyingObjects.find(BPtr)->second, SE,
|
||||
InnermostLoop))
|
||||
return Dependence::IndirectUnsafe;
|
||||
return MemoryDepChecker::Dependence::IndirectUnsafe;
|
||||
|
||||
// Need accesses with constant stride. We don't want to vectorize
|
||||
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
|
||||
// the address space.
|
||||
if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
|
||||
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap
|
||||
// in the address space.
|
||||
if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
|
||||
LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
|
||||
return Dependence::Unknown;
|
||||
return MemoryDepChecker::Dependence::Unknown;
|
||||
}
|
||||
|
||||
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
|
||||
uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
|
||||
bool HasSameSize =
|
||||
DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
|
||||
if (!HasSameSize)
|
||||
TypeByteSize = 0;
|
||||
uint64_t Stride = std::abs(StrideAPtr);
|
||||
return std::make_tuple(Dist, Stride, TypeByteSize, AIsWrite, BIsWrite);
|
||||
}
|
||||
|
||||
MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
|
||||
const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
|
||||
unsigned BIdx, const DenseMap<Value *, const SCEV *> &Strides,
|
||||
const DenseMap<Value *, SmallVector<const Value *, 16>>
|
||||
&UnderlyingObjects) {
|
||||
assert(AIdx < BIdx && "Must pass arguments in program order");
|
||||
|
||||
// Get the dependence distance, stride, type size and what access writes for
|
||||
// the dependence between A and B.
|
||||
auto Res = getDependenceDistanceStrideAndSize(
|
||||
A, InstMap[AIdx], B, InstMap[BIdx], Strides, UnderlyingObjects, PSE,
|
||||
InnermostLoop);
|
||||
if (std::holds_alternative<Dependence::DepType>(Res))
|
||||
return std::get<Dependence::DepType>(Res);
|
||||
|
||||
const auto &[Dist, Stride, TypeByteSize, AIsWrite, BIsWrite] =
|
||||
std::get<std::tuple<const SCEV *, uint64_t, uint64_t, bool, bool>>(Res);
|
||||
bool HasSameSize = TypeByteSize > 0;
|
||||
|
||||
ScalarEvolution &SE = *PSE.getSE();
|
||||
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
|
||||
if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
|
||||
isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist,
|
||||
Stride, TypeByteSize))
|
||||
|
||||
Reference in New Issue
Block a user