[LV] Split store-load forward distance analysis from other checks, NFC (#121156)
The patch splits the store-load forwarding distance analysis from the other dependency analyses in LAA. Currently only power-of-2 distances are supported; the split is required to support non-power-of-2 distances in the future. Part of #100755.
This commit is contained in:
@@ -216,6 +216,21 @@ public:
|
||||
return MaxSafeVectorWidthInBits;
|
||||
}
|
||||
|
||||
/// Return true if there are no store-load forwarding dependencies.
|
||||
bool isSafeForAnyStoreLoadForwardDistances() const {
|
||||
return MaxStoreLoadForwardSafeDistanceInBits ==
|
||||
std::numeric_limits<uint64_t>::max();
|
||||
}
|
||||
|
||||
/// Return safe power-of-2 number of elements, which do not prevent store-load
|
||||
/// forwarding, multiplied by the size of the elements in bits.
|
||||
uint64_t getStoreLoadForwardSafeDistanceInBits() const {
|
||||
assert(!isSafeForAnyStoreLoadForwardDistances() &&
|
||||
"Expected the distance, that prevent store-load forwarding, to be "
|
||||
"set.");
|
||||
return MaxStoreLoadForwardSafeDistanceInBits;
|
||||
}
|
||||
|
||||
/// In some cases when the dependency check fails we can still
|
||||
/// vectorize the loop with a dynamic array access check.
|
||||
bool shouldRetryWithRuntimeCheck() const {
|
||||
@@ -304,6 +319,11 @@ private:
|
||||
/// restrictive.
|
||||
uint64_t MaxSafeVectorWidthInBits = -1U;
|
||||
|
||||
/// Maximum power-of-2 number of elements, which do not prevent store-load
|
||||
/// forwarding, multiplied by the size of the elements in bits.
|
||||
uint64_t MaxStoreLoadForwardSafeDistanceInBits =
|
||||
std::numeric_limits<uint64_t>::max();
|
||||
|
||||
/// If we see a non-constant dependence distance we can still try to
|
||||
/// vectorize this loop with runtime checks.
|
||||
bool FoundNonConstantDistanceDependence = false;
|
||||
@@ -357,7 +377,8 @@ private:
|
||||
///
|
||||
/// \return false if we shouldn't vectorize at all or avoid larger
|
||||
/// vectorization factors by limiting MinDepDistBytes.
|
||||
bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
|
||||
bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize,
|
||||
unsigned CommonStride = 0);
|
||||
|
||||
/// Updates the current safety status with \p S. We can go from Safe to
|
||||
/// either PossiblySafeWithRtChecks or Unsafe and from
|
||||
|
||||
@@ -382,7 +382,8 @@ public:
|
||||
const LoopAccessInfo *getLAI() const { return LAI; }
|
||||
|
||||
bool isSafeForAnyVectorWidth() const {
|
||||
return LAI->getDepChecker().isSafeForAnyVectorWidth();
|
||||
return LAI->getDepChecker().isSafeForAnyVectorWidth() &&
|
||||
LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
|
||||
}
|
||||
|
||||
uint64_t getMaxSafeVectorWidthInBits() const {
|
||||
@@ -406,6 +407,17 @@ public:
|
||||
return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr;
|
||||
}
|
||||
|
||||
/// Return true if there are no store-load forwarding dependencies.
|
||||
bool isSafeForAnyStoreLoadForwardDistances() const {
|
||||
return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
|
||||
}
|
||||
|
||||
/// Return the safe power-of-2 number of elements that does not prevent
|
||||
/// store-load forwarding, multiplied by the size of the elements in bits.
|
||||
uint64_t getMaxStoreLoadForwardSafeDistanceInBits() const {
|
||||
return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits();
|
||||
}
|
||||
|
||||
/// Returns true if vector representation of the instruction \p I
|
||||
/// requires mask.
|
||||
bool isMaskRequired(const Instruction *I) const {
|
||||
|
||||
@@ -1740,7 +1740,8 @@ bool MemoryDepChecker::Dependence::isForward() const {
|
||||
}
|
||||
|
||||
bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
|
||||
uint64_t TypeByteSize) {
|
||||
uint64_t TypeByteSize,
|
||||
unsigned CommonStride) {
|
||||
// If loads occur at a distance that is not a multiple of a feasible vector
|
||||
// factor store-load forwarding does not take place.
|
||||
// Positive dependences might cause troubles because vectorizing them might
|
||||
@@ -1755,31 +1756,38 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
|
||||
// cause any slowdowns.
|
||||
const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
|
||||
// Maximum vector factor.
|
||||
uint64_t MaxVFWithoutSLForwardIssues = std::min(
|
||||
VectorizerParams::MaxVectorWidth * TypeByteSize, MinDepDistBytes);
|
||||
uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =
|
||||
std::min(VectorizerParams::MaxVectorWidth * TypeByteSize,
|
||||
MaxStoreLoadForwardSafeDistanceInBits);
|
||||
|
||||
// Compute the smallest VF at which the store and load would be misaligned.
|
||||
for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
|
||||
VF *= 2) {
|
||||
for (uint64_t VF = 2 * TypeByteSize;
|
||||
VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) {
|
||||
// If the number of vector iteration between the store and the load are
|
||||
// small we could incur conflicts.
|
||||
if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
|
||||
MaxVFWithoutSLForwardIssues = (VF >> 1);
|
||||
MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
|
||||
if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) {
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "LAA: Distance " << Distance
|
||||
<< " that could cause a store-load forwarding conflict\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
if (MaxVFWithoutSLForwardIssues < MinDepDistBytes &&
|
||||
MaxVFWithoutSLForwardIssues !=
|
||||
VectorizerParams::MaxVectorWidth * TypeByteSize)
|
||||
MinDepDistBytes = MaxVFWithoutSLForwardIssues;
|
||||
if (CommonStride &&
|
||||
MaxVFWithoutSLForwardIssuesPowerOf2 <
|
||||
MaxStoreLoadForwardSafeDistanceInBits &&
|
||||
MaxVFWithoutSLForwardIssuesPowerOf2 !=
|
||||
VectorizerParams::MaxVectorWidth * TypeByteSize) {
|
||||
uint64_t MaxVF = MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride;
|
||||
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
|
||||
MaxStoreLoadForwardSafeDistanceInBits =
|
||||
std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -2227,20 +2235,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
|
||||
std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes);
|
||||
|
||||
bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
|
||||
uint64_t MinDepDistBytesOld = MinDepDistBytes;
|
||||
if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist &&
|
||||
couldPreventStoreLoadForward(MinDistance, TypeByteSize)) {
|
||||
// Sanity check that we didn't update MinDepDistBytes when calling
|
||||
// couldPreventStoreLoadForward
|
||||
assert(MinDepDistBytes == MinDepDistBytesOld &&
|
||||
"An update to MinDepDistBytes requires an update to "
|
||||
"MaxSafeVectorWidthInBits");
|
||||
(void)MinDepDistBytesOld;
|
||||
couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))
|
||||
return Dependence::BackwardVectorizableButPreventsForwarding;
|
||||
}
|
||||
|
||||
// An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
|
||||
// since there is a backwards dependency.
|
||||
uint64_t MaxVF = MinDepDistBytes / *CommonStride;
|
||||
LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
|
||||
<< " with max VF = " << MaxVF << '\n');
|
||||
@@ -3005,6 +3003,11 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
|
||||
if (!DC.isSafeForAnyVectorWidth())
|
||||
OS << " with a maximum safe vector width of "
|
||||
<< DC.getMaxSafeVectorWidthInBits() << " bits";
|
||||
if (!DC.isSafeForAnyStoreLoadForwardDistances()) {
|
||||
uint64_t SLDist = DC.getStoreLoadForwardSafeDistanceInBits();
|
||||
OS << ", with a maximum safe store-load forward width of " << SLDist
|
||||
<< " bits";
|
||||
}
|
||||
if (PtrRtChecking->Need)
|
||||
OS << " with run-time checks";
|
||||
OS << "\n";
|
||||
|
||||
@@ -3815,13 +3815,18 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
|
||||
// It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
|
||||
// the memory accesses that is most restrictive (involved in the smallest
|
||||
// dependence distance).
|
||||
unsigned MaxSafeElements =
|
||||
llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
|
||||
unsigned MaxSafeElementsPowerOf2 =
|
||||
bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
|
||||
if (!Legal->isSafeForAnyStoreLoadForwardDistances()) {
|
||||
unsigned SLDist = Legal->getMaxStoreLoadForwardSafeDistanceInBits();
|
||||
MaxSafeElementsPowerOf2 =
|
||||
std::min(MaxSafeElementsPowerOf2, SLDist / WidestType);
|
||||
}
|
||||
auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElementsPowerOf2);
|
||||
auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElementsPowerOf2);
|
||||
|
||||
auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements);
|
||||
auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements);
|
||||
if (!Legal->isSafeForAnyVectorWidth())
|
||||
this->MaxSafeElements = MaxSafeElements;
|
||||
this->MaxSafeElements = MaxSafeElementsPowerOf2;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF
|
||||
<< ".\n");
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
; for (i = 0; i < n; i++)
|
||||
; A[i + 4] = A[i] * 2;
|
||||
|
||||
; CHECK: Memory dependences are safe with a maximum safe vector width of 64 bits
|
||||
; CHECK: Memory dependences are safe with a maximum safe vector width of 64 bits, with a maximum safe store-load forward width of 64 bits
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.10.0"
|
||||
|
||||
@@ -276,7 +276,7 @@ for.body: ; preds = %entry, %for.body
|
||||
define void @vectorizable_Read_Write(ptr nocapture %A) {
|
||||
; CHECK-LABEL: 'vectorizable_Read_Write'
|
||||
; CHECK-NEXT: for.body:
|
||||
; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 64 bits
|
||||
; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 64 bits, with a maximum safe store-load forward width of 64 bits
|
||||
; CHECK-NEXT: Dependences:
|
||||
; CHECK-NEXT: BackwardVectorizable:
|
||||
; CHECK-NEXT: %0 = load i32, ptr %arrayidx, align 4 ->
|
||||
|
||||
Reference in New Issue
Block a user