[LV]Split store-load forward distance analysis from other checks, NFC (#121156)

The patch splits the store-load forwarding distance analysis from the other
dependency analysis in LAA. Currently it supports only power-of-2
distances; the split is required to support non-power-of-2 distances in the future.

Part of #100755
This commit is contained in:
Alexey Bataev
2025-03-31 07:28:44 -04:00
committed by GitHub
parent f82283a84e
commit 78777a204a
6 changed files with 72 additions and 31 deletions

View File

@@ -216,6 +216,21 @@ public:
return MaxSafeVectorWidthInBits;
}
/// Return true if there are no store-load forwarding dependencies.
bool isSafeForAnyStoreLoadForwardDistances() const {
  // The member keeps the uint64_t maximum as its "unrestricted" sentinel, so
  // comparing against that sentinel tells whether any limit has been recorded.
  const uint64_t Unrestricted = std::numeric_limits<uint64_t>::max();
  return MaxStoreLoadForwardSafeDistanceInBits == Unrestricted;
}
/// Return safe power-of-2 number of elements, which do not prevent store-load
/// forwarding, multiplied by the size of the elements in bits.
uint64_t getStoreLoadForwardSafeDistanceInBits() const {
  // Querying the distance only makes sense once a limit has been recorded;
  // the "safe for any distance" state must be ruled out by the caller first.
  assert(!isSafeForAnyStoreLoadForwardDistances() &&
         "Expected the distance, that prevent store-load forwarding, to be "
         "set.");
  const uint64_t SafeDistanceInBits = MaxStoreLoadForwardSafeDistanceInBits;
  return SafeDistanceInBits;
}
/// In some cases when the dependency check fails we can still
/// vectorize the loop with a dynamic array access check.
bool shouldRetryWithRuntimeCheck() const {
@@ -304,6 +319,11 @@ private:
/// restrictive.
uint64_t MaxSafeVectorWidthInBits = -1U;
/// Maximum power-of-2 number of elements, which do not prevent store-load
/// forwarding, multiplied by the size of the elements in bits.
uint64_t MaxStoreLoadForwardSafeDistanceInBits =
std::numeric_limits<uint64_t>::max();
/// If we see a non-constant dependence distance we can still try to
/// vectorize this loop with runtime checks.
bool FoundNonConstantDistanceDependence = false;
@@ -357,7 +377,8 @@ private:
///
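/// \return true if no vector factor of at least two elements avoids a
/// store-load forwarding conflict; otherwise false, possibly after limiting
/// larger vectorization factors via MaxStoreLoadForwardSafeDistanceInBits.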
bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize,
unsigned CommonStride = 0);
/// Updates the current safety status with \p S. We can go from Safe to
/// either PossiblySafeWithRtChecks or Unsafe and from

View File

@@ -382,7 +382,8 @@ public:
const LoopAccessInfo *getLAI() const { return LAI; }
bool isSafeForAnyVectorWidth() const {
return LAI->getDepChecker().isSafeForAnyVectorWidth();
return LAI->getDepChecker().isSafeForAnyVectorWidth() &&
LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
}
uint64_t getMaxSafeVectorWidthInBits() const {
@@ -406,6 +407,17 @@ public:
return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr;
}
/// Return true if there are no store-load forwarding dependencies.
bool isSafeForAnyStoreLoadForwardDistances() const {
  // Pure delegation to the dependence checker owned by the loop access info.
  const auto &DepChecker = LAI->getDepChecker();
  return DepChecker.isSafeForAnyStoreLoadForwardDistances();
}
/// Return safe power-of-2 number of elements, which do not prevent store-load
/// forwarding, multiplied by the size of the elements in bits.
uint64_t getMaxStoreLoadForwardSafeDistanceInBits() const {
  // Forward the query to the dependence checker, which tracks the limit.
  const auto &DepChecker = LAI->getDepChecker();
  return DepChecker.getStoreLoadForwardSafeDistanceInBits();
}
/// Returns true if vector representation of the instruction \p I
/// requires mask.
bool isMaskRequired(const Instruction *I) const {

View File

@@ -1740,7 +1740,8 @@ bool MemoryDepChecker::Dependence::isForward() const {
}
bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
uint64_t TypeByteSize) {
uint64_t TypeByteSize,
unsigned CommonStride) {
// If loads occur at a distance that is not a multiple of a feasible vector
// factor store-load forwarding does not take place.
// Positive dependences might cause troubles because vectorizing them might
@@ -1755,31 +1756,38 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
// cause any slowdowns.
const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
// Maximum vector factor.
uint64_t MaxVFWithoutSLForwardIssues = std::min(
VectorizerParams::MaxVectorWidth * TypeByteSize, MinDepDistBytes);
uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =
std::min(VectorizerParams::MaxVectorWidth * TypeByteSize,
MaxStoreLoadForwardSafeDistanceInBits);
// Compute the smallest VF at which the store and load would be misaligned.
for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
VF *= 2) {
for (uint64_t VF = 2 * TypeByteSize;
VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) {
// If the number of vector iteration between the store and the load are
// small we could incur conflicts.
if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
MaxVFWithoutSLForwardIssues = (VF >> 1);
MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1);
break;
}
}
if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) {
LLVM_DEBUG(
dbgs() << "LAA: Distance " << Distance
<< " that could cause a store-load forwarding conflict\n");
return true;
}
if (MaxVFWithoutSLForwardIssues < MinDepDistBytes &&
MaxVFWithoutSLForwardIssues !=
VectorizerParams::MaxVectorWidth * TypeByteSize)
MinDepDistBytes = MaxVFWithoutSLForwardIssues;
if (CommonStride &&
MaxVFWithoutSLForwardIssuesPowerOf2 <
MaxStoreLoadForwardSafeDistanceInBits &&
MaxVFWithoutSLForwardIssuesPowerOf2 !=
VectorizerParams::MaxVectorWidth * TypeByteSize) {
uint64_t MaxVF = MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride;
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
MaxStoreLoadForwardSafeDistanceInBits =
std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);
}
return false;
}
@@ -2227,20 +2235,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes);
bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
uint64_t MinDepDistBytesOld = MinDepDistBytes;
if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist &&
couldPreventStoreLoadForward(MinDistance, TypeByteSize)) {
// Sanity check that we didn't update MinDepDistBytes when calling
// couldPreventStoreLoadForward
assert(MinDepDistBytes == MinDepDistBytesOld &&
"An update to MinDepDistBytes requires an update to "
"MaxSafeVectorWidthInBits");
(void)MinDepDistBytesOld;
couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))
return Dependence::BackwardVectorizableButPreventsForwarding;
}
// An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
// since there is a backwards dependency.
uint64_t MaxVF = MinDepDistBytes / *CommonStride;
LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
<< " with max VF = " << MaxVF << '\n');
@@ -3005,6 +3003,11 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
if (!DC.isSafeForAnyVectorWidth())
OS << " with a maximum safe vector width of "
<< DC.getMaxSafeVectorWidthInBits() << " bits";
if (!DC.isSafeForAnyStoreLoadForwardDistances()) {
uint64_t SLDist = DC.getStoreLoadForwardSafeDistanceInBits();
OS << ", with a maximum safe store-load forward width of " << SLDist
<< " bits";
}
if (PtrRtChecking->Need)
OS << " with run-time checks";
OS << "\n";

View File

@@ -3815,13 +3815,18 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
// It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
// the memory accesses that is most restrictive (involved in the smallest
// dependence distance).
unsigned MaxSafeElements =
llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
unsigned MaxSafeElementsPowerOf2 =
bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
if (!Legal->isSafeForAnyStoreLoadForwardDistances()) {
unsigned SLDist = Legal->getMaxStoreLoadForwardSafeDistanceInBits();
MaxSafeElementsPowerOf2 =
std::min(MaxSafeElementsPowerOf2, SLDist / WidestType);
}
auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElementsPowerOf2);
auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElementsPowerOf2);
auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements);
auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements);
if (!Legal->isSafeForAnyVectorWidth())
this->MaxSafeElements = MaxSafeElements;
this->MaxSafeElements = MaxSafeElementsPowerOf2;
LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF
<< ".\n");

View File

@@ -4,7 +4,7 @@
; for (i = 0; i < n; i++)
; A[i + 4] = A[i] * 2;
; CHECK: Memory dependences are safe with a maximum safe vector width of 64 bits
; CHECK: Memory dependences are safe with a maximum safe vector width of 64 bits, with a maximum safe store-load forward width of 64 bits
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"

View File

@@ -276,7 +276,7 @@ for.body: ; preds = %entry, %for.body
define void @vectorizable_Read_Write(ptr nocapture %A) {
; CHECK-LABEL: 'vectorizable_Read_Write'
; CHECK-NEXT: for.body:
; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 64 bits
; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 64 bits, with a maximum safe store-load forward width of 64 bits
; CHECK-NEXT: Dependences:
; CHECK-NEXT: BackwardVectorizable:
; CHECK-NEXT: %0 = load i32, ptr %arrayidx, align 4 ->