Reapply "[SandboxVec] Add a simple pack reuse pass (#141848)"

This reverts commit 31abf07742.
This commit is contained in:
Vasileios Porpodas
2025-06-04 14:27:01 -07:00
parent 33974b41c7
commit 79861d2db7
10 changed files with 347 additions and 4 deletions

View File

@@ -0,0 +1,36 @@
//===- PackReuse.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// A pack de-duplication pass.
//
#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_PACKREUSE_H
#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_PACKREUSE_H
#include "llvm/ADT/StringRef.h"
#include "llvm/SandboxIR/Pass.h"
#include "llvm/SandboxIR/Region.h"
namespace llvm::sandboxir {
/// Region pass that de-duplicates packs: when an equivalent pack pattern
/// already exists, it is reused instead of keeping both copies around.
/// Later passes would most probably clean up the duplicates anyway, but until
/// then their extra cost can make vectorization more conservative than it
/// should be, so they are removed here.
class PackReuse final : public RegionPass {
public:
  /// Registers the pass under the name "pack-reuse".
  PackReuse() : RegionPass("pack-reuse") {}

  /// Runs the de-duplication on \p Rgn.
  bool runOnRegion(Region &Rgn, const Analyses &A) final;

private:
  /// Result flag returned by runOnRegion().
  bool Change = false;
};
} // namespace llvm::sandboxir
#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_PACKREUSE_H

View File

@@ -17,7 +17,25 @@
#include "llvm/SandboxIR/Type.h"
#include "llvm/SandboxIR/Utils.h"
namespace llvm::sandboxir {
namespace llvm {
/// DenseMap traits that allow a SmallVector of sandboxir::Value pointers to be
/// used as a map key.
template <> struct DenseMapInfo<SmallVector<sandboxir::Value *>> {
  /// The reserved "empty" key: a single-element vector holding the pointer
  /// value -1.
  static inline SmallVector<sandboxir::Value *> getEmptyKey() {
    auto *EmptyPtr = reinterpret_cast<sandboxir::Value *>(-1);
    return SmallVector<sandboxir::Value *>({EmptyPtr});
  }

  /// The reserved "tombstone" key: a single-element vector holding the pointer
  /// value -2.
  static inline SmallVector<sandboxir::Value *> getTombstoneKey() {
    auto *TombstonePtr = reinterpret_cast<sandboxir::Value *>(-2);
    return SmallVector<sandboxir::Value *>({TombstonePtr});
  }

  /// Hashes the sequence of pointers stored in \p Vec.
  static unsigned getHashValue(const SmallVector<sandboxir::Value *> &Vec) {
    return hash_combine_range(Vec.begin(), Vec.end());
  }

  /// Two keys are equal when the vectors compare equal.
  static bool isEqual(const SmallVector<sandboxir::Value *> &Vec1,
                      const SmallVector<sandboxir::Value *> &Vec2) {
    return Vec1 == Vec2;
  }
};
namespace sandboxir {
class VecUtils {
public:
@@ -179,6 +197,70 @@ public:
/// \Returns the first integer power of 2 that is <= Num.
static unsigned getFloorPowerOf2(unsigned Num);
/// Helper struct for `matchPack()`. Describes the instructions and operands
/// of a pack pattern.
struct PackPattern {
/// The insertelement instructions that form the pack pattern in bottom-up
/// order, i.e., the first instruction in `Instrs` is the bottom-most
/// InsertElement instruction of the pack pattern.
/// For example in this simple pack pattern:
/// %Pack0 = insertelement <2 x i8> poison, i8 %v0, i64 0
/// %Pack1 = insertelement <2 x i8> %Pack0, i8 %v1, i64 1
/// this is [ %Pack1, %Pack0 ].
SmallVector<Instruction *> Instrs;
/// The "external" operands of the pack pattern, i.e., the values that get
/// packed into a vector, skipping the ones in `Instrs`. Unlike `Instrs`, the
/// operands are stored in lane order: `matchPack()` places the value that is
/// inserted into lane N at index N.
/// So in our example this would be [ %v0, %v1 ].
SmallVector<Value *> Operands;
};
/// If \p I is the last instruction of a pack pattern (i.e., an InsertElement
/// into a vector), then this function returns the instructions in the pack
/// and the operands in the pack, else returns nullopt.
/// Here is an example of a matched pattern:
///   %PackA0 = insertelement <2 x i8> poison, i8 %v0, i64 0
///   %PackA1 = insertelement <2 x i8> %PackA0, i8 %v1, i64 1
/// A chain is matched only if:
///  - it contains one InsertElement per lane of the vector,
///  - all inserts are in the same block as \p I,
///  - the lane indices are constants that go from the last lane at \p I down
///    to lane 0 at the top-most insert, and
///  - the vector operand of the top-most insert is poison.
/// Vectors with fewer than two lanes are never matched.
/// In the returned pattern `Instrs` is in bottom-up order, while `Operands` is
/// indexed by lane.
/// TODO: this currently detects only simple canonicalized patterns.
static std::optional<PackPattern> matchPack(Instruction *I) {
  // TODO: Support vector pack patterns.
  // TODO: Support out-of-order inserts.

  // Early return if `I` is not an Insert.
  if (!isa<InsertElementInst>(I))
    return std::nullopt;
  auto *BB0 = I->getParent();
  // The pack contains as many instrs as the lanes of the bottom-most Insert.
  unsigned ExpectedNumInserts = VecUtils::getNumLanes(I);
  // An insert into a single-lane vector is valid IR, and this function may be
  // called on arbitrary instructions, so reject it gracefully instead of
  // asserting.
  if (ExpectedNumInserts < 2)
    return std::nullopt;

  PackPattern Pack;
  Pack.Instrs.reserve(ExpectedNumInserts);
  Pack.Operands.resize(ExpectedNumInserts);
  // Collect the inserts by walking up the use-def chain.
  Instruction *InsertI = I;
  for (auto ExpectedLane : reverse(seq<unsigned>(ExpectedNumInserts))) {
    // The previous vector operand was not an InsertElement.
    if (InsertI == nullptr)
      return std::nullopt;
    // The whole pattern must live in a single block.
    if (InsertI->getParent() != BB0)
      return std::nullopt;
    // Check the lane: it must be a constant equal to the expected one.
    auto *LaneC = dyn_cast<ConstantInt>(InsertI->getOperand(2));
    if (LaneC == nullptr || LaneC->getSExtValue() != ExpectedLane)
      return std::nullopt;
    Pack.Instrs.push_back(InsertI);
    Pack.Operands[ExpectedLane] = InsertI->getOperand(1);

    Value *Op = InsertI->getOperand(0);
    if (ExpectedLane == 0) {
      // Check the topmost insert. The operand should be a Poison.
      if (!isa<PoisonValue>(Op))
        return std::nullopt;
    } else {
      // Continue with the insert that produced the vector operand, if any.
      InsertI = dyn_cast<InsertElementInst>(Op);
    }
  }
  return Pack;
}
#ifndef NDEBUG
/// Helper dump function for debugging.
LLVM_DUMP_METHOD static void dump(ArrayRef<Value *> Bndl);
@@ -186,6 +268,8 @@ public:
#endif // NDEBUG
};
} // namespace llvm::sandboxir
} // namespace sandboxir
} // namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H

View File

@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMVectorize
SandboxVectorizer/Interval.cpp
SandboxVectorizer/Legality.cpp
SandboxVectorizer/Passes/BottomUpVec.cpp
SandboxVectorizer/Passes/PackReuse.cpp
SandboxVectorizer/Passes/RegionsFromBBs.cpp
SandboxVectorizer/Passes/RegionsFromMetadata.cpp
SandboxVectorizer/Passes/SeedCollection.cpp

View File

@@ -0,0 +1,53 @@
//===- PackReuse.cpp - A pack de-duplication pass -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h"
namespace llvm::sandboxir {
/// Looks for pack patterns in \p Rgn that pack the same operands within the
/// same block, keeps the one that comes first in program order and redirects
/// the users of all the others to it.
/// \Returns true if at least one duplicate pack was replaced in this region.
bool PackReuse::runOnRegion(Region &Rgn, const Analyses &A) {
  // `Change` is a member, so reset it: the result of a previous region must
  // not leak into the result reported for this one.
  Change = false;
  if (Rgn.empty())
    return Change;
  // The key to the map is the block of the pack and its ordered operands.
  // The value is a vector of all Pack Instrs with the same operands.
  DenseMap<std::pair<BasicBlock *, SmallVector<Value *>>,
           SmallVector<SmallVector<Instruction *>>>
      PacksMap;
  // Go over the region and look for pack patterns.
  for (auto *I : Rgn) {
    auto PackOpt = VecUtils::matchPack(I);
    if (!PackOpt)
      continue;
    // TODO: For now limit pack reuse within a BB.
    BasicBlock *BB = PackOpt->Instrs.front()->getParent();
    PacksMap[{BB, PackOpt->Operands}].push_back(PackOpt->Instrs);
  }
  for (auto &Pair : PacksMap) {
    auto &Packs = Pair.second;
    // A single pack has nothing to be de-duplicated against.
    if (Packs.size() <= 1)
      continue;
    // Sort packs by program order of their bottom-most instruction.
    sort(Packs, [](const auto &PackInstrs1, const auto &PackInstrs2) {
      return PackInstrs1.front()->comesBefore(PackInstrs2.front());
    });
    Instruction *TopMostPack = Packs[0].front();
    // Replace duplicate packs with the first one.
    for (const auto &PackInstrs :
         make_range(std::next(Packs.begin()), Packs.end())) {
      PackInstrs.front()->replaceAllUsesWith(TopMostPack);
      // Delete the pack instrs bottom-up, but only while they are dead. An
      // insert that still has users (e.g., one that is shared with the pack
      // we keep) must stay, and so must everything it depends on.
      for (auto *PackI : PackInstrs) {
        if (!PackI->hasNUses(0))
          break;
        PackI->eraseFromParent();
      }
    }
    Change = true;
  }
  return Change;
}
} // namespace llvm::sandboxir

View File

@@ -18,6 +18,7 @@
#endif
REGION_PASS("null", ::llvm::sandboxir::NullPass)
REGION_PASS("pack-reuse", ::llvm::sandboxir::PackReuse)
REGION_PASS("print-instruction-count", ::llvm::sandboxir::PrintInstructionCount)
REGION_PASS("print-region", ::llvm::sandboxir::PrintRegion)
REGION_PASS("tr-save", ::llvm::sandboxir::TransactionSave)

View File

@@ -20,10 +20,11 @@ RegionsFromMetadata::RegionsFromMetadata(StringRef Pipeline)
/// Creates the regions of \p F with Region::createRegionsFromMD() and runs the
/// region pass pipeline on each of them.
/// \Returns true if the pipeline reported a change for any of the regions.
bool RegionsFromMetadata::runOnFunction(Function &F, const Analyses &A) {
  SmallVector<std::unique_ptr<sandboxir::Region>> Regions =
      sandboxir::Region::createRegionsFromMD(F, A.getTTI());
  bool Change = false;
  // Run the pipeline exactly once per region and accumulate its result instead
  // of discarding it.
  for (auto &R : Regions)
    Change |= RPM.runOnRegion(*R, A);
  return Change;
}
} // namespace llvm::sandboxir

View File

@@ -2,6 +2,7 @@
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/NullPass.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintInstructionCount.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintRegion.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromBBs.h"

View File

@@ -0,0 +1,71 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-passes="regions-from-metadata<pack-reuse>" %s -S | FileCheck %s
; Three packs in a single block: PackA and PackB pack the same values into the
; same lanes, PackC packs the same values into swapped lanes.
define void @pack_reuse(i8 %v0, i8 %v1, ptr %ptr) {
; CHECK-LABEL: define void @pack_reuse(
; CHECK-SAME: i8 [[V0:%.*]], i8 [[V1:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PACKA0:%.*]] = insertelement <2 x i8> poison, i8 [[V0]], i64 0, !sandboxvec [[META0:![0-9]+]]
; CHECK-NEXT: [[PACKA1:%.*]] = insertelement <2 x i8> [[PACKA0]], i8 [[V1]], i64 1, !sandboxvec [[META0]]
; CHECK-NEXT: store <2 x i8> [[PACKA1]], ptr [[PTR]], align 2, !sandboxvec [[META0]]
; CHECK-NEXT: store <2 x i8> [[PACKA1]], ptr [[PTR]], align 2, !sandboxvec [[META0]]
; CHECK-NEXT: [[PACKC0:%.*]] = insertelement <2 x i8> poison, i8 [[V1]], i64 0, !sandboxvec [[META0]]
; CHECK-NEXT: [[PACKC1:%.*]] = insertelement <2 x i8> [[PACKC0]], i8 [[V0]], i64 1, !sandboxvec [[META0]]
; CHECK-NEXT: store <2 x i8> [[PACKC1]], ptr [[PTR]], align 2, !sandboxvec [[META0]]
; CHECK-NEXT: ret void
;
; The first pack of (%v0, %v1). It is the one that is kept.
%PackA0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0
%PackA1 = insertelement <2 x i8> %PackA0, i8 %v1, i64 1, !sandboxvec !0
store <2 x i8> %PackA1, ptr %ptr, !sandboxvec !0
; Should reuse PackA1: same operands in the same lanes, so both PackB
; instructions go away and the store uses PackA1 instead.
%PackB0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0
%PackB1 = insertelement <2 x i8> %PackB0, i8 %v1, i64 1, !sandboxvec !0
store <2 x i8> %PackB1, ptr %ptr, !sandboxvec !0
; Should remain: the operands are in the opposite lanes (%v1, %v0), so this is
; not a duplicate of PackA.
%PackC0 = insertelement <2 x i8> poison, i8 %v1, i64 0, !sandboxvec !0
%PackC1 = insertelement <2 x i8> %PackC0, i8 %v0, i64 1, !sandboxvec !0
store <2 x i8> %PackC1, ptr %ptr, !sandboxvec !0
ret void
}
; TODO: For now we don't support reusing packs from earlier BBs.
; PackB in %bb packs the same values as PackA in %entry, but since reuse is
; limited to a single block all packs are expected to stay as they are.
define void @pack_cross_bb(i8 %v0, i8 %v1, ptr %ptr) {
; CHECK-LABEL: define void @pack_cross_bb(
; CHECK-SAME: i8 [[V0:%.*]], i8 [[V1:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[PACKA0:%.*]] = insertelement <2 x i8> poison, i8 [[V0]], i64 0, !sandboxvec [[META1:![0-9]+]]
; CHECK-NEXT: [[PACKA1:%.*]] = insertelement <2 x i8> [[PACKA0]], i8 [[V1]], i64 1, !sandboxvec [[META1]]
; CHECK-NEXT: store <2 x i8> [[PACKA1]], ptr [[PTR]], align 2, !sandboxvec [[META1]]
; CHECK-NEXT: br label %[[BB:.*]]
; CHECK: [[BB]]:
; CHECK-NEXT: [[PACKB0:%.*]] = insertelement <2 x i8> poison, i8 [[V0]], i64 0, !sandboxvec [[META1]]
; CHECK-NEXT: [[PACKB1:%.*]] = insertelement <2 x i8> [[PACKB0]], i8 [[V1]], i64 1, !sandboxvec [[META1]]
; CHECK-NEXT: store <2 x i8> [[PACKB1]], ptr [[PTR]], align 2, !sandboxvec [[META1]]
; CHECK-NEXT: [[PACKC0:%.*]] = insertelement <2 x i8> poison, i8 [[V1]], i64 0, !sandboxvec [[META1]]
; CHECK-NEXT: [[PACKC1:%.*]] = insertelement <2 x i8> [[PACKC0]], i8 [[V0]], i64 1, !sandboxvec [[META1]]
; CHECK-NEXT: store <2 x i8> [[PACKC1]], ptr [[PTR]], align 2, !sandboxvec [[META1]]
; CHECK-NEXT: ret void
;
entry:
; The pack of (%v0, %v1) in the first block.
%PackA0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0
%PackA1 = insertelement <2 x i8> %PackA0, i8 %v1, i64 1, !sandboxvec !0
store <2 x i8> %PackA1, ptr %ptr, !sandboxvec !0
br label %bb
bb:
; Same operands as PackA, but in a different block: should remain.
%PackB0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0
%PackB1 = insertelement <2 x i8> %PackB0, i8 %v1, i64 1, !sandboxvec !0
store <2 x i8> %PackB1, ptr %ptr, !sandboxvec !0
; Operands in the opposite lanes (%v1, %v0): not a duplicate, should remain.
%PackC0 = insertelement <2 x i8> poison, i8 %v1, i64 0, !sandboxvec !0
%PackC1 = insertelement <2 x i8> %PackC0, i8 %v0, i64 1, !sandboxvec !0
store <2 x i8> %PackC1, ptr %ptr, !sandboxvec !0
ret void
}
!0 = distinct !{!"sandboxregion"}
;.
; CHECK: [[META0]] = distinct !{!"sandboxregion"}
; CHECK: [[META1]] = distinct !{!"sandboxregion"}
;.

View File

@@ -0,0 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection<tr-save,bottom-up-vec,tr-accept>" %s -S | FileCheck %s --check-prefix NOREUSE
; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection<tr-save,bottom-up-vec,pack-reuse,tr-accept>" %s -S | FileCheck %s --check-prefix PKREUSE
; End-to-end test: the same input is vectorized once without and once with the
; pack-reuse pass in the pipeline.
define void @pack_reuse(ptr %ptr, ptr %ptrX, ptr %ptrY) {
; NOREUSE-LABEL: define void @pack_reuse(
; NOREUSE-SAME: ptr [[PTR:%.*]], ptr [[PTRX:%.*]], ptr [[PTRY:%.*]]) {
; NOREUSE-NEXT: [[LDX:%.*]] = load float, ptr [[PTRX]], align 4
; NOREUSE-NEXT: [[LDY:%.*]] = load float, ptr [[PTRY]], align 4
; NOREUSE-NEXT: [[PACK2:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0, !sandboxvec [[META0:![0-9]+]]
; NOREUSE-NEXT: [[PACK3:%.*]] = insertelement <2 x float> [[PACK2]], float [[LDY]], i32 1, !sandboxvec [[META0]]
; NOREUSE-NEXT: [[PACK:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0, !sandboxvec [[META0]]
; NOREUSE-NEXT: [[PACK1:%.*]] = insertelement <2 x float> [[PACK]], float [[LDY]], i32 1, !sandboxvec [[META0]]
; NOREUSE-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; NOREUSE-NEXT: [[VEC:%.*]] = fsub <2 x float> [[PACK1]], [[PACK3]], !sandboxvec [[META0]]
; NOREUSE-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4, !sandboxvec [[META0]]
; NOREUSE-NEXT: ret void
;
; PKREUSE-LABEL: define void @pack_reuse(
; PKREUSE-SAME: ptr [[PTR:%.*]], ptr [[PTRX:%.*]], ptr [[PTRY:%.*]]) {
; PKREUSE-NEXT: [[LDX:%.*]] = load float, ptr [[PTRX]], align 4
; PKREUSE-NEXT: [[LDY:%.*]] = load float, ptr [[PTRY]], align 4
; PKREUSE-NEXT: [[PACK2:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0, !sandboxvec [[META0:![0-9]+]]
; PKREUSE-NEXT: [[PACK3:%.*]] = insertelement <2 x float> [[PACK2]], float [[LDY]], i32 1, !sandboxvec [[META0]]
; PKREUSE-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; PKREUSE-NEXT: [[VEC:%.*]] = fsub <2 x float> [[PACK3]], [[PACK3]], !sandboxvec [[META0]]
; PKREUSE-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4, !sandboxvec [[META0]]
; PKREUSE-NEXT: ret void
;
%ldX = load float, ptr %ptrX
%ldY = load float, ptr %ptrY
%ptr0 = getelementptr float, ptr %ptr, i32 0
%ptr1 = getelementptr float, ptr %ptr, i32 1
; Each scalar fsub uses the same loaded value for both of its operands. The
; expected output without pack-reuse therefore contains two identical packs of
; (%ldX, %ldY), one per operand of the vector fsub, while the expected output
; with pack-reuse contains a single pack that feeds both operands.
%sub0 = fsub float %ldX, %ldX
%sub1 = fsub float %ldY, %ldY
store float %sub0, ptr %ptr0
store float %sub1, ptr %ptr1
ret void
}
;.
; NOREUSE: [[META0]] = distinct !{!"sandboxregion"}
;.
; PKREUSE: [[META0]] = distinct !{!"sandboxregion"}
;.

View File

@@ -20,6 +20,7 @@
#include "llvm/SandboxIR/Function.h"
#include "llvm/SandboxIR/Type.h"
#include "llvm/Support/SourceMgr.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
using namespace llvm;
@@ -563,3 +564,52 @@ TEST_F(VecUtilsTest, FloorPowerOf2) {
EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(8), 8u);
EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(9), 8u);
}
// Checks VecUtils::matchPack() on scalar pack patterns: a well-formed 2-lane
// pack must be matched from its bottom-most insert, everything else must be
// rejected.
TEST_F(VecUtilsTest, MatchPackScalar) {
  parseIR(R"IR(
define void @foo(i8 %v0, i8 %v1) {
bb0:
%NotPack = insertelement <2 x i8> poison, i8 %v0, i64 0
br label %bb1
bb1:
%Pack0 = insertelement <2 x i8> poison, i8 %v0, i64 0
%Pack1 = insertelement <2 x i8> %Pack0, i8 %v1, i64 1
%NotPack0 = insertelement <2 x i8> poison, i8 %v0, i64 0
%NotPack1 = insertelement <2 x i8> %NotPack0, i8 %v1, i64 0
%NotPack2 = insertelement <2 x i8> %NotPack1, i8 %v1, i64 1
%NotPackBB = insertelement <2 x i8> %NotPack, i8 %v1, i64 1
ret void
}
)IR");
  Function &LLVMF = *M->getFunction("foo");

  sandboxir::Context Ctx(C);
  auto &F = *Ctx.createFunction(&LLVMF);
  auto &BB = getBasicBlockByName(F, "bb1");
  auto It = BB.begin();
  auto *Pack0 = cast<sandboxir::InsertElementInst>(&*It++);
  auto *Pack1 = cast<sandboxir::InsertElementInst>(&*It++);
  auto *NotPack0 = cast<sandboxir::InsertElementInst>(&*It++);
  auto *NotPack1 = cast<sandboxir::InsertElementInst>(&*It++);
  auto *NotPack2 = cast<sandboxir::InsertElementInst>(&*It++);
  auto *NotPackBB = cast<sandboxir::InsertElementInst>(&*It++);
  auto *Ret = cast<sandboxir::ReturnInst>(&*It++);
  auto *Arg0 = F.getArg(0);
  auto *Arg1 = F.getArg(1);

  // Pack0 inserts into lane 0, so it is not the bottom of a pack, and Ret is
  // not an InsertElement at all.
  EXPECT_FALSE(sandboxir::VecUtils::matchPack(Pack0));
  EXPECT_FALSE(sandboxir::VecUtils::matchPack(Ret));
  {
    auto PackOpt = sandboxir::VecUtils::matchPack(Pack1);
    // Stop here on failure: the checks below dereference the optional.
    ASSERT_TRUE(PackOpt.has_value());
    // Instructions are reported bottom-up, operands in lane order.
    EXPECT_THAT(PackOpt->Instrs, testing::ElementsAre(Pack1, Pack0));
    EXPECT_THAT(PackOpt->Operands, testing::ElementsAre(Arg0, Arg1));
  }
  {
    // NotPack0/NotPack1 insert into lane 0 instead of the last lane, NotPack2
    // sits on a chain that does not end in poison, and NotPackBB sits on a
    // chain that crosses into another block.
    for (auto *NotPack : {NotPack0, NotPack1, NotPack2, NotPackBB})
      EXPECT_FALSE(sandboxir::VecUtils::matchPack(NotPack));
  }
}