[DirectX] Implement memcpy in DXIL CBuffer Access pass (#144436)
Fixes #141840. This PR implements support for the `memcpy` intrinsic in the DXIL CBuffer Access pass with the following restrictions: - The CBuffer Access must be the `src` operand of `memcpy` and must be direct (i.e., not a GEP) - The type of the CBuffer Access must be an Array Type. These restrictions greatly simplify the implementation of `memcpy` yet still cover the known uses in DML shaders. Furthermore, to prevent errors like #141840 from occurring silently again, this PR adds error reporting for unsupported users of globals in the DXIL CBuffer Access pass.
This commit is contained in:
@@ -11,9 +11,11 @@
|
||||
#include "llvm/Frontend/HLSL/CBuffer.h"
|
||||
#include "llvm/Frontend/HLSL/HLSLResource.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/IntrinsicsDirectX.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/FormatVariadic.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
|
||||
#define DEBUG_TYPE "dxil-cbuffer-access"
|
||||
@@ -54,71 +56,106 @@ struct CBufferRowIntrin {
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
static size_t getOffsetForCBufferGEP(GEPOperator *GEP, GlobalVariable *Global,
|
||||
const DataLayout &DL) {
|
||||
// Since we should always have a constant offset, we should only ever have a
|
||||
// single GEP of indirection from the Global.
|
||||
assert(GEP->getPointerOperand() == Global &&
|
||||
"Indirect access to resource handle");
|
||||
// Helper for creating CBuffer handles and loading data from them
|
||||
struct CBufferResource {
|
||||
GlobalVariable *GVHandle;
|
||||
GlobalVariable *Member;
|
||||
size_t MemberOffset;
|
||||
|
||||
APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
|
||||
bool Success = GEP->accumulateConstantOffset(DL, ConstantOffset);
|
||||
(void)Success;
|
||||
assert(Success && "Offsets into cbuffer globals must be constant");
|
||||
LoadInst *Handle;
|
||||
|
||||
if (auto *ATy = dyn_cast<ArrayType>(Global->getValueType()))
|
||||
ConstantOffset = hlsl::translateCBufArrayOffset(DL, ConstantOffset, ATy);
|
||||
CBufferResource(GlobalVariable *GVHandle, GlobalVariable *Member,
|
||||
size_t MemberOffset)
|
||||
: GVHandle(GVHandle), Member(Member), MemberOffset(MemberOffset) {}
|
||||
|
||||
return ConstantOffset.getZExtValue();
|
||||
}
|
||||
const DataLayout &getDataLayout() { return GVHandle->getDataLayout(); }
|
||||
Type *getValueType() { return Member->getValueType(); }
|
||||
iterator_range<ConstantDataSequential::user_iterator> users() {
|
||||
return Member->users();
|
||||
}
|
||||
|
||||
/// Replace access via cbuffer global with a load from the cbuffer handle
|
||||
/// itself.
|
||||
static void replaceAccess(LoadInst *LI, GlobalVariable *Global,
|
||||
GlobalVariable *HandleGV, size_t BaseOffset,
|
||||
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
|
||||
const DataLayout &DL = HandleGV->getDataLayout();
|
||||
/// Get the byte offset of a Pointer-typed Value * `Val` relative to Member.
|
||||
/// `Val` can either be Member itself, or a GEP of a constant offset from
|
||||
/// Member
|
||||
size_t getOffsetForCBufferGEP(Value *Val) {
|
||||
assert(isa<PointerType>(Val->getType()) &&
|
||||
"Expected a pointer-typed value");
|
||||
|
||||
size_t Offset = BaseOffset;
|
||||
if (auto *GEP = dyn_cast<GEPOperator>(LI->getPointerOperand()))
|
||||
Offset += getOffsetForCBufferGEP(GEP, Global, DL);
|
||||
else if (LI->getPointerOperand() != Global)
|
||||
llvm_unreachable("Load instruction doesn't reference cbuffer global");
|
||||
if (Val == Member)
|
||||
return 0;
|
||||
|
||||
IRBuilder<> Builder(LI);
|
||||
auto *Handle = Builder.CreateLoad(HandleGV->getValueType(), HandleGV,
|
||||
HandleGV->getName());
|
||||
if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
|
||||
// Since we should always have a constant offset, we should only ever have
|
||||
// a single GEP of indirection from the Global.
|
||||
assert(GEP->getPointerOperand() == Member &&
|
||||
"Indirect access to resource handle");
|
||||
|
||||
Type *Ty = LI->getType();
|
||||
CBufferRowIntrin Intrin(DL, Ty->getScalarType());
|
||||
// The cbuffer consists of some number of 16-byte rows.
|
||||
unsigned int CurrentRow = Offset / hlsl::CBufferRowSizeInBytes;
|
||||
unsigned int CurrentIndex =
|
||||
(Offset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
|
||||
const DataLayout &DL = getDataLayout();
|
||||
APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
|
||||
bool Success = GEP->accumulateConstantOffset(DL, ConstantOffset);
|
||||
(void)Success;
|
||||
assert(Success && "Offsets into cbuffer globals must be constant");
|
||||
|
||||
auto *CBufLoad = Builder.CreateIntrinsic(
|
||||
Intrin.RetTy, Intrin.IID,
|
||||
{Handle, ConstantInt::get(Builder.getInt32Ty(), CurrentRow)}, nullptr,
|
||||
LI->getName());
|
||||
auto *Elt =
|
||||
Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, LI->getName());
|
||||
if (auto *ATy = dyn_cast<ArrayType>(Member->getValueType()))
|
||||
ConstantOffset =
|
||||
hlsl::translateCBufArrayOffset(DL, ConstantOffset, ATy);
|
||||
|
||||
Value *Result = nullptr;
|
||||
unsigned int Remaining =
|
||||
((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
|
||||
if (Remaining == 0) {
|
||||
// We only have a single element, so we're done.
|
||||
Result = Elt;
|
||||
|
||||
// However, if we loaded a <1 x T>, then we need to adjust the type here.
|
||||
if (auto *VT = dyn_cast<FixedVectorType>(LI->getType())) {
|
||||
assert(VT->getNumElements() == 1 && "Can't have multiple elements here");
|
||||
Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
|
||||
Builder.getInt32(0));
|
||||
return ConstantOffset.getZExtValue();
|
||||
}
|
||||
} else {
|
||||
|
||||
llvm_unreachable("Expected Val to be a GlobalVariable or GEP");
|
||||
}
|
||||
|
||||
/// Create a handle for this cbuffer resource using the IRBuilder `Builder`
|
||||
/// and sets the handle as the current one to use for subsequent calls to
|
||||
/// `loadValue`
|
||||
void createAndSetCurrentHandle(IRBuilder<> &Builder) {
|
||||
Handle = Builder.CreateLoad(GVHandle->getValueType(), GVHandle,
|
||||
GVHandle->getName());
|
||||
}
|
||||
|
||||
/// Load a value of type `Ty` at offset `Offset` using the handle from the
|
||||
/// last call to `createAndSetCurrentHandle`
|
||||
Value *loadValue(IRBuilder<> &Builder, Type *Ty, size_t Offset,
|
||||
const Twine &Name = "") {
|
||||
assert(Handle &&
|
||||
"Expected a handle for this cbuffer global resource to be created "
|
||||
"before loading a value from it");
|
||||
const DataLayout &DL = getDataLayout();
|
||||
|
||||
size_t TargetOffset = MemberOffset + Offset;
|
||||
CBufferRowIntrin Intrin(DL, Ty->getScalarType());
|
||||
// The cbuffer consists of some number of 16-byte rows.
|
||||
unsigned int CurrentRow = TargetOffset / hlsl::CBufferRowSizeInBytes;
|
||||
unsigned int CurrentIndex =
|
||||
(TargetOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
|
||||
|
||||
auto *CBufLoad = Builder.CreateIntrinsic(
|
||||
Intrin.RetTy, Intrin.IID,
|
||||
{Handle, ConstantInt::get(Builder.getInt32Ty(), CurrentRow)}, nullptr,
|
||||
Name + ".load");
|
||||
auto *Elt = Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
|
||||
Name + ".extract");
|
||||
|
||||
Value *Result = nullptr;
|
||||
unsigned int Remaining =
|
||||
((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
|
||||
|
||||
if (Remaining == 0) {
|
||||
// We only have a single element, so we're done.
|
||||
Result = Elt;
|
||||
|
||||
// However, if we loaded a <1 x T>, then we need to adjust the type here.
|
||||
if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
|
||||
assert(VT->getNumElements() == 1 &&
|
||||
"Can't have multiple elements here");
|
||||
Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
|
||||
Builder.getInt32(0), Name);
|
||||
}
|
||||
return Result;
|
||||
}
|
||||
|
||||
// Walk each element and extract it, wrapping to new rows as needed.
|
||||
SmallVector<Value *> Extracts{Elt};
|
||||
while (Remaining--) {
|
||||
@@ -128,40 +165,138 @@ static void replaceAccess(LoadInst *LI, GlobalVariable *Global,
|
||||
CBufLoad = Builder.CreateIntrinsic(
|
||||
Intrin.RetTy, Intrin.IID,
|
||||
{Handle, ConstantInt::get(Builder.getInt32Ty(), ++CurrentRow)},
|
||||
nullptr, LI->getName());
|
||||
nullptr, Name + ".load");
|
||||
|
||||
Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
|
||||
LI->getName()));
|
||||
Name + ".extract"));
|
||||
}
|
||||
|
||||
// Finally, we build up the original loaded value.
|
||||
Result = PoisonValue::get(Ty);
|
||||
for (int I = 0, E = Extracts.size(); I < E; ++I)
|
||||
Result =
|
||||
Builder.CreateInsertElement(Result, Extracts[I], Builder.getInt32(I));
|
||||
Builder.CreateInsertElement(Result, Extracts[I], Builder.getInt32(I),
|
||||
Name + formatv(".upto{}", I));
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/// Replace load via cbuffer global with a load from the cbuffer handle itself.
|
||||
static void replaceLoad(LoadInst *LI, CBufferResource &CBR,
|
||||
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
|
||||
size_t Offset = CBR.getOffsetForCBufferGEP(LI->getPointerOperand());
|
||||
IRBuilder<> Builder(LI);
|
||||
CBR.createAndSetCurrentHandle(Builder);
|
||||
Value *Result = CBR.loadValue(Builder, LI->getType(), Offset, LI->getName());
|
||||
LI->replaceAllUsesWith(Result);
|
||||
DeadInsts.push_back(LI);
|
||||
}
|
||||
|
||||
static void replaceAccessesWithHandle(GlobalVariable *Global,
|
||||
GlobalVariable *HandleGV,
|
||||
size_t BaseOffset) {
|
||||
/// This function recursively copies N array elements from the cbuffer resource
|
||||
/// CBR to the MemCpy Destination. Recursion is used to unravel multidimensional
|
||||
/// arrays into a sequence of scalar/vector extracts and stores.
|
||||
static void copyArrayElemsForMemCpy(IRBuilder<> &Builder, MemCpyInst *MCI,
|
||||
CBufferResource &CBR, ArrayType *ArrTy,
|
||||
size_t ArrOffset, size_t N,
|
||||
const Twine &Name = "") {
|
||||
const DataLayout &DL = MCI->getDataLayout();
|
||||
Type *ElemTy = ArrTy->getElementType();
|
||||
size_t ElemTySize = DL.getTypeAllocSize(ElemTy);
|
||||
for (unsigned I = 0; I < N; ++I) {
|
||||
size_t Offset = ArrOffset + I * ElemTySize;
|
||||
|
||||
// Recursively copy nested arrays
|
||||
if (ArrayType *ElemArrTy = dyn_cast<ArrayType>(ElemTy)) {
|
||||
copyArrayElemsForMemCpy(Builder, MCI, CBR, ElemArrTy, Offset,
|
||||
ElemArrTy->getNumElements(), Name);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Load CBuffer value and store it in Dest
|
||||
APInt CBufArrayOffset(
|
||||
DL.getIndexTypeSizeInBits(MCI->getSource()->getType()), Offset);
|
||||
CBufArrayOffset =
|
||||
hlsl::translateCBufArrayOffset(DL, CBufArrayOffset, ArrTy);
|
||||
Value *CBufferVal =
|
||||
CBR.loadValue(Builder, ElemTy, CBufArrayOffset.getZExtValue(), Name);
|
||||
Value *GEP =
|
||||
Builder.CreateInBoundsGEP(Builder.getInt8Ty(), MCI->getDest(),
|
||||
{Builder.getInt32(Offset)}, Name + ".dest");
|
||||
Builder.CreateStore(CBufferVal, GEP, MCI->isVolatile());
|
||||
}
|
||||
}
|
||||
|
||||
/// Replace memcpy from a cbuffer global with a memcpy from the cbuffer handle
|
||||
/// itself. Assumes the cbuffer global is an array, and the length of bytes to
|
||||
/// copy is divisible by array element allocation size.
|
||||
/// The memcpy source must also be a direct cbuffer global reference, not a GEP.
|
||||
static void replaceMemCpy(MemCpyInst *MCI, CBufferResource &CBR) {
|
||||
|
||||
ArrayType *ArrTy = dyn_cast<ArrayType>(CBR.getValueType());
|
||||
assert(ArrTy && "MemCpy lowering is only supported for array types");
|
||||
|
||||
// This assumption vastly simplifies the implementation
|
||||
if (MCI->getSource() != CBR.Member)
|
||||
reportFatalUsageError(
|
||||
"Expected MemCpy source to be a cbuffer global variable");
|
||||
|
||||
ConstantInt *Length = dyn_cast<ConstantInt>(MCI->getLength());
|
||||
uint64_t ByteLength = Length->getZExtValue();
|
||||
|
||||
// If length to copy is zero, no memcpy is needed
|
||||
if (ByteLength == 0) {
|
||||
MCI->eraseFromParent();
|
||||
return;
|
||||
}
|
||||
|
||||
const DataLayout &DL = CBR.getDataLayout();
|
||||
|
||||
Type *ElemTy = ArrTy->getElementType();
|
||||
size_t ElemSize = DL.getTypeAllocSize(ElemTy);
|
||||
assert(ByteLength % ElemSize == 0 &&
|
||||
"Length of bytes to MemCpy must be divisible by allocation size of "
|
||||
"source/destination array elements");
|
||||
size_t ElemsToCpy = ByteLength / ElemSize;
|
||||
|
||||
IRBuilder<> Builder(MCI);
|
||||
CBR.createAndSetCurrentHandle(Builder);
|
||||
|
||||
copyArrayElemsForMemCpy(Builder, MCI, CBR, ArrTy, 0, ElemsToCpy,
|
||||
"memcpy." + MCI->getDest()->getName() + "." +
|
||||
MCI->getSource()->getName());
|
||||
|
||||
MCI->eraseFromParent();
|
||||
}
|
||||
|
||||
static void replaceAccessesWithHandle(CBufferResource &CBR) {
|
||||
SmallVector<WeakTrackingVH> DeadInsts;
|
||||
|
||||
SmallVector<User *> ToProcess{Global->users()};
|
||||
SmallVector<User *> ToProcess{CBR.users()};
|
||||
while (!ToProcess.empty()) {
|
||||
User *Cur = ToProcess.pop_back_val();
|
||||
|
||||
// If we have a load instruction, replace the access.
|
||||
if (auto *LI = dyn_cast<LoadInst>(Cur)) {
|
||||
replaceAccess(LI, Global, HandleGV, BaseOffset, DeadInsts);
|
||||
replaceLoad(LI, CBR, DeadInsts);
|
||||
continue;
|
||||
}
|
||||
|
||||
// If we have a memcpy instruction, replace it with multiple accesses and
|
||||
// subsequent stores to the destination
|
||||
if (auto *MCI = dyn_cast<MemCpyInst>(Cur)) {
|
||||
replaceMemCpy(MCI, CBR);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Otherwise, walk users looking for a load...
|
||||
ToProcess.append(Cur->user_begin(), Cur->user_end());
|
||||
if (isa<GetElementPtrInst>(Cur) || isa<GEPOperator>(Cur)) {
|
||||
ToProcess.append(Cur->user_begin(), Cur->user_end());
|
||||
continue;
|
||||
}
|
||||
|
||||
llvm_unreachable("Unexpected user of Global");
|
||||
}
|
||||
RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
|
||||
}
|
||||
@@ -173,7 +308,8 @@ static bool replaceCBufferAccesses(Module &M) {
|
||||
|
||||
for (const hlsl::CBufferMapping &Mapping : *CBufMD)
|
||||
for (const hlsl::CBufferMember &Member : Mapping.Members) {
|
||||
replaceAccessesWithHandle(Member.GV, Mapping.Handle, Member.Offset);
|
||||
CBufferResource CBR(Mapping.Handle, Member.GV, Member.Offset);
|
||||
replaceAccessesWithHandle(CBR);
|
||||
Member.GV->removeFromParent();
|
||||
}
|
||||
|
||||
|
||||
216
llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
Normal file
216
llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
Normal file
@@ -0,0 +1,216 @@
|
||||
; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
|
||||
|
||||
; cbuffer CB : register(b0) {
|
||||
; float a1[3];
|
||||
; double3 a2[2];
|
||||
; float16_t a3[2][2];
|
||||
; uint64_t a4[3];
|
||||
; int2 a5[3][2];
|
||||
; uint16_t a6[1];
|
||||
; int64_t a7[2];
|
||||
; bool a8[4];
|
||||
; }
|
||||
%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [3 x [2 x <2 x i32>]], [1 x i16], [2 x i64], [4 x i32] }>
|
||||
|
||||
@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) poison
|
||||
@a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
|
||||
@a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32
|
||||
@a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2
|
||||
@a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8
|
||||
@a5 = external local_unnamed_addr addrspace(2) global [3 x [2 x <2 x i32>]], align 16
|
||||
@a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2
|
||||
@a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8
|
||||
@a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4
|
||||
|
||||
; CHECK: define void @f(
|
||||
define void @f(ptr %dst) {
|
||||
entry:
|
||||
%CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
|
||||
store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) %CB.cb_h.i.i, ptr @CB.cb, align 4
|
||||
|
||||
%a1.copy = alloca [3 x float], align 4
|
||||
%a2.copy = alloca [2 x <3 x double>], align 32
|
||||
%a3.copy = alloca [2 x [2 x half]], align 2
|
||||
%a4.copy = alloca [3 x i64], align 8
|
||||
%a5.copy = alloca [3 x [2 x <2 x i32>]], align 16
|
||||
%a6.copy = alloca [1 x i16], align 2
|
||||
%a7.copy = alloca [2 x i64], align 8
|
||||
%a8.copy = alloca [4 x i32], align 4
|
||||
|
||||
; Try copying no elements
|
||||
; CHECK-NOT: memcpy
|
||||
call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 0, i1 false)
|
||||
|
||||
; Try copying only the first element
|
||||
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
|
||||
; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
|
||||
; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY:%.*]], i32 0
|
||||
; CHECK: store float [[X]], ptr [[DEST]], align 4
|
||||
call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 4, i1 false)
|
||||
|
||||
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
|
||||
; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
|
||||
; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY:%.*]], i32 0
|
||||
; CHECK: store float [[X]], ptr [[DEST]], align 4
|
||||
; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
|
||||
; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 4
|
||||
; CHECK: store float [[Y]], ptr [[DEST]], align 4
|
||||
; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2)
|
||||
; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 8
|
||||
; CHECK: store float [[Z]], ptr [[DEST]], align 4
|
||||
call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 12, i1 false)
|
||||
|
||||
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
|
||||
; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 3)
|
||||
; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
|
||||
; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
|
||||
; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 4)
|
||||
; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
|
||||
; CHECK: [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
|
||||
; CHECK: [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
|
||||
; CHECK: [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY:%.*]], i32 0
|
||||
; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 32
|
||||
; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
|
||||
; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
|
||||
; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
|
||||
; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6)
|
||||
; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
|
||||
; CHECK: [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
|
||||
; CHECK: [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
|
||||
; CHECK: [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY]], i32 32
|
||||
; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 32
|
||||
call void @llvm.memcpy.p0.p2.i32(ptr align 32 %a2.copy, ptr addrspace(2) align 32 @a2, i32 64, i1 false)
|
||||
|
||||
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
|
||||
; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 7)
|
||||
; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY:%.*]], i32 0
|
||||
; CHECK: store half [[X]], ptr [[DEST]], align 2
|
||||
; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 8)
|
||||
; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 2
|
||||
; CHECK: store half [[Y]], ptr [[DEST]], align 2
|
||||
; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 9)
|
||||
; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 4
|
||||
; CHECK: store half [[X]], ptr [[DEST]], align 2
|
||||
; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 10)
|
||||
; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 6
|
||||
; CHECK: store half [[Y]], ptr [[DEST]], align 2
|
||||
call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a3.copy, ptr addrspace(2) align 2 @a3, i32 8, i1 false)
|
||||
|
||||
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
|
||||
; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 11)
|
||||
; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY:%.*]], i32 0
|
||||
; CHECK: store i64 [[X]], ptr [[DEST]], align 8
|
||||
; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 12)
|
||||
; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 8
|
||||
; CHECK: store i64 [[Y]], ptr [[DEST]], align 8
|
||||
; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 13)
|
||||
; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 16
|
||||
; CHECK: store i64 [[Z]], ptr [[DEST]], align 8
|
||||
call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a4.copy, ptr addrspace(2) align 8 @a4, i32 24, i1 false)
|
||||
|
||||
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
|
||||
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 14)
|
||||
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
|
||||
; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
|
||||
; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
|
||||
; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY:%.*]], i32 0
|
||||
; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
|
||||
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 15)
|
||||
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
|
||||
; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
|
||||
; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
|
||||
; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 8
|
||||
; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
|
||||
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 16)
|
||||
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
|
||||
; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
|
||||
; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
|
||||
; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 16
|
||||
; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
|
||||
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17)
|
||||
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
|
||||
; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
|
||||
; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
|
||||
; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 24
|
||||
; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
|
||||
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18)
|
||||
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
|
||||
; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
|
||||
; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
|
||||
; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 32
|
||||
; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
|
||||
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19)
|
||||
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
|
||||
; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
|
||||
; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
|
||||
; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 40
|
||||
; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
|
||||
call void @llvm.memcpy.p0.p2.i32(ptr align 16 %a5.copy, ptr addrspace(2) align 16 @a5, i32 48, i1 false)
|
||||
|
||||
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
|
||||
; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17)
|
||||
; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A6_COPY:%.*]], i32 0
|
||||
; CHECK: store i16 [[X]], ptr [[DEST]], align 2
|
||||
call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a6.copy, ptr addrspace(2) align 2 @a6, i32 2, i1 false)
|
||||
|
||||
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
|
||||
; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18)
|
||||
; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY:%.*]], i32 0
|
||||
; CHECK: store i64 [[X]], ptr [[DEST]], align 8
|
||||
; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19)
|
||||
; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY]], i32 8
|
||||
; CHECK: store i64 [[Y]], ptr [[DEST]], align 8
|
||||
call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a7.copy, ptr addrspace(2) align 8 @a7, i32 16, i1 false)
|
||||
|
||||
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
|
||||
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 20)
|
||||
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY:%.*]], i32 0
|
||||
; CHECK: store i32 [[X]], ptr [[DEST]], align 4
|
||||
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 21)
|
||||
; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 4
|
||||
; CHECK: store i32 [[Y]], ptr [[DEST]], align 4
|
||||
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 22)
|
||||
; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 8
|
||||
; CHECK: store i32 [[Z]], ptr [[DEST]], align 4
|
||||
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 23)
|
||||
; CHECK: [[W:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
|
||||
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 12
|
||||
; CHECK: store i32 [[W]], ptr [[DEST]], align 4
|
||||
call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a8.copy, ptr addrspace(2) align 4 @a8, i32 16, i1 false)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.memcpy.p0.p2.i32(ptr noalias writeonly captures(none), ptr addrspace(2) noalias readonly captures(none), i32, i1 immarg)
|
||||
|
||||
; CHECK-NOT: !hlsl.cbs =
|
||||
!hlsl.cbs = !{!0}
|
||||
|
||||
!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8}
|
||||
!1 = !{i32 0, i32 2}
|
||||
!2 = !{}
|
||||
Reference in New Issue
Block a user