The module currently stores the target triple as a string. This means that any code that wants to actually use the triple first has to instantiate a Triple, which is somewhat expensive. The change in #121652 caused a moderate compile-time regression due to this. While it would be easy enough to work around, I think that architecturally, it makes more sense to store the parsed Triple in the module, so that it can always be directly queried. For this change, I've opted not to add any magic conversions between std::string and Triple for backwards-compatibility purposes, and instead write out needed Triple()s or str()s explicitly. This is because I think a decent number of them should be changed to work on Triple as well, to avoid unnecessary conversions back and forth. The only interesting part in this patch is that the default triple is Triple("") instead of Triple() to preserve existing behavior. The former defaults to using the ELF object format instead of unknown object format. We should fix that as well.
910 lines
33 KiB
C++
910 lines
33 KiB
C++
//===- DXILOpLowering.cpp - Lowering to DXIL operations -------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "DXILOpLowering.h"
|
|
#include "DXILConstants.h"
|
|
#include "DXILIntrinsicExpansion.h"
|
|
#include "DXILOpBuilder.h"
|
|
#include "DXILResourceAnalysis.h"
|
|
#include "DXILShaderFlags.h"
|
|
#include "DirectX.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/Analysis/DXILMetadataAnalysis.h"
|
|
#include "llvm/Analysis/DXILResource.h"
|
|
#include "llvm/CodeGen/Passes.h"
|
|
#include "llvm/IR/DiagnosticInfo.h"
|
|
#include "llvm/IR/IRBuilder.h"
|
|
#include "llvm/IR/Instruction.h"
|
|
#include "llvm/IR/Instructions.h"
|
|
#include "llvm/IR/Intrinsics.h"
|
|
#include "llvm/IR/IntrinsicsDirectX.h"
|
|
#include "llvm/IR/Module.h"
|
|
#include "llvm/IR/PassManager.h"
|
|
#include "llvm/InitializePasses.h"
|
|
#include "llvm/Pass.h"
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
#define DEBUG_TYPE "dxil-op-lower"
|
|
|
|
using namespace llvm;
|
|
using namespace llvm::dxil;
|
|
|
|
/// Returns true if \p F is one of the `dx_dot2`, `dx_dot3` or `dx_dot4`
/// intrinsics, and false for every other function.
static bool isVectorArgExpansion(Function &F) {
  const Intrinsic::ID IID = F.getIntrinsicID();
  return IID == Intrinsic::dx_dot2 || IID == Intrinsic::dx_dot3 ||
         IID == Intrinsic::dx_dot4;
}
|
|
|
|
/// Extracts every element of the fixed-width vector \p Arg as a scalar.
///
/// One `extractelement` with a constant i32 index is emitted through
/// \p Builder per element, and the results are returned in element order.
/// \p Arg must have a FixedVectorType.
static SmallVector<Value *> populateOperands(Value *Arg, IRBuilder<> &Builder) {
  // cast<> rather than dyn_cast<>: the result is dereferenced unconditionally,
  // so a non-vector argument should trip the cast assertion instead of
  // dereferencing a null pointer.
  auto *VecArg = cast<FixedVectorType>(Arg->getType());
  Type *Int32Ty = Type::getInt32Ty(Arg->getContext());

  SmallVector<Value *> ExtractedElements;
  for (unsigned I = 0, E = VecArg->getNumElements(); I != E; ++I) {
    Value *Index = ConstantInt::get(Int32Ty, I);
    ExtractedElements.push_back(Builder.CreateExtractElement(Arg, Index));
  }
  return ExtractedElements;
}
|
|
|
|
/// Flattens the vector operands of \p Orig into one list of scalars.
///
/// Each operand except the last is expanded with populateOperands() and the
/// per-operand element lists are concatenated in operand order. All expanded
/// operands are asserted to be fixed vectors with the same element type and
/// element count as the first one.
static SmallVector<Value *> argVectorFlatten(CallInst *Orig,
                                             IRBuilder<> &Builder) {
  // The trailing operand is a pointer that the flattening does not need, so
  // it is left out of the count.
  const unsigned VectorOperandCount = Orig->getNumOperands() - 1;
  assert(VectorOperandCount > 0);

  Value *FirstArg = Orig->getOperand(0);
  [[maybe_unused]] auto *FirstVecTy =
      dyn_cast<FixedVectorType>(FirstArg->getType());
  assert(FirstVecTy);

  SmallVector<Value *> Flattened = populateOperands(FirstArg, Builder);
  for (unsigned OpIdx = 1; OpIdx < VectorOperandCount; ++OpIdx) {
    Value *NextArg = Orig->getOperand(OpIdx);
    [[maybe_unused]] auto *NextVecTy =
        dyn_cast<FixedVectorType>(NextArg->getType());
    assert(NextVecTy);
    assert(FirstVecTy->getElementType() == NextVecTy->getElementType());
    assert(FirstVecTy->getNumElements() == NextVecTy->getNumElements());

    SmallVector<Value *> Scalars = populateOperands(NextArg, Builder);
    Flattened.append(Scalars.begin(), Scalars.end());
  }
  return Flattened;
}
|
|
|
|
namespace {
|
|
class OpLowerer {
|
|
Module &M;
|
|
DXILOpBuilder OpBuilder;
|
|
DXILBindingMap &DBM;
|
|
DXILResourceTypeMap &DRTM;
|
|
SmallVector<CallInst *> CleanupCasts;
|
|
|
|
public:
|
|
OpLowerer(Module &M, DXILBindingMap &DBM, DXILResourceTypeMap &DRTM)
|
|
: M(M), OpBuilder(M), DBM(DBM), DRTM(DRTM) {}
|
|
|
|
/// Replace every call to \c F using \c ReplaceCall, and then erase \c F. If
|
|
/// there is an error replacing a call, we emit a diagnostic and return true.
|
|
[[nodiscard]] bool
|
|
replaceFunction(Function &F,
|
|
llvm::function_ref<Error(CallInst *CI)> ReplaceCall) {
|
|
for (User *U : make_early_inc_range(F.users())) {
|
|
CallInst *CI = dyn_cast<CallInst>(U);
|
|
if (!CI)
|
|
continue;
|
|
|
|
if (Error E = ReplaceCall(CI)) {
|
|
std::string Message(toString(std::move(E)));
|
|
DiagnosticInfoUnsupported Diag(*CI->getFunction(), Message,
|
|
CI->getDebugLoc());
|
|
M.getContext().diagnose(Diag);
|
|
return true;
|
|
}
|
|
}
|
|
if (F.user_empty())
|
|
F.eraseFromParent();
|
|
return false;
|
|
}
|
|
|
|
/// Describes how one argument of a DXIL op is produced when an intrinsic call
/// is replaced by replaceFunctionWithOp().
struct IntrinArgSelect {
  /// The kind of selection. The enumerators are generated from
  /// DXILOperation.inc.
  enum class Type {
#define DXIL_OP_INTRINSIC_ARG_SELECT_TYPE(name) name,
#include "DXILOperation.inc"
  };
  Type Type;
  /// For Type::Index, the index of the intrinsic argument to forward; for
  /// Type::I8 and Type::I32, the constant value to pass.
  int Value;
};
|
|
|
|
/// Replaces uses of a struct with uses of an equivalent named struct.
|
|
///
|
|
/// DXIL operations that return structs give them well known names, so we need
|
|
/// to update uses when we switch from an LLVM intrinsic to an op.
|
|
Error replaceNamedStructUses(CallInst *Intrin, CallInst *DXILOp) {
|
|
auto *IntrinTy = cast<StructType>(Intrin->getType());
|
|
auto *DXILOpTy = cast<StructType>(DXILOp->getType());
|
|
if (!IntrinTy->isLayoutIdentical(DXILOpTy))
|
|
return make_error<StringError>(
|
|
"Type mismatch between intrinsic and DXIL op",
|
|
inconvertibleErrorCode());
|
|
|
|
for (Use &U : make_early_inc_range(Intrin->uses()))
|
|
if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser()))
|
|
EVI->setOperand(0, DXILOp);
|
|
else if (auto *IVI = dyn_cast<InsertValueInst>(U.getUser()))
|
|
IVI->setOperand(0, DXILOp);
|
|
else
|
|
return make_error<StringError>("DXIL ops that return structs may only "
|
|
"be used by insert- and extractvalue",
|
|
inconvertibleErrorCode());
|
|
return Error::success();
|
|
}
|
|
|
|
[[nodiscard]] bool
|
|
replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
|
|
ArrayRef<IntrinArgSelect> ArgSelects) {
|
|
bool IsVectorArgExpansion = isVectorArgExpansion(F);
|
|
assert(!(IsVectorArgExpansion && ArgSelects.size()) &&
|
|
"Cann't do vector arg expansion when using arg selects.");
|
|
return replaceFunction(F, [&](CallInst *CI) -> Error {
|
|
OpBuilder.getIRB().SetInsertPoint(CI);
|
|
SmallVector<Value *> Args;
|
|
if (ArgSelects.size()) {
|
|
for (const IntrinArgSelect &A : ArgSelects) {
|
|
switch (A.Type) {
|
|
case IntrinArgSelect::Type::Index:
|
|
Args.push_back(CI->getArgOperand(A.Value));
|
|
break;
|
|
case IntrinArgSelect::Type::I8:
|
|
Args.push_back(OpBuilder.getIRB().getInt8((uint8_t)A.Value));
|
|
break;
|
|
case IntrinArgSelect::Type::I32:
|
|
Args.push_back(OpBuilder.getIRB().getInt32(A.Value));
|
|
break;
|
|
}
|
|
}
|
|
} else if (IsVectorArgExpansion) {
|
|
Args = argVectorFlatten(CI, OpBuilder.getIRB());
|
|
} else {
|
|
Args.append(CI->arg_begin(), CI->arg_end());
|
|
}
|
|
|
|
Expected<CallInst *> OpCall =
|
|
OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), F.getReturnType());
|
|
if (Error E = OpCall.takeError())
|
|
return E;
|
|
|
|
if (isa<StructType>(CI->getType())) {
|
|
if (Error E = replaceNamedStructUses(CI, *OpCall))
|
|
return E;
|
|
} else
|
|
CI->replaceAllUsesWith(*OpCall);
|
|
|
|
CI->eraseFromParent();
|
|
return Error::success();
|
|
});
|
|
}
|
|
|
|
/// Create a cast between a `target("dx")` type and `dx.types.Handle`, which
|
|
/// is intended to be removed by the end of lowering. This is used to allow
|
|
/// lowering of ops which need to change their return or argument types in a
|
|
/// piecemeal way - we can add the casts in to avoid updating all of the uses
|
|
/// or defs, and by the end all of the casts will be redundant.
|
|
Value *createTmpHandleCast(Value *V, Type *Ty) {
|
|
CallInst *Cast = OpBuilder.getIRB().CreateIntrinsic(
|
|
Intrinsic::dx_resource_casthandle, {Ty, V->getType()}, {V});
|
|
CleanupCasts.push_back(Cast);
|
|
return Cast;
|
|
}
|
|
|
|
void cleanupHandleCasts() {
|
|
SmallVector<CallInst *> ToRemove;
|
|
SmallVector<Function *> CastFns;
|
|
|
|
for (CallInst *Cast : CleanupCasts) {
|
|
// These casts were only put in to ease the move from `target("dx")` types
|
|
// to `dx.types.Handle in a piecemeal way. At this point, all of the
|
|
// non-cast uses should now be `dx.types.Handle`, and remaining casts
|
|
// should all form pairs to and from the now unused `target("dx")` type.
|
|
CastFns.push_back(Cast->getCalledFunction());
|
|
|
|
// If the cast is not to `dx.types.Handle`, it should be the first part of
|
|
// the pair. Keep track so we can remove it once it has no more uses.
|
|
if (Cast->getType() != OpBuilder.getHandleType()) {
|
|
ToRemove.push_back(Cast);
|
|
continue;
|
|
}
|
|
// Otherwise, we're the second handle in a pair. Forward the arguments and
|
|
// remove the (second) cast.
|
|
CallInst *Def = cast<CallInst>(Cast->getOperand(0));
|
|
assert(Def->getIntrinsicID() == Intrinsic::dx_resource_casthandle &&
|
|
"Unbalanced pair of temporary handle casts");
|
|
Cast->replaceAllUsesWith(Def->getOperand(0));
|
|
Cast->eraseFromParent();
|
|
}
|
|
for (CallInst *Cast : ToRemove) {
|
|
assert(Cast->user_empty() && "Temporary handle cast still has users");
|
|
Cast->eraseFromParent();
|
|
}
|
|
|
|
// Deduplicate the cast functions so that we only erase each one once.
|
|
llvm::sort(CastFns);
|
|
CastFns.erase(llvm::unique(CastFns), CastFns.end());
|
|
for (Function *F : CastFns)
|
|
F->eraseFromParent();
|
|
|
|
CleanupCasts.clear();
|
|
}
|
|
|
|
// Remove the resource global associated with the handleFromBinding call
|
|
// instruction and their uses as they aren't needed anymore.
|
|
// TODO: We should verify that all the globals get removed.
|
|
// It's expected we'll need a custom pass in the future that will eliminate
|
|
// the need for this here.
|
|
void removeResourceGlobals(CallInst *CI) {
|
|
for (User *User : make_early_inc_range(CI->users())) {
|
|
if (StoreInst *Store = dyn_cast<StoreInst>(User)) {
|
|
Value *V = Store->getOperand(1);
|
|
Store->eraseFromParent();
|
|
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
|
|
if (GV->use_empty()) {
|
|
GV->removeDeadConstantUsers();
|
|
GV->eraseFromParent();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Lower every call of \c F to a `CreateHandle` DXIL op, using the binding
/// recorded for the call in the binding map.
///
/// The op receives the resource class as an i8, the binding's record ID as an
/// i32, call argument 3 offset by the binding's lower bound, and call
/// argument 4. The result is wrapped in a temporary handle cast back to the
/// call's original type.
/// \returns true if any call could not be lowered (see replaceFunction).
[[nodiscard]] bool lowerToCreateHandle(Function &F) {
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *I8Ty = IRB.getInt8Ty();
  Type *I32Ty = IRB.getInt32Ty();

  auto LowerCall = [&](CallInst *CI) -> Error {
    IRB.SetInsertPoint(CI);

    auto *It = DBM.find(CI);
    assert(It != DBM.end() && "Resource not in map?");
    dxil::ResourceBindingInfo &RI = *It;

    const auto &Binding = RI.getBinding();
    dxil::ResourceClass RC = DRTM[RI.getHandleTy()].getResourceClass();

    // Only emit the add when the lower bound actually shifts the index.
    Value *Index = CI->getArgOperand(3);
    if (Binding.LowerBound != 0) {
      Value *Base = ConstantInt::get(I32Ty, Binding.LowerBound);
      Index = IRB.CreateAdd(Index, Base);
    }

    std::array<Value *, 4> OpArgs{
        ConstantInt::get(I8Ty, llvm::to_underlying(RC)),
        ConstantInt::get(I32Ty, Binding.RecordID), Index,
        CI->getArgOperand(4)};
    Expected<CallInst *> Handle =
        OpBuilder.tryCreateOp(OpCode::CreateHandle, OpArgs, CI->getName());
    if (Error E = Handle.takeError())
      return E;

    Value *Cast = createTmpHandleCast(*Handle, CI->getType());

    removeResourceGlobals(CI);

    CI->replaceAllUsesWith(Cast);
    CI->eraseFromParent();
    return Error::success();
  };
  return replaceFunction(F, LowerCall);
}
|
|
|
|
/// Lower every call of \c F to a `CreateHandleFromBinding` DXIL op followed
/// by an `AnnotateHandle` op, using the binding recorded for the call in the
/// binding map.
///
/// The annotated handle is wrapped in a temporary handle cast back to the
/// call's original type.
/// \returns true if any call could not be lowered (see replaceFunction).
[[nodiscard]] bool lowerToBindAndAnnotateHandle(Function &F) {
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *I32Ty = IRB.getInt32Ty();

  auto LowerCall = [&](CallInst *CI) -> Error {
    IRB.SetInsertPoint(CI);

    auto *It = DBM.find(CI);
    assert(It != DBM.end() && "Resource not in map?");
    dxil::ResourceBindingInfo &RI = *It;

    const auto &Binding = RI.getBinding();
    dxil::ResourceTypeInfo &RTI = DRTM[RI.getHandleTy()];
    dxil::ResourceClass RC = RTI.getResourceClass();

    // Only emit the add when the lower bound actually shifts the index.
    Value *Index = CI->getArgOperand(3);
    if (Binding.LowerBound != 0) {
      Value *Base = ConstantInt::get(I32Ty, Binding.LowerBound);
      Index = IRB.CreateAdd(Index, Base);
    }

    std::pair<uint32_t, uint32_t> Props =
        RI.getAnnotateProps(*F.getParent(), RTI);

    // `CreateHandleFromBinding` takes the upper bound rather than the size.
    // An "unbounded" size is kept as is instead of being turned into
    // LowerBound + Size - 1.
    const uint32_t Unbounded = std::numeric_limits<uint32_t>::max();
    uint32_t UpperBound = Unbounded;
    if (Binding.Size != Unbounded)
      UpperBound = Binding.LowerBound + Binding.Size - 1;

    Constant *ResBind = OpBuilder.getResBind(Binding.LowerBound, UpperBound,
                                             Binding.Space, RC);
    std::array<Value *, 3> BindArgs{ResBind, Index, CI->getArgOperand(4)};
    Expected<CallInst *> Bound = OpBuilder.tryCreateOp(
        OpCode::CreateHandleFromBinding, BindArgs, CI->getName());
    if (Error E = Bound.takeError())
      return E;

    std::array<Value *, 2> AnnotateArgs{
        *Bound, OpBuilder.getResProps(Props.first, Props.second)};
    Expected<CallInst *> Annotated = OpBuilder.tryCreateOp(
        OpCode::AnnotateHandle, AnnotateArgs,
        CI->hasName() ? CI->getName() + "_annot" : Twine());
    if (Error E = Annotated.takeError())
      return E;

    Value *Cast = createTmpHandleCast(*Annotated, CI->getType());

    removeResourceGlobals(CI);

    CI->replaceAllUsesWith(Cast);
    CI->eraseFromParent();

    return Error::success();
  };
  return replaceFunction(F, LowerCall);
}
|
|
|
|
/// Lower `dx.resource.handlefrombinding` intrinsics depending on the shader
|
|
/// model and taking into account binding information from
|
|
/// DXILResourceBindingAnalysis.
|
|
bool lowerHandleFromBinding(Function &F) {
|
|
const Triple &TT = M.getTargetTriple();
|
|
if (TT.getDXILVersion() < VersionTuple(1, 6))
|
|
return lowerToCreateHandle(F);
|
|
return lowerToBindAndAnnotateHandle(F);
|
|
}
|
|
|
|
/// Replace uses of \c Intrin with the values in the `dx.ResRet` of \c Op.
|
|
/// Since we expect to be post-scalarization, make an effort to avoid vectors.
|
|
Error replaceResRetUses(CallInst *Intrin, CallInst *Op, bool HasCheckBit) {
|
|
IRBuilder<> &IRB = OpBuilder.getIRB();
|
|
|
|
Instruction *OldResult = Intrin;
|
|
Type *OldTy = Intrin->getType();
|
|
|
|
if (HasCheckBit) {
|
|
auto *ST = cast<StructType>(OldTy);
|
|
|
|
Value *CheckOp = nullptr;
|
|
Type *Int32Ty = IRB.getInt32Ty();
|
|
for (Use &U : make_early_inc_range(OldResult->uses())) {
|
|
if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) {
|
|
ArrayRef<unsigned> Indices = EVI->getIndices();
|
|
assert(Indices.size() == 1);
|
|
// We're only interested in uses of the check bit for now.
|
|
if (Indices[0] != 1)
|
|
continue;
|
|
if (!CheckOp) {
|
|
Value *NewEVI = IRB.CreateExtractValue(Op, 4);
|
|
Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
|
|
OpCode::CheckAccessFullyMapped, {NewEVI},
|
|
OldResult->hasName() ? OldResult->getName() + "_check"
|
|
: Twine(),
|
|
Int32Ty);
|
|
if (Error E = OpCall.takeError())
|
|
return E;
|
|
CheckOp = *OpCall;
|
|
}
|
|
EVI->replaceAllUsesWith(CheckOp);
|
|
EVI->eraseFromParent();
|
|
}
|
|
}
|
|
|
|
if (OldResult->use_empty()) {
|
|
// Only the check bit was used, so we're done here.
|
|
OldResult->eraseFromParent();
|
|
return Error::success();
|
|
}
|
|
|
|
assert(OldResult->hasOneUse() &&
|
|
isa<ExtractValueInst>(*OldResult->user_begin()) &&
|
|
"Expected only use to be extract of first element");
|
|
OldResult = cast<Instruction>(*OldResult->user_begin());
|
|
OldTy = ST->getElementType(0);
|
|
}
|
|
|
|
// For scalars, we just extract the first element.
|
|
if (!isa<FixedVectorType>(OldTy)) {
|
|
Value *EVI = IRB.CreateExtractValue(Op, 0);
|
|
OldResult->replaceAllUsesWith(EVI);
|
|
OldResult->eraseFromParent();
|
|
if (OldResult != Intrin) {
|
|
assert(Intrin->use_empty() && "Intrinsic still has uses?");
|
|
Intrin->eraseFromParent();
|
|
}
|
|
return Error::success();
|
|
}
|
|
|
|
std::array<Value *, 4> Extracts = {};
|
|
SmallVector<ExtractElementInst *> DynamicAccesses;
|
|
|
|
// The users of the operation should all be scalarized, so we attempt to
|
|
// replace the extractelements with extractvalues directly.
|
|
for (Use &U : make_early_inc_range(OldResult->uses())) {
|
|
if (auto *EEI = dyn_cast<ExtractElementInst>(U.getUser())) {
|
|
if (auto *IndexOp = dyn_cast<ConstantInt>(EEI->getIndexOperand())) {
|
|
size_t IndexVal = IndexOp->getZExtValue();
|
|
assert(IndexVal < 4 && "Index into buffer load out of range");
|
|
if (!Extracts[IndexVal])
|
|
Extracts[IndexVal] = IRB.CreateExtractValue(Op, IndexVal);
|
|
EEI->replaceAllUsesWith(Extracts[IndexVal]);
|
|
EEI->eraseFromParent();
|
|
} else {
|
|
DynamicAccesses.push_back(EEI);
|
|
}
|
|
}
|
|
}
|
|
|
|
const auto *VecTy = cast<FixedVectorType>(OldTy);
|
|
const unsigned N = VecTy->getNumElements();
|
|
|
|
// If there's a dynamic access we need to round trip through stack memory so
|
|
// that we don't leave vectors around.
|
|
if (!DynamicAccesses.empty()) {
|
|
Type *Int32Ty = IRB.getInt32Ty();
|
|
Constant *Zero = ConstantInt::get(Int32Ty, 0);
|
|
|
|
Type *ElTy = VecTy->getElementType();
|
|
Type *ArrayTy = ArrayType::get(ElTy, N);
|
|
Value *Alloca = IRB.CreateAlloca(ArrayTy);
|
|
|
|
for (int I = 0, E = N; I != E; ++I) {
|
|
if (!Extracts[I])
|
|
Extracts[I] = IRB.CreateExtractValue(Op, I);
|
|
Value *GEP = IRB.CreateInBoundsGEP(
|
|
ArrayTy, Alloca, {Zero, ConstantInt::get(Int32Ty, I)});
|
|
IRB.CreateStore(Extracts[I], GEP);
|
|
}
|
|
|
|
for (ExtractElementInst *EEI : DynamicAccesses) {
|
|
Value *GEP = IRB.CreateInBoundsGEP(ArrayTy, Alloca,
|
|
{Zero, EEI->getIndexOperand()});
|
|
Value *Load = IRB.CreateLoad(ElTy, GEP);
|
|
EEI->replaceAllUsesWith(Load);
|
|
EEI->eraseFromParent();
|
|
}
|
|
}
|
|
|
|
// If we still have uses, then we're not fully scalarized and need to
|
|
// recreate the vector. This should only happen for things like exported
|
|
// functions from libraries.
|
|
if (!OldResult->use_empty()) {
|
|
for (int I = 0, E = N; I != E; ++I)
|
|
if (!Extracts[I])
|
|
Extracts[I] = IRB.CreateExtractValue(Op, I);
|
|
|
|
Value *Vec = UndefValue::get(OldTy);
|
|
for (int I = 0, E = N; I != E; ++I)
|
|
Vec = IRB.CreateInsertElement(Vec, Extracts[I], I);
|
|
OldResult->replaceAllUsesWith(Vec);
|
|
}
|
|
|
|
OldResult->eraseFromParent();
|
|
if (OldResult != Intrin) {
|
|
assert(Intrin->use_empty() && "Intrinsic still has uses?");
|
|
Intrin->eraseFromParent();
|
|
}
|
|
|
|
return Error::success();
|
|
}
|
|
|
|
/// Lower every call of the typed buffer load intrinsic \c F to a `BufferLoad`
/// DXIL op, then rewrite the call's uses with replaceResRetUses().
///
/// \c HasCheckBit says whether the intrinsic returns a struct whose element 0
/// is the loaded value, as opposed to the value itself.
/// \returns true if any call could not be lowered (see replaceFunction).
[[nodiscard]] bool lowerTypedBufferLoad(Function &F, bool HasCheckBit) {
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *I32Ty = IRB.getInt32Ty();

  auto LowerCall = [&](CallInst *CI) -> Error {
    IRB.SetInsertPoint(CI);

    Value *Handle =
        createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
    Value *Index0 = CI->getArgOperand(1);
    // The second index is passed as undef.
    Value *Index1 = UndefValue::get(I32Ty);

    Type *LoadedTy = CI->getType();
    if (HasCheckBit)
      LoadedTy = cast<StructType>(LoadedTy)->getElementType(0);
    Type *NewRetTy = OpBuilder.getResRetType(LoadedTy->getScalarType());

    std::array<Value *, 3> OpArgs{Handle, Index0, Index1};
    Expected<CallInst *> Load = OpBuilder.tryCreateOp(
        OpCode::BufferLoad, OpArgs, CI->getName(), NewRetTy);
    if (Error E = Load.takeError())
      return E;
    if (Error E = replaceResRetUses(CI, *Load, HasCheckBit))
      return E;

    return Error::success();
  };
  return replaceFunction(F, LowerCall);
}
|
|
|
|
/// Lower every call of the raw buffer load intrinsic \c F, then rewrite the
/// call's uses with replaceResRetUses().
///
/// For DXIL version 1.2 and later a `RawBufferLoad` op is emitted, which
/// additionally takes an element mask and an alignment. Older versions get a
/// plain `BufferLoad` op.
/// \returns true if any call could not be lowered (see replaceFunction).
[[nodiscard]] bool lowerRawBufferLoad(Function &F) {
  const Triple &TT = M.getTargetTriple();
  const bool UseRawOp = TT.getDXILVersion() >= VersionTuple(1, 2);
  const DataLayout &DL = F.getDataLayout();
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *I8Ty = IRB.getInt8Ty();
  Type *I32Ty = IRB.getInt32Ty();

  auto LowerCall = [&](CallInst *CI) -> Error {
    IRB.SetInsertPoint(CI);

    // Element 0 of the intrinsic's struct result is the loaded value.
    Type *LoadedTy = cast<StructType>(CI->getType())->getElementType(0);
    Type *ScalarTy = LoadedTy->getScalarType();
    Type *NewRetTy = OpBuilder.getResRetType(ScalarTy);

    Value *Handle =
        createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
    Value *Index0 = CI->getArgOperand(1);
    Value *Index1 = CI->getArgOperand(2);

    // The mask has one low bit set per loaded element.
    uint64_t NumElements =
        DL.getTypeSizeInBits(LoadedTy) / DL.getTypeSizeInBits(ScalarTy);
    Value *Mask = ConstantInt::get(I8Ty, ~(~0U << NumElements));
    Value *Align =
        ConstantInt::get(I32Ty, DL.getPrefTypeAlign(ScalarTy).value());

    Expected<CallInst *> Load =
        UseRawOp ? OpBuilder.tryCreateOp(OpCode::RawBufferLoad,
                                         {Handle, Index0, Index1, Mask, Align},
                                         CI->getName(), NewRetTy)
                 : OpBuilder.tryCreateOp(OpCode::BufferLoad,
                                         {Handle, Index0, Index1},
                                         CI->getName(), NewRetTy);
    if (Error E = Load.takeError())
      return E;
    if (Error E = replaceResRetUses(CI, *Load, /*HasCheckBit=*/true))
      return E;

    return Error::success();
  };
  return replaceFunction(F, LowerCall);
}
|
|
|
|
/// Lower every call of the cbuffer row load intrinsic \c F to a
/// `CBufferLoadLegacy` DXIL op, and redirect the call's struct uses to the
/// op's named return struct.
/// \returns true if any call could not be lowered (see replaceFunction).
[[nodiscard]] bool lowerCBufferLoad(Function &F) {
  IRBuilder<> &IRB = OpBuilder.getIRB();

  auto LowerCall = [&](CallInst *CI) -> Error {
    IRB.SetInsertPoint(CI);

    // The op's return struct is picked from the scalar type of the
    // intrinsic's first struct element.
    Type *RowElemTy = cast<StructType>(CI->getType())->getElementType(0);
    Type *NewRetTy = OpBuilder.getCBufRetType(RowElemTy->getScalarType());

    Value *Handle =
        createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
    Value *Index = CI->getArgOperand(1);

    Expected<CallInst *> Load = OpBuilder.tryCreateOp(
        OpCode::CBufferLoadLegacy, {Handle, Index}, CI->getName(), NewRetTy);
    if (Error E = Load.takeError())
      return E;
    if (Error E = replaceNamedStructUses(CI, *Load))
      return E;

    CI->eraseFromParent();
    return Error::success();
  };
  return replaceFunction(F, LowerCall);
}
|
|
|
|
/// Lower every call of the update-counter intrinsic \c F to an
/// `UpdateCounter` DXIL op that returns an i32. The op takes the handle
/// (call argument 0, via a temporary handle cast) and call argument 1.
/// \returns true if any call could not be lowered (see replaceFunction).
[[nodiscard]] bool lowerUpdateCounter(Function &F) {
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *I32Ty = IRB.getInt32Ty();

  auto LowerCall = [&](CallInst *CI) -> Error {
    IRB.SetInsertPoint(CI);
    Value *Handle =
        createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
    std::array<Value *, 2> OpArgs{Handle, CI->getArgOperand(1)};

    Expected<CallInst *> Counter = OpBuilder.tryCreateOp(
        OpCode::UpdateCounter, OpArgs, CI->getName(), I32Ty);
    if (Error E = Counter.takeError())
      return E;

    CI->replaceAllUsesWith(*Counter);
    CI->eraseFromParent();
    return Error::success();
  };
  return replaceFunction(F, LowerCall);
}
|
|
|
|
/// Erase the now dead `getpointer` prototype \c F.
///
/// The operations themselves should already have been handled in
/// DXILResourceAccess, so \c F is asserted to have no users left.
/// \returns false always, since there is nothing here that can fail.
[[nodiscard]] bool lowerGetPointer(Function &F) {
  assert(F.user_empty() && "getpointer operations should have been removed");
  F.eraseFromParent();
  return false;
}
|
|
|
|
/// Lower every call of the typed (\c IsRaw false) or raw (\c IsRaw true)
/// buffer store intrinsic \c F to a DXIL store op.
///
/// A `BufferStore` op is emitted, except for raw stores at DXIL version 1.2
/// or later, which get a `RawBufferStore` op with an extra alignment
/// argument. The stored data is always passed as four scalars plus a mask.
/// Typed stores must supply exactly four elements and raw stores at most
/// four; anything else is reported as an error.
/// \returns true if any call could not be lowered (see replaceFunction).
[[nodiscard]] bool lowerBufferStore(Function &F, bool IsRaw) {
  const Triple &TT = M.getTargetTriple();
  VersionTuple DXILVersion = TT.getDXILVersion();
  const DataLayout &DL = F.getDataLayout();
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *Int8Ty = IRB.getInt8Ty();
  Type *Int32Ty = IRB.getInt32Ty();

  return replaceFunction(F, [&](CallInst *CI) -> Error {
    IRB.SetInsertPoint(CI);

    Value *Handle =
        createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
    Value *Index0 = CI->getArgOperand(1);
    // Typed stores have no second index; pass undef for it.
    Value *Index1 = IsRaw ? CI->getArgOperand(2) : UndefValue::get(Int32Ty);

    Value *Data = CI->getArgOperand(IsRaw ? 3 : 2);
    Type *DataTy = Data->getType();
    Type *ScalarTy = DataTy->getScalarType();

    uint64_t NumElements =
        DL.getTypeSizeInBits(DataTy) / DL.getTypeSizeInBits(ScalarTy);

    // TODO: check that we only have vector or scalar...
    if (!IsRaw && NumElements != 4)
      return make_error<StringError>(
          "typedBufferStore data must be a vector of 4 elements",
          inconvertibleErrorCode());
    if (NumElements > 4)
      return make_error<StringError>(
          "rawBufferStore data must have at most 4 elements",
          inconvertibleErrorCode());

    // Build the mask only after the element count has been validated: the
    // shift below is undefined for counts of 32 or more.
    Value *Mask = ConstantInt::get(Int8Ty, ~(~0U << NumElements));

    std::array<Value *, 4> DataElements{nullptr, nullptr, nullptr, nullptr};
    if (DataTy == ScalarTy)
      DataElements[0] = Data;
    else {
      // Since we're post-scalarizer, if we see a vector here it's likely
      // constructed solely for the argument of the store. Just use the scalar
      // values from before they're inserted into the temporary.
      auto *IEI = dyn_cast<InsertElementInst>(Data);
      while (IEI) {
        auto *IndexOp = dyn_cast<ConstantInt>(IEI->getOperand(2));
        if (!IndexOp)
          break;
        size_t IndexVal = IndexOp->getZExtValue();
        assert(IndexVal < 4 && "Too many elements for buffer store");
        // The chain is walked from the last insert backwards, so the first
        // value seen for an index is the one that ends up in the vector. An
        // earlier insert at the same index must not override it.
        if (!DataElements[IndexVal])
          DataElements[IndexVal] = IEI->getOperand(1);
        IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0));
      }
    }

    // If for some reason we weren't able to forward the arguments from the
    // scalarizer artifact, then we may need to actually extract elements from
    // the vector.
    for (int I = 0, E = NumElements; I < E; ++I)
      if (DataElements[I] == nullptr)
        DataElements[I] =
            IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, I));
    // For any elements beyond the length of the vector, fill up with undef.
    for (int I = NumElements, E = 4; I < E; ++I)
      if (DataElements[I] == nullptr)
        DataElements[I] = UndefValue::get(ScalarTy);

    dxil::OpCode Op = OpCode::BufferStore;
    SmallVector<Value *, 9> Args{
        Handle,          Index0,          Index1,          DataElements[0],
        DataElements[1], DataElements[2], DataElements[3], Mask};
    if (IsRaw && DXILVersion >= VersionTuple(1, 2)) {
      Op = OpCode::RawBufferStore;
      // RawBufferStore requires the alignment
      Args.push_back(
          ConstantInt::get(Int32Ty, DL.getPrefTypeAlign(ScalarTy).value()));
    }
    Expected<CallInst *> OpCall =
        OpBuilder.tryCreateOp(Op, Args, CI->getName());
    if (Error E = OpCall.takeError())
      return E;

    CI->eraseFromParent();
    // Clean up any leftover `insertelement`s
    auto *IEI = dyn_cast<InsertElementInst>(Data);
    while (IEI && IEI->use_empty()) {
      InsertElementInst *Tmp = IEI;
      IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0));
      Tmp->eraseFromParent();
    }

    return Error::success();
  });
}
|
|
|
|
/// Lower every call of the `ctpop` intrinsic \c F to a `CountBits` DXIL op.
///
/// The op always produces an i32 (or a vector of i32 matching the intrinsic's
/// vector type). A 32-bit ctpop is replaced directly. For the 16- and 64-bit
/// forms, users that merely cast the result to the op's type are replaced by
/// the op itself, and any other user gets a zext/trunc of the op result back
/// to the intrinsic's return type.
/// \returns true if any call could not be lowered (see replaceFunction).
[[nodiscard]] bool lowerCtpopToCountBits(Function &F) {
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *Int32Ty = IRB.getInt32Ty();

  return replaceFunction(F, [&](CallInst *CI) -> Error {
    IRB.SetInsertPoint(CI);
    SmallVector<Value *> Args;
    Args.append(CI->arg_begin(), CI->arg_end());

    Type *RetTy = Int32Ty;
    Type *FRT = F.getReturnType();
    if (const auto *VT = dyn_cast<VectorType>(FRT))
      RetTy = VectorType::get(RetTy, VT);

    Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
        dxil::OpCode::CountBits, Args, CI->getName(), RetTy);
    if (Error E = OpCall.takeError())
      return E;

    // If the result type is 32 bits we can do a direct replacement.
    if (FRT->isIntOrIntVectorTy(32)) {
      CI->replaceAllUsesWith(*OpCall);
      CI->eraseFromParent();
      return Error::success();
    }

    // Cast opcodes that convert the ctpop result to the op's type: trunc for
    // the 64-bit form, zext/sext for the 16-bit form.
    unsigned CastOp = Instruction::Trunc;
    unsigned CastOp2 = Instruction::Trunc;
    if (FRT->isIntOrIntVectorTy(16)) {
      CastOp = Instruction::ZExt;
      CastOp2 = Instruction::SExt;
    } else { // must be 64 bits
      // Adjacent literals instead of a backslash continuation, which would
      // embed the source indentation in the message.
      assert(FRT->isIntOrIntVectorTy(64) &&
             "Currently only lowering 16, 32, or 64 bit ctpop to CountBits "
             "is supported.");
    }

    // It is correct to replace the ctpop with the dxil op and
    // remove all casts to i32
    bool NeedsCast = false;
    for (User *User : make_early_inc_range(CI->users())) {
      Instruction *I = dyn_cast<Instruction>(User);
      if (I && (I->getOpcode() == CastOp || I->getOpcode() == CastOp2) &&
          I->getType() == RetTy) {
        I->replaceAllUsesWith(*OpCall);
        I->eraseFromParent();
      } else
        NeedsCast = true;
    }

    // It is correct to replace a ctpop with the dxil op and
    // a cast from i32 to the return type of the ctpop
    // the cast is emitted here if there is a non-cast to i32
    // instr which uses the ctpop
    if (NeedsCast) {
      Value *Cast =
          IRB.CreateZExtOrTrunc(*OpCall, F.getReturnType(), "ctpop.cast");
      CI->replaceAllUsesWith(Cast);
    }

    CI->eraseFromParent();
    return Error::success();
  });
}
|
|
|
|
/// Lower every intrinsic declaration in the module to DXIL operations.
///
/// Function definitions are skipped, as are non-intrinsics, `dbg_value` and
/// the temporary `dx_resource_casthandle` declarations. Any other intrinsic
/// without a lowering is reported as unsupported. The temporary handle casts
/// are cleaned up only if something was processed and no error occurred.
/// \returns true if at least one declaration was processed, including ones
/// that produced an error.
bool lowerIntrinsics() {
  bool Changed = false;
  bool HasErrors = false;

  for (Function &F : make_early_inc_range(M.functions())) {
    if (!F.isDeclaration())
      continue;

    switch (F.getIntrinsicID()) {
    // NOTE: Skip dx_resource_casthandle here. They are
    // resolved after this loop in cleanupHandleCasts.
    case Intrinsic::dx_resource_casthandle:
    // NOTE: llvm.dbg.value is supported as is in DXIL.
    case Intrinsic::dbg_value:
    case Intrinsic::not_intrinsic:
      continue;
    default: {
      DiagnosticInfoUnsupported Diag(
          F, "Unsupported intrinsic for DXIL lowering");
      M.getContext().diagnose(Diag);
      HasErrors |= true;
      break;
    }
// One case per intrinsic that maps directly onto a DXIL op, generated from
// DXILOperation.inc.
#define DXIL_OP_INTRINSIC(OpCode, Intrin, ...)                                 \
  case Intrin:                                                                 \
    HasErrors |= replaceFunctionWithOp(                                        \
        F, OpCode, ArrayRef<IntrinArgSelect>{__VA_ARGS__});                    \
    break;
#include "DXILOperation.inc"
    case Intrinsic::dx_resource_handlefrombinding:
      HasErrors |= lowerHandleFromBinding(F);
      break;
    case Intrinsic::dx_resource_getpointer:
      HasErrors |= lowerGetPointer(F);
      break;
    case Intrinsic::dx_resource_load_typedbuffer:
      HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true);
      break;
    case Intrinsic::dx_resource_store_typedbuffer:
      HasErrors |= lowerBufferStore(F, /*IsRaw=*/false);
      break;
    case Intrinsic::dx_resource_load_rawbuffer:
      HasErrors |= lowerRawBufferLoad(F);
      break;
    case Intrinsic::dx_resource_store_rawbuffer:
      HasErrors |= lowerBufferStore(F, /*IsRaw=*/true);
      break;
    case Intrinsic::dx_resource_load_cbufferrow_2:
    case Intrinsic::dx_resource_load_cbufferrow_4:
    case Intrinsic::dx_resource_load_cbufferrow_8:
      HasErrors |= lowerCBufferLoad(F);
      break;
    case Intrinsic::dx_resource_updatecounter:
      HasErrors |= lowerUpdateCounter(F);
      break;
    case Intrinsic::ctpop:
      HasErrors |= lowerCtpopToCountBits(F);
      break;
    }
    // Reached for every declaration that was not skipped above.
    Changed = true;
  }

  if (Changed && !HasErrors)
    cleanupHandleCasts();

  return Changed;
}
|
|
};
|
|
} // namespace
|
|
|
|
/// New pass manager entry point: lowers the intrinsics of \p M and reports
/// which analyses remain valid. If nothing was lowered, all analyses are
/// preserved; otherwise only the resource binding, metadata and shader flags
/// analyses are.
PreservedAnalyses DXILOpLowering::run(Module &M, ModuleAnalysisManager &MAM) {
  DXILBindingMap &DBM = MAM.getResult<DXILResourceBindingAnalysis>(M);
  DXILResourceTypeMap &DRTM = MAM.getResult<DXILResourceTypeAnalysis>(M);

  OpLowerer Lowerer(M, DBM, DRTM);
  if (!Lowerer.lowerIntrinsics())
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved;
  Preserved.preserve<DXILResourceBindingAnalysis>();
  Preserved.preserve<DXILMetadataAnalysis>();
  Preserved.preserve<ShaderFlagsAnalysis>();
  return Preserved;
}
|
|
|
|
namespace {
|
|
class DXILOpLoweringLegacy : public ModulePass {
|
|
public:
|
|
bool runOnModule(Module &M) override {
|
|
DXILBindingMap &DBM =
|
|
getAnalysis<DXILResourceBindingWrapperPass>().getBindingMap();
|
|
DXILResourceTypeMap &DRTM =
|
|
getAnalysis<DXILResourceTypeWrapperPass>().getResourceTypeMap();
|
|
|
|
return OpLowerer(M, DBM, DRTM).lowerIntrinsics();
|
|
}
|
|
StringRef getPassName() const override { return "DXIL Op Lowering"; }
|
|
DXILOpLoweringLegacy() : ModulePass(ID) {}
|
|
|
|
static char ID; // Pass identification.
|
|
void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
|
|
AU.addRequired<DXILResourceTypeWrapperPass>();
|
|
AU.addRequired<DXILResourceBindingWrapperPass>();
|
|
AU.addPreserved<DXILResourceBindingWrapperPass>();
|
|
AU.addPreserved<DXILResourceMDWrapper>();
|
|
AU.addPreserved<DXILMetadataAnalysisWrapperPass>();
|
|
AU.addPreserved<ShaderFlagsAnalysisWrapper>();
|
|
}
|
|
};
|
|
char DXILOpLoweringLegacy::ID = 0;
|
|
} // end anonymous namespace
|
|
|
|
// Legacy pass registration: the pass is registered under DEBUG_TYPE and
// depends on the resource type and resource binding wrapper passes.
INITIALIZE_PASS_BEGIN(DXILOpLoweringLegacy, DEBUG_TYPE, "DXIL Op Lowering",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DXILResourceTypeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DXILResourceBindingWrapperPass)
INITIALIZE_PASS_END(DXILOpLoweringLegacy, DEBUG_TYPE, "DXIL Op Lowering", false,
                    false)
|
|
|
|
/// Creates a new legacy DXIL op lowering pass. The instance is allocated
/// with `new` and handed to the caller as a raw pointer.
ModulePass *llvm::createDXILOpLoweringLegacyPass() {
  ModulePass *Pass = new DXILOpLoweringLegacy();
  return Pass;
}
|