This revision removes the TypeConverter parameter passed to the apply* methods, and instead moves the responsibility of region type conversion to patterns. The types of a region can be converted using the 'convertRegionTypes' method, which acts similarly to the existing 'applySignatureConversion'. This method ensures that all blocks within, and including those moved into, a region will have the block argument types converted using the provided converter. This has the benefit of making more of the legalization logic controlled by patterns, instead of being handled explicitly by the driver. It also opens up the possibility to support multiple type conversions at some point in the future. This revision also adds a new utility class `FailureOr<T>` that provides a LogicalResult friendly facility for returning a failure or a valid result value. Differential Revision: https://reviews.llvm.org/D81681
174 lines
7.9 KiB
C++
174 lines
7.9 KiB
C++
//===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
#ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
|
|
#define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
|
|
|
|
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
|
|
#include "mlir/Dialect/GPU/GPUDialect.h"
|
|
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
|
|
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
|
#include "mlir/IR/Builders.h"
|
|
#include "llvm/Support/FormatVariadic.h"
|
|
|
|
namespace mlir {
|
|
|
|
template <unsigned AllocaAddrSpace>
|
|
struct GPUFuncOpLowering : ConvertToLLVMPattern {
|
|
explicit GPUFuncOpLowering(LLVMTypeConverter &typeConverter)
|
|
: ConvertToLLVMPattern(gpu::GPUFuncOp::getOperationName(),
|
|
typeConverter.getDialect()->getContext(),
|
|
typeConverter) {}
|
|
|
|
LogicalResult
|
|
matchAndRewrite(Operation *op, ArrayRef<Value> operands,
|
|
ConversionPatternRewriter &rewriter) const override {
|
|
assert(operands.empty() && "func op is not expected to have operands");
|
|
auto gpuFuncOp = cast<gpu::GPUFuncOp>(op);
|
|
Location loc = gpuFuncOp.getLoc();
|
|
|
|
SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;
|
|
workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
|
|
for (auto en : llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
|
|
Value attribution = en.value();
|
|
|
|
auto type = attribution.getType().dyn_cast<MemRefType>();
|
|
assert(type && type.hasStaticShape() && "unexpected type in attribution");
|
|
|
|
uint64_t numElements = type.getNumElements();
|
|
|
|
auto elementType = typeConverter.convertType(type.getElementType())
|
|
.template cast<LLVM::LLVMType>();
|
|
auto arrayType = LLVM::LLVMType::getArrayTy(elementType, numElements);
|
|
std::string name = std::string(
|
|
llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), en.index()));
|
|
auto globalOp = rewriter.create<LLVM::GlobalOp>(
|
|
gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
|
|
LLVM::Linkage::Internal, name, /*value=*/Attribute(),
|
|
gpu::GPUDialect::getWorkgroupAddressSpace());
|
|
workgroupBuffers.push_back(globalOp);
|
|
}
|
|
|
|
// Rewrite the original GPU function to an LLVM function.
|
|
auto funcType = typeConverter.convertType(gpuFuncOp.getType())
|
|
.template cast<LLVM::LLVMType>()
|
|
.getPointerElementTy();
|
|
|
|
// Remap proper input types.
|
|
TypeConverter::SignatureConversion signatureConversion(
|
|
gpuFuncOp.front().getNumArguments());
|
|
typeConverter.convertFunctionSignature(
|
|
gpuFuncOp.getType(), /*isVariadic=*/false, signatureConversion);
|
|
|
|
// Create the new function operation. Only copy those attributes that are
|
|
// not specific to function modeling.
|
|
SmallVector<NamedAttribute, 4> attributes;
|
|
for (const auto &attr : gpuFuncOp.getAttrs()) {
|
|
if (attr.first == SymbolTable::getSymbolAttrName() ||
|
|
attr.first == impl::getTypeAttrName() ||
|
|
attr.first == gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName())
|
|
continue;
|
|
attributes.push_back(attr);
|
|
}
|
|
auto llvmFuncOp = rewriter.create<LLVM::LLVMFuncOp>(
|
|
gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType,
|
|
LLVM::Linkage::External, attributes);
|
|
|
|
{
|
|
// Insert operations that correspond to converted workgroup and private
|
|
// memory attributions to the body of the function. This must operate on
|
|
// the original function, before the body region is inlined in the new
|
|
// function to maintain the relation between block arguments and the
|
|
// parent operation that assigns their semantics.
|
|
OpBuilder::InsertionGuard guard(rewriter);
|
|
|
|
// Rewrite workgroup memory attributions to addresses of global buffers.
|
|
rewriter.setInsertionPointToStart(&gpuFuncOp.front());
|
|
unsigned numProperArguments = gpuFuncOp.getNumArguments();
|
|
auto i32Type = LLVM::LLVMType::getInt32Ty(typeConverter.getDialect());
|
|
|
|
Value zero = nullptr;
|
|
if (!workgroupBuffers.empty())
|
|
zero = rewriter.create<LLVM::ConstantOp>(loc, i32Type,
|
|
rewriter.getI32IntegerAttr(0));
|
|
for (auto en : llvm::enumerate(workgroupBuffers)) {
|
|
LLVM::GlobalOp global = en.value();
|
|
Value address = rewriter.create<LLVM::AddressOfOp>(loc, global);
|
|
auto elementType = global.getType().getArrayElementType();
|
|
Value memory = rewriter.create<LLVM::GEPOp>(
|
|
loc, elementType.getPointerTo(global.addr_space().getZExtValue()),
|
|
address, ArrayRef<Value>{zero, zero});
|
|
|
|
// Build a memref descriptor pointing to the buffer to plug with the
|
|
// existing memref infrastructure. This may use more registers than
|
|
// otherwise necessary given that memref sizes are fixed, but we can try
|
|
// and canonicalize that away later.
|
|
Value attribution = gpuFuncOp.getWorkgroupAttributions()[en.index()];
|
|
auto type = attribution.getType().cast<MemRefType>();
|
|
auto descr = MemRefDescriptor::fromStaticShape(
|
|
rewriter, loc, typeConverter, type, memory);
|
|
signatureConversion.remapInput(numProperArguments + en.index(), descr);
|
|
}
|
|
|
|
// Rewrite private memory attributions to alloca'ed buffers.
|
|
unsigned numWorkgroupAttributions =
|
|
gpuFuncOp.getNumWorkgroupAttributions();
|
|
auto int64Ty = LLVM::LLVMType::getInt64Ty(typeConverter.getDialect());
|
|
for (auto en : llvm::enumerate(gpuFuncOp.getPrivateAttributions())) {
|
|
Value attribution = en.value();
|
|
auto type = attribution.getType().cast<MemRefType>();
|
|
assert(type && type.hasStaticShape() &&
|
|
"unexpected type in attribution");
|
|
|
|
// Explicitly drop memory space when lowering private memory
|
|
// attributions since NVVM models it as `alloca`s in the default
|
|
// memory space and does not support `alloca`s with addrspace(5).
|
|
auto ptrType = typeConverter.convertType(type.getElementType())
|
|
.template cast<LLVM::LLVMType>()
|
|
.getPointerTo(AllocaAddrSpace);
|
|
Value numElements = rewriter.create<LLVM::ConstantOp>(
|
|
gpuFuncOp.getLoc(), int64Ty,
|
|
rewriter.getI64IntegerAttr(type.getNumElements()));
|
|
Value allocated = rewriter.create<LLVM::AllocaOp>(
|
|
gpuFuncOp.getLoc(), ptrType, numElements, /*alignment=*/0);
|
|
auto descr = MemRefDescriptor::fromStaticShape(
|
|
rewriter, loc, typeConverter, type, allocated);
|
|
signatureConversion.remapInput(
|
|
numProperArguments + numWorkgroupAttributions + en.index(), descr);
|
|
}
|
|
}
|
|
|
|
// Move the region to the new function, update the entry block signature.
|
|
rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(),
|
|
llvmFuncOp.end());
|
|
if (failed(rewriter.convertRegionTypes(&llvmFuncOp.getBody(), typeConverter,
|
|
&signatureConversion)))
|
|
return failure();
|
|
|
|
rewriter.eraseOp(gpuFuncOp);
|
|
return success();
|
|
}
|
|
};
|
|
|
|
struct GPUReturnOpLowering : public ConvertToLLVMPattern {
|
|
GPUReturnOpLowering(LLVMTypeConverter &typeConverter)
|
|
: ConvertToLLVMPattern(gpu::ReturnOp::getOperationName(),
|
|
typeConverter.getDialect()->getContext(),
|
|
typeConverter) {}
|
|
|
|
LogicalResult
|
|
matchAndRewrite(Operation *op, ArrayRef<Value> operands,
|
|
ConversionPatternRewriter &rewriter) const override {
|
|
rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, operands);
|
|
return success();
|
|
}
|
|
};
|
|
|
|
} // namespace mlir
|
|
|
|
#endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
|