Files
clang-p2996/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
Alex Zinenko 2230bf99c7 [mlir] replace LLVMIntegerType with built-in integer type
The LLVM dialect type system has been closed until now, i.e. did not support
types from other dialects inside containers. While this has had obvious
benefits of deriving from a common base class, it has led to some simple types
being almost identical with the built-in types, namely integer and floating
point types. This in turn has led to a lot of larger-scale complexity: simple
types must still be converted, numerous operations that correspond to LLVM IR
intrinsics are replicated to produce versions operating on either LLVM dialect
or built-in types leading to quasi-duplicate dialects, lowering to the LLVM
dialect is essentially required to be one-shot because of type conversion, etc.
In this light, it is reasonable to trade off some local complexity in the
internal implementation of LLVM dialect types for removing larger-scale system
complexity. Previous commits to the LLVM dialect type system have adapted the
API to support types from other dialects.

Replace LLVMIntegerType with the built-in IntegerType plus additional checks
that such types are signless (these are isolated in a utility function that
replaced `isa<LLVMType>` and in the parser). Temporarily keep the possibility
to parse `!llvm.i32` as a synonym for `i32`, but add a deprecation notice.

Reviewed By: mehdi_amini, silvas, antiagainst

Differential Revision: https://reviews.llvm.org/D94178
2021-01-07 19:48:31 +01:00

169 lines
7.5 KiB
C++

//===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
#define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Builders.h"
#include "llvm/Support/FormatVariadic.h"
namespace mlir {
template <unsigned AllocaAddrSpace>
struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
using ConvertOpToLLVMPattern<gpu::GPUFuncOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override {
assert(operands.empty() && "func op is not expected to have operands");
Location loc = gpuFuncOp.getLoc();
SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;
workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
for (auto en : llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
Value attribution = en.value();
auto type = attribution.getType().dyn_cast<MemRefType>();
assert(type && type.hasStaticShape() && "unexpected type in attribution");
uint64_t numElements = type.getNumElements();
auto elementType = typeConverter->convertType(type.getElementType())
.template cast<Type>();
auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
std::string name = std::string(
llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), en.index()));
auto globalOp = rewriter.create<LLVM::GlobalOp>(
gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
LLVM::Linkage::Internal, name, /*value=*/Attribute(),
gpu::GPUDialect::getWorkgroupAddressSpace());
workgroupBuffers.push_back(globalOp);
}
// Rewrite the original GPU function to an LLVM function.
auto funcType = typeConverter->convertType(gpuFuncOp.getType())
.template cast<LLVM::LLVMPointerType>()
.getElementType();
// Remap proper input types.
TypeConverter::SignatureConversion signatureConversion(
gpuFuncOp.front().getNumArguments());
getTypeConverter()->convertFunctionSignature(
gpuFuncOp.getType(), /*isVariadic=*/false, signatureConversion);
// Create the new function operation. Only copy those attributes that are
// not specific to function modeling.
SmallVector<NamedAttribute, 4> attributes;
for (const auto &attr : gpuFuncOp.getAttrs()) {
if (attr.first == SymbolTable::getSymbolAttrName() ||
attr.first == impl::getTypeAttrName() ||
attr.first == gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName())
continue;
attributes.push_back(attr);
}
auto llvmFuncOp = rewriter.create<LLVM::LLVMFuncOp>(
gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType,
LLVM::Linkage::External, attributes);
{
// Insert operations that correspond to converted workgroup and private
// memory attributions to the body of the function. This must operate on
// the original function, before the body region is inlined in the new
// function to maintain the relation between block arguments and the
// parent operation that assigns their semantics.
OpBuilder::InsertionGuard guard(rewriter);
// Rewrite workgroup memory attributions to addresses of global buffers.
rewriter.setInsertionPointToStart(&gpuFuncOp.front());
unsigned numProperArguments = gpuFuncOp.getNumArguments();
auto i32Type = IntegerType::get(rewriter.getContext(), 32);
Value zero = nullptr;
if (!workgroupBuffers.empty())
zero = rewriter.create<LLVM::ConstantOp>(loc, i32Type,
rewriter.getI32IntegerAttr(0));
for (auto en : llvm::enumerate(workgroupBuffers)) {
LLVM::GlobalOp global = en.value();
Value address = rewriter.create<LLVM::AddressOfOp>(loc, global);
auto elementType =
global.getType().cast<LLVM::LLVMArrayType>().getElementType();
Value memory = rewriter.create<LLVM::GEPOp>(
loc, LLVM::LLVMPointerType::get(elementType, global.addr_space()),
address, ArrayRef<Value>{zero, zero});
// Build a memref descriptor pointing to the buffer to plug with the
// existing memref infrastructure. This may use more registers than
// otherwise necessary given that memref sizes are fixed, but we can try
// and canonicalize that away later.
Value attribution = gpuFuncOp.getWorkgroupAttributions()[en.index()];
auto type = attribution.getType().cast<MemRefType>();
auto descr = MemRefDescriptor::fromStaticShape(
rewriter, loc, *getTypeConverter(), type, memory);
signatureConversion.remapInput(numProperArguments + en.index(), descr);
}
// Rewrite private memory attributions to alloca'ed buffers.
unsigned numWorkgroupAttributions =
gpuFuncOp.getNumWorkgroupAttributions();
auto int64Ty = IntegerType::get(rewriter.getContext(), 64);
for (auto en : llvm::enumerate(gpuFuncOp.getPrivateAttributions())) {
Value attribution = en.value();
auto type = attribution.getType().cast<MemRefType>();
assert(type && type.hasStaticShape() &&
"unexpected type in attribution");
// Explicitly drop memory space when lowering private memory
// attributions since NVVM models it as `alloca`s in the default
// memory space and does not support `alloca`s with addrspace(5).
auto ptrType = LLVM::LLVMPointerType::get(
typeConverter->convertType(type.getElementType())
.template cast<Type>(),
AllocaAddrSpace);
Value numElements = rewriter.create<LLVM::ConstantOp>(
gpuFuncOp.getLoc(), int64Ty,
rewriter.getI64IntegerAttr(type.getNumElements()));
Value allocated = rewriter.create<LLVM::AllocaOp>(
gpuFuncOp.getLoc(), ptrType, numElements, /*alignment=*/0);
auto descr = MemRefDescriptor::fromStaticShape(
rewriter, loc, *getTypeConverter(), type, allocated);
signatureConversion.remapInput(
numProperArguments + numWorkgroupAttributions + en.index(), descr);
}
}
// Move the region to the new function, update the entry block signature.
rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(),
llvmFuncOp.end());
if (failed(rewriter.convertRegionTypes(
&llvmFuncOp.getBody(), *typeConverter, &signatureConversion)))
return failure();
rewriter.eraseOp(gpuFuncOp);
return success();
}
};
struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
using ConvertOpToLLVMPattern<gpu::ReturnOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(gpu::ReturnOp op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override {
rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, operands);
return success();
}
};
} // namespace mlir
#endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_