clang-p2996/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
Christopher Bate 6ca1a09f03 [mlir][gpu] Migrate hard-coded address space integers to an enum attribute (gpu::AddressSpaceAttr)
This is a purely mechanical change that introduces an enum attribute in the GPU
dialect to represent the various memref memory spaces, as opposed to the
hard-coded integer attributes that are currently used.

The following steps were taken to make the transition across the codebase:

1. Introduce a pass "gpu-lower-memory-space-attributes":

The pass updates all memref types whose memory space attribute is a
`gpu::AddressSpaceAttr`. These attributes are changed to `IntegerAttr`s using a
mapping supplied by the caller (a sketch of such a mapping is shown below). The
pass is based on the "map-memref-spirv-storage-class" pass, and the common
functions could probably be refactored into a set of utilities under the MemRef
dialect.
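
For illustration, the caller-supplied mapping is just a function from the new
enum to a target-specific integer. A minimal sketch (the NVVM-style numbers
below are assumptions made for this example, not something the pass mandates):

  #include "mlir/Dialect/GPU/IR/GPUDialect.h"
  #include "llvm/Support/ErrorHandling.h"

  // Translate the GPU dialect address-space enum to integer address spaces.
  static unsigned mapToExampleIntegerSpace(mlir::gpu::AddressSpace space) {
    switch (space) {
    case mlir::gpu::AddressSpace::Global:
      return 1; // e.g. global memory
    case mlir::gpu::AddressSpace::Workgroup:
      return 3; // e.g. workgroup / shared memory
    case mlir::gpu::AddressSpace::Private:
      return 5; // e.g. private / local memory
    }
    llvm_unreachable("unknown GPU address space");
  }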

2. Update the verifiers of GPU/NVGPU dialect operations.

If a verifier currently checks the address space of an operand using, e.g.,
`getWorkspaceAddressSpace`, it can continue to do so. However, the checks are
changed to only fail if the memory space is either missing or is a
`gpu::AddressSpaceAttr` with the wrong value, as sketched below. Any other
attribute is assumed to be correct, because it means the address space was
deliberately lowered to something other than a `gpu::AddressSpaceAttr`.
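
A sketch of the relaxed check described above (the helper name is hypothetical,
and the accessors are those typically generated for MLIR enum attributes):

  #include "mlir/Dialect/GPU/IR/GPUDialect.h"
  #include "mlir/IR/BuiltinTypes.h"

  // Reject only a missing memory space or a gpu::AddressSpaceAttr with the
  // wrong enumerator; any other attribute is assumed to be already lowered.
  static mlir::LogicalResult checkIsWorkgroupMemory(mlir::MemRefType type) {
    mlir::Attribute space = type.getMemorySpace();
    if (!space)
      return mlir::failure();
    if (auto gpuSpace = space.dyn_cast<mlir::gpu::AddressSpaceAttr>())
      return mlir::success(gpuSpace.getValue() ==
                           mlir::gpu::AddressSpace::Workgroup);
    return mlir::success();
  }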

3. Update existing gpu-to-llvm conversion infrastructure.

In the existing gpu-to-X passes, we add a full conversion equivalent to
`gpu-lower-memory-space-attributes` just before the conversion to the LLVM
dialect. This is needed because both gpu-to-llvm passes (rocdl, nvvm) currently
run gpu-to-gpu rewrites within the pass, and those rewrites introduce
`AddressSpaceAttr` memory space annotations. The memory space conversion is
therefore inserted between the gpu-to-gpu rewrites and the LLVM conversion, as
outlined below.
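
The resulting ordering inside those passes looks roughly like the following
sketch (the helper functions are hypothetical stand-ins, not the real pass
internals):

  #include "mlir/IR/BuiltinOps.h"
  #include "mlir/Support/LogicalResult.h"

  mlir::LogicalResult runGpuToGpuRewrites(mlir::ModuleOp);    // hypothetical
  mlir::LogicalResult lowerGpuMemorySpaces(mlir::ModuleOp);   // hypothetical
  mlir::LogicalResult convertGpuModuleToLLVM(mlir::ModuleOp); // hypothetical

  mlir::LogicalResult runLoweringPipelineSketch(mlir::ModuleOp module) {
    // 1. gpu-to-gpu rewrites; these may introduce gpu::AddressSpaceAttr.
    if (mlir::failed(runGpuToGpuRewrites(module)))
      return mlir::failure();
    // 2. Full conversion of gpu::AddressSpaceAttr memory spaces to integers.
    if (mlir::failed(lowerGpuMemorySpaces(module)))
      return mlir::failure();
    // 3. Only then, conversion to the LLVM dialect.
    return convertGpuModuleToLLVM(module);
  }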

For more context see the below discourse discussion:
https://discourse.llvm.org/t/gpu-workgroup-shared-memory-address-space-is-hard-coded/

Reviewed By: ftynse

Differential Revision: https://reviews.llvm.org/D140644
2023-01-13 11:00:10 -07:00

118 lines
4.5 KiB
C++

//===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
#define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_

#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"

namespace mlir {

struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
  GPUFuncOpLowering(LLVMTypeConverter &converter, unsigned allocaAddrSpace,
                    unsigned workgroupAddrSpace, StringAttr kernelAttributeName)
      : ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter),
        allocaAddrSpace(allocaAddrSpace),
        workgroupAddrSpace(workgroupAddrSpace),
        kernelAttributeName(kernelAttributeName) {}

  LogicalResult
  matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  /// The address space to use for `alloca`s in private memory.
  unsigned allocaAddrSpace;
  /// The address space to use for declaring workgroup memory.
  unsigned workgroupAddrSpace;
  /// The attribute name to use instead of `gpu.kernel`.
  StringAttr kernelAttributeName;
};
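
/// Illustrative sketch only (not part of the upstream header): shows how the
/// pattern above could be registered with target-specific integer address
/// spaces. The NVVM-style numbers and the "nvvm.kernel" attribute name are
/// assumptions made for this example.
inline void populateGpuFuncOpLoweringExample(LLVMTypeConverter &converter,
                                             RewritePatternSet &patterns) {
  patterns.add<GPUFuncOpLowering>(
      converter,
      /*allocaAddrSpace=*/5,    // e.g. private / local memory
      /*workgroupAddrSpace=*/3, // e.g. workgroup / shared memory
      StringAttr::get(&converter.getContext(), "nvvm.kernel"));
}
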
/// The lowering of gpu.printf to a call to HIP hostcalls
///
/// Simplifies llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp, as we don't have
/// to deal with %s (even if there were first-class strings in MLIR, they're not
/// legal input to gpu.printf) or non-constant format strings
struct GPUPrintfOpToHIPLowering : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};

/// The lowering of gpu.printf to a call to an external printf() function
///
/// This pass will add a declaration of printf() to the GPUModule if needed
/// and separate out the format strings into global constants. For some
/// runtimes, such as OpenCL on AMD, this is sufficient setup, as the compiler
/// will lower printf calls to appropriate device-side code
struct GPUPrintfOpToLLVMCallLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  GPUPrintfOpToLLVMCallLowering(LLVMTypeConverter &converter,
                                int addressSpace = 0)
      : ConvertOpToLLVMPattern<gpu::PrintfOp>(converter),
        addressSpace(addressSpace) {}

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  int addressSpace;
};

/// Lowering of gpu.printf to a call to the vprintf standard library function.
struct GPUPrintfOpToVPrintfLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};

struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
  using ConvertOpToLLVMPattern<gpu::ReturnOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, adaptor.getOperands());
    return success();
  }
};

namespace impl {
/// Unrolls op if it's operating on vectors.
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands,
                                ConversionPatternRewriter &rewriter,
                                LLVMTypeConverter &converter);
} // namespace impl

/// Rewriting that unrolls SourceOp to scalars if it's operating on vectors.
template <typename SourceOp>
struct ScalarizeVectorOpLowering : public ConvertOpToLLVMPattern<SourceOp> {
public:
  using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    return impl::scalarizeVectorOp(op, adaptor.getOperands(), rewriter,
                                   *this->getTypeConverter());
  }
};
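
// Illustrative only (not part of the upstream header): ScalarizeVectorOpLowering
// is instantiated per source op when populating conversion patterns, e.g. with a
// hypothetical elementwise op `ExampleOp`:
//
//   patterns.add<ScalarizeVectorOpLowering<ExampleOp>>(converter);
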
} // namespace mlir
#endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_