Files
clang-p2996/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
Victor Perez d45de8003a [MLIR][GPU-LLVM] Convert gpu.func to llvm.func (#101664)
Add support in `-convert-gpu-to-llvm-spv` to convert `gpu.func` to
`llvm.func` operations.

- `spir_kernel`/`spir_func` calling conventions used for
kernels/functions.
- `workgroup` attributions encoded as additional `llvm.ptr<3>`
arguments.
- No attribute used to annotate kernels.
- `reqd_work_group_size` attribute used to encode
`gpu.known_block_size`.
- `llvm.mlir.workgroup_attrib_size` used to encode workgroup attribution
sizes. This will be attached to the pointer arguments that workgroup
attributions are lowered to (see the sketch after this list).
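
For illustration, a minimal sketch of how these options might be wired up when populating conversion patterns. This is not the exact code used by the pass: it assumes `ctx` (an `MLIRContext *`), `converter` (an `LLVMTypeConverter`), and `patterns` (a `RewritePatternSet`) are in scope, and the `allocaAddrSpace` value is an assumption.

```c++
// Illustrative sketch of configuring GPUFuncOpLowering for this flavor.
GPUFuncOpLoweringOptions options;
options.allocaAddrSpace = 0;    // assumption: private allocas in addrspace 0
options.workgroupAddrSpace = 3; // workgroup attributions become llvm.ptr<3>
options.kernelAttributeName = StringAttr(); // no kernel annotation attribute
options.kernelBlockSizeAttributeName =
    StringAttr::get(ctx, "reqd_work_group_size"); // gpu.known_block_size
options.kernelCallingConvention = LLVM::CConv::SPIR_KERNEL;
options.nonKernelCallingConvention = LLVM::CConv::SPIR_FUNC;
options.encodeWorkgroupAttributionsAsArguments = true;
patterns.add<GPUFuncOpLowering>(converter, options);
```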

**Note**: A notable missing feature that will be addressed in a
follow-up PR is a `-use-bare-ptr-memref-call-conv` option to replace
MemRef arguments with bare pointers to the MemRef element types instead
of the current MemRef descriptor approach.

---------

Signed-off-by: Victor Perez <victor.perez@codeplay.com>
2024-08-09 16:09:11 +02:00

176 lines
7.0 KiB
C++

//===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
#define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_

#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"

namespace mlir {

/// Lowering for gpu.dynamic.shared.memory to the LLVM dialect. The pattern
/// first creates a 0-sized global array symbol, as LLVM expects, then
/// constructs a memref descriptor with these values and returns it.
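///
/// A typical registration site might look like the following sketch (the
/// `patterns` and `converter` names are assumed to be in scope, and the
/// 64-bit alignment is only an example value):
///
///   patterns.add<GPUDynamicSharedMemoryOpLowering>(converter,
///                                                  /*alignmentBit=*/64);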
struct GPUDynamicSharedMemoryOpLowering
    : public ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp> {
  using ConvertOpToLLVMPattern<
      gpu::DynamicSharedMemoryOp>::ConvertOpToLLVMPattern;
  GPUDynamicSharedMemoryOpLowering(const LLVMTypeConverter &converter,
                                   unsigned alignmentBit = 0)
      : ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp>(converter),
        alignmentBit(alignmentBit) {}

  LogicalResult
  matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  // Alignment, in bits, of the generated shared-memory global.
  unsigned alignmentBit;
};
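
/// Options controlling the lowering of `gpu.func` to `llvm.func` performed by
/// GPUFuncOpLowering below.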
struct GPUFuncOpLoweringOptions {
  /// The address space to use for `alloca`s in private memory.
  unsigned allocaAddrSpace;
  /// The address space to use for declaring workgroup memory.
  unsigned workgroupAddrSpace;
  /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
  /// should be used.
  StringAttr kernelAttributeName;
  /// The attribute name used to set the block size. Null if no attribute
  /// should be used.
  StringAttr kernelBlockSizeAttributeName;
  /// The calling convention to use for kernel functions.
  LLVM::CConv kernelCallingConvention = LLVM::CConv::C;
  /// The calling convention to use for non-kernel functions.
  LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C;
  /// Whether to encode workgroup attributions as additional arguments instead
  /// of a global variable.
  bool encodeWorkgroupAttributionsAsArguments = false;
};
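
/// Lowers `gpu.func` operations, both kernels and regular functions, to
/// `llvm.func`, as configured by GPUFuncOpLoweringOptions.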
struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
  GPUFuncOpLowering(const LLVMTypeConverter &converter,
                    const GPUFuncOpLoweringOptions &options)
      : ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter),
        allocaAddrSpace(options.allocaAddrSpace),
        workgroupAddrSpace(options.workgroupAddrSpace),
        kernelAttributeName(options.kernelAttributeName),
        kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName),
        kernelCallingConvention(options.kernelCallingConvention),
        nonKernelCallingConvention(options.nonKernelCallingConvention),
        encodeWorkgroupAttributionsAsArguments(
            options.encodeWorkgroupAttributionsAsArguments) {}

  LogicalResult
  matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  /// The address space to use for `alloca`s in private memory.
  unsigned allocaAddrSpace;
  /// The address space to use for declaring workgroup memory.
  unsigned workgroupAddrSpace;
  /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
  /// should be used.
  StringAttr kernelAttributeName;
  /// The attribute name used to set the block size. Null if no attribute
  /// should be used.
  StringAttr kernelBlockSizeAttributeName;
  /// The calling convention to use for kernel functions.
  LLVM::CConv kernelCallingConvention;
  /// The calling convention to use for non-kernel functions.
  LLVM::CConv nonKernelCallingConvention;
  /// Whether to encode workgroup attributions as additional arguments instead
  /// of a global variable.
  bool encodeWorkgroupAttributionsAsArguments;
};

/// The lowering of gpu.printf to a call to HIP hostcalls.
///
/// Simplifies llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp, as we don't have
/// to deal with %s (even if there were first-class strings in MLIR, they're
/// not legal input to gpu.printf) or non-constant format strings.
struct GPUPrintfOpToHIPLowering : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};

/// The lowering of gpu.printf to a call to an external printf() function.
///
/// This pattern will add a declaration of printf() to the GPUModule if needed
/// and separate out the format strings into global constants. For some
/// runtimes, such as OpenCL on AMD, this is sufficient setup, as the compiler
/// will lower printf calls to appropriate device-side code.
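///
/// An illustrative registration (the `patterns` and `converter` names are
/// assumed; the address-space value is target-dependent and given here only
/// as an example of placing format strings in a constant address space):
///
///   patterns.add<GPUPrintfOpToLLVMCallLowering>(converter,
///                                               /*addressSpace=*/4);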
struct GPUPrintfOpToLLVMCallLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  GPUPrintfOpToLLVMCallLowering(const LLVMTypeConverter &converter,
                                int addressSpace = 0)
      : ConvertOpToLLVMPattern<gpu::PrintfOp>(converter),
        addressSpace(addressSpace) {}

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  /// The address space in which to create the format-string globals.
  int addressSpace;
};

/// Lowering of gpu.printf to a call to the vprintf standard library function.
struct GPUPrintfOpToVPrintfLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};
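
/// Lowering of gpu.return to llvm.return.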
struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
  using ConvertOpToLLVMPattern<gpu::ReturnOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};

namespace impl {
/// Unrolls op if it's operating on vectors.
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands,
                                ConversionPatternRewriter &rewriter,
                                const LLVMTypeConverter &converter);
} // namespace impl

/// Rewrite pattern that unrolls SourceOp into scalar operations when it is
/// operating on vectors.
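///
/// For instance, a lowering might instantiate it for a specific op as in the
/// following sketch (`math::ExpOp` is an illustrative choice only, not a
/// statement about which ops are registered in-tree):
///
///   patterns.add<ScalarizeVectorOpLowering<math::ExpOp>>(converter);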
template <typename SourceOp>
struct ScalarizeVectorOpLowering : public ConvertOpToLLVMPattern<SourceOp> {
public:
  using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    return impl::scalarizeVectorOp(op, adaptor.getOperands(), rewriter,
                                   *this->getTypeConverter());
  }
};
} // namespace mlir

#endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_