Discussion on the forum: https://llvm.discourse.group/t/bug-in-partial-dialect-conversion/4115 `applyPartialConversion` did not handle operations that were marked as illegal by a dynamic legality callback. Instead of reporting an error when such an operation was not converted to the legal set, the method just added it to `unconvertedSet`, in the same way as unknown operations. This patch fixes that so that dynamically illegal operations are handled as well. The patch includes 2 fixes for existing passes: * `tensor-bufferize` - explicitly mark `std.return` as legal. * `convert-parallel-loops-to-gpu` - ugly fix that marks visited operations to avoid recursive legality checks. Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D108505
75 lines
2.7 KiB
C++
75 lines
2.7 KiB
C++
//===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"
|
|
#include "../PassDetail.h"
|
|
#include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
|
|
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
|
#include "mlir/Dialect/Complex/IR/Complex.h"
|
|
#include "mlir/Dialect/GPU/GPUDialect.h"
|
|
#include "mlir/Dialect/SCF/SCF.h"
|
|
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
|
#include "mlir/Transforms/DialectConversion.h"
|
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
|
|
using namespace mlir;
|
|
using namespace mlir::scf;
|
|
|
|
namespace {
|
|
// A pass that traverses top-level loops in the function and converts them to
|
|
// GPU launch operations. Nested launches are not allowed, so this does not
|
|
// walk the function recursively to avoid considering nested loops.
|
|
struct ForLoopMapper : public ConvertAffineForToGPUBase<ForLoopMapper> {
|
|
ForLoopMapper() = default;
|
|
ForLoopMapper(unsigned numBlockDims, unsigned numThreadDims) {
|
|
this->numBlockDims = numBlockDims;
|
|
this->numThreadDims = numThreadDims;
|
|
}
|
|
|
|
void runOnFunction() override {
|
|
for (Operation &op : llvm::make_early_inc_range(getFunction().getOps())) {
|
|
if (auto forOp = dyn_cast<AffineForOp>(&op)) {
|
|
if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims,
|
|
numThreadDims)))
|
|
signalPassFailure();
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
struct ParallelLoopToGpuPass
|
|
: public ConvertParallelLoopToGpuBase<ParallelLoopToGpuPass> {
|
|
void runOnOperation() override {
|
|
RewritePatternSet patterns(&getContext());
|
|
populateParallelLoopToGPUPatterns(patterns);
|
|
ConversionTarget target(getContext());
|
|
target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });
|
|
configureParallelLoopToGPULegality(target);
|
|
if (failed(applyPartialConversion(getOperation(), target,
|
|
std::move(patterns))))
|
|
signalPassFailure();
|
|
finalizeParallelLoopToGPUConversion(getOperation());
|
|
}
|
|
};
|
|
|
|
} // namespace
|
|
|
|
// Creates a ForLoopMapper pass configured with the given number of block and
// thread dimensions.
std::unique_ptr<OperationPass<FuncOp>>
mlir::createAffineForToGPUPass(unsigned numBlockDims, unsigned numThreadDims) {
  std::unique_ptr<OperationPass<FuncOp>> mapperPass =
      std::make_unique<ForLoopMapper>(numBlockDims, numThreadDims);
  return mapperPass;
}
|
|
// Creates a default-constructed ForLoopMapper pass, leaving the block and
// thread dimension counts at whatever the pass itself defaults to.
std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineForToGPUPass() {
  std::unique_ptr<OperationPass<FuncOp>> mapperPass =
      std::make_unique<ForLoopMapper>();
  return mapperPass;
}
|
|
|
|
std::unique_ptr<Pass> mlir::createParallelLoopToGpuPass() {
|
|
return std::make_unique<ParallelLoopToGpuPass>();
|
|
}
|