[flang] support fir.alloca operations inside of omp reduction ops (#84952)

Advise placing the alloca at the start of the first block of whichever
region (init or combiner) we are currently inside.

It probably isn't safe to put an alloca inside of a combiner region
because the combiner will be executed multiple times. But that would be a
bug to fix in Lower/OpenMP.cpp, not here.

OpenMP array reductions 1/6
Next PR: https://github.com/llvm/llvm-project/pull/84953
This commit is contained in:
Tom Eccles
2024-03-15 11:46:12 +00:00
committed by GitHub
parent f623adbbbd
commit e12b46fef7
3 changed files with 47 additions and 2 deletions

View File

@@ -208,6 +208,8 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() {
.getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>()) {
return ompOutlineableIface.getAllocaBlock();
}
if (mlir::isa<mlir::omp::ReductionDeclareOp>(getRegion().getParentOp()))
return &getRegion().front();
if (auto accRecipeIface =
getRegion().getParentOfType<mlir::acc::RecipeInterface>()) {
return accRecipeIface.getAllocaBlock(getRegion());

View File

@@ -410,8 +410,15 @@ protected:
mlir::ConversionPatternRewriter &rewriter) const {
auto thisPt = rewriter.saveInsertionPoint();
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp);
rewriter.setInsertionPointToStart(insertBlock);
if (mlir::isa<mlir::omp::ReductionDeclareOp>(parentOp)) {
// ReductionDeclareOp has multiple child regions. We want to get the first
// block of whichever of those regions we are currently in
mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent();
rewriter.setInsertionPointToStart(&parentRegion->front());
} else {
mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp);
rewriter.setInsertionPointToStart(insertBlock);
}
auto size = genI32Constant(loc, rewriter, 1);
unsigned allocaAs = getAllocaAddressSpace(rewriter);
unsigned programAs = getProgramAddressSpace(rewriter);

View File

@@ -0,0 +1,36 @@
// RUN: tco %s | FileCheck %s
// the fir.embox in the init region is turned into an alloca for the box. Test
// that CodeGen.cpp knows where to place an alloca when it is inside of an
// omp.reduction.declare
// regrettably this has to be nonsense IR because we need the subsequent patches
// to process anything useful
// Reduction declaration whose init region contains fir.alloca ops and a
// fir.embox, so that code generation has to choose an alloca insertion point
// inside an omp.reduction.declare. The combiner region is a placeholder.
omp.reduction.declare @test_reduction : !fir.ref<!fir.box<i32>> init {
^bb0(%arg0: !fir.ref<!fir.box<i32>>):
// storage for the box that this region yields
%0 = fir.alloca !fir.box<i32>
// scalar that the fir.embox below wraps
%1 = fir.alloca i32
%2 = fir.embox %1 : (!fir.ref<i32>) -> !fir.box<i32>
// use the embox for something so it isn't removed
fir.store %2 to %0 : !fir.ref<!fir.box<i32>>
omp.yield(%0 : !fir.ref<!fir.box<i32>>)
} combiner {
^bb0(%arg0: !fir.ref<!fir.box<i32>>, %arg1: !fir.ref<!fir.box<i32>>):
// neither block argument is read; an undefined reference is yielded instead
%0 = fir.undefined !fir.ref<!fir.box<i32>>
omp.yield(%0 : !fir.ref<!fir.box<i32>>)
}
// Function holding an otherwise empty omp.parallel region whose byref
// reduction clause references @test_reduction, with a locally allocated box
// as the reduction variable.
func.func @_QQmain() attributes {fir.bindc_name = "reduce"} {
// box passed by reference as the reduction variable
%4 = fir.alloca !fir.box<i32>
omp.parallel byref reduction(@test_reduction %4 -> %arg0 : !fir.ref<!fir.box<i32>>) {
omp.terminator
}
return
}
// basically we are testing that there isn't a crash
// CHECK-LABEL: define void @_QQmain
// CHECK-NEXT: alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1, align 8