[flang] support fir.alloca operations inside of omp reduction ops (#84952)

Advise placing the alloca at the start of the first block of whichever region (init or combiner) we are currently inside. It probably isn't safe to put an alloca inside of a combiner region, because the combiner will be executed multiple times; but that would be a bug to fix in Lower/OpenMP.cpp, not here. This is part 1/6 of the OpenMP array reductions series. Next PR: https://github.com/llvm/llvm-project/pull/84953
2024-03-15 11:46:12 +00:00
parent f623adbbbd
commit e12b46fef7
3 changed files with 47 additions and 2 deletions
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -208,6 +208,8 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() {
              .getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>()) {
    return ompOutlineableIface.getAllocaBlock();
  }
+  if (mlir::isa<mlir::omp::ReductionDeclareOp>(getRegion().getParentOp()))
+    return &getRegion().front();
  if (auto accRecipeIface =
          getRegion().getParentOfType<mlir::acc::RecipeInterface>()) {
    return accRecipeIface.getAllocaBlock(getRegion());
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -410,8 +410,15 @@ protected:
      mlir::ConversionPatternRewriter &rewriter) const {
    auto thisPt = rewriter.saveInsertionPoint();
    mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
-    mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp);
-    rewriter.setInsertionPointToStart(insertBlock);
+    if (mlir::isa<mlir::omp::ReductionDeclareOp>(parentOp)) {
+      // ReductionDeclareOp has multiple child regions. We want to get the first
+      // block of whichever of those regions we are currently in.
+      mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent();
+      rewriter.setInsertionPointToStart(&parentRegion->front());
+    } else {
+      mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp);
+      rewriter.setInsertionPointToStart(insertBlock);
+    }
    auto size = genI32Constant(loc, rewriter, 1);
    unsigned allocaAs = getAllocaAddressSpace(rewriter);
    unsigned programAs = getProgramAddressSpace(rewriter);
--- a/flang/test/Fir/omp-reduction-embox-codegen.fir
+++ b/flang/test/Fir/omp-reduction-embox-codegen.fir
@@ -0,0 +1,36 @@
+// RUN: tco %s | FileCheck %s
+
+// The fir.embox in the init region is turned into an alloca for the box. Test
+// that CodeGen.cpp knows where to place an alloca when it is inside of an
+// omp.reduction.declare.
+
+// Regrettably, this has to be nonsense IR because the subsequent patches are
+// needed to process anything useful.
+
+omp.reduction.declare @test_reduction : !fir.ref<!fir.box<i32>> init {
+^bb0(%arg0: !fir.ref<!fir.box<i32>>):
+  %0 = fir.alloca !fir.box<i32>
+  %1 = fir.alloca i32
+  %2 = fir.embox %1 : (!fir.ref<i32>) -> !fir.box<i32>
+
+  // use the embox for something so it isn't removed
+  fir.store %2 to %0 : !fir.ref<!fir.box<i32>>
+
+  omp.yield(%0 : !fir.ref<!fir.box<i32>>)
+} combiner {
+^bb0(%arg0: !fir.ref<!fir.box<i32>>, %arg1: !fir.ref<!fir.box<i32>>):
+  %0 = fir.undefined !fir.ref<!fir.box<i32>>
+  omp.yield(%0 : !fir.ref<!fir.box<i32>>)
+}
+
+func.func @_QQmain() attributes {fir.bindc_name = "reduce"} {
+  %4 = fir.alloca !fir.box<i32>
+  omp.parallel byref reduction(@test_reduction %4 -> %arg0 : !fir.ref<!fir.box<i32>>) {
+    omp.terminator
+  }
+  return
+}
+
+// basically we are testing that there isn't a crash
+// CHECK-LABEL: define void @_QQmain
+// CHECK-NEXT:    alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1, align 8