[flang][CodeGen] fix bug hoisting allocas using a shared constant arg (#116251)

When hoisting the allocas with a constant integer size, the constant
integer was moved to where the alloca is hoisted to unconditionally.

By CodeGen there have been various iterations of mlir canonicalization
and dead code elimination. This can cause lots of unrelated bits of code
to share the same constant values. If for some reason the alloca
couldn't be hoisted all of the way to the entry block of the function,
moving the constant might result in it no-longer dominating some of the
remaining uses.

In theory, there should be dominance analysis to ensure the location of
the constant does dominate all uses of it. But those constants are
effectively free anyway (they aren't even separate instructions in LLVM
IR), so it is less expensive just to leave the old one where it was and
insert a new one we know for sure is immediately before the alloca.
This commit is contained in:
Tom Eccles
2024-11-15 10:31:20 +00:00
committed by GitHub
parent 7c8e05aa45
commit e9fc2faf0c
2 changed files with 56 additions and 1 deletions

View File

@@ -279,7 +279,14 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent();
mlir::Block *insertBlock =
getBlockForAllocaInsert(parentOp, parentRegion);
size.getDefiningOp()->moveBefore(&insertBlock->front());
// The old size might have had multiple users, some at a broader scope
// than we can safely outline the alloca to. As it is only an
// llvm.constant operation, it is faster to clone it than to calculate the
// dominance to see if it really should be moved.
mlir::Operation *clonedSize = rewriter.clone(*size.getDefiningOp());
size = clonedSize->getResult(0);
clonedSize->moveBefore(&insertBlock->front());
rewriter.setInsertionPointAfter(size.getDefiningOp());
}

View File

@@ -1065,3 +1065,51 @@ func.func @omp_map_common_block_using_common_block_members() {
}
fir.global common @var_common_(dense<0> : vector<8xi8>) {alignment = 4 : i64} : !fir.array<8xi8>
// -----
func.func @use_string(%arg0 : !fir.ref<!fir.char<1,?>>) {
return
}
func.func @use_index(%arg0 : index) {
return
}
// CHECK-LABEL: llvm.func @alloca_hoisting_openmp() {
// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(6 : index) : i64
// CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(42 : i32) : i32
// CHECK: omp.parallel {
// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(6 : index) : i64
// CHECK: %[[VAL_4:.*]] = llvm.alloca %[[VAL_3]] x i8 : (i64) -> !llvm.ptr
// CHECK: omp.wsloop {
// CHECK: omp.loop_nest (%[[VAL_5:.*]]) : i32 = (%[[VAL_1]]) to (%[[VAL_2]]) inclusive step (%[[VAL_1]]) {
// CHECK: %[[VAL_6:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: llvm.call @use_string(%[[VAL_4]]) : (!llvm.ptr) -> ()
// CHECK: omp.yield
// CHECK: }
// CHECK: }
// CHECK: omp.terminator
// CHECK: }
// CHECK: llvm.call @use_index(%[[VAL_0]]) : (i64) -> ()
// CHECK: llvm.return
// CHECK: }
func.func @alloca_hoisting_openmp() {
%c6 = arith.constant 6 : index
%c1_i32 = arith.constant 1 : i32
%c42_i32 = arith.constant 42 : i32
omp.parallel {
omp.wsloop {
omp.loop_nest (%arg0) : i32 = (%c1_i32) to (%c42_i32) inclusive step (%c1_i32) {
%0 = fir.alloca !fir.char<1,?>(%c6 : index)
fir.call @use_string(%0) : (!fir.ref<!fir.char<1,?>>) -> ()
omp.yield
}
}
omp.terminator
}
fir.call @use_index(%c6) : (index) -> ()
return
}