[flang] introduce fir.copy to avoid load store of aggregates (#130289)

Introduce a FIR operation to do memcopy/memmove of compile time constant size types.

This is to avoid requiring derived type copies to done with load/store
which is badly supported in LLVM when the aggregate type is "big" (no
threshold can easily be defined here, better to always avoid them for
fir.type).

This was the root cause of the regressions caused by #114002 which introduced a
load/store of fir.type<> which caused hand/asserts to fire in LLVM on
several benchmarks.

See https://llvm.org/docs/Frontend/PerformanceTips.html#avoid-creating-values-of-aggregate-type
This commit is contained in:
jeanPerier
2025-03-11 09:31:03 +01:00
committed by GitHub
parent cb7298f66d
commit 1ddf18057a
7 changed files with 187 additions and 5 deletions

View File

@@ -68,6 +68,12 @@ def IsBoxAddressOrValueTypePred
def fir_BoxAddressOrValueType : Type<IsBoxAddressOrValueTypePred,
"fir.box or fir.class type or reference">;
def RefOfConstantSizeAggregateTypePred
: CPred<"::fir::isRefOfConstantSizeAggregateType($_self)">;
def AnyRefOfConstantSizeAggregateType : TypeConstraint<
RefOfConstantSizeAggregateTypePred,
"a reference type to a constant size fir.array, fir.char, or fir.type">;
//===----------------------------------------------------------------------===//
// Memory SSA operations
//===----------------------------------------------------------------------===//
@@ -342,6 +348,44 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface]> {
}];
}
def fir_CopyOp : fir_Op<"copy", []> {
let summary = "copy constant size memory";
let description = [{
Copy the memory from a source with compile time constant size to
a destination of the same type.
This is meant to be used for aggregate types where load and store
are not appropriate to make a copy because LLVM is not meant to
handle load and store of "big" aggregates.
Its "no_overlap" attribute allows indicating that the source and destination
are known to not overlap at compile time.
```
!t =!fir.type<t{x:!fir.array<1000xi32>}>
fir.copy %x to %y : !fir.ref<!t>, !fir.ref<!t>
```
TODO: add FirAliasTagOpInterface to carry TBAA.
}];
let arguments = (ins Arg<AnyRefOfConstantSizeAggregateType, "", [MemRead]>:$source,
Arg<AnyRefOfConstantSizeAggregateType, "", [MemWrite]>:$destination,
OptionalAttr<UnitAttr>:$no_overlap);
let builders = [OpBuilder<(ins "mlir::Value":$source,
"mlir::Value":$destination,
CArg<"bool", "false">:$no_overlap)>];
let assemblyFormat = [{
$source `to` $destination (`no_overlap` $no_overlap^)?
attr-dict `:` type(operands)
}];
let hasVerifier = 1;
}
def fir_SaveResultOp : fir_Op<"save_result", [AttrSizedOperandSegments]> {
let summary = [{
save an array, box, or record function result SSA-value to a memory location

View File

@@ -498,6 +498,13 @@ inline bool isBoxProcAddressType(mlir::Type t) {
return t && mlir::isa<fir::BoxProcType>(t);
}
inline bool isRefOfConstantSizeAggregateType(mlir::Type t) {
t = fir::dyn_cast_ptrEleTy(t);
return t &&
mlir::isa<fir::CharacterType, fir::RecordType, fir::SequenceType>(t) &&
!hasDynamicSize(t);
}
/// Return a string representation of `ty`.
///
/// fir.array<10x10xf32> -> prefix_10x10xf32

View File

@@ -3545,6 +3545,36 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> {
}
};
/// `fir.copy` --> `llvm.memcpy` or `llvm.memmove`
struct CopyOpConversion : public fir::FIROpConversion<fir::CopyOp> {
using FIROpConversion::FIROpConversion;
llvm::LogicalResult
matchAndRewrite(fir::CopyOp copy, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
mlir::Location loc = copy.getLoc();
mlir::Value llvmSource = adaptor.getSource();
mlir::Value llvmDestination = adaptor.getDestination();
mlir::Type i64Ty = mlir::IntegerType::get(rewriter.getContext(), 64);
mlir::Type copyTy = fir::unwrapRefType(copy.getSource().getType());
mlir::Value copySize =
genTypeStrideInBytes(loc, i64Ty, rewriter, convertType(copyTy));
mlir::LLVM::AliasAnalysisOpInterface newOp;
if (copy.getNoOverlap())
newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
loc, llvmDestination, llvmSource, copySize, /*isVolatile=*/false);
else
newOp = rewriter.create<mlir::LLVM::MemmoveOp>(
loc, llvmDestination, llvmSource, copySize, /*isVolatile=*/false);
// TODO: propagate TBAA once FirAliasTagOpInterface added to CopyOp.
attachTBAATag(newOp, copyTy, copyTy, nullptr);
rewriter.eraseOp(copy);
return mlir::success();
}
};
namespace {
/// Convert `fir.unboxchar` into two `llvm.extractvalue` instructions. One for
@@ -4148,11 +4178,11 @@ void fir::populateFIRToLLVMConversionPatterns(
BoxOffsetOpConversion, BoxProcHostOpConversion, BoxRankOpConversion,
BoxTypeCodeOpConversion, BoxTypeDescOpConversion, CallOpConversion,
CmpcOpConversion, ConvertOpConversion, CoordinateOpConversion,
DTEntryOpConversion, DeclareOpConversion, DivcOpConversion,
EmboxOpConversion, EmboxCharOpConversion, EmboxProcOpConversion,
ExtractValueOpConversion, FieldIndexOpConversion, FirEndOpConversion,
FreeMemOpConversion, GlobalLenOpConversion, GlobalOpConversion,
InsertOnRangeOpConversion, IsPresentOpConversion,
CopyOpConversion, DTEntryOpConversion, DeclareOpConversion,
DivcOpConversion, EmboxOpConversion, EmboxCharOpConversion,
EmboxProcOpConversion, ExtractValueOpConversion, FieldIndexOpConversion,
FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion,
GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion,
LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion,
NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion,
SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion,

View File

@@ -3940,6 +3940,26 @@ void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
build(builder, result, value, memref, {});
}
//===----------------------------------------------------------------------===//
// CopyOp
//===----------------------------------------------------------------------===//
void fir::CopyOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
mlir::Value source, mlir::Value destination,
bool noOverlap) {
mlir::UnitAttr noOverlapAttr =
noOverlap ? builder.getUnitAttr() : mlir::UnitAttr{};
build(builder, result, source, destination, noOverlapAttr);
}
llvm::LogicalResult fir::CopyOp::verify() {
mlir::Type sourceType = fir::unwrapRefType(getSource().getType());
mlir::Type destinationType = fir::unwrapRefType(getDestination().getType());
if (sourceType != destinationType)
return emitOpError("source and destination must have the same value type");
return mlir::success();
}
//===----------------------------------------------------------------------===//
// StringLitOp
//===----------------------------------------------------------------------===//

View File

@@ -0,0 +1,35 @@
// Test fir.copy codegen.
// RUN: fir-opt --fir-to-llvm-ir %s -o - | FileCheck %s
!t=!fir.type<sometype{i:!fir.array<9xi32>}>
module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"} {
func.func @test_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
return
}
// CHECK-LABEL: llvm.func @test_copy_1(
// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)>
// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64
// CHECK: "llvm.intr.memcpy"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
// CHECK: llvm.return
// CHECK: }
func.func @test_copy_2(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
fir.copy %arg0 to %arg1 : !fir.ref<!t>, !fir.ref<!t>
return
}
// CHECK-LABEL: llvm.func @test_copy_2(
// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)>
// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64
// CHECK: "llvm.intr.memmove"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
// CHECK: llvm.return
// CHECK: }
}

View File

@@ -933,3 +933,12 @@ func.func @test_call_arg_attrs_indirect(%arg0: i16, %arg1: (i16)-> i16) -> i16 {
%0 = fir.call %arg1(%arg0) : (i16 {llvm.noundef, llvm.signext}) -> (i16 {llvm.signext})
return %0 : i16
}
// CHECK-LABEL: @test_copy(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.type<sometype{i:i32}>>,
// CHECK-SAME: %[[VAL_1:.*]]: !fir.ptr<!fir.type<sometype{i:i32}>>
func.func @test_copy(%arg0: !fir.ref<!fir.type<sometype{i:i32}>>, %arg1: !fir.ptr<!fir.type<sometype{i:i32}>>) {
// CHECK: fir.copy %[[VAL_0]] to %[[VAL_1]] no_overlap : !fir.ref<!fir.type<sometype{i:i32}>>, !fir.ptr<!fir.type<sometype{i:i32}>>
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!fir.type<sometype{i:i32}>>, !fir.ptr<!fir.type<sometype{i:i32}>>
return
}

View File

@@ -1018,3 +1018,40 @@ func.func @bad_is_assumed_size(%arg0: !fir.ref<!fir.array<*:none>>) {
%1 = fir.is_assumed_size %arg0 : (!fir.ref<!fir.array<*:none>>) -> i1
return
}
// -----
!t=!fir.type<sometype{i:i32}>
!t2=!fir.type<sometype2{j:i32}>
func.func @bad_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t2>) {
// expected-error@+1{{'fir.copy' op source and destination must have the same value type}}
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t2>
return
}
// -----
!t=!fir.type<sometype{i:i32}>
func.func @bad_copy_2(%arg0: !fir.ref<!t>, %arg1: !t) {
// expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.type<sometype{i:i32}>'}}
fir.copy %arg1 to %arg0 no_overlap : !t, !fir.ref<!t>
return
}
// -----
!t=!fir.array<?xi32>
func.func @bad_copy_3(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
// expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.ref<!fir.array<?xi32>>'}}
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
return
}
// -----
!t=f32
func.func @bad_copy_4(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
// expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.ref<f32>'}}
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
return
}