From 1ddf18057a5aa1ee7010ec262ccfc80c39b99bf6 Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Tue, 11 Mar 2025 09:31:03 +0100 Subject: [PATCH] [flang] introduce fir.copy to avoid load store of aggregates (#130289) Introduce a FIR operation to do memcopy/memmove of compile time constant size types. This is to avoid requiring derived type copies to done with load/store which is badly supported in LLVM when the aggregate type is "big" (no threshold can easily be defined here, better to always avoid them for fir.type). This was the root cause of the regressions caused by #114002 which introduced a load/store of fir.type<> which caused hand/asserts to fire in LLVM on several benchmarks. See https://llvm.org/docs/Frontend/PerformanceTips.html#avoid-creating-values-of-aggregate-type --- .../include/flang/Optimizer/Dialect/FIROps.td | 44 +++++++++++++++++++ .../include/flang/Optimizer/Dialect/FIRType.h | 7 +++ flang/lib/Optimizer/CodeGen/CodeGen.cpp | 40 ++++++++++++++--- flang/lib/Optimizer/Dialect/FIROps.cpp | 20 +++++++++ flang/test/Fir/copy-codegen.fir | 35 +++++++++++++++ flang/test/Fir/fir-ops.fir | 9 ++++ flang/test/Fir/invalid.fir | 37 ++++++++++++++++ 7 files changed, 187 insertions(+), 5 deletions(-) create mode 100644 flang/test/Fir/copy-codegen.fir diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index c83c57186b46..8325468c4b21 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -68,6 +68,12 @@ def IsBoxAddressOrValueTypePred def fir_BoxAddressOrValueType : Type; +def RefOfConstantSizeAggregateTypePred + : CPred<"::fir::isRefOfConstantSizeAggregateType($_self)">; +def AnyRefOfConstantSizeAggregateType : TypeConstraint< + RefOfConstantSizeAggregateTypePred, + "a reference type to a constant size fir.array, fir.char, or fir.type">; + //===----------------------------------------------------------------------===// // Memory SSA operations //===----------------------------------------------------------------------===// @@ -342,6 +348,44 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface]> { }]; } +def fir_CopyOp : fir_Op<"copy", []> { + let summary = "copy constant size memory"; + + let description = [{ + Copy the memory from a source with compile time constant size to + a destination of the same type. + + This is meant to be used for aggregate types where load and store + are not appropriate to make a copy because LLVM is not meant to + handle load and store of "big" aggregates. + + Its "no_overlap" attribute allows indicating that the source and destination + are known to not overlap at compile time. + + ``` + !t =!fir.type}> + fir.copy %x to %y : !fir.ref, !fir.ref + ``` + TODO: add FirAliasTagOpInterface to carry TBAA. + }]; + + let arguments = (ins Arg:$source, + Arg:$destination, + OptionalAttr:$no_overlap); + + let builders = [OpBuilder<(ins "mlir::Value":$source, + "mlir::Value":$destination, + CArg<"bool", "false">:$no_overlap)>]; + + let assemblyFormat = [{ + $source `to` $destination (`no_overlap` $no_overlap^)? + attr-dict `:` type(operands) + }]; + + let hasVerifier = 1; +} + + def fir_SaveResultOp : fir_Op<"save_result", [AttrSizedOperandSegments]> { let summary = [{ save an array, box, or record function result SSA-value to a memory location diff --git a/flang/include/flang/Optimizer/Dialect/FIRType.h b/flang/include/flang/Optimizer/Dialect/FIRType.h index 3d30f4e67368..76e0aa352bcd 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRType.h +++ b/flang/include/flang/Optimizer/Dialect/FIRType.h @@ -498,6 +498,13 @@ inline bool isBoxProcAddressType(mlir::Type t) { return t && mlir::isa(t); } +inline bool isRefOfConstantSizeAggregateType(mlir::Type t) { + t = fir::dyn_cast_ptrEleTy(t); + return t && + mlir::isa(t) && + !hasDynamicSize(t); +} + /// Return a string representation of `ty`. /// /// fir.array<10x10xf32> -> prefix_10x10xf32 diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index b5b2f393f6ca..5548f5f981b1 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -3545,6 +3545,36 @@ struct StoreOpConversion : public fir::FIROpConversion { } }; +/// `fir.copy` --> `llvm.memcpy` or `llvm.memmove` +struct CopyOpConversion : public fir::FIROpConversion { + using FIROpConversion::FIROpConversion; + + llvm::LogicalResult + matchAndRewrite(fir::CopyOp copy, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Location loc = copy.getLoc(); + mlir::Value llvmSource = adaptor.getSource(); + mlir::Value llvmDestination = adaptor.getDestination(); + mlir::Type i64Ty = mlir::IntegerType::get(rewriter.getContext(), 64); + mlir::Type copyTy = fir::unwrapRefType(copy.getSource().getType()); + mlir::Value copySize = + genTypeStrideInBytes(loc, i64Ty, rewriter, convertType(copyTy)); + + mlir::LLVM::AliasAnalysisOpInterface newOp; + if (copy.getNoOverlap()) + newOp = rewriter.create( + loc, llvmDestination, llvmSource, copySize, /*isVolatile=*/false); + else + newOp = rewriter.create( + loc, llvmDestination, llvmSource, copySize, /*isVolatile=*/false); + + // TODO: propagate TBAA once FirAliasTagOpInterface added to CopyOp. + attachTBAATag(newOp, copyTy, copyTy, nullptr); + rewriter.eraseOp(copy); + return mlir::success(); + } +}; + namespace { /// Convert `fir.unboxchar` into two `llvm.extractvalue` instructions. One for @@ -4148,11 +4178,11 @@ void fir::populateFIRToLLVMConversionPatterns( BoxOffsetOpConversion, BoxProcHostOpConversion, BoxRankOpConversion, BoxTypeCodeOpConversion, BoxTypeDescOpConversion, CallOpConversion, CmpcOpConversion, ConvertOpConversion, CoordinateOpConversion, - DTEntryOpConversion, DeclareOpConversion, DivcOpConversion, - EmboxOpConversion, EmboxCharOpConversion, EmboxProcOpConversion, - ExtractValueOpConversion, FieldIndexOpConversion, FirEndOpConversion, - FreeMemOpConversion, GlobalLenOpConversion, GlobalOpConversion, - InsertOnRangeOpConversion, IsPresentOpConversion, + CopyOpConversion, DTEntryOpConversion, DeclareOpConversion, + DivcOpConversion, EmboxOpConversion, EmboxCharOpConversion, + EmboxProcOpConversion, ExtractValueOpConversion, FieldIndexOpConversion, + FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion, + GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion, LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion, NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion, SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion, diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 7efb733eb565..203a72af61b9 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -3940,6 +3940,26 @@ void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result, build(builder, result, value, memref, {}); } +//===----------------------------------------------------------------------===// +// CopyOp +//===----------------------------------------------------------------------===// + +void fir::CopyOp::build(mlir::OpBuilder &builder, mlir::OperationState &result, + mlir::Value source, mlir::Value destination, + bool noOverlap) { + mlir::UnitAttr noOverlapAttr = + noOverlap ? builder.getUnitAttr() : mlir::UnitAttr{}; + build(builder, result, source, destination, noOverlapAttr); +} + +llvm::LogicalResult fir::CopyOp::verify() { + mlir::Type sourceType = fir::unwrapRefType(getSource().getType()); + mlir::Type destinationType = fir::unwrapRefType(getDestination().getType()); + if (sourceType != destinationType) + return emitOpError("source and destination must have the same value type"); + return mlir::success(); +} + //===----------------------------------------------------------------------===// // StringLitOp //===----------------------------------------------------------------------===// diff --git a/flang/test/Fir/copy-codegen.fir b/flang/test/Fir/copy-codegen.fir new file mode 100644 index 000000000000..eef1885c6a49 --- /dev/null +++ b/flang/test/Fir/copy-codegen.fir @@ -0,0 +1,35 @@ +// Test fir.copy codegen. +// RUN: fir-opt --fir-to-llvm-ir %s -o - | FileCheck %s + +!t=!fir.type}> + +module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"} { + +func.func @test_copy_1(%arg0: !fir.ref, %arg1: !fir.ref) { + fir.copy %arg0 to %arg1 no_overlap : !fir.ref, !fir.ref + return +} +// CHECK-LABEL: llvm.func @test_copy_1( +// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr, +// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) { +// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr +// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)> +// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64 +// CHECK: "llvm.intr.memcpy"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> () +// CHECK: llvm.return +// CHECK: } + +func.func @test_copy_2(%arg0: !fir.ref, %arg1: !fir.ref) { + fir.copy %arg0 to %arg1 : !fir.ref, !fir.ref + return +} +// CHECK-LABEL: llvm.func @test_copy_2( +// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr, +// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) { +// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr +// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)> +// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64 +// CHECK: "llvm.intr.memmove"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> () +// CHECK: llvm.return +// CHECK: } +} diff --git a/flang/test/Fir/fir-ops.fir b/flang/test/Fir/fir-ops.fir index 1bfcb3a9f3dc..06b0bbbf0bd2 100644 --- a/flang/test/Fir/fir-ops.fir +++ b/flang/test/Fir/fir-ops.fir @@ -933,3 +933,12 @@ func.func @test_call_arg_attrs_indirect(%arg0: i16, %arg1: (i16)-> i16) -> i16 { %0 = fir.call %arg1(%arg0) : (i16 {llvm.noundef, llvm.signext}) -> (i16 {llvm.signext}) return %0 : i16 } + +// CHECK-LABEL: @test_copy( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.ptr> +func.func @test_copy(%arg0: !fir.ref>, %arg1: !fir.ptr>) { + // CHECK: fir.copy %[[VAL_0]] to %[[VAL_1]] no_overlap : !fir.ref>, !fir.ptr> + fir.copy %arg0 to %arg1 no_overlap : !fir.ref>, !fir.ptr> + return +} diff --git a/flang/test/Fir/invalid.fir b/flang/test/Fir/invalid.fir index 7e3f9d649841..feb2cd55b378 100644 --- a/flang/test/Fir/invalid.fir +++ b/flang/test/Fir/invalid.fir @@ -1018,3 +1018,40 @@ func.func @bad_is_assumed_size(%arg0: !fir.ref>) { %1 = fir.is_assumed_size %arg0 : (!fir.ref>) -> i1 return } + +// ----- + +!t=!fir.type +!t2=!fir.type +func.func @bad_copy_1(%arg0: !fir.ref, %arg1: !fir.ref) { + // expected-error@+1{{'fir.copy' op source and destination must have the same value type}} + fir.copy %arg0 to %arg1 no_overlap : !fir.ref, !fir.ref + return +} + +// ----- + +!t=!fir.type +func.func @bad_copy_2(%arg0: !fir.ref, %arg1: !t) { + // expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.type'}} + fir.copy %arg1 to %arg0 no_overlap : !t, !fir.ref + return +} + +// ----- + +!t=!fir.array +func.func @bad_copy_3(%arg0: !fir.ref, %arg1: !fir.ref) { + // expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.ref>'}} + fir.copy %arg0 to %arg1 no_overlap : !fir.ref, !fir.ref + return +} + +// ----- + +!t=f32 +func.func @bad_copy_4(%arg0: !fir.ref, %arg1: !fir.ref) { + // expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.ref'}} + fir.copy %arg0 to %arg1 no_overlap : !fir.ref, !fir.ref + return +}