[flang] introduce fir.copy to avoid load store of aggregates (#130289)
Introduce a FIR operation to do memcpy/memmove of compile-time constant size types. This avoids requiring derived type copies to be done with load/store, which is badly supported in LLVM when the aggregate type is "big" (no threshold can easily be defined here, so it is better to always avoid them for fir.type). This was the root cause of the regressions caused by #114002, which introduced a load/store of fir.type<> that caused hangs/asserts to fire in LLVM on several benchmarks. See https://llvm.org/docs/Frontend/PerformanceTips.html#avoid-creating-values-of-aggregate-type
This commit is contained in:
@@ -68,6 +68,12 @@ def IsBoxAddressOrValueTypePred
|
||||
def fir_BoxAddressOrValueType : Type<IsBoxAddressOrValueTypePred,
|
||||
"fir.box or fir.class type or reference">;
|
||||
|
||||
// Predicate that holds when ::fir::isRefOfConstantSizeAggregateType returns
// true for the constrained type.
def RefOfConstantSizeAggregateTypePred
|
||||
: CPred<"::fir::isRefOfConstantSizeAggregateType($_self)">;
|
||||
// Type constraint wrapping the predicate above; the string is the description
// printed in operand verification diagnostics.
def AnyRefOfConstantSizeAggregateType : TypeConstraint<
|
||||
RefOfConstantSizeAggregateTypePred,
|
||||
"a reference type to a constant size fir.array, fir.char, or fir.type">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Memory SSA operations
|
||||
//===----------------------------------------------------------------------===//
|
||||
@@ -342,6 +348,44 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface]> {
|
||||
}];
|
||||
}
|
||||
|
||||
// fir.copy: copies a constant size aggregate from $source to $destination.
// The FIR-to-LLVM conversion emits llvm.memcpy when `no_overlap` is set and
// llvm.memmove otherwise.
def fir_CopyOp : fir_Op<"copy", []> {
|
||||
let summary = "copy constant size memory";
|
||||
|
||||
let description = [{
|
||||
Copy the memory from a source with compile time constant size to
|
||||
a destination of the same type.
|
||||
|
||||
This is meant to be used for aggregate types where load and store
|
||||
are not appropriate to make a copy because LLVM is not meant to
|
||||
handle load and store of "big" aggregates.
|
||||
|
||||
Its "no_overlap" attribute allows indicating that the source and destination
|
||||
are known to not overlap at compile time.
|
||||
|
||||
```
|
||||
!t =!fir.type<t{x:!fir.array<1000xi32>}>
|
||||
fir.copy %x to %y : !fir.ref<!t>, !fir.ref<!t>
|
||||
```
|
||||
TODO: add FirAliasTagOpInterface to carry TBAA.
|
||||
}];
|
||||
|
||||
// $source is only read and $destination is only written (MemRead/MemWrite
// effects); `no_overlap` is an optional unit attribute.
let arguments = (ins Arg<AnyRefOfConstantSizeAggregateType, "", [MemRead]>:$source,
|
||||
Arg<AnyRefOfConstantSizeAggregateType, "", [MemWrite]>:$destination,
|
||||
OptionalAttr<UnitAttr>:$no_overlap);
|
||||
|
||||
// Convenience builder taking `no_overlap` as a bool that defaults to false.
let builders = [OpBuilder<(ins "mlir::Value":$source,
|
||||
"mlir::Value":$destination,
|
||||
CArg<"bool", "false">:$no_overlap)>];
|
||||
|
||||
let assemblyFormat = [{
|
||||
$source `to` $destination (`no_overlap` $no_overlap^)?
|
||||
attr-dict `:` type(operands)
|
||||
}];
|
||||
|
||||
// The C++ verifier rejects operands whose unwrapped value types differ.
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
|
||||
def fir_SaveResultOp : fir_Op<"save_result", [AttrSizedOperandSegments]> {
|
||||
let summary = [{
|
||||
save an array, box, or record function result SSA-value to a memory location
|
||||
|
||||
@@ -498,6 +498,13 @@ inline bool isBoxProcAddressType(mlir::Type t) {
|
||||
return t && mlir::isa<fir::BoxProcType>(t);
|
||||
}
|
||||
|
||||
/// Return true when `fir::dyn_cast_ptrEleTy` yields an element type for `t`
/// and that element type is a fir.char, fir.type, or fir.array without a
/// dynamic size.
inline bool isRefOfConstantSizeAggregateType(mlir::Type t) {
  mlir::Type eleTy = fir::dyn_cast_ptrEleTy(t);
  // No element type: `t` cannot be unwrapped, so it does not qualify.
  if (!eleTy)
    return false;
  // Only character, record, and sequence element types are accepted.
  if (!mlir::isa<fir::CharacterType, fir::RecordType, fir::SequenceType>(eleTy))
    return false;
  return !hasDynamicSize(eleTy);
}
|
||||
|
||||
/// Return a string representation of `ty`.
|
||||
///
|
||||
/// fir.array<10x10xf32> -> prefix_10x10xf32
|
||||
|
||||
@@ -3545,6 +3545,36 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> {
|
||||
}
|
||||
};
|
||||
|
||||
/// `fir.copy` --> `llvm.memcpy` or `llvm.memmove`
|
||||
struct CopyOpConversion : public fir::FIROpConversion<fir::CopyOp> {
|
||||
using FIROpConversion::FIROpConversion;
|
||||
|
||||
llvm::LogicalResult
|
||||
matchAndRewrite(fir::CopyOp copy, OpAdaptor adaptor,
|
||||
mlir::ConversionPatternRewriter &rewriter) const override {
|
||||
mlir::Location loc = copy.getLoc();
|
||||
mlir::Value llvmSource = adaptor.getSource();
|
||||
mlir::Value llvmDestination = adaptor.getDestination();
|
||||
mlir::Type i64Ty = mlir::IntegerType::get(rewriter.getContext(), 64);
|
||||
mlir::Type copyTy = fir::unwrapRefType(copy.getSource().getType());
|
||||
mlir::Value copySize =
|
||||
genTypeStrideInBytes(loc, i64Ty, rewriter, convertType(copyTy));
|
||||
|
||||
mlir::LLVM::AliasAnalysisOpInterface newOp;
|
||||
if (copy.getNoOverlap())
|
||||
newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
|
||||
loc, llvmDestination, llvmSource, copySize, /*isVolatile=*/false);
|
||||
else
|
||||
newOp = rewriter.create<mlir::LLVM::MemmoveOp>(
|
||||
loc, llvmDestination, llvmSource, copySize, /*isVolatile=*/false);
|
||||
|
||||
// TODO: propagate TBAA once FirAliasTagOpInterface added to CopyOp.
|
||||
attachTBAATag(newOp, copyTy, copyTy, nullptr);
|
||||
rewriter.eraseOp(copy);
|
||||
return mlir::success();
|
||||
}
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
/// Convert `fir.unboxchar` into two `llvm.extractvalue` instructions. One for
|
||||
@@ -4148,11 +4178,11 @@ void fir::populateFIRToLLVMConversionPatterns(
|
||||
BoxOffsetOpConversion, BoxProcHostOpConversion, BoxRankOpConversion,
|
||||
BoxTypeCodeOpConversion, BoxTypeDescOpConversion, CallOpConversion,
|
||||
CmpcOpConversion, ConvertOpConversion, CoordinateOpConversion,
|
||||
DTEntryOpConversion, DeclareOpConversion, DivcOpConversion,
|
||||
EmboxOpConversion, EmboxCharOpConversion, EmboxProcOpConversion,
|
||||
ExtractValueOpConversion, FieldIndexOpConversion, FirEndOpConversion,
|
||||
FreeMemOpConversion, GlobalLenOpConversion, GlobalOpConversion,
|
||||
InsertOnRangeOpConversion, IsPresentOpConversion,
|
||||
CopyOpConversion, DTEntryOpConversion, DeclareOpConversion,
|
||||
DivcOpConversion, EmboxOpConversion, EmboxCharOpConversion,
|
||||
EmboxProcOpConversion, ExtractValueOpConversion, FieldIndexOpConversion,
|
||||
FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion,
|
||||
GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion,
|
||||
LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion,
|
||||
NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion,
|
||||
SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion,
|
||||
|
||||
@@ -3940,6 +3940,26 @@ void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
|
||||
build(builder, result, value, memref, {});
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CopyOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Build a fir.copy from `source` to `destination`, translating the boolean
/// `noOverlap` into the optional `no_overlap` unit attribute.
void fir::CopyOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
                        mlir::Value source, mlir::Value destination,
                        bool noOverlap) {
  // A default-constructed (null) UnitAttr leaves the optional attribute unset.
  mlir::UnitAttr overlapFlag;
  if (noOverlap)
    overlapFlag = builder.getUnitAttr();
  build(builder, result, source, destination, overlapFlag);
}
|
||||
|
||||
/// Verify that, once the reference wrappers are removed, the source and the
/// destination designate the same value type.
llvm::LogicalResult fir::CopyOp::verify() {
  mlir::Type srcValueTy = fir::unwrapRefType(getSource().getType());
  mlir::Type dstValueTy = fir::unwrapRefType(getDestination().getType());
  if (srcValueTy == dstValueTy)
    return mlir::success();
  return emitOpError("source and destination must have the same value type");
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// StringLitOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
35
flang/test/Fir/copy-codegen.fir
Normal file
35
flang/test/Fir/copy-codegen.fir
Normal file
@@ -0,0 +1,35 @@
|
||||
// Test fir.copy codegen.
|
||||
// RUN: fir-opt --fir-to-llvm-ir %s -o - | FileCheck %s
|
||||
|
||||
!t=!fir.type<sometype{i:!fir.array<9xi32>}>
|
||||
|
||||
// The module carries an explicit llvm.data_layout attribute for the
// fir-to-llvm-ir conversion exercised below.
module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"} {
|
||||
|
||||
// With `no_overlap`, fir.copy is expected to lower to llvm.intr.memcpy. The
// byte count is obtained by a getelementptr of index 1 from a null pointer
// followed by a ptrtoint to i64.
func.func @test_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
|
||||
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
|
||||
return
|
||||
}
|
||||
// CHECK-LABEL: llvm.func @test_copy_1(
|
||||
// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
|
||||
// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
|
||||
// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
|
||||
// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)>
|
||||
// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64
|
||||
// CHECK: "llvm.intr.memcpy"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
|
||||
// CHECK: llvm.return
|
||||
// CHECK: }
|
||||
|
||||
// Without `no_overlap`, the same size computation is expected but the copy
// is lowered to llvm.intr.memmove.
func.func @test_copy_2(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
|
||||
fir.copy %arg0 to %arg1 : !fir.ref<!t>, !fir.ref<!t>
|
||||
return
|
||||
}
|
||||
// CHECK-LABEL: llvm.func @test_copy_2(
|
||||
// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
|
||||
// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
|
||||
// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
|
||||
// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)>
|
||||
// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64
|
||||
// CHECK: "llvm.intr.memmove"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
|
||||
// CHECK: llvm.return
|
||||
// CHECK: }
|
||||
}
|
||||
@@ -933,3 +933,12 @@ func.func @test_call_arg_attrs_indirect(%arg0: i16, %arg1: (i16)-> i16) -> i16 {
|
||||
%0 = fir.call %arg1(%arg0) : (i16 {llvm.noundef, llvm.signext}) -> (i16 {llvm.signext})
|
||||
return %0 : i16
|
||||
}
|
||||
|
||||
// Round-trip test for fir.copy: the source is a !fir.ref and the destination
// a !fir.ptr to the same record type, with the `no_overlap` attribute set.
// CHECK-LABEL: @test_copy(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.type<sometype{i:i32}>>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: !fir.ptr<!fir.type<sometype{i:i32}>>
|
||||
func.func @test_copy(%arg0: !fir.ref<!fir.type<sometype{i:i32}>>, %arg1: !fir.ptr<!fir.type<sometype{i:i32}>>) {
|
||||
// CHECK: fir.copy %[[VAL_0]] to %[[VAL_1]] no_overlap : !fir.ref<!fir.type<sometype{i:i32}>>, !fir.ptr<!fir.type<sometype{i:i32}>>
|
||||
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!fir.type<sometype{i:i32}>>, !fir.ptr<!fir.type<sometype{i:i32}>>
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1018,3 +1018,40 @@ func.func @bad_is_assumed_size(%arg0: !fir.ref<!fir.array<*:none>>) {
|
||||
%1 = fir.is_assumed_size %arg0 : (!fir.ref<!fir.array<*:none>>) -> i1
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// Negative test: source and destination reference two different record types,
// so the op verifier must report a value type mismatch.
!t=!fir.type<sometype{i:i32}>
|
||||
!t2=!fir.type<sometype2{j:i32}>
|
||||
func.func @bad_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t2>) {
|
||||
// expected-error@+1{{'fir.copy' op source and destination must have the same value type}}
|
||||
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t2>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// Negative test: the source operand is a record value rather than a
// reference to one, so the operand type constraint must reject it.
!t=!fir.type<sometype{i:i32}>
|
||||
func.func @bad_copy_2(%arg0: !fir.ref<!t>, %arg1: !t) {
|
||||
// expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.type<sometype{i:i32}>'}}
|
||||
fir.copy %arg1 to %arg0 no_overlap : !t, !fir.ref<!t>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// Negative test: the referenced array has an unknown extent (`?`), so it is
// not a constant size aggregate and the operand type constraint must reject it.
!t=!fir.array<?xi32>
|
||||
func.func @bad_copy_3(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
|
||||
// expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.ref<!fir.array<?xi32>>'}}
|
||||
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// Negative test: the referenced type is f32, which is not a fir.array,
// fir.char, or fir.type, so the operand type constraint must reject it.
!t=f32
|
||||
func.func @bad_copy_4(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
|
||||
// expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.ref<f32>'}}
|
||||
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
|
||||
return
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user