### Description

This patch improves the folding efficiency of `vector.insert` and `vector.extract` operations by not returning early after successfully converting dynamic indices to static indices. This PR also renames the test pass `TestConstantFold` to `TestSingleFold` and adds comprehensive documentation explaining the single-pass folding behavior.

### Motivation

Since the `OpBuilder::createOrFold` function only calls `fold` **once**, the current `fold` methods of `vector.insert` and `vector.extract` may leave the op in a state that can be folded further. For example, consider the following un-folded IR:

```
%v1 = vector.insert %e1, %v0 [0] : f32 into vector<128xf32>
%c0 = arith.constant 0 : index
%e2 = vector.extract %v1[%c0] : f32 from vector<128xf32>
```

If we use `createOrFold` to create the `vector.extract` op, then the result will be:

```
%v1 = vector.insert %e1, %v0 [0] : f32 into vector<128xf32>
%e2 = vector.extract %v1[0] : f32 from vector<128xf32>
```

But this is not the optimal result: `createOrFold` should have returned `%e1`, because the extract reads exactly the element that the insert wrote. The reason is that `fold` returns immediately after `extractInsertFoldConstantOp` converts the dynamic index to a static one, causing the subsequent folding logic to be skipped.

---------

Co-authored-by: Yang Bai <yangb@nvidia.com>
43 lines
1.5 KiB
MLIR
43 lines
1.5 KiB
MLIR
// RUN: mlir-opt %s -split-input-file -test-single-fold -mlir-print-debuginfo | FileCheck %s
|
|
|
|
// CHECK-LABEL: func @fold_and_merge
// Test input: %2 adds the constants 1 and 5, and %3 is an explicit constant 6.
// The directives below expect a single `arith.constant 6 : i32` on the line
// right after the function header, printed with an unknown location.
// NOTE(review): presumably the folded result of %2 is merged with %3 into that
// one constant, which is why neither original location is kept - confirm
// against the test pass implementation.
func.func @fold_and_merge() -> (i32, i32) {
  // CHECK-NEXT: [[C:%.+]] = arith.constant 6 : i32 loc(#[[UnknownLoc:.*]])
  %0 = arith.constant 1 : i32 loc("fold_and_merge":0:0)
  %1 = arith.constant 5 : i32 loc("fold_and_merge":1:0)
  %2 = arith.addi %0, %1 : i32 loc("fold_and_merge":2:0)

  %3 = arith.constant 6 : i32 loc("fold_and_merge":3:0)

  return %2, %3: i32, i32
}
// The location alias captured above must resolve to the unknown location.
// CHECK: #[[UnknownLoc]] = loc(unknown)
|
|
|
|
// -----
|
|
|
|
// CHECK-LABEL: func @materialize_different_dialect
// Test input: %1 is `math.absf` of the constant -1.0, and %2 is an explicit
// `arith.constant 1.0`. The directive below expects an `arith.constant` of
// 1.0 : f32 printed with an unknown location.
// NOTE(review): presumably the folded value of the `math` op is materialized
// as an `arith` constant and merged with %2 - confirm against the test pass
// implementation.
func.func @materialize_different_dialect() -> (f32, f32) {
  // CHECK: arith.constant 1.{{0*}}e+00 : f32 loc(#[[UnknownLoc:.*]])
  %0 = arith.constant -1.0 : f32 loc("materialize_different_dialect":0:0)
  %1 = math.absf %0 : f32 loc("materialize_different_dialect":1:0)
  %2 = arith.constant 1.0 : f32 loc("materialize_different_dialect":2:0)

  return %1, %2: f32, f32
}
// The location alias captured above must resolve to the unknown location.
// CHECK: #[[UnknownLoc]] = loc(unknown)
|
|
|
|
// -----
|
|
|
|
// CHECK-LABEL: func @materialize_in_front
// Test input: the addition of the constants 1 and 5 sits inside an
// `affine.for` body and its result is stored to %arg0. The directive below
// expects `arith.constant 6 : i32` on the line right after the function
// header (i.e. ahead of the loop), printed with an unknown location.
func.func @materialize_in_front(%arg0: memref<8xi32>) {
  // CHECK-NEXT: arith.constant 6 : i32 loc(#[[UnknownLoc:.*]])
  affine.for %arg1 = 0 to 8 {
    %1 = arith.constant 1 : i32 loc("materialize_in_front":0:0)
    %2 = arith.constant 5 : i32 loc("materialize_in_front":1:0)
    %3 = arith.addi %1, %2 : i32 loc("materialize_in_front":2:0)
    memref.store %3, %arg0[%arg1] : memref<8xi32>
  }
  return
} loc("materialize_in_front":3:0)
// The location alias captured above must resolve to the unknown location.
// CHECK: #[[UnknownLoc]] = loc(unknown)
|