clang-p2996/mlir/test/Transforms/canonicalize.mlir
Matthias Springer e7790fbed3 [mlir] Add test-convergence option to Canonicalizer tests
This new option is set to `false` by default. It should be set only in Canonicalizer tests, where it detects faulty canonicalization patterns, i.e., patterns that prevent the canonicalizer from converging. The canonicalizer should always converge on unit tests as small as those in `canonicalize.mlir`.

Two faulty canonicalization patterns were detected and fixed with this change.

Differential Revision: https://reviews.llvm.org/D140873
2023-01-04 12:02:21 +01:00
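A minimal usage sketch (mirroring the RUN line of the test file below; the concrete input file name is illustrative):

  mlir-opt -allow-unregistered-dialect canonicalize.mlir \
    -pass-pipeline='builtin.module(func.func(canonicalize{test-convergence}))' \
    -split-input-file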

// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(canonicalize{test-convergence}))' -split-input-file | FileCheck %s
// CHECK-LABEL: func @test_subi_zero
func.func @test_subi_zero(%arg0: i32) -> i32 {
// CHECK-NEXT: %c0_i32 = arith.constant 0 : i32
// CHECK-NEXT: return %c0
%y = arith.subi %arg0, %arg0 : i32
return %y: i32
}
// CHECK-LABEL: func @test_subi_zero_vector
func.func @test_subi_zero_vector(%arg0: vector<4xi32>) -> vector<4xi32> {
//CHECK-NEXT: %cst = arith.constant dense<0> : vector<4xi32>
%y = arith.subi %arg0, %arg0 : vector<4xi32>
// CHECK-NEXT: return %cst
return %y: vector<4xi32>
}
// CHECK-LABEL: func @test_subi_zero_tensor
func.func @test_subi_zero_tensor(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
//CHECK-NEXT: %cst = arith.constant dense<0> : tensor<4x5xi32>
%y = arith.subi %arg0, %arg0 : tensor<4x5xi32>
// CHECK-NEXT: return %cst
return %y: tensor<4x5xi32>
}
// CHECK-LABEL: func @dim
func.func @dim(%arg0: tensor<8x4xf32>) -> index {
// CHECK: %c4 = arith.constant 4 : index
%c1 = arith.constant 1 : index
%0 = tensor.dim %arg0, %c1 : tensor<8x4xf32>
// CHECK-NEXT: return %c4
return %0 : index
}
// CHECK-LABEL: func @test_commutative
func.func @test_commutative(%arg0: i32) -> (i32, i32) {
// CHECK: %c42_i32 = arith.constant 42 : i32
%c42_i32 = arith.constant 42 : i32
// CHECK-NEXT: %0 = arith.addi %arg0, %c42_i32 : i32
%y = arith.addi %c42_i32, %arg0 : i32
// This should not be swapped.
// CHECK-NEXT: %1 = arith.subi %c42_i32, %arg0 : i32
%z = arith.subi %c42_i32, %arg0 : i32
// CHECK-NEXT: return %0, %1
return %y, %z: i32, i32
}
// CHECK-LABEL: func @trivial_dce
func.func @trivial_dce(%arg0: tensor<8x4xf32>) {
%c1 = arith.constant 1 : index
%0 = tensor.dim %arg0, %c1 : tensor<8x4xf32>
// CHECK-NEXT: return
return
}
// CHECK-LABEL: func @load_dce
func.func @load_dce(%arg0: index) {
%c4 = arith.constant 4 : index
%a = memref.alloc(%c4) : memref<?xf32>
%2 = memref.load %a[%arg0] : memref<?xf32>
memref.dealloc %a: memref<?xf32>
// CHECK-NEXT: return
return
}
// CHECK-LABEL: func @addi_zero
func.func @addi_zero(%arg0: i32) -> i32 {
// CHECK-NEXT: return %arg0
%c0_i32 = arith.constant 0 : i32
%y = arith.addi %c0_i32, %arg0 : i32
return %y: i32
}
// CHECK-LABEL: func @addi_zero_index
func.func @addi_zero_index(%arg0: index) -> index {
// CHECK-NEXT: return %arg0
%c0_index = arith.constant 0 : index
%y = arith.addi %c0_index, %arg0 : index
return %y: index
}
// CHECK-LABEL: func @addi_zero_vector
func.func @addi_zero_vector(%arg0: vector<4 x i32>) -> vector<4 x i32> {
// CHECK-NEXT: return %arg0
%c0_v4i32 = arith.constant dense<0> : vector<4 x i32>
%y = arith.addi %c0_v4i32, %arg0 : vector<4 x i32>
return %y: vector<4 x i32>
}
// CHECK-LABEL: func @addi_zero_tensor
func.func @addi_zero_tensor(%arg0: tensor<4 x 5 x i32>) -> tensor<4 x 5 x i32> {
// CHECK-NEXT: return %arg0
%c0_t45i32 = arith.constant dense<0> : tensor<4 x 5 x i32>
%y = arith.addi %arg0, %c0_t45i32 : tensor<4 x 5 x i32>
return %y: tensor<4 x 5 x i32>
}
// CHECK-LABEL: func @muli_zero
func.func @muli_zero(%arg0: i32) -> i32 {
// CHECK-NEXT: %c0_i32 = arith.constant 0 : i32
%c0_i32 = arith.constant 0 : i32
%y = arith.muli %c0_i32, %arg0 : i32
// CHECK-NEXT: return %c0_i32
return %y: i32
}
// CHECK-LABEL: func @muli_zero_index
func.func @muli_zero_index(%arg0: index) -> index {
// CHECK-NEXT: %[[CST:.*]] = arith.constant 0 : index
%c0_index = arith.constant 0 : index
%y = arith.muli %c0_index, %arg0 : index
// CHECK-NEXT: return %[[CST]]
return %y: index
}
// CHECK-LABEL: func @muli_zero_vector
func.func @muli_zero_vector(%arg0: vector<4 x i32>) -> vector<4 x i32> {
// CHECK-NEXT: %cst = arith.constant dense<0> : vector<4xi32>
%cst = arith.constant dense<0> : vector<4 x i32>
%y = arith.muli %cst, %arg0 : vector<4 x i32>
// CHECK-NEXT: return %cst
return %y: vector<4 x i32>
}
// CHECK-LABEL: func @muli_zero_tensor
func.func @muli_zero_tensor(%arg0: tensor<4 x 5 x i32>) -> tensor<4 x 5 x i32> {
// CHECK-NEXT: %cst = arith.constant dense<0> : tensor<4x5xi32>
%cst = arith.constant dense<0> : tensor<4 x 5 x i32>
%y = arith.muli %arg0, %cst : tensor<4 x 5 x i32>
// CHECK-NEXT: return %cst
return %y: tensor<4 x 5 x i32>
}
// CHECK-LABEL: func @muli_one
func.func @muli_one(%arg0: i32) -> i32 {
// CHECK-NEXT: return %arg0
%c1_i32 = arith.constant 1 : i32
%y = arith.muli %c1_i32, %arg0 : i32
return %y: i32
}
// CHECK-LABEL: func @muli_one_index
func.func @muli_one_index(%arg0: index) -> index {
// CHECK-NEXT: return %arg0
%c1_index = arith.constant 1 : index
%y = arith.muli %c1_index, %arg0 : index
return %y: index
}
// CHECK-LABEL: func @muli_one_vector
func.func @muli_one_vector(%arg0: vector<4 x i32>) -> vector<4 x i32> {
// CHECK-NEXT: return %arg0
%c1_v4i32 = arith.constant dense<1> : vector<4 x i32>
%y = arith.muli %c1_v4i32, %arg0 : vector<4 x i32>
return %y: vector<4 x i32>
}
// CHECK-LABEL: func @muli_one_tensor
func.func @muli_one_tensor(%arg0: tensor<4 x 5 x i32>) -> tensor<4 x 5 x i32> {
// CHECK-NEXT: return %arg0
%c1_t45i32 = arith.constant dense<1> : tensor<4 x 5 x i32>
%y = arith.muli %arg0, %c1_t45i32 : tensor<4 x 5 x i32>
return %y: tensor<4 x 5 x i32>
}
//CHECK-LABEL: func @and_self
func.func @and_self(%arg0: i32) -> i32 {
//CHECK-NEXT: return %arg0
%1 = arith.andi %arg0, %arg0 : i32
return %1 : i32
}
//CHECK-LABEL: func @and_self_vector
func.func @and_self_vector(%arg0: vector<4xi32>) -> vector<4xi32> {
//CHECK-NEXT: return %arg0
%1 = arith.andi %arg0, %arg0 : vector<4xi32>
return %1 : vector<4xi32>
}
//CHECK-LABEL: func @and_self_tensor
func.func @and_self_tensor(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
//CHECK-NEXT: return %arg0
%1 = arith.andi %arg0, %arg0 : tensor<4x5xi32>
return %1 : tensor<4x5xi32>
}
//CHECK-LABEL: func @and_zero
func.func @and_zero(%arg0: i32) -> i32 {
// CHECK-NEXT: %c0_i32 = arith.constant 0 : i32
%c0_i32 = arith.constant 0 : i32
// CHECK-NEXT: return %c0_i32
%1 = arith.andi %arg0, %c0_i32 : i32
return %1 : i32
}
//CHECK-LABEL: func @and_zero_index
func.func @and_zero_index(%arg0: index) -> index {
// CHECK-NEXT: %[[CST:.*]] = arith.constant 0 : index
%c0_index = arith.constant 0 : index
// CHECK-NEXT: return %[[CST]]
%1 = arith.andi %arg0, %c0_index : index
return %1 : index
}
//CHECK-LABEL: func @and_zero_vector
func.func @and_zero_vector(%arg0: vector<4xi32>) -> vector<4xi32> {
// CHECK-NEXT: %cst = arith.constant dense<0> : vector<4xi32>
%cst = arith.constant dense<0> : vector<4xi32>
// CHECK-NEXT: return %cst
%1 = arith.andi %arg0, %cst : vector<4xi32>
return %1 : vector<4xi32>
}
//CHECK-LABEL: func @and_zero_tensor
func.func @and_zero_tensor(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
// CHECK-NEXT: %cst = arith.constant dense<0> : tensor<4x5xi32>
%cst = arith.constant dense<0> : tensor<4x5xi32>
// CHECK-NEXT: return %cst
%1 = arith.andi %arg0, %cst : tensor<4x5xi32>
return %1 : tensor<4x5xi32>
}
//CHECK-LABEL: func @or_self
func.func @or_self(%arg0: i32) -> i32 {
//CHECK-NEXT: return %arg0
%1 = arith.ori %arg0, %arg0 : i32
return %1 : i32
}
//CHECK-LABEL: func @or_self_vector
func.func @or_self_vector(%arg0: vector<4xi32>) -> vector<4xi32> {
//CHECK-NEXT: return %arg0
%1 = arith.ori %arg0, %arg0 : vector<4xi32>
return %1 : vector<4xi32>
}
//CHECK-LABEL: func @or_self_tensor
func.func @or_self_tensor(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
//CHECK-NEXT: return %arg0
%1 = arith.ori %arg0, %arg0 : tensor<4x5xi32>
return %1 : tensor<4x5xi32>
}
//CHECK-LABEL: func @or_zero
func.func @or_zero(%arg0: i32) -> i32 {
%c0_i32 = arith.constant 0 : i32
// CHECK-NEXT: return %arg0
%1 = arith.ori %arg0, %c0_i32 : i32
return %1 : i32
}
//CHECK-LABEL: func @or_zero_index
func.func @or_zero_index(%arg0: index) -> index {
%c0_index = arith.constant 0 : index
// CHECK-NEXT: return %arg0
%1 = arith.ori %arg0, %c0_index : index
return %1 : index
}
//CHECK-LABEL: func @or_zero_vector
func.func @or_zero_vector(%arg0: vector<4xi32>) -> vector<4xi32> {
// CHECK-NEXT: return %arg0
%cst = arith.constant dense<0> : vector<4xi32>
%1 = arith.ori %arg0, %cst : vector<4xi32>
return %1 : vector<4xi32>
}
//CHECK-LABEL: func @or_zero_tensor
func.func @or_zero_tensor(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
// CHECK-NEXT: return %arg0
%cst = arith.constant dense<0> : tensor<4x5xi32>
%1 = arith.ori %arg0, %cst : tensor<4x5xi32>
return %1 : tensor<4x5xi32>
}
// CHECK-LABEL: func @or_all_ones
func.func @or_all_ones(%arg0: i1, %arg1: i4) -> (i1, i4) {
// CHECK-DAG: %c-1_i4 = arith.constant -1 : i4
// CHECK-DAG: %true = arith.constant true
%c1_i1 = arith.constant 1 : i1
%c15 = arith.constant 15 : i4
// CHECK-NEXT: return %true
%1 = arith.ori %arg0, %c1_i1 : i1
%2 = arith.ori %arg1, %c15 : i4
return %1, %2 : i1, i4
}
//CHECK-LABEL: func @xor_self
func.func @xor_self(%arg0: i32) -> i32 {
//CHECK-NEXT: %c0_i32 = arith.constant 0
%1 = arith.xori %arg0, %arg0 : i32
//CHECK-NEXT: return %c0_i32
return %1 : i32
}
//CHECK-LABEL: func @xor_self_vector
func.func @xor_self_vector(%arg0: vector<4xi32>) -> vector<4xi32> {
//CHECK-NEXT: %cst = arith.constant dense<0> : vector<4xi32>
%1 = arith.xori %arg0, %arg0 : vector<4xi32>
//CHECK-NEXT: return %cst
return %1 : vector<4xi32>
}
//CHECK-LABEL: func @xor_self_tensor
func.func @xor_self_tensor(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
//CHECK-NEXT: %cst = arith.constant dense<0> : tensor<4x5xi32>
%1 = arith.xori %arg0, %arg0 : tensor<4x5xi32>
//CHECK-NEXT: return %cst
return %1 : tensor<4x5xi32>
}
// CHECK-LABEL: func @memref_cast_folding
func.func @memref_cast_folding(%arg0: memref<4 x f32>, %arg1: f32) -> (f32, f32) {
%0 = memref.cast %arg0 : memref<4xf32> to memref<?xf32>
// CHECK-NEXT: %c0 = arith.constant 0 : index
%c0 = arith.constant 0 : index
%dim = memref.dim %0, %c0 : memref<? x f32>
// CHECK-NEXT: affine.load %arg0[3]
%1 = affine.load %0[%dim - 1] : memref<?xf32>
// CHECK-NEXT: memref.store %arg1, %arg0[%c0] : memref<4xf32>
memref.store %arg1, %0[%c0] : memref<?xf32>
// CHECK-NEXT: %{{.*}} = memref.load %arg0[%c0] : memref<4xf32>
%2 = memref.load %0[%c0] : memref<?xf32>
// CHECK-NEXT: memref.dealloc %arg0 : memref<4xf32>
memref.dealloc %0: memref<?xf32>
// CHECK-NEXT: return %{{.*}}
return %1, %2 : f32, f32
}
// CHECK-LABEL: @fold_memref_cast_in_memref_cast
// CHECK-SAME: (%[[ARG0:.*]]: memref<42x42xf64>)
func.func @fold_memref_cast_in_memref_cast(%0: memref<42x42xf64>) {
// CHECK: %[[folded:.*]] = memref.cast %[[ARG0]] : memref<42x42xf64> to memref<?x?xf64>
%4 = memref.cast %0 : memref<42x42xf64> to memref<?x42xf64>
// CHECK-NOT: memref.cast
%5 = memref.cast %4 : memref<?x42xf64> to memref<?x?xf64>
// CHECK: "test.user"(%[[folded]])
"test.user"(%5) : (memref<?x?xf64>) -> ()
return
}
// CHECK-LABEL: @fold_memref_cast_chain
// CHECK-SAME: (%[[ARG0:.*]]: memref<42x42xf64>)
func.func @fold_memref_cast_chain(%0: memref<42x42xf64>) {
// CHECK-NOT: memref.cast
%4 = memref.cast %0 : memref<42x42xf64> to memref<?x42xf64>
%5 = memref.cast %4 : memref<?x42xf64> to memref<42x42xf64>
// CHECK: "test.user"(%[[ARG0]])
"test.user"(%5) : (memref<42x42xf64>) -> ()
return
}
// CHECK-LABEL: func @dead_alloc_fold
func.func @dead_alloc_fold() {
// CHECK-NEXT: return
%c4 = arith.constant 4 : index
%a = memref.alloc(%c4) : memref<?xf32>
return
}
// CHECK-LABEL: func @dead_dealloc_fold
func.func @dead_dealloc_fold() {
// CHECK-NEXT: return
%a = memref.alloc() : memref<4xf32>
memref.dealloc %a: memref<4xf32>
return
}
// CHECK-LABEL: func @dead_dealloc_fold_multi_use
func.func @dead_dealloc_fold_multi_use(%cond : i1) {
// CHECK-NEXT: return
%a = memref.alloc() : memref<4xf32>
cf.cond_br %cond, ^bb1, ^bb2
^bb1:
memref.dealloc %a: memref<4xf32>
return
^bb2:
memref.dealloc %a: memref<4xf32>
return
}
// CHECK-LABEL: func @write_only_alloc_fold
func.func @write_only_alloc_fold(%v: f32) {
// CHECK-NEXT: return
%c0 = arith.constant 0 : index
%c4 = arith.constant 4 : index
%a = memref.alloc(%c4) : memref<?xf32>
memref.store %v, %a[%c0] : memref<?xf32>
memref.dealloc %a: memref<?xf32>
return
}
// CHECK-LABEL: func @write_only_alloca_fold
func.func @write_only_alloca_fold(%v: f32) {
// CHECK-NEXT: return
%c0 = arith.constant 0 : index
%c4 = arith.constant 4 : index
%a = memref.alloca(%c4) : memref<?xf32>
memref.store %v, %a[%c0] : memref<?xf32>
return
}
// CHECK-LABEL: func @dead_block_elim
func.func @dead_block_elim() {
// CHECK-NOT: ^bb
func.func @nested() {
return
^bb1:
return
}
return
^bb1:
return
}
// CHECK-LABEL: func @dyn_shape_fold(%arg0: index, %arg1: index)
func.func @dyn_shape_fold(%L : index, %M : index) -> (memref<4 x ? x 8 x ? x ? x f32>, memref<? x ? x i32>, memref<? x ? x f32>, memref<4 x ? x 8 x ? x ? x f32>) {
// CHECK: %c0 = arith.constant 0 : index
%zero = arith.constant 0 : index
// The constants below disappear after they propagate into shapes.
%nine = arith.constant 9 : index
%N = arith.constant 1024 : index
%K = arith.constant 512 : index
// CHECK: memref.alloc(%arg0) : memref<?x1024xf32>
%a = memref.alloc(%L, %N) : memref<? x ? x f32>
// CHECK: memref.alloc(%arg1) : memref<4x1024x8x512x?xf32>
%b = memref.alloc(%N, %K, %M) : memref<4 x ? x 8 x ? x ? x f32>
// CHECK: memref.alloc() : memref<512x1024xi32>
%c = memref.alloc(%K, %N) : memref<? x ? x i32>
// CHECK: memref.alloc() : memref<9x9xf32>
%d = memref.alloc(%nine, %nine) : memref<? x ? x f32>
// CHECK: memref.alloca(%arg1) : memref<4x1024x8x512x?xf32>
%e = memref.alloca(%N, %K, %M) : memref<4 x ? x 8 x ? x ? x f32>
// CHECK: affine.for
affine.for %i = 0 to %L {
  // CHECK-NEXT: affine.for
  affine.for %j = 0 to 10 {
    // CHECK-NEXT: memref.load %{{.*}}[%arg2, %arg3] : memref<?x1024xf32>
    // CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%c0, %c0, %arg2, %arg3, %c0] : memref<4x1024x8x512x?xf32>
    %v = memref.load %a[%i, %j] : memref<?x?xf32>
    memref.store %v, %b[%zero, %zero, %i, %j, %zero] : memref<4x?x8x?x?xf32>
  }
}
return %b, %c, %d, %e : memref<4 x ? x 8 x ? x ? x f32>, memref<? x ? x i32>, memref<? x ? x f32>, memref<4 x ? x 8 x ? x ? x f32>
}
// CHECK-LABEL: func @dim_op_fold(
// CHECK-SAME: %[[ARG0:[a-z0-9]*]]: index
// CHECK-SAME: %[[ARG1:[a-z0-9]*]]: index
// CHECK-SAME: %[[ARG2:[a-z0-9]*]]: index
// CHECK-SAME: %[[BUF:[a-z0-9]*]]: memref<?xi8>
func.func @dim_op_fold(%arg0: index, %arg1: index, %arg2: index, %BUF: memref<?xi8>, %M : index, %N : index, %K : index) {
// CHECK-SAME: [[M:arg[0-9]+]]: index
// CHECK-SAME: [[N:arg[0-9]+]]: index
// CHECK-SAME: [[K:arg[0-9]+]]: index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%0 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
%1 = memref.alloc(%arg1, %arg2) : memref<?x8x?xf32>
%2 = memref.dim %1, %c2 : memref<?x8x?xf32>
affine.for %arg3 = 0 to %2 {
  %3 = memref.alloc(%arg0) : memref<?xi8>
  %ub = memref.dim %3, %c0 : memref<?xi8>
  affine.for %arg4 = 0 to %ub {
    %s = memref.dim %0, %c0 : memref<?x?xf32>
    %v = memref.view %3[%c0][%arg4, %s] : memref<?xi8> to memref<?x?xf32>
    %sv = memref.subview %0[%c0, %c0][%s, %arg4][%c1, %c1] : memref<?x?xf32> to memref<?x?xf32, strided<[?, ?], offset: ?>>
    %l = memref.dim %v, %c1 : memref<?x?xf32>
    %u = memref.dim %sv, %c0 : memref<?x?xf32, strided<[?, ?], offset: ?>>
    affine.for %arg5 = %l to %u {
      "foo"() : () -> ()
    }
    %sv2 = memref.subview %0[0, 0][17, %arg4][1, 1] : memref<?x?xf32> to memref<17x?xf32, strided<[?, 1], offset: ?>>
    %l2 = memref.dim %v, %c1 : memref<?x?xf32>
    %u2 = memref.dim %sv2, %c1 : memref<17x?xf32, strided<[?, 1], offset: ?>>
    scf.for %arg5 = %l2 to %u2 step %c1 {
      "foo"() : () -> ()
    }
  }
}
// CHECK: affine.for %[[I:.*]] = 0 to %[[ARG2]] {
// CHECK-NEXT: affine.for %[[J:.*]] = 0 to %[[ARG0]] {
// CHECK-NEXT: affine.for %[[K:.*]] = %[[ARG0]] to %[[ARG0]] {
// CHECK-NEXT: "foo"() : () -> ()
// CHECK-NEXT: }
// CHECK-NEXT: scf.for %[[KK:.*]] = %[[ARG0]] to %[[J]] step %{{.*}} {
// CHECK-NEXT: "foo"() : () -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
%A = memref.view %BUF[%c0][%M, %K] : memref<?xi8> to memref<?x?xf32>
%B = memref.view %BUF[%c0][%K, %N] : memref<?xi8> to memref<?x?xf32>
%C = memref.view %BUF[%c0][%M, %N] : memref<?xi8> to memref<?x?xf32>
%M_ = memref.dim %A, %c0 : memref<?x?xf32>
%K_ = memref.dim %A, %c1 : memref<?x?xf32>
%N_ = memref.dim %C, %c1 : memref<?x?xf32>
scf.for %i = %c0 to %M_ step %c1 {
  scf.for %j = %c0 to %N_ step %c1 {
    scf.for %k = %c0 to %K_ step %c1 {
    }
  }
}
// CHECK-NEXT: return
return
}
// CHECK-LABEL: func @merge_constants
func.func @merge_constants() -> (index, index) {
// CHECK-NEXT: %c42 = arith.constant 42 : index
%0 = arith.constant 42 : index
%1 = arith.constant 42 : index
// CHECK-NEXT: return %c42, %c42
return %0, %1: index, index
}
// CHECK-LABEL: func @hoist_constant
func.func @hoist_constant(%arg0: memref<8xi32>) {
// CHECK-NEXT: %c42_i32 = arith.constant 42 : i32
// CHECK-NEXT: affine.for %arg1 = 0 to 8 {
affine.for %arg1 = 0 to 8 {
// CHECK-NEXT: memref.store %c42_i32, %arg0[%arg1]
%c42_i32 = arith.constant 42 : i32
memref.store %c42_i32, %arg0[%arg1] : memref<8xi32>
}
return
}
// CHECK-LABEL: func @const_fold_propagate
func.func @const_fold_propagate() -> memref<?x?xf32> {
%VT_i = arith.constant 512 : index
%VT_i_s = affine.apply affine_map<(d0) -> (d0 floordiv 8)> (%VT_i)
%VT_k_l = affine.apply affine_map<(d0) -> (d0 floordiv 16)> (%VT_i)
// CHECK: = memref.alloc() : memref<64x32xf32>
%Av = memref.alloc(%VT_i_s, %VT_k_l) : memref<?x?xf32>
return %Av : memref<?x?xf32>
}
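// A worked note (illustrative, not part of the test): the folded static shape
// follows from 512 floordiv 8 = 64 and 512 floordiv 16 = 32, turning the
// dynamic alloc into memref<64x32xf32>.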
// CHECK-LABEL: func @indirect_call_folding
func.func @indirect_target() {
return
}
func.func @indirect_call_folding() {
// CHECK-NEXT: call @indirect_target() : () -> ()
// CHECK-NEXT: return
%indirect_fn = constant @indirect_target : () -> ()
call_indirect %indirect_fn() : () -> ()
return
}
//
// IMPORTANT NOTE: the operations in this test are exactly those produced by
// lowering affine.apply affine_map<(i) -> (i mod 42)> to standard operations. Please only
// change these operations together with the affine lowering pass tests.
//
// CHECK-LABEL: @lowered_affine_mod
func.func @lowered_affine_mod() -> (index, index) {
// CHECK-DAG: {{.*}} = arith.constant 1 : index
// CHECK-DAG: {{.*}} = arith.constant 41 : index
%c-43 = arith.constant -43 : index
%c42 = arith.constant 42 : index
%0 = arith.remsi %c-43, %c42 : index
%c0 = arith.constant 0 : index
%1 = arith.cmpi slt, %0, %c0 : index
%2 = arith.addi %0, %c42 : index
%3 = arith.select %1, %2, %0 : index
%c43 = arith.constant 43 : index
%c42_0 = arith.constant 42 : index
%4 = arith.remsi %c43, %c42_0 : index
%c0_1 = arith.constant 0 : index
%5 = arith.cmpi slt, %4, %c0_1 : index
%6 = arith.addi %4, %c42_0 : index
%7 = arith.select %5, %6, %4 : index
return %3, %7 : index, index
}
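// For reference (illustrative, not part of the test): the block above is the
// lowered form of affine_map<(i) -> (i mod 42)> applied to -43 and 43, which
// the canonicalizer folds to 41 and 1, matching the CHECK-DAG lines.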
//
// IMPORTANT NOTE: the operations in this test are exactly those produced by
// lowering affine.apply affine_map<(i) -> (i floordiv 42)> to standard operations. Please only
// change these operations together with the affine lowering pass tests.
//
// CHECK-LABEL: func @lowered_affine_floordiv
func.func @lowered_affine_floordiv() -> (index, index) {
// CHECK-DAG: %c1 = arith.constant 1 : index
// CHECK-DAG: %c-2 = arith.constant -2 : index
%c-43 = arith.constant -43 : index
%c42 = arith.constant 42 : index
%c0 = arith.constant 0 : index
%c-1 = arith.constant -1 : index
%0 = arith.cmpi slt, %c-43, %c0 : index
%1 = arith.subi %c-1, %c-43 : index
%2 = arith.select %0, %1, %c-43 : index
%3 = arith.divsi %2, %c42 : index
%4 = arith.subi %c-1, %3 : index
%5 = arith.select %0, %4, %3 : index
%c43 = arith.constant 43 : index
%c42_0 = arith.constant 42 : index
%c0_1 = arith.constant 0 : index
%c-1_2 = arith.constant -1 : index
%6 = arith.cmpi slt, %c43, %c0_1 : index
%7 = arith.subi %c-1_2, %c43 : index
%8 = arith.select %6, %7, %c43 : index
%9 = arith.divsi %8, %c42_0 : index
%10 = arith.subi %c-1_2, %9 : index
%11 = arith.select %6, %10, %9 : index
return %5, %11 : index, index
}
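// For reference (illustrative): the block above is the lowered form of
// affine_map<(i) -> (i floordiv 42)> applied to -43 and 43; the canonicalizer
// folds the results to -43 floordiv 42 = -2 and 43 floordiv 42 = 1.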
//
// IMPORTANT NOTE: the operations in this test are exactly those produced by
// lowering affine.apply affine_map<(i) -> (i ceildiv 42)> to standard operations. Please only
// change these operations together with the affine lowering pass tests.
//
// CHECK-LABEL: func @lowered_affine_ceildiv
func.func @lowered_affine_ceildiv() -> (index, index) {
// CHECK-DAG: %c-1 = arith.constant -1 : index
%c-43 = arith.constant -43 : index
%c42 = arith.constant 42 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = arith.cmpi sle, %c-43, %c0 : index
%1 = arith.subi %c0, %c-43 : index
%2 = arith.subi %c-43, %c1 : index
%3 = arith.select %0, %1, %2 : index
%4 = arith.divsi %3, %c42 : index
%5 = arith.subi %c0, %4 : index
%6 = arith.addi %4, %c1 : index
%7 = arith.select %0, %5, %6 : index
// CHECK-DAG: %c2 = arith.constant 2 : index
%c43 = arith.constant 43 : index
%c42_0 = arith.constant 42 : index
%c0_1 = arith.constant 0 : index
%c1_2 = arith.constant 1 : index
%8 = arith.cmpi sle, %c43, %c0_1 : index
%9 = arith.subi %c0_1, %c43 : index
%10 = arith.subi %c43, %c1_2 : index
%11 = arith.select %8, %9, %10 : index
%12 = arith.divsi %11, %c42_0 : index
%13 = arith.subi %c0_1, %12 : index
%14 = arith.addi %12, %c1_2 : index
%15 = arith.select %8, %13, %14 : index
// CHECK-NEXT: return %c-1, %c2
return %7, %15 : index, index
}
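// For reference (illustrative): the block above is the lowered form of
// affine_map<(i) -> (i ceildiv 42)> applied to -43 and 43; the canonicalizer
// folds the results to -43 ceildiv 42 = -1 and 43 ceildiv 42 = 2, matching
// the return check above.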
// Checks that NOP casts are removed.
// CHECK-LABEL: cast_values
func.func @cast_values(%arg0: memref<?xi32>) -> memref<2xi32> {
// NOP cast
%1 = memref.cast %arg0 : memref<?xi32> to memref<?xi32>
// CHECK-NEXT: %[[RET:.*]] = memref.cast %arg0 : memref<?xi32> to memref<2xi32>
%3 = memref.cast %1 : memref<?xi32> to memref<2xi32>
// NOP cast
%5 = memref.cast %3 : memref<2xi32> to memref<2xi32>
// CHECK-NEXT: return %[[RET]] : memref<2xi32>
return %5 : memref<2xi32>
}
// -----
// CHECK-LABEL: func @view
func.func @view(%arg0 : index) -> (f32, f32, f32, f32) {
// CHECK: %[[C15:.*]] = arith.constant 15 : index
// CHECK: %[[ALLOC_MEM:.*]] = memref.alloc() : memref<2048xi8>
%0 = memref.alloc() : memref<2048xi8>
%c0 = arith.constant 0 : index
%c7 = arith.constant 7 : index
%c11 = arith.constant 11 : index
%c15 = arith.constant 15 : index
// Test: fold constant sizes.
// CHECK: memref.view %[[ALLOC_MEM]][%[[C15]]][] : memref<2048xi8> to memref<7x11xf32>
%1 = memref.view %0[%c15][%c7, %c11] : memref<2048xi8> to memref<?x?xf32>
%r0 = memref.load %1[%c0, %c0] : memref<?x?xf32>
// Test: fold one constant size.
// CHECK: memref.view %[[ALLOC_MEM]][%[[C15]]][%arg0, %arg0] : memref<2048xi8> to memref<?x?x7xf32>
%2 = memref.view %0[%c15][%arg0, %arg0, %c7] : memref<2048xi8> to memref<?x?x?xf32>
%r1 = memref.load %2[%c0, %c0, %c0] : memref<?x?x?xf32>
// Test: preserve an existing static size.
// CHECK: memref.view %[[ALLOC_MEM]][%[[C15]]][] : memref<2048xi8> to memref<7x4xf32>
%3 = memref.view %0[%c15][%c7] : memref<2048xi8> to memref<?x4xf32>
%r2 = memref.load %3[%c0, %c0] : memref<?x4xf32>
// Test: folding static alloc and memref.cast into a view.
// CHECK: memref.view %[[ALLOC_MEM]][%[[C15]]][] : memref<2048xi8> to memref<15x7xf32>
%4 = memref.cast %0 : memref<2048xi8> to memref<?xi8>
%5 = memref.view %4[%c15][%c15, %c7] : memref<?xi8> to memref<?x?xf32>
%r3 = memref.load %5[%c0, %c0] : memref<?x?xf32>
return %r0, %r1, %r2, %r3 : f32, f32, f32, f32
}
// -----
// CHECK-LABEL: func @subview
// CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index
func.func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
// Folded but reappears after subview folding into dim.
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C7:.*]] = arith.constant 7 : index
// CHECK-DAG: %[[C11:.*]] = arith.constant 11 : index
%c0 = arith.constant 0 : index
// CHECK-NOT: arith.constant 1 : index
%c1 = arith.constant 1 : index
// CHECK-NOT: arith.constant 2 : index
%c2 = arith.constant 2 : index
// Folded but reappears after subview folding into dim.
%c7 = arith.constant 7 : index
%c11 = arith.constant 11 : index
// CHECK-NOT: arith.constant 15 : index
%c15 = arith.constant 15 : index
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
%0 = memref.alloc() : memref<8x16x4xf32, strided<[64, 4, 1], offset: 0>>
// Test: subview with constant base memref and constant operands is folded.
// Note that the subview uses the base memref's layout map because it uses
// zero offset and unit stride arguments.
// CHECK: memref.subview %[[ALLOC0]][0, 0, 0] [7, 11, 2] [1, 1, 1] :
// CHECK-SAME: memref<8x16x4xf32, strided<[64, 4, 1]>>
// CHECK-SAME: to memref<7x11x2xf32, strided<[64, 4, 1]>>
%1 = memref.subview %0[%c0, %c0, %c0] [%c7, %c11, %c2] [%c1, %c1, %c1]
: memref<8x16x4xf32, strided<[64, 4, 1], offset: 0>> to
memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
%v0 = memref.load %1[%c0, %c0, %c0] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
// Test: subview with one dynamic operand can also be folded.
// CHECK: memref.subview %[[ALLOC0]][0, %[[ARG0]], 0] [7, 11, 15] [1, 1, 1] :
// CHECK-SAME: memref<8x16x4xf32, strided<[64, 4, 1]>>
// CHECK-SAME: to memref<7x11x15xf32, strided<[64, 4, 1], offset: ?>>
%2 = memref.subview %0[%c0, %arg0, %c0] [%c7, %c11, %c15] [%c1, %c1, %c1]
: memref<8x16x4xf32, strided<[64, 4, 1], offset: 0>> to
memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
memref.store %v0, %2[%c0, %c0, %c0] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
// CHECK: %[[ALLOC1:.*]] = memref.alloc(%[[ARG0]])
%3 = memref.alloc(%arg0) : memref<?x16x4xf32, strided<[64, 4, 1], offset: 0>>
// Test: subview with constant operands but dynamic base memref is folded as long as the strides and offset of the base memref are static.
// CHECK: memref.subview %[[ALLOC1]][0, 0, 0] [7, 11, 15] [1, 1, 1] :
// CHECK-SAME: memref<?x16x4xf32, strided<[64, 4, 1]>>
// CHECK-SAME: to memref<7x11x15xf32, strided<[64, 4, 1]>>
%4 = memref.subview %3[%c0, %c0, %c0] [%c7, %c11, %c15] [%c1, %c1, %c1]
: memref<?x16x4xf32, strided<[64, 4, 1], offset: 0>> to
memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
memref.store %v0, %4[%c0, %c0, %c0] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
// Test: subview offset operands are folded correctly w.r.t. base strides.
// CHECK: memref.subview %[[ALLOC0]][1, 2, 7] [7, 11, 2] [1, 1, 1] :
// CHECK-SAME: memref<8x16x4xf32, strided<[64, 4, 1]>> to
// CHECK-SAME: memref<7x11x2xf32, strided<[64, 4, 1], offset: 79>>
%5 = memref.subview %0[%c1, %c2, %c7] [%c7, %c11, %c2] [%c1, %c1, %c1]
: memref<8x16x4xf32, strided<[64, 4, 1], offset: 0>> to
memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
memref.store %v0, %5[%c0, %c0, %c0] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
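// (Worked arithmetic, for illustration: the folded offset 79 above is the
// dot product of the offset operands with the base strides,
// 1*64 + 2*4 + 7*1 = 79.)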
// Test: subview stride operands are folded correctly w.r.t. base strides.
// CHECK: memref.subview %[[ALLOC0]][0, 0, 0] [7, 11, 2] [2, 7, 11] :
// CHECK-SAME: memref<8x16x4xf32, strided<[64, 4, 1]>>
// CHECK-SAME: to memref<7x11x2xf32, strided<[128, 28, 11]>>
%6 = memref.subview %0[%c0, %c0, %c0] [%c7, %c11, %c2] [%c2, %c7, %c11]
: memref<8x16x4xf32, strided<[64, 4, 1], offset: 0>> to
memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
memref.store %v0, %6[%c0, %c0, %c0] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
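// (Worked arithmetic, for illustration: the folded strides [128, 28, 11]
// above are the stride operands multiplied by the base strides,
// [2*64, 7*4, 11*1].)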
// Test: subview shapes are folded, but offsets and strides are not, even if the base memref is static.
// CHECK: memref.subview %[[ALLOC0]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [7, 11, 2] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] :
// CHECK-SAME: memref<8x16x4xf32, strided<[64, 4, 1]>> to
// CHECK-SAME: memref<7x11x2xf32, strided<[?, ?, ?], offset: ?>>
%10 = memref.subview %0[%arg0, %arg0, %arg0] [%c7, %c11, %c2] [%arg1, %arg1, %arg1] :
memref<8x16x4xf32, strided<[64, 4, 1], offset: 0>> to
memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
memref.store %v0, %10[%arg1, %arg1, %arg1] :
memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
// Test: subview strides are folded, but offsets and shapes are not, even if the base memref is static.
// CHECK: memref.subview %[[ALLOC0]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [2, 7, 11] :
// CHECK-SAME: memref<8x16x4xf32, strided<[64, 4, 1]>> to
// CHECK-SAME: memref<?x?x?xf32, strided<[128, 28, 11], offset: ?>>
%11 = memref.subview %0[%arg0, %arg0, %arg0] [%arg1, %arg1, %arg1] [%c2, %c7, %c11] :
memref<8x16x4xf32, strided<[64, 4, 1], offset: 0>> to
memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
memref.store %v0, %11[%arg0, %arg0, %arg0] :
memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
// Test: subview offsets are folded, but strides and shapes are not, even if the base memref is static.
// CHECK: memref.subview %[[ALLOC0]][1, 2, 7] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [%[[ARG0]], %[[ARG0]], %[[ARG0]]] :
// CHECK-SAME: memref<8x16x4xf32, strided<[64, 4, 1]>> to
// CHECK-SAME: memref<?x?x?xf32, strided<[?, ?, ?], offset: 79>>
%13 = memref.subview %0[%c1, %c2, %c7] [%arg1, %arg1, %arg1] [%arg0, %arg0, %arg0] :
memref<8x16x4xf32, strided<[64, 4, 1], offset: 0>> to
memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
memref.store %v0, %13[%arg1, %arg1, %arg1] :
memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
// CHECK: %[[ALLOC2:.*]] = memref.alloc(%[[ARG0]], %[[ARG0]], %[[ARG1]])
%14 = memref.alloc(%arg0, %arg0, %arg1) : memref<?x?x?xf32>
// Test: subview shapes are folded, even if the base memref is not static.
// CHECK: memref.subview %[[ALLOC2]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [7, 11, 2] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] :
// CHECK-SAME: memref<?x?x?xf32> to
// CHECK-SAME: memref<7x11x2xf32, strided<[?, ?, ?], offset: ?>>
%15 = memref.subview %14[%arg0, %arg0, %arg0] [%c7, %c11, %c2] [%arg1, %arg1, %arg1] :
memref<?x?x?xf32> to
memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
memref.store %v0, %15[%arg1, %arg1, %arg1] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
// TEST: subview strides are folded; in the result type, only the most minor stride is folded.
// CHECK: memref.subview %[[ALLOC2]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [2, 2, 2] :
// CHECK-SAME: memref<?x?x?xf32> to
// CHECK-SAME: memref<?x?x?xf32, strided<[?, ?, 2], offset: ?>>
%16 = memref.subview %14[%arg0, %arg0, %arg0] [%arg1, %arg1, %arg1] [%c2, %c2, %c2] :
memref<?x?x?xf32> to
memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
memref.store %v0, %16[%arg0, %arg0, %arg0] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
// TEST: subview offsets are folded, but the offset in the result type remains dynamic when the base memref is not static.
// CHECK: memref.subview %[[ALLOC2]][1, 1, 1] [%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] :
// CHECK-SAME: memref<?x?x?xf32> to
// CHECK-SAME: memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
%17 = memref.subview %14[%c1, %c1, %c1] [%arg0, %arg0, %arg0] [%arg1, %arg1, %arg1] :
memref<?x?x?xf32> to
memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
memref.store %v0, %17[%arg0, %arg0, %arg0] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
// CHECK: %[[ALLOC3:.*]] = memref.alloc() : memref<12x4xf32>
%18 = memref.alloc() : memref<12x4xf32>
%c4 = arith.constant 4 : index
// TEST: subview strides are maintained when sizes are folded
// CHECK: memref.subview %[[ALLOC3]][%arg1, %arg1] [2, 4] [1, 1] :
// CHECK-SAME: memref<12x4xf32> to
// CHECK-SAME: memref<2x4xf32, strided<[4, 1], offset: ?>
%19 = memref.subview %18[%arg1, %arg1] [%c2, %c4] [1, 1] :
memref<12x4xf32> to
memref<?x?xf32, strided<[4, 1], offset: ?>>
memref.store %v0, %19[%arg1, %arg1] : memref<?x?xf32, strided<[4, 1], offset: ?>>
// TEST: subview strides and sizes are maintained when offsets are folded
// CHECK: memref.subview %[[ALLOC3]][2, 4] [12, 4] [1, 1] :
// CHECK-SAME: memref<12x4xf32> to
// CHECK-SAME: memref<12x4xf32, strided<[4, 1], offset: 12>>
%20 = memref.subview %18[%c2, %c4] [12, 4] [1, 1] :
memref<12x4xf32> to
memref<12x4xf32, strided<[4, 1], offset: ?>>
memref.store %v0, %20[%arg1, %arg1] : memref<12x4xf32, strided<[4, 1], offset: ?>>
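// (Worked arithmetic, for illustration: the folded offset 12 above is
// 2*4 + 4*1, the offset operands against the base strides [4, 1].)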
// Test: dim on subview is rewritten to size operand.
%7 = memref.dim %4, %c0 : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
%8 = memref.dim %4, %c1 : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
// CHECK: return %[[C7]], %[[C11]]
return %7, %8 : index, index
}
// CHECK-LABEL: func @index_cast
// CHECK-SAME: %[[ARG_0:arg[0-9]+]]: i16
func.func @index_cast(%arg0: i16) -> (i16) {
%11 = arith.index_cast %arg0 : i16 to index
%12 = arith.index_cast %11 : index to i16
// CHECK: return %[[ARG_0]] : i16
return %12 : i16
}
// CHECK-LABEL: func @index_cast_fold
func.func @index_cast_fold() -> (i16, index) {
%c4 = arith.constant 4 : index
%1 = arith.index_cast %c4 : index to i16
%c4_i16 = arith.constant 4 : i16
%2 = arith.index_cast %c4_i16 : i16 to index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[C4_I16:.*]] = arith.constant 4 : i16
// CHECK: return %[[C4_I16]], %[[C4]] : i16, index
return %1, %2 : i16, index
}
// CHECK-LABEL: func @remove_dead_else
func.func @remove_dead_else(%M : memref<100 x i32>) {
affine.for %i = 0 to 100 {
  affine.load %M[%i] : memref<100xi32>
  affine.if affine_set<(d0) : (d0 - 2 >= 0)>(%i) {
    affine.for %j = 0 to 100 {
      %1 = affine.load %M[%j] : memref<100xi32>
      "prevent.dce"(%1) : (i32) -> ()
    }
  } else {
    // Nothing
  }
  affine.load %M[%i] : memref<100xi32>
}
return
}
// CHECK: affine.if
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.load
// CHECK-NEXT: "prevent.dce"
// CHECK-NEXT: }
// CHECK-NEXT: }
// -----
// CHECK-LABEL: func @divi_signed_by_one
// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
func.func @divi_signed_by_one(%arg0: i32) -> (i32) {
%c1 = arith.constant 1 : i32
%res = arith.divsi %arg0, %c1 : i32
// CHECK: return %[[ARG]]
return %res : i32
}
// CHECK-LABEL: func @divi_unsigned_by_one
// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
func.func @divi_unsigned_by_one(%arg0: i32) -> (i32) {
%c1 = arith.constant 1 : i32
%res = arith.divui %arg0, %c1 : i32
// CHECK: return %[[ARG]]
return %res : i32
}
// CHECK-LABEL: func @tensor_divi_signed_by_one
// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
func.func @tensor_divi_signed_by_one(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
%c1 = arith.constant dense<1> : tensor<4x5xi32>
%res = arith.divsi %arg0, %c1 : tensor<4x5xi32>
// CHECK: return %[[ARG]]
return %res : tensor<4x5xi32>
}
// CHECK-LABEL: func @tensor_divi_unsigned_by_one
// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
func.func @tensor_divi_unsigned_by_one(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
%c1 = arith.constant dense<1> : tensor<4x5xi32>
%res = arith.divui %arg0, %c1 : tensor<4x5xi32>
// CHECK: return %[[ARG]]
return %res : tensor<4x5xi32>
}
// -----
// CHECK-LABEL: func @arith.floordivsi_by_one
// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
func.func @arith.floordivsi_by_one(%arg0: i32) -> (i32) {
%c1 = arith.constant 1 : i32
%res = arith.floordivsi %arg0, %c1 : i32
// CHECK: return %[[ARG]]
return %res : i32
}
// CHECK-LABEL: func @tensor_arith.floordivsi_by_one
// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
func.func @tensor_arith.floordivsi_by_one(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
%c1 = arith.constant dense<1> : tensor<4x5xi32>
%res = arith.floordivsi %arg0, %c1 : tensor<4x5xi32>
// CHECK: return %[[ARG]]
return %res : tensor<4x5xi32>
}
// -----
// CHECK-LABEL: func @arith.ceildivsi_by_one
// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
func.func @arith.ceildivsi_by_one(%arg0: i32) -> (i32) {
%c1 = arith.constant 1 : i32
%res = arith.ceildivsi %arg0, %c1 : i32
// CHECK: return %[[ARG]]
return %res : i32
}
// CHECK-LABEL: func @tensor_arith.ceildivsi_by_one
// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
func.func @tensor_arith.ceildivsi_by_one(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
%c1 = arith.constant dense<1> : tensor<4x5xi32>
%res = arith.ceildivsi %arg0, %c1 : tensor<4x5xi32>
// CHECK: return %[[ARG]]
return %res : tensor<4x5xi32>
}
// -----
// CHECK-LABEL: func @arith.ceildivui_by_one
// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
func.func @arith.ceildivui_by_one(%arg0: i32) -> (i32) {
%c1 = arith.constant 1 : i32
%res = arith.ceildivui %arg0, %c1 : i32
// CHECK: return %[[ARG]]
return %res : i32
}
// CHECK-LABEL: func @tensor_arith.ceildivui_by_one
// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
func.func @tensor_arith.ceildivui_by_one(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
%c1 = arith.constant dense<1> : tensor<4x5xi32>
%res = arith.ceildivui %arg0, %c1 : tensor<4x5xi32>
// CHECK: return %[[ARG]]
return %res : tensor<4x5xi32>
}
// -----
// CHECK-LABEL: func @memref_cast_folding_subview
func.func @memref_cast_folding_subview(%arg0: memref<4x5xf32>, %i: index) -> (memref<?x?xf32, strided<[?, ?], offset: ?>>) {
%0 = memref.cast %arg0 : memref<4x5xf32> to memref<?x?xf32>
// CHECK-NEXT: memref.subview %{{.*}}: memref<4x5xf32>
%1 = memref.subview %0[%i, %i][%i, %i][%i, %i]: memref<?x?xf32> to memref<?x?xf32, strided<[?, ?], offset: ?>>
return %1: memref<?x?xf32, strided<[?, ?], offset: ?>>
}
// -----
// CHECK-LABEL: func @memref_cast_folding_subview_static(
func.func @memref_cast_folding_subview_static(%V: memref<16x16xf32>, %a: index, %b: index)
-> memref<3x4xf32, strided<[?, 1], offset: ?>>
{
%0 = memref.cast %V : memref<16x16xf32> to memref<?x?xf32>
%1 = memref.subview %0[0, 0][3, 4][1, 1] : memref<?x?xf32> to memref<3x4xf32, strided<[?, 1], offset: ?>>
// CHECK: memref.subview{{.*}}: memref<16x16xf32> to memref<3x4xf32, strided<[16, 1]>>
return %1: memref<3x4xf32, strided<[?, 1], offset: ?>>
}
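// (For illustration: once the cast is folded away, the base is the static
// memref<16x16xf32>, so the previously dynamic row stride resolves to 16 and
// the offset to 0.)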
// -----
// CHECK-LABEL: func @slice
// CHECK-SAME: %[[ARG0:[0-9a-z]*]]: index, %[[ARG1:[0-9a-z]*]]: index
func.func @slice(%t: tensor<8x16x4xf32>, %arg0 : index, %arg1 : index)
-> tensor<?x?x?xf32>
{
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c7 = arith.constant 7 : index
%c11 = arith.constant 11 : index
// CHECK: tensor.extract_slice %{{.*}}[0, 0, 0] [7, 11, 2] [1, 1, 1] :
// CHECK-SAME: tensor<8x16x4xf32> to tensor<7x11x2xf32>
// tensor.cast gets folded away in consumer.
// CHECK-NOT: tensor.cast
%1 = tensor.extract_slice %t[%c0, %c0, %c0] [%c7, %c11, %c2] [%c1, %c1, %c1]
: tensor<8x16x4xf32> to tensor<?x?x?xf32>
// Test: slice with one dynamic operand can also be folded.
// CHECK: tensor.extract_slice %{{.*}}[0, 0, 0] [2, %[[ARG0]], 2] [1, 1, 1] :
// CHECK-SAME: tensor<7x11x2xf32> to tensor<2x?x2xf32>
// CHECK: tensor.cast %{{.*}} : tensor<2x?x2xf32> to tensor<?x?x?xf32>
%2 = tensor.extract_slice %1[%c0, %c0, %c0] [%c2, %arg0, %c2] [%c1, %c1, %c1]
: tensor<?x?x?xf32> to tensor<?x?x?xf32>
return %2 : tensor<?x?x?xf32>
}
// -----
// CHECK-LABEL: func @fold_trunci
// CHECK-SAME: (%[[ARG0:[0-9a-z]*]]: i1)
func.func @fold_trunci(%arg0: i1) -> i1 attributes {} {
// CHECK-NEXT: return %[[ARG0]] : i1
%0 = arith.extui %arg0 : i1 to i8
%1 = arith.trunci %0 : i8 to i1
return %1 : i1
}
// -----
// CHECK-LABEL: func @fold_trunci_vector
// CHECK-SAME: (%[[ARG0:[0-9a-z]*]]: vector<4xi1>)
func.func @fold_trunci_vector(%arg0: vector<4xi1>) -> vector<4xi1> attributes {} {
// CHECK-NEXT: return %[[ARG0]] : vector<4xi1>
%0 = arith.extui %arg0 : vector<4xi1> to vector<4xi8>
%1 = arith.trunci %0 : vector<4xi8> to vector<4xi1>
return %1 : vector<4xi1>
}
// -----
// TODO Canonicalize this into:
// arith.extui %arg0 : i1 to i2
// CHECK-LABEL: func @do_not_fold_trunci
// CHECK-SAME: (%[[ARG0:[0-9a-z]*]]: i1)
func.func @do_not_fold_trunci(%arg0: i1) -> i2 attributes {} {
// CHECK-NEXT: arith.extui %[[ARG0]] : i1 to i8
// CHECK-NEXT: %[[RES:[0-9a-z]*]] = arith.trunci %{{.*}} : i8 to i2
// CHECK-NEXT: return %[[RES]] : i2
%0 = arith.extui %arg0 : i1 to i8
%1 = arith.trunci %0 : i8 to i2
return %1 : i2
}
// -----
// CHECK-LABEL: func @do_not_fold_trunci_vector
// CHECK-SAME: (%[[ARG0:[0-9a-z]*]]: vector<4xi1>)
func.func @do_not_fold_trunci_vector(%arg0: vector<4xi1>) -> vector<4xi2> attributes {} {
// CHECK-NEXT: arith.extui %[[ARG0]] : vector<4xi1> to vector<4xi8>
// CHECK-NEXT: %[[RES:[0-9a-z]*]] = arith.trunci %{{.*}} : vector<4xi8> to vector<4xi2>
// CHECK-NEXT: return %[[RES]] : vector<4xi2>
%0 = arith.extui %arg0 : vector<4xi1> to vector<4xi8>
%1 = arith.trunci %0 : vector<4xi8> to vector<4xi2>
return %1 : vector<4xi2>
}
// -----
// CHECK-LABEL: func @fold_trunci_sexti
// CHECK-SAME: (%[[ARG0:[0-9a-z]*]]: i1)
func.func @fold_trunci_sexti(%arg0: i1) -> i1 attributes {} {
// CHECK-NEXT: return %[[ARG0]] : i1
%0 = arith.extsi %arg0 : i1 to i8
%1 = arith.trunci %0 : i8 to i1
return %1 : i1
}
// CHECK-LABEL: func @simple_clone_elimination
func.func @simple_clone_elimination() -> memref<5xf32> {
%ret = memref.alloc() : memref<5xf32>
%temp = bufferization.clone %ret : memref<5xf32> to memref<5xf32>
memref.dealloc %temp : memref<5xf32>
return %ret : memref<5xf32>
}
// CHECK-NEXT: %[[ret:.*]] = memref.alloc()
// CHECK-NOT: %{{.*}} = bufferization.clone
// CHECK-NOT: memref.dealloc %{{.*}}
// CHECK: return %[[ret]]
// -----
// CHECK-LABEL: func @clone_loop_alloc
func.func @clone_loop_alloc(%arg0: index, %arg1: index, %arg2: index, %arg3: memref<2xf32>, %arg4: memref<2xf32>) {
%0 = memref.alloc() : memref<2xf32>
memref.dealloc %0 : memref<2xf32>
%1 = bufferization.clone %arg3 : memref<2xf32> to memref<2xf32>
%2 = scf.for %arg5 = %arg0 to %arg1 step %arg2 iter_args(%arg6 = %1) -> (memref<2xf32>) {
  %3 = arith.cmpi eq, %arg5, %arg1 : index
  memref.dealloc %arg6 : memref<2xf32>
  %4 = memref.alloc() : memref<2xf32>
  %5 = bufferization.clone %4 : memref<2xf32> to memref<2xf32>
  memref.dealloc %4 : memref<2xf32>
  %6 = bufferization.clone %5 : memref<2xf32> to memref<2xf32>
  memref.dealloc %5 : memref<2xf32>
  scf.yield %6 : memref<2xf32>
}
memref.copy %2, %arg4 : memref<2xf32> to memref<2xf32>
memref.dealloc %2 : memref<2xf32>
return
}
// CHECK-NEXT: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for
// CHECK-NEXT: memref.dealloc
// CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc
// CHECK-NEXT: scf.yield %[[ALLOC2]]
// CHECK: memref.copy %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// -----
// CHECK-LABEL: func @clone_nested_region
func.func @clone_nested_region(%arg0: index, %arg1: index, %arg2: index) -> memref<?x?xf32> {
%cmp = arith.cmpi eq, %arg0, %arg1 : index
%0 = arith.cmpi eq, %arg0, %arg1 : index
%1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
%2 = scf.if %0 -> (memref<?x?xf32>) {
  %3 = scf.if %cmp -> (memref<?x?xf32>) {
    %9 = bufferization.clone %1 : memref<?x?xf32> to memref<?x?xf32>
    scf.yield %9 : memref<?x?xf32>
  } else {
    %7 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
    %10 = bufferization.clone %7 : memref<?x?xf32> to memref<?x?xf32>
    memref.dealloc %7 : memref<?x?xf32>
    scf.yield %10 : memref<?x?xf32>
  }
  %6 = bufferization.clone %3 : memref<?x?xf32> to memref<?x?xf32>
  memref.dealloc %3 : memref<?x?xf32>
  scf.yield %6 : memref<?x?xf32>
} else {
  %3 = memref.alloc(%arg1, %arg1) : memref<?x?xf32>
  %6 = bufferization.clone %3 : memref<?x?xf32> to memref<?x?xf32>
  memref.dealloc %3 : memref<?x?xf32>
  scf.yield %6 : memref<?x?xf32>
}
memref.dealloc %1 : memref<?x?xf32>
return %2 : memref<?x?xf32>
}
// CHECK: %[[ALLOC1:.*]] = memref.alloc
// CHECK-NEXT: %[[ALLOC2:.*]] = scf.if
// CHECK-NEXT: %[[ALLOC3_1:.*]] = scf.if
// CHECK-NEXT: %[[ALLOC4_1:.*]] = bufferization.clone %[[ALLOC1]]
// CHECK-NEXT: scf.yield %[[ALLOC4_1]]
// CHECK: %[[ALLOC4_2:.*]] = memref.alloc
// CHECK-NEXT: scf.yield %[[ALLOC4_2]]
// CHECK: scf.yield %[[ALLOC3_1]]
// CHECK: %[[ALLOC3_2:.*]] = memref.alloc
// CHECK-NEXT: scf.yield %[[ALLOC3_2]]
// CHECK: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: return %[[ALLOC2]]