// RUN: mlir-opt %s --optimize-allocation-liveness --split-input-file | FileCheck %s

// Regression tests for the --optimize-allocation-liveness pass. Each case
// below is an independent module (separated by `// -----`) and checks where
// `memref.dealloc` ops end up relative to the users of the matching
// `memref.alloc`.

// CHECK-LABEL: func.func private @optimize_alloc_location(
// CHECK-SAME: %[[VAL_0:.*]]: memref<45x24x256xf32, 1>,
// CHECK-SAME: %[[VAL_1:.*]]: memref<24x256xf32, 1>,
// CHECK-SAME: %[[VAL_2:.*]]: memref<256xf32, 1>) {
// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_4:.*]] = memref.alloc() {alignment = 64 : i64} : memref<45x6144xf32, 1>
// CHECK: %[[VAL_5:.*]] = memref.expand_shape %[[VAL_4]] {{\[\[}}0], [1, 2]] output_shape [45, 24, 256] : memref<45x6144xf32, 1> into memref<45x24x256xf32, 1>
// CHECK: memref.dealloc %[[VAL_4]] : memref<45x6144xf32, 1>
// CHECK: %[[VAL_6:.*]] = memref.alloc() {alignment = 64 : i64} : memref<24x256xf32, 1>
// CHECK: %[[VAL_7:.*]] = arith.constant 1.000000e+00 : f32
// CHECK: memref.store %[[VAL_7]], %[[VAL_6]]{{\[}}%[[VAL_3]], %[[VAL_3]]] : memref<24x256xf32, 1>
// CHECK: memref.dealloc %[[VAL_6]] : memref<24x256xf32, 1>
// CHECK: return
// CHECK: }

// Basic case: the deallocation of %alloc is expected right after its last
// user (the memref.expand_shape), i.e. before %alloc_1 is allocated. The
// deallocation of %alloc_1 stays after the memref.store that uses it.
func.func private @optimize_alloc_location(%arg0: memref<45x24x256xf32, 1>, %arg1: memref<24x256xf32, 1>, %arg2: memref<256xf32, 1>) -> () {
  %c1 = arith.constant 1 : index
  %alloc = memref.alloc() {alignment = 64 : i64} : memref<45x6144xf32, 1>
  %expand_shape = memref.expand_shape %alloc [[0], [1, 2]] output_shape [45, 24, 256] : memref<45x6144xf32, 1> into memref<45x24x256xf32, 1>
  %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<24x256xf32, 1>
  %cf1 = arith.constant 1.0 : f32
  memref.store %cf1, %alloc_1[%c1, %c1] : memref<24x256xf32, 1>
  memref.dealloc %alloc : memref<45x6144xf32, 1>
  memref.dealloc %alloc_1 : memref<24x256xf32, 1>
  return
}

// -----

// CHECK-LABEL: func.func private @test_multiple_deallocation_moves(
// CHECK-SAME: %[[VAL_0:.*]]: memref<45x24x256xf32, 1>,
// CHECK-SAME: %[[VAL_1:.*]]: memref<24x256xf32, 1>,
// CHECK-SAME: %[[VAL_2:.*]]: memref<256xf32, 1>) {
// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_4:.*]] = memref.alloc() {alignment = 64 : i64} : memref<45x6144xf32, 1>
// CHECK: %[[VAL_5:.*]] = memref.expand_shape %[[VAL_4]] {{\[\[}}0], [1, 2]] output_shape [45, 24, 256] : memref<45x6144xf32, 1> into memref<45x24x256xf32, 1>
// CHECK: memref.dealloc %[[VAL_4]] : memref<45x6144xf32, 1>
// CHECK: %[[VAL_6:.*]] = memref.alloc() {alignment = 64 : i64} : memref<24x256xf32, 1>
// CHECK: %[[VAL_7:.*]] = memref.alloc() {alignment = 64 : i64} : memref<45x6144xf32, 1>
// CHECK: %[[VAL_8:.*]] = memref.expand_shape %[[VAL_7]] {{\[\[}}0], [1, 2]] output_shape [45, 24, 256] : memref<45x6144xf32, 1> into memref<45x24x256xf32, 1>
// CHECK: memref.dealloc %[[VAL_7]] : memref<45x6144xf32, 1>
// CHECK: %[[VAL_9:.*]] = memref.alloc() {alignment = 64 : i64} : memref<45x6144xf32, 1>
// CHECK: %[[VAL_10:.*]] = memref.expand_shape %[[VAL_9]] {{\[\[}}0], [1, 2]] output_shape [45, 24, 256] : memref<45x6144xf32, 1> into memref<45x24x256xf32, 1>
// CHECK: memref.dealloc %[[VAL_9]] : memref<45x6144xf32, 1>
// CHECK: %[[VAL_11:.*]] = memref.alloc() {alignment = 64 : i64} : memref<45x6144xf32, 1>
// CHECK: %[[VAL_12:.*]] = memref.expand_shape %[[VAL_11]] {{\[\[}}0], [1, 2]] output_shape [45, 24, 256] : memref<45x6144xf32, 1> into memref<45x24x256xf32, 1>
// CHECK: memref.dealloc %[[VAL_11]] : memref<45x6144xf32, 1>
// CHECK: %[[VAL_13:.*]] = arith.constant 1.000000e+00 : f32
// CHECK: memref.store %[[VAL_13]], %[[VAL_6]]{{\[}}%[[VAL_3]], %[[VAL_3]]] : memref<24x256xf32, 1>
// CHECK: memref.dealloc %[[VAL_6]] : memref<24x256xf32, 1>
// CHECK: return
// CHECK: }

// Several deallocations are rearranged in the same function: each of %alloc,
// %alloc_2, %alloc_3 and %alloc_4 is expected to be freed right after its own
// memref.expand_shape, while %alloc_1 is freed after the memref.store.
func.func private @test_multiple_deallocation_moves(%arg0: memref<45x24x256xf32, 1>, %arg1: memref<24x256xf32, 1>, %arg2: memref<256xf32, 1>) -> () {
  %c1 = arith.constant 1 : index
  %alloc = memref.alloc() {alignment = 64 : i64} : memref<45x6144xf32, 1>
  %expand_shape = memref.expand_shape %alloc [[0], [1, 2]] output_shape [45, 24, 256] : memref<45x6144xf32, 1> into memref<45x24x256xf32, 1>
  %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<24x256xf32, 1>
  %alloc_2 = memref.alloc() {alignment = 64 : i64} : memref<45x6144xf32, 1>
  %expand_shape2 = memref.expand_shape %alloc_2 [[0], [1, 2]] output_shape [45, 24, 256] : memref<45x6144xf32, 1> into memref<45x24x256xf32, 1>
  %alloc_3 = memref.alloc() {alignment = 64 : i64} : memref<45x6144xf32, 1>
  %expand_shape3 = memref.expand_shape %alloc_3 [[0], [1, 2]] output_shape [45, 24, 256] : memref<45x6144xf32, 1> into memref<45x24x256xf32, 1>
  %alloc_4 = memref.alloc() {alignment = 64 : i64} : memref<45x6144xf32, 1>
  %expand_shape4 = memref.expand_shape %alloc_4 [[0], [1, 2]] output_shape [45, 24, 256] : memref<45x6144xf32, 1> into memref<45x24x256xf32, 1>
  %cf1 = arith.constant 1.0 : f32
  memref.store %cf1, %alloc_1[%c1, %c1] : memref<24x256xf32, 1>
  memref.dealloc %alloc : memref<45x6144xf32, 1>
  memref.dealloc %alloc_1 : memref<24x256xf32, 1>
  memref.dealloc %alloc_2 : memref<45x6144xf32, 1>
  memref.dealloc %alloc_3 : memref<45x6144xf32, 1>
  memref.dealloc %alloc_4 : memref<45x6144xf32, 1>
  return
}

// -----

// CHECK-LABEL: func.func private @test_users_in_different_blocks_linalig_generic(
// CHECK-SAME: %[[VAL_0:.*]]: memref<1x20x20xf32, 1>) -> memref<1x32x32xf32, 1> {
// CHECK: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_3:.*]] = memref.alloc() {alignment = 64 : i64} : memref<1x32x32xf32, 1>
// CHECK: %[[VAL_4:.*]] = memref.subview %[[VAL_3]][0, 0, 0] [1, 20, 20] [1, 1, 1] : memref<1x32x32xf32, 1> to memref<1x20x20xf32, strided<[1024, 32, 1]>, 1>
// CHECK: memref.copy %[[VAL_0]], %[[VAL_4]] : memref<1x20x20xf32, 1> to memref<1x20x20xf32, strided<[1024, 32, 1]>, 1>
// CHECK: %[[VAL_5:.*]] = memref.alloc() {alignment = 64 : i64} : memref<1x32x32x1xf32, 1>
// CHECK: %[[VAL_6:.*]] = memref.alloc() {alignment = 64 : i64} : memref<1x8x32x1x4xf32, 1>
// CHECK: linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} outs(%[[VAL_6]] : memref<1x8x32x1x4xf32, 1>) {
// CHECK: ^bb0(%[[VAL_7:.*]]: f32):
// CHECK: %[[VAL_8:.*]] = linalg.index 0 : index
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_8]], %[[VAL_8]], %[[VAL_8]], %[[VAL_2]]] : memref<1x32x32x1xf32, 1>
// CHECK: linalg.yield %[[VAL_9]] : f32
// CHECK: }
// CHECK: memref.dealloc %[[VAL_5]] : memref<1x32x32x1xf32, 1>
// CHECK: %[[VAL_10:.*]] = memref.collapse_shape %[[VAL_6]] {{\[\[}}0, 1], [2], [3], [4]] : memref<1x8x32x1x4xf32, 1> into memref<8x32x1x4xf32, 1>
// CHECK: memref.dealloc %[[VAL_6]] : memref<1x8x32x1x4xf32, 1>
// CHECK: return %[[VAL_3]] : memref<1x32x32xf32, 1>
// CHECK: }

// The last user of %alloc_0 is a memref.load nested inside the body of the
// linalg.generic, i.e. in a different block than the allocation. Its
// deallocation is expected right after the linalg.generic operation.
// The deallocation of %alloc_1 is not moved because of the later
// memref.collapse_shape that still uses %alloc_1.
func.func private @test_users_in_different_blocks_linalig_generic(%arg0: memref<1x20x20xf32, 1>) -> (memref<1x32x32xf32, 1>) {
  %cst = arith.constant 0.000000e+00 : f32
  %c0 = arith.constant 0 : index
  %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x32x32xf32, 1>
  %subview = memref.subview %alloc[0, 0, 0] [1, 20, 20] [1, 1, 1] : memref<1x32x32xf32, 1> to memref<1x20x20xf32, strided<[1024, 32, 1]>, 1>
  memref.copy %arg0, %subview : memref<1x20x20xf32, 1> to memref<1x20x20xf32, strided<[1024, 32, 1]>, 1>
  %alloc_0 = memref.alloc() {alignment = 64 : i64} : memref<1x32x32x1xf32, 1>
  %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<1x8x32x1x4xf32, 1>
  linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} outs(%alloc_1 : memref<1x8x32x1x4xf32, 1>) {
  ^bb0(%out: f32):
    %0 = linalg.index 0 : index
    %8 = memref.load %alloc_0[%0, %0, %0, %c0] : memref<1x32x32x1xf32, 1>
    linalg.yield %8 : f32
  }
  %collapse_shape = memref.collapse_shape %alloc_1 [[0, 1], [2], [3], [4]] : memref<1x8x32x1x4xf32, 1> into memref<8x32x1x4xf32, 1>
  memref.dealloc %alloc_0 : memref<1x32x32x1xf32, 1>
  memref.dealloc %alloc_1 : memref<1x8x32x1x4xf32, 1>
  return %alloc : memref<1x32x32xf32, 1>
}

// -----

// CHECK-LABEL: func.func private @test_deallocs_in_different_block_forops(
// CHECK-SAME: %[[VAL_0:.*]]: memref<45x24x256xf32, 1>,
// CHECK-SAME: %[[VAL_1:.*]]: memref<24x256xf32, 1>,
// CHECK-SAME: %[[VAL_2:.*]]: memref<256xf32, 1>) {
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_5:.*]] = arith.constant 8 : index
// CHECK: %[[VAL_6:.*]] = arith.constant 45 : index
// CHECK: %[[VAL_7:.*]] = arith.constant 24 : index
// CHECK: %[[VAL_8:.*]] = memref.alloc() {alignment = 64 : i64} : memref<45x6144xf32, 1>
// CHECK: %[[VAL_9:.*]] = memref.expand_shape %[[VAL_8]] {{\[\[}}0], [1, 2]] output_shape [45, 24, 256] : memref<45x6144xf32, 1> into memref<45x24x256xf32, 1>
// CHECK: %[[VAL_10:.*]] = memref.alloc() {alignment = 64 : i64} : memref<24x256xf32, 1>
// CHECK: %[[VAL_11:.*]] = memref.alloc() {alignment = 64 : i64} : memref<45x6144xf32, 1>
// CHECK: %[[VAL_12:.*]] = memref.expand_shape %[[VAL_11]] {{\[\[}}0], [1, 2]] output_shape [45, 24, 256] : memref<45x6144xf32, 1> into memref<45x24x256xf32, 1>
// CHECK: memref.dealloc %[[VAL_11]] : memref<45x6144xf32, 1>
// CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_4]] {
// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_5]] {
// CHECK: %[[VAL_15:.*]] = memref.subview %[[VAL_9]]{{\[}}%[[VAL_13]], %[[VAL_14]], 0] [1, 8, 256] [1, 1, 1] : memref<45x24x256xf32, 1> to memref<1x8x256xf32, strided<[6144, 256, 1], offset: ?>, 1>
// CHECK: %[[VAL_16:.*]] = memref.subview %[[VAL_10]]{{\[}}%[[VAL_14]], 0] [8, 256] [1, 1] : memref<24x256xf32, 1> to memref<8x256xf32, strided<[256, 1], offset: ?>, 1>
// CHECK: }
// CHECK: }
// CHECK: memref.dealloc %[[VAL_10]] : memref<24x256xf32, 1>
// CHECK: memref.dealloc %[[VAL_8]] : memref<45x6144xf32, 1>
// CHECK: return
// CHECK: }

// The deallocations of %alloc and %alloc_1 must stay after the scf.for loop
// nest, because both buffers are used (through memref.subview) inside the
// innermost loop; the expected output lists them in the order %alloc_1,
// %alloc. The deallocation of %alloc_2 is expected right after its last user,
// the memref.expand_shape that precedes the loops.
func.func private @test_deallocs_in_different_block_forops(%arg0: memref<45x24x256xf32, 1>, %arg1: memref<24x256xf32, 1>, %arg2: memref<256xf32, 1>) -> () {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c8 = arith.constant 8 : index
  %c45 = arith.constant 45 : index
  %c24 = arith.constant 24 : index
  %alloc = memref.alloc() {alignment = 64 : i64} : memref<45x6144xf32, 1>
  %expand_shape = memref.expand_shape %alloc [[0], [1, 2]] output_shape [45, 24, 256] : memref<45x6144xf32, 1> into memref<45x24x256xf32, 1>
  %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<24x256xf32, 1>
  %alloc_2 = memref.alloc() {alignment = 64 : i64} : memref<45x6144xf32, 1>
  %expand_shape2 = memref.expand_shape %alloc_2 [[0], [1, 2]] output_shape [45, 24, 256] : memref<45x6144xf32, 1> into memref<45x24x256xf32, 1>
  scf.for %arg3 = %c0 to %c45 step %c1 {
    scf.for %arg4 = %c0 to %c24 step %c8 {
      %subview = memref.subview %expand_shape[%arg3, %arg4, 0] [1, 8, 256] [1, 1, 1] : memref<45x24x256xf32, 1> to memref<1x8x256xf32, strided<[6144, 256, 1], offset: ?>, 1>
      %subview_3 = memref.subview %alloc_1[%arg4, 0] [8, 256] [1, 1] : memref<24x256xf32, 1> to memref<8x256xf32, strided<[256, 1], offset: ?>, 1>
    }
  }
  memref.dealloc %alloc : memref<45x6144xf32, 1>
  memref.dealloc %alloc_1 : memref<24x256xf32, 1>
  memref.dealloc %alloc_2 : memref<45x6144xf32, 1>
  return
}

// -----

// CHECK-LABEL: func.func private @test_conditional_deallocation() -> memref<32xf32, 1> {
// CHECK: %[[VAL_0:.*]] = memref.alloc() {alignment = 64 : i64} : memref<32xf32, 1>
// CHECK: %[[VAL_1:.*]] = arith.constant true
// CHECK: %[[VAL_2:.*]] = scf.if %[[VAL_1]] -> (memref<32xf32, 1>) {
// CHECK: memref.dealloc %[[VAL_0]] : memref<32xf32, 1>
// CHECK: %[[VAL_3:.*]] = memref.alloc() {alignment = 64 : i64} : memref<32xf32, 1>
// CHECK: scf.yield %[[VAL_3]] : memref<32xf32, 1>
// CHECK: } else {
// CHECK: scf.yield %[[VAL_0]] : memref<32xf32, 1>
// CHECK: }
// CHECK: return %[[VAL_2]] : memref<32xf32, 1>
// CHECK: }

// Conditional deallocation: %0 is deallocated only in the `then` region of
// the scf.if and is yielded from the `else` region. The deallocation must
// not be hoisted out of the conditional branch, so the expected output keeps
// it inside the `then` region. The function returns the scf.if result.
func.func private @test_conditional_deallocation() -> memref<32xf32, 1> {
  %0 = memref.alloc() {alignment = 64 : i64} : memref<32xf32, 1>
  %true = arith.constant true
  %3 = scf.if %true -> (memref<32xf32, 1>) {
    memref.dealloc %0: memref<32xf32, 1>
    %1 = memref.alloc() {alignment = 64 : i64} : memref<32xf32, 1>
    scf.yield %1 : memref<32xf32, 1>
  } else {
    scf.yield %0 : memref<32xf32, 1>
  }
  return %3 : memref<32xf32, 1>
}