Fixes a bug in the affine fusion pipeline where an incorrect slice is computed. After the slice computation is done, the original domain of the source is compared with the new domain that will result if the fusion succeeds. The new domain must be a subset of the original domain for the slice to be valid. If the computed slice is incorrect, fusion based on such a slice is avoided. Relevant test cases are added/edited. Fixes https://bugs.llvm.org/show_bug.cgi?id=49203 Differential Revision: https://reviews.llvm.org/D98239
163 lines
7.6 KiB
MLIR
163 lines
7.6 KiB
MLIR
// RUN: mlir-opt %s -test-loop-fusion -test-loop-fusion-slice-computation -split-input-file -verify-diagnostics | FileCheck %s

// -----

// CHECK-LABEL: func @slice_depth1_loop_nest() {
// The first loop stores to %0 over [0, 16); the second loop loads from %0 over
// [0, 5). The remarks expect the slice of loop 1 into loop 0 to be reported
// as incorrect and the slice of loop 0 into loop 1 to be reported normally.
// NOTE: each expected-remark@-N annotation must stay exactly N lines below the
// affine.for it refers to; do not insert lines in between.
func @slice_depth1_loop_nest() {
  %0 = memref.alloc() : memref<100xf32>
  %cst = constant 7.000000e+00 : f32
  affine.for %i0 = 0 to 16 {
    // expected-remark@-1 {{Incorrect slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 1) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] )}}
    affine.store %cst, %0[%i0] : memref<100xf32>
  }
  affine.for %i1 = 0 to 5 {
    // expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] )}}
    %1 = affine.load %0[%i1] : memref<100xf32>
  }
  return
}

// -----

// CHECK-LABEL: func @forward_slice_slice_depth1_loop_nest() {
// The first loop stores to %0 over [0, 5); the second loop loads from %0 over
// [0, 16). The remarks expect the slice of loop 1 into loop 0 to be reported
// normally and the slice of loop 0 into loop 1 to be reported as incorrect.
// NOTE: each expected-remark@-N annotation must stay exactly N lines below the
// affine.for it refers to; do not insert lines in between.
func @forward_slice_slice_depth1_loop_nest() {
  %0 = memref.alloc() : memref<100xf32>
  %cst = constant 7.000000e+00 : f32
  affine.for %i0 = 0 to 5 {
    // expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 1) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] )}}
    affine.store %cst, %0[%i0] : memref<100xf32>
  }
  affine.for %i1 = 0 to 16 {
    // expected-remark@-1 {{Incorrect slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] )}}
    %1 = affine.load %0[%i1] : memref<100xf32>
  }
  return
}

// -----

// Loop %i0 writes to locations [2, 17] and loop %i1 reads from locations
// [3, 6]. Slice loop bounds should be adjusted such that the load/store are
// for the same location.
// CHECK-LABEL: func @slice_depth1_loop_nest_with_offsets() {
// NOTE: each expected-remark@-N annotation must stay exactly N lines below the
// affine.for it refers to; do not insert lines in between.
func @slice_depth1_loop_nest_with_offsets() {
  %0 = memref.alloc() : memref<100xf32>
  %cst = constant 7.000000e+00 : f32
  affine.for %i0 = 0 to 16 {
    // expected-remark@-1 {{Incorrect slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 2) loop bounds: [(d0) -> (d0 + 3), (d0) -> (d0 + 4)] )}}
    // Store index is %i0 + 2.
    %a0 = affine.apply affine_map<(d0) -> (d0 + 2)>(%i0)
    affine.store %cst, %0[%a0] : memref<100xf32>
  }
  affine.for %i1 = 4 to 8 {
    // expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0) -> (d0 - 3), (d0) -> (d0 - 2)] )}}
    // Load index is %i1 - 1.
    %a1 = affine.apply affine_map<(d0) -> (d0 - 1)>(%i1)
    %1 = affine.load %0[%a1] : memref<100xf32>
  }
  return
}

// -----

// Slices at loop depth 1 should only slice the loop bounds of the first loop.
// Slices at loop depth 2 should slice loop bounds of both loops.
// CHECK-LABEL: func @slice_depth2_loop_nest() {
// Store nest: %i0 in [0, 16), %i1 in [0, 16). Load nest: %i2 in [0, 10),
// %i3 in [0, 8). Both nests access %0 with their two induction variables.
// NOTE: each expected-remark@-N annotation must stay exactly N lines below the
// affine.for it refers to; do not insert lines in between.
func @slice_depth2_loop_nest() {
  %0 = memref.alloc() : memref<100x100xf32>
  %cst = constant 7.000000e+00 : f32
  affine.for %i0 = 0 to 16 {
    // expected-remark@-1 {{Incorrect slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 1) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (8)] )}}
    // expected-remark@-2 {{Incorrect slice ( src loop: 1, dst loop: 0, depth: 2 : insert point: (2, 1) loop bounds: [(d0, d1) -> (d0), (d0, d1) -> (d0 + 1)] [(d0, d1) -> (d1), (d0, d1) -> (d1 + 1)] )}}
    affine.for %i1 = 0 to 16 {
      affine.store %cst, %0[%i0, %i1] : memref<100x100xf32>
    }
  }
  affine.for %i2 = 0 to 10 {
    // expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (8)] )}}
    // expected-remark@-2 {{slice ( src loop: 0, dst loop: 1, depth: 2 : insert point: (2, 0) loop bounds: [(d0, d1) -> (d0), (d0, d1) -> (d0 + 1)] [(d0, d1) -> (d1), (d0, d1) -> (d1 + 1)] )}}
    affine.for %i3 = 0 to 8 {
      %1 = affine.load %0[%i2, %i3] : memref<100x100xf32>
    }
  }
  return
}

// -----

// The load at depth 1 in loop nest %i2 prevents slicing loop nest %i0 at depths
// greater than 1. However, loop nest %i2 can be sliced into loop nest %i0 at
// depths 1 and 2 because the dependent store in loop nest %i0 is at depth 2.
// CHECK-LABEL: func @slice_depth2_loop_nest_two_loads() {
// Store nest: %i0 in [0, 16), %i1 in [0, 16). Load nest: %i2 in [0, 10) with
// an inner load over %i3 in [0, 8) and a second load directly in the %i2 body.
// NOTE: each expected-remark@-N annotation must stay exactly N lines below the
// affine.for it refers to; do not insert lines in between.
func @slice_depth2_loop_nest_two_loads() {
  %0 = memref.alloc() : memref<100x100xf32>
  %c0 = constant 0 : index
  %cst = constant 7.000000e+00 : f32
  affine.for %i0 = 0 to 16 {
    // expected-remark@-1 {{Incorrect slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 1) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (8)] )}}
    // expected-remark@-2 {{Incorrect slice ( src loop: 1, dst loop: 0, depth: 2 : insert point: (2, 1) loop bounds: [(d0, d1) -> (d0), (d0, d1) -> (d0 + 1)] [(d0, d1) -> (0), (d0, d1) -> (16)] )}}
    affine.for %i1 = 0 to 16 {
      affine.store %cst, %0[%i0, %i1] : memref<100x100xf32>
    }
  }
  affine.for %i2 = 0 to 10 {
    // expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (8)] )}}
    affine.for %i3 = 0 to 8 {
      %1 = affine.load %0[%i2, %i3] : memref<100x100xf32>
    }
    // Second load, outside the inner loop, at column %c0.
    %2 = affine.load %0[%i2, %c0] : memref<100x100xf32>
  }
  return
}

// -----

// The store at depth 1 in loop nest %i0 prevents slicing loop nest %i2 at
// depths greater than 1 into loop nest %i0. However, loop nest %i0 can be
// sliced into loop nest %i2 at depths 1 and 2 because the dependent load in
// loop nest %i2 is at depth 2.
// CHECK-LABEL: func @slice_depth2_loop_nest_two_stores() {
// Store nest: %i0 in [0, 16) with an inner store over %i1 in [0, 16) and a
// second store directly in the %i0 body. Load nest: %i2 in [0, 10), %i3 in
// [0, 8).
// NOTE: each expected-remark@-N annotation must stay exactly N lines below the
// affine.for it refers to; do not insert lines in between.
func @slice_depth2_loop_nest_two_stores() {
  %0 = memref.alloc() : memref<100x100xf32>
  %c0 = constant 0 : index
  %cst = constant 7.000000e+00 : f32
  affine.for %i0 = 0 to 16 {
    // expected-remark@-1 {{Incorrect slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 2) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (8)] )}}
    affine.for %i1 = 0 to 16 {
      affine.store %cst, %0[%i0, %i1] : memref<100x100xf32>
    }
    // Second store, outside the inner loop, at column %c0.
    affine.store %cst, %0[%i0, %c0] : memref<100x100xf32>
  }
  affine.for %i2 = 0 to 10 {
    // expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (16)] )}}
    // expected-remark@-2 {{slice ( src loop: 0, dst loop: 1, depth: 2 : insert point: (2, 0) loop bounds: [(d0, d1) -> (d0), (d0, d1) -> (d0 + 1)] [(d0, d1) -> (0), (d0, d1) -> (16)] )}}
    affine.for %i3 = 0 to 8 {
      %1 = affine.load %0[%i2, %i3] : memref<100x100xf32>
    }
  }
  return
}

// -----

// Test loop nest which has a smaller outer trip count than its inner loop.
// CHECK-LABEL: func @slice_loop_nest_with_smaller_outer_trip_count() {
// Store nest: %i0 in [0, 16), %i1 in [0, 16). Load nest: outer %i2 in [0, 8),
// inner %i3 in [0, 10), so the load nest's outer loop has fewer iterations
// than its inner loop.
// NOTE: each expected-remark@-N annotation must stay exactly N lines below the
// affine.for it refers to; do not insert lines in between.
func @slice_loop_nest_with_smaller_outer_trip_count() {
  %0 = memref.alloc() : memref<100x100xf32>
  %c0 = constant 0 : index
  %cst = constant 7.000000e+00 : f32
  affine.for %i0 = 0 to 16 {
    // expected-remark@-1 {{Incorrect slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 1) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (10)] )}}
    // expected-remark@-2 {{Incorrect slice ( src loop: 1, dst loop: 0, depth: 2 : insert point: (2, 1) loop bounds: [(d0, d1) -> (d0), (d0, d1) -> (d0 + 1)] [(d0, d1) -> (d1), (d0, d1) -> (d1 + 1)] )}}
    affine.for %i1 = 0 to 16 {
      affine.store %cst, %0[%i0, %i1] : memref<100x100xf32>
    }
  }
  affine.for %i2 = 0 to 8 {
    // expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (10)] )}}
    // expected-remark@-2 {{slice ( src loop: 0, dst loop: 1, depth: 2 : insert point: (2, 0) loop bounds: [(d0, d1) -> (d0), (d0, d1) -> (d0 + 1)] [(d0, d1) -> (d1), (d0, d1) -> (d1 + 1)] )}}
    affine.for %i3 = 0 to 10 {
      %1 = affine.load %0[%i2, %i3] : memref<100x100xf32>
    }
  }
  return
}
|