Files
clang-p2996/mlir/test/Dialect/Affine/loop-fusion-dependence-check.mlir
Philip Lassen 608a663c8e [MLIR] Clean up pass options for test-loop-fusion and affine-super-vectorizer-test (#87606)
Before the change, the `test-loop-fusion` and `affine-super-vectorizer-test`
options were in their own category. This was because they used the
standard LLVM command-line parsing with `llvm::cl::opt`. This PR moves
them over to the MLIR `Pass::Option` class.

Before the change
```
$ mlir-opt --help

...

  General options:
    ...

  Compiler passes to run
      Passes:
         ...
      Pass Pipelines:
        ...
  Generic Options:
       ....

  affine-super-vectorizer-test options:

    --backward-slicing                           
     ...
    --vectorize-affine-loop-nest
    
  test-loop-fusion options:

    --test-loop-fusion-dependence-check 
   ...
    --test-loop-fusion-transformation 
```

After the change 

```
$ mlir-opt --help

...

  General options:
    ...

  Compiler passes to run
      Passes:
          ...
          --affine-super-vectorizer-test
             --backward-slicing               
                ...
             --vectorize-affine-loop-nest  
          ...
          --test-loop-fusion options:
               --test-loop-fusion-dependence-check   
                ...
                --test-loop-fusion-transformation 
           ...
      Pass Pipelines:
        ...
  Generic Options:
      ...
```

---------

Signed-off-by: philass <plassen@groq.com>
2024-04-04 12:26:33 +02:00

337 lines
11 KiB
MLIR

// RUN: mlir-opt -allow-unregistered-dialect %s -test-loop-fusion=test-loop-fusion-dependence-check -split-input-file -verify-diagnostics | FileCheck %s
// -----
// CHECK-LABEL: func @cannot_fuse_would_create_cycle() {
func.func @cannot_fuse_would_create_cycle() {
// Three sibling loops share memrefs '%a', '%b' and '%c'. Diagnostics are
// verified on the first and third loops (loop nests 0 and 2), one for each
// fusion direction between that pair.
%a = memref.alloc() : memref<10xf32>
%b = memref.alloc() : memref<10xf32>
%c = memref.alloc() : memref<10xf32>
%cf7 = arith.constant 7.0 : f32
// Set up the following dependences:
// 1) loop0 -> loop1 on memref '%a'
// 2) loop0 -> loop2 on memref '%b'
// 3) loop1 -> loop2 on memref '%c'
// Fusing loop nest '%i0' and loop nest '%i2' would create a cycle.
// loop0: loads from '%a', stores to '%b'.
affine.for %i0 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 0}}
%v0 = affine.load %a[%i0] : memref<10xf32>
affine.store %cf7, %b[%i0] : memref<10xf32>
}
// loop1: stores to '%a', loads from '%c'. No diagnostic is attached here.
affine.for %i1 = 0 to 10 {
affine.store %cf7, %a[%i1] : memref<10xf32>
%v1 = affine.load %c[%i1] : memref<10xf32>
}
// loop2: loads from '%b', stores to '%c'.
affine.for %i2 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 0}}
%v2 = affine.load %b[%i2] : memref<10xf32>
affine.store %cf7, %c[%i2] : memref<10xf32>
}
return
}
// -----
// CHECK-LABEL: func @can_fuse_rar_dependence() {
func.func @can_fuse_rar_dependence() {
// Same three-loop layout as the cycle test, except that loop1 only loads
// from '%a' instead of storing to it.
%a = memref.alloc() : memref<10xf32>
%b = memref.alloc() : memref<10xf32>
%c = memref.alloc() : memref<10xf32>
%cf7 = arith.constant 7.0 : f32
// Set up the following dependences:
// Make dependence from 0 to 1 on '%a' read-after-read.
// 1) loop0 -> loop1 on memref '%a'
// 2) loop0 -> loop2 on memref '%b'
// 3) loop1 -> loop2 on memref '%c'
// Should fuse: no fusion preventing remarks should be emitted for this test.
// loop0: loads from '%a', stores to '%b'.
affine.for %i0 = 0 to 10 {
%v0 = affine.load %a[%i0] : memref<10xf32>
affine.store %cf7, %b[%i0] : memref<10xf32>
}
// loop1: loads from '%a' and '%c'; it performs no stores.
affine.for %i1 = 0 to 10 {
%v1 = affine.load %a[%i1] : memref<10xf32>
%v2 = affine.load %c[%i1] : memref<10xf32>
}
// loop2: loads from '%b', stores to '%c'.
affine.for %i2 = 0 to 10 {
%v3 = affine.load %b[%i2] : memref<10xf32>
affine.store %cf7, %c[%i2] : memref<10xf32>
}
return
}
// -----
// CHECK-LABEL: func @can_fuse_different_memrefs() {
func.func @can_fuse_different_memrefs() {
// Same three-loop layout as the cycle test, except that loop1 stores to a
// fourth memref '%d' rather than to '%a'.
%a = memref.alloc() : memref<10xf32>
%b = memref.alloc() : memref<10xf32>
%c = memref.alloc() : memref<10xf32>
%d = memref.alloc() : memref<10xf32>
%cf7 = arith.constant 7.0 : f32
// Set up the following dependences:
// loop1 stores to the unrelated memref '%d' instead of '%a', so there is no
// dependence between loop0 and loop1 (loop0 never accesses '%d').
// 1) loop0 -> loop2 on memref '%b'
// 2) loop1 -> loop2 on memref '%c'
// Should fuse: no fusion preventing remarks should be emitted for this test.
// loop0: loads from '%a', stores to '%b'.
affine.for %i0 = 0 to 10 {
%v0 = affine.load %a[%i0] : memref<10xf32>
affine.store %cf7, %b[%i0] : memref<10xf32>
}
// loop1: stores to '%d', loads from '%c'.
affine.for %i1 = 0 to 10 {
affine.store %cf7, %d[%i1] : memref<10xf32>
%v1 = affine.load %c[%i1] : memref<10xf32>
}
// loop2: loads from '%b', stores to '%c'.
affine.for %i2 = 0 to 10 {
%v2 = affine.load %b[%i2] : memref<10xf32>
affine.store %cf7, %c[%i2] : memref<10xf32>
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_intermediate_store() {
func.func @should_not_fuse_across_intermediate_store() {
// Two loops that load from '%0' are separated by a top-level store to the
// same memref. A diagnostic is verified on each loop, one per direction.
%0 = memref.alloc() : memref<10xf32>
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
// loop0: loads from '%0' and passes the value to the unregistered op "op0".
affine.for %i0 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}}
%v0 = affine.load %0[%i0] : memref<10xf32>
"op0"(%v0) : (f32) -> ()
}
// Should not fuse loop nests '%i0' and '%i1' across top-level store.
affine.store %cf7, %0[%c0] : memref<10xf32>
// loop1: loads from '%0' and passes the value to the unregistered op "op1".
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}}
%v1 = affine.load %0[%i1] : memref<10xf32>
"op1"(%v1) : (f32) -> ()
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_intermediate_load() {
func.func @should_not_fuse_across_intermediate_load() {
// Two loops that store to '%0' are separated by a top-level load from the
// same memref. A diagnostic is verified on each loop, one per direction.
%0 = memref.alloc() : memref<10xf32>
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
// loop0: stores '%cf7' to '%0'.
affine.for %i0 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}}
affine.store %cf7, %0[%i0] : memref<10xf32>
}
// Should not fuse loop nests '%i0' and '%i1' across top-level load.
%v0 = affine.load %0[%c0] : memref<10xf32>
// The loaded value is consumed by the unregistered op "op0".
"op0"(%v0) : (f32) -> ()
// loop1: stores '%cf7' to '%0'.
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}}
affine.store %cf7, %0[%i1] : memref<10xf32>
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_ssa_value_def() {
func.func @should_not_fuse_across_ssa_value_def() {
// Two loops are separated by a top-level load from '%1' and by the
// definition of '%c2', which the second loop uses as a store index.
%0 = memref.alloc() : memref<10xf32>
%1 = memref.alloc() : memref<10xf32>
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
// loop0: copies '%0' into '%1' element by element.
affine.for %i0 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}}
%v0 = affine.load %0[%i0] : memref<10xf32>
affine.store %v0, %1[%i0] : memref<10xf32>
}
// Loop nest '%i0' cannot be fused past load from '%1' due to RAW dependence.
%v1 = affine.load %1[%c0] : memref<10xf32>
"op0"(%v1) : (f32) -> ()
// Loop nest '%i1' cannot be fused past SSA value def '%c2' which it uses.
%c2 = arith.constant 2 : index
// loop1: stores '%cf7' to '%0' at the fixed index '%c2' on every iteration.
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}}
affine.store %cf7, %0[%c2] : memref<10xf32>
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_store_before_load() {
func.func @should_not_fuse_store_before_load() {
// Three sibling loops all access the single memref '%0'. Diagnostics are
// verified on the first and third loops (loop nests 0 and 2); the middle
// loop, which only loads from '%0', sits between them.
%0 = memref.alloc() : memref<10xf32>
// NOTE(review): '%c0' is not referenced anywhere in this function.
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
// loop0: stores to '%0' and then loads the same element back.
affine.for %i0 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 0}}
affine.store %cf7, %0[%i0] : memref<10xf32>
%v0 = affine.load %0[%i0] : memref<10xf32>
}
// loop1: loads from '%0' only.
affine.for %i1 = 0 to 10 {
%v1 = affine.load %0[%i1] : memref<10xf32>
}
// loop2: stores to '%0' and then loads the same element back.
affine.for %i2 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 0}}
affine.store %cf7, %0[%i2] : memref<10xf32>
%v2 = affine.load %0[%i2] : memref<10xf32>
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_load_at_depth1() {
func.func @should_not_fuse_across_load_at_depth1() {
// Depth-1 variant: the two candidate loops are nested inside the outer
// '%i0' loop and are separated by a load from '%0' in the '%i0' body.
%0 = memref.alloc() : memref<10x10xf32>
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
affine.for %i0 = 0 to 10 {
// First inner loop: stores '%cf7' to row '%i0' of '%0'.
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}}
affine.store %cf7, %0[%i0, %i1] : memref<10x10xf32>
}
// Intermediate load from the same row of '%0', between the two inner loops.
%v1 = affine.load %0[%i0, %c0] : memref<10x10xf32>
// Second inner loop: stores '%cf7' to row '%i0' of '%0'.
affine.for %i3 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}}
affine.store %cf7, %0[%i0, %i3] : memref<10x10xf32>
}
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_load_in_loop_at_depth1() {
func.func @should_not_fuse_across_load_in_loop_at_depth1() {
// Depth-1 variant with three inner loops inside '%i0'. Diagnostics are
// verified on the first and third inner loops (loop nests 0 and 2); the
// middle inner loop loads from the memref that the other two store to.
%0 = memref.alloc() : memref<10x10xf32>
// NOTE(review): '%c0' is not referenced anywhere in this function.
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
affine.for %i0 = 0 to 10 {
// First inner loop: stores '%cf7' to row '%i0' of '%0'.
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 1}}
affine.store %cf7, %0[%i0, %i1] : memref<10x10xf32>
}
// Middle inner loop: loads from row '%i0' of '%0'.
affine.for %i2 = 0 to 10 {
%v1 = affine.load %0[%i0, %i2] : memref<10x10xf32>
}
// Third inner loop: stores '%cf7' to row '%i0' of '%0'.
affine.for %i3 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 1}}
affine.store %cf7, %0[%i0, %i3] : memref<10x10xf32>
}
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_store_at_depth1() {
func.func @should_not_fuse_across_store_at_depth1() {
// Depth-1 variant: the two candidate loops are nested inside the outer
// '%i0' loop and are separated by a store to '%0' in the '%i0' body.
%0 = memref.alloc() : memref<10x10xf32>
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
affine.for %i0 = 0 to 10 {
// First inner loop: loads from row '%i0' of '%0'.
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}}
%v0 = affine.load %0[%i0, %i1] : memref<10x10xf32>
}
// Intermediate store to the same row of '%0', between the two inner loops.
affine.store %cf7, %0[%i0, %c0] : memref<10x10xf32>
// Second inner loop: loads from row '%i0' of '%0'.
affine.for %i3 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}}
%v1 = affine.load %0[%i0, %i3] : memref<10x10xf32>
}
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_store_in_loop_at_depth1() {
func.func @should_not_fuse_across_store_in_loop_at_depth1() {
// Depth-1 variant with three inner loops inside '%i0'. Diagnostics are
// verified on the first and third inner loops (loop nests 0 and 2); the
// middle inner loop stores to the memref that the other two load from.
%0 = memref.alloc() : memref<10x10xf32>
// NOTE(review): '%c0' is not referenced anywhere in this function.
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
affine.for %i0 = 0 to 10 {
// First inner loop: loads from row '%i0' of '%0'.
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 1}}
%v0 = affine.load %0[%i0, %i1] : memref<10x10xf32>
}
// Middle inner loop: stores '%cf7' to row '%i0' of '%0'.
affine.for %i2 = 0 to 10 {
affine.store %cf7, %0[%i0, %i2] : memref<10x10xf32>
}
// Third inner loop: loads from row '%i0' of '%0'.
affine.for %i3 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 1}}
%v1 = affine.load %0[%i0, %i3] : memref<10x10xf32>
}
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_ssa_value_def_at_depth1() {
func.func @should_not_fuse_across_ssa_value_def_at_depth1() {
// Depth-1 variant of the SSA-value-def test: inside the outer '%i0' loop,
// two inner loops are separated by a load from '%1' and by the definition
// of '%c2', which the second inner loop uses as a store index.
%0 = memref.alloc() : memref<10x10xf32>
%1 = memref.alloc() : memref<10x10xf32>
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
affine.for %i0 = 0 to 10 {
// First inner loop: copies row '%i0' of '%0' into row '%i0' of '%1'.
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}}
%v0 = affine.load %0[%i0, %i1] : memref<10x10xf32>
affine.store %v0, %1[%i0, %i1] : memref<10x10xf32>
}
// The RAW dependence from the store in loop nest '%i1' to the load from '%1'
// below prevents fusing loop nest '%i1' into loops that come after the load.
%v1 = affine.load %1[%i0, %c0] : memref<10x10xf32>
"op0"(%v1) : (f32) -> ()
// Loop nest '%i2' cannot be fused past SSA value def '%c2' which it uses.
%c2 = arith.constant 2 : index
// Second inner loop ('%i2'): the diagnostic below refers to it as loop nest 1.
affine.for %i2 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}}
affine.store %cf7, %0[%i0, %c2] : memref<10x10xf32>
}
}
return
}