Files
clang-p2996/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll
Alexey Zhikhartsev 02077da7e7 Add jump-threading optimization for deterministic finite automata
The current JumpThreading pass does not jump thread loops since it can
result in irreducible control flow that harms other optimizations. This
prevents switch statements inside a loop from being optimized to use
unconditional branches.

This code pattern occurs in the core_state_transition function of
Coremark. The state machine can be implemented manually with goto
statements resulting in a large runtime improvement, and this transform
makes the switch implementation match the goto version in performance.

This patch specifically targets switch statements inside a loop that
have the opportunity to be threaded. Once it identifies an opportunity,
it creates new paths that branch directly to the correct code block.
For example, the left CFG could be transformed to the right CFG:

```
          sw.bb                        sw.bb
        /   |   \                    /   |   \
   case1  case2  case3          case1  case2  case3
        \   |   /                /       |       \
        latch.bb             latch.2  latch.3  latch.1
         br sw.bb              /         |         \
                           sw.bb.2     sw.bb.3     sw.bb.1
                            br case2    br case3    br case1
```

Co-author: Justin Kreiner @jkreiner
Co-author: Ehsan Amiri @amehsan

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D99205
2021-07-27 14:34:04 -04:00

181 lines
5.8 KiB
LLVM

; REQUIRES: asserts
; RUN: opt -S -dfa-jump-threading -debug-only=dfa-jump-threading -disable-output %s 2>&1 | FileCheck %s
; This test checks that the analysis identifies all threadable paths in a
; simple CFG. A threadable path includes a list of basic blocks, the exit
; state, and the block that determines the next state.
; < path of BBs that form a cycle > [ state, determinator ]
define i32 @test1(i32 %num) {
; CHECK: < for.body for.inc > [ 1, for.inc ]
; CHECK-NEXT: < for.body case1 for.inc > [ 2, for.inc ]
; CHECK-NEXT: < for.body case2 for.inc > [ 1, for.inc ]
; CHECK-NEXT: < for.body case2 si.unfold.false for.inc > [ 2, for.inc ]
entry:
br label %for.body
for.body:
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
switch i32 %state, label %for.inc [
i32 1, label %case1
i32 2, label %case2
]
case1:
br label %for.inc
case2:
%cmp = icmp eq i32 %count, 50
%sel = select i1 %cmp, i32 1, i32 2
br label %for.inc
for.inc:
%state.next = phi i32 [ %sel, %case2 ], [ 1, %for.body ], [ 2, %case1 ]
%inc = add nsw i32 %count, 1
%cmp.exit = icmp slt i32 %inc, %num
br i1 %cmp.exit, label %for.body, label %for.end
for.end:
ret i32 0
}
; This test checks that the analysis finds threadable paths in a more
; complicated CFG. Here the FSM is represented as a nested loop, with
; fallthrough cases.
define i32 @test2(i32 %init) {
; CHECK: < loop.3 case2 > [ 3, loop.3 ]
; CHECK-NEXT: < loop.3 case2 loop.1.backedge loop.1 loop.2 > [ 1, loop.1 ]
; CHECK-NEXT: < loop.3 case2 loop.1.backedge si.unfold.false loop.1 loop.2 > [ 4, loop.1.backedge ]
; CHECK-NEXT: < loop.3 case3 loop.2.backedge loop.2 > [ 0, loop.2.backedge ]
; CHECK-NEXT: < loop.3 case3 case4 loop.2.backedge loop.2 > [ 3, loop.2.backedge ]
; CHECK-NEXT: < loop.3 case3 case4 loop.1.backedge loop.1 loop.2 > [ 1, loop.1 ]
; CHECK-NEXT: < loop.3 case3 case4 loop.1.backedge si.unfold.false loop.1 loop.2 > [ 2, loop.1.backedge ]
; CHECK-NEXT: < loop.3 case4 loop.2.backedge loop.2 > [ 3, loop.2.backedge ]
; CHECK-NEXT: < loop.3 case4 loop.1.backedge loop.1 loop.2 > [ 1, loop.1 ]
; CHECK-NEXT: < loop.3 case4 loop.1.backedge si.unfold.false loop.1 loop.2 > [ 2, loop.1.backedge ]
entry:
%cmp = icmp eq i32 %init, 0
%sel = select i1 %cmp, i32 0, i32 2
br label %loop.1
loop.1:
%state.1 = phi i32 [ %sel, %entry ], [ %state.1.be2, %loop.1.backedge ]
br label %loop.2
loop.2:
%state.2 = phi i32 [ %state.1, %loop.1 ], [ %state.2.be, %loop.2.backedge ]
br label %loop.3
loop.3:
%state = phi i32 [ %state.2, %loop.2 ], [ 3, %case2 ]
switch i32 %state, label %infloop.i [
i32 2, label %case2
i32 3, label %case3
i32 4, label %case4
i32 0, label %case0
i32 1, label %case1
]
case2:
br i1 %cmp, label %loop.3, label %loop.1.backedge
case3:
br i1 %cmp, label %loop.2.backedge, label %case4
case4:
br i1 %cmp, label %loop.2.backedge, label %loop.1.backedge
loop.1.backedge:
%state.1.be = phi i32 [ 2, %case4 ], [ 4, %case2 ]
%state.1.be2 = select i1 %cmp, i32 1, i32 %state.1.be
br label %loop.1
loop.2.backedge:
%state.2.be = phi i32 [ 3, %case4 ], [ 0, %case3 ]
br label %loop.2
case0:
br label %exit
case1:
br label %exit
infloop.i:
br label %infloop.i
exit:
ret i32 0
}
declare void @baz()
; Verify that having the switch block as a determinator is handled correctly.
define i32 @main() {
; CHECK: < bb43 bb59 bb3 bb31 bb41 > [ 77, bb43 ]
; CHECK-NEXT: < bb43 bb49 bb59 bb3 bb31 bb41 > [ 77, bb43 ]
bb:
%i = alloca [420 x i8], align 1
%i2 = getelementptr inbounds [420 x i8], [420 x i8]* %i, i64 0, i64 390
br label %bb3
bb3: ; preds = %bb59, %bb
%i4 = phi i8* [ %i2, %bb ], [ %i60, %bb59 ]
%i5 = phi i8 [ 77, %bb ], [ %i64, %bb59 ]
%i6 = phi i32 [ 2, %bb ], [ %i63, %bb59 ]
%i7 = phi i32 [ 26, %bb ], [ %i62, %bb59 ]
%i8 = phi i32 [ 25, %bb ], [ %i61, %bb59 ]
%i9 = icmp sgt i32 %i7, 2
%i10 = select i1 %i9, i32 %i7, i32 2
%i11 = add i32 %i8, 2
%i12 = sub i32 %i11, %i10
%i13 = mul nsw i32 %i12, 3
%i14 = add nsw i32 %i13, %i6
%i15 = sext i32 %i14 to i64
%i16 = getelementptr inbounds i8, i8* %i4, i64 %i15
%i17 = load i8, i8* %i16, align 1
%i18 = icmp sgt i8 %i17, 0
br i1 %i18, label %bb21, label %bb31
bb21: ; preds = %bb3
br i1 true, label %bb59, label %bb43
bb59: ; preds = %bb49, %bb43, %bb31, %bb21
%i60 = phi i8* [ %i44, %bb49 ], [ %i44, %bb43 ], [ %i34, %bb31 ], [ %i4, %bb21 ]
%i61 = phi i32 [ %i45, %bb49 ], [ %i45, %bb43 ], [ %i33, %bb31 ], [ %i8, %bb21 ]
%i62 = phi i32 [ %i47, %bb49 ], [ %i47, %bb43 ], [ %i32, %bb31 ], [ %i7, %bb21 ]
%i63 = phi i32 [ %i48, %bb49 ], [ %i48, %bb43 ], [ 2, %bb31 ], [ %i6, %bb21 ]
%i64 = phi i8 [ %i46, %bb49 ], [ %i46, %bb43 ], [ 77, %bb31 ], [ %i5, %bb21 ]
%i65 = icmp sgt i32 %i62, 0
br i1 %i65, label %bb3, label %bb66
bb31: ; preds = %bb3
%i32 = add nsw i32 %i7, -1
%i33 = add nsw i32 %i8, -1
%i34 = getelementptr inbounds i8, i8* %i4, i64 -15
%i35 = icmp eq i8 %i5, 77
br i1 %i35, label %bb59, label %bb41
bb41: ; preds = %bb31
tail call void @baz()
br label %bb43
bb43: ; preds = %bb41, %bb21
%i44 = phi i8* [ %i34, %bb41 ], [ %i4, %bb21 ]
%i45 = phi i32 [ %i33, %bb41 ], [ %i8, %bb21 ]
%i46 = phi i8 [ 77, %bb41 ], [ %i5, %bb21 ]
%i47 = phi i32 [ %i32, %bb41 ], [ %i7, %bb21 ]
%i48 = phi i32 [ 2, %bb41 ], [ %i6, %bb21 ]
tail call void @baz()
switch i8 %i5, label %bb59 [
i8 68, label %bb49
i8 73, label %bb49
]
bb49: ; preds = %bb43, %bb43
tail call void @baz()
br label %bb59
bb66: ; preds = %bb59
ret i32 0
}