Files
clang-p2996/llvm/test/Transforms/PhaseOrdering/switch-sext.ll
Michael Maitland 6b9952759f [SimplifyCFG] Simplify switch instruction that has duplicate arms (#114262)
I noticed that the two C functions emitted different IR:

```
int switch_duplicate_arms(int switch_val, int v, int w) {
  switch (switch_val) {
  default:
    break;
  case 0:
    w = v;
    break;
  case 1:
    w = v;
    break;
  }
  return w;
}

int if_duplicate_arms(int switch_val, int v, int w) {
  if (switch_val == 0)
    w = v;
  else if (switch_val == 1)
    w = v;
  return v0;
}
```

We generate IR that looks like this:

```
define i32 @switch_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
  switch i32 %1, label %7 [
    i32 0, label %5
    i32 1, label %6
  ]

5:
  br label %7

6:
  br label %7

7:
  %8 = phi i32 [ %3, %4 ], [ %2, %6 ], [ %2, %5 ]
  ret i32 %8
}

define i32 @if_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
  %5 = icmp ult i32 %1, 2
  %6 = select i1 %5, i32 %2, i32 %3
  ret i32 %6
}
```

For `switch_duplicate_arms`, taking case 0 and 1 are the same since %5
and %6
branch to the same location and the incoming values for %8 are the same
from
those blocks. We could remove one on the duplicate switch targets and
update
the switch with the single target.

On RISC-V, prior to this patch, we generate the following code:
```
switch_duplicate_arms:
        li      a4, 1
        beq     a1, a4, .LBB0_2
        mv      a0, a3
        bnez    a1, .LBB0_3
.LBB0_2:
        mv      a0, a2
.LBB0_3:
        ret

if_duplicate_arms:
        li      a4, 2
        mv      a0, a2
        bltu    a1, a4, .LBB1_2
        mv      a0, a3
.LBB1_2:
        ret
```

After this patch, the O3 code is optimized to the icmp + select pair,
which
gives us the same code gen as `if_duplicate_arms`, as desired. This
results
is one less branch instruction in the final assembly.

This may help with both code size and further switch simplification. I
found
that this patch causes no significant impact to spec2006/int/ref and
spec2017/intrate/ref.

---------

Co-authored-by: Min Hsu <min@myhsu.dev>
2024-11-15 15:38:34 +01:00

49 lines
1.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S -passes='default<O3>' < %s | FileCheck %s
define i8 @test_switch_with_sext_phi(i8 %code) {
; CHECK-LABEL: define noundef i8 @test_switch_with_sext_phi(
; CHECK-SAME: i8 [[CODE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i8 [[CODE]], label [[SW_EPILOG:%.*]] [
; CHECK-NEXT: i8 108, label [[SW_BB2:%.*]]
; CHECK-NEXT: i8 76, label [[SW_BB3:%.*]]
; CHECK-NEXT: ]
; CHECK: sw.bb2:
; CHECK-NEXT: br label [[SW_EPILOG]]
; CHECK: sw.bb3:
; CHECK-NEXT: br label [[SW_EPILOG]]
; CHECK: sw.epilog:
; CHECK-NEXT: [[PEP_CODE:%.*]] = phi i8 [ 81, [[SW_BB3]] ], [ 113, [[SW_BB2]] ], [ [[CODE]], [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i8 [[PEP_CODE]]
;
entry:
%conv = sext i8 %code to i32
switch i32 %conv, label %sw.default [
i32 105, label %sw.epilog
i32 73, label %sw.bb1
i32 108, label %sw.bb2
i32 76, label %sw.bb3
i32 63, label %sw.bb4
]
sw.bb1:
br label %sw.epilog
sw.bb2:
br label %sw.epilog
sw.bb3:
br label %sw.epilog
sw.bb4:
br label %sw.epilog
sw.default:
br label %sw.epilog
sw.epilog:
%pep_code = phi i8 [ %code, %sw.default ], [ 63, %sw.bb4 ], [ 81, %sw.bb3 ], [ 113, %sw.bb2 ], [ 73, %sw.bb1 ], [ 105, %entry ]
ret i8 %pep_code
}