I noticed that the following two C functions produce different IR:
```
/* Example 1: a switch whose two non-default arms (case 0 and case 1) perform
 * the same assignment, w = v. Any other switch_val leaves w unchanged.
 * Returns the resulting w. */
int switch_duplicate_arms(int switch_val, int v, int w) {
switch (switch_val) {
default:
break;
case 0:
w = v;
break;
case 1: /* duplicate of the case 0 arm */
w = v;
break;
}
return w;
}
/* Example 2: the same logic as the switch version, written as an if/else-if
 * chain. When switch_val is 0 or 1, w is overwritten with v; otherwise w is
 * left unchanged. Returns the resulting w. */
int if_duplicate_arms(int switch_val, int v, int w) {
  if (switch_val == 0)
    w = v;
  else if (switch_val == 1)
    w = v;
  /* Was `return v0;`, which names an undeclared identifier. */
  return w;
}
```
We generate IR that looks like this:
```
; Switch on %1: case 0 goes to block %5, case 1 goes to block %6, and the
; default goes directly to %7. Blocks %5 and %6 are both empty forwarders to
; %7 and both contribute %2 to the phi, so the two case arms are duplicates.
; NOTE(review): this signature has four i32 parameters while the C source
; above has three -- confirm which one matches what was actually compiled.
define i32 @switch_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
switch i32 %1, label %7 [
i32 0, label %5
i32 1, label %6
]
5:
br label %7
6:
br label %7
7:
; [ %3, %4 ] is the default edge coming from the unnamed entry block (%4).
%8 = phi i32 [ %3, %4 ], [ %2, %6 ], [ %2, %5 ]
ret i32 %8
}
; Branch-free form: a single unsigned compare of %1 against 2 (true exactly
; when %1 is 0 or 1) drives a select between %2 and %3.
define i32 @if_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
%5 = icmp ult i32 %1, 2
%6 = select i1 %5, i32 %2, i32 %3
ret i32 %6
}
```
For `switch_duplicate_arms`, taking case 0 and taking case 1 are equivalent,
since blocks %5 and %6 branch to the same location and the incoming values
for the phi %8 from those two blocks are the same. We could remove one of the
duplicate switch targets and update the switch to use the single remaining
target.
On RISC-V, prior to this patch, we generate the following code:
```
# Selects between a2 and a3 based on a1, leaving the result in a0.
# Uses two conditional branches (beq and bnez).
switch_duplicate_arms:
li a4, 1
beq a1, a4, .LBB0_2    # a1 == 1: go pick a2
mv a0, a3              # tentatively a0 = a3
bnez a1, .LBB0_3       # a1 is neither 1 nor 0: keep a3
.LBB0_2:
mv a0, a2              # a1 is 0 or 1: a0 = a2
.LBB0_3:
ret
# Selects between a2 and a3 based on a1, leaving the result in a0.
# Uses a single conditional branch (bltu).
if_duplicate_arms:
li a4, 2
mv a0, a2              # tentatively a0 = a2
bltu a1, a4, .LBB1_2   # unsigned a1 < 2 (a1 is 0 or 1): keep a2
mv a0, a3              # otherwise a0 = a3
.LBB1_2:
ret
```
After this patch, the O3 code is optimized to the icmp + select pair, which
gives us the same codegen as `if_duplicate_arms`, as desired. This results
in one fewer branch instruction in the final assembly.
This may help with both code size and further switch simplification. I found
that this patch causes no significant impact to spec2006/int/ref and
spec2017/intrate/ref.
---------
Co-authored-by: Min Hsu <min@myhsu.dev>
49 lines
1.3 KiB
LLVM
49 lines
1.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S -passes='default<O3>' < %s | FileCheck %s

; Runs the O3 pipeline over a switch on a sign-extended i8 whose arms all feed
; one phi in sw.epilog. In the input, cases 105, 73 and 63 feed the phi a
; constant equal to the case value itself, i.e. the same value as %code, which
; is also what the default arm feeds it. The CHECK lines expect those arms to
; be gone: only cases 108 (-> 113) and 76 (-> 81) remain, the switch operates
; directly on the i8 value, and every other value reaches the phi as %code
; from the entry block.
define i8 @test_switch_with_sext_phi(i8 %code) {
; CHECK-LABEL: define noundef i8 @test_switch_with_sext_phi(
; CHECK-SAME: i8 [[CODE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    switch i8 [[CODE]], label [[SW_EPILOG:%.*]] [
; CHECK-NEXT:      i8 108, label [[SW_BB2:%.*]]
; CHECK-NEXT:      i8 76, label [[SW_BB3:%.*]]
; CHECK-NEXT:    ]
; CHECK:       sw.bb2:
; CHECK-NEXT:    br label [[SW_EPILOG]]
; CHECK:       sw.bb3:
; CHECK-NEXT:    br label [[SW_EPILOG]]
; CHECK:       sw.epilog:
; CHECK-NEXT:    [[PEP_CODE:%.*]] = phi i8 [ 81, [[SW_BB3]] ], [ 113, [[SW_BB2]] ], [ [[CODE]], [[ENTRY:%.*]] ]
; CHECK-NEXT:    ret i8 [[PEP_CODE]]
;
entry:
  %conv = sext i8 %code to i32
  switch i32 %conv, label %sw.default [
  i32 105, label %sw.epilog
  i32 73, label %sw.bb1
  i32 108, label %sw.bb2
  i32 76, label %sw.bb3
  i32 63, label %sw.bb4
  ]

sw.bb1:
  br label %sw.epilog

sw.bb2:
  br label %sw.epilog

sw.bb3:
  br label %sw.epilog

sw.bb4:
  br label %sw.epilog

sw.default:
  br label %sw.epilog

sw.epilog:
  %pep_code = phi i8 [ %code, %sw.default ], [ 63, %sw.bb4 ], [ 81, %sw.bb3 ], [ 113, %sw.bb2 ], [ 73, %sw.bb1 ], [ 105, %entry ]
  ret i8 %pep_code
}