clang-p2996/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll
Craig Topper 014390d937 [RISCV] Implement cross basic block VXRM write insertion. (#70382)
This adds a new pass that inserts VXRM writes for vector instructions, with
the goal of avoiding redundant writes.

The pass runs two dataflow analyses. The first is a forward dataflow to
calculate where a VXRM value is available. The second is a backward
dataflow to determine where a VXRM value is anticipated.

Finally, we use the results of these two dataflows to insert VXRM writes
where a value is anticipated, but not available.

The pass does not split critical edges, so we aren't always able to
eliminate all redundancy.

The pass will only insert VXRM writes on paths that always require them.
2023-11-02 14:09:27 -07:00
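As a rough illustration of the scheme the commit message describes (and not the code of the pass itself), the C++ sketch below runs the backward anticipation dataflow and the forward availability dataflow over a toy CFG and then applies the "anticipated but not available" insertion rule. All names here (Block, meet, computeAnticipated, computeAvailable, placeWrites, TOP/BOTTOM) are hypothetical, and each block is simplified to at most one required rounding mode plus an optional VXRM clobber such as a call; the real pass tracks this per instruction.

#include <cstddef>
#include <vector>

// Lattice over VXRM values: a concrete rounding mode 0-3, TOP (no
// information), or BOTTOM (conflicting values / clobbered).
constexpr int TOP = -1;
constexpr int BOTTOM = -2;

struct Block {
  std::vector<int> Preds, Succs; // indices of predecessor/successor blocks
  int Requires = TOP;            // rounding mode this block needs, or TOP
  bool ClobbersVXRM = false;     // e.g. a call or inline asm, assumed to
                                 // follow the use within the block
};

// Meet: equal values survive, TOP is the identity, everything else is BOTTOM.
static int meet(int A, int B) {
  if (A == TOP) return B;
  if (B == TOP) return A;
  return A == B ? A : BOTTOM;
}

// Backward dataflow: the VXRM value needed on every path leaving a block
// before any other value is required ("anticipated").
static void computeAnticipated(const std::vector<Block> &CFG,
                               std::vector<int> &AntIn,
                               std::vector<int> &AntOut) {
  AntIn.assign(CFG.size(), TOP);
  AntOut.assign(CFG.size(), TOP);
  for (bool Changed = true; Changed;) {
    Changed = false;
    for (std::size_t B = CFG.size(); B-- > 0;) {
      int Out = CFG[B].Succs.empty() ? BOTTOM : TOP; // exit: nothing anticipated
      for (int S : CFG[B].Succs)
        Out = meet(Out, AntIn[S]);
      int In = CFG[B].Requires != TOP ? CFG[B].Requires
                                      : (CFG[B].ClobbersVXRM ? BOTTOM : Out);
      if (In != AntIn[B] || Out != AntOut[B]) {
        AntIn[B] = In;
        AntOut[B] = Out;
        Changed = true;
      }
    }
  }
}

// Forward dataflow: the VXRM value guaranteed to already be in the CSR on
// entry to a block along every incoming path ("available"). A block where a
// concrete value is anticipated ends with that value in the CSR, either
// because it was already available or because a write will be inserted.
static void computeAvailable(const std::vector<Block> &CFG,
                             const std::vector<int> &AntIn,
                             std::vector<int> &AvailIn,
                             std::vector<int> &AvailOut) {
  AvailIn.assign(CFG.size(), TOP);
  AvailOut.assign(CFG.size(), TOP);
  for (bool Changed = true; Changed;) {
    Changed = false;
    for (std::size_t B = 0; B < CFG.size(); ++B) {
      int In = CFG[B].Preds.empty() ? BOTTOM : TOP; // entry: nothing available
      for (int P : CFG[B].Preds)
        In = meet(In, AvailOut[P]);
      int Out;
      if (CFG[B].ClobbersVXRM)
        Out = BOTTOM;
      else if (CFG[B].Requires != TOP)
        Out = CFG[B].Requires;
      else if (AntIn[B] >= 0)
        Out = AntIn[B]; // an inserted (or inherited) write leaves this value
      else
        Out = In;
      if (In != AvailIn[B] || Out != AvailOut[B]) {
        AvailIn[B] = In;
        AvailOut[B] = Out;
        Changed = true;
      }
    }
  }
}

// Insertion rule from the description above: write VXRM at the top of every
// block where a concrete value is anticipated but not already available.
std::vector<int> placeWrites(const std::vector<Block> &CFG) {
  std::vector<int> AntIn, AntOut, AvailIn, AvailOut;
  computeAnticipated(CFG, AntIn, AntOut);
  computeAvailable(CFG, AntIn, AvailIn, AvailOut);
  std::vector<int> Writes(CFG.size(), TOP);
  for (std::size_t B = 0; B < CFG.size(); ++B)
    if (AntIn[B] >= 0 && AvailIn[B] != AntIn[B])
      Writes[B] = AntIn[B]; // i.e. emit "csrwi vxrm, <value>" in block B
  return Writes;
}

For example, on the diamond in test6 below both successors anticipate rounding mode 0, so a single write lands in the entry block; test12 shows the critical-edge case where the real pass still leaves some redundancy because it does not split edges.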

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64

declare <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8>,
<vscale x 1 x i8>,
<vscale x 1 x i8>,
iXLen, iXLen);

declare <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
<vscale x 1 x i8>,
<vscale x 1 x i8>,
<vscale x 1 x i8>,
iXLen, iXLen);

; Test same rounding mode in one block.
define <vscale x 1 x i8> @test1(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
%b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %a,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
ret <vscale x 1 x i8> %b
}

; Test different rounding mode.
define <vscale x 1 x i8> @test2(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 2, iXLen %3)
%b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %a,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
ret <vscale x 1 x i8> %b
}

declare <vscale x 1 x i8> @foo(<vscale x 1 x i8>)

; Test same vxrm with call in between which may invalidate vxrm.
define <vscale x 1 x i8> @test3(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; RV32-LABEL: test3:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: mv s0, a0
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT: csrwi vxrm, 0
; RV32-NEXT: vaadd.vv v8, v8, v9
; RV32-NEXT: call foo@plt
; RV32-NEXT: vsetvli zero, s0, e8, mf8, ta, ma
; RV32-NEXT: csrwi vxrm, 0
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vaadd.vv v8, v8, v9
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: test3:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 1
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: mv s0, a0
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT: csrwi vxrm, 0
; RV64-NEXT: vaadd.vv v8, v8, v9
; RV64-NEXT: call foo@plt
; RV64-NEXT: vsetvli zero, s0, e8, mf8, ta, ma
; RV64-NEXT: csrwi vxrm, 0
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vaadd.vv v8, v8, v9
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
%b = call <vscale x 1 x i8> @foo(<vscale x 1 x i8> %a)
%c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %b,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
ret <vscale x 1 x i8> %c
}

; Test same vxrm with asm in between which may invalidate vxrm.
define <vscale x 1 x i8> @test4(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
%b = call <vscale x 1 x i8> asm "", "=^vr,0"(<vscale x 1 x i8> %a)
%c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %b,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
ret <vscale x 1 x i8> %c
}

; Test same rounding mode in triangle.
define <vscale x 1 x i8> @test5(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: beqz a1, .LBB4_2
; CHECK-NEXT: # %bb.1: # %condblock
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: .LBB4_2: # %mergeblock
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
br i1 %cond, label %condblock, label %mergeblock
condblock:
%b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %a,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
br label %mergeblock
mergeblock:
%c = phi <vscale x 1 x i8> [%a, %entry], [%b, %condblock]
ret <vscale x 1 x i8> %c
}

; Test same rounding mode in diamond with no dominating vxrm.
define <vscale x 1 x i8> @test6(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: beqz a1, .LBB5_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB5_2: # %falseblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
br i1 %cond, label %trueblock, label %falseblock
trueblock:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
br label %mergeblock
falseblock:
%b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
br label %mergeblock
mergeblock:
%c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
ret <vscale x 1 x i8> %c
}

; Test same rounding mode in diamond with same dominating vxrm.
define <vscale x 1 x i8> @test7(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test7:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: beqz a1, .LBB6_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB6_2: # %falseblock
; CHECK-NEXT: vasub.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
br i1 %cond, label %trueblock, label %falseblock
trueblock:
%b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %a,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
br label %mergeblock
falseblock:
%c = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %a,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
br label %mergeblock
mergeblock:
%d = phi <vscale x 1 x i8> [%b, %trueblock], [%c, %falseblock]
ret <vscale x 1 x i8> %d
}

; Test same rounding mode in diamond with same vxrm at merge.
define <vscale x 1 x i8> @test8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: beqz a1, .LBB7_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB7_2: # %falseblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vasub.vv v8, v8, v9
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
br i1 %cond, label %trueblock, label %falseblock
trueblock:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
br label %mergeblock
falseblock:
%b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
br label %mergeblock
mergeblock:
%c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
%d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %c,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
ret <vscale x 1 x i8> %d
}

; Test same rounding mode in diamond with different vxrm at merge.
define <vscale x 1 x i8> @test9(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test9:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: beqz a1, .LBB8_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: j .LBB8_3
; CHECK-NEXT: .LBB8_2: # %falseblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vasub.vv v8, v8, v9
; CHECK-NEXT: .LBB8_3: # %mergeblock
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
br i1 %cond, label %trueblock, label %falseblock
trueblock:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
br label %mergeblock
falseblock:
%b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
br label %mergeblock
mergeblock:
%c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
%d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %c,
<vscale x 1 x i8> %2,
iXLen 2, iXLen %3)
ret <vscale x 1 x i8> %d
}

; Test loop with no dominating vxrm write.
define void @test10(i8* nocapture %ptr_dest, i8* nocapture readonly %ptr_op1, i8* nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test10:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: beqz a3, .LBB9_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: .LBB9_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a2)
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: sub a3, a3, a4
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: bnez a3, .LBB9_2
; CHECK-NEXT: .LBB9_3: # %for.end
; CHECK-NEXT: ret
entry:
%tobool.not9 = icmp eq iXLen %n, 0
br i1 %tobool.not9, label %for.end, label %for.body
for.body:
%n.addr.011 = phi iXLen [ %n, %entry ], [ %sub, %for.body ]
%vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
%load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
%load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
%vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl)
tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl)
%sub = sub iXLen %n.addr.011, %vl
%tobool.not = icmp eq iXLen %sub, 0
br i1 %tobool.not, label %for.end, label %for.body
for.end:
ret void
}

declare iXLen @llvm.riscv.vsetvli.iXLen(iXLen, iXLen immarg, iXLen immarg)
declare <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8>, <vscale x 1 x i8>* nocapture, iXLen)
declare void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8>, <vscale x 1 x i8>* nocapture, iXLen)

; Test loop with dominating vxrm write. Make sure there is no write in the loop.
define void @test11(i8* nocapture %ptr_dest, i8* nocapture readonly %ptr_op1, i8* nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test11:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a2)
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: .LBB10_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: sub a3, a3, a4
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: beqz a3, .LBB10_3
; CHECK-NEXT: # %bb.2: # %for.body
; CHECK-NEXT: # in Loop: Header=BB10_1 Depth=1
; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a2)
; CHECK-NEXT: j .LBB10_1
; CHECK-NEXT: .LBB10_3: # %for.end
; CHECK-NEXT: ret
entry:
%vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n, iXLen 0, iXLen 5)
%load1a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
%load2a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
%vadda = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1a, <vscale x 1 x i8> %load2a, iXLen 2, iXLen %vl)
tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadda, ptr %ptr_dest, iXLen %vl)
%suba = sub iXLen %n, %vl
%tobool.not9 = icmp eq iXLen %suba, 0
br i1 %tobool.not9, label %for.end, label %for.body
for.body:
%n.addr.011 = phi iXLen [ %suba, %entry ], [ %sub, %for.body ]
%vl2 = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
%load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl2)
%load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl2)
%vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl2)
tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl2)
%sub = sub iXLen %n.addr.011, %vl2
%tobool.not = icmp eq iXLen %sub, 0
br i1 %tobool.not, label %for.end, label %for.body
for.end:
ret void
}

; The edge from entry to block2 is a critical edge. The vxrm write in block2
; is redundant when coming from block1, but is needed when coming from entry.
; FIXME: We could remove the write from the end of block1 without splitting the
; critical edge.
define <vscale x 1 x i8> @test12(i1 %c1, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test12:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v9, v8, v9
; CHECK-NEXT: beqz a0, .LBB11_2
; CHECK-NEXT: # %bb.1: # %block1
; CHECK-NEXT: csrwi vxrm, 1
; CHECK-NEXT: vaadd.vv v9, v8, v9
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: .LBB11_2: # %block2
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
br i1 %c1, label %block1, label %block2
block1:
%b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
br label %block2
block2:
%c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
%d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
ret <vscale x 1 x i8> %d
}

; Similar to test12, but introduces a second critical edge from block1 to
; block3. Now the write to vxrm at the end of block1 can't be removed because
; it is needed by block3.
define <vscale x 1 x i8> @test13(i1 %c1, i1 %c2, i1 %c3, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test13:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v10, v8, v9
; CHECK-NEXT: beqz a0, .LBB12_2
; CHECK-NEXT: # %bb.1: # %block1
; CHECK-NEXT: csrwi vxrm, 1
; CHECK-NEXT: vaadd.vv v10, v8, v10
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: beqz a1, .LBB12_3
; CHECK-NEXT: .LBB12_2: # %block2
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB12_3: # %block3
; CHECK-NEXT: vaadd.vv v8, v9, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
br i1 %c1, label %block1, label %block2
block1:
%b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
br i1 %c2, label %block2, label %block3
block2:
%c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
%d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
ret <vscale x 1 x i8> %d
block3:
%e = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %1, <vscale x 1 x i8> %b, iXLen 2, iXLen %vl)
ret <vscale x 1 x i8> %e
}