clang-p2996/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll
Latest commit ab393cee9d by Craig Topper (2024-09-30): [RISCV] Take known minimum vlen into account when calculating alignment padding in assignRVVStackObjectOffsets. (#110312)
If we know vlen is a multiple of 16, we don't need any alignment padding. I wrote the code so that it would generate the minimum amount of padding if the stack alignment was 32 or larger, or if RVVBitsPerBlock was smaller than half the stack alignment.
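To make the padding arithmetic concrete, here is a minimal, hypothetical C++ sketch of the reasoning in that commit message; it is not the code from assignRVVStackObjectOffsets, and the function name, signature, and MinVLenB parameter are invented for illustration. The base of the RVV stack region is a multiple of vlenb, so once the smallest vlenb the target guarantees (e.g. 16 bytes under +v, which implies Zvl128b) reaches the requested alignment, no padding is needed; otherwise StackAlign - MinVLenB bytes are enough in the worst case.

#include <cstdint>

// Hypothetical helper, not LLVM's implementation: worst-case padding needed so
// that the RVV stack region stays StackAlign-aligned when its base is only
// known to be a multiple of MinVLenB. Both arguments are powers of two.
constexpr uint64_t rvvAlignmentPadding(uint64_t StackAlign, uint64_t MinVLenB) {
  // If every multiple of MinVLenB is already StackAlign-aligned, nothing to do.
  if (MinVLenB >= StackAlign)
    return 0;
  // Worst case: the base sits MinVLenB bytes past an aligned boundary, so
  // StackAlign - MinVLenB more bytes bring it to the next one.
  return StackAlign - MinVLenB;
}

// +v implies vlenb is a multiple of 16, so a 16-byte-aligned region needs no
// padding, matching the frames in test3 below (sp is adjusted by exactly vlenb).
static_assert(rvvAlignmentPadding(16, 16) == 0, "no padding with Zvl128b");
// With only the 8-byte minimum implied by RVVBitsPerBlock = 64, padding is needed.
static_assert(rvvAlignmentPadding(16, 8) == 8, "one 8-byte granule of padding");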


; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
declare <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8>,
<vscale x 1 x i8>,
<vscale x 1 x i8>,
iXLen, iXLen);
declare <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
<vscale x 1 x i8>,
<vscale x 1 x i8>,
<vscale x 1 x i8>,
iXLen, iXLen);
; Test same rounding mode in one block.
define <vscale x 1 x i8> @test1(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
%b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %a,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
ret <vscale x 1 x i8> %b
}
; Test two different rounding modes in one block.
define <vscale x 1 x i8> @test2(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 2, iXLen %3)
%b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %a,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
ret <vscale x 1 x i8> %b
}
declare <vscale x 1 x i8> @foo(<vscale x 1 x i8>)
; Test same vxrm with a call in between which may invalidate vxrm.
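; The call is conservatively assumed to clobber vxrm, so the rounding mode has to be
; re-established afterwards; the checks below expect a second csrwi vxrm, 0 after the call.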
define <vscale x 1 x i8> @test3(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; RV32-LABEL: test3:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: mv s0, a0
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrwi vxrm, 0
; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT: vaadd.vv v8, v8, v9
; RV32-NEXT: call foo
; RV32-NEXT: csrwi vxrm, 0
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, s0, e8, mf8, ta, ma
; RV32-NEXT: vaadd.vv v8, v8, v9
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: test3:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: mv s0, a0
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrwi vxrm, 0
; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT: vaadd.vv v8, v8, v9
; RV64-NEXT: call foo
; RV64-NEXT: csrwi vxrm, 0
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, s0, e8, mf8, ta, ma
; RV64-NEXT: vaadd.vv v8, v8, v9
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
%b = call <vscale x 1 x i8> @foo(<vscale x 1 x i8> %a)
%c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %b,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
ret <vscale x 1 x i8> %c
}
; Test same vxrm with inline asm in between which may invalidate vxrm.
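; Inline asm is likewise assumed to clobber vxrm (and vl/vtype), so the checks expect
; both the csrwi and the vsetvli to be re-emitted after the asm block.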
define <vscale x 1 x i8> @test4(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
%b = call <vscale x 1 x i8> asm "", "=^vr,0"(<vscale x 1 x i8> %a)
%c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %b,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
ret <vscale x 1 x i8> %c
}
; Test same rounding mode in triangle.
define <vscale x 1 x i8> @test5(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: beqz a1, .LBB4_2
; CHECK-NEXT: # %bb.1: # %condblock
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: .LBB4_2: # %mergeblock
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
br i1 %cond, label %condblock, label %mergeblock
condblock:
%b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %a,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
br label %mergeblock
mergeblock:
%c = phi <vscale x 1 x i8> [%a, %entry], [%b, %condblock]
ret <vscale x 1 x i8> %c
}
; Test same rounding mode in diamond with no dominating vxrm.
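; With no write available to reuse from a dominator, the common csrwi vxrm, 0 is hoisted
; into the entry block so a single write covers both sides of the diamond.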
define <vscale x 1 x i8> @test6(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: beqz a1, .LBB5_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB5_2: # %falseblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
br i1 %cond, label %trueblock, label %falseblock
trueblock:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
br label %mergeblock
falseblock:
%b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
br label %mergeblock
mergeblock:
%c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
ret <vscale x 1 x i8> %c
}
; Test same rounding mode in diamond with same dominating vxrm.
define <vscale x 1 x i8> @test7(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test7:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: beqz a1, .LBB6_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB6_2: # %falseblock
; CHECK-NEXT: vasub.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
br i1 %cond, label %trueblock, label %falseblock
trueblock:
%b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %a,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
br label %mergeblock
falseblock:
%c = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %a,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
br label %mergeblock
mergeblock:
%d = phi <vscale x 1 x i8> [%b, %trueblock], [%c, %falseblock]
ret <vscale x 1 x i8> %d
}
; Test same rounding mode in diamond with same vxrm at merge.
define <vscale x 1 x i8> @test8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: beqz a1, .LBB7_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB7_2: # %falseblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vasub.vv v8, v8, v9
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
br i1 %cond, label %trueblock, label %falseblock
trueblock:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
br label %mergeblock
falseblock:
%b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
br label %mergeblock
mergeblock:
%c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
%d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %c,
<vscale x 1 x i8> %2,
iXLen 0, iXLen %3)
ret <vscale x 1 x i8> %d
}
; Test same rounding mode in diamond with different vxrm at merge.
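; Mode 0 is written once in the entry block and covers both branches; the merge block
; needs mode 2, so a separate csrwi vxrm, 2 is emitted there.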
define <vscale x 1 x i8> @test9(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test9:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: beqz a1, .LBB8_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: j .LBB8_3
; CHECK-NEXT: .LBB8_2: # %falseblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vasub.vv v8, v8, v9
; CHECK-NEXT: .LBB8_3: # %mergeblock
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
br i1 %cond, label %trueblock, label %falseblock
trueblock:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
br label %mergeblock
falseblock:
%b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 0, iXLen %3)
br label %mergeblock
mergeblock:
%c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
%d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> undef,
<vscale x 1 x i8> %c,
<vscale x 1 x i8> %2,
iXLen 2, iXLen %3)
ret <vscale x 1 x i8> %d
}
; Test loop with no dominating vxrm write.
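; The csrwi vxrm, 2 is placed in the for.body.preheader block so it executes once rather
; than on every iteration of the loop.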
define void @test10(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, ptr nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test10:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: beqz a3, .LBB9_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: .LBB9_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a2)
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: sub a3, a3, a4
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: bnez a3, .LBB9_2
; CHECK-NEXT: .LBB9_3: # %for.end
; CHECK-NEXT: ret
entry:
%tobool.not9 = icmp eq iXLen %n, 0
br i1 %tobool.not9, label %for.end, label %for.body
for.body:
%n.addr.011 = phi iXLen [ %n, %entry ], [ %sub, %for.body ]
%vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
%load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
%load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
%vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl)
tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl)
%sub = sub iXLen %n.addr.011, %vl
%tobool.not = icmp eq iXLen %sub, 0
br i1 %tobool.not, label %for.end, label %for.body
for.end:
ret void
}
declare iXLen @llvm.riscv.vsetvli.iXLen(iXLen, iXLen immarg, iXLen immarg)
declare <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8>, ptr nocapture, iXLen)
declare void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8>, ptr nocapture, iXLen)
; Test loop with dominating vxrm write. Make sure there is no write in the loop.
define void @test11(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, ptr nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test11:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a2)
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: .LBB10_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: sub a3, a3, a4
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: beqz a3, .LBB10_3
; CHECK-NEXT: # %bb.2: # %for.body
; CHECK-NEXT: # in Loop: Header=BB10_1 Depth=1
; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a2)
; CHECK-NEXT: j .LBB10_1
; CHECK-NEXT: .LBB10_3: # %for.end
; CHECK-NEXT: ret
entry:
%vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n, iXLen 0, iXLen 5)
%load1a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
%load2a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
%vadda = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1a, <vscale x 1 x i8> %load2a, iXLen 2, iXLen %vl)
tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadda, ptr %ptr_dest, iXLen %vl)
%suba = sub iXLen %n, %vl
%tobool.not9 = icmp eq iXLen %suba, 0
br i1 %tobool.not9, label %for.end, label %for.body
for.body:
%n.addr.011 = phi iXLen [ %suba, %entry ], [ %sub, %for.body ]
%vl2 = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
%load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl2)
%load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl2)
%vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl2)
tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl2)
%sub = sub iXLen %n.addr.011, %vl2
%tobool.not = icmp eq iXLen %sub, 0
br i1 %tobool.not, label %for.end, label %for.body
for.end:
ret void
}
; The edge from entry to block2 is a critical edge. The vxrm write in block2
; is redundant when coming from block1, but is needed when coming from entry.
; FIXME: We could remove the write from the end of block1 without splitting the
; critical edge.
define <vscale x 1 x i8> @test12(i1 %c1, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test12:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v9, v8, v9
; CHECK-NEXT: beqz a0, .LBB11_2
; CHECK-NEXT: # %bb.1: # %block1
; CHECK-NEXT: csrwi vxrm, 1
; CHECK-NEXT: vaadd.vv v9, v8, v9
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: .LBB11_2: # %block2
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
br i1 %c1, label %block1, label %block2
block1:
%b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
br label %block2
block2:
%c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
%d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
ret <vscale x 1 x i8> %d
}
; Similar to test12, but introduces a second edge out of block1, to block3. Now the
; write to vxrm at the end of block1 can't be removed because it is needed by
; block3.
define <vscale x 1 x i8> @test13(i1 %c1, i1 %c2, i1 %c3, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test13:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v10, v8, v9
; CHECK-NEXT: beqz a0, .LBB12_2
; CHECK-NEXT: # %bb.1: # %block1
; CHECK-NEXT: csrwi vxrm, 1
; CHECK-NEXT: vaadd.vv v10, v8, v10
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: beqz a1, .LBB12_3
; CHECK-NEXT: .LBB12_2: # %block2
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB12_3: # %block3
; CHECK-NEXT: vaadd.vv v8, v9, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
br i1 %c1, label %block1, label %block2
block1:
%b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
br i1 %c2, label %block2, label %block3
block2:
%c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
%d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
ret <vscale x 1 x i8> %d
block3:
%e = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %1, <vscale x 1 x i8> %b, iXLen 2, iXLen %vl)
ret <vscale x 1 x i8> %e
}