If we know vlen is a multiple of 16, we don't need any alignment padding. I wrote the code so that it generates the minimum amount of padding when the stack alignment is 32 or larger, or when RVVBitsPerBlock is smaller than half the stack alignment.
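As a rough sketch of the arithmetic being described (the function and parameter names below are hypothetical, not the actual RISCVFrameLowering code): the RVV stack region grows in whole multiples of vlenb, so if the smallest guaranteed vlenb is already a multiple of the stack alignment, adjusting sp by it can never break alignment and no padding is needed; otherwise only the minimum padding up to the alignment boundary is added.

```cpp
#include <cassert>

// Hypothetical illustration of the padding rule described above; this is not
// the code in RISCVFrameLowering, just the arithmetic it implies.
// MinVlenBits: the smallest VLEN we can assume (RVVBitsPerBlock, or larger if
// a Zvl*b minimum is known). StackAlignBytes: the target stack alignment.
unsigned rvvRegionPaddingBytes(unsigned MinVlenBits, unsigned StackAlignBytes) {
  unsigned MinVlenBytes = MinVlenBits / 8; // vlenb is a multiple of this
  // If every possible vlenb is a multiple of the stack alignment (e.g.
  // VLEN >= 128 with a 16-byte stack alignment), "sub sp, sp, vlenb" keeps
  // sp aligned and no padding is required, as in test3 below.
  if (MinVlenBytes % StackAlignBytes == 0)
    return 0;
  // Otherwise (stack alignment of 32 or more, or a small RVVBitsPerBlock),
  // pad only up to the next alignment boundary.
  return StackAlignBytes - MinVlenBytes % StackAlignBytes;
}

int main() {
  assert(rvvRegionPaddingBytes(/*MinVlenBits=*/128, /*StackAlignBytes=*/16) == 0);
  assert(rvvRegionPaddingBytes(/*MinVlenBits=*/64, /*StackAlignBytes=*/32) == 24);
  return 0;
}
```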
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64

declare <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  iXLen, iXLen);
declare <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  iXLen, iXLen);

; Test same rounding mode in one block.
define <vscale x 1 x i8> @test1(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %b
}

; Test different rounding mode.
define <vscale x 1 x i8> @test2(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 2, iXLen %3)
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %b
}

declare <vscale x 1 x i8> @foo(<vscale x 1 x i8>)

; Test same vxrm with call in between which may invalidate vxrm.
define <vscale x 1 x i8> @test3(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; RV32-LABEL: test3:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: mv s0, a0
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrwi vxrm, 0
; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT: vaadd.vv v8, v8, v9
; RV32-NEXT: call foo
; RV32-NEXT: csrwi vxrm, 0
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, s0, e8, mf8, ta, ma
; RV32-NEXT: vaadd.vv v8, v8, v9
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: test3:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: mv s0, a0
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrwi vxrm, 0
; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT: vaadd.vv v8, v8, v9
; RV64-NEXT: call foo
; RV64-NEXT: csrwi vxrm, 0
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, s0, e8, mf8, ta, ma
; RV64-NEXT: vaadd.vv v8, v8, v9
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> @foo(<vscale x 1 x i8> %a)
  %c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %b,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %c
}

; Test same vxrm with asm in between which may invalidate vxrm.
define <vscale x 1 x i8> @test4(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> asm "", "=^vr,0"(<vscale x 1 x i8> %a)
  %c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %b,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in triangle.
define <vscale x 1 x i8> @test5(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: beqz a1, .LBB4_2
; CHECK-NEXT: # %bb.1: # %condblock
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: .LBB4_2: # %mergeblock
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br i1 %cond, label %condblock, label %mergeblock

condblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %entry], [%b, %condblock]

  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in diamond with no dominating vxrm.
define <vscale x 1 x i8> @test6(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: beqz a1, .LBB5_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB5_2: # %falseblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]

  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in diamond with same dominating vxrm.
define <vscale x 1 x i8> @test7(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test7:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: beqz a1, .LBB6_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB6_2: # %falseblock
; CHECK-NEXT: vasub.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %c = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %d = phi <vscale x 1 x i8> [%b, %trueblock], [%c, %falseblock]

  ret <vscale x 1 x i8> %d
}

; Test same rounding mode in diamond with same vxrm at merge.
define <vscale x 1 x i8> @test8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: beqz a1, .LBB7_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB7_2: # %falseblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vasub.vv v8, v8, v9
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %c,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %d
}

; Test same rounding mode in diamond with different vxrm at merge.
define <vscale x 1 x i8> @test9(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test9:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: beqz a1, .LBB8_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: j .LBB8_3
; CHECK-NEXT: .LBB8_2: # %falseblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vasub.vv v8, v8, v9
; CHECK-NEXT: .LBB8_3: # %mergeblock
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %c,
    <vscale x 1 x i8> %2,
    iXLen 2, iXLen %3)

  ret <vscale x 1 x i8> %d
}

; Test loop with no dominating vxrm write.
define void @test10(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, ptr nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test10:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: beqz a3, .LBB9_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: .LBB9_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a2)
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: sub a3, a3, a4
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: bnez a3, .LBB9_2
; CHECK-NEXT: .LBB9_3: # %for.end
; CHECK-NEXT: ret
entry:
  %tobool.not9 = icmp eq iXLen %n, 0
  br i1 %tobool.not9, label %for.end, label %for.body

for.body:
  %n.addr.011 = phi iXLen [ %n, %entry ], [ %sub, %for.body ]
  %vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
  %load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
  %load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
  %vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl)
  %sub = sub iXLen %n.addr.011, %vl
  %tobool.not = icmp eq iXLen %sub, 0
  br i1 %tobool.not, label %for.end, label %for.body

for.end:
  ret void
}

declare iXLen @llvm.riscv.vsetvli.iXLen(iXLen, iXLen immarg, iXLen immarg)
declare <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8>, ptr nocapture, iXLen)
declare void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8>, ptr nocapture, iXLen)

; Test loop with dominating vxrm write. Make sure there is no write in the loop.
define void @test11(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, ptr nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test11:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a2)
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: .LBB10_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: sub a3, a3, a4
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: beqz a3, .LBB10_3
; CHECK-NEXT: # %bb.2: # %for.body
; CHECK-NEXT: # in Loop: Header=BB10_1 Depth=1
; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a2)
; CHECK-NEXT: j .LBB10_1
; CHECK-NEXT: .LBB10_3: # %for.end
; CHECK-NEXT: ret
entry:
  %vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n, iXLen 0, iXLen 5)
  %load1a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
  %load2a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
  %vadda = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1a, <vscale x 1 x i8> %load2a, iXLen 2, iXLen %vl)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadda, ptr %ptr_dest, iXLen %vl)
  %suba = sub iXLen %n, %vl
  %tobool.not9 = icmp eq iXLen %suba, 0
  br i1 %tobool.not9, label %for.end, label %for.body

for.body:
  %n.addr.011 = phi iXLen [ %suba, %entry ], [ %sub, %for.body ]
  %vl2 = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
  %load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl2)
  %load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl2)
  %vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl2)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl2)
  %sub = sub iXLen %n.addr.011, %vl2
  %tobool.not = icmp eq iXLen %sub, 0
  br i1 %tobool.not, label %for.end, label %for.body

for.end:
  ret void
}

; The edge from entry to block2 is a critical edge. The vxrm write in block2
; is redundant when coming from block1, but is needed when coming from entry.
; FIXME: We could remove the write from the end of block1 without splitting the
; critical edge.
define <vscale x 1 x i8> @test12(i1 %c1, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test12:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v9, v8, v9
; CHECK-NEXT: beqz a0, .LBB11_2
; CHECK-NEXT: # %bb.1: # %block1
; CHECK-NEXT: csrwi vxrm, 1
; CHECK-NEXT: vaadd.vv v9, v8, v9
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: .LBB11_2: # %block2
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
  br i1 %c1, label %block1, label %block2

block1:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
  br label %block2

block2:
  %c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %d
}

; Similar to test12, but introduces a second critical edge from block1 to
; block3. Now the write to vxrm at the end of block1 can't be removed because
; it is needed by block3.
define <vscale x 1 x i8> @test13(i1 %c1, i1 %c2, i1 %c3, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test13:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v10, v8, v9
; CHECK-NEXT: beqz a0, .LBB12_2
; CHECK-NEXT: # %bb.1: # %block1
; CHECK-NEXT: csrwi vxrm, 1
; CHECK-NEXT: vaadd.vv v10, v8, v10
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: beqz a1, .LBB12_3
; CHECK-NEXT: .LBB12_2: # %block2
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB12_3: # %block3
; CHECK-NEXT: vaadd.vv v8, v9, v10
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
  br i1 %c1, label %block1, label %block2

block1:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
  br i1 %c2, label %block2, label %block3

block2:
  %c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %d

block3:
  %e = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %1, <vscale x 1 x i8> %b, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %e
}