This adds a new pass that inserts VXRM writes for vector instructions, with the goal of avoiding redundant writes. The pass runs two dataflow analyses. The first is a forward dataflow to calculate where a VXRM value is available. The second is a backward dataflow to determine where a VXRM value is anticipated. Finally, we use the results of these two dataflows to insert VXRM writes where a value is anticipated but not available. The pass does not split critical edges, so we aren't always able to eliminate all redundancy. The pass only inserts VXRM writes on paths that always require them.
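To make the availability/anticipation pairing concrete, here is a minimal, self-contained sketch of the two dataflow problems over a simplified CFG model. It is illustrative only: the Block struct, the Top/Bottom lattice encoding, and the helper names (meet, computeAvailability, computeAnticipation, insertWrites) are assumptions made for this sketch, not the actual pass implementation or LLVM's MachineFunction API.

#include <cstddef>
#include <cstdio>
#include <vector>

// Lattice over "which VXRM mode is in effect": a concrete mode 0-3, Top
// (unconstrained / nothing seen yet), or Bottom (conflicting modes).
constexpr int Top = -2;
constexpr int Bottom = -1;

static int meet(int A, int B) {
  if (A == Top) return B;
  if (B == Top) return A;
  return A == B ? A : Bottom;
}

struct Block {
  std::vector<int> Preds, Succs; // indices into the CFG vector
  int FirstUse = Top;  // mode required by the block's first VXRM user;
                       // Bottom if VXRM is clobbered first (call, inline asm)
  int LastDef = Top;   // mode VXRM holds at block exit; Bottom if clobbered
  int AvailOut = Top;  // forward fact: mode known to be in VXRM at exit
  int AnticipIn = Top; // backward fact: mode needed on every path from entry
};

// Forward dataflow: a mode is available at a block's exit if the block wrote
// it, or if the same mode was available at the exit of every predecessor.
static void computeAvailability(std::vector<Block> &CFG) {
  for (bool Changed = true; Changed;) {
    Changed = false;
    for (Block &B : CFG) {
      int In = B.Preds.empty() ? Bottom : Top; // entry: nothing available
      for (int P : B.Preds)
        In = meet(In, CFG[P].AvailOut);
      int Out = (B.LastDef == Top) ? In : B.LastDef;
      if (Out != B.AvailOut) { B.AvailOut = Out; Changed = true; }
    }
  }
}

// Backward dataflow: a mode is anticipated at a block's entry if the block's
// first VXRM user needs it, or if every successor anticipates that same mode.
static void computeAnticipation(std::vector<Block> &CFG) {
  for (bool Changed = true; Changed;) {
    Changed = false;
    for (auto It = CFG.rbegin(); It != CFG.rend(); ++It) {
      int Out = It->Succs.empty() ? Bottom : Top; // exit: nothing needed later
      for (int S : It->Succs)
        Out = meet(Out, CFG[S].AnticipIn);
      int In = (It->FirstUse == Top) ? Out : It->FirstUse;
      if (In != It->AnticipIn) { It->AnticipIn = In; Changed = true; }
    }
  }
}

// Combine the two: write VXRM at the top of any block where a concrete mode
// is anticipated but is not already available from every predecessor.
static void insertWrites(std::vector<Block> &CFG) {
  computeAvailability(CFG);
  computeAnticipation(CFG);
  for (std::size_t I = 0; I < CFG.size(); ++I) {
    int AvailIn = CFG[I].Preds.empty() ? Bottom : Top;
    for (int P : CFG[I].Preds)
      AvailIn = meet(AvailIn, CFG[P].AvailOut);
    if (CFG[I].AnticipIn >= 0 && CFG[I].AnticipIn != AvailIn)
      std::printf("block %zu: insert csrwi vxrm, %d\n", I, CFG[I].AnticipIn);
  }
}

The real pass works on MachineBasicBlocks and also places writes before individual instructions within a block; the sketch only shows the block-level anticipated-but-not-available decision described above.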
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2

; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64

declare <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  iXLen, iXLen);
declare <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  iXLen, iXLen);

; Test same rounding mode in one block.
define <vscale x 1 x i8> @test1(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %b
}

; Test different rounding mode.
define <vscale x 1 x i8> @test2(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 2, iXLen %3)
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %b
}

declare <vscale x 1 x i8> @foo(<vscale x 1 x i8>)

; Test same vxrm with call in between which may invalidate vxrm.
define <vscale x 1 x i8> @test3(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; RV32-LABEL: test3:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: mv s0, a0
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT: csrwi vxrm, 0
; RV32-NEXT: vaadd.vv v8, v8, v9
; RV32-NEXT: call foo@plt
; RV32-NEXT: vsetvli zero, s0, e8, mf8, ta, ma
; RV32-NEXT: csrwi vxrm, 0
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vaadd.vv v8, v8, v9
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: test3:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 1
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: mv s0, a0
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT: csrwi vxrm, 0
; RV64-NEXT: vaadd.vv v8, v8, v9
; RV64-NEXT: call foo@plt
; RV64-NEXT: vsetvli zero, s0, e8, mf8, ta, ma
; RV64-NEXT: csrwi vxrm, 0
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vaadd.vv v8, v8, v9
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> @foo(<vscale x 1 x i8> %a)
  %c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %b,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %c
}

; Test same vxrm with asm in between which may invalidate vxrm.
define <vscale x 1 x i8> @test4(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> asm "", "=^vr,0"(<vscale x 1 x i8> %a)
  %c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %b,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in triangle.
define <vscale x 1 x i8> @test5(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: beqz a1, .LBB4_2
; CHECK-NEXT: # %bb.1: # %condblock
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: .LBB4_2: # %mergeblock
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br i1 %cond, label %condblock, label %mergeblock

condblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %entry], [%b, %condblock]

  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in diamond with no dominating vxrm.
define <vscale x 1 x i8> @test6(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: beqz a1, .LBB5_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB5_2: # %falseblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]

  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in diamond with same dominating vxrm.
define <vscale x 1 x i8> @test7(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test7:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: beqz a1, .LBB6_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB6_2: # %falseblock
; CHECK-NEXT: vasub.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %c = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %d = phi <vscale x 1 x i8> [%b, %trueblock], [%c, %falseblock]

  ret <vscale x 1 x i8> %d
}

; Test same rounding mode in diamond with same vxrm at merge.
define <vscale x 1 x i8> @test8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: beqz a1, .LBB7_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB7_2: # %falseblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vasub.vv v8, v8, v9
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %c,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %d
}

; Test same rounding mode in diamond with different vxrm at merge.
define <vscale x 1 x i8> @test9(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test9:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: beqz a1, .LBB8_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: j .LBB8_3
; CHECK-NEXT: .LBB8_2: # %falseblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vasub.vv v8, v8, v9
; CHECK-NEXT: .LBB8_3: # %mergeblock
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %c,
    <vscale x 1 x i8> %2,
    iXLen 2, iXLen %3)

  ret <vscale x 1 x i8> %d
}

; Test loop with no dominating vxrm write.
define void @test10(i8* nocapture %ptr_dest, i8* nocapture readonly %ptr_op1, i8* nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test10:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: beqz a3, .LBB9_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: .LBB9_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a2)
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: sub a3, a3, a4
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: bnez a3, .LBB9_2
; CHECK-NEXT: .LBB9_3: # %for.end
; CHECK-NEXT: ret
entry:
  %tobool.not9 = icmp eq iXLen %n, 0
  br i1 %tobool.not9, label %for.end, label %for.body

for.body:
  %n.addr.011 = phi iXLen [ %n, %entry ], [ %sub, %for.body ]
  %vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
  %load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
  %load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
  %vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl)
  %sub = sub iXLen %n.addr.011, %vl
  %tobool.not = icmp eq iXLen %sub, 0
  br i1 %tobool.not, label %for.end, label %for.body

for.end:
  ret void
}

declare iXLen @llvm.riscv.vsetvli.iXLen(iXLen, iXLen immarg, iXLen immarg)
declare <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8>, <vscale x 1 x i8>* nocapture, iXLen)
declare void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8>, <vscale x 1 x i8>* nocapture, iXLen)

; Test loop with dominating vxrm write. Make sure there is no write in the loop.
define void @test11(i8* nocapture %ptr_dest, i8* nocapture readonly %ptr_op1, i8* nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test11:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a2)
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: .LBB10_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: sub a3, a3, a4
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: beqz a3, .LBB10_3
; CHECK-NEXT: # %bb.2: # %for.body
; CHECK-NEXT: # in Loop: Header=BB10_1 Depth=1
; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a2)
; CHECK-NEXT: j .LBB10_1
; CHECK-NEXT: .LBB10_3: # %for.end
; CHECK-NEXT: ret
entry:
  %vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n, iXLen 0, iXLen 5)
  %load1a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
  %load2a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
  %vadda = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1a, <vscale x 1 x i8> %load2a, iXLen 2, iXLen %vl)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadda, ptr %ptr_dest, iXLen %vl)
  %suba = sub iXLen %n, %vl
  %tobool.not9 = icmp eq iXLen %suba, 0
  br i1 %tobool.not9, label %for.end, label %for.body

for.body:
  %n.addr.011 = phi iXLen [ %suba, %entry ], [ %sub, %for.body ]
  %vl2 = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
  %load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl2)
  %load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl2)
  %vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl2)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl2)
  %sub = sub iXLen %n.addr.011, %vl2
  %tobool.not = icmp eq iXLen %sub, 0
  br i1 %tobool.not, label %for.end, label %for.body

for.end:
  ret void
}

; The edge from entry to block2 is a critical edge. The vxrm write in block2
; is redundant when coming from block1, but is needed when coming from entry.
; FIXME: We could remove the write from the end of block1 without splitting the
; critical edge.
define <vscale x 1 x i8> @test12(i1 %c1, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test12:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v9, v8, v9
; CHECK-NEXT: beqz a0, .LBB11_2
; CHECK-NEXT: # %bb.1: # %block1
; CHECK-NEXT: csrwi vxrm, 1
; CHECK-NEXT: vaadd.vv v9, v8, v9
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: .LBB11_2: # %block2
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
  br i1 %c1, label %block1, label %block2

block1:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
  br label %block2

block2:
  %c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %d
}

; Similar to test12, but introduces a second critical edge from block1 to
; block3. Now the write to vxrm at the end of block1 can't be removed because
; it is needed by block3.
define <vscale x 1 x i8> @test13(i1 %c1, i1 %c2, i1 %c3, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test13:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v10, v8, v9
; CHECK-NEXT: beqz a0, .LBB12_2
; CHECK-NEXT: # %bb.1: # %block1
; CHECK-NEXT: csrwi vxrm, 1
; CHECK-NEXT: vaadd.vv v10, v8, v10
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: beqz a1, .LBB12_3
; CHECK-NEXT: .LBB12_2: # %block2
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB12_3: # %block3
; CHECK-NEXT: vaadd.vv v8, v9, v10
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
  br i1 %c1, label %block1, label %block2

block1:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
  br i1 %c2, label %block2, label %block3

block2:
  %c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %d

block3:
  %e = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %1, <vscale x 1 x i8> %b, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %e
}