Previously, RISCVInsertReadWriteCSR inserted an FRM swap for any rounding-mode operand other than 7 (DYN) and restored the original value immediately after the vector instruction. This is inefficient when consecutive vector instructions use the same static rounding mode, and it costs an extra write when the next vector instruction uses a different explicit rounding mode. This patch implements a local optimization to address this. We assume the rounding mode at the start of each basic block is DYN ("dynamic"). While iterating through a block, when we encounter an instruction whose rounding mode differs from the current one, we switch to the new mode, saving the incoming FRM value first if it has not already been saved. We must also restore FRM when we encounter a function call, inline asm, or certain other uses of FRM. A more advanced version would perform cross-basic-block analysis to compute the starting rounding mode of each basic block.
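The shape of the local scan is easy to model. Below is a minimal standalone C++ sketch of the per-block walk described above, using toy Inst/Kind types; the names, the save-register handling, and the write counting are illustrative assumptions, not the pass's actual data structures:

#include <cassert>
#include <cstdint>
#include <vector>

enum class Kind { VectorFP, Call, InlineAsm, ReadsFRM, Other };
constexpr int64_t DYN = 7; // rounding-mode operand 7 means "use dynamic FRM"

struct Inst {
  Kind K;
  int64_t RM = DYN; // static rounding mode, only meaningful for VectorFP
};

// Walk one basic block and count the FRM writes the local scan would emit.
int insertFRMWrites(std::vector<Inst> &Block) {
  int Writes = 0;
  int64_t CurRM = DYN;   // the block is assumed to start in dynamic mode
  bool SavedFRM = false; // has the incoming FRM been saved to a scratch reg?

  auto Restore = [&] {
    if (CurRM != DYN) {
      ++Writes; // emit: fsrm <save-reg>
      CurRM = DYN;
    }
  };

  for (Inst &I : Block) {
    switch (I.K) {
    case Kind::VectorFP:
      if (I.RM != DYN && I.RM != CurRM) {
        if (!SavedFRM) {
          ++Writes; // emit: fsrmi <save-reg>, RM (save old FRM, set new one)
          SavedFRM = true;
        } else {
          ++Writes; // emit: fsrmi RM (old FRM is already saved)
        }
        CurRM = I.RM;
      }
      break;
    case Kind::Call:      // callee may observe or clobber FRM
    case Kind::InlineAsm: // asm body may read FRM
    case Kind::ReadsFRM:  // e.g. llvm.get.rounding
      Restore();
      break;
    case Kind::Other:
      break;
    }
  }
  Restore(); // fall off the end of the block in dynamic mode
  return Writes;
}

int main() {
  // Mirrors @test2 below: two statically rounded vfadds, RNE (0) then RTZ (1).
  std::vector<Inst> BB = {{Kind::VectorFP, 0}, {Kind::VectorFP, 1}};
  assert(insertFRMWrites(BB) == 3); // fsrmi a0, 0 / fsrmi 1 / fsrm a0
  return 0;
}

On the @test2 function below this predicts exactly the three FRM writes in the optimized output: fsrmi a0, 0, then a plain fsrmi 1 (no second save), then a single closing fsrm a0.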
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -target-abi=lp64d < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -target-abi=lp64d \
; RUN:   -riscv-disable-frm-insert-opt < %s | FileCheck %s --check-prefix=UNOPT

declare <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float>,
<vscale x 1 x float>,
<vscale x 1 x float>,
i64, i64)
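; For the rounding-mode form of this intrinsic, the first trailing i64 is the
; static rounding-mode (frm) operand, where 7 means DYN (use the FRM register),
; and the second is the AVL.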

; Test that frm is saved and restored only once.
define <vscale x 1 x float> @test(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: vfadd.vv v8, v8, v8
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
;
; UNOPT-LABEL: test:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT: fsrmi a0, 0
; UNOPT-NEXT: vfadd.vv v8, v8, v9
; UNOPT-NEXT: fsrm a0
; UNOPT-NEXT: fsrmi a0, 0
; UNOPT-NEXT: vfadd.vv v8, v8, v8
; UNOPT-NEXT: fsrm a0
; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 0, i64 %2)
%b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %a,
<vscale x 1 x float> %a,
i64 0, i64 %2)
ret <vscale x 1 x float> %b
}

; Test that frm is restored only once.
define <vscale x 1 x float> @test2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: fsrmi 1
; CHECK-NEXT: vfadd.vv v8, v8, v8
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
;
; UNOPT-LABEL: test2:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT: fsrmi a0, 0
; UNOPT-NEXT: vfadd.vv v8, v8, v9
; UNOPT-NEXT: fsrm a0
; UNOPT-NEXT: fsrmi a0, 1
; UNOPT-NEXT: vfadd.vv v8, v8, v8
; UNOPT-NEXT: fsrm a0
; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 0, i64 %2)
%b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %a,
<vscale x 1 x float> %a,
i64 1, i64 %2)
ret <vscale x 1 x float> %b
}

declare void @foo()
define <vscale x 1 x float> @just_call(<vscale x 1 x float> %0) nounwind {
; CHECK-LABEL: just_call:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -48
; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: addi a0, sp, 32
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: call foo
; CHECK-NEXT: addi a0, sp, 32
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: ret
;
; UNOPT-LABEL: just_call:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: addi sp, sp, -48
; UNOPT-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; UNOPT-NEXT: csrr a0, vlenb
; UNOPT-NEXT: slli a0, a0, 1
; UNOPT-NEXT: sub sp, sp, a0
; UNOPT-NEXT: addi a0, sp, 32
; UNOPT-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; UNOPT-NEXT: call foo
; UNOPT-NEXT: addi a0, sp, 32
; UNOPT-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; UNOPT-NEXT: csrr a0, vlenb
; UNOPT-NEXT: slli a0, a0, 1
; UNOPT-NEXT: add sp, sp, a0
; UNOPT-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; UNOPT-NEXT: addi sp, sp, 48
; UNOPT-NEXT: ret
entry:
call void @foo()
ret <vscale x 1 x float> %0
}

define <vscale x 1 x float> @before_call1(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: before_call1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -48
; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: addi a1, sp, 32
; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: call foo
; CHECK-NEXT: addi a0, sp, 32
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: ret
;
; UNOPT-LABEL: before_call1:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: addi sp, sp, -48
; UNOPT-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; UNOPT-NEXT: csrr a1, vlenb
; UNOPT-NEXT: slli a1, a1, 1
; UNOPT-NEXT: sub sp, sp, a1
; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT: fsrmi a0, 0
; UNOPT-NEXT: vfadd.vv v8, v8, v9
; UNOPT-NEXT: addi a1, sp, 32
; UNOPT-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
; UNOPT-NEXT: fsrm a0
; UNOPT-NEXT: call foo
; UNOPT-NEXT: addi a0, sp, 32
; UNOPT-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; UNOPT-NEXT: csrr a0, vlenb
; UNOPT-NEXT: slli a0, a0, 1
; UNOPT-NEXT: add sp, sp, a0
; UNOPT-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; UNOPT-NEXT: addi sp, sp, 48
; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 0, i64 %2)
call void @foo()
ret <vscale x 1 x float> %a
}

define <vscale x 1 x float> @before_call2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: before_call2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -48
; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: addi a0, sp, 32
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: call foo
; CHECK-NEXT: addi a0, sp, 32
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: ret
;
; UNOPT-LABEL: before_call2:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: addi sp, sp, -48
; UNOPT-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; UNOPT-NEXT: csrr a1, vlenb
; UNOPT-NEXT: slli a1, a1, 1
; UNOPT-NEXT: sub sp, sp, a1
; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT: vfadd.vv v8, v8, v9
; UNOPT-NEXT: addi a0, sp, 32
; UNOPT-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; UNOPT-NEXT: call foo
; UNOPT-NEXT: addi a0, sp, 32
; UNOPT-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; UNOPT-NEXT: csrr a0, vlenb
; UNOPT-NEXT: slli a0, a0, 1
; UNOPT-NEXT: add sp, sp, a0
; UNOPT-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; UNOPT-NEXT: addi sp, sp, 48
; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 7, i64 %2)
call void @foo()
ret <vscale x 1 x float> %a
}

define <vscale x 1 x float> @after_call1(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: after_call1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -48
; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: addi a1, sp, 32
; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: call foo
; CHECK-NEXT: addi a0, sp, 32
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: ret
;
; UNOPT-LABEL: after_call1:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: addi sp, sp, -48
; UNOPT-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; UNOPT-NEXT: csrr a1, vlenb
; UNOPT-NEXT: slli a1, a1, 1
; UNOPT-NEXT: sub sp, sp, a1
; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT: fsrmi a0, 0
; UNOPT-NEXT: vfadd.vv v8, v8, v9
; UNOPT-NEXT: addi a1, sp, 32
; UNOPT-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
; UNOPT-NEXT: fsrm a0
; UNOPT-NEXT: call foo
; UNOPT-NEXT: addi a0, sp, 32
; UNOPT-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; UNOPT-NEXT: csrr a0, vlenb
; UNOPT-NEXT: slli a0, a0, 1
; UNOPT-NEXT: add sp, sp, a0
; UNOPT-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; UNOPT-NEXT: addi sp, sp, 48
; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 0, i64 %2)
call void @foo()
ret <vscale x 1 x float> %a
}

define <vscale x 1 x float> @after_call2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: after_call2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -48
; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: addi a0, sp, 32
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: call foo
; CHECK-NEXT: addi a0, sp, 32
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: ret
;
; UNOPT-LABEL: after_call2:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: addi sp, sp, -48
; UNOPT-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; UNOPT-NEXT: csrr a1, vlenb
; UNOPT-NEXT: slli a1, a1, 1
; UNOPT-NEXT: sub sp, sp, a1
; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT: vfadd.vv v8, v8, v9
; UNOPT-NEXT: addi a0, sp, 32
; UNOPT-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; UNOPT-NEXT: call foo
; UNOPT-NEXT: addi a0, sp, 32
; UNOPT-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; UNOPT-NEXT: csrr a0, vlenb
; UNOPT-NEXT: slli a0, a0, 1
; UNOPT-NEXT: add sp, sp, a0
; UNOPT-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; UNOPT-NEXT: addi sp, sp, 48
; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 7, i64 %2)
call void @foo()
ret <vscale x 1 x float> %a
}

define <vscale x 1 x float> @just_asm(<vscale x 1 x float> %0) nounwind {
; CHECK-LABEL: just_asm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ret
;
; UNOPT-LABEL: just_asm:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: #APP
; UNOPT-NEXT: #NO_APP
; UNOPT-NEXT: ret
entry:
call void asm sideeffect "", ""()
ret <vscale x 1 x float> %0
}

define <vscale x 1 x float> @before_asm1(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: before_asm1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ret
;
; UNOPT-LABEL: before_asm1:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT: fsrmi a0, 0
; UNOPT-NEXT: vfadd.vv v8, v8, v9
; UNOPT-NEXT: fsrm a0
; UNOPT-NEXT: #APP
; UNOPT-NEXT: #NO_APP
; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 0, i64 %2)
call void asm sideeffect "", ""()
ret <vscale x 1 x float> %a
}

define <vscale x 1 x float> @before_asm2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: before_asm2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ret
;
; UNOPT-LABEL: before_asm2:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT: vfadd.vv v8, v8, v9
; UNOPT-NEXT: #APP
; UNOPT-NEXT: #NO_APP
; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 7, i64 %2)
call void asm sideeffect "", ""()
ret <vscale x 1 x float> %a
}

define <vscale x 1 x float> @after_asm1(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: after_asm1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ret
;
; UNOPT-LABEL: after_asm1:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT: fsrmi a0, 0
; UNOPT-NEXT: vfadd.vv v8, v8, v9
; UNOPT-NEXT: fsrm a0
; UNOPT-NEXT: #APP
; UNOPT-NEXT: #NO_APP
; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 0, i64 %2)
call void asm sideeffect "", ""()
ret <vscale x 1 x float> %a
}

define <vscale x 1 x float> @after_asm2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: after_asm2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ret
;
; UNOPT-LABEL: after_asm2:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT: vfadd.vv v8, v8, v9
; UNOPT-NEXT: #APP
; UNOPT-NEXT: #NO_APP
; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 7, i64 %2)
call void asm sideeffect "", ""()
ret <vscale x 1 x float> %a
}

; Test that frm is restored before reading frm, and that nothing is
; inserted for the following dynamic rounding-mode operation.
; TODO: The frrm could be elided.
declare i32 @llvm.get.rounding()
define <vscale x 1 x float> @test5(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2, ptr %p) nounwind {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: frrm a0
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: lui a2, 66
; CHECK-NEXT: addiw a2, a2, 769
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: andi a0, a0, 7
; CHECK-NEXT: vfadd.vv v8, v8, v8
; CHECK-NEXT: sw a0, 0(a1)
; CHECK-NEXT: ret
;
; UNOPT-LABEL: test5:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT: fsrmi a0, 0
; UNOPT-NEXT: vfadd.vv v8, v8, v9
; UNOPT-NEXT: fsrm a0
; UNOPT-NEXT: frrm a0
; UNOPT-NEXT: slli a0, a0, 2
; UNOPT-NEXT: lui a2, 66
; UNOPT-NEXT: addiw a2, a2, 769
; UNOPT-NEXT: srl a0, a2, a0
; UNOPT-NEXT: andi a0, a0, 7
; UNOPT-NEXT: vfadd.vv v8, v8, v8
; UNOPT-NEXT: sw a0, 0(a1)
; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 0, i64 %2)
%rm = call i32 @llvm.get.rounding()
store i32 %rm, ptr %p, align 4
%b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %a,
<vscale x 1 x float> %a,
i64 7, i64 %2)
ret <vscale x 1 x float> %b
}
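; Note on the sequence above: "lui a2, 66" + "addiw a2, a2, 769" materializes
; 0x42301, a nibble lookup table, and the srl/andi pair computes
; (0x42301 >> (frm * 4)) & 7 to translate the FRM field into the
; llvm.get.rounding encoding: frm 0 (RNE) -> 1, 1 (RTZ) -> 0, 2 (RDN) -> 3,
; 3 (RUP) -> 2, 4 (RMM) -> 4.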

; Test that FRM is not set for a vfadd with DYN rounding mode after WriteFRMImm.
declare void @llvm.set.rounding(i32)
define <vscale x 1 x float> @after_fsrm1(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: after_fsrm1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: fsrmi 4
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
;
; UNOPT-LABEL: after_fsrm1:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: fsrmi 4
; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT: vfadd.vv v8, v8, v9
; UNOPT-NEXT: ret
entry:
call void @llvm.set.rounding(i32 4)
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 7, i64 %2)
ret <vscale x 1 x float> %a
}

; Test that FRM is not set for a vfadd with a known rm after a WriteFRMImm with
; the same rm.
define <vscale x 1 x float> @after_fsrm2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: after_fsrm2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: fsrmi 4
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
;
; UNOPT-LABEL: after_fsrm2:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: fsrmi 4
; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT: fsrmi a0, 4
; UNOPT-NEXT: vfadd.vv v8, v8, v9
; UNOPT-NEXT: fsrm a0
; UNOPT-NEXT: ret
entry:
call void @llvm.set.rounding(i32 4)
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 4, i64 %2)
ret <vscale x 1 x float> %a
}

; Test that FRM is set for a vfadd with a known rm after a WriteFRMImm with a
; different rm.
define <vscale x 1 x float> @after_fsrm3(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: after_fsrm3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: fsrmi 4
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: fsrmi a0, 5
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
;
; UNOPT-LABEL: after_fsrm3:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: fsrmi 4
; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT: fsrmi a0, 5
; UNOPT-NEXT: vfadd.vv v8, v8, v9
; UNOPT-NEXT: fsrm a0
; UNOPT-NEXT: ret
entry:
call void @llvm.set.rounding(i32 4)
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 5, i64 %2)
ret <vscale x 1 x float> %a
}

; Test that FRM is not set for a vfadd with DYN rounding mode after WriteFRM.
define <vscale x 1 x float> @after_fsrm4(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %rm, i64 %2) nounwind {
; CHECK-LABEL: after_fsrm4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: slli a0, a0, 32
; CHECK-NEXT: srli a0, a0, 30
; CHECK-NEXT: lui a2, 66
; CHECK-NEXT: addiw a2, a2, 769
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: andi a0, a0, 7
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
;
; UNOPT-LABEL: after_fsrm4:
; UNOPT: # %bb.0: # %entry
; UNOPT-NEXT: slli a0, a0, 32
; UNOPT-NEXT: srli a0, a0, 30
; UNOPT-NEXT: lui a2, 66
; UNOPT-NEXT: addiw a2, a2, 769
; UNOPT-NEXT: srl a0, a2, a0
; UNOPT-NEXT: andi a0, a0, 7
; UNOPT-NEXT: fsrm a0
; UNOPT-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; UNOPT-NEXT: vfadd.vv v8, v8, v9
; UNOPT-NEXT: ret
entry:
call void @llvm.set.rounding(i32 %rm)
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 7, i64 %2)
ret <vscale x 1 x float> %a
}
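; Note: here the same 0x42301 nibble table is used in the reverse direction,
; with the slli/srli pair computing %rm * 4 from the zero-extended argument;
; since the mapping {0<->1, 2<->3, 4<->4} is its own inverse, one table serves
; both llvm.get.rounding and llvm.set.rounding.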