clang-p2996/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
Wang Pengcheng 2023a230d1 [RISCV] Move V0 to the end of register allocation order (#82967)
According to
https://riscv-optimization-guide-riseproject-c94355ae3e6872252baa952524.gitlab.io/riscv-optimization-guide.html:

> The v0 register defined by the RISC-V vector extension is special in
> that it can be used both as a general purpose vector register and also
> as a mask register. As a preference, use registers other than v0 for
> non-mask values. Otherwise data will have to be moved out of v0 when a
> mask is required in an operation. v0 may be used when all other
> registers are in use, and using v0 would avoid spilling register state
> to memory.

Using the V0 register may also stall the masking pipeline and stop
chaining on some microarchitectures.

So we should avoid using V0, and register groups containing it, as much
as possible. We achieve this by moving V0 to the end of the register
allocation order. (The sketch below illustrates the V0 mask constraint
that makes this worthwhile.)
2024-03-01 12:17:56 +08:00
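A minimal illustrative sketch of that constraint (not taken from this test; the register choices are arbitrary): RVV masked instructions can only read their mask from v0, written as a trailing ", v0.t" (or ", v0" for vmerge/vadc-style instructions), so a mask that lives anywhere else must first be copied into v0 with a whole-register move, and any non-mask value allocated to v0 would have to be moved out at that point.

  vsetvli    t0, zero, e32, m1, ta, ma   # SEW=32, LMUL=1, VL=VLMAX
  vmfeq.vv   v10, v9, v9                 # compute a mask into a scratch register (v10)
  vmv1r.v    v0, v10                     # the mask operand can only be read from v0, so copy it there
  vmerge.vvm v11, v8, v9, v0             # masked merge: the trailing ", v0" is the implicit mask operand
  vadd.vv    v12, v8, v9, v0.t           # masked add: the ".t" form likewise reads its mask from v0

The same pattern appears in the checks below: the second compare's mask is produced in a scratch register (v10, v8, v7, ...) and is only copied into v0 with vmv1r.v immediately before the vmerge.vvm that consumes it.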

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
declare <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
define <vscale x 1 x half> @vfmax_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
; ZVFH-LABEL: vfmax_nxv1f16_vv:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_nxv1f16_vv:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
ret <vscale x 1 x half> %v
}
declare <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
define <vscale x 2 x half> @vfmax_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; ZVFH-LABEL: vfmax_nxv2f16_vv:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_nxv2f16_vv:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
; ZVFHMIN-NEXT: vmv.v.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%v = call <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
ret <vscale x 2 x half> %v
}
declare <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
define <vscale x 4 x half> @vfmax_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; ZVFH-LABEL: vfmax_nxv4f16_vv:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv.v.v v0, v10
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_nxv4f16_vv:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v14, v12, v10, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0
; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
%v = call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
ret <vscale x 4 x half> %v
}
declare <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
define <vscale x 8 x half> @vfmax_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; ZVFH-LABEL: vfmax_nxv8f16_vv:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v12, v10, v10
; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0
; ZVFH-NEXT: vmv1r.v v0, v12
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v14
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_nxv8f16_vv:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16
; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12
; ZVFHMIN-NEXT: vmerge.vvm v20, v16, v12, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0
; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%v = call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
ret <vscale x 8 x half> %v
}
declare <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>)
define <vscale x 16 x half> @vfmax_nxv16f16_vv(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
; ZVFH-LABEL: vfmax_nxv16f16_vv:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v16, v12, v12
; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0
; ZVFH-NEXT: vmv1r.v v0, v16
; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v20
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_nxv16f16_vv:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: sub sp, sp, a0
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24
; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16
; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0
; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vmv1r.v v0, v7
; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
%v = call <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
ret <vscale x 16 x half> %v
}
declare <vscale x 32 x half> @llvm.maximum.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>)
define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale x 32 x half> %b) nounwind {
; ZVFH-LABEL: vfmax_nxv32f16_vv:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v7, v16, v16
; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0
; ZVFH-NEXT: vmv1r.v v0, v7
; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v24
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_nxv32f16_vv:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: sub sp, sp, a0
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vmv8r.v v0, v8
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
; ZVFHMIN-NEXT: vmfeq.vv v3, v24, v24
; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v3
; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v8, v0
; ZVFHMIN-NEXT: vfmax.vv v24, v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16
; ZVFHMIN-NEXT: vmfeq.vv v7, v8, v8
; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v8, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vmv1r.v v0, v7
; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v16, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
%v = call <vscale x 32 x half> @llvm.maximum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
ret <vscale x 32 x half> %v
}
declare <vscale x 1 x float> @llvm.maximum.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>)
define <vscale x 1 x float> @vfmax_nxv1f32_vv(<vscale x 1 x float> %a, <vscale x 1 x float> %b) {
; CHECK-LABEL: vfmax_nxv1f32_vv:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: ret
%v = call <vscale x 1 x float> @llvm.maximum.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x float> %b)
ret <vscale x 1 x float> %v
}
declare <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
define <vscale x 2 x float> @vfmax_nxv2f32_vv(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: vfmax_nxv2f32_vv:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: ret
%v = call <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
ret <vscale x 2 x float> %v
}
declare <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
define <vscale x 4 x float> @vfmax_nxv4f32_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: vfmax_nxv4f32_vv:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v12, v10, v10
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v14
; CHECK-NEXT: ret
%v = call <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
ret <vscale x 4 x float> %v
}
declare <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>)
define <vscale x 8 x float> @vfmax_nxv8f32_vv(<vscale x 8 x float> %a, <vscale x 8 x float> %b) {
; CHECK-LABEL: vfmax_nxv8f32_vv:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v16, v12, v12
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v20
; CHECK-NEXT: ret
%v = call <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b)
ret <vscale x 8 x float> %v
}
declare <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>)
define <vscale x 16 x float> @vfmax_nxv16f32_vv(<vscale x 16 x float> %a, <vscale x 16 x float> %b) nounwind {
; CHECK-LABEL: vfmax_nxv16f32_vv:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v7, v16, v16
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v24
; CHECK-NEXT: ret
%v = call <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b)
ret <vscale x 16 x float> %v
}
declare <vscale x 1 x double> @llvm.maximum.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>)
define <vscale x 1 x double> @vfmax_nxv1f64_vv(<vscale x 1 x double> %a, <vscale x 1 x double> %b) {
; CHECK-LABEL: vfmax_nxv1f64_vv:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: ret
%v = call <vscale x 1 x double> @llvm.maximum.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b)
ret <vscale x 1 x double> %v
}
declare <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
define <vscale x 2 x double> @vfmax_nxv2f64_vv(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: vfmax_nxv2f64_vv:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v12, v10, v10
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v14
; CHECK-NEXT: ret
%v = call <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
ret <vscale x 2 x double> %v
}
declare <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>)
define <vscale x 4 x double> @vfmax_nxv4f64_vv(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
; CHECK-LABEL: vfmax_nxv4f64_vv:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v16, v12, v12
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v20
; CHECK-NEXT: ret
%v = call <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
ret <vscale x 4 x double> %v
}
declare <vscale x 8 x double> @llvm.maximum.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>)
define <vscale x 8 x double> @vfmax_nxv8f64_vv(<vscale x 8 x double> %a, <vscale x 8 x double> %b) nounwind {
; CHECK-LABEL: vfmax_nxv8f64_vv:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v7, v16, v16
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v24
; CHECK-NEXT: ret
%v = call <vscale x 8 x double> @llvm.maximum.nxv8f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b)
ret <vscale x 8 x double> %v
}
define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnan(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
; ZVFH-LABEL: vfmax_nxv1f16_vv_nnan:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT: vfmax.vv v8, v8, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_nxv1f16_vv_nnan:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%v = call nnan <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
ret <vscale x 1 x half> %v
}
define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnana(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
; ZVFH-LABEL: vfmax_nxv1f16_vv_nnana:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
; ZVFH-NEXT: vmv1r.v v10, v9
; ZVFH-NEXT: vfadd.vv v10, v8, v8, v0.t
; ZVFH-NEXT: vfmax.vv v8, v10, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_nxv1f16_vv_nnana:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v9, v9
; ZVFHMIN-NEXT: vmerge.vvm v10, v11, v9, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%c = fadd nnan <vscale x 1 x half> %a, %a
%v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x half> %b)
ret <vscale x 1 x half> %v
}
define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnanb(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
; ZVFH-LABEL: vfmax_nxv1f16_vv_nnanb:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmv1r.v v10, v8
; ZVFH-NEXT: vfadd.vv v10, v9, v9, v0.t
; ZVFH-NEXT: vfmax.vv v8, v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_nxv1f16_vv_nnanb:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfadd.vv v9, v10, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11
; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v11, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v9, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%c = fadd nnan <vscale x 1 x half> %b, %b
%v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %c)
ret <vscale x 1 x half> %v
}