Files
clang-p2996/llvm/test/CodeGen/AMDGPU/fminimum3.ll
Brox Chen 6dbc01e801 [AMDGPU][True16][CodeGen] update GFX11Plus codegen test with true16 flag (#135078)
This is a NFC patch.

This patch runs a bulk update on the CodeGen tests that are impacted by the
true16 feature. It does the following:
1. Duplicates the GFX11Plus run lines and applies "-mattr=+real-true16" and
"-mattr=-real-true16" to them.
2. Regenerates the test checks with the update script.

For some GISEL run lines, the current CodeGen does not fully support the
true16 version. The run lines are still updated, but the failing one is
commented out and a "FIXME-TRUE16" comment is added to that test for easier
tracking. These tests will be fixed in follow-up patches.

We are currently in a transition state in which the code base supports both
the "+real-true16" and "-real-true16" modes. The plan is to make
"+real-true16" the default, and finally to remove the "-real-true16" mode and
its test lines.
2025-04-23 13:06:52 -04:00

4071 lines
164 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
; Baseline case: two chained llvm.minimum.f32 calls on three VGPR arguments,
; with the first result used as the first operand of the second call. The
; checks expect a single v_minimum3_f32 on gfx1200 and gfx950, and two
; v_min_f32 / ordered-compare / select-quiet-NaN sequences on gfx942.
define float @v_fminimum3_f32(float %a, float %b, float %c) {
; GFX12-LABEL: v_fminimum3_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v3, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v1, v0, v2
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call float @llvm.minimum.f32(float %a, float %b)
  %min1 = call float @llvm.minimum.f32(float %min0, float %c)
  ret float %min1
}
; Commuted form of the baseline: the result of the first llvm.minimum.f32 is
; the SECOND operand of the outer call, with %c first. The checks expect the
; three-operand instruction to list v2 (%c) first on gfx1200 and gfx950.
define float @v_fminimum3_f32_commute(float %a, float %b, float %c) {
; GFX12-LABEL: v_fminimum3_f32_commute:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v2, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v3, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v1, v2, v0
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32_commute:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v2, v0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call float @llvm.minimum.f32(float %a, float %b)
  %min1 = call float @llvm.minimum.f32(float %c, float %min0)
  ret float %min1
}
; amdgpu_ps variant with all three inputs marked inreg (they arrive in s0-s2
; in the checks). The chained minimum is bitcast to i32 and passed through
; llvm.amdgcn.readfirstlane so the function can return it as an i32; the
; checks expect the result to come back in s0 via v_readfirstlane_b32.
define amdgpu_ps i32 @s_fminimum3_f32(float inreg %a, float inreg %b, float inreg %c) {
; GFX12-LABEL: s_fminimum3_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_minimum3_f32 v0, s0, s1, v0
; GFX12-NEXT: v_readfirstlane_b32 s0, v0
; GFX12-NEXT: s_wait_alu 0xf1ff
; GFX12-NEXT: ; return to shader part epilog
;
; GFX942-LABEL: s_fminimum3_f32:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NEXT: v_min_f32_e32 v1, s0, v0
; GFX942-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX942-NEXT: v_min_f32_e32 v1, s2, v0
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_readfirstlane_b32 s0, v0
; GFX942-NEXT: ; return to shader part epilog
;
; GFX950-LABEL: s_fminimum3_f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: v_mov_b32_e32 v0, s1
; GFX950-NEXT: v_mov_b32_e32 v1, s2
; GFX950-NEXT: v_minimum3_f32 v0, s0, v0, v1
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_readfirstlane_b32 s0, v0
; GFX950-NEXT: ; return to shader part epilog
  %min0 = call float @llvm.minimum.f32(float %a, float %b)
  %min1 = call float @llvm.minimum.f32(float %min0, float %c)
  %cast = bitcast float %min1 to i32
  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
  ret i32 %readfirstlane
}
; llvm.fabs applied to the first input only. The checks expect the absolute
; value to appear as a |v0| source modifier rather than a separate
; instruction on every tested target.
define float @v_fminimum3_f32_fabs0(float %a, float %b, float %c) {
; GFX12-LABEL: v_fminimum3_f32_fabs0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, |v0|, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32_fabs0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e64 v3, |v0|, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v1, v0, v2
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32_fabs0:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, |v0|, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %a.fabs = call float @llvm.fabs.f32(float %a)
  %min0 = call float @llvm.minimum.f32(float %a.fabs, float %b)
  %min1 = call float @llvm.minimum.f32(float %min0, float %c)
  ret float %min1
}
; llvm.fabs applied to the second input only; the checks expect it to be
; emitted as a |v1| source modifier.
define float @v_fminimum3_f32_fabs1(float %a, float %b, float %c) {
; GFX12-LABEL: v_fminimum3_f32_fabs1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v0, |v1|, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32_fabs1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e64 v3, v0, |v1|
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v1, v0, v2
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32_fabs1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v0, |v1|, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %b.fabs = call float @llvm.fabs.f32(float %b)
  %min0 = call float @llvm.minimum.f32(float %a, float %b.fabs)
  %min1 = call float @llvm.minimum.f32(float %min0, float %c)
  ret float %min1
}
; llvm.fabs applied to the third input only (the operand of the outer
; llvm.minimum call); the checks expect it to be emitted as a |v2| source
; modifier.
define float @v_fminimum3_f32_fabs2(float %a, float %b, float %c) {
; GFX12-LABEL: v_fminimum3_f32_fabs2:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, |v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32_fabs2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v3, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f32_e64 v1, v0, |v2|
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32_fabs2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, |v2|
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %c.fabs = call float @llvm.fabs.f32(float %c)
  %min0 = call float @llvm.minimum.f32(float %a, float %b)
  %min1 = call float @llvm.minimum.f32(float %min0, float %c.fabs)
  ret float %min1
}
; llvm.fabs applied to all three inputs; the checks expect |vN| source
; modifiers on every original input operand.
define float @v_fminimum3_f32_fabs_all(float %a, float %b, float %c) {
; GFX12-LABEL: v_fminimum3_f32_fabs_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, |v0|, |v1|, |v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32_fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e64 v3, |v0|, |v1|
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f32_e64 v1, v0, |v2|
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32_fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v1|, |v2|
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %a.fabs = call float @llvm.fabs.f32(float %a)
  %b.fabs = call float @llvm.fabs.f32(float %b)
  %c.fabs = call float @llvm.fabs.f32(float %c)
  %min0 = call float @llvm.minimum.f32(float %a.fabs, float %b.fabs)
  %min1 = call float @llvm.minimum.f32(float %min0, float %c.fabs)
  ret float %min1
}
; fneg applied to all three inputs; the checks expect -vN source modifiers
; on every original input operand instead of separate negate instructions.
define float @v_fminimum3_f32_fneg_all(float %a, float %b, float %c) {
; GFX12-LABEL: v_fminimum3_f32_fneg_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, -v0, -v1, -v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32_fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e64 v3, -v0, -v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f32_e64 v1, v0, -v2
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32_fneg_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v1, -v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %a.fneg = fneg float %a
  %b.fneg = fneg float %b
  %c.fneg = fneg float %c
  %min0 = call float @llvm.minimum.f32(float %a.fneg, float %b.fneg)
  %min1 = call float @llvm.minimum.f32(float %min0, float %c.fneg)
  ret float %min1
}
; fneg(fabs(x)) applied to all three inputs; the checks expect the combined
; -|vN| source modifier on every original input operand.
define float @v_fminimum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
; GFX12-LABEL: v_fminimum3_f32_fneg_fabs_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, -|v0|, -|v1|, -|v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32_fneg_fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e64 v3, -|v0|, -|v1|
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f32_e64 v1, v0, -|v2|
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32_fneg_fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, -|v0|, -|v1|, -|v2|
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %a.fabs = call float @llvm.fabs.f32(float %a)
  %b.fabs = call float @llvm.fabs.f32(float %b)
  %c.fabs = call float @llvm.fabs.f32(float %c)
  %a.fneg.fabs = fneg float %a.fabs
  %b.fneg.fabs = fneg float %b.fabs
  %c.fneg.fabs = fneg float %c.fabs
  %min0 = call float @llvm.minimum.f32(float %a.fneg.fabs, float %b.fneg.fabs)
  %min1 = call float @llvm.minimum.f32(float %min0, float %c.fneg.fabs)
  ret float %min1
}
; fneg applied to the first input only; the checks expect it to be emitted
; as a -v0 source modifier.
define float @v_fminimum3_f32_fneg0(float %a, float %b, float %c) {
; GFX12-LABEL: v_fminimum3_f32_fneg0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, -v0, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32_fneg0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e64 v3, -v0, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v1, v0, v2
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32_fneg0:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, -v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %a.fneg = fneg float %a
  %min0 = call float @llvm.minimum.f32(float %a.fneg, float %b)
  %min1 = call float @llvm.minimum.f32(float %min0, float %c)
  ret float %min1
}
; fneg applied to the second input only; the checks expect it to be emitted
; as a -v1 source modifier.
define float @v_fminimum3_f32_fneg1(float %a, float %b, float %c) {
; GFX12-LABEL: v_fminimum3_f32_fneg1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v0, -v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32_fneg1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e64 v3, v0, -v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v1, v0, v2
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32_fneg1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v0, -v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %b.fneg = fneg float %b
  %min0 = call float @llvm.minimum.f32(float %a, float %b.fneg)
  %min1 = call float @llvm.minimum.f32(float %min0, float %c)
  ret float %min1
}
; fneg applied to the third input only (the operand of the outer
; llvm.minimum call); the checks expect it to be emitted as a -v2 source
; modifier.
define float @v_fminimum3_f32_fneg2(float %a, float %b, float %c) {
; GFX12-LABEL: v_fminimum3_f32_fneg2:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, -v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32_fneg2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v3, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f32_e64 v1, v0, -v2
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32_fneg2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, -v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %c.fneg = fneg float %c
  %min0 = call float @llvm.minimum.f32(float %a, float %b)
  %min1 = call float @llvm.minimum.f32(float %min0, float %c.fneg)
  ret float %min1
}
; The constant 8.0 (0x41000000 in the checks) is the first operand of the
; inner llvm.minimum call. The checks expect a literal operand on gfx1200,
; an s_mov_b32 into s0 on gfx950, and on gfx942 a self-compare of v0 for the
; ordered test of the first step.
define float @v_fminimum3_f32_const0(float %b, float %c) {
; GFX12-LABEL: v_fminimum3_f32_const0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v0, 0x41000000, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32_const0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v2, 0x41000000, v0
; GFX942-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX942-NEXT: v_min_f32_e32 v2, v0, v1
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32_const0:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: s_mov_b32 s0, 0x41000000
; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call float @llvm.minimum.f32(float 8.0, float %b)
  %min1 = call float @llvm.minimum.f32(float %min0, float %c)
  ret float %min1
}
; The constant 8.0 (0x41000000 in the checks) is the second operand of the
; outer llvm.minimum call. The checks expect a literal third source on
; gfx1200 and an s_mov_b32 into s0 on gfx950.
define float @v_fminimum3_f32__const2(float %a, float %b) {
; GFX12-LABEL: v_fminimum3_f32__const2:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, 0x41000000
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32__const2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v2, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX942-NEXT: v_min_f32_e32 v1, 0x41000000, v0
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32__const2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: s_mov_b32 s0, 0x41000000
; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call float @llvm.minimum.f32(float %a, float %b)
  %min1 = call float @llvm.minimum.f32(float %min0, float 8.0)
  ret float %min1
}
; The constant 4.0 is the first operand of the inner llvm.minimum call. The
; checks expect it to be printed directly as the operand 4.0 on every tested
; target, with no separate move to materialise it.
define float @v_fminimum3_f32_inlineimm0(float %b, float %c) {
; GFX12-LABEL: v_fminimum3_f32_inlineimm0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v0, 4.0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32_inlineimm0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v2, 4.0, v0
; GFX942-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX942-NEXT: v_min_f32_e32 v2, v0, v1
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32_inlineimm0:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call float @llvm.minimum.f32(float 4.0, float %b)
  %min1 = call float @llvm.minimum.f32(float %min0, float %c)
  ret float %min1
}
; The constant 4.0 is the second operand of the outer llvm.minimum call. The
; checks expect it to be printed directly as the operand 4.0 on every tested
; target.
define float @v_fminimum3_f32__inlineimm(float %a, float %b) {
; GFX12-LABEL: v_fminimum3_f32__inlineimm:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32__inlineimm:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v2, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX942-NEXT: v_min_f32_e32 v1, 4.0, v0
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32__inlineimm:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, 4.0
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call float @llvm.minimum.f32(float %a, float %b)
  %min1 = call float @llvm.minimum.f32(float %min0, float 4.0)
  ret float %min1
}
; Two different constants: 8.0 (0x41000000) in the inner llvm.minimum call
; and 16.0 (0x41800000) in the outer one. The checks expect one constant in
; s0 plus one literal operand on gfx1200, and one constant in s0 plus one in
; v1 on gfx950.
define float @v_fminimum3_f32_const1_const2(float %a) {
; GFX12-LABEL: v_fminimum3_f32_const1_const2:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s0, 0x41000000
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_minimum3_f32 v0, v0, s0, 0x41800000
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f32_const1_const2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v1, 0x41000000, v0
; GFX942-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX942-NEXT: v_min_f32_e32 v1, 0x41800000, v0
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f32_const1_const2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: s_mov_b32 s0, 0x41000000
; GFX950-NEXT: v_mov_b32_e32 v1, 0x41800000
; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call float @llvm.minimum.f32(float %a, float 8.0)
  %min1 = call float @llvm.minimum.f32(float %min0, float 16.0)
  ret float %min1
}
; <2 x float> variant; the checks expect one three-operand instruction per
; element on gfx1200 and gfx950.
; Reviewer remark - in this function %c is the FIRST operand of the outer
; llvm.minimum call and the earlier result is the second. That is the
; operand order of the scalar @v_fminimum3_f32_commute test, even though
; this function carries no _commute suffix.
define <2 x float> @v_fminimum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; GFX12-LABEL: v_fminimum3_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v4, v0, v2
; GFX12-NEXT: v_minimum3_f32 v1, v5, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v2f32:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v6, v1, v3
; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX942-NEXT: v_min_f32_e32 v3, v0, v2
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v2, v4, v0
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v4, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX942-NEXT: v_min_f32_e32 v2, v5, v1
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v5, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v2f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v4, v0, v2
; GFX950-NEXT: v_minimum3_f32 v1, v5, v1, v3
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
  %min1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %c, <2 x float> %min0)
  ret <2 x float> %min1
}
; <2 x float> variant; the checks expect one three-operand instruction per
; element on gfx1200 and gfx950.
; Reviewer remark - despite the _commute suffix, the outer llvm.minimum call
; here takes the earlier result as its FIRST operand and %c as its second.
; That is the operand order of the scalar @v_fminimum3_f32 test.
define <2 x float> @v_fminimum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; GFX12-LABEL: v_fminimum3_v2f32_commute:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v0, v2, v4
; GFX12-NEXT: v_minimum3_f32 v1, v1, v3, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v2f32_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v6, v1, v3
; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX942-NEXT: v_min_f32_e32 v3, v0, v2
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v2, v0, v4
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX942-NEXT: v_min_f32_e32 v2, v1, v5
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v2f32_commute:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v4
; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
  %min1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %min0, <2 x float> %c)
  ret <2 x float> %min1
}
; <2 x float> variant with llvm.fabs applied to all three vector inputs; the
; checks expect |vN| source modifiers on each per-element instruction.
define <2 x float> @v_fminimum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; GFX12-LABEL: v_fminimum3_v2f32__fabs_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, |v0|, |v2|, |v4|
; GFX12-NEXT: v_minimum3_f32 v1, |v1|, |v3|, |v5|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v2f32__fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e64 v6, |v1|, |v3|
; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3|
; GFX942-NEXT: v_min_f32_e64 v3, |v0|, |v2|
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX942-NEXT: v_min_f32_e64 v2, v0, |v4|
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX942-NEXT: v_min_f32_e64 v2, v1, |v5|
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v2f32__fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v2|, |v4|
; GFX950-NEXT: v_minimum3_f32 v1, |v1|, |v3|, |v5|
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
  %b.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
  %c.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %c)
  %min0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a.fabs, <2 x float> %b.fabs)
  %min1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %min0, <2 x float> %c.fabs)
  ret <2 x float> %min1
}
; <2 x float> variant with fneg applied to all three vector inputs; the
; checks expect -vN source modifiers on each per-element instruction.
define <2 x float> @v_fminimum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; GFX12-LABEL: v_fminimum3_v2f32__fneg_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, -v0, -v2, -v4
; GFX12-NEXT: v_minimum3_f32 v1, -v1, -v3, -v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v2f32__fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e64 v6, -v1, -v3
; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3
; GFX942-NEXT: v_min_f32_e64 v3, -v0, -v2
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX942-NEXT: v_min_f32_e64 v2, v0, -v4
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX942-NEXT: v_min_f32_e64 v2, v1, -v5
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v2f32__fneg_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v2, -v4
; GFX950-NEXT: v_minimum3_f32 v1, -v1, -v3, -v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %a.fneg = fneg <2 x float> %a
  %b.fneg = fneg <2 x float> %b
  %c.fneg = fneg <2 x float> %c
  %min0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a.fneg, <2 x float> %b.fneg)
  %min1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %min0, <2 x float> %c.fneg)
  ret <2 x float> %min1
}
; <2 x float> variant where the second operand of the inner llvm.minimum
; call is the constant splat <2.0, 2.0>. The checks expect 2.0 to be printed
; directly as an operand of each per-element instruction on every tested
; target.
define <2 x float> @v_fminimum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c) {
; GFX12-LABEL: v_fminimum3_v2f32__inlineimm1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v0, 2.0, v2
; GFX12-NEXT: v_minimum3_f32 v1, v1, 2.0, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v2f32__inlineimm1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v4, 2.0, v1
; GFX942-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
; GFX942-NEXT: v_min_f32_e32 v4, 2.0, v0
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX942-NEXT: v_min_f32_e32 v4, v0, v2
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX942-NEXT: v_min_f32_e32 v2, v1, v3
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v2f32__inlineimm1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v0, 2.0, v2
; GFX950-NEXT: v_minimum3_f32 v1, v1, 2.0, v3
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>)
  %min1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %min0, <2 x float> %c)
  ret <2 x float> %min1
}
; Two chained llvm.minimum calls on <2 x float>; the second call's second
; operand is the constant splat 4.0. The gfx12 and gfx950 checks expect one
; v_minimum3_f32 per element with 4.0 as the last source. The gfx942 checks
; expect every minimum expanded to v_min_f32 + v_cmp_o_f32 + v_cndmask_b32,
; where the select yields 0x7fc00000 when the ordered compare fails.
define <2 x float> @v_fminimum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b) {
; GFX12-LABEL: v_fminimum3_v2f32__inlineimm2:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v0, v2, 4.0
; GFX12-NEXT: v_minimum3_f32 v1, v1, v3, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v2f32__inlineimm2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v4, v1, v3
; GFX942-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX942-NEXT: v_min_f32_e32 v3, v0, v2
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v2, 4.0, v0
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
; GFX942-NEXT: v_min_f32_e32 v2, 4.0, v1
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v2f32__inlineimm2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, 4.0
; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, 4.0
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
  %min1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %min0, <2 x float> <float 4.0, float 4.0>)
  ret <2 x float> %min1
}
; Two chained llvm.minimum calls on <3 x float>; the second call takes %c as
; its first operand and the first call's result as its second. The gfx12 and
; gfx950 checks expect one v_minimum3_f32 per element with v6/v7/v8 as the
; first source. The gfx942 checks expect every minimum expanded to
; v_min_f32 + v_cmp_o_f32 + v_cndmask_b32, where the select yields 0x7fc00000
; when the ordered compare fails.
define <3 x float> @v_fminimum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
; GFX12-LABEL: v_fminimum3_v3f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v6, v0, v3
; GFX12-NEXT: v_minimum3_f32 v1, v7, v1, v4
; GFX12-NEXT: v_minimum3_f32 v2, v8, v2, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v3f32:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v9, v2, v5
; GFX942-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX942-NEXT: v_min_f32_e32 v5, v1, v4
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX942-NEXT: v_min_f32_e32 v4, v0, v3
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX942-NEXT: v_min_f32_e32 v3, v6, v0
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v6, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v3, v7, v1
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v7, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v3, v8, v2
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v8, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v3f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v6, v0, v3
; GFX950-NEXT: v_minimum3_f32 v1, v7, v1, v4
; GFX950-NEXT: v_minimum3_f32 v2, v8, v2, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
  %min1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %c, <3 x float> %min0)
  ret <3 x float> %min1
}
; Two chained llvm.minimum calls on <3 x float>; the second call takes the
; first call's result as its first operand and %c as its second. The gfx12
; and gfx950 checks expect one v_minimum3_f32 per element with v6/v7/v8 as
; the last source. The gfx942 checks expect every minimum expanded to
; v_min_f32 + v_cmp_o_f32 + v_cndmask_b32, where the select yields 0x7fc00000
; when the ordered compare fails.
define <3 x float> @v_fminimum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
; GFX12-LABEL: v_fminimum3_v3f32_commute:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v0, v3, v6
; GFX12-NEXT: v_minimum3_f32 v1, v1, v4, v7
; GFX12-NEXT: v_minimum3_f32 v2, v2, v5, v8
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v3f32_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v9, v2, v5
; GFX942-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX942-NEXT: v_min_f32_e32 v5, v1, v4
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX942-NEXT: v_min_f32_e32 v4, v0, v3
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX942-NEXT: v_min_f32_e32 v3, v0, v6
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v6
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v3, v1, v7
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v7
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v3, v2, v8
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v8
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v3f32_commute:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v6
; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v7
; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v8
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
  %min1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %min0, <3 x float> %c)
  ret <3 x float> %min1
}
; Two chained llvm.minimum calls on <3 x float> where all three inputs first
; pass through llvm.fabs. The gfx12 and gfx950 checks expect one
; v_minimum3_f32 per element with |x| modifiers on every source. The gfx942
; checks expect the min/cmp/cndmask expansion in e64 form, with |x| modifiers
; on the original inputs and none on the intermediate result.
define <3 x float> @v_fminimum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
; GFX12-LABEL: v_fminimum3_v3f32__fabs_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, |v0|, |v3|, |v6|
; GFX12-NEXT: v_minimum3_f32 v1, |v1|, |v4|, |v7|
; GFX12-NEXT: v_minimum3_f32 v2, |v2|, |v5|, |v8|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v3f32__fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e64 v9, |v2|, |v5|
; GFX942-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5|
; GFX942-NEXT: v_min_f32_e64 v5, |v1|, |v4|
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4|
; GFX942-NEXT: v_min_f32_e64 v4, |v0|, |v3|
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX942-NEXT: v_min_f32_e64 v3, v0, |v6|
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX942-NEXT: v_min_f32_e64 v3, v1, |v7|
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX942-NEXT: v_min_f32_e64 v3, v2, |v8|
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v3f32__fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v3|, |v6|
; GFX950-NEXT: v_minimum3_f32 v1, |v1|, |v4|, |v7|
; GFX950-NEXT: v_minimum3_f32 v2, |v2|, |v5|, |v8|
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %abs.a = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
  %abs.b = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b)
  %abs.c = call <3 x float> @llvm.fabs.v3f32(<3 x float> %c)
  %min0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %abs.a, <3 x float> %abs.b)
  %min1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %min0, <3 x float> %abs.c)
  ret <3 x float> %min1
}
; Two chained llvm.minimum calls on <3 x float> where all three inputs are
; negated with fneg first. The gfx12 and gfx950 checks expect one
; v_minimum3_f32 per element with a negate modifier on every source. The
; gfx942 checks expect the min/cmp/cndmask expansion in e64 form, with negate
; modifiers on the original inputs and none on the intermediate result.
define <3 x float> @v_fminimum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
; GFX12-LABEL: v_fminimum3_v3f32__fneg_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, -v0, -v3, -v6
; GFX12-NEXT: v_minimum3_f32 v1, -v1, -v4, -v7
; GFX12-NEXT: v_minimum3_f32 v2, -v2, -v5, -v8
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v3f32__fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e64 v9, -v2, -v5
; GFX942-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5
; GFX942-NEXT: v_min_f32_e64 v5, -v1, -v4
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4
; GFX942-NEXT: v_min_f32_e64 v4, -v0, -v3
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX942-NEXT: v_min_f32_e64 v3, v0, -v6
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX942-NEXT: v_min_f32_e64 v3, v1, -v7
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX942-NEXT: v_min_f32_e64 v3, v2, -v8
; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v3f32__fneg_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v3, -v6
; GFX950-NEXT: v_minimum3_f32 v1, -v1, -v4, -v7
; GFX950-NEXT: v_minimum3_f32 v2, -v2, -v5, -v8
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %neg.a = fneg <3 x float> %a
  %neg.b = fneg <3 x float> %b
  %neg.c = fneg <3 x float> %c
  %min0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %neg.a, <3 x float> %neg.b)
  %min1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %min0, <3 x float> %neg.c)
  ret <3 x float> %min1
}
; Two chained llvm.minimum calls on <3 x float>; the first call's second
; operand is the constant splat 2.0. The gfx12 and gfx950 checks expect one
; v_minimum3_f32 per element with 2.0 as the middle source. The gfx942 checks
; expect every minimum expanded to v_min_f32 + v_cmp_o_f32 + v_cndmask_b32,
; where the select yields 0x7fc00000 when the ordered compare fails.
define <3 x float> @v_fminimum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c) {
; GFX12-LABEL: v_fminimum3_v3f32__inlineimm1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v0, 2.0, v3
; GFX12-NEXT: v_minimum3_f32 v1, v1, 2.0, v4
; GFX12-NEXT: v_minimum3_f32 v2, v2, 2.0, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v3f32__inlineimm1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v6, 2.0, v2
; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
; GFX942-NEXT: v_min_f32_e32 v6, 2.0, v1
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX942-NEXT: v_min_f32_e32 v6, 2.0, v0
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX942-NEXT: v_min_f32_e32 v6, v0, v3
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX942-NEXT: v_min_f32_e32 v3, v1, v4
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v3, v2, v5
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v3f32__inlineimm1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v0, 2.0, v3
; GFX950-NEXT: v_minimum3_f32 v1, v1, 2.0, v4
; GFX950-NEXT: v_minimum3_f32 v2, v2, 2.0, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>)
  %min1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %min0, <3 x float> %c)
  ret <3 x float> %min1
}
; Two chained llvm.minimum calls on <3 x float>; the second call's second
; operand is the constant splat 4.0. The gfx12 and gfx950 checks expect one
; v_minimum3_f32 per element with 4.0 as the last source. The gfx942 checks
; expect every minimum expanded to v_min_f32 + v_cmp_o_f32 + v_cndmask_b32,
; where the select yields 0x7fc00000 when the ordered compare fails.
define <3 x float> @v_fminimum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b) {
; GFX12-LABEL: v_fminimum3_v3f32__inlineimm2:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum3_f32 v0, v0, v3, 4.0
; GFX12-NEXT: v_minimum3_f32 v1, v1, v4, 4.0
; GFX12-NEXT: v_minimum3_f32 v2, v2, v5, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v3f32__inlineimm2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v6, v2, v5
; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX942-NEXT: v_min_f32_e32 v5, v1, v4
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX942-NEXT: v_min_f32_e32 v4, v0, v3
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
; GFX942-NEXT: v_min_f32_e32 v3, 4.0, v0
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v3, 4.0, v1
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v3, 4.0, v2
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v3f32__inlineimm2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, 4.0
; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, 4.0
; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, 4.0
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
  %min1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %min0, <3 x float> <float 4.0, float 4.0, float 4.0>)
  ret <3 x float> %min1
}
; Two chained scalar-half llvm.minimum calls: min(min(a, b), c). The gfx12
; checks expect a single v_minimum3_f16 (16-bit register halves with
; +real-true16, full VGPRs with -real-true16). The gfx950 checks expect a
; single v_pk_minimum3_f16. The gfx942 checks expect each minimum expanded to
; v_min_f16 + v_cmp_o_f16 + v_cndmask_b32, where the select yields 0x7e00
; when the ordered compare fails.
define half @v_fminimum3_f16(half %a, half %b, half %c) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, v2.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e32 v3, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f16_e32 v1, v0, v2
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call half @llvm.minimum.f16(half %a, half %b)
  %min1 = call half @llvm.minimum.f16(half %min0, half %c)
  ret half %min1
}
; Two chained scalar-half llvm.minimum calls with the outer operands swapped:
; min(c, min(a, b)). The gfx12 checks expect a single v_minimum3_f16 and the
; gfx950 checks a single v_pk_minimum3_f16, both with v2 as the first source.
; The gfx942 checks expect each minimum expanded to v_min_f16 + v_cmp_o_f16 +
; v_cndmask_b32, where the select yields 0x7e00 when the ordered compare
; fails.
define half @v_fminimum3_f16_commute(half %a, half %b, half %c) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16_commute:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, v2.l, v0.l, v1.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16_commute:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v2, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e32 v3, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f16_e32 v1, v2, v0
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v2, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16_commute:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v2, v0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %min0 = call half @llvm.minimum.f16(half %a, half %b)
  %min1 = call half @llvm.minimum.f16(half %c, half %min0)
  ret half %min1
}
; amdgpu_ps variant taking three inreg half arguments: min(min(a, b), c),
; then the half result is bitcast to i16, zero-extended to i32 and passed
; through llvm.amdgcn.readfirstlane for the i32 return. The checks expect the
; SGPR inputs s0/s1/s2 to feed the VALU minimum sequence (some copied into
; VGPRs first), followed by v_and_b32 with 0xffff and v_readfirstlane_b32
; into s0.
define amdgpu_ps i32 @s_fminimum3_f16(half inreg %a, half inreg %b, half inreg %c) {
; GFX12-TRUE16-LABEL: s_fminimum3_f16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, s0, s1, v0.l
; GFX12-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX12-TRUE16-NEXT: s_wait_alu 0xf1ff
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
; GFX12-FAKE16-LABEL: s_fminimum3_f16:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, s2
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, s0, s1, v0
; GFX12-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX12-FAKE16-NEXT: s_wait_alu 0xf1ff
; GFX12-FAKE16-NEXT: ; return to shader part epilog
;
; GFX942-LABEL: s_fminimum3_f16:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NEXT: v_min_f16_e32 v1, s0, v0
; GFX942-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX942-NEXT: v_min_f16_e32 v1, s2, v0
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, s2, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX942-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_readfirstlane_b32 s0, v0
; GFX942-NEXT: ; return to shader part epilog
;
; GFX950-LABEL: s_fminimum3_f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: v_mov_b32_e32 v0, s1
; GFX950-NEXT: v_mov_b32_e32 v1, s2
; GFX950-NEXT: v_pk_minimum3_f16 v0, s0, v0, v1
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_readfirstlane_b32 s0, v0
; GFX950-NEXT: ; return to shader part epilog
  %min0 = call half @llvm.minimum.f16(half %a, half %b)
  %min1 = call half @llvm.minimum.f16(half %min0, half %c)
  %bits = bitcast half %min1 to i16
  %bits.ext = zext i16 %bits to i32
  %uniform = call i32 @llvm.amdgcn.readfirstlane(i32 %bits.ext)
  ret i32 %uniform
}
; min(min(fabs(a), b), c) on scalar half. The gfx12 checks expect the fabs
; folded into v_minimum3_f16 as an |x| modifier on the first source, and the
; gfx942 checks expect it folded into the first v_min_f16 / v_cmp_o_f16 pair
; (e64 form). The gfx950 checks expect a separate v_and_b32 with 0x7fff on v0
; ahead of v_pk_minimum3_f16.
define half @v_fminimum3_f16_fabs0(half %a, half %b, half %c) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16_fabs0:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, |v0.l|, v1.l, v2.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16_fabs0:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, |v0|, v1, v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16_fabs0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e64 v3, |v0|, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v0|, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f16_e32 v1, v0, v2
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16_fabs0:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %abs.a = call half @llvm.fabs.f16(half %a)
  %min0 = call half @llvm.minimum.f16(half %abs.a, half %b)
  %min1 = call half @llvm.minimum.f16(half %min0, half %c)
  ret half %min1
}
; min(min(a, fabs(b)), c) on scalar half. The gfx12 checks expect the fabs
; folded into v_minimum3_f16 as an |x| modifier on the second source, and the
; gfx942 checks expect it folded into the first v_min_f16 / v_cmp_o_f16 pair
; (e64 form). The gfx950 checks expect a separate v_and_b32 with 0x7fff on v1
; ahead of v_pk_minimum3_f16.
define half @v_fminimum3_f16_fabs1(half %a, half %b, half %c) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16_fabs1:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, |v1.l|, v2.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16_fabs1:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, |v1|, v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16_fabs1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e64 v3, v0, |v1|
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, |v1|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f16_e32 v1, v0, v2
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16_fabs1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v1, 0x7fff, v1
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %abs.b = call half @llvm.fabs.f16(half %b)
  %min0 = call half @llvm.minimum.f16(half %a, half %abs.b)
  %min1 = call half @llvm.minimum.f16(half %min0, half %c)
  ret half %min1
}
; min(min(a, b), fabs(c)) on scalar half. The gfx12 checks expect the fabs
; folded into v_minimum3_f16 as an |x| modifier on the third source, and the
; gfx942 checks expect it folded into the second v_min_f16 / v_cmp_o_f16 pair
; (e64 form). The gfx950 checks expect a separate v_and_b32 with 0x7fff on v2
; ahead of v_pk_minimum3_f16.
define half @v_fminimum3_f16_fabs2(half %a, half %b, half %c) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16_fabs2:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, |v2.l|
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16_fabs2:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, |v2|
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16_fabs2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e32 v3, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f16_e64 v1, v0, |v2|
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16_fabs2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v2, 0x7fff, v2
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %abs.c = call half @llvm.fabs.f16(half %c)
  %min0 = call half @llvm.minimum.f16(half %a, half %b)
  %min1 = call half @llvm.minimum.f16(half %min0, half %abs.c)
  ret half %min1
}
; min(min(fabs(a), fabs(b)), fabs(c)) on scalar half. The gfx12 checks expect
; all three fabs folded into v_minimum3_f16 as |x| modifiers, and the gfx942
; checks expect them folded into the e64 v_min_f16 / v_cmp_o_f16 pairs. The
; gfx950 checks expect three v_and_b32 with 0x7fff (one per input) ahead of
; v_pk_minimum3_f16.
define half @v_fminimum3_f16_fabs_all(half %a, half %b, half %c) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16_fabs_all:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, |v0.l|, |v1.l|, |v2.l|
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16_fabs_all:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, |v0|, |v1|, |v2|
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16_fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e64 v3, |v0|, |v1|
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f16_e64 v1, v0, |v2|
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16_fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX950-NEXT: v_and_b32_e32 v1, 0x7fff, v1
; GFX950-NEXT: v_and_b32_e32 v2, 0x7fff, v2
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %abs.a = call half @llvm.fabs.f16(half %a)
  %abs.b = call half @llvm.fabs.f16(half %b)
  %abs.c = call half @llvm.fabs.f16(half %c)
  %min0 = call half @llvm.minimum.f16(half %abs.a, half %abs.b)
  %min1 = call half @llvm.minimum.f16(half %min0, half %abs.c)
  ret half %min1
}
; min(min(-a, -b), -c) on scalar half. The gfx12 checks expect all three fneg
; folded into v_minimum3_f16 as negate modifiers, and the gfx942 checks
; expect them folded into the e64 v_min_f16 / v_cmp_o_f16 pairs. The gfx950
; checks expect three v_xor_b32 with 0x8000 (one per input) ahead of
; v_pk_minimum3_f16.
define half @v_fminimum3_f16_fneg_all(half %a, half %b, half %c) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16_fneg_all:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, -v0.l, -v1.l, -v2.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16_fneg_all:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, -v0, -v1, -v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16_fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e64 v3, -v0, -v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f16_e64 v1, v0, -v2
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, -v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16_fneg_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX950-NEXT: v_xor_b32_e32 v1, 0x8000, v1
; GFX950-NEXT: v_xor_b32_e32 v2, 0x8000, v2
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %neg.a = fneg half %a
  %neg.b = fneg half %b
  %neg.c = fneg half %c
  %min0 = call half @llvm.minimum.f16(half %neg.a, half %neg.b)
  %min1 = call half @llvm.minimum.f16(half %min0, half %neg.c)
  ret half %min1
}
; min(min(-|a|, -|b|), -|c|) on scalar half: each input goes through
; llvm.fabs and then fneg. The gfx12 checks expect combined -|x| modifiers on
; all three v_minimum3_f16 sources, and the gfx942 checks expect them on the
; e64 v_min_f16 / v_cmp_o_f16 pairs. The gfx950 checks expect three v_or_b32
; with 0x8000 (one per input) ahead of v_pk_minimum3_f16.
define half @v_fminimum3_f16_fneg_fabs_all(half %a, half %b, half %c) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16_fneg_fabs_all:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, -|v0.l|, -|v1.l|, -|v2.l|
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16_fneg_fabs_all:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, -|v0|, -|v1|, -|v2|
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16_fneg_fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e64 v3, -|v0|, -|v1|
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -|v0|, -|v1|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f16_e64 v1, v0, -|v2|
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, -|v2|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16_fneg_fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_or_b32_e32 v0, 0x8000, v0
; GFX950-NEXT: v_or_b32_e32 v1, 0x8000, v1
; GFX950-NEXT: v_or_b32_e32 v2, 0x8000, v2
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %abs.a = call half @llvm.fabs.f16(half %a)
  %abs.b = call half @llvm.fabs.f16(half %b)
  %abs.c = call half @llvm.fabs.f16(half %c)
  %nabs.a = fneg half %abs.a
  %nabs.b = fneg half %abs.b
  %nabs.c = fneg half %abs.c
  %min0 = call half @llvm.minimum.f16(half %nabs.a, half %nabs.b)
  %min1 = call half @llvm.minimum.f16(half %min0, half %nabs.c)
  ret half %min1
}
; min(min(-a, b), c) on scalar half. The gfx12 checks expect the fneg folded
; into v_minimum3_f16 as a negate modifier on the first source, and the
; gfx942 checks expect it folded into the first v_min_f16 / v_cmp_o_f16 pair
; (e64 form). The gfx950 checks expect a separate v_xor_b32 with 0x8000 on v0
; ahead of v_pk_minimum3_f16.
define half @v_fminimum3_f16_fneg0(half %a, half %b, half %c) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16_fneg0:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, -v0.l, v1.l, v2.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16_fneg0:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, -v0, v1, v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16_fneg0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e64 v3, -v0, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f16_e32 v1, v0, v2
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16_fneg0:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
  %neg.a = fneg half %a
  %min0 = call half @llvm.minimum.f16(half %neg.a, half %b)
  %min1 = call half @llvm.minimum.f16(half %min0, half %c)
  ret half %min1
}
; Scalar f16 three-input minimum written as two chained llvm.minimum.f16
; calls, with fneg applied to %b (the second operand of the inner call).
; Per the assertions below: gfx1200 emits a single v_minimum3_f16 with the
; negation printed as a source modifier on the second source; gfx942 emits
; two v_min_f16 / v_cmp_o_f16 / v_cndmask_b32 groups, the first carrying the
; -v1 modifier; gfx950 flips the sign bit of v1 with v_xor_b32 0x8000 and
; then uses v_pk_minimum3_f16.
; The values %max0/%max1 hold llvm.minimum results despite their names.
define half @v_fminimum3_f16_fneg1(half %a, half %b, half %c) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16_fneg1:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, -v1.l, v2.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16_fneg1:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, -v1, v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16_fneg1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e64 v3, v0, -v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, -v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f16_e32 v1, v0, v2
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16_fneg1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_xor_b32_e32 v1, 0x8000, v1
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%b.fneg = fneg half %b
%max0 = call half @llvm.minimum.f16(half %a, half %b.fneg)
%max1 = call half @llvm.minimum.f16(half %max0, half %c)
ret half %max1
}
; Scalar f16 three-input minimum written as two chained llvm.minimum.f16
; calls, with fneg applied to %c (the second operand of the outer call).
; Per the assertions below: gfx1200 emits a single v_minimum3_f16 with the
; negation printed as a source modifier on the third source; gfx942 emits
; two v_min_f16 / v_cmp_o_f16 / v_cndmask_b32 groups, the second carrying
; the -v2 modifier; gfx950 flips the sign bit of v2 with v_xor_b32 0x8000
; and then uses v_pk_minimum3_f16.
; The values %max0/%max1 hold llvm.minimum results despite their names.
define half @v_fminimum3_f16_fneg2(half %a, half %b, half %c) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16_fneg2:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, -v2.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16_fneg2:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, -v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16_fneg2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e32 v3, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f16_e64 v1, v0, -v2
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, -v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16_fneg2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_xor_b32_e32 v2, 0x8000, v2
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%c.fneg = fneg half %c
%max0 = call half @llvm.minimum.f16(half %a, half %b)
%max1 = call half @llvm.minimum.f16(half %max0, half %c.fneg)
ret half %max1
}
; Scalar f16 three-input minimum where the first operand of the inner
; llvm.minimum.f16 call is the constant 8.0 (printed as 0x4800 in the
; assertions, i.e. not as an inline immediate).
; Per the assertions below: gfx1200 emits one v_minimum3_f16 with the
; literal 0x4800 as the middle source; gfx942 uses the literal in the first
; v_min_f16 and compares v0 against itself in the first v_cmp_o_f16
; (presumably because the constant operand cannot be NaN); gfx950 first
; materialises 0x4800 into s0 with s_movk_i32 and passes s0 to
; v_pk_minimum3_f16.
; The values %max0/%max1 hold llvm.minimum results despite their names.
define half @v_fminimum3_f16_const0(half %b, half %c) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16_const0:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, 0x4800, v1.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16_const0:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, 0x4800, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16_const0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e32 v2, 0x4800, v0
; GFX942-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX942-NEXT: v_min_f16_e32 v2, v0, v1
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16_const0:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: s_movk_i32 s0, 0x4800
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, s0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half 8.0, half %b)
%max1 = call half @llvm.minimum.f16(half %max0, half %c)
ret half %max1
}
; Scalar f16 three-input minimum where the second operand of the outer
; llvm.minimum.f16 call is the constant 8.0 (printed as 0x4800 in the
; assertions, i.e. not as an inline immediate).
; Per the assertions below: gfx1200 emits one v_minimum3_f16 with the
; literal 0x4800 as the last source; gfx942 uses the literal in the second
; v_min_f16 and compares v0 against itself in the second v_cmp_o_f16
; (presumably because the constant operand cannot be NaN); gfx950 first
; materialises 0x4800 into s0 with s_movk_i32 and passes s0 as the last
; source of v_pk_minimum3_f16.
; The values %max0/%max1 hold llvm.minimum results despite their names.
define half @v_fminimum3_f16__const2(half %a, half %b) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16__const2:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, 0x4800
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16__const2:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, 0x4800
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16__const2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e32 v2, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX942-NEXT: v_min_f16_e32 v1, 0x4800, v0
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16__const2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: s_movk_i32 s0, 0x4800
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half %a, half %b)
%max1 = call half @llvm.minimum.f16(half %max0, half 8.0)
ret half %max1
}
; Scalar f16 three-input minimum where the first operand of the inner
; llvm.minimum.f16 call is the constant 4.0, which every target below
; prints directly as the operand 4.0 (no separate move of the constant).
; Per the assertions below: gfx1200 emits one v_minimum3_f16 with 4.0 as
; the middle source; gfx942 uses 4.0 in the first v_min_f16 and compares v0
; against itself in the first v_cmp_o_f16; gfx950 emits one
; v_pk_minimum3_f16 with 4.0 as the middle source.
; The values %max0/%max1 hold llvm.minimum results despite their names.
define half @v_fminimum3_f16_inlineimm0(half %b, half %c) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16_inlineimm0:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, 4.0, v1.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16_inlineimm0:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, 4.0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16_inlineimm0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e32 v2, 4.0, v0
; GFX942-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX942-NEXT: v_min_f16_e32 v2, v0, v1
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16_inlineimm0:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, 4.0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half 4.0, half %b)
%max1 = call half @llvm.minimum.f16(half %max0, half %c)
ret half %max1
}
; Scalar f16 three-input minimum where the second operand of the outer
; llvm.minimum.f16 call is the constant 4.0, which every target below
; prints directly as the operand 4.0 (no separate move of the constant).
; Per the assertions below: gfx1200 emits one v_minimum3_f16 with 4.0 as
; the last source; gfx942 uses 4.0 in the second v_min_f16 and compares v0
; against itself in the second v_cmp_o_f16; gfx950 emits one
; v_pk_minimum3_f16 with 4.0 as the last source.
; The values %max0/%max1 hold llvm.minimum results despite their names.
define half @v_fminimum3_f16__inlineimm(half %a, half %b) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16__inlineimm:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, 4.0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16__inlineimm:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, 4.0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16__inlineimm:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e32 v2, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX942-NEXT: v_min_f16_e32 v1, 4.0, v0
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16__inlineimm:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, 4.0
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half %a, half %b)
%max1 = call half @llvm.minimum.f16(half %max0, half 4.0)
ret half %max1
}
; Scalar f16 three-input minimum with two constant operands: 8.0 as the
; second operand of the inner llvm.minimum.f16 call and 16.0 as the second
; operand of the outer call (printed as 0x4800 and 0x4c00 respectively).
; Per the assertions below, only one of the two constants appears as a
; literal inside the three-input instruction and the other is moved into a
; register first: gfx1200 true16 moves 0x4c00 into v0.h with v_mov_b16;
; gfx1200 fake16 moves 0x4800 into s0 with s_movk_i32; gfx950 moves both
; (s0 and v1). gfx942 emits two v_min_f16 / v_cmp_o_f16 / v_cndmask_b32
; groups, each comparing v0 against itself.
; The values %max0/%max1 hold llvm.minimum results despite their names.
define half @v_fminimum3_f16_const1_const2(half %a) {
; GFX12-TRUE16-LABEL: v_fminimum3_f16_const1_const2:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0x4c00
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, 0x4800, v0.h
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_fminimum3_f16_const1_const2:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: s_movk_i32 s0, 0x4800
; GFX12-FAKE16-NEXT: s_wait_alu 0xfffe
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, s0, 0x4c00
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_f16_const1_const2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e32 v1, 0x4800, v0
; GFX942-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX942-NEXT: v_min_f16_e32 v1, 0x4c00, v0
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_f16_const1_const2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: s_movk_i32 s0, 0x4800
; GFX950-NEXT: v_mov_b32_e32 v1, 0x4c00
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, s0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half %a, half 8.0)
%max1 = call half @llvm.minimum.f16(half %max0, half 16.0)
ret half %max1
}
; <2 x half> three-input minimum written as two chained llvm.minimum.v2f16
; calls; the outer call takes %c as its FIRST operand and the inner result
; as its second.
; Per the assertions below: gfx1200 emits two v_pk_minimum_f16 instructions
; (shared by the true16 and fake16 runs); gfx942 expands each step into
; v_pk_min_f16 plus per-half v_cmp_o_f16 / v_cndmask_b32 (the high half via
; the sdwa form) and repacks the halves with v_perm_b32 using selector
; 0x5040100; gfx950 emits a single v_pk_minimum3_f16 with v2 as its first
; source.
; The values %max0/%max1 hold llvm.minimum results despite their names.
define <2 x half> @v_fminimum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) {
; GFX12-LABEL: v_fminimum3_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_pk_minimum_f16 v0, v2, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v2f16:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v3, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_perm_b32 v1, v0, v5, s0
; GFX942-NEXT: v_pk_min_f16 v1, v2, v1
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v2, v5
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v2f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v2, v0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %c, <2 x half> %max0)
ret <2 x half> %max1
}
; <2 x half> three-input minimum written as two chained llvm.minimum.v2f16
; calls; the outer call takes the inner result as its first operand and %c
; as its SECOND operand.
; Per the assertions below: gfx1200 emits two v_pk_minimum_f16 instructions
; (shared by the true16 and fake16 runs); gfx942 expands each step into
; v_pk_min_f16 plus per-half v_cmp_o_f16 / v_cndmask_b32 (the high half via
; the sdwa form) and repacks the halves with v_perm_b32 using selector
; 0x5040100; gfx950 emits a single v_pk_minimum3_f16 with sources in
; v0, v1, v2 order.
; The values %max0/%max1 hold llvm.minimum results despite their names.
define <2 x half> @v_fminimum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x half> %c) {
; GFX12-LABEL: v_fminimum3_v2f16_commute:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v2f16_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v3, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_perm_b32 v1, v0, v5, s0
; GFX942-NEXT: v_pk_min_f16 v1, v1, v2
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v5, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v2f16_commute:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %max0, <2 x half> %c)
ret <2 x half> %max1
}
; <2 x half> three-input minimum where llvm.fabs.v2f16 is applied to all
; three inputs before the two chained llvm.minimum.v2f16 calls.
; Per the assertions below, the absolute value is not expressed as a
; modifier on the packed minimum instructions: every target clears the sign
; bits with v_and_b32 0x7fff7fff first. gfx1200 then emits two
; v_pk_minimum_f16; gfx942 feeds the masked values to v_pk_min_f16 while
; its v_cmp_o_f16 compares use |..| modifiers on the original registers;
; gfx950 emits one v_pk_minimum3_f16 on the masked values.
; The values %max0/%max1 hold llvm.minimum results despite their names.
define <2 x half> @v_fminimum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2 x half> %c) {
; GFX12-LABEL: v_fminimum3_v2f16__fabs_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; GFX12-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
; GFX12-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v2f16__fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0
; GFX942-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v1
; GFX942-NEXT: v_pk_min_f16 v3, v3, v4
; GFX942-NEXT: v_mov_b32_e32 v6, 0x7e00
; GFX942-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v2
; GFX942-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v3, vcc
; GFX942-NEXT: v_perm_b32 v1, v4, v0, s0
; GFX942-NEXT: v_pk_min_f16 v1, v1, v5
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc
; GFX942-NEXT: v_perm_b32 v0, v3, v0, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v2f16__fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; GFX950-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
; GFX950-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
%b.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %b)
%c.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %c)
%max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a.fabs, <2 x half> %b.fabs)
%max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %max0, <2 x half> %c.fabs)
ret <2 x half> %max1
}
; <2 x half> three-input minimum where fneg is applied to all three inputs
; before the two chained llvm.minimum.v2f16 calls.
; Per the assertions below, the negations are expressed as neg_lo/neg_hi
; operand modifiers instead of separate instructions: gfx1200 emits two
; v_pk_minimum_f16 (both sources negated in the first, only the second
; source in the second); gfx942 puts the same modifiers on v_pk_min_f16 and
; uses -vN modifiers on the v_cmp_o_f16 compares; gfx950 emits a single
; v_pk_minimum3_f16 with all three sources negated in both halves.
; The values %max0/%max1 hold llvm.minimum results despite their names.
define <2 x half> @v_fminimum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2 x half> %c) {
; GFX12-LABEL: v_fminimum3_v2f16__fneg_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v2f16__fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_perm_b32 v1, v0, v5, s0
; GFX942-NEXT: v_pk_min_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v5, -v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v2f16__fneg_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v2 neg_lo:[1,1,1] neg_hi:[1,1,1]
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x half> %a
%b.fneg = fneg <2 x half> %b
%c.fneg = fneg <2 x half> %c
%max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a.fneg, <2 x half> %b.fneg)
%max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %max0, <2 x half> %c.fneg)
ret <2 x half> %max1
}
; <2 x half> three-input minimum where the second operand of the inner
; llvm.minimum.v2f16 call is the splat constant <2.0, 2.0>.
; Per the assertions below, the splat is printed as the single operand 2.0
; with an op_sel_hi entry of 0 in that operand's position: gfx1200 uses it
; in the first of two v_pk_minimum_f16; gfx942 uses it in the first
; v_pk_min_f16 and compares v0 against itself for that step; gfx950 emits
; one v_pk_minimum3_f16 with 2.0 as the middle source and
; op_sel_hi [1,0,1].
; The values %max0/%max1 hold llvm.minimum results despite their names.
define <2 x half> @v_fminimum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) {
; GFX12-LABEL: v_fminimum3_v2f16__inlineimm1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, 2.0 op_sel_hi:[1,0]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v2f16__inlineimm1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v2, v0, 2.0 op_sel_hi:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
; GFX942-NEXT: v_perm_b32 v2, v3, v0, s0
; GFX942-NEXT: v_pk_min_f16 v2, v2, v1
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
; GFX942-NEXT: v_perm_b32 v0, v3, v0, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v2f16__inlineimm1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 2.0>)
%max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %max0, <2 x half> %c)
ret <2 x half> %max1
}
; <2 x half> three-input minimum where the second operand of the outer
; llvm.minimum.v2f16 call is the splat constant <4.0, 4.0>.
; Per the assertions below, the splat is printed as the single operand 4.0
; with an op_sel_hi entry of 0 in that operand's position: gfx1200 uses it
; in the second of two v_pk_minimum_f16; gfx942 uses it in the second
; v_pk_min_f16 and compares each half of the intermediate against itself
; for that step; gfx950 emits one v_pk_minimum3_f16 with 4.0 as the last
; source and op_sel_hi [1,1,0].
; The values %max0/%max1 hold llvm.minimum results despite their names.
define <2 x half> @v_fminimum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) {
; GFX12-LABEL: v_fminimum3_v2f16__inlineimm2:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v2f16__inlineimm2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v2, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX942-NEXT: v_perm_b32 v1, v0, v4, s0
; GFX942-NEXT: v_pk_min_f16 v1, v1, 4.0 op_sel_hi:[1,0]
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v4, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX942-NEXT: v_perm_b32 v0, v0, v2, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v2f16__inlineimm2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, 4.0 op_sel_hi:[1,1,0]
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %max0, <2 x half> <half 4.0, half 4.0>)
ret <2 x half> %max1
}
; <3 x half> three-input minimum written as two chained llvm.minimum.v3f16
; calls; the outer call takes %c as its FIRST operand and the inner result
; as its second.
; In the assertions below the three arguments are read from the register
; pairs v0/v1, v2/v3 and v4/v5, and the result is produced in v0/v1.
; gfx1200 emits four v_pk_minimum_f16 (two per step); gfx942 expands each
; step into v_pk_min_f16 plus v_cmp_o_f16 / v_cndmask_b32 selects against
; 0x7e00 with v_perm_b32 repacking; gfx950 emits two v_pk_minimum3_f16,
; each with the %c register (v4 or v5) as its first source.
; The values %max0/%max1 hold llvm.minimum results despite their names.
define <3 x half> @v_fminimum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c) {
; GFX12-LABEL: v_fminimum3_v3f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_minimum_f16 v0, v4, v0
; GFX12-NEXT: v_pk_minimum_f16 v1, v5, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v3f16:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v6, v0, v2
; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: v_pk_min_f16 v2, v1, v3
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX942-NEXT: v_perm_b32 v1, v1, v6, s0
; GFX942-NEXT: v_pk_min_f16 v1, v5, v1
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v5, v6
; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX942-NEXT: v_pk_min_f16 v2, v4, v2
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v4, v8
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v3f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v4, v0, v2
; GFX950-NEXT: v_pk_minimum3_f16 v1, v5, v1, v3
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
%max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %c, <3 x half> %max0)
ret <3 x half> %max1
}
; <3 x half> three-input minimum written as two chained llvm.minimum.v3f16
; calls; the outer call takes the inner result as its first operand and %c
; as its SECOND operand.
; In the assertions below the three arguments are read from the register
; pairs v0/v1, v2/v3 and v4/v5, and the result is produced in v0/v1.
; gfx1200 emits four v_pk_minimum_f16 (two per step); gfx942 expands each
; step into v_pk_min_f16 plus v_cmp_o_f16 / v_cndmask_b32 selects against
; 0x7e00 with v_perm_b32 repacking; gfx950 emits two v_pk_minimum3_f16,
; each with the %c register (v4 or v5) as its last source.
; The values %max0/%max1 hold llvm.minimum results despite their names.
define <3 x half> @v_fminimum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x half> %c) {
; GFX12-LABEL: v_fminimum3_v3f16_commute:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v4
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v3f16_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v6, v0, v2
; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: v_pk_min_f16 v2, v1, v3
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX942-NEXT: v_perm_b32 v1, v1, v6, s0
; GFX942-NEXT: v_pk_min_f16 v1, v1, v5
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v6, v5
; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v8, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v3f16_commute:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, v4
; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v3, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
%max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %max0, <3 x half> %c)
ret <3 x half> %max1
}
; Three-input minimum on <3 x half> where every operand first passes through
; llvm.fabs, followed by two chained llvm.minimum calls.
; Expected output recorded below: gfx950 masks the sign bits with v_and_b32
; (0x7fff7fff) and then issues one v_pk_minimum3_f16 per packed register;
; gfx1200 applies the same masks and issues two v_pk_minimum_f16 per register;
; gfx942 expands each minimum into v_pk_min_f16 plus v_cmp_o_f16 and
; v_cndmask_b32 selects against the constant 0x7e00.
define <3 x half> @v_fminimum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3 x half> %c) {
; GFX12-LABEL: v_fminimum3_v3f16__fabs_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; GFX12-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
; GFX12-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
; GFX12-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
; GFX12-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5
; GFX12-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v4
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v3f16__fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v7, 0x7fff7fff, v1
; GFX942-NEXT: v_and_b32_e32 v9, 0x7fff7fff, v3
; GFX942-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v0
; GFX942-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v2
; GFX942-NEXT: v_pk_min_f16 v7, v7, v9
; GFX942-NEXT: v_mov_b32_e32 v12, 0x7e00
; GFX942-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: v_pk_min_f16 v6, v6, v8
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v8, 16, v6
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v4
; GFX942-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v5
; GFX942-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v12, v7, vcc
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc
; GFX942-NEXT: v_perm_b32 v2, v8, v0, s0
; GFX942-NEXT: v_pk_min_f16 v2, v2, v11
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX942-NEXT: v_perm_b32 v6, v9, v1, s0
; GFX942-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc
; GFX942-NEXT: v_pk_min_f16 v6, v6, v10
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v12, v2, vcc
; GFX942-NEXT: v_perm_b32 v0, v3, v0, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v3f16__fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
; GFX950-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; GFX950-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
; GFX950-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
; GFX950-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5
; GFX950-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, v4
; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v3, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
; Despite the %max0/%max1 value names, both reductions are llvm.minimum calls.
%a.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %a)
%b.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %b)
%c.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %c)
%max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a.fabs, <3 x half> %b.fabs)
%max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %max0, <3 x half> %c.fabs)
ret <3 x half> %max1
}
; Three-input minimum on <3 x half> where every operand is negated with fneg
; before two chained llvm.minimum calls.
; Expected output recorded below: gfx950 folds all three negations into
; neg_lo/neg_hi modifiers on a single v_pk_minimum3_f16 per packed register;
; gfx1200 folds them into neg_lo/neg_hi on two v_pk_minimum_f16 per register;
; gfx942 uses neg_lo/neg_hi on v_pk_min_f16 and negated sources on the
; v_cmp_o_f16 compares that feed the v_cndmask_b32 selects against 0x7e00.
define <3 x half> @v_fminimum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3 x half> %c) {
; GFX12-LABEL: v_fminimum3_v3f16__fneg_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v3f16__fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX942-NEXT: v_perm_b32 v1, v1, v6, s0
; GFX942-NEXT: v_pk_min_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5
; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v3f16__fneg_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, v4 neg_lo:[1,1,1] neg_hi:[1,1,1]
; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v3, v5 neg_lo:[1,1,1] neg_hi:[1,1,1]
; GFX950-NEXT: s_setpc_b64 s[30:31]
; Despite the %max0/%max1 value names, both reductions are llvm.minimum calls.
%a.fneg = fneg <3 x half> %a
%b.fneg = fneg <3 x half> %b
%c.fneg = fneg <3 x half> %c
%max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a.fneg, <3 x half> %b.fneg)
%max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %max0, <3 x half> %c.fneg)
ret <3 x half> %max1
}
; Three-input minimum on <3 x half> where the second operand of the first
; llvm.minimum is the constant splat 2.0 and the third input is %c.
; Expected output recorded below: the literal 2.0 appears directly as an
; instruction operand on every target. gfx950 uses one v_pk_minimum3_f16 per
; packed register with 2.0 in the middle source; gfx1200 uses two
; v_pk_minimum_f16 per register; gfx942 expands into v_pk_min_f16 plus
; v_cmp_o_f16 / v_cndmask_b32 selects against 0x7e00, where the compare for the
; constant step tests each input against itself.
define <3 x half> @v_fminimum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) {
; GFX12-LABEL: v_fminimum3_v3f16__inlineimm1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, 2.0 op_sel_hi:[1,0]
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, 2.0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v3f16__inlineimm1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v6, 0x7e00
; GFX942-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: v_pk_min_f16 v7, v1, 2.0
; GFX942-NEXT: s_mov_b32 s1, 0x5040100
; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v1
; GFX942-NEXT: s_movk_i32 s0, 0x7e00
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
; GFX942-NEXT: v_perm_b32 v4, v5, v0, s1
; GFX942-NEXT: v_pk_min_f16 v4, v4, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc
; GFX942-NEXT: v_pack_b32_f16 v7, v1, s0
; GFX942-NEXT: v_pk_min_f16 v7, v7, v3
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
; GFX942-NEXT: v_perm_b32 v0, v5, v0, s1
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v3f16__inlineimm1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, 2.0, v2 op_sel_hi:[1,0,1]
; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, 2.0, v3
; GFX950-NEXT: s_setpc_b64 s[30:31]
; Despite the %max0/%max1 value names, both reductions are llvm.minimum calls.
%max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> <half 2.0, half 2.0, half 2.0>)
%max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %max0, <3 x half> %c)
ret <3 x half> %max1
}
; Three-input minimum on <3 x half> where the constant splat 4.0 is the second
; operand of the second (outer) llvm.minimum call.
; Expected output recorded below: the literal 4.0 appears directly as an
; instruction operand on every target. gfx950 uses one v_pk_minimum3_f16 per
; packed register with 4.0 as the last source; gfx1200 uses two
; v_pk_minimum_f16 per register; gfx942 expands into v_pk_min_f16 plus
; v_cmp_o_f16 / v_cndmask_b32 selects against 0x7e00, where the compare for the
; constant step tests each intermediate value against itself.
define <3 x half> @v_fminimum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) {
; GFX12-LABEL: v_fminimum3_v3f16__inlineimm2:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v3f16__inlineimm2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v4, v0, v2
; GFX942-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: v_pk_min_f16 v2, v1, v3
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX942-NEXT: v_perm_b32 v1, v1, v4, s0
; GFX942-NEXT: v_pk_min_f16 v1, v1, 4.0
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v4, v4
; GFX942-NEXT: v_perm_b32 v2, v0, v6, s0
; GFX942-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v6, v6
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v3f16__inlineimm2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, 4.0 op_sel_hi:[1,1,0]
; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v3, 4.0
; GFX950-NEXT: s_setpc_b64 s[30:31]
; Despite the %max0/%max1 value names, both reductions are llvm.minimum calls.
%max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
%max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %max0, <3 x half> <half 4.0, half 4.0, half 4.0>)
ret <3 x half> %max1
}
; Three-input minimum on <4 x half> built from two chained llvm.minimum calls.
; In this variant %c is the FIRST operand of the outer call and the inner
; result is the second.
; Expected output recorded below: gfx950 emits one v_pk_minimum3_f16 per packed
; register with the %c registers (v4/v5) as the first source; gfx1200 emits two
; v_pk_minimum_f16 per register; gfx942 expands each minimum into v_pk_min_f16
; plus v_cmp_o_f16 / v_cndmask_b32 selects against the constant 0x7e00.
define <4 x half> @v_fminimum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; GFX12-LABEL: v_fminimum3_v4f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_minimum_f16 v0, v4, v0
; GFX12-NEXT: v_pk_minimum_f16 v1, v5, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v4f16:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v6, v0, v2
; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: v_pk_min_f16 v2, v1, v3
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX942-NEXT: v_perm_b32 v2, v1, v6, s0
; GFX942-NEXT: v_pk_min_f16 v2, v5, v2
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v5, v6
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX942-NEXT: v_pk_min_f16 v2, v4, v2
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v4, v8
; GFX942-NEXT: v_perm_b32 v1, v1, v3, s0
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX942-NEXT: v_perm_b32 v0, v0, v5, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v4f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v4, v0, v2
; GFX950-NEXT: v_pk_minimum3_f16 v1, v5, v1, v3
; GFX950-NEXT: s_setpc_b64 s[30:31]
; Despite the %max0/%max1 value names, both reductions are llvm.minimum calls.
%max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
%max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %c, <4 x half> %max0)
ret <4 x half> %max1
}
; Three-input minimum on <4 x half> built from two chained llvm.minimum calls.
; In this variant the inner result is the FIRST operand of the outer call and
; %c is the second.
; Expected output recorded below: gfx950 emits one v_pk_minimum3_f16 per packed
; register with the %c registers (v4/v5) as the last source; gfx1200 emits two
; v_pk_minimum_f16 per register; gfx942 expands each minimum into v_pk_min_f16
; plus v_cmp_o_f16 / v_cndmask_b32 selects against the constant 0x7e00.
define <4 x half> @v_fminimum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; GFX12-LABEL: v_fminimum3_v4f16_commute:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v4
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v4f16_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v6, v0, v2
; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: v_pk_min_f16 v2, v1, v3
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX942-NEXT: v_perm_b32 v2, v1, v6, s0
; GFX942-NEXT: v_pk_min_f16 v2, v2, v5
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v6, v5
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v8, v4
; GFX942-NEXT: v_perm_b32 v1, v1, v3, s0
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX942-NEXT: v_perm_b32 v0, v0, v5, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v4f16_commute:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, v4
; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v3, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
; Despite the %max0/%max1 value names, both reductions are llvm.minimum calls.
%max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
%max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %max0, <4 x half> %c)
ret <4 x half> %max1
}
; Three-input minimum on <4 x half> where every operand first passes through
; llvm.fabs, followed by two chained llvm.minimum calls.
; Expected output recorded below: gfx950 masks the sign bits with v_and_b32
; (0x7fff7fff) and then issues one v_pk_minimum3_f16 per packed register;
; gfx1200 applies the same masks and issues two v_pk_minimum_f16 per register;
; gfx942 expands each minimum into v_pk_min_f16 plus v_cmp_o_f16 and
; v_cndmask_b32 selects against the constant 0x7e00.
define <4 x half> @v_fminimum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; GFX12-LABEL: v_fminimum3_v4f16__fabs_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; GFX12-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
; GFX12-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
; GFX12-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
; GFX12-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5
; GFX12-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v4
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v4f16__fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v7, 0x7fff7fff, v0
; GFX942-NEXT: v_and_b32_e32 v9, 0x7fff7fff, v2
; GFX942-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v1
; GFX942-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v3
; GFX942-NEXT: v_pk_min_f16 v7, v7, v9
; GFX942-NEXT: v_mov_b32_e32 v12, 0x7e00
; GFX942-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: v_pk_min_f16 v6, v6, v8
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v8, 16, v6
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v5
; GFX942-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v4
; GFX942-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v12, v7, vcc
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc
; GFX942-NEXT: v_perm_b32 v2, v8, v1, s0
; GFX942-NEXT: v_pk_min_f16 v2, v2, v11
; GFX942-NEXT: v_perm_b32 v6, v9, v0, s0
; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: v_pk_min_f16 v6, v6, v10
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v7, 16, v6
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v7, v12, v7, vcc
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5|
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v12, v2, vcc
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4|
; GFX942-NEXT: v_perm_b32 v1, v3, v1, s0
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc
; GFX942-NEXT: v_perm_b32 v0, v7, v0, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v4f16__fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
; GFX950-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; GFX950-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
; GFX950-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
; GFX950-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5
; GFX950-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, v4
; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v3, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
; Despite the %max0/%max1 value names, both reductions are llvm.minimum calls.
%a.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
%b.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
%c.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %c)
%max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a.fabs, <4 x half> %b.fabs)
%max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %max0, <4 x half> %c.fabs)
ret <4 x half> %max1
}
; Three-input minimum on <4 x half> where every operand is negated with fneg
; before two chained llvm.minimum calls.
; Expected output recorded below: gfx950 folds all three negations into
; neg_lo/neg_hi modifiers on a single v_pk_minimum3_f16 per packed register;
; gfx1200 folds them into neg_lo/neg_hi on two v_pk_minimum_f16 per register;
; gfx942 uses neg_lo/neg_hi on v_pk_min_f16 and negated sources on the
; v_cmp_o_f16 compares that feed the v_cndmask_b32 selects against 0x7e00.
define <4 x half> @v_fminimum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; GFX12-LABEL: v_fminimum3_v4f16__fneg_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v4f16__fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX942-NEXT: v_perm_b32 v2, v1, v6, s0
; GFX942-NEXT: v_pk_min_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4
; GFX942-NEXT: v_perm_b32 v1, v1, v3, s0
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX942-NEXT: v_perm_b32 v0, v0, v5, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v4f16__fneg_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, v4 neg_lo:[1,1,1] neg_hi:[1,1,1]
; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v3, v5 neg_lo:[1,1,1] neg_hi:[1,1,1]
; GFX950-NEXT: s_setpc_b64 s[30:31]
; Despite the %max0/%max1 value names, both reductions are llvm.minimum calls.
%a.fneg = fneg <4 x half> %a
%b.fneg = fneg <4 x half> %b
%c.fneg = fneg <4 x half> %c
%max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a.fneg, <4 x half> %b.fneg)
%max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %max0, <4 x half> %c.fneg)
ret <4 x half> %max1
}
; Three-input minimum on <4 x half> where the second operand of the first
; llvm.minimum is the constant splat 2.0 and the third input is %c.
; Expected output recorded below: the literal 2.0 appears directly as an
; instruction operand with op_sel_hi:[1,0] (or [1,0,1]) on every target.
; gfx950 uses one v_pk_minimum3_f16 per packed register with 2.0 in the middle
; source; gfx1200 uses two v_pk_minimum_f16 per register; gfx942 expands into
; v_pk_min_f16 plus v_cmp_o_f16 / v_cndmask_b32 selects against 0x7e00, where
; the compare for the constant step tests each input against itself.
define <4 x half> @v_fminimum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) {
; GFX12-LABEL: v_fminimum3_v4f16__inlineimm1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, 2.0 op_sel_hi:[1,0]
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, 2.0 op_sel_hi:[1,0]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v4f16__inlineimm1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v6, 0x7e00
; GFX942-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: v_pk_min_f16 v7, v1, 2.0 op_sel_hi:[1,0]
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v8, 16, v7
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v8, v6, v8, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
; GFX942-NEXT: v_perm_b32 v4, v8, v1, s0
; GFX942-NEXT: v_pk_min_f16 v4, v4, v3
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: v_perm_b32 v8, v5, v0, s0
; GFX942-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX942-NEXT: v_pk_min_f16 v8, v8, v2
; GFX942-NEXT: v_cndmask_b32_e32 v7, v6, v7, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v9, 16, v8
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v9, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX942-NEXT: v_perm_b32 v1, v7, v1, s0
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc
; GFX942-NEXT: v_perm_b32 v0, v5, v0, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v4f16__inlineimm1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, 2.0, v2 op_sel_hi:[1,0,1]
; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, 2.0, v3 op_sel_hi:[1,0,1]
; GFX950-NEXT: s_setpc_b64 s[30:31]
; Despite the %max0/%max1 value names, both reductions are llvm.minimum calls.
%max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> <half 2.0, half 2.0, half 2.0, half 2.0>)
%max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %max0, <4 x half> %c)
ret <4 x half> %max1
}
; Three-input minimum on <4 x half> where the constant splat 4.0 is the second
; operand of the second (outer) llvm.minimum call.
; Expected output recorded below: the literal 4.0 appears directly as an
; instruction operand with op_sel_hi:[1,0] (or [1,1,0]) on every target.
; gfx950 uses one v_pk_minimum3_f16 per packed register with 4.0 as the last
; source; gfx1200 uses two v_pk_minimum_f16 per register; gfx942 expands into
; v_pk_min_f16 plus v_cmp_o_f16 / v_cndmask_b32 selects against 0x7e00, where
; the compare for the constant step tests each intermediate value against
; itself.
define <4 x half> @v_fminimum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) {
; GFX12-LABEL: v_fminimum3_v4f16__inlineimm2:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, 4.0 op_sel_hi:[1,0]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_fminimum3_v4f16__inlineimm2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v4, v0, v2
; GFX942-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: v_pk_min_f16 v2, v1, v3
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX942-NEXT: v_perm_b32 v2, v1, v4, s0
; GFX942-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v4, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX942-NEXT: v_perm_b32 v2, v0, v6, s0
; GFX942-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v6, v6
; GFX942-NEXT: v_perm_b32 v1, v1, v3, s0
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
; GFX942-NEXT: v_perm_b32 v0, v0, v4, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_fminimum3_v4f16__inlineimm2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, 4.0 op_sel_hi:[1,1,0]
; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v3, 4.0 op_sel_hi:[1,1,0]
; GFX950-NEXT: s_setpc_b64 s[30:31]
; Despite the %max0/%max1 value names, both reductions are llvm.minimum calls.
%max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
%max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %max0, <4 x half> <half 4.0, half 4.0, half 4.0, half 4.0>)
ret <4 x half> %max1
}
; Three-input minimum on double built from two chained llvm.minimum calls, with
; the inner result as the first operand of the outer call.
; Expected output recorded below: no target fuses the pair into one
; three-operand instruction. gfx1200 emits two v_minimum_f64; gfx942 and gfx950
; (shared check prefix) expand each step into v_min_f64 plus a v_cmp_u_f64
; unordered compare that selects 0x7ff80000 for the high dword and 0 for the
; low dword.
define double @v_fminimum3_f64(double %a, double %b, double %c) {
; GFX12-LABEL: v_fminimum3_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
; Despite the %max0/%max1 value names, both reductions are llvm.minimum calls.
%max0 = call double @llvm.minimum.f64(double %a, double %b)
%max1 = call double @llvm.minimum.f64(double %max0, double %c)
ret double %max1
}
; Same as the plain f64 three-input minimum, but with the operands of the outer
; llvm.minimum swapped: %c is the first operand and the inner result the
; second.
; Expected output recorded below: the swap is visible in the second
; instruction's source order (v[4:5] first). gfx1200 emits two v_minimum_f64;
; gfx942 and gfx950 (shared check prefix) expand each step into v_min_f64 plus
; a v_cmp_u_f64 unordered compare that selects 0x7ff80000 for the high dword
; and 0 for the low dword.
define double @v_fminimum3_f64_commute(double %a, double %b, double %c) {
; GFX12-LABEL: v_fminimum3_f64_commute:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], v[4:5], v[0:1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[4:5], v[0:1]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[0:1]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
; Despite the %max0/%max1 value names, both reductions are llvm.minimum calls.
%max0 = call double @llvm.minimum.f64(double %a, double %b)
%max1 = call double @llvm.minimum.f64(double %c, double %max0)
ret double %max1
}
; Scalar-input variant of the f64 three-input minimum: an amdgpu_ps function
; whose three double arguments are all marked inreg. The result is bitcast to
; <2 x i32>, each half is passed through llvm.amdgcn.readfirstlane, and the
; pair is returned as <2 x i32>.
; Expected output recorded below: gfx1200 feeds the SGPR pairs directly to two
; v_minimum_f64; gfx942 and gfx950 (shared check prefix) first copy s[2:3] into
; VGPRs with v_mov_b64, then use v_min_f64 plus v_cmp_u_f64 / v_cndmask_b32
; selects (0x7ff80000 high dword, 0 low dword). Both end with two
; v_readfirstlane_b32 into s0/s1.
define amdgpu_ps <2 x i32> @s_fminimum3_f64(double inreg %a, double inreg %b, double inreg %c) {
; GFX12-LABEL: s_fminimum3_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], s[4:5]
; GFX12-NEXT: v_readfirstlane_b32 s0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_readfirstlane_b32 s1, v1
; GFX12-NEXT: s_wait_alu 0xf1ff
; GFX12-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_fminimum3_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; GFX9-NEXT: v_min_f64 v[2:3], s[0:1], v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[4:5], v[0:1]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc
; GFX9-NEXT: v_readfirstlane_b32 s1, v0
; GFX9-NEXT: v_readfirstlane_b32 s0, v1
; GFX9-NEXT: ; return to shader part epilog
; Despite the %max0/%max1 value names, both reductions are llvm.minimum calls.
%max0 = call double @llvm.minimum.f64(double %a, double %b)
%max1 = call double @llvm.minimum.f64(double %max0, double %c)
; Split the 64-bit result into two i32 lanes and read each through
; readfirstlane before rebuilding the <2 x i32> return value.
%cast = bitcast double %max1 to <2 x i32>
%elt0 = extractelement <2 x i32> %cast, i32 0
%elt1 = extractelement <2 x i32> %cast, i32 1
%readlane0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt0)
%readlane1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt1)
%insert.0 = insertelement <2 x i32> poison, i32 %readlane0, i32 0
%insert.1 = insertelement <2 x i32> %insert.0, i32 %readlane1, i32 1
ret <2 x i32> %insert.1
}
; Nested llvm.minimum.f64 with llvm.fabs applied to the first operand of the
; inner call. The checks expect the fabs to appear as a |...| source modifier
; on the minimum (and, on gfx942/gfx950, on the matching v_cmp_u_f64).
define double @v_fminimum3_f64_fabs0(double %a, double %b, double %c) {
; GFX12-LABEL: v_fminimum3_f64_fabs0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], |v[0:1]|, v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64_fabs0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], |v[0:1]|, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%abs.a = call double @llvm.fabs.f64(double %a)
%min.inner = call double @llvm.minimum.f64(double %abs.a, double %b)
%min.outer = call double @llvm.minimum.f64(double %min.inner, double %c)
ret double %min.outer
}
; Nested llvm.minimum.f64 with llvm.fabs applied to the second operand of the
; inner call. The checks expect the fabs to appear as a |...| source modifier
; on the first minimum (and its v_cmp_u_f64 on gfx942/gfx950).
define double @v_fminimum3_f64_fabs1(double %a, double %b, double %c) {
; GFX12-LABEL: v_fminimum3_f64_fabs1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], |v[2:3]|
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64_fabs1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], |v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%abs.b = call double @llvm.fabs.f64(double %b)
%min.inner = call double @llvm.minimum.f64(double %a, double %abs.b)
%min.outer = call double @llvm.minimum.f64(double %min.inner, double %c)
ret double %min.outer
}
; Nested llvm.minimum.f64 with llvm.fabs applied to the third value, i.e. the
; second operand of the outer call. The checks expect the |...| source
; modifier on the second minimum (and its v_cmp_u_f64 on gfx942/gfx950).
define double @v_fminimum3_f64_fabs2(double %a, double %b, double %c) {
; GFX12-LABEL: v_fminimum3_f64_fabs2:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], |v[4:5]|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64_fabs2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], |v[4:5]|
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%abs.c = call double @llvm.fabs.f64(double %c)
%min.inner = call double @llvm.minimum.f64(double %a, double %b)
%min.outer = call double @llvm.minimum.f64(double %min.inner, double %abs.c)
ret double %min.outer
}
; Nested llvm.minimum.f64 with llvm.fabs applied to all three inputs. The
; checks expect every fabs to be folded into a |...| source modifier on the
; instruction that consumes it.
define double @v_fminimum3_f64_fabs_all(double %a, double %b, double %c) {
; GFX12-LABEL: v_fminimum3_f64_fabs_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], |v[0:1]|, |v[2:3]|
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], |v[4:5]|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64_fabs_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], |v[0:1]|, |v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], |v[4:5]|
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%abs.a = call double @llvm.fabs.f64(double %a)
%abs.b = call double @llvm.fabs.f64(double %b)
%abs.c = call double @llvm.fabs.f64(double %c)
%min.inner = call double @llvm.minimum.f64(double %abs.a, double %abs.b)
%min.outer = call double @llvm.minimum.f64(double %min.inner, double %abs.c)
ret double %min.outer
}
; Nested llvm.minimum.f64 with fneg applied to all three inputs. The checks
; expect every fneg to be folded into a "-" source modifier on the instruction
; that consumes it.
define double @v_fminimum3_f64_fneg_all(double %a, double %b, double %c) {
; GFX12-LABEL: v_fminimum3_f64_fneg_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], -v[0:1], -v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], -v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64_fneg_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], -v[0:1], -v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], -v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg double %a
%neg.b = fneg double %b
%neg.c = fneg double %c
%min.inner = call double @llvm.minimum.f64(double %neg.a, double %neg.b)
%min.outer = call double @llvm.minimum.f64(double %min.inner, double %neg.c)
ret double %min.outer
}
; Nested llvm.minimum.f64 where every input is fneg(fabs(x)). The checks
; expect the combined -|...| source modifier on each consuming instruction.
define double @v_fminimum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
; GFX12-LABEL: v_fminimum3_f64_fneg_fabs_all:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], -|v[0:1]|, -|v[2:3]|
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], -|v[4:5]|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64_fneg_fabs_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], -|v[0:1]|, -|v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], -|v[4:5]|
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]|
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%abs.a = call double @llvm.fabs.f64(double %a)
%abs.b = call double @llvm.fabs.f64(double %b)
%abs.c = call double @llvm.fabs.f64(double %c)
%nabs.a = fneg double %abs.a
%nabs.b = fneg double %abs.b
%nabs.c = fneg double %abs.c
%min.inner = call double @llvm.minimum.f64(double %nabs.a, double %nabs.b)
%min.outer = call double @llvm.minimum.f64(double %min.inner, double %nabs.c)
ret double %min.outer
}
; Nested llvm.minimum.f64 with fneg applied to the first operand of the inner
; call. The checks expect the "-" source modifier on the first minimum (and
; its v_cmp_u_f64 on gfx942/gfx950).
define double @v_fminimum3_f64_fneg0(double %a, double %b, double %c) {
; GFX12-LABEL: v_fminimum3_f64_fneg0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], -v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64_fneg0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], -v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg double %a
%min.inner = call double @llvm.minimum.f64(double %neg.a, double %b)
%min.outer = call double @llvm.minimum.f64(double %min.inner, double %c)
ret double %min.outer
}
; Nested llvm.minimum.f64 with fneg applied to the second operand of the inner
; call. The checks expect the "-" source modifier on the first minimum (and
; its v_cmp_u_f64 on gfx942/gfx950).
define double @v_fminimum3_f64_fneg1(double %a, double %b, double %c) {
; GFX12-LABEL: v_fminimum3_f64_fneg1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], -v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64_fneg1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], -v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%neg.b = fneg double %b
%min.inner = call double @llvm.minimum.f64(double %a, double %neg.b)
%min.outer = call double @llvm.minimum.f64(double %min.inner, double %c)
ret double %min.outer
}
; Nested llvm.minimum.f64 with fneg applied to the third value, i.e. the
; second operand of the outer call. The checks expect the "-" source modifier
; on the second minimum (and its v_cmp_u_f64 on gfx942/gfx950).
define double @v_fminimum3_f64_fneg2(double %a, double %b, double %c) {
; GFX12-LABEL: v_fminimum3_f64_fneg2:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], -v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64_fneg2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], -v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%neg.c = fneg double %c
%min.inner = call double @llvm.minimum.f64(double %a, double %b)
%min.outer = call double @llvm.minimum.f64(double %min.inner, double %neg.c)
ret double %min.outer
}
; Nested llvm.minimum.f64 with the constant 8.0 as the first operand of the
; inner call. In the checks the constant shows up as the literal 0x40200000 on
; gfx1200 and is materialized into s[0:1] on gfx942/gfx950, where the
; unordered compare only needs to test the variable operand against itself.
define double @v_fminimum3_f64_const0(double %b, double %c) {
; GFX12-LABEL: v_fminimum3_f64_const0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], 0x40200000, v[0:1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64_const0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s0, 0
; GFX9-NEXT: s_mov_b32 s1, 0x40200000
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], s[0:1]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%min.inner = call double @llvm.minimum.f64(double 8.0, double %b)
%min.outer = call double @llvm.minimum.f64(double %min.inner, double %c)
ret double %min.outer
}
; Nested llvm.minimum.f64 with the constant 8.0 as the second operand of the
; outer call. In the checks the constant shows up as the literal 0x40200000 on
; gfx1200 and is materialized into s[0:1] on gfx942/gfx950.
define double @v_fminimum3_f64__const2(double %a, double %b) {
; GFX12-LABEL: v_fminimum3_f64__const2:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], 0x40200000, v[0:1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64__const2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: s_mov_b32 s0, 0
; GFX9-NEXT: s_mov_b32 s1, 0x40200000
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[0:1]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%min.inner = call double @llvm.minimum.f64(double %a, double %b)
%min.outer = call double @llvm.minimum.f64(double %min.inner, double 8.0)
ret double %min.outer
}
; Nested llvm.minimum.f64 with the constant 4.0 as the first operand of the
; inner call. The checks expect 4.0 to be encoded directly as an operand of
; the minimum instruction on all targets (no separate materialization).
define double @v_fminimum3_f64_inlineimm0(double %b, double %c) {
; GFX12-LABEL: v_fminimum3_f64_inlineimm0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], 4.0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64_inlineimm0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], 4.0
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%min.inner = call double @llvm.minimum.f64(double 4.0, double %b)
%min.outer = call double @llvm.minimum.f64(double %min.inner, double %c)
ret double %min.outer
}
; Nested llvm.minimum.f64 with the constant 4.0 as the second operand of the
; outer call. The checks expect 4.0 to be encoded directly as an operand of
; the second minimum instruction on all targets.
define double @v_fminimum3_f64__inlineimm(double %a, double %b) {
; GFX12-LABEL: v_fminimum3_f64__inlineimm:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64__inlineimm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], 4.0
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%min.inner = call double @llvm.minimum.f64(double %a, double %b)
%min.outer = call double @llvm.minimum.f64(double %min.inner, double 4.0)
ret double %min.outer
}
; Nested llvm.minimum.f64 with two constants: 8.0 in the inner call and 16.0 in
; the outer call. The checks expect two separate minimum operations, with the
; constants appearing as literals 0x40200000 and 0x40300000 (gfx1200) or
; materialized into s[0:1] before each v_min_f64 (gfx942/gfx950).
define double @v_fminimum3_f64_const1_const2(double %a) {
; GFX12-LABEL: v_fminimum3_f64_const1_const2:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], 0x40200000, v[0:1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[0:1], 0x40300000, v[0:1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fminimum3_f64_const1_const2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s0, 0
; GFX9-NEXT: s_mov_b32 s1, 0x40200000
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[0:1]
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX9-NEXT: s_mov_b32 s0, 0
; GFX9-NEXT: s_mov_b32 s1, 0x40300000
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[0:1]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%min.inner = call double @llvm.minimum.f64(double %a, double 8.0)
%min.outer = call double @llvm.minimum.f64(double %min.inner, double 16.0)
ret double %min.outer
}
; Negative test for forming a 3-input minimum: the inner llvm.minimum.f32
; result has a second use (it is returned alongside the outer result), so the
; checks expect two separate minimum operations. On gfx950 each 2-input
; minimum is emitted as v_minimum3_f32 with its last operand repeated.
define <2 x float> @v_no_fminimum3_f32__multi_use(float %a, float %b, float %c) {
; GFX12-LABEL: v_no_fminimum3_f32__multi_use:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f32 v0, v0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f32 v1, v0, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_no_fminimum3_f32__multi_use:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f32_e32 v3, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f32_e32 v1, v0, v2
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_no_fminimum3_f32__multi_use:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
; GFX950-NEXT: v_minimum3_f32 v1, v0, v2, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%min.inner = call float @llvm.minimum.f32(float %a, float %b)
%min.outer = call float @llvm.minimum.f32(float %min.inner, float %c)
%ret.partial = insertelement <2 x float> poison, float %min.inner, i32 0
%ret.full = insertelement <2 x float> %ret.partial, float %min.outer, i32 1
ret <2 x float> %ret.full
}
; SGPR-argument (inreg, amdgpu_ps) variant of the f32 multi-use negative test.
; Both the inner and the outer llvm.minimum.f32 results are bitcast to i32,
; passed through llvm.amdgcn.readfirstlane and returned, so the checks expect
; two separate minimum operations (scalar s_minimum_f32 on gfx1200).
define amdgpu_ps <2 x i32> @s_no_fminimum3_f32__multi_use(float inreg %a, float inreg %b, float inreg %c) {
; GFX12-LABEL: s_no_fminimum3_f32__multi_use:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_minimum_f32 s0, s0, s1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-NEXT: s_minimum_f32 s1, s0, s2
; GFX12-NEXT: ; return to shader part epilog
;
; GFX942-LABEL: s_no_fminimum3_f32__multi_use:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NEXT: v_min_f32_e32 v1, s0, v0
; GFX942-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX942-NEXT: v_min_f32_e32 v1, s2, v0
; GFX942-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
; GFX942-NEXT: v_readfirstlane_b32 s0, v0
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_readfirstlane_b32 s1, v1
; GFX942-NEXT: ; return to shader part epilog
;
; GFX950-LABEL: s_no_fminimum3_f32__multi_use:
; GFX950: ; %bb.0:
; GFX950-NEXT: v_mov_b32_e32 v0, s0
; GFX950-NEXT: v_minimum3_f32 v0, v0, s1, s1
; GFX950-NEXT: v_minimum3_f32 v1, v0, s2, s2
; GFX950-NEXT: v_readfirstlane_b32 s0, v0
; GFX950-NEXT: v_readfirstlane_b32 s1, v1
; GFX950-NEXT: ; return to shader part epilog
%min.inner = call float @llvm.minimum.f32(float %a, float %b)
%min.outer = call float @llvm.minimum.f32(float %min.inner, float %c)
%bits.inner = bitcast float %min.inner to i32
%bits.outer = bitcast float %min.outer to i32
%uniform.inner = call i32 @llvm.amdgcn.readfirstlane(i32 %bits.inner)
%uniform.outer = call i32 @llvm.amdgcn.readfirstlane(i32 %bits.outer)
%ret.partial = insertelement <2 x i32> poison, i32 %uniform.inner, i32 0
%ret.full = insertelement <2 x i32> %ret.partial, i32 %uniform.outer, i32 1
ret <2 x i32> %ret.full
}
; Negative test for forming a 3-input minimum on half: the inner
; llvm.minimum.f16 result is also returned (packed with the outer result into
; <2 x half>), so the checks expect two separate minimum operations followed
; by v_pack_b32_f16. gfx1200 has distinct expectations for the real-true16 and
; fake16 run lines; gfx950 uses v_pk_minimum3_f16 with a repeated operand.
define <2 x half> @v_no_fminimum3_f16__multi_use(half %a, half %b, half %c) {
; GFX12-TRUE16-LABEL: v_no_fminimum3_f16__multi_use:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_minimum_f16 v0.h, v0.l, v2.l
; GFX12-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_no_fminimum3_f16__multi_use:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_minimum_f16 v1, v0, v2
; GFX12-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_no_fminimum3_f16__multi_use:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_min_f16_e32 v3, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX942-NEXT: v_min_f16_e32 v1, v0, v2
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GFX942-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_no_fminimum3_f16__multi_use:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v1
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_pk_minimum3_f16 v1, v0, v2, v2
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
%min.inner = call half @llvm.minimum.f16(half %a, half %b)
%min.outer = call half @llvm.minimum.f16(half %min.inner, half %c)
%ret.partial = insertelement <2 x half> poison, half %min.inner, i32 0
%ret.full = insertelement <2 x half> %ret.partial, half %min.outer, i32 1
ret <2 x half> %ret.full
}
; SGPR-argument (inreg, amdgpu_ps) variant of the f16 multi-use negative test.
; Both llvm.minimum.f16 results are bitcast to i16, zero-extended to i32,
; passed through llvm.amdgcn.readfirstlane and returned. The checks expect two
; separate minimum operations and a 0xffff mask for each zero-extension.
define amdgpu_ps <2 x i32> @s_no_fminimum3_f16__multi_use(half inreg %a, half inreg %b, half inreg %c) {
; GFX12-LABEL: s_no_fminimum3_f16__multi_use:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_minimum_f16 s0, s0, s1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-NEXT: s_minimum_f16 s1, s0, s2
; GFX12-NEXT: s_and_b32 s0, 0xffff, s0
; GFX12-NEXT: s_and_b32 s1, 0xffff, s1
; GFX12-NEXT: ; return to shader part epilog
;
; GFX942-LABEL: s_no_fminimum3_f16__multi_use:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NEXT: v_min_f16_e32 v1, s0, v0
; GFX942-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX942-NEXT: v_min_f16_e32 v1, s2, v0
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, s2, v0
; GFX942-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX942-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX942-NEXT: v_readfirstlane_b32 s0, v0
; GFX942-NEXT: v_readfirstlane_b32 s1, v1
; GFX942-NEXT: ; return to shader part epilog
;
; GFX950-LABEL: s_no_fminimum3_f16__multi_use:
; GFX950: ; %bb.0:
; GFX950-NEXT: v_mov_b32_e32 v0, s0
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, s1, s1
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_pk_minimum3_f16 v1, v0, s2, s2
; GFX950-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX950-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX950-NEXT: v_readfirstlane_b32 s0, v0
; GFX950-NEXT: v_readfirstlane_b32 s1, v1
; GFX950-NEXT: ; return to shader part epilog
%min.inner = call half @llvm.minimum.f16(half %a, half %b)
%min.outer = call half @llvm.minimum.f16(half %min.inner, half %c)
%bits.inner = bitcast half %min.inner to i16
%bits.outer = bitcast half %min.outer to i16
%wide.inner = zext i16 %bits.inner to i32
%wide.outer = zext i16 %bits.outer to i32
%uniform.inner = call i32 @llvm.amdgcn.readfirstlane(i32 %wide.inner)
%uniform.outer = call i32 @llvm.amdgcn.readfirstlane(i32 %wide.outer)
%ret.partial = insertelement <2 x i32> poison, i32 %uniform.inner, i32 0
%ret.full = insertelement <2 x i32> %ret.partial, i32 %uniform.outer, i32 1
ret <2 x i32> %ret.full
}
; Negative test for forming a 3-input minimum on <2 x half>: the inner minimum
; result is also returned (concatenated with the outer result into <4 x half>),
; so the checks expect two separate packed minimum operations. gfx942 expands
; each packed minimum into v_pk_min_f16 plus per-half ordered compares and
; selects of 0x7e00; gfx950 uses v_pk_minimum3_f16 with a repeated operand.
;
; The intrinsic is spelled with the .v2f16 overload suffix that matches its
; <2 x half> operands, rather than .f16, so the IR does not depend on the
; parser remangling a mismatched intrinsic name.
define <4 x half> @v_no_fminimum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b, <2 x half> %c) {
; GFX12-LABEL: v_no_fminimum3_v2f16__multi_use:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_pk_minimum_f16 v1, v0, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_no_fminimum3_v2f16__multi_use:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_pk_min_f16 v3, v0, v1
; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX942-NEXT: s_mov_b32 s0, 0x5040100
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
; GFX942-NEXT: v_perm_b32 v0, v1, v5, s0
; GFX942-NEXT: v_pk_min_f16 v3, v0, v2
; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v5, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
; GFX942-NEXT: v_perm_b32 v1, v1, v5, s0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_no_fminimum3_v2f16__multi_use:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v1
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_pk_minimum3_f16 v1, v0, v2, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%min.inner = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
%min.outer = call <2 x half> @llvm.minimum.v2f16(<2 x half> %min.inner, <2 x half> %c)
%concat = shufflevector <2 x half> %min.inner, <2 x half> %min.outer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x half> %concat
}
; f64 variant of the multi-use test: the inner llvm.minimum.f64 result is
; returned alongside the outer result as <2 x double>. The checks expect two
; minimum operations, the first left in v[0:1] and the second in v[2:3].
define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double %c) {
; GFX12-LABEL: v_no_fminimum3_f64__multi_use:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f64 v[2:3], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_no_fminimum3_f64__multi_use:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%min.inner = call double @llvm.minimum.f64(double %a, double %b)
%min.outer = call double @llvm.minimum.f64(double %min.inner, double %c)
%ret.partial = insertelement <2 x double> poison, double %min.inner, i32 0
%ret.full = insertelement <2 x double> %ret.partial, double %min.outer, i32 1
ret <2 x double> %ret.full
}