We had custom lowering for the wider vectors of f16, but were missing test coverage for them. Also add more vector tests for bf16, and split the bf16 cases into separate files so that GlobalISel RUN lines can be added.
5205 lines
211 KiB
LLVM
5205 lines
211 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
|
|
; Baseline half case: llvm.minimumnum.f16 on two VGPR arguments with no
; fast-math flags. On every target except gfx700 the expected code applies a
; self-max (v_max_f16, or v_max_num_f16 on gfx1200) to each input before the
; final min. gfx700 instead converts each input f32->f16->f32 and emits a
; single v_min_f32.
; NOTE(review): the self-max presumably quiets signaling NaNs before the
; min - confirm against the lowering code.
define half @v_minimumnum_f16(half %x, half %y) {
; GFX7-LABEL: v_minimumnum_f16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f16_e32 v1, v1, v1
; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f16_e32 v1, v1, v1
; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_f16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_f16:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %result = call half @llvm.minimumnum.f16(half %x, half %y)
  ret half %result
}
|
|
|
|
; Half case where the call carries the nnan flag. On every target except
; gfx700 the expected code is a lone min instruction with no self-max on
; either input. gfx700 still converts each input f32->f16->f32 before its
; v_min_f32.
define half @v_minimumnum_f16_nnan(half %x, half %y) {
; GFX7-LABEL: v_minimumnum_f16_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f16_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_f16_nnan:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v1.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_f16_nnan:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_f16_nnan:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v1.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_f16_nnan:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan half @llvm.minimumnum.f16(half %x, half %y)
  ret half %result
}
|
|
|
|
; Half case with a constant 1.0 as the second operand. The expected code
; folds 1.0 into the min as an inline operand on every target. Only the
; variable input gets the preparatory instruction: a self-max, or the
; f32->f16->f32 conversion pair on gfx700.
define half @v_minimumnum_f16_1.0(half %x) {
; GFX7-LABEL: v_minimumnum_f16_1.0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f16_1.0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_e32 v0, 1.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f16_1.0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
; GFX9-NEXT: v_min_f16_e32 v0, 1.0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_1.0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
; GFX10-NEXT: v_min_f16_e32 v0, 1.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_f16_1.0:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, 1.0, v0.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_f16_1.0:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, 1.0, v0
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_f16_1.0:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, 1.0, v0.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_f16_1.0:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, 1.0, v0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %result = call half @llvm.minimumnum.f16(half %x, half 1.0)
  ret half %result
}
|
|
|
|
; Baseline float case: llvm.minimumnum.f32 on two VGPR arguments with no
; fast-math flags. Expected preparation of the inputs differs by target:
; gfx700 and gfx803 multiply each input by 1.0 (v_mul_f32), gfx900/gfx950 and
; gfx1030 use a self-max, and gfx1100/gfx1200 pair both self-max operations
; in one v_dual_max line. A single min follows in every case.
define float @v_minimumnum_f32(float %x, float %y) {
; GFX7-LABEL: v_minimumnum_f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.minimumnum.f32(float %x, float %y)
  ret float %result
}
|
|
|
|
; Float case where the call carries the nnan flag. The expected code on
; every target is a lone min instruction with no preparatory multiply or
; self-max on the inputs.
define float @v_minimumnum_f32_nnan(float %x, float %y) {
; GFX7-LABEL: v_minimumnum_f32_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f32_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f32_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan float @llvm.minimumnum.f32(float %x, float %y)
  ret float %result
}
|
|
|
|
; Baseline double case: llvm.minimumnum.f64 on two VGPR-pair arguments with
; no fast-math flags. Every target is expected to apply a self-max to each
; 64-bit input and then a single min. gfx1200 uses the v_max_num_f64 /
; v_min_num_f64 mnemonics in their e32 form.
define double @v_minimumnum_f64(double %x, double %y) {
; GFX7-LABEL: v_minimumnum_f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call double @llvm.minimumnum.f64(double %x, double %y)
  ret double %result
}
|
|
|
|
; Double case where the call carries the nnan flag. The expected code on
; every target is a lone 64-bit min with no self-max on either input.
define double @v_minimumnum_f64_nnan(double %x, double %y) {
; GFX7-LABEL: v_minimumnum_f64_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f64_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f64_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan double @llvm.minimumnum.f64(double %x, double %y)
  ret double %result
}
|
|
|
|
; Float case with a constant 1.0 as the second operand. The expected code
; folds 1.0 into the min as an inline operand. Only the variable input gets
; a preparatory instruction: a multiply by 1.0 on gfx700/gfx803, and a
; self-max on the newer targets.
define float @v_minimumnum_f32_1.0(float %x) {
; GFX7-LABEL: v_minimumnum_f32_1.0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f32_1.0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f32_1.0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_1.0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f32_1.0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f32_1.0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f32_e32 v0, 1.0, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.minimumnum.f32(float %x, float 1.0)
  ret float %result
}
|
|
|
|
; Float case where the right-hand operand is first passed through
; llvm.canonicalize.f32. The expected code has exactly one preparatory
; instruction per input (multiply by 1.0 or self-max, depending on target)
; followed by a single min.
; NOTE(review): presumably the point is that no second canonicalizing
; instruction is emitted for the already-canonicalized %y - confirm.
define float @v_minimumnum_f32_rhs_not_snan(float %x, float %y) {
; GFX7-LABEL: v_minimumnum_f32_rhs_not_snan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f32_rhs_not_snan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f32_rhs_not_snan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_rhs_not_snan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f32_rhs_not_snan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f32_rhs_not_snan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %canon.y = call float @llvm.canonicalize.f32(float %y)
  %result = call float @llvm.minimumnum.f32(float %x, float %canon.y)
  ret float %result
}
|
|
|
|
; Float case where the left-hand operand is first passed through
; llvm.canonicalize.f32. The expected code has exactly one preparatory
; instruction per input (multiply by 1.0 or self-max, depending on target)
; followed by a single min.
; NOTE(review): presumably the point is that no second canonicalizing
; instruction is emitted for the already-canonicalized %x - confirm.
define float @v_minimumnum_f32_lhs_not_snan(float %x, float %y) {
; GFX7-LABEL: v_minimumnum_f32_lhs_not_snan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f32_lhs_not_snan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f32_lhs_not_snan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_lhs_not_snan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f32_lhs_not_snan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f32_lhs_not_snan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %canon.x = call float @llvm.canonicalize.f32(float %x)
  %result = call float @llvm.minimumnum.f32(float %canon.x, float %y)
  ret float %result
}
|
|
|
|
; Float case where both operands are first passed through
; llvm.canonicalize.f32. The expected code has exactly one preparatory
; instruction per input (multiply by 1.0 or self-max, depending on target)
; followed by a single min.
; NOTE(review): presumably the point is that minimumnum adds no
; canonicalizing instructions beyond the two explicit ones - confirm.
define float @v_minimumnum_f32_both_operands_not_snan(float %x, float %y) {
; GFX7-LABEL: v_minimumnum_f32_both_operands_not_snan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f32_both_operands_not_snan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f32_both_operands_not_snan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_both_operands_not_snan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f32_both_operands_not_snan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f32_both_operands_not_snan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %canon.x = call float @llvm.canonicalize.f32(float %x)
  %canon.y = call float @llvm.canonicalize.f32(float %y)
  %result = call float @llvm.minimumnum.f32(float %canon.x, float %canon.y)
  ret float %result
}
|
|
|
|
; Double case with a constant 1.0 as the second operand. The expected code
; applies a self-max to the variable input and folds 1.0 into the min as an
; inline operand. The inline constant is the last source operand on every
; target except gfx1200, whose e32-encoded v_min_num_f64 takes it as the
; first source operand.
define double @v_minimumnum_f64_1.0(double %x) {
; GFX7-LABEL: v_minimumnum_f64_1.0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], 1.0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f64_1.0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], 1.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f64_1.0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], 1.0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_1.0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], 1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f64_1.0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], 1.0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f64_1.0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], 1.0, v[0:1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call double @llvm.minimumnum.f64(double %x, double 1.0)
  ret double %result
}
|
|
|
|
; Half case with a VGPR left operand and an inreg right operand. The
; expected code reads %y from an SGPR: s16 on gfx700, gfx803, gfx900 and
; gfx1030, and s0 on gfx950, gfx1100 and gfx1200. Because of that register
; difference the gfx900 and gfx950 runs are checked under separate prefixes
; here instead of their shared one. The self-max on the SGPR input uses the
; e64 encoding; gfx700 feeds the SGPR directly into its f32->f16 conversion.
define half @v_minimumnum_f16_v_s(half %x, half inreg %y) {
; GFX7-LABEL: v_minimumnum_f16_v_s:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s16
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f16_v_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_e64 v1, s16, s16
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_f16_v_s:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_max_f16_e64 v1, s16, s16
; GFX900-NEXT: v_max_f16_e32 v0, v0, v0
; GFX900-NEXT: v_min_f16_e32 v0, v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_f16_v_s:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_max_f16_e64 v1, s0, s0
; GFX950-NEXT: v_max_f16_e32 v0, v0, v0
; GFX950-NEXT: v_min_f16_e32 v0, v0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_v_s:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f16_e64 v1, s16, s16
; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_f16_v_s:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, s0, s0
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_f16_v_s:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, s0, s0
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_f16_v_s:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, s0, s0
; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_f16_v_s:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, s0, s0
; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %result = call half @llvm.minimumnum.f16(half %x, half %y)
  ret half %result
}
|
|
|
|
; minimumnum on two half operands that are both marked inreg. The expected
; output reads each operand directly from an SGPR (s16/s17 or s0/s1). The
; gfx700 sequence converts through f16 and back to f32 and ends in v_min_f32,
; while the other targets apply a self-max to each operand followed by a
; 16-bit min.
define half @v_minimumnum_f16_s_s(half inreg %x, half inreg %y) {
; GFX7-LABEL: v_minimumnum_f16_s_s:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, s16
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s17
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f16_s_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_e64 v0, s17, s17
; GFX8-NEXT: v_max_f16_e64 v1, s16, s16
; GFX8-NEXT: v_min_f16_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_f16_s_s:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_max_f16_e64 v0, s17, s17
; GFX900-NEXT: v_max_f16_e64 v1, s16, s16
; GFX900-NEXT: v_min_f16_e32 v0, v1, v0
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_f16_s_s:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_max_f16_e64 v0, s1, s1
; GFX950-NEXT: v_max_f16_e64 v1, s0, s0
; GFX950-NEXT: v_min_f16_e32 v0, v1, v0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_s_s:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f16_e64 v0, s17, s17
; GFX10-NEXT: v_max_f16_e64 v1, s16, s16
; GFX10-NEXT: v_min_f16_e32 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_f16_s_s:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, s1, s1
; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, s0, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.h, v0.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_f16_s_s:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_max_f16_e64 v0, s1, s1
; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, s0, s0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v1, v0
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_f16_s_s:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.l, s1, s1
; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, s0, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.h, v0.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_f16_s_s:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v0, s1, s1
; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, s0, s0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v1, v0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %result = call half @llvm.minimumnum.f16(half %x, half %y)
  ret half %result
}
|
|
|
|
; minimumnum on float operands where only %x is marked inreg. The expected
; output pairs one SGPR source (s16 or s0) with the VGPR v0. Before the min,
; the gfx700 and gfx803 sequences multiply each operand by 1.0, whereas the
; later targets use a self-max on each operand instead.
define float @v_minimumnum_f32_s_v(float inreg %x, float %y) {
; GFX7-LABEL: v_minimumnum_f32_s_v:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16
; GFX7-NEXT: v_min_f32_e32 v0, v1, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f32_s_v:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16
; GFX8-NEXT: v_min_f32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_f32_s_v:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
; GFX900-NEXT: v_max_f32_e64 v1, s16, s16
; GFX900-NEXT: v_min_f32_e32 v0, v1, v0
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_f32_s_v:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
; GFX950-NEXT: v_max_f32_e64 v1, s0, s0
; GFX950-NEXT: v_min_f32_e32 v0, v1, v0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_s_v:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_max_f32_e64 v1, s16, s16
; GFX10-NEXT: v_min_f32_e32 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f32_s_v:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f32_e32 v0, v0, v0
; GFX11-NEXT: v_max_f32_e64 v1, s0, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f32_e32 v0, v1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f32_s_v:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f32_e32 v0, v1, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.minimumnum.f32(float %x, float %y)
  ret float %result
}
|
|
|
|
; minimumnum on float operands where only %y is marked inreg. The expected
; output pairs the VGPR v0 with one SGPR source (s16 or s0). Before the min,
; the gfx700 and gfx803 sequences multiply each operand by 1.0, whereas the
; later targets use a self-max on each operand instead.
define float @v_minimumnum_f32_v_s(float %x, float inreg %y) {
; GFX7-LABEL: v_minimumnum_f32_v_s:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f32_v_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_f32_v_s:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_max_f32_e64 v1, s16, s16
; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_f32_v_s:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_max_f32_e64 v1, s0, s0
; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
; GFX950-NEXT: v_min_f32_e32 v0, v0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_v_s:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e64 v1, s16, s16
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f32_v_s:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, s0, s0
; GFX11-NEXT: v_max_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f32_v_s:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.minimumnum.f32(float %x, float %y)
  ret float %result
}
|
|
|
|
; minimumnum on two float operands that are both marked inreg. The expected
; output reads both operands from SGPRs (s16/s17 or s0/s1). Before the min,
; the gfx700 and gfx803 sequences multiply each operand by 1.0, whereas the
; later targets use a self-max on each operand instead.
define float @v_minimumnum_f32_s_s(float inreg %x, float inreg %y) {
; GFX7-LABEL: v_minimumnum_f32_s_s:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, s17
; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16
; GFX7-NEXT: v_min_f32_e32 v0, v1, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f32_s_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e64 v0, 1.0, s17
; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16
; GFX8-NEXT: v_min_f32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_f32_s_s:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_max_f32_e64 v0, s17, s17
; GFX900-NEXT: v_max_f32_e64 v1, s16, s16
; GFX900-NEXT: v_min_f32_e32 v0, v1, v0
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_f32_s_s:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_max_f32_e64 v0, s1, s1
; GFX950-NEXT: v_max_f32_e64 v1, s0, s0
; GFX950-NEXT: v_min_f32_e32 v0, v1, v0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_s_s:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e64 v0, s17, s17
; GFX10-NEXT: v_max_f32_e64 v1, s16, s16
; GFX10-NEXT: v_min_f32_e32 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f32_s_s:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v0, s1, s1
; GFX11-NEXT: v_max_f32_e64 v1, s0, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f32_e32 v0, v1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f32_s_s:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f32_e64 v0, s1, s1
; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f32_e32 v0, v1, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.minimumnum.f32(float %x, float %y)
  ret float %result
}
|
|
|
|
; minimumnum on double operands where only %x is marked inreg. The expected
; output takes one source from an SGPR pair (s[16:17] or s[0:1]) and the other
; from v[0:1]. Every target applies a self-max to each operand and then a
; 64-bit min; only the order of the two self-max instructions differs.
define double @v_minimumnum_f64_s_v(double inreg %x, double %y) {
; GFX7-LABEL: v_minimumnum_f64_s_v:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
; GFX7-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f64_s_v:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
; GFX8-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_f64_s_v:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
; GFX900-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_f64_s_v:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
; GFX950-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_s_v:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX10-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f64_s_v:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f64_s_v:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1]
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[2:3], v[0:1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call double @llvm.minimumnum.f64(double %x, double %y)
  ret double %result
}
|
|
|
|
; minimumnum on double operands where only %y is marked inreg. The expected
; output takes one source from v[0:1] and the other from an SGPR pair
; (s[16:17] or s[0:1]). Every target applies a self-max to each operand and
; then a 64-bit min.
define double @v_minimumnum_f64_v_s(double %x, double inreg %y) {
; GFX7-LABEL: v_minimumnum_f64_v_s:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f64_v_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_f64_v_s:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_f64_v_s:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_v_s:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f64_v_s:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f64_v_s:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1]
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call double @llvm.minimumnum.f64(double %x, double %y)
  ret double %result
}
|
|
|
|
; minimumnum on two double operands that are both marked inreg. The expected
; output reads both operands from SGPR pairs (s[16:17]/s[18:19] or
; s[0:1]/s[2:3]). Every target applies a self-max to each operand and then a
; 64-bit min.
define double @v_minimumnum_f64_s_s(double inreg %x, double inreg %y) {
; GFX7-LABEL: v_minimumnum_f64_s_s:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
; GFX7-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f64_s_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
; GFX8-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_f64_s_s:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
; GFX900-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_f64_s_s:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
; GFX950-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_s_s:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
; GFX10-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f64_s_s:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f64_s_s:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e64 v[0:1], s[2:3], s[2:3]
; GFX12-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[2:3], v[0:1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call double @llvm.minimumnum.f64(double %x, double %y)
  ret double %result
}
|
|
|
|
; minimumnum where the second float operand is first passed through
; llvm.fabs. In the expected output the fabs shows up as a |v1| source
; modifier on the instruction that preprocesses that operand (a multiply by
; 1.0 on gfx700 and gfx803, a self-max on later targets), with no separate
; abs instruction.
define float @v_minimumnum_f32_fabs_rhs(float %x, float %y) {
; GFX7-LABEL: v_minimumnum_f32_fabs_rhs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f32_fabs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f32_fabs_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1|
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_fabs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e64 v1, |v1|, |v1|
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f32_fabs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1|
; GFX11-NEXT: v_max_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f32_fabs_rhs:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %result = call float @llvm.minimumnum.f32(float %x, float %fabs.y)
  ret float %result
}
|
|
|
|
; minimumnum where the second float operand is fneg(fabs(%y)). In the expected
; output gfx700 and gfx803 multiply |v1| by -1.0, while later targets carry
; the combination as a -|v1| source modifier on the self-max of that operand.
define float @v_minimumnum_f32_fneg_fabs_rhs(float %x, float %y) {
; GFX7-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
; GFX11-NEXT: v_max_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1|
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %fneg.fabs.y = fneg float %fabs.y
  %result = call float @llvm.minimumnum.f32(float %x, float %fneg.fabs.y)
  ret float %result
}
|
|
|
|
; minimumnum where both float operands are first passed through llvm.fabs.
; In the expected output both |v0| and |v1| appear as source modifiers on the
; instruction that preprocesses each operand (a multiply by 1.0 on gfx700 and
; gfx803, a self-max on later targets).
define float @v_minimumnum_f32_fabs(float %x, float %y) {
; GFX7-LABEL: v_minimumnum_f32_fabs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f32_fabs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
; GFX8-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f32_fabs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1|
; GFX9-NEXT: v_max_f32_e64 v0, |v0|, |v0|
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_fabs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e64 v1, |v1|, |v1|
; GFX10-NEXT: v_max_f32_e64 v0, |v0|, |v0|
; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f32_fabs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1|
; GFX11-NEXT: v_max_f32_e64 v0, |v0|, |v0|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f32_fabs:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
; GFX12-NEXT: v_max_num_f32_e64 v0, |v0|, |v0|
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %result = call float @llvm.minimumnum.f32(float %fabs.x, float %fabs.y)
  ret float %result
}
|
|
|
|
; minimumnum where both float operands are negated with fneg first. In the
; expected output gfx700 and gfx803 multiply each operand by -1.0, while later
; targets carry the negation as -v0 and -v1 source modifiers on the self-max
; of each operand.
define float @v_minimumnum_f32_fneg(float %x, float %y) {
; GFX7-LABEL: v_minimumnum_f32_fneg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v1, -1.0, v1
; GFX7-NEXT: v_mul_f32_e32 v0, -1.0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f32_fneg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v1, -1.0, v1
; GFX8-NEXT: v_mul_f32_e32 v0, -1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f32_fneg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e64 v1, -v1, -v1
; GFX9-NEXT: v_max_f32_e64 v0, -v0, -v0
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e64 v1, -v1, -v1
; GFX10-NEXT: v_max_f32_e64 v0, -v0, -v0
; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f32_fneg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1
; GFX11-NEXT: v_max_f32_e64 v0, -v0, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f32_fneg:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f32_e64 v1, -v1, -v1
; GFX12-NEXT: v_max_num_f32_e64 v0, -v0, -v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %fneg.x = fneg float %x
  %fneg.y = fneg float %y
  %result = call float @llvm.minimumnum.f32(float %fneg.x, float %fneg.y)
  ret float %result
}
|
|
|
|
; minimumnum where the second half operand is first passed through llvm.fabs.
; In the expected output the gfx700 sequence places the |v1| modifier on the
; f16 to f32 conversion and finishes with v_min_f32. The other targets place
; the modifier on the self-max of that operand, and the real-true16 runs
; address the 16-bit halves (v1.l, v0.l, v0.h) instead of whole registers.
define half @v_minimumnum_f16_fabs_rhs(half %x, half %y) {
; GFX7-LABEL: v_minimumnum_f16_fabs_rhs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1|
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f16_fabs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_e64 v1, |v1|, |v1|
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f16_fabs_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f16_e64 v1, |v1|, |v1|
; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_fabs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f16_e64 v1, |v1|, |v1|
; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_f16_fabs_rhs:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l|
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_f16_fabs_rhs:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, |v1|, |v1|
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_f16_fabs_rhs:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_f16_fabs_rhs:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %fabs.y = call half @llvm.fabs.f16(half %y)
  %result = call half @llvm.minimumnum.f16(half %x, half %fabs.y)
  ret half %result
}
|
|
|
|
; minimumnum(x, -|y|) on half. The expected output below folds the fneg(fabs)
; of the right-hand operand into a -|v1| source modifier: on the f16->f32
; convert for gfx7, and on the self-max (v_max v, v, v) of the rhs for the
; newer targets.
define half @v_minimumnum_f16_fneg_fabs_rhs(half %x, half %y) {
; GFX7-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e64 v1, -|v1|
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, -|v1.l|, -|v1.l|
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l|
; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1|
; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %fabs.y = call half @llvm.fabs.f16(half %y)
  %fneg.fabs.y = fneg half %fabs.y
  %result = call half @llvm.minimumnum.f16(half %x, half %fneg.fabs.y)
  ret half %result
}
|
|
|
|
; minimumnum(|x|, |y|) on half. The expected output applies the |v| source
; modifier to both operands: on the f16->f32 converts for gfx7, and on the
; self-max of each operand for the newer targets.
define half @v_minimumnum_f16_fabs(half %x, half %y) {
; GFX7-LABEL: v_minimumnum_f16_fabs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e64 v0, |v0|
; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1|
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f16_fabs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_e64 v1, |v1|, |v1|
; GFX8-NEXT: v_max_f16_e64 v0, |v0|, |v0|
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f16_fabs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f16_e64 v1, |v1|, |v1|
; GFX9-NEXT: v_max_f16_e64 v0, |v0|, |v0|
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_fabs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f16_e64 v1, |v1|, |v1|
; GFX10-NEXT: v_max_f16_e64 v0, |v0|, |v0|
; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_f16_fabs:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l|
; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l|
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_f16_fabs:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, |v1|, |v1|
; GFX11-FAKE16-NEXT: v_max_f16_e64 v0, |v0|, |v0|
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_f16_fabs:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l|
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_f16_fabs:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v0, |v0|, |v0|
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %fabs.x = call half @llvm.fabs.f16(half %x)
  %fabs.y = call half @llvm.fabs.f16(half %y)
  %result = call half @llvm.minimumnum.f16(half %fabs.x, half %fabs.y)
  ret half %result
}
|
|
|
|
; minimumnum(-x, -y) on half. The expected output applies the negate source
; modifier to both operands: on the f32->f16 converts for gfx7, and on the
; self-max of each operand for the newer targets.
define half @v_minimumnum_f16_fneg(half %x, half %y) {
; GFX7-LABEL: v_minimumnum_f16_fneg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0
; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f16_fneg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_e64 v1, -v1, -v1
; GFX8-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f16_fneg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f16_e64 v1, -v1, -v1
; GFX9-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f16_e64 v1, -v1, -v1
; GFX10-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_f16_fneg:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, -v1.l, -v1.l
; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_f16_fneg:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, -v1, -v1
; GFX11-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_f16_fneg:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l
; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_f16_fneg:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, -v1, -v1
; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v0, -v0, -v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %fneg.x = fneg half %x
  %fneg.y = fneg half %y
  %result = call half @llvm.minimumnum.f16(half %fneg.x, half %fneg.y)
  ret half %result
}
|
|
|
|
; minimumnum(-x, -y) on double. On every target the expected output applies
; the negate source modifier on the self-max of each 64-bit operand and then
; emits a single 64-bit min.
define double @v_minimumnum_f64_fneg(double %x, double %y) {
; GFX7-LABEL: v_minimumnum_f64_fneg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
; GFX7-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f64_fneg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
; GFX8-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f64_fneg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
; GFX9-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
; GFX10-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f64_fneg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
; GFX11-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f64_fneg:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3]
; GFX12-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %fneg.x = fneg double %x
  %fneg.y = fneg double %y
  %result = call double @llvm.minimumnum.f64(double %fneg.x, double %fneg.y)
  ret double %result
}
|
|
|
|
; minimumnum on <2 x half> without fast-math flags. Expected output: per-element
; f32 mins on gfx7, SDWA plus scalar f16 ops recombined with v_or on gfx8, and
; packed (v_pk_*) self-max followed by a packed min on gfx9 and newer.
define <2 x half> @v_minimumnum_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX7-LABEL: v_minimumnum_v2f16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v2f16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
; GFX900-NEXT: v_pk_min_f16 v0, v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v2f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_pk_min_f16 v0, v0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
; GFX10-NEXT: v_pk_min_f16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> %x, <2 x half> %y)
  ret <2 x half> %result
}
|
|
|
|
; minimumnum on <2 x half> with the nnan flag on the call. For gfx8 and newer
; the expected output contains only the min itself (no self-max of the
; operands beforehand); gfx7 still converts through f32.
define <2 x half> @v_minimumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
; GFX7-LABEL: v_minimumnum_v2f16_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v2f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v2f16_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f16_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v2f16_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v2f16_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <2 x half> @llvm.minimumnum.v2f16(<2 x half> %x, <2 x half> %y)
  ret <2 x half> %result
}
|
|
|
|
; minimumnum on <3 x half> without fast-math flags. Expected output: three f32
; mins on gfx7, SDWA plus scalar f16 ops on gfx8 (only the first register is
; recombined with v_or), and two packed max/min sequences on gfx9 and newer.
define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
; GFX7-LABEL: v_minimumnum_v3f16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v3f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v4, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v5, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v3f16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
; GFX900-NEXT: v_pk_min_f16 v0, v0, v2
; GFX900-NEXT: v_pk_max_f16 v2, v3, v3
; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
; GFX900-NEXT: v_pk_min_f16 v1, v1, v2
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v3f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
; GFX950-NEXT: v_pk_min_f16 v0, v0, v2
; GFX950-NEXT: v_pk_max_f16 v2, v3, v3
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_pk_min_f16 v1, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
; GFX10-NEXT: v_pk_min_f16 v0, v0, v2
; GFX10-NEXT: v_pk_min_f16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v3f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v2
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v3f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <3 x half> @llvm.minimumnum.v3f16(<3 x half> %x, <3 x half> %y)
  ret <3 x half> %result
}
|
|
|
|
; minimumnum on <3 x half> with the nnan flag on the call. For gfx8 and newer
; the expected output contains only min instructions (no self-max of the
; operands beforehand); gfx7 still converts through f32.
define <3 x half> @v_minimumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
; GFX7-LABEL: v_minimumnum_v3f16_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v3f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v3f16_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f16_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v0, v0, v2
; GFX10-NEXT: v_pk_min_f16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v3f16_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v2
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v3f16_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <3 x half> @llvm.minimumnum.v3f16(<3 x half> %x, <3 x half> %y)
  ret <3 x half> %result
}
|
|
|
|
; minimumnum on <4 x half> without fast-math flags. Expected output: four f32
; mins on gfx7, SDWA plus scalar f16 ops recombined with v_or for both result
; registers on gfx8, and two packed max/min sequences on gfx9 and newer.
define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) {
; GFX7-LABEL: v_minimumnum_v4f16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v4f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v4, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v5, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v5, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v6, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v4f16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
; GFX900-NEXT: v_pk_min_f16 v0, v0, v2
; GFX900-NEXT: v_pk_max_f16 v2, v3, v3
; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
; GFX900-NEXT: v_pk_min_f16 v1, v1, v2
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v4f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
; GFX950-NEXT: v_pk_min_f16 v0, v0, v2
; GFX950-NEXT: v_pk_max_f16 v2, v3, v3
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_pk_min_f16 v1, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
; GFX10-NEXT: v_pk_min_f16 v0, v0, v2
; GFX10-NEXT: v_pk_min_f16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v4f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v2
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v4f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> %x, <4 x half> %y)
  ret <4 x half> %result
}
|
|
|
|
; minimumnum on <4 x half> with the nnan flag on the call. For gfx8 and newer
; the expected output contains only min instructions (no self-max of the
; operands beforehand); gfx7 still converts through f32.
define <4 x half> @v_minimumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
; GFX7-LABEL: v_minimumnum_v4f16_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v4f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v4f16_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f16_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v0, v0, v2
; GFX10-NEXT: v_pk_min_f16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v4f16_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v2
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v4f16_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <4 x half> @llvm.minimumnum.v4f16(<4 x half> %x, <4 x half> %y)
  ret <4 x half> %result
}
|
|
|
|
; Lowering test for llvm.minimumnum on <6 x half>; the call carries no
; fast-math flags. The expectations below were autogenerated (see the NOTE at
; the top of this file) and should be regenerated rather than edited by hand.
; What they show:
;   * gfx700: every lane of both operands is converted f32 -> f16 -> f32 and
;     the minimum is taken with v_min_f32.
;   * gfx803: each half of every dword is handled separately (SDWA for the high
;     half), both inputs go through a self-max (max x, x) before the min, and
;     the halves are recombined with v_or_b32.
;   * gfx9 and newer: three packed operations, each a self-max of both inputs
;     followed by a packed min (the *_num_* mnemonics on gfx1200).
define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) {
; GFX7-LABEL: v_minimumnum_v6f16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
; GFX7-NEXT: v_min_f32_e32 v0, v0, v6
; GFX7-NEXT: v_min_f32_e32 v1, v1, v7
; GFX7-NEXT: v_min_f32_e32 v2, v2, v8
; GFX7-NEXT: v_min_f32_e32 v3, v3, v9
; GFX7-NEXT: v_min_f32_e32 v4, v4, v10
; GFX7-NEXT: v_min_f32_e32 v5, v5, v11
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v6f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v6, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v7, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v7, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v8, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v7, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v8, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v9, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v5, v5, v5
; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
; GFX8-NEXT: v_max_f16_e32 v4, v4, v4
; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_min_f16_e32 v2, v2, v5
; GFX8-NEXT: v_min_f16_e32 v1, v1, v4
; GFX8-NEXT: v_min_f16_e32 v0, v0, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v8
; GFX8-NEXT: v_or_b32_e32 v1, v1, v7
; GFX8-NEXT: v_or_b32_e32 v2, v2, v6
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v6f16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
; GFX900-NEXT: v_pk_min_f16 v0, v0, v3
; GFX900-NEXT: v_pk_max_f16 v3, v4, v4
; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
; GFX900-NEXT: v_pk_min_f16 v1, v1, v3
; GFX900-NEXT: v_pk_max_f16 v3, v5, v5
; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
; GFX900-NEXT: v_pk_min_f16 v2, v2, v3
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v6f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
; GFX950-NEXT: v_pk_min_f16 v0, v0, v3
; GFX950-NEXT: v_pk_max_f16 v3, v4, v4
; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
; GFX950-NEXT: v_pk_min_f16 v1, v1, v3
; GFX950-NEXT: v_pk_max_f16 v3, v5, v5
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_pk_min_f16 v2, v2, v3
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v6f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
; GFX10-NEXT: v_pk_max_f16 v4, v4, v4
; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
; GFX10-NEXT: v_pk_max_f16 v5, v5, v5
; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
; GFX10-NEXT: v_pk_min_f16 v0, v0, v3
; GFX10-NEXT: v_pk_min_f16 v1, v1, v4
; GFX10-NEXT: v_pk_min_f16 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v6f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v4, v4, v4
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: v_pk_max_f16 v5, v5, v5
; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
; GFX11-NEXT: v_pk_min_f16 v0, v0, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_pk_min_f16 v1, v1, v4
; GFX11-NEXT: v_pk_min_f16 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v6f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5
; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v4
; GFX12-NEXT: v_pk_min_num_f16 v2, v2, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <6 x half> @llvm.minimumnum.v6f16(<6 x half> %x, <6 x half> %y)
  ret <6 x half> %result
}
|
|
|
|
; Lowering test for llvm.minimumnum on <8 x half>; the call carries no
; fast-math flags. The expectations below were autogenerated (see the NOTE at
; the top of this file) and should be regenerated rather than edited by hand.
; What they show:
;   * gfx700: every lane of both operands is converted f32 -> f16 -> f32 and
;     the minimum is taken with v_min_f32 (eight of them).
;   * gfx803: each half of every dword is handled separately (SDWA for the high
;     half), both inputs go through a self-max (max x, x) before the min, and
;     the halves are recombined with v_or_b32.
;   * gfx9 and newer: four packed operations, each a self-max of both inputs
;     followed by a packed min (the *_num_* mnemonics on gfx1200).
define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) {
; GFX7-LABEL: v_minimumnum_v8f16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
; GFX7-NEXT: v_min_f32_e32 v0, v0, v8
; GFX7-NEXT: v_min_f32_e32 v1, v1, v9
; GFX7-NEXT: v_min_f32_e32 v2, v2, v10
; GFX7-NEXT: v_min_f32_e32 v3, v3, v11
; GFX7-NEXT: v_min_f32_e32 v4, v4, v12
; GFX7-NEXT: v_min_f32_e32 v5, v5, v13
; GFX7-NEXT: v_min_f32_e32 v6, v6, v14
; GFX7-NEXT: v_min_f32_e32 v7, v7, v15
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v8f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v8, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v9, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v9, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v10, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v10, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v11, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v10, v11, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v11, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v12, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v7, v7, v7
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
; GFX8-NEXT: v_max_f16_e32 v6, v6, v6
; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
; GFX8-NEXT: v_max_f16_e32 v5, v5, v5
; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
; GFX8-NEXT: v_max_f16_e32 v4, v4, v4
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_sdwa v11, v12, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_min_f16_e32 v3, v3, v7
; GFX8-NEXT: v_min_f16_e32 v2, v2, v6
; GFX8-NEXT: v_min_f16_e32 v1, v1, v5
; GFX8-NEXT: v_min_f16_e32 v0, v0, v4
; GFX8-NEXT: v_or_b32_e32 v0, v0, v11
; GFX8-NEXT: v_or_b32_e32 v1, v1, v10
; GFX8-NEXT: v_or_b32_e32 v2, v2, v9
; GFX8-NEXT: v_or_b32_e32 v3, v3, v8
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v8f16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_pk_max_f16 v4, v4, v4
; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
; GFX900-NEXT: v_pk_min_f16 v0, v0, v4
; GFX900-NEXT: v_pk_max_f16 v4, v5, v5
; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
; GFX900-NEXT: v_pk_min_f16 v1, v1, v4
; GFX900-NEXT: v_pk_max_f16 v4, v6, v6
; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
; GFX900-NEXT: v_pk_min_f16 v2, v2, v4
; GFX900-NEXT: v_pk_max_f16 v4, v7, v7
; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
; GFX900-NEXT: v_pk_min_f16 v3, v3, v4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v8f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_max_f16 v4, v4, v4
; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
; GFX950-NEXT: v_pk_min_f16 v0, v0, v4
; GFX950-NEXT: v_pk_max_f16 v4, v5, v5
; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
; GFX950-NEXT: v_pk_min_f16 v1, v1, v4
; GFX950-NEXT: v_pk_max_f16 v4, v6, v6
; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
; GFX950-NEXT: v_pk_min_f16 v2, v2, v4
; GFX950-NEXT: v_pk_max_f16 v4, v7, v7
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_pk_min_f16 v3, v3, v4
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v8f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_max_f16 v4, v4, v4
; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
; GFX10-NEXT: v_pk_max_f16 v5, v5, v5
; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
; GFX10-NEXT: v_pk_max_f16 v6, v6, v6
; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
; GFX10-NEXT: v_pk_max_f16 v7, v7, v7
; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
; GFX10-NEXT: v_pk_min_f16 v0, v0, v4
; GFX10-NEXT: v_pk_min_f16 v1, v1, v5
; GFX10-NEXT: v_pk_min_f16 v2, v2, v6
; GFX10-NEXT: v_pk_min_f16 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v8f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v4, v4, v4
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v5, v5, v5
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: v_pk_max_f16 v6, v6, v6
; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
; GFX11-NEXT: v_pk_max_f16 v7, v7, v7
; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
; GFX11-NEXT: v_pk_min_f16 v0, v0, v4
; GFX11-NEXT: v_pk_min_f16 v1, v1, v5
; GFX11-NEXT: v_pk_min_f16 v2, v2, v6
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-NEXT: v_pk_min_f16 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v8f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6
; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7
; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v4
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v5
; GFX12-NEXT: v_pk_min_num_f16 v2, v2, v6
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-NEXT: v_pk_min_num_f16 v3, v3, v7
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> %x, <8 x half> %y)
  ret <8 x half> %result
}
|
|
|
|
; Lowering test for llvm.minimumnum on <16 x half>; the call carries no
; fast-math flags. The expectations below were autogenerated (see the NOTE at
; the top of this file) and should be regenerated rather than edited by hand.
; What they show:
;   * gfx700: every lane of both operands is converted f32 -> f16 -> f32 and
;     the minimum is taken with v_min_f32. One further input dword is fetched
;     with buffer_load_dword from s32 and waited on (vmcnt(0)) before the final
;     min; presumably this is the last %y lane passed on the stack -- confirm
;     against the calling convention.
;   * gfx803: each half of every dword is handled separately (SDWA for the high
;     half), both inputs go through a self-max (max x, x) before the min, and
;     the halves are recombined with v_or_b32.
;   * gfx9 and newer: eight packed operations, each a self-max of both inputs
;     followed by a packed min (the *_num_* mnemonics on gfx1200).
define <16 x half> @v_minimumnum_v16f16(<16 x half> %x, <16 x half> %y) {
; GFX7-LABEL: v_minimumnum_v16f16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_min_f32_e32 v0, v0, v16
; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v17
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v20
; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_min_f32_e32 v1, v1, v16
; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v18
; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v21
; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
; GFX7-NEXT: v_min_f32_e32 v2, v2, v16
; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v19
; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v22
; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v23
; GFX7-NEXT: v_min_f32_e32 v4, v4, v17
; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19
; GFX7-NEXT: v_min_f32_e32 v5, v5, v18
; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
; GFX7-NEXT: v_min_f32_e32 v3, v3, v16
; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32
; GFX7-NEXT: v_min_f32_e32 v6, v6, v19
; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v24
; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v25
; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v26
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20
; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18
; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19
; GFX7-NEXT: v_min_f32_e32 v7, v7, v20
; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v27
; GFX7-NEXT: v_min_f32_e32 v8, v8, v17
; GFX7-NEXT: v_min_f32_e32 v9, v9, v18
; GFX7-NEXT: v_min_f32_e32 v10, v10, v19
; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12
; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v28
; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13
; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v29
; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14
; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v30
; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15
; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20
; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12
; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13
; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18
; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14
; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19
; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15
; GFX7-NEXT: v_min_f32_e32 v11, v11, v20
; GFX7-NEXT: v_min_f32_e32 v12, v12, v17
; GFX7-NEXT: v_min_f32_e32 v13, v13, v18
; GFX7-NEXT: v_min_f32_e32 v14, v14, v19
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16
; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
; GFX7-NEXT: v_min_f32_e32 v15, v15, v16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v16f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v16, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v17, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v16, v17, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v17, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v18, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v17, v18, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v18, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v19, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v18, v19, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v19, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v20, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v19, v20, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v20, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v21, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v20, v21, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v21, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v22, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v22, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v23, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v22, v23, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v23, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v24, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v15, v15, v15
; GFX8-NEXT: v_max_f16_e32 v7, v7, v7
; GFX8-NEXT: v_max_f16_e32 v14, v14, v14
; GFX8-NEXT: v_max_f16_e32 v6, v6, v6
; GFX8-NEXT: v_max_f16_e32 v13, v13, v13
; GFX8-NEXT: v_max_f16_e32 v5, v5, v5
; GFX8-NEXT: v_max_f16_e32 v12, v12, v12
; GFX8-NEXT: v_max_f16_e32 v4, v4, v4
; GFX8-NEXT: v_max_f16_e32 v11, v11, v11
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
; GFX8-NEXT: v_max_f16_e32 v10, v10, v10
; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
; GFX8-NEXT: v_max_f16_e32 v9, v9, v9
; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
; GFX8-NEXT: v_max_f16_e32 v8, v8, v8
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_sdwa v23, v24, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_min_f16_e32 v7, v7, v15
; GFX8-NEXT: v_min_f16_e32 v6, v6, v14
; GFX8-NEXT: v_min_f16_e32 v5, v5, v13
; GFX8-NEXT: v_min_f16_e32 v4, v4, v12
; GFX8-NEXT: v_min_f16_e32 v3, v3, v11
; GFX8-NEXT: v_min_f16_e32 v2, v2, v10
; GFX8-NEXT: v_min_f16_e32 v1, v1, v9
; GFX8-NEXT: v_min_f16_e32 v0, v0, v8
; GFX8-NEXT: v_or_b32_e32 v0, v0, v23
; GFX8-NEXT: v_or_b32_e32 v1, v1, v22
; GFX8-NEXT: v_or_b32_e32 v2, v2, v21
; GFX8-NEXT: v_or_b32_e32 v3, v3, v20
; GFX8-NEXT: v_or_b32_e32 v4, v4, v19
; GFX8-NEXT: v_or_b32_e32 v5, v5, v18
; GFX8-NEXT: v_or_b32_e32 v6, v6, v17
; GFX8-NEXT: v_or_b32_e32 v7, v7, v16
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v16f16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_pk_max_f16 v8, v8, v8
; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
; GFX900-NEXT: v_pk_min_f16 v0, v0, v8
; GFX900-NEXT: v_pk_max_f16 v8, v9, v9
; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
; GFX900-NEXT: v_pk_min_f16 v1, v1, v8
; GFX900-NEXT: v_pk_max_f16 v8, v10, v10
; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
; GFX900-NEXT: v_pk_min_f16 v2, v2, v8
; GFX900-NEXT: v_pk_max_f16 v8, v11, v11
; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
; GFX900-NEXT: v_pk_min_f16 v3, v3, v8
; GFX900-NEXT: v_pk_max_f16 v8, v12, v12
; GFX900-NEXT: v_pk_max_f16 v4, v4, v4
; GFX900-NEXT: v_pk_min_f16 v4, v4, v8
; GFX900-NEXT: v_pk_max_f16 v8, v13, v13
; GFX900-NEXT: v_pk_max_f16 v5, v5, v5
; GFX900-NEXT: v_pk_min_f16 v5, v5, v8
; GFX900-NEXT: v_pk_max_f16 v8, v14, v14
; GFX900-NEXT: v_pk_max_f16 v6, v6, v6
; GFX900-NEXT: v_pk_min_f16 v6, v6, v8
; GFX900-NEXT: v_pk_max_f16 v8, v15, v15
; GFX900-NEXT: v_pk_max_f16 v7, v7, v7
; GFX900-NEXT: v_pk_min_f16 v7, v7, v8
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v16f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_max_f16 v8, v8, v8
; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
; GFX950-NEXT: v_pk_min_f16 v0, v0, v8
; GFX950-NEXT: v_pk_max_f16 v8, v9, v9
; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
; GFX950-NEXT: v_pk_min_f16 v1, v1, v8
; GFX950-NEXT: v_pk_max_f16 v8, v10, v10
; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
; GFX950-NEXT: v_pk_min_f16 v2, v2, v8
; GFX950-NEXT: v_pk_max_f16 v8, v11, v11
; GFX950-NEXT: v_pk_max_f16 v4, v4, v4
; GFX950-NEXT: v_pk_min_f16 v3, v3, v8
; GFX950-NEXT: v_pk_max_f16 v8, v12, v12
; GFX950-NEXT: v_pk_max_f16 v5, v5, v5
; GFX950-NEXT: v_pk_min_f16 v4, v4, v8
; GFX950-NEXT: v_pk_max_f16 v8, v13, v13
; GFX950-NEXT: v_pk_max_f16 v6, v6, v6
; GFX950-NEXT: v_pk_min_f16 v5, v5, v8
; GFX950-NEXT: v_pk_max_f16 v8, v14, v14
; GFX950-NEXT: v_pk_max_f16 v7, v7, v7
; GFX950-NEXT: v_pk_min_f16 v6, v6, v8
; GFX950-NEXT: v_pk_max_f16 v8, v15, v15
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_pk_min_f16 v7, v7, v8
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v16f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_max_f16 v8, v8, v8
; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
; GFX10-NEXT: v_pk_max_f16 v9, v9, v9
; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
; GFX10-NEXT: v_pk_max_f16 v10, v10, v10
; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
; GFX10-NEXT: v_pk_min_f16 v0, v0, v8
; GFX10-NEXT: v_pk_max_f16 v8, v11, v11
; GFX10-NEXT: v_pk_min_f16 v1, v1, v9
; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
; GFX10-NEXT: v_pk_min_f16 v2, v2, v10
; GFX10-NEXT: v_pk_max_f16 v9, v12, v12
; GFX10-NEXT: v_pk_max_f16 v4, v4, v4
; GFX10-NEXT: v_pk_max_f16 v10, v13, v13
; GFX10-NEXT: v_pk_max_f16 v5, v5, v5
; GFX10-NEXT: v_pk_max_f16 v11, v14, v14
; GFX10-NEXT: v_pk_max_f16 v6, v6, v6
; GFX10-NEXT: v_pk_max_f16 v12, v15, v15
; GFX10-NEXT: v_pk_max_f16 v7, v7, v7
; GFX10-NEXT: v_pk_min_f16 v3, v3, v8
; GFX10-NEXT: v_pk_min_f16 v4, v4, v9
; GFX10-NEXT: v_pk_min_f16 v5, v5, v10
; GFX10-NEXT: v_pk_min_f16 v6, v6, v11
; GFX10-NEXT: v_pk_min_f16 v7, v7, v12
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v16f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v8, v8, v8
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v9, v9, v9
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: v_pk_max_f16 v10, v10, v10
; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
; GFX11-NEXT: v_pk_min_f16 v0, v0, v8
; GFX11-NEXT: v_pk_max_f16 v8, v11, v11
; GFX11-NEXT: v_pk_min_f16 v1, v1, v9
; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
; GFX11-NEXT: v_pk_min_f16 v2, v2, v10
; GFX11-NEXT: v_pk_max_f16 v9, v12, v12
; GFX11-NEXT: v_pk_max_f16 v4, v4, v4
; GFX11-NEXT: v_pk_max_f16 v10, v13, v13
; GFX11-NEXT: v_pk_max_f16 v5, v5, v5
; GFX11-NEXT: v_pk_max_f16 v11, v14, v14
; GFX11-NEXT: v_pk_max_f16 v6, v6, v6
; GFX11-NEXT: v_pk_max_f16 v12, v15, v15
; GFX11-NEXT: v_pk_max_f16 v7, v7, v7
; GFX11-NEXT: v_pk_min_f16 v3, v3, v8
; GFX11-NEXT: v_pk_min_f16 v4, v4, v9
; GFX11-NEXT: v_pk_min_f16 v5, v5, v10
; GFX11-NEXT: v_pk_min_f16 v6, v6, v11
; GFX11-NEXT: v_pk_min_f16 v7, v7, v12
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v16f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v8, v8, v8
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX12-NEXT: v_pk_max_num_f16 v9, v9, v9
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX12-NEXT: v_pk_max_num_f16 v10, v10, v10
; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v8
; GFX12-NEXT: v_pk_max_num_f16 v8, v11, v11
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v9
; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX12-NEXT: v_pk_min_num_f16 v2, v2, v10
; GFX12-NEXT: v_pk_max_num_f16 v9, v12, v12
; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4
; GFX12-NEXT: v_pk_max_num_f16 v10, v13, v13
; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5
; GFX12-NEXT: v_pk_max_num_f16 v11, v14, v14
; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6
; GFX12-NEXT: v_pk_max_num_f16 v12, v15, v15
; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7
; GFX12-NEXT: v_pk_min_num_f16 v3, v3, v8
; GFX12-NEXT: v_pk_min_num_f16 v4, v4, v9
; GFX12-NEXT: v_pk_min_num_f16 v5, v5, v10
; GFX12-NEXT: v_pk_min_num_f16 v6, v6, v11
; GFX12-NEXT: v_pk_min_num_f16 v7, v7, v12
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> %x, <16 x half> %y)
  ret <16 x half> %result
}
|
|
|
|
define <32 x half> @v_minimumnum_v32f16(<32 x half> %x, <32 x half> %y) {
|
|
; GFX7-LABEL: v_minimumnum_v32f16:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v17
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v18
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v19
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v20
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v21, v21
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v22, v22
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v23, v23
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v21, v21
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v22, v22
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v23, v23
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v24, v24
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v25, v25
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v26, v26
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v27, v27
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v24, v24
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v25, v25
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v26, v26
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v27, v27
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v28, v28
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v29, v29
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v30, v30
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v28, v28
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v29, v29
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v30, v30
|
|
; GFX7-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v0, v0, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8
|
|
; GFX7-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v32, v32
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v32, v32
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v1, v1, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v2, v2, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:16
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v3, v3, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v4, v4, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v5, v5, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v6, v6, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:32
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v7, v7, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v8, v8, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:40
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v9, v9, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v10, v10, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:48
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v11, v11, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v12, v12, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:56
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v13, v13, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v14, v14, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:64
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v15, v15, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v16, v16, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v17, v17, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v18, v18, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v19, v19, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v20, v20, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:88
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v21, v21, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v22, v22, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:96
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v23, v23, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v24, v24, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:104
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v25, v25, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v26, v26, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:112
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v27, v27, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v28, v28, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v29, v29, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:124
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v30, v30, v31
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
|
|
; GFX7-NEXT: v_min_f32_e32 v31, v31, v32
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_minimumnum_v32f16:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: v_max_f16_sdwa v38, v27, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v39, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v48, v26, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v49, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v50, v25, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v51, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v40, v22, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v41, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v58, v17, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v59, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_e32 v17, v17, v17
|
|
; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
|
|
; GFX8-NEXT: v_max_f16_sdwa v52, v24, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v53, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v54, v23, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v55, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v42, v21, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v43, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v44, v20, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v45, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v46, v19, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v47, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v56, v18, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v57, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_min_f16_sdwa v38, v39, v38 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_min_f16_sdwa v39, v49, v48 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_min_f16_sdwa v48, v51, v50 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_min_f16_sdwa v51, v41, v40 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_min_f16_sdwa v40, v59, v58 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_min_f16_e32 v1, v1, v17
|
|
; GFX8-NEXT: v_min_f16_sdwa v49, v53, v52 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_min_f16_sdwa v50, v55, v54 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_min_f16_sdwa v52, v43, v42 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_min_f16_sdwa v53, v45, v44 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_min_f16_sdwa v54, v47, v46 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_min_f16_sdwa v55, v57, v56 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_or_b32_e32 v1, v1, v40
|
|
; GFX8-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX8-NEXT: v_max_f16_sdwa v32, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v33, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v34, v29, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v35, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v36, v28, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_sdwa v37, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_min_f16_sdwa v32, v33, v32 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_max_f16_sdwa v33, v16, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_min_f16_sdwa v34, v35, v34 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_max_f16_sdwa v35, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_min_f16_sdwa v36, v37, v36 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_max_f16_sdwa v37, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_e32 v15, v15, v15
|
|
; GFX8-NEXT: v_min_f16_sdwa v33, v35, v33 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_max_f16_e32 v30, v30, v30
|
|
; GFX8-NEXT: v_max_f16_e32 v14, v14, v14
|
|
; GFX8-NEXT: v_max_f16_e32 v29, v29, v29
|
|
; GFX8-NEXT: v_max_f16_e32 v13, v13, v13
|
|
; GFX8-NEXT: v_max_f16_e32 v28, v28, v28
|
|
; GFX8-NEXT: v_max_f16_e32 v12, v12, v12
|
|
; GFX8-NEXT: v_max_f16_e32 v27, v27, v27
|
|
; GFX8-NEXT: v_max_f16_e32 v11, v11, v11
|
|
; GFX8-NEXT: v_max_f16_e32 v26, v26, v26
|
|
; GFX8-NEXT: v_max_f16_e32 v10, v10, v10
|
|
; GFX8-NEXT: v_max_f16_e32 v25, v25, v25
|
|
; GFX8-NEXT: v_max_f16_e32 v9, v9, v9
|
|
; GFX8-NEXT: v_max_f16_e32 v24, v24, v24
|
|
; GFX8-NEXT: v_max_f16_e32 v8, v8, v8
|
|
; GFX8-NEXT: v_max_f16_e32 v23, v23, v23
|
|
; GFX8-NEXT: v_max_f16_e32 v7, v7, v7
|
|
; GFX8-NEXT: v_max_f16_e32 v22, v22, v22
|
|
; GFX8-NEXT: v_max_f16_e32 v6, v6, v6
|
|
; GFX8-NEXT: v_max_f16_e32 v21, v21, v21
|
|
; GFX8-NEXT: v_max_f16_e32 v5, v5, v5
|
|
; GFX8-NEXT: v_max_f16_e32 v20, v20, v20
|
|
; GFX8-NEXT: v_max_f16_e32 v4, v4, v4
|
|
; GFX8-NEXT: v_max_f16_e32 v19, v19, v19
|
|
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
|
|
; GFX8-NEXT: v_max_f16_e32 v18, v18, v18
|
|
; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
|
|
; GFX8-NEXT: v_max_f16_e32 v16, v16, v16
|
|
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
|
|
; GFX8-NEXT: v_min_f16_e32 v14, v14, v30
|
|
; GFX8-NEXT: v_min_f16_e32 v13, v13, v29
|
|
; GFX8-NEXT: v_min_f16_e32 v12, v12, v28
|
|
; GFX8-NEXT: v_min_f16_e32 v11, v11, v27
|
|
; GFX8-NEXT: v_min_f16_e32 v10, v10, v26
|
|
; GFX8-NEXT: v_min_f16_e32 v9, v9, v25
|
|
; GFX8-NEXT: v_min_f16_e32 v8, v8, v24
|
|
; GFX8-NEXT: v_min_f16_e32 v7, v7, v23
|
|
; GFX8-NEXT: v_min_f16_e32 v6, v6, v22
|
|
; GFX8-NEXT: v_min_f16_e32 v5, v5, v21
|
|
; GFX8-NEXT: v_min_f16_e32 v4, v4, v20
|
|
; GFX8-NEXT: v_min_f16_e32 v3, v3, v19
|
|
; GFX8-NEXT: v_min_f16_e32 v2, v2, v18
|
|
; GFX8-NEXT: v_min_f16_e32 v0, v0, v16
|
|
; GFX8-NEXT: v_or_b32_e32 v0, v0, v33
|
|
; GFX8-NEXT: v_or_b32_e32 v2, v2, v55
|
|
; GFX8-NEXT: v_or_b32_e32 v3, v3, v54
|
|
; GFX8-NEXT: v_or_b32_e32 v4, v4, v53
|
|
; GFX8-NEXT: v_or_b32_e32 v5, v5, v52
|
|
; GFX8-NEXT: v_or_b32_e32 v6, v6, v51
|
|
; GFX8-NEXT: v_or_b32_e32 v7, v7, v50
|
|
; GFX8-NEXT: v_or_b32_e32 v8, v8, v49
|
|
; GFX8-NEXT: v_or_b32_e32 v9, v9, v48
|
|
; GFX8-NEXT: v_or_b32_e32 v10, v10, v39
|
|
; GFX8-NEXT: v_or_b32_e32 v11, v11, v38
|
|
; GFX8-NEXT: v_or_b32_e32 v12, v12, v36
|
|
; GFX8-NEXT: v_or_b32_e32 v13, v13, v34
|
|
; GFX8-NEXT: v_or_b32_e32 v14, v14, v32
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_max_f16_sdwa v35, v31, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
|
; GFX8-NEXT: v_max_f16_e32 v31, v31, v31
|
|
; GFX8-NEXT: v_min_f16_sdwa v35, v37, v35 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX8-NEXT: v_min_f16_e32 v15, v15, v31
|
|
; GFX8-NEXT: v_or_b32_e32 v15, v15, v35
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_minimumnum_v32f16:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_pk_max_f16 v16, v16, v16
|
|
; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
|
|
; GFX900-NEXT: v_pk_min_f16 v0, v0, v16
|
|
; GFX900-NEXT: v_pk_max_f16 v16, v17, v17
|
|
; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
|
|
; GFX900-NEXT: v_pk_min_f16 v1, v1, v16
|
|
; GFX900-NEXT: v_pk_max_f16 v16, v18, v18
|
|
; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
|
|
; GFX900-NEXT: v_pk_min_f16 v2, v2, v16
|
|
; GFX900-NEXT: v_pk_max_f16 v16, v19, v19
|
|
; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
|
|
; GFX900-NEXT: v_pk_min_f16 v3, v3, v16
|
|
; GFX900-NEXT: buffer_load_dword v16, off, s[0:3], s32
|
|
; GFX900-NEXT: v_pk_max_f16 v17, v20, v20
|
|
; GFX900-NEXT: v_pk_max_f16 v4, v4, v4
|
|
; GFX900-NEXT: v_pk_max_f16 v18, v21, v21
|
|
; GFX900-NEXT: v_pk_max_f16 v5, v5, v5
|
|
; GFX900-NEXT: v_pk_max_f16 v19, v22, v22
|
|
; GFX900-NEXT: v_pk_max_f16 v6, v6, v6
|
|
; GFX900-NEXT: v_pk_max_f16 v20, v23, v23
|
|
; GFX900-NEXT: v_pk_max_f16 v7, v7, v7
|
|
; GFX900-NEXT: v_pk_max_f16 v21, v24, v24
|
|
; GFX900-NEXT: v_pk_max_f16 v8, v8, v8
|
|
; GFX900-NEXT: v_pk_max_f16 v22, v25, v25
|
|
; GFX900-NEXT: v_pk_max_f16 v9, v9, v9
|
|
; GFX900-NEXT: v_pk_max_f16 v23, v26, v26
|
|
; GFX900-NEXT: v_pk_max_f16 v10, v10, v10
|
|
; GFX900-NEXT: v_pk_max_f16 v24, v27, v27
|
|
; GFX900-NEXT: v_pk_max_f16 v11, v11, v11
|
|
; GFX900-NEXT: v_pk_max_f16 v25, v28, v28
|
|
; GFX900-NEXT: v_pk_max_f16 v12, v12, v12
|
|
; GFX900-NEXT: v_pk_max_f16 v26, v29, v29
|
|
; GFX900-NEXT: v_pk_max_f16 v13, v13, v13
|
|
; GFX900-NEXT: v_pk_max_f16 v27, v30, v30
|
|
; GFX900-NEXT: v_pk_max_f16 v14, v14, v14
|
|
; GFX900-NEXT: v_pk_max_f16 v15, v15, v15
|
|
; GFX900-NEXT: v_pk_min_f16 v4, v4, v17
|
|
; GFX900-NEXT: v_pk_min_f16 v5, v5, v18
|
|
; GFX900-NEXT: v_pk_min_f16 v6, v6, v19
|
|
; GFX900-NEXT: v_pk_min_f16 v7, v7, v20
|
|
; GFX900-NEXT: v_pk_min_f16 v8, v8, v21
|
|
; GFX900-NEXT: v_pk_min_f16 v9, v9, v22
|
|
; GFX900-NEXT: v_pk_min_f16 v10, v10, v23
|
|
; GFX900-NEXT: v_pk_min_f16 v11, v11, v24
|
|
; GFX900-NEXT: v_pk_min_f16 v12, v12, v25
|
|
; GFX900-NEXT: v_pk_min_f16 v13, v13, v26
|
|
; GFX900-NEXT: v_pk_min_f16 v14, v14, v27
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_pk_max_f16 v16, v16, v16
|
|
; GFX900-NEXT: v_pk_min_f16 v15, v15, v16
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: v_minimumnum_v32f16:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: scratch_load_dword v31, off, s32
|
|
; GFX950-NEXT: v_pk_max_f16 v16, v16, v16
|
|
; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
|
|
; GFX950-NEXT: v_pk_max_f16 v17, v17, v17
|
|
; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
|
|
; GFX950-NEXT: v_pk_max_f16 v18, v18, v18
|
|
; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
|
|
; GFX950-NEXT: v_pk_max_f16 v19, v19, v19
|
|
; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
|
|
; GFX950-NEXT: v_pk_max_f16 v20, v20, v20
|
|
; GFX950-NEXT: v_pk_max_f16 v4, v4, v4
|
|
; GFX950-NEXT: v_pk_max_f16 v21, v21, v21
|
|
; GFX950-NEXT: v_pk_max_f16 v5, v5, v5
|
|
; GFX950-NEXT: v_pk_max_f16 v22, v22, v22
|
|
; GFX950-NEXT: v_pk_max_f16 v6, v6, v6
|
|
; GFX950-NEXT: v_pk_max_f16 v23, v23, v23
|
|
; GFX950-NEXT: v_pk_max_f16 v7, v7, v7
|
|
; GFX950-NEXT: v_pk_max_f16 v24, v24, v24
|
|
; GFX950-NEXT: v_pk_max_f16 v8, v8, v8
|
|
; GFX950-NEXT: v_pk_max_f16 v25, v25, v25
|
|
; GFX950-NEXT: v_pk_max_f16 v9, v9, v9
|
|
; GFX950-NEXT: v_pk_max_f16 v26, v26, v26
|
|
; GFX950-NEXT: v_pk_max_f16 v10, v10, v10
|
|
; GFX950-NEXT: v_pk_max_f16 v27, v27, v27
|
|
; GFX950-NEXT: v_pk_max_f16 v11, v11, v11
|
|
; GFX950-NEXT: v_pk_max_f16 v28, v28, v28
|
|
; GFX950-NEXT: v_pk_max_f16 v12, v12, v12
|
|
; GFX950-NEXT: v_pk_max_f16 v29, v29, v29
|
|
; GFX950-NEXT: v_pk_max_f16 v13, v13, v13
|
|
; GFX950-NEXT: v_pk_max_f16 v30, v30, v30
|
|
; GFX950-NEXT: v_pk_max_f16 v14, v14, v14
|
|
; GFX950-NEXT: v_pk_max_f16 v15, v15, v15
|
|
; GFX950-NEXT: v_pk_min_f16 v0, v0, v16
|
|
; GFX950-NEXT: v_pk_min_f16 v1, v1, v17
|
|
; GFX950-NEXT: v_pk_min_f16 v2, v2, v18
|
|
; GFX950-NEXT: v_pk_min_f16 v3, v3, v19
|
|
; GFX950-NEXT: v_pk_min_f16 v4, v4, v20
|
|
; GFX950-NEXT: v_pk_min_f16 v5, v5, v21
|
|
; GFX950-NEXT: v_pk_min_f16 v6, v6, v22
|
|
; GFX950-NEXT: v_pk_min_f16 v7, v7, v23
|
|
; GFX950-NEXT: v_pk_min_f16 v8, v8, v24
|
|
; GFX950-NEXT: v_pk_min_f16 v9, v9, v25
|
|
; GFX950-NEXT: v_pk_min_f16 v10, v10, v26
|
|
; GFX950-NEXT: v_pk_min_f16 v11, v11, v27
|
|
; GFX950-NEXT: v_pk_min_f16 v12, v12, v28
|
|
; GFX950-NEXT: v_pk_min_f16 v13, v13, v29
|
|
; GFX950-NEXT: v_pk_min_f16 v14, v14, v30
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX950-NEXT: v_pk_max_f16 v16, v31, v31
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: v_pk_min_f16 v15, v15, v16
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_minimumnum_v32f16:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX10-NEXT: v_pk_max_f16 v16, v16, v16
|
|
; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
|
|
; GFX10-NEXT: v_pk_max_f16 v17, v17, v17
|
|
; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
|
|
; GFX10-NEXT: v_pk_max_f16 v18, v18, v18
|
|
; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
|
|
; GFX10-NEXT: v_pk_max_f16 v19, v19, v19
|
|
; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
|
|
; GFX10-NEXT: v_pk_max_f16 v20, v20, v20
|
|
; GFX10-NEXT: v_pk_max_f16 v4, v4, v4
|
|
; GFX10-NEXT: v_pk_max_f16 v21, v21, v21
|
|
; GFX10-NEXT: v_pk_max_f16 v5, v5, v5
|
|
; GFX10-NEXT: v_pk_max_f16 v22, v22, v22
|
|
; GFX10-NEXT: v_pk_max_f16 v6, v6, v6
|
|
; GFX10-NEXT: v_pk_max_f16 v23, v23, v23
|
|
; GFX10-NEXT: v_pk_max_f16 v7, v7, v7
|
|
; GFX10-NEXT: v_pk_max_f16 v24, v24, v24
|
|
; GFX10-NEXT: v_pk_max_f16 v8, v8, v8
|
|
; GFX10-NEXT: v_pk_max_f16 v25, v25, v25
|
|
; GFX10-NEXT: v_pk_max_f16 v9, v9, v9
|
|
; GFX10-NEXT: v_pk_max_f16 v26, v26, v26
|
|
; GFX10-NEXT: v_pk_max_f16 v10, v10, v10
|
|
; GFX10-NEXT: v_pk_max_f16 v27, v27, v27
|
|
; GFX10-NEXT: v_pk_max_f16 v11, v11, v11
|
|
; GFX10-NEXT: v_pk_max_f16 v28, v28, v28
|
|
; GFX10-NEXT: v_pk_max_f16 v12, v12, v12
|
|
; GFX10-NEXT: v_pk_max_f16 v29, v29, v29
|
|
; GFX10-NEXT: v_pk_max_f16 v13, v13, v13
|
|
; GFX10-NEXT: v_pk_max_f16 v30, v30, v30
|
|
; GFX10-NEXT: v_pk_max_f16 v14, v14, v14
|
|
; GFX10-NEXT: v_pk_max_f16 v15, v15, v15
|
|
; GFX10-NEXT: v_pk_min_f16 v0, v0, v16
|
|
; GFX10-NEXT: v_pk_min_f16 v1, v1, v17
|
|
; GFX10-NEXT: v_pk_min_f16 v2, v2, v18
|
|
; GFX10-NEXT: v_pk_min_f16 v3, v3, v19
|
|
; GFX10-NEXT: v_pk_min_f16 v4, v4, v20
|
|
; GFX10-NEXT: v_pk_min_f16 v5, v5, v21
|
|
; GFX10-NEXT: v_pk_min_f16 v6, v6, v22
|
|
; GFX10-NEXT: v_pk_min_f16 v7, v7, v23
|
|
; GFX10-NEXT: v_pk_min_f16 v8, v8, v24
|
|
; GFX10-NEXT: v_pk_min_f16 v9, v9, v25
|
|
; GFX10-NEXT: v_pk_min_f16 v10, v10, v26
|
|
; GFX10-NEXT: v_pk_min_f16 v11, v11, v27
|
|
; GFX10-NEXT: v_pk_min_f16 v12, v12, v28
|
|
; GFX10-NEXT: v_pk_min_f16 v13, v13, v29
|
|
; GFX10-NEXT: v_pk_min_f16 v14, v14, v30
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: v_pk_max_f16 v16, v31, v31
|
|
; GFX10-NEXT: v_pk_min_f16 v15, v15, v16
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_minimumnum_v32f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: v_pk_max_f16 v16, v16, v16
|
|
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
|
|
; GFX11-NEXT: v_pk_max_f16 v17, v17, v17
|
|
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
|
|
; GFX11-NEXT: v_pk_max_f16 v18, v18, v18
|
|
; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
|
|
; GFX11-NEXT: v_pk_max_f16 v19, v19, v19
|
|
; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
|
|
; GFX11-NEXT: v_pk_max_f16 v20, v20, v20
|
|
; GFX11-NEXT: v_pk_max_f16 v4, v4, v4
|
|
; GFX11-NEXT: v_pk_max_f16 v21, v21, v21
|
|
; GFX11-NEXT: v_pk_max_f16 v5, v5, v5
|
|
; GFX11-NEXT: v_pk_max_f16 v22, v22, v22
|
|
; GFX11-NEXT: v_pk_max_f16 v6, v6, v6
|
|
; GFX11-NEXT: v_pk_max_f16 v23, v23, v23
|
|
; GFX11-NEXT: v_pk_max_f16 v7, v7, v7
|
|
; GFX11-NEXT: v_pk_max_f16 v24, v24, v24
|
|
; GFX11-NEXT: v_pk_max_f16 v8, v8, v8
|
|
; GFX11-NEXT: v_pk_max_f16 v25, v25, v25
|
|
; GFX11-NEXT: v_pk_max_f16 v9, v9, v9
|
|
; GFX11-NEXT: v_pk_max_f16 v26, v26, v26
|
|
; GFX11-NEXT: v_pk_max_f16 v10, v10, v10
|
|
; GFX11-NEXT: v_pk_max_f16 v27, v27, v27
|
|
; GFX11-NEXT: v_pk_max_f16 v11, v11, v11
|
|
; GFX11-NEXT: v_pk_max_f16 v28, v28, v28
|
|
; GFX11-NEXT: v_pk_max_f16 v12, v12, v12
|
|
; GFX11-NEXT: v_pk_max_f16 v29, v29, v29
|
|
; GFX11-NEXT: v_pk_max_f16 v13, v13, v13
|
|
; GFX11-NEXT: v_pk_max_f16 v30, v30, v30
|
|
; GFX11-NEXT: v_pk_max_f16 v14, v14, v14
|
|
; GFX11-NEXT: v_pk_max_f16 v15, v15, v15
|
|
; GFX11-NEXT: v_pk_min_f16 v0, v0, v16
|
|
; GFX11-NEXT: v_pk_min_f16 v1, v1, v17
|
|
; GFX11-NEXT: v_pk_min_f16 v2, v2, v18
|
|
; GFX11-NEXT: v_pk_min_f16 v3, v3, v19
|
|
; GFX11-NEXT: v_pk_min_f16 v4, v4, v20
|
|
; GFX11-NEXT: v_pk_min_f16 v5, v5, v21
|
|
; GFX11-NEXT: v_pk_min_f16 v6, v6, v22
|
|
; GFX11-NEXT: v_pk_min_f16 v7, v7, v23
|
|
; GFX11-NEXT: v_pk_min_f16 v8, v8, v24
|
|
; GFX11-NEXT: v_pk_min_f16 v9, v9, v25
|
|
; GFX11-NEXT: v_pk_min_f16 v10, v10, v26
|
|
; GFX11-NEXT: v_pk_min_f16 v11, v11, v27
|
|
; GFX11-NEXT: v_pk_min_f16 v12, v12, v28
|
|
; GFX11-NEXT: v_pk_min_f16 v13, v13, v29
|
|
; GFX11-NEXT: v_pk_min_f16 v14, v14, v30
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: v_pk_max_f16 v16, v31, v31
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-NEXT: v_pk_min_f16 v15, v15, v16
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_minimumnum_v32f16:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX12-NEXT: v_pk_max_num_f16 v16, v16, v16
|
|
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
|
|
; GFX12-NEXT: v_pk_max_num_f16 v17, v17, v17
|
|
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
|
|
; GFX12-NEXT: v_pk_max_num_f16 v18, v18, v18
|
|
; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
|
|
; GFX12-NEXT: v_pk_max_num_f16 v19, v19, v19
|
|
; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
|
|
; GFX12-NEXT: v_pk_max_num_f16 v20, v20, v20
|
|
; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4
|
|
; GFX12-NEXT: v_pk_max_num_f16 v21, v21, v21
|
|
; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5
|
|
; GFX12-NEXT: v_pk_max_num_f16 v22, v22, v22
|
|
; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6
|
|
; GFX12-NEXT: v_pk_max_num_f16 v23, v23, v23
|
|
; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7
|
|
; GFX12-NEXT: v_pk_max_num_f16 v24, v24, v24
|
|
; GFX12-NEXT: v_pk_max_num_f16 v8, v8, v8
|
|
; GFX12-NEXT: v_pk_max_num_f16 v25, v25, v25
|
|
; GFX12-NEXT: v_pk_max_num_f16 v9, v9, v9
|
|
; GFX12-NEXT: v_pk_max_num_f16 v26, v26, v26
|
|
; GFX12-NEXT: v_pk_max_num_f16 v10, v10, v10
|
|
; GFX12-NEXT: v_pk_max_num_f16 v27, v27, v27
|
|
; GFX12-NEXT: v_pk_max_num_f16 v11, v11, v11
|
|
; GFX12-NEXT: v_pk_max_num_f16 v28, v28, v28
|
|
; GFX12-NEXT: v_pk_max_num_f16 v12, v12, v12
|
|
; GFX12-NEXT: v_pk_max_num_f16 v29, v29, v29
|
|
; GFX12-NEXT: v_pk_max_num_f16 v13, v13, v13
|
|
; GFX12-NEXT: v_pk_max_num_f16 v30, v30, v30
|
|
; GFX12-NEXT: v_pk_max_num_f16 v14, v14, v14
|
|
; GFX12-NEXT: v_pk_max_num_f16 v15, v15, v15
|
|
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v16
|
|
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v17
|
|
; GFX12-NEXT: v_pk_min_num_f16 v2, v2, v18
|
|
; GFX12-NEXT: v_pk_min_num_f16 v3, v3, v19
|
|
; GFX12-NEXT: v_pk_min_num_f16 v4, v4, v20
|
|
; GFX12-NEXT: v_pk_min_num_f16 v5, v5, v21
|
|
; GFX12-NEXT: v_pk_min_num_f16 v6, v6, v22
|
|
; GFX12-NEXT: v_pk_min_num_f16 v7, v7, v23
|
|
; GFX12-NEXT: v_pk_min_num_f16 v8, v8, v24
|
|
; GFX12-NEXT: v_pk_min_num_f16 v9, v9, v25
|
|
; GFX12-NEXT: v_pk_min_num_f16 v10, v10, v26
|
|
; GFX12-NEXT: v_pk_min_num_f16 v11, v11, v27
|
|
; GFX12-NEXT: v_pk_min_num_f16 v12, v12, v28
|
|
; GFX12-NEXT: v_pk_min_num_f16 v13, v13, v29
|
|
; GFX12-NEXT: v_pk_min_num_f16 v14, v14, v30
|
|
; GFX12-NEXT: s_wait_loadcnt 0x0
|
|
; GFX12-NEXT: v_pk_max_num_f16 v16, v31, v31
|
|
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX12-NEXT: v_pk_min_num_f16 v15, v15, v16
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%result = call <32 x half> @llvm.minimumnum.v32f16(<32 x half> %x, <32 x half> %y)
|
|
ret <32 x half> %result
|
|
}
|
|
|
|
; Two-element float vector case: calls llvm.minimumnum.v2f32 on %x and %y with
; no fast-math flags on the call and returns the result.
; The expected code treats the two lanes independently. Before each v_min, both
; inputs are first multiplied by 1.0 on gfx700/gfx803, or passed through a max
; with themselves on the newer targets (dual-issued on gfx1100/gfx1200).
; NOTE(review): those pre-ops presumably quiet signaling NaN inputs - confirm
; against the minimumnum lowering.
define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX7-LABEL: v_minimumnum_v2f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v3
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_min_f32_e32 v1, v1, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v2f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v3
; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT: v_min_f32_e32 v1, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
; GFX9-NEXT: v_max_f32_e32 v2, v3, v3
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
; GFX9-NEXT: v_min_f32_e32 v1, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_max_f32_e32 v3, v3, v3
; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
; GFX10-NEXT: v_min_f32_e32 v0, v0, v2
; GFX10-NEXT: v_min_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %result
}
|
|
|
|
; llvm.minimumnum on <2 x float> with the nnan flag on the call.
; The checks expect only the min instructions on every run line, with no
; per-operand multiply or self-max ahead of them.
define <2 x float> @v_minimumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
; GFX7-LABEL: v_minimumnum_v2f32_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v2f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f32_e32 v0, v0, v2
; GFX8-NEXT: v_min_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v2f32_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f32_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f32_e32 v0, v0, v2
; GFX10-NEXT: v_min_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v2f32_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v2f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <2 x float> @llvm.minimumnum.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %result
}
|
|
|
|
; llvm.minimumnum on <3 x float> with no fast-math flags.
; The checks expect every input lane to be rewritten before the min: a
; multiply by 1.0 on the gfx7/gfx8 run lines, and a max of the register with
; itself on gfx9 and newer (presumably to quiet signaling NaNs -- confirm).
; On the gfx11/gfx12 run lines two lanes share a dual-issue min and the third
; lane uses a separate single min.
define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) {
; GFX7-LABEL: v_minimumnum_v3f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v4
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v5
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: v_min_f32_e32 v2, v2, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v3f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, v0, v3
; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v4
; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT: v_min_f32_e32 v1, v1, v3
; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v5
; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX8-NEXT: v_min_f32_e32 v2, v2, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v3f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_min_f32_e32 v0, v0, v3
; GFX9-NEXT: v_max_f32_e32 v3, v4, v4
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: v_max_f32_e32 v3, v5, v5
; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
; GFX9-NEXT: v_min_f32_e32 v2, v2, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v3, v3, v3
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_max_f32_e32 v4, v4, v4
; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
; GFX10-NEXT: v_max_f32_e32 v5, v5, v5
; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
; GFX10-NEXT: v_min_f32_e32 v0, v0, v3
; GFX10-NEXT: v_min_f32_e32 v1, v1, v4
; GFX10-NEXT: v_min_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v3f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v0, v0, v0
; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT: v_dual_max_f32 v5, v5, v5 :: v_dual_max_f32 v2, v2, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4
; GFX11-NEXT: v_min_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v3f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0
; GFX12-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1
; GFX12-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4
; GFX12-NEXT: v_min_num_f32_e32 v2, v2, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <3 x float> @llvm.minimumnum.v3f32(<3 x float> %x, <3 x float> %y)
  ret <3 x float> %result
}
|
|
|
|
; llvm.minimumnum on <3 x float> with the nnan flag on the call.
; The checks expect only the min instructions on every run line, with no
; per-operand multiply or self-max ahead of them.
define <3 x float> @v_minimumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
; GFX7-LABEL: v_minimumnum_v3f32_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v3f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f32_e32 v0, v0, v3
; GFX8-NEXT: v_min_f32_e32 v1, v1, v4
; GFX8-NEXT: v_min_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v3f32_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f32_e32 v0, v0, v3
; GFX9-NEXT: v_min_f32_e32 v1, v1, v4
; GFX9-NEXT: v_min_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f32_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f32_e32 v0, v0, v3
; GFX10-NEXT: v_min_f32_e32 v1, v1, v4
; GFX10-NEXT: v_min_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v3f32_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4
; GFX11-NEXT: v_min_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v3f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4
; GFX12-NEXT: v_min_num_f32_e32 v2, v2, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <3 x float> @llvm.minimumnum.v3f32(<3 x float> %x, <3 x float> %y)
  ret <3 x float> %result
}
|
|
|
|
; llvm.minimumnum on <4 x float> with no fast-math flags.
; The checks expect every input lane to be rewritten before the min: a
; multiply by 1.0 on the gfx7/gfx8 run lines, and a max of the register with
; itself on gfx9 and newer (presumably to quiet signaling NaNs -- confirm).
define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) {
; GFX7-LABEL: v_minimumnum_v4f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v5
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v6
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: v_min_f32_e32 v2, v2, v4
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v7
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_min_f32_e32 v3, v3, v4
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v4f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, v0, v4
; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v5
; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT: v_min_f32_e32 v1, v1, v4
; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v6
; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX8-NEXT: v_min_f32_e32 v2, v2, v4
; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v7
; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX8-NEXT: v_min_f32_e32 v3, v3, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v4f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v4, v4, v4
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_min_f32_e32 v0, v0, v4
; GFX9-NEXT: v_max_f32_e32 v4, v5, v5
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
; GFX9-NEXT: v_min_f32_e32 v1, v1, v4
; GFX9-NEXT: v_max_f32_e32 v4, v6, v6
; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
; GFX9-NEXT: v_min_f32_e32 v2, v2, v4
; GFX9-NEXT: v_max_f32_e32 v4, v7, v7
; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
; GFX9-NEXT: v_min_f32_e32 v3, v3, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v4, v4, v4
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_max_f32_e32 v5, v5, v5
; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
; GFX10-NEXT: v_max_f32_e32 v6, v6, v6
; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
; GFX10-NEXT: v_max_f32_e32 v7, v7, v7
; GFX10-NEXT: v_max_f32_e32 v3, v3, v3
; GFX10-NEXT: v_min_f32_e32 v0, v0, v4
; GFX10-NEXT: v_min_f32_e32 v1, v1, v5
; GFX10-NEXT: v_min_f32_e32 v2, v2, v6
; GFX10-NEXT: v_min_f32_e32 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7
; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5
; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v4f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX12-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5
; GFX12-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %result
}
|
|
|
|
; llvm.minimumnum on <4 x float> with the nnan flag on the call.
; The checks expect only the min instructions on every run line, with no
; per-operand multiply or self-max ahead of them.
define <4 x float> @v_minimumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
; GFX7-LABEL: v_minimumnum_v4f32_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v4f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f32_e32 v0, v0, v4
; GFX8-NEXT: v_min_f32_e32 v1, v1, v5
; GFX8-NEXT: v_min_f32_e32 v2, v2, v6
; GFX8-NEXT: v_min_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v4f32_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f32_e32 v0, v0, v4
; GFX9-NEXT: v_min_f32_e32 v1, v1, v5
; GFX9-NEXT: v_min_f32_e32 v2, v2, v6
; GFX9-NEXT: v_min_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f32_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f32_e32 v0, v0, v4
; GFX10-NEXT: v_min_f32_e32 v1, v1, v5
; GFX10-NEXT: v_min_f32_e32 v2, v2, v6
; GFX10-NEXT: v_min_f32_e32 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v4f32_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5
; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v4f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5
; GFX12-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <4 x float> @llvm.minimumnum.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %result
}
|
|
|
|
; llvm.minimumnum on <2 x double> with no fast-math flags.
; On every run line the checks expect each 64-bit operand to be replaced by
; a max of the register pair with itself before the min (presumably to quiet
; signaling NaNs -- confirm). The gfx900 and gfx950 run lines carry separate
; checks here because the expected instruction order differs between them.
define <2 x double> @v_minimumnum_v2f64(<2 x double> %x, <2 x double> %y) {
; GFX7-LABEL: v_minimumnum_v2f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v2f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v2f64:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v2f64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
; GFX950-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7]
; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v2f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> %y)
  ret <2 x double> %result
}
|
|
|
|
; llvm.minimumnum on <2 x double> with the nnan flag on the call.
; The checks expect only the two 64-bit min instructions on every run line,
; with no self-max of the operands ahead of them.
define <2 x double> @v_minimumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) {
; GFX7-LABEL: v_minimumnum_v2f64_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v2f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v2f64_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f64_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v2f64_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v2f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> %y)
  ret <2 x double> %result
}
|
|
|
|
; llvm.minimumnum on <3 x double> with no fast-math flags.
; On every run line the checks expect each 64-bit operand to be replaced by
; a max of the register pair with itself before the min (presumably to quiet
; signaling NaNs -- confirm). The gfx900 and gfx950 run lines carry separate
; checks here because the expected instruction order differs between them.
define <3 x double> @v_minimumnum_v3f64(<3 x double> %x, <3 x double> %y) {
; GFX7-LABEL: v_minimumnum_v3f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v3f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v3f64:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v3f64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
; GFX950-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9]
; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX950-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11]
; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7]
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX10-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX10-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v3f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX11-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v3f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX12-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
; GFX12-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9]
; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <3 x double> @llvm.minimumnum.v3f64(<3 x double> %x, <3 x double> %y)
  ret <3 x double> %result
}
|
|
|
|
; llvm.minimumnum on <3 x double> with the nnan flag on the call.
; The checks expect only the three 64-bit min instructions on every run line,
; with no self-max of the operands ahead of them.
define <3 x double> @v_minimumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) {
; GFX7-LABEL: v_minimumnum_v3f64_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v3f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v3f64_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f64_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v3f64_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v3f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7]
; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9]
; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <3 x double> @llvm.minimumnum.v3f64(<3 x double> %x, <3 x double> %y)
  ret <3 x double> %result
}
|
|
|
|
define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) {
|
|
; GFX7-LABEL: v_minimumnum_v4f64:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
|
|
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
|
|
; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
|
|
; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
|
|
; GFX7-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
|
|
; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
|
|
; GFX7-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
|
|
; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
|
|
; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_minimumnum_v4f64:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
|
|
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
|
|
; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
|
|
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
|
|
; GFX8-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
|
|
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
|
|
; GFX8-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
|
|
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
|
|
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_minimumnum_v4f64:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
|
|
; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
|
|
; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
|
|
; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
|
|
; GFX900-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
|
|
; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
|
|
; GFX900-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
|
|
; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
|
|
; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX900-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: v_minimumnum_v4f64:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
|
|
; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
|
|
; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX950-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11]
|
|
; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
|
|
; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
|
|
; GFX950-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13]
|
|
; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
|
|
; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[8:9]
|
|
; GFX950-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15]
|
|
; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
|
|
; GFX950-NEXT: v_min_f64 v[6:7], v[6:7], v[8:9]
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_minimumnum_v4f64:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
|
|
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
|
|
; GFX10-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
|
|
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
|
|
; GFX10-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
|
|
; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
|
|
; GFX10-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
|
|
; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
|
|
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX10-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_minimumnum_v4f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
|
|
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
|
|
; GFX11-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
|
|
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
|
|
; GFX11-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
|
|
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
|
|
; GFX11-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
|
|
; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
|
|
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
|
|
; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_minimumnum_v4f64:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
|
|
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
|
|
; GFX12-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
|
|
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
|
|
; GFX12-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13]
|
|
; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
|
|
; GFX12-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15]
|
|
; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
|
|
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[8:9]
|
|
; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[10:11]
|
|
; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[12:13]
|
|
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
|
|
; GFX12-NEXT: v_min_num_f64_e32 v[6:7], v[6:7], v[14:15]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%result = call <4 x double> @llvm.minimumnum.v4f64(<4 x double> %x, <4 x double> %y)
|
|
ret <4 x double> %result
|
|
}
|
|
|
|
; <4 x double> minimumnum with the nnan fast-math flag on the call.
; The checks for every target consist of four 64-bit min instructions, one per
; element, with no self-max (v_max x, x) of the operands in front of them.
define <4 x double> @v_minimumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) {
; GFX7-LABEL: v_minimumnum_v4f64_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v4f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v4f64_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f64_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
; GFX10-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v4f64_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v4f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[8:9]
; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[10:11]
; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[12:13]
; GFX12-NEXT: v_min_num_f64_e32 v[6:7], v[6:7], v[14:15]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <4 x double> @llvm.minimumnum.v4f64(<4 x double> %x, <4 x double> %y)
  ret <4 x double> %result
}
|
|
|
|
; Scalar half minimumnum in a function that carries attribute group #0.
; The gfx700 checks convert both operands f32 -> f16 -> f32 and use one f32
; min. All later targets self-max each operand (v_max x, x) and then take the
; f16 min; the true16 variants keep both values in halves of v0.
define half @v_minimumnum_f16_no_ieee(half %x, half %y) #0 {
; GFX7-LABEL: v_minimumnum_f16_no_ieee:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f16_no_ieee:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f16_no_ieee:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f16_e32 v1, v1, v1
; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_no_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f16_e32 v1, v1, v1
; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_f16_no_ieee:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_f16_no_ieee:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_f16_no_ieee:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_f16_no_ieee:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %result = call half @llvm.minimumnum.f16(half %x, half %y)
  ret half %result
}
|
|
|
|
; Scalar half minimumnum with the nnan flag, in a function that carries
; attribute group #0. From gfx8 onwards the checks are a single f16 min; the
; gfx700 checks still convert through f16 and use one f32 min.
; NOTE(review): the function name says "nan" while the call uses nnan and the
; sibling tests are suffixed "_nnan_no_ieee". This looks like a typo; the name
; is kept as-is so the symbol and its labels stay stable.
define half @v_minimumnum_f16_nan_no_ieee(half %x, half %y) #0 {
; GFX7-LABEL: v_minimumnum_f16_nan_no_ieee:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f16_nan_no_ieee:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f16_nan_no_ieee:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_nan_no_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_f16_nan_no_ieee:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v1.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_f16_nan_no_ieee:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_f16_nan_no_ieee:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v1.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_f16_nan_no_ieee:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan half @llvm.minimumnum.f16(half %x, half %y)
  ret half %result
}
|
|
|
|
; Scalar float minimumnum in a function that carries attribute group #0.
; Before the min, the gfx700/gfx803 checks multiply each operand by 1.0, while
; gfx9 and later self-max each operand (a single v_dual instruction on
; gfx11/gfx12).
define float @v_minimumnum_f32_no_ieee(float %x, float %y) #0 {
; GFX7-LABEL: v_minimumnum_f32_no_ieee:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f32_no_ieee:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f32_no_ieee:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_no_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f32_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f32_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.minimumnum.f32(float %x, float %y)
  ret float %result
}
|
|
|
|
; Scalar float minimumnum with the nnan flag, in a function that carries
; attribute group #0. The checks for every target are a single f32 min with no
; instruction applied to the operands beforehand.
define float @v_minimumnum_f32_nnan_no_ieee(float %x, float %y) #0 {
; GFX7-LABEL: v_minimumnum_f32_nnan_no_ieee:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f32_nnan_no_ieee:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f32_nnan_no_ieee:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_nnan_no_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f32_nnan_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f32_nnan_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan float @llvm.minimumnum.f32(float %x, float %y)
  ret float %result
}
|
|
|
|
; Scalar double minimumnum in a function that carries attribute group #0.
; The checks for every target self-max both 64-bit operands (v_max x, x) and
; then take the f64 min.
define double @v_minimumnum_f64_no_ieee(double %x, double %y) #0 {
; GFX7-LABEL: v_minimumnum_f64_no_ieee:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f64_no_ieee:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f64_no_ieee:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_no_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f64_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f64_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call double @llvm.minimumnum.f64(double %x, double %y)
  ret double %result
}
|
|
|
|
; Scalar double minimumnum with the nnan flag, in a function that carries
; attribute group #0. The checks for every target are a single f64 min.
define double @v_minimumnum_f64_nnan_no_ieee(double %x, double %y) #0 {
; GFX7-LABEL: v_minimumnum_f64_nnan_no_ieee:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f64_nnan_no_ieee:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f64_nnan_no_ieee:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_nnan_no_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f64_nnan_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f64_nnan_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan double @llvm.minimumnum.f64(double %x, double %y)
  ret double %result
}
|
|
|
|
; <2 x half> minimumnum in a function that carries attribute group #0.
; gfx700 checks: each element is converted through f16 and minimized as f32.
; gfx803 checks: the high halves are handled with SDWA forms and the two
; results are merged with v_or. gfx9 and later: packed self-max of each operand
; followed by a packed min; the gfx950 checks also contain an s_nop 0 before
; the min.
define <2 x half> @v_minimumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
; GFX7-LABEL: v_minimumnum_v2f16_no_ieee:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v2f16_no_ieee:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v2f16_no_ieee:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
; GFX900-NEXT: v_pk_min_f16 v0, v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v2f16_no_ieee:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_pk_min_f16 v0, v0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f16_no_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
; GFX10-NEXT: v_pk_min_f16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v2f16_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v2f16_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> %x, <2 x half> %y)
  ret <2 x half> %result
}
|
|
|
|
; <2 x half> minimumnum with the nnan flag, in a function that carries
; attribute group #0. gfx9 and later check for one packed min. The gfx803
; checks use an SDWA min for the high halves, a plain min for the low halves
; and a v_or to merge them. The gfx700 checks go through per-element f16
; conversions and f32 mins.
define <2 x half> @v_minimumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
; GFX7-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <2 x half> @llvm.minimumnum.v2f16(<2 x half> %x, <2 x half> %y)
  ret <2 x half> %result
}
|
|
|
|
; <3 x half> minimumnum with the nnan flag, in a function that carries
; attribute group #0. gfx9 and later check for two packed mins (v0/v2 and
; v1/v3). The gfx803 checks use an SDWA min plus v_or only for the first
; register pair and a plain f16 min for v1/v3. The gfx700 checks handle the
; three elements individually as f32.
define <3 x half> @v_minimumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y) #0 {
; GFX7-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v0, v0, v2
; GFX10-NEXT: v_pk_min_f16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v2
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <3 x half> @llvm.minimumnum.v3f16(<3 x half> %x, <3 x half> %y)
  ret <3 x half> %result
}
|
|
|
|
; <4 x half> minimumnum with the nnan flag, in a function that carries
; attribute group #0. gfx9 and later check for two packed mins (v0/v2 and
; v1/v3). The gfx803 checks use, for each register pair, an SDWA min for the
; high halves, a plain min for the low halves and a v_or to merge them. The
; gfx700 checks handle the four elements individually as f32.
define <4 x half> @v_minimumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y) #0 {
; GFX7-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v0, v0, v2
; GFX10-NEXT: v_pk_min_f16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v2
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <4 x half> @llvm.minimumnum.v4f16(<4 x half> %x, <4 x half> %y)
  ret <4 x half> %result
}
|
|
|
|
; Attribute group used by the "*_no_ieee" test functions in this file; it sets
; the string function attribute "amdgpu-ieee" to "false".
attributes #0 = { "amdgpu-ieee"="false" }
|