These generic targets include multiple GPUs and will, in the future, provide a way to build once and run on multiple GPUs, at the cost of fewer optimization opportunities. Note that this only covers the compiler side of things; device libs and runtimes/loader/etc. don't know about these targets yet, so none of them actually work in practice right now. This is just the initial commit to make LLVM aware of them. It also contains the documentation changes for both this change and #76954.
2636 lines
116 KiB
LLVM
2636 lines
116 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,SDAG-GFX1100 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic -verify-machineinstrs --amdhsa-code-object-version=6 < %s | FileCheck -check-prefixes=GFX9GEN,SDAG-GFX9GEN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s

; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,GISEL-GFX1100 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9GEN,GISEL-GFX9GEN %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s

; Baseline scalar case: all three half operands are extended to float and fed
; to llvm.fmuladd.f32. The checks below expect gfx900 to fold the extensions
; into v_mad_mix_f32 and gfx906 / gfx1100 to fold them into v_fma_mix_f32,
; while gfx9-generic, fiji and hawaii fall back to a plain f32 mad/mac (with
; explicit v_cvt_f32_f16 conversions wherever the checks list them).
define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
; GFX906: ; %bb.0:
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
; GFX9GEN: ; %bb.0:
; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cvt_f32_f16_e32 v3, v0
; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
; VI-NEXT: v_cvt_f32_f16_e32 v0, v2
; VI-NEXT: v_mac_f32_e32 v0, v3, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
; SDAG-CI: ; %bb.0:
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}
|
|
|
|
; High-half operands reached through integer ops: each i32 source is shifted
; right by 16, truncated to i16 and bitcast to half before being extended to
; float for llvm.fmuladd.f32. The checks expect the mix instructions on
; gfx900 / gfx906 / gfx1100 to select the high halves via op_sel:[1,1,1],
; gfx9-generic and fiji to use WORD_1 SDWA conversions, and hawaii to keep the
; explicit 16-bit shifts ahead of the conversions.
define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src2) #0 {
; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
; GFX906: ; %bb.0:
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
; GFX9GEN: ; %bb.0:
; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NEXT: v_mac_f32_e32 v0, v3, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
; CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; CI-NEXT: v_cvt_f32_f16_e32 v3, v0
; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
; CI-NEXT: v_cvt_f32_f16_e32 v0, v2
; CI-NEXT: v_mac_f32_e32 v0, v3, v1
; CI-NEXT: s_setpc_b64 s[30:31]
  %src0.hi = lshr i32 %src0, 16
  %src1.hi = lshr i32 %src1, 16
  %src2.hi = lshr i32 %src2, 16
  %src0.i16 = trunc i32 %src0.hi to i16
  %src1.i16 = trunc i32 %src1.hi to i16
  %src2.i16 = trunc i32 %src2.hi to i16
  %src0.fp16 = bitcast i16 %src0.i16 to half
  %src1.fp16 = bitcast i16 %src1.i16 to half
  %src2.fp16 = bitcast i16 %src2.i16 to half
  %src0.ext = fpext half %src0.fp16 to float
  %src1.ext = fpext half %src1.fp16 to float
  %src2.ext = fpext half %src2.fp16 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}
|
|
|
|
; High-half operands reached through vector ops: element 1 of each <2 x half>
; source is extracted and extended to float for llvm.fmuladd.f32. The checks
; expect the same op_sel:[1,1,1] mix instructions as the integer-shift
; variant on gfx900 / gfx906 / gfx1100, and WORD_1 SDWA conversions on
; gfx9-generic and fiji; hawaii reads the operands from v1, v3 and v5.
define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
; GFX906: ; %bb.0:
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
; GFX9GEN: ; %bb.0:
; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NEXT: v_mac_f32_e32 v0, v3, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
; SDAG-CI: ; %bb.0:
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v5
; GISEL-CI-NEXT: v_mac_f32_e32 v0, v1, v2
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
  %src0.hi = extractelement <2 x half> %src0, i32 1
  %src1.hi = extractelement <2 x half> %src1, i32 1
  %src2.hi = extractelement <2 x half> %src2, i32 1
  %src0.ext = fpext half %src0.hi to float
  %src1.ext = fpext half %src1.hi to float
  %src2.ext = fpext half %src2.hi to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}
|
|
|
|
; Vector case: three <2 x half> operands are extended to <2 x float> and fed
; to llvm.fmuladd.v2f32. On gfx900 / gfx906 / gfx1100 the checks expect one
; mix instruction per lane (low lane without op_sel, high lane with
; op_sel:[1,1,1]); SelectionDAG and GlobalISel emit the two lanes in opposite
; order, hence the separate per-selector check prefixes. The other targets
; convert each lane separately and use v_mac_f32.
define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
; GFX1100-LABEL: v_mad_mix_v2f32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-NEXT: v_mov_b32_e32 v0, v3
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_v2f32:
; SDAG-GFX900: ; %bb.0:
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v3
; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX906-LABEL: v_mad_mix_v2f32:
; SDAG-GFX906: ; %bb.0:
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v3
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32:
; SDAG-GFX9GEN: ; %bb.0:
; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v4, v0
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v6, v1
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v1, v3, v5
; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v0, v4, v6
; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-VI-LABEL: v_mad_mix_v2f32:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v0
; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v6, v1
; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
; SDAG-VI-NEXT: v_mac_f32_e32 v1, v3, v5
; SDAG-VI-NEXT: v_mac_f32_e32 v0, v4, v6
; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_v2f32:
; SDAG-CI: ; %bb.0:
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v3
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v5
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v6
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v0
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v4
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v6
; SDAG-CI-NEXT: v_mac_f32_e32 v3, v1, v5
; SDAG-CI-NEXT: v_mov_b32_e32 v1, v3
; SDAG-CI-NEXT: v_mac_f32_e32 v0, v4, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX900-LABEL: v_mad_mix_v2f32:
; GISEL-GFX900: ; %bb.0:
; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3
; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX906-LABEL: v_mad_mix_v2f32:
; GISEL-GFX906: ; %bb.0:
; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32:
; GISEL-GFX9GEN: ; %bb.0:
; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v5, v1
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v5
; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v1, v4, v6
; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: v_mad_mix_v2f32:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v1
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v5
; GISEL-VI-NEXT: v_mac_f32_e32 v1, v4, v6
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_v2f32:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v0
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v1
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v4
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v5
; GISEL-CI-NEXT: v_mac_f32_e32 v0, v6, v2
; GISEL-CI-NEXT: v_mac_f32_e32 v1, v7, v3
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2.ext = fpext <2 x half> %src2 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  ret <2 x float> %result
}
|
|
|
|
; Vector case with per-operand lane shuffles ahead of the extension: src0 is
; swapped (<1, 0>), src1 is kept in order (<0, 1>) and src2 broadcasts its
; high lane (<1, 1>). On gfx900 / gfx906 / gfx1100 the checks expect the
; shuffles to be absorbed into the op_sel bits of the two mix instructions
; (op_sel:[1,0,1] and op_sel:[0,1,1]).
;
; The second shufflevector operand is never selected by any of the masks, so
; it is spelled `poison` rather than the deprecated `undef` placeholder.
define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
; GFX1100-LABEL: v_mad_mix_v2f32_shuffle:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-NEXT: v_mov_b32_e32 v0, v3
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mad_mix_v2f32_shuffle:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
; GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: v_mad_mix_v2f32_shuffle:
; GFX906: ; %bb.0:
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_mov_b32_e32 v0, v3
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX9GEN-LABEL: v_mad_mix_v2f32_shuffle:
; GFX9GEN: ; %bb.0:
; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v4, v0
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9GEN-NEXT: v_mad_f32 v0, v3, v0, v2
; GFX9GEN-NEXT: v_mac_f32_e32 v2, v4, v1
; GFX9GEN-NEXT: v_mov_b32_e32 v1, v2
; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_mad_mix_v2f32_shuffle:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NEXT: v_cvt_f32_f16_e32 v4, v0
; VI-NEXT: v_cvt_f32_f16_e32 v0, v1
; VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NEXT: v_mad_f32 v0, v3, v0, v2
; VI-NEXT: v_mac_f32_e32 v2, v4, v1
; VI-NEXT: v_mov_b32_e32 v1, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_v2f32_shuffle:
; SDAG-CI: ; %bb.0:
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v5
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v1
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v4
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v5
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v0
; SDAG-CI-NEXT: v_mad_f32 v0, v4, v2, v1
; SDAG-CI-NEXT: v_mac_f32_e32 v1, v5, v3
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_v2f32_shuffle:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GISEL-CI-NEXT: v_or_b32_e32 v0, v1, v0
; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v5
; GISEL-CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GISEL-CI-NEXT: v_or_b32_e32 v1, v1, v4
; GISEL-CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v0
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3
; GISEL-CI-NEXT: v_mad_f32 v0, v4, v0, v1
; GISEL-CI-NEXT: v_mac_f32_e32 v1, v5, v2
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
  %src0.shuf = shufflevector <2 x half> %src0, <2 x half> poison, <2 x i32> <i32 1, i32 0>
  %src1.shuf = shufflevector <2 x half> %src1, <2 x half> poison, <2 x i32> <i32 0, i32 1>
  %src2.shuf = shufflevector <2 x half> %src2, <2 x half> poison, <2 x i32> <i32 1, i32 1>
  %src0.ext = fpext <2 x half> %src0.shuf to <2 x float>
  %src1.ext = fpext <2 x half> %src1.shuf to <2 x float>
  %src2.ext = fpext <2 x half> %src2.shuf to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  ret <2 x float> %result
}
|
|
|
|
; Source-modifier case: src0 is negated (fneg) after being extended to float.
; The checks expect the negation to appear as a `-v0` operand modifier on the
; mix instruction for gfx900 / gfx906 / gfx1100. On the fallback targets
; SelectionDAG puts the modifier on v_mad_f32, whereas GlobalISel puts it on
; the v_cvt_f32_f16_e64 conversion, hence the per-selector prefixes.
define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
; GFX1100-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; GFX906: ; %bb.0:
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; SDAG-GFX9GEN: ; %bb.0:
; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; SDAG-CI: ; %bb.0:
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; GISEL-GFX9GEN: ; %bb.0:
; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e64 v3, -v0
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-VI-NEXT: v_cvt_f32_f16_e64 v3, -v0
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, -v0
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %src0.ext.neg = fneg float %src0.ext
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg, float %src1.ext, float %src2.ext)
  ret float %result
}
|
|
|
|
; Source-modifier case: llvm.fabs.f32 is applied to the extended src0. The
; checks expect the absolute value to appear as a `|v0|` operand modifier,
; on the mix instruction for gfx900 / gfx906 / gfx1100 and on v_mad_f32 for
; the fallback targets.
define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
; GFX1100-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
; GFX906: ; %bb.0:
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX9GEN-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
; GFX9GEN: ; %bb.0:
; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2
; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
; SDAG-CI: ; %bb.0:
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
; GISEL-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
  ret float %result
}
|
|
|
|
; Source-modifier case: the extended src0 goes through llvm.fabs.f32 and is
; then negated. The checks expect both to appear together as a `-|v0|`
; operand modifier, on the mix instruction for gfx900 / gfx906 / gfx1100 and
; on v_mad_f32 for the fallback targets.
define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
; GFX1100-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
; GFX906: ; %bb.0:
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX9GEN-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
; GFX9GEN: ; %bb.0:
; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX9GEN-NEXT: v_mad_f32 v0, -|v0|, v1, v2
; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
; VI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
; SDAG-CI: ; %bb.0:
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
; GISEL-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
  %src0.ext.neg.abs = fneg float %src0.ext.abs
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg.abs, float %src1.ext, float %src2.ext)
  ret float %result
}
|
|
|
|
; Mixed-precision case: src0 and src1 are half values extended to float,
; while src2 is already a float and is passed to llvm.fmuladd.f32 unchanged.
; The checks expect the mix instructions on gfx900 / gfx906 / gfx1100 to carry
; op_sel_hi:[1,1,0], and the fallback targets to convert only the first two
; operands before v_mad_f32.
define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
; GFX906: ; %bb.0:
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
; GFX9GEN: ; %bb.0:
; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2
; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
; VI-NEXT: v_mad_f32 v0, v0, v1, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
; SDAG-CI: ; %bb.0:
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  ret float %result
}
|
|
|
|
; Same shape as the plain f32-addend case, but the float addend is negated
; with fneg before the fmuladd. Every configuration's checks expect the
; negation to show up as a "-v2" operand on the multiply-add instruction
; rather than as a separate instruction.
define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %src2) #0 {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
; GFX1100:       ; %bb.0:
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
; GFX9GEN:       ; %bb.0:
; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, -v2
; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; VI-NEXT:    v_mad_f32 v0, v0, v1, -v2
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
; SDAG-CI:       ; %bb.0:
; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, -v2
; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
; GISEL-CI:       ; %bb.0:
; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v1, -v2
; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.neg = fneg float %src2
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg)
  ret float %result
}
|
|
|
|
; Two half multiplicands extended to float, with llvm.fabs.f32 applied to the
; float addend before the fmuladd. Every configuration's checks expect the
; absolute value to show up as a "|v2|" operand on the multiply-add
; instruction rather than as a separate instruction.
define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %src2) #0 {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
; GFX1100:       ; %bb.0:
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
; GFX9GEN:       ; %bb.0:
; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, |v2|
; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; VI-NEXT:    v_mad_f32 v0, v0, v1, |v2|
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
; SDAG-CI:       ; %bb.0:
; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, |v2|
; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
; GISEL-CI:       ; %bb.0:
; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v1, |v2|
; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.abs = call float @llvm.fabs.f32(float %src2)
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.abs)
  ret float %result
}
|
|
|
|
; Two half multiplicands extended to float, with the float addend passed
; through llvm.fabs.f32 and then fneg before the fmuladd. Every
; configuration's checks expect the combined operation to show up as a
; "-|v2|" operand on the multiply-add instruction.
define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float %src2) #0 {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
; GFX1100:       ; %bb.0:
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
; GFX9GEN:       ; %bb.0:
; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, -|v2|
; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; VI-NEXT:    v_mad_f32 v0, v0, v1, -|v2|
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
; SDAG-CI:       ; %bb.0:
; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, -|v2|
; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
; GISEL-CI:       ; %bb.0:
; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v1, -|v2|
; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.abs = call float @llvm.fabs.f32(float %src2)
  %src2.neg.abs = fneg float %src2.abs
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg.abs)
  ret float %result
}
|
|
|
|
; TODO: Fold inline immediates. This needs care because the operand may be an
; f16 inline immediate that is converted to f32, which is not the same as an
; actual f32 inline immediate.
|
|
|
|
; Two half multiplicands extended to float with the float constant 1.0 as the
; addend. In the checks, the configurations that select v_mad_mix_f32 /
; v_fma_mix_f32 first move 1.0 into a register (s_mov_b32 for SelectionDAG,
; v_mov_b32 for GlobalISel), while the v_mad_f32 configurations use 1.0
; directly as the third operand.
define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; SDAG-GFX1100:       ; %bb.0:
; SDAG-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-NEXT:    s_mov_b32 s0, 1.0
; SDAG-GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
; SDAG-GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; SDAG-GFX900:       ; %bb.0:
; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX900-NEXT:    s_mov_b32 s4, 1.0
; SDAG-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; SDAG-GFX906:       ; %bb.0:
; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX906-NEXT:    s_mov_b32 s4, 1.0
; SDAG-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; GFX9GEN:       ; %bb.0:
; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, 1.0
; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; VI-NEXT:    v_mad_f32 v0, v0, v1, 1.0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; SDAG-CI:       ; %bb.0:
; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, 1.0
; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; GISEL-GFX1100:       ; %bb.0:
; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v2, 1.0
; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; GISEL-GFX900:       ; %bb.0:
; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX900-NEXT:    v_mov_b32_e32 v2, 1.0
; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; GISEL-GFX906:       ; %bb.0:
; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX906-NEXT:    v_mov_b32_e32 v2, 1.0
; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; GISEL-CI:       ; %bb.0:
; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v1, 1.0
; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 1.0)
  ret float %result
}
|
|
|
|
; Two half multiplicands extended to float with the float constant
; 0x3FC45F3060000000 as the addend. The checks print that constant either as
; 0.15915494 or as the 32-bit pattern 0x3e22f983, depending on the
; configuration and on the instruction that consumes it.
define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 {
; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; SDAG-GFX1100:       ; %bb.0:
; SDAG-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-NEXT:    s_mov_b32 s0, 0.15915494
; SDAG-GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
; SDAG-GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; SDAG-GFX900:       ; %bb.0:
; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX900-NEXT:    s_mov_b32 s4, 0.15915494
; SDAG-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; SDAG-GFX906:       ; %bb.0:
; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX906-NEXT:    s_mov_b32 s4, 0.15915494
; SDAG-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; GFX9GEN:       ; %bb.0:
; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, 0.15915494
; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; VI-NEXT:    v_mad_f32 v0, v0, v1, 0.15915494
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; SDAG-CI:       ; %bb.0:
; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT:    v_madak_f32 v0, v0, v1, 0x3e22f983
; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; GISEL-GFX1100:       ; %bb.0:
; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v2, 0.15915494
; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; GISEL-GFX900:       ; %bb.0:
; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX900-NEXT:    v_mov_b32_e32 v2, 0.15915494
; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; GISEL-GFX906:       ; %bb.0:
; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX906-NEXT:    v_mov_b32_e32 v2, 0.15915494
; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; GISEL-CI:       ; %bb.0:
; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT:    v_mov_b32_e32 v0, 0x3e22f983
; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v2, v1
; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 0x3FC45F3060000000)
  ret float %result
}
|
|
|
|
; Attempt to break inline immediate folding. If the operand is
; interpreted as f32, the inline immediate is really the f16 inline
; immediate value converted to f32.
; fpext f16 1/2pi = 0x3e230000
;       f32 1/2pi = 0x3e22f983
|
|
|
|
; Two half multiplicands extended to float, with the addend produced by
; extending the half constant 0xH3118 to float. Every configuration's checks
; expect the addend to appear as the 32-bit literal 0x3e230000, i.e. the
; extended half value, never as 0x3e22f983.
define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 {
; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; SDAG-GFX1100:       ; %bb.0:
; SDAG-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-NEXT:    s_mov_b32 s0, 0x3e230000
; SDAG-GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
; SDAG-GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; SDAG-GFX900:       ; %bb.0:
; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX900-NEXT:    s_mov_b32 s4, 0x3e230000
; SDAG-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; SDAG-GFX906:       ; %bb.0:
; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX906-NEXT:    s_mov_b32 s4, 0x3e230000
; SDAG-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; SDAG-GFX9GEN:       ; %bb.0:
; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SDAG-GFX9GEN-NEXT:    v_madak_f32 v0, v0, v1, 0x3e230000
; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; SDAG-VI:       ; %bb.0:
; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SDAG-VI-NEXT:    v_madak_f32 v0, v0, v1, 0x3e230000
; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; SDAG-CI:       ; %bb.0:
; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT:    v_madak_f32 v0, v0, v1, 0x3e230000
; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; GISEL-GFX1100:       ; %bb.0:
; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v2, 0x3e230000
; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; GISEL-GFX900:       ; %bb.0:
; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX900-NEXT:    v_mov_b32_e32 v2, 0x3e230000
; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; GISEL-GFX906:       ; %bb.0:
; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX906-NEXT:    v_mov_b32_e32 v2, 0x3e230000
; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; GISEL-GFX9GEN:       ; %bb.0:
; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GISEL-GFX9GEN-NEXT:    v_mov_b32_e32 v0, 0x3e230000
; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v2, v1
; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; GISEL-VI:       ; %bb.0:
; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x3e230000
; GISEL-VI-NEXT:    v_mac_f32_e32 v0, v2, v1
; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; GISEL-CI:       ; %bb.0:
; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT:    v_mov_b32_e32 v0, 0x3e230000
; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v2, v1
; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2 = fpext half 0xH3118 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  ret float %result
}
|
|
|
|
|
|
; Two half multiplicands extended to float, with the addend produced by
; extending the half constant 0xH003F (bit pattern 63) to float. Every
; configuration's checks expect the addend to appear as the 32-bit literal
; 0x367c0000, i.e. the extended half value.
define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; SDAG-GFX1100:       ; %bb.0:
; SDAG-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-NEXT:    s_mov_b32 s0, 0x367c0000
; SDAG-GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
; SDAG-GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; SDAG-GFX900:       ; %bb.0:
; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX900-NEXT:    s_mov_b32 s4, 0x367c0000
; SDAG-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; SDAG-GFX906:       ; %bb.0:
; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX906-NEXT:    s_mov_b32 s4, 0x367c0000
; SDAG-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; SDAG-GFX9GEN:       ; %bb.0:
; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SDAG-GFX9GEN-NEXT:    v_madak_f32 v0, v0, v1, 0x367c0000
; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; SDAG-VI:       ; %bb.0:
; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SDAG-VI-NEXT:    v_madak_f32 v0, v0, v1, 0x367c0000
; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; SDAG-CI:       ; %bb.0:
; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT:    v_madak_f32 v0, v0, v1, 0x367c0000
; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; GISEL-GFX1100:       ; %bb.0:
; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v2, 0x367c0000
; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; GISEL-GFX900:       ; %bb.0:
; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX900-NEXT:    v_mov_b32_e32 v2, 0x367c0000
; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; GISEL-GFX906:       ; %bb.0:
; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX906-NEXT:    v_mov_b32_e32 v2, 0x367c0000
; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; GISEL-GFX9GEN:       ; %bb.0:
; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GISEL-GFX9GEN-NEXT:    v_mov_b32_e32 v0, 0x367c0000
; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v2, v1
; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; GISEL-VI:       ; %bb.0:
; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x367c0000
; GISEL-VI-NEXT:    v_mac_f32_e32 v0, v2, v1
; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; GISEL-CI:       ; %bb.0:
; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT:    v_mov_b32_e32 v0, 0x367c0000
; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v2, v1
; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2 = fpext half 0xH003F to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  ret float %result
}
|
|
|
|
; Vector form: both <2 x half> sources are extended to <2 x float> and fed to
; llvm.fmuladd.v2f32 with a <1.0, 1.0> addend. For the configurations that
; select v_mad_mix_f32 / v_fma_mix_f32, the checks expect two such
; instructions, one carrying op_sel:[1,1,0] in addition to op_sel_hi:[1,1,0],
; with 1.0 first moved into a register. The other configurations expect
; per-element conversions followed by two v_mad_f32 with a 1.0 operand.
define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 {
; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_f32imm1:
; SDAG-GFX1100:       ; %bb.0:
; SDAG-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-NEXT:    s_mov_b32 s0, 1.0
; SDAG-GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
; SDAG-GFX1100-NEXT:    v_mov_b32_e32 v0, v2
; SDAG-GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imm1:
; SDAG-GFX900:       ; %bb.0:
; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX900-NEXT:    s_mov_b32 s4, 1.0
; SDAG-GFX900-NEXT:    v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
; SDAG-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX900-NEXT:    v_mov_b32_e32 v1, v2
; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imm1:
; SDAG-GFX906:       ; %bb.0:
; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX906-NEXT:    s_mov_b32 s4, 1.0
; SDAG-GFX906-NEXT:    v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT:    v_mov_b32_e32 v1, v2
; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1:
; SDAG-GFX9GEN:       ; %bb.0:
; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v3, v1
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG-GFX9GEN-NEXT:    v_mad_f32 v0, v0, v3, 1.0
; SDAG-GFX9GEN-NEXT:    v_mad_f32 v1, v2, v1, 1.0
; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imm1:
; SDAG-VI:       ; %bb.0:
; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v3, v1
; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG-VI-NEXT:    v_mad_f32 v0, v0, v3, 1.0
; SDAG-VI-NEXT:    v_mad_f32 v1, v2, v1, 1.0
; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imm1:
; SDAG-CI:       ; %bb.0:
; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v2, 1.0
; SDAG-CI-NEXT:    v_mad_f32 v1, v1, v3, 1.0
; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_f32imm1:
; GISEL-GFX1100:       ; %bb.0:
; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v3, 1.0
; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v0, v2
; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imm1:
; GISEL-GFX900:       ; %bb.0:
; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX900-NEXT:    v_mov_b32_e32 v3, 1.0
; GISEL-GFX900-NEXT:    v_mad_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
; GISEL-GFX900-NEXT:    v_mad_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
; GISEL-GFX900-NEXT:    v_mov_b32_e32 v0, v2
; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imm1:
; GISEL-GFX906:       ; %bb.0:
; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX906-NEXT:    v_mov_b32_e32 v3, 1.0
; GISEL-GFX906-NEXT:    v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
; GISEL-GFX906-NEXT:    v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
; GISEL-GFX906-NEXT:    v_mov_b32_e32 v0, v2
; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1:
; GISEL-GFX9GEN:       ; %bb.0:
; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v1
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-GFX9GEN-NEXT:    v_mad_f32 v0, v2, v0, 1.0
; GISEL-GFX9GEN-NEXT:    v_mad_f32 v1, v3, v1, 1.0
; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imm1:
; GISEL-VI:       ; %bb.0:
; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v1
; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT:    v_mad_f32 v0, v2, v0, 1.0
; GISEL-VI-NEXT:    v_mad_f32 v1, v3, v1, 1.0
; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imm1:
; GISEL-CI:       ; %bb.0:
; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v2, 1.0
; GISEL-CI-NEXT:    v_mad_f32 v1, v1, v3, 1.0
; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 1.0, float 1.0>)
  ret <2 x float> %result
}
|
|
|
|
; fmuladd of two fpext'd <2 x half> operands whose addend is itself an fpext of
; the half splat constant 0xH3118. The checks below show the addend reaching
; every target as the 32-bit literal 0x3e230000:
;   - GFX1100/GFX900/GFX906: moved into a register (s_mov_b32 for SDAG,
;     v_mov_b32 for GISEL) and used as the third source of
;     v_fma_mix_f32 / v_mad_mix_f32 with op_sel_hi:[1,1,0].
;   - GFX9GEN/VI/CI: sources are converted with v_cvt_f32_f16 and the literal
;     is consumed by v_madak_f32 / v_mac_f32.
; Attribute group #0 is defined outside this part of the file.
define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
|
|
; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
|
|
; SDAG-GFX1100: ; %bb.0:
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x3e230000
|
|
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
|
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
|
|
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
|
; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v2
|
|
; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
|
|
; SDAG-GFX900: ; %bb.0:
|
|
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000
|
|
; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
|
; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
|
|
; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
|
|
; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
|
|
; SDAG-GFX906: ; %bb.0:
|
|
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000
|
|
; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
|
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
|
|
; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
|
|
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
|
|
; SDAG-GFX9GEN: ; %bb.0:
|
|
; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1
|
|
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; SDAG-GFX9GEN-NEXT: v_mov_b32_e32 v1, 0x3e230000
|
|
; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000
|
|
; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v1, v2, v4
|
|
; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
|
|
; SDAG-VI: ; %bb.0:
|
|
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1
|
|
; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0x3e230000
|
|
; SDAG-VI-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000
|
|
; SDAG-VI-NEXT: v_mac_f32_e32 v1, v2, v4
|
|
; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
|
|
; SDAG-CI: ; %bb.0:
|
|
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
|
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
|
|
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
|
|
; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e230000
|
|
; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e230000
|
|
; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3
|
|
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
|
|
; GISEL-GFX1100: ; %bb.0:
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v3, 0x3e230000
|
|
; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
|
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
|
|
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
|
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v2
|
|
; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
|
|
; GISEL-GFX900: ; %bb.0:
|
|
; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX900-NEXT: v_mov_b32_e32 v3, 0x3e230000
|
|
; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
|
|
; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
|
; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2
|
|
; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
|
|
; GISEL-GFX906: ; %bb.0:
|
|
; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX906-NEXT: v_mov_b32_e32 v3, 0x3e230000
|
|
; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
|
|
; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
|
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
|
|
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
|
|
; GISEL-GFX9GEN: ; %bb.0:
|
|
; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
|
|
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
|
|
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v1, 0x3e230000
|
|
; GISEL-GFX9GEN-NEXT: v_madak_f32 v0, v2, v0, 0x3e230000
|
|
; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v1, v3, v4
|
|
; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
|
|
; GISEL-VI: ; %bb.0:
|
|
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
|
|
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1
|
|
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0x3e230000
|
|
; GISEL-VI-NEXT: v_madak_f32 v0, v2, v0, 0x3e230000
|
|
; GISEL-VI-NEXT: v_mac_f32_e32 v1, v3, v4
|
|
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
|
|
; GISEL-CI: ; %bb.0:
|
|
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
|
|
; GISEL-CI-NEXT: v_mov_b32_e32 v1, 0x3e230000
|
|
; GISEL-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e230000
|
|
; GISEL-CI-NEXT: v_mac_f32_e32 v1, v4, v3
|
|
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
|
|
%src0.ext = fpext <2 x half> %src0 to <2 x float>
|
|
%src1.ext = fpext <2 x half> %src1 to <2 x float>
|
|
%src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float>
|
|
%result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2)
|
|
ret <2 x float> %result
|
|
}
|
|
|
|
; fmuladd of two fpext'd <2 x half> operands with an f32 splat constant addend
; (0x3FC45F3060000000) passed directly as the third operand. The checks below
; print that addend as 0.15915494 on GFX1100/GFX900/GFX906/GFX9GEN/VI and as
; the literal 0x3e22f983 on CI:
;   - GFX1100/GFX900/GFX906: the constant is moved into a register and used as
;     the third source of v_fma_mix_f32 / v_mad_mix_f32 with op_sel_hi:[1,1,0].
;   - GFX9GEN/VI: v_mad_f32 takes 0.15915494 directly as its third operand.
;   - CI: v_madak_f32 / v_mac_f32 with the 0x3e22f983 literal.
; Attribute group #0 is defined outside this part of the file.
define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
|
|
; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_f32imminv2pi:
|
|
; SDAG-GFX1100: ; %bb.0:
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0.15915494
|
|
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
|
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
|
|
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
|
; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v2
|
|
; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi:
|
|
; SDAG-GFX900: ; %bb.0:
|
|
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494
|
|
; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
|
; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
|
|
; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
|
|
; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi:
|
|
; SDAG-GFX906: ; %bb.0:
|
|
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494
|
|
; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
|
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
|
|
; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
|
|
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi:
|
|
; SDAG-GFX9GEN: ; %bb.0:
|
|
; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1
|
|
; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, v0, v3, 0.15915494
|
|
; SDAG-GFX9GEN-NEXT: v_mad_f32 v1, v2, v1, 0.15915494
|
|
; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
|
|
; SDAG-VI: ; %bb.0:
|
|
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1
|
|
; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; SDAG-VI-NEXT: v_mad_f32 v0, v0, v3, 0.15915494
|
|
; SDAG-VI-NEXT: v_mad_f32 v1, v2, v1, 0.15915494
|
|
; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
|
|
; SDAG-CI: ; %bb.0:
|
|
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
|
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
|
|
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
|
|
; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e22f983
|
|
; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e22f983
|
|
; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3
|
|
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_f32imminv2pi:
|
|
; GISEL-GFX1100: ; %bb.0:
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v3, 0.15915494
|
|
; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
|
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
|
|
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
|
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v2
|
|
; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi:
|
|
; GISEL-GFX900: ; %bb.0:
|
|
; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX900-NEXT: v_mov_b32_e32 v3, 0.15915494
|
|
; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
|
|
; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
|
; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2
|
|
; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi:
|
|
; GISEL-GFX906: ; %bb.0:
|
|
; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX906-NEXT: v_mov_b32_e32 v3, 0.15915494
|
|
; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
|
|
; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
|
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
|
|
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi:
|
|
; GISEL-GFX9GEN: ; %bb.0:
|
|
; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
|
|
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
|
|
; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GISEL-GFX9GEN-NEXT: v_mad_f32 v0, v2, v0, 0.15915494
|
|
; GISEL-GFX9GEN-NEXT: v_mad_f32 v1, v3, v1, 0.15915494
|
|
; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
|
|
; GISEL-VI: ; %bb.0:
|
|
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
|
|
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1
|
|
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, 0.15915494
|
|
; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, 0.15915494
|
|
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
|
|
; GISEL-CI: ; %bb.0:
|
|
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
|
|
; GISEL-CI-NEXT: v_mov_b32_e32 v1, 0x3e22f983
|
|
; GISEL-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e22f983
|
|
; GISEL-CI-NEXT: v_mac_f32_e32 v1, v4, v3
|
|
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
|
|
%src0.ext = fpext <2 x half> %src0 to <2 x float>
|
|
%src1.ext = fpext <2 x half> %src1 to <2 x float>
|
|
%result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 0x3FC45F3060000000, float 0x3FC45F3060000000>)
|
|
ret <2 x float> %result
|
|
}
|
|
|
|
; Takes element 1 of each <2 x half> source, extends it to float, and feeds the
; three values to fmuladd; the result is then passed through maxnum(x, 0.0)
; followed by minnum(x, 1.0). Every target's checks expect the min/max pair to
; disappear into a `clamp` modifier on the multiply-add instruction:
;   - GFX1100/GFX900/GFX906: a single v_fma_mix_f32 / v_mad_mix_f32 with
;     op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp.
;   - GFX9GEN/VI: three v_cvt_f32_f16_sdwa (src0_sel:WORD_1) then v_mad_f32 clamp.
;   - CI: v_mad_f32 clamp reading v1/v3/v5 (GISEL converts them first).
; Attribute group #0 is defined outside this part of the file.
define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
|
|
; GFX1100-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
|
|
; GFX1100: ; %bb.0:
|
|
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
|
|
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX906-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
|
|
; GFX906: ; %bb.0:
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
|
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9GEN-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
|
|
; GFX9GEN: ; %bb.0:
|
|
; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
|
|
; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
|
|
; SDAG-CI: ; %bb.0:
|
|
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5 clamp
|
|
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
|
|
; GISEL-CI: ; %bb.0:
|
|
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v1
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v3
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v5
|
|
; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
|
|
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
|
|
%src0.hi = extractelement <2 x half> %src0, i32 1
|
|
%src1.hi = extractelement <2 x half> %src1, i32 1
|
|
%src2.hi = extractelement <2 x half> %src2, i32 1
|
|
%src0.ext = fpext half %src0.hi to float
|
|
%src1.ext = fpext half %src1.hi to float
|
|
%src2.ext = fpext half %src2.hi to float
|
|
%result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
|
|
%max = call float @llvm.maxnum.f32(float %result, float 0.0)
|
|
%clamp = call float @llvm.minnum.f32(float %max, float 1.0)
|
|
ret float %clamp
|
|
}
|
|
|
|
; Negative test: all three fmuladd operands are plain float arguments, with no
; fpext from half anywhere. The checks expect an ordinary f32 multiply-add on
; every target (v_fma_f32 on GFX1100/GFX906, v_mad_f32 on GFX900/GFX9GEN/VI/CI)
; and no v_mad_mix_f32 / v_fma_mix_f32.
; Attribute group #0 is defined outside this part of the file.
define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
|
|
; GFX1100-LABEL: no_mix_simple:
|
|
; GFX1100: ; %bb.0:
|
|
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX1100-NEXT: v_fma_f32 v0, v0, v1, v2
|
|
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: no_mix_simple:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_mad_f32 v0, v0, v1, v2
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX906-LABEL: no_mix_simple:
|
|
; GFX906: ; %bb.0:
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT: v_fma_f32 v0, v0, v1, v2
|
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9GEN-LABEL: no_mix_simple:
|
|
; GFX9GEN: ; %bb.0:
|
|
; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2
|
|
; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: no_mix_simple:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_mad_f32 v0, v0, v1, v2
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; CI-LABEL: no_mix_simple:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mad_f32 v0, v0, v1, v2
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
%result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
|
|
ret float %result
|
|
}
|
|
|
|
; Negative test with a source modifier: the first fmuladd operand is
; llvm.fabs.f32 of a plain float argument and nothing is extended from half.
; The checks expect the fabs to appear as the |v0| operand of an ordinary f32
; multiply-add on every target (v_fma_f32 on GFX1100/GFX906, v_mad_f32 on
; GFX900/GFX9GEN/VI/CI), with no mix instruction.
; Attribute group #0 is defined outside this part of the file.
define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
|
|
; GFX1100-LABEL: no_mix_simple_fabs:
|
|
; GFX1100: ; %bb.0:
|
|
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX1100-NEXT: v_fma_f32 v0, |v0|, v1, v2
|
|
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: no_mix_simple_fabs:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_mad_f32 v0, |v0|, v1, v2
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX906-LABEL: no_mix_simple_fabs:
|
|
; GFX906: ; %bb.0:
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT: v_fma_f32 v0, |v0|, v1, v2
|
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9GEN-LABEL: no_mix_simple_fabs:
|
|
; GFX9GEN: ; %bb.0:
|
|
; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2
|
|
; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: no_mix_simple_fabs:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; CI-LABEL: no_mix_simple_fabs:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
%src0.fabs = call float @llvm.fabs.f32(float %src0)
|
|
%result = call float @llvm.fmuladd.f32(float %src0.fabs, float %src1, float %src2)
|
|
ret float %result
|
|
}
|
|
|
|
; FIXME(DAG): Should be able to select in this case.
|
|
; All sources are converted from f16, so it doesn't matter
|
|
; v_mad_mix_f32 flushes.
|
|
|
|
; fmuladd where all three operands are fpext'd from scalar half arguments,
; compiled under attribute group #1 (defined outside this part of the file;
; presumably it enables f32 denormals, as the test name suggests -- confirm).
; Expected selection per the checks below:
;   - GFX1100/GFX906: v_fma_mix_f32 with op_sel_hi:[1,1,1].
;   - GFX900/GFX9GEN: three v_cvt_f32_f16 then v_fma_f32 (no v_mad_mix_f32).
;   - VI: three v_cvt_f32_f16 then separate v_mul_f32 + v_add_f32.
;   - CI: v_fma_f32 (GISEL emits the three v_cvt_f32_f16 first; SDAG does not).
define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %src1, half %src2) #1 {
|
|
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
|
|
; GFX1100: ; %bb.0:
|
|
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
|
|
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
; GFX900-NEXT: v_fma_f32 v0, v0, v1, v2
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
|
|
; GFX906: ; %bb.0:
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
|
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
|
|
; GFX9GEN: ; %bb.0:
|
|
; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
; GFX9GEN-NEXT: v_fma_f32 v0, v0, v1, v2
|
|
; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
; VI-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; VI-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
|
|
; SDAG-CI: ; %bb.0:
|
|
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2
|
|
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
|
|
; GISEL-CI: ; %bb.0:
|
|
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2
|
|
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
|
|
%src0.ext = fpext half %src0 to float
|
|
%src1.ext = fpext half %src1 to float
|
|
%src2.ext = fpext half %src2 to float
|
|
%result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
|
|
ret float %result
|
|
}
|
|
|
|
; fmuladd where the two multiplicands are fpext'd from scalar half arguments
; and the addend is a plain float argument, compiled under attribute group #1
; (defined outside this part of the file; presumably it enables f32 denormals,
; as the test name suggests -- confirm).
; Expected selection per the checks below:
;   - GFX1100/GFX906: v_fma_mix_f32 with op_sel_hi:[1,1,0].
;   - GFX900/GFX9GEN: two v_cvt_f32_f16 then v_fma_f32 (no v_mad_mix_f32).
;   - VI: two v_cvt_f32_f16 then separate v_mul_f32 + v_add_f32.
;   - CI: v_fma_f32 (GISEL emits the two v_cvt_f32_f16 first; SDAG does not).
define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, float %src2) #1 {
|
|
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
|
|
; GFX1100: ; %bb.0:
|
|
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
|
|
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX900-NEXT: v_fma_f32 v0, v0, v1, v2
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
|
|
; GFX906: ; %bb.0:
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
|
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
|
|
; GFX9GEN: ; %bb.0:
|
|
; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX9GEN-NEXT: v_fma_f32 v0, v0, v1, v2
|
|
; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; VI-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; VI-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
|
|
; SDAG-CI: ; %bb.0:
|
|
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2
|
|
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
|
|
; GISEL-CI: ; %bb.0:
|
|
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2
|
|
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
|
|
%src0.ext = fpext half %src0 to float
|
|
%src1.ext = fpext half %src1 to float
|
|
%result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
|
|
ret float %result
|
|
}
|
|
|
|
; Three half arguments are fpext'd to float and combined with a separate fmul
; and fadd that carry no fast-math flags (no `contract`), compiled under
; attribute group #1 (defined outside this part of the file; presumably it
; enables f32 denormals, as the test name suggests -- confirm).
; The checks expect no fusion on any target: the multiply and add stay as
; v_mul_f32 followed by v_add_f32. All targets except SDAG-CI also show the
; three v_cvt_f32_f16 conversions ahead of the arithmetic.
define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, half %src2) #1 {
|
|
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
|
|
; GFX1100: ; %bb.0:
|
|
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX1100-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX1100-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX1100-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX900-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
|
|
; GFX906: ; %bb.0:
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX906-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX906-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
; GFX906-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX906-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
|
|
; GFX9GEN: ; %bb.0:
|
|
; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
; GFX9GEN-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX9GEN-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
; VI-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; VI-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
|
|
; SDAG-CI: ; %bb.0:
|
|
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
|
|
; GISEL-CI: ; %bb.0:
|
|
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
|
|
%src0.ext = fpext half %src0 to float
|
|
%src1.ext = fpext half %src1 to float
|
|
%src2.ext = fpext half %src2 to float
|
|
%mul = fmul float %src0.ext, %src1.ext
|
|
%result = fadd float %mul, %src2.ext
|
|
ret float %result
|
|
}
|
|
|
|
; Two half arguments are fpext'd to float, multiplied with a plain fmul, and the
; product is added to a float argument with a plain fadd. Neither operation
; carries fast-math flags (no `contract`). Compiled under attribute group #1
; (defined outside this part of the file; presumably it enables f32 denormals,
; as the test name suggests -- confirm).
; The checks expect no fusion on any target: v_mul_f32 followed by v_add_f32.
; All targets except SDAG-CI also show the two v_cvt_f32_f16 conversions first.
define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, float %src2) #1 {
|
|
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
|
|
; GFX1100: ; %bb.0:
|
|
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX1100-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX1100-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX900-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
|
|
; GFX906: ; %bb.0:
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX906-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX906-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX906-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
|
|
; GFX9GEN: ; %bb.0:
|
|
; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX9GEN-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX9GEN-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; VI-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; VI-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
|
|
; SDAG-CI: ; %bb.0:
|
|
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
|
|
; GISEL-CI: ; %bb.0:
|
|
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
|
|
%src0.ext = fpext half %src0 to float
|
|
%src1.ext = fpext half %src1 to float
|
|
%mul = fmul float %src0.ext, %src1.ext
|
|
%result = fadd float %mul, %src2
|
|
ret float %result
|
|
}
|
|
|
|
; Three half arguments are fpext'd to float and combined with a separate
; `fmul contract` and `fadd contract`, compiled under attribute group #0
; (defined outside this part of the file; presumably the denormal-flushing
; configuration, as the test name suggests -- confirm).
; With `contract` on both operations the checks expect the pair to be fused:
;   - GFX1100/GFX900/GFX906: one v_fma_mix_f32 / v_mad_mix_f32 with
;     op_sel_hi:[1,1,1].
;   - GFX9GEN/VI/GISEL-CI: three v_cvt_f32_f16 then v_mac_f32.
;   - SDAG-CI: a single v_mad_f32.
define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, half %src2) #0 {
|
|
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
|
|
; GFX1100: ; %bb.0:
|
|
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
|
|
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
|
|
; GFX906: ; %bb.0:
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
|
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
|
|
; GFX9GEN: ; %bb.0:
|
|
; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
|
|
; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
|
|
; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_cvt_f32_f16_e32 v3, v0
|
|
; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; VI-NEXT: v_cvt_f32_f16_e32 v0, v2
|
|
; VI-NEXT: v_mac_f32_e32 v0, v3, v1
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
|
|
; SDAG-CI: ; %bb.0:
|
|
; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
|
|
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
|
|
; GISEL-CI: ; %bb.0:
|
|
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
|
|
; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
|
|
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
|
|
%src0.ext = fpext half %src0 to float
|
|
%src1.ext = fpext half %src1 to float
|
|
%src2.ext = fpext half %src2 to float
|
|
%mul = fmul contract float %src0.ext, %src1.ext
|
|
%result = fadd contract float %mul, %src2.ext
|
|
ret float %result
|
|
}
|
|
|
|
; Mixed-precision variant: %src0 and %src1 are half values extended to float,
; while %src2 is already a float. The multiply and add are separate
; `fmul contract` / `fadd contract` instructions, and attribute group #0
; selects the preserve-sign f32 denormal mode.
; Expected selection per check prefix:
;   - GFX1100 and GFX906 checks expect v_fma_mix_f32, the GFX900 checks
;     v_mad_mix_f32, each with op_sel_hi:[1,1,0] (third operand not f16).
;   - GFX9GEN, VI and GISEL-CI checks expect two v_cvt_f32_f16 conversions
;     followed by v_mad_f32.
;   - SDAG-CI checks expect only v_mad_f32 on the incoming registers.
define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, float %src2) #0 {
|
|
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
|
|
; GFX1100:       ; %bb.0:
|
|
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
|
|
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
|
|
; GFX900:       ; %bb.0:
|
|
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
|
|
; GFX900-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
|
|
; GFX906:       ; %bb.0:
|
|
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
|
|
; GFX906-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
|
|
; GFX9GEN:       ; %bb.0:
|
|
; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
|
|
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, v2
|
|
; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
|
|
; VI:       ; %bb.0:
|
|
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
|
|
; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; VI-NEXT:    v_mad_f32 v0, v0, v1, v2
|
|
; VI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
|
|
; SDAG-CI:       ; %bb.0:
|
|
; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
|
|
; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
|
|
; GISEL-CI:       ; %bb.0:
|
|
; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
|
|
; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
|
|
  %src0.ext = fpext half %src0 to float
|
|
  %src1.ext = fpext half %src1 to float
|
|
  ; %src2 is used directly as a float; only the product operands are extended.
  %mul = fmul contract float %src0.ext, %src1.ext
|
|
  %result = fadd contract float %mul, %src2
|
|
  ret float %result
|
|
}
|
|
|
|
; The low half (element 0) of the i32 argument is negated as a half value
; *before* being extended to float, then used as the first operand of
; llvm.fmuladd together with the extended %src1 and %src2.
; Expected selection per check prefix:
;   - GFX1100, GFX900 and GFX906 checks expect the negation folded into the
;     mix instruction as a -v0 source modifier with op_sel_hi:[1,1,1].
;   - SDAG-GFX9GEN and SDAG-VI checks expect plain conversions and the
;     negation folded into v_mad_f32 (-v0).
;   - SDAG-CI checks expect one conversion of v0 and v_mad_f32 with -v0.
;   - The GISEL checks for GFX9GEN, VI and CI expect the negation folded into
;     v_cvt_f32_f16_e64 (-v0), followed by v_mac_f32.
define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
|
|
; GFX1100-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
|
|
; GFX1100:       ; %bb.0:
|
|
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX1100-NEXT:    v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
|
|
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
|
|
; GFX900:       ; %bb.0:
|
|
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT:    v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
|
|
; GFX900-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX906-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
|
|
; GFX906:       ; %bb.0:
|
|
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT:    v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
|
|
; GFX906-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
|
|
; SDAG-GFX9GEN:       ; %bb.0:
|
|
; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
|
|
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v2
|
|
; SDAG-GFX9GEN-NEXT:    v_mad_f32 v0, -v0, v1, v2
|
|
; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
|
|
; SDAG-VI:       ; %bb.0:
|
|
; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
|
|
; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
|
|
; SDAG-VI-NEXT:    v_mad_f32 v0, -v0, v1, v2
|
|
; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
|
|
; SDAG-CI:       ; %bb.0:
|
|
; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
|
|
; SDAG-CI-NEXT:    v_mad_f32 v0, -v0, v1, v2
|
|
; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
|
|
; GISEL-GFX9GEN:       ; %bb.0:
|
|
; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e64 v3, -v0
|
|
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
|
|
; GISEL-VI:       ; %bb.0:
|
|
; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-VI-NEXT:    v_cvt_f32_f16_e64 v3, -v0
|
|
; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; GISEL-VI-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
|
|
; GISEL-CI:       ; %bb.0:
|
|
; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e64 v3, -v0
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
|
|
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
|
|
  ; Element 0 is the low 16 bits of the packed argument.
  %src0 = extractelement <2 x half> %src0.arg.bc, i32 0
|
|
  ; The negation happens on the half value, ahead of the fpext.
  %src0.neg = fneg half %src0
|
|
  %src0.ext = fpext half %src0.neg to float
|
|
  %src1.ext = fpext half %src1 to float
|
|
  %src2.ext = fpext half %src2 to float
|
|
; The next line is disabled (commented out); it would negate the
; already-extended float and is not part of the tested IR.
;  %src0.ext.neg = fneg float %src0.ext
|
|
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
|
|
  ret float %result
|
|
}
|
|
|
|
; Make sure we don't fold pre-cvt fneg if we already have a fabs
;
; The high half (element 1) of the i32 argument is negated as a half value,
; extended to float, and only then passed through llvm.fabs.f32 before being
; used as the first llvm.fmuladd operand.
; Expected selection per check prefix:
;   - GFX1100 checks expect a 16-bit right shift and an explicit v_xor_b32
;     with 0x8000, then v_fma_mix_f32 using only the |v0| modifier.
;   - GFX900 and GFX906 checks expect an s_mov_b32 of 0x8000 and an SDWA
;     v_xor_b32 reading WORD_1, then the mix instruction with |v0|.
;   - GFX9GEN and VI checks expect the negation on the SDWA conversion (-v0)
;     and the fabs on v_mad_f32 (|v0|).
;   - SDAG-CI checks expect a shift, a conversion with |v0| and v_mad_f32.
;   - GISEL-CI checks expect a shift, a conversion with -v0, two more
;     conversions and v_mad_f32 with |v0|.
|
|
|
|
define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
|
|
; GFX1100-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
|
|
; GFX1100:       ; %bb.0:
|
|
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX1100-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX1100-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
|
|
; GFX1100-NEXT:    v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
|
|
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
|
|
; GFX900:       ; %bb.0:
|
|
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT:    s_mov_b32 s4, 0x8000
|
|
; GFX900-NEXT:    v_xor_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
|
; GFX900-NEXT:    v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
|
|
; GFX900-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX906-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
|
|
; GFX906:       ; %bb.0:
|
|
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT:    s_mov_b32 s4, 0x8000
|
|
; GFX906-NEXT:    v_xor_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
|
; GFX906-NEXT:    v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
|
|
; GFX906-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9GEN-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
|
|
; GFX9GEN:       ; %bb.0:
|
|
; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v2
|
|
; GFX9GEN-NEXT:    v_mad_f32 v0, |v0|, v1, v2
|
|
; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
|
|
; VI:       ; %bb.0:
|
|
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT:    v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
|
|
; VI-NEXT:    v_mad_f32 v0, |v0|, v1, v2
|
|
; VI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
|
|
; SDAG-CI:       ; %bb.0:
|
|
; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
|
|
; SDAG-CI-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
|
|
; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
|
|
; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
|
|
; GISEL-CI:       ; %bb.0:
|
|
; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e64 v0, -v0
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
|
|
; GISEL-CI-NEXT:    v_mad_f32 v0, |v0|, v1, v2
|
|
; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
|
|
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
|
|
  ; Element 1 is the high 16 bits of the packed argument.
  %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
|
|
  ; fneg is applied to the half value (pre-conversion) ...
  %src0.neg = fneg half %src0
|
|
  %src0.ext = fpext half %src0.neg to float
|
|
  ; ... while fabs is applied to the extended float (post-conversion).
  %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
|
|
  %src1.ext = fpext half %src1 to float
|
|
  %src2.ext = fpext half %src2 to float
|
|
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
|
|
  ret float %result
|
|
}
|
|
|
|
; The high half (element 1) of the i32 argument goes through llvm.fabs.f16
; *before* being extended to float, then feeds llvm.fmuladd as the first
; operand together with the extended %src1 and %src2.
; Expected selection per check prefix:
;   - GFX1100, GFX900 and GFX906 checks expect the fabs folded into the mix
;     instruction as |v0| with op_sel:[1,0,0] selecting the high half.
;   - GFX9GEN and VI checks expect the fabs folded into an SDWA conversion
;     reading WORD_1, followed by v_mac_f32.
;   - SDAG-CI and GISEL-CI checks expect a 16-bit right shift and a
;     conversion with |v0|; SDAG then uses v_mad_f32, GISEL v_mac_f32.
define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
|
|
; GFX1100-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
|
|
; GFX1100:       ; %bb.0:
|
|
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX1100-NEXT:    v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
|
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
|
|
; GFX900:       ; %bb.0:
|
|
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT:    v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
|
; GFX900-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX906-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
|
|
; GFX906:       ; %bb.0:
|
|
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT:    v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
|
; GFX906-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9GEN-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
|
|
; GFX9GEN:       ; %bb.0:
|
|
; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
|
|
; VI:       ; %bb.0:
|
|
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT:    v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; VI-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; VI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
|
|
; SDAG-CI:       ; %bb.0:
|
|
; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
|
|
; SDAG-CI-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
|
|
; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
|
|
; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
|
|
; GISEL-CI:       ; %bb.0:
|
|
; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e64 v3, |v0|
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
|
|
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
|
|
  ; Element 1 is the high 16 bits of the packed argument.
  %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
|
|
  ; fabs is applied to the half value, ahead of the fpext.
  %src0.abs = call half @llvm.fabs.f16(half %src0)
|
|
  %src0.ext = fpext half %src0.abs to float
|
|
  %src1.ext = fpext half %src1 to float
|
|
  %src2.ext = fpext half %src2 to float
|
|
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
|
|
  ret float %result
|
|
}
|
|
|
|
; The whole <2 x half> vector is negated *before* its high element
; (element 1) is extracted and extended to float for llvm.fmuladd.
; Expected selection per check prefix:
;   - GFX1100, GFX900 and GFX906 checks expect the negation folded into the
;     mix instruction as -v0 with op_sel:[1,0,0] selecting the high half.
;   - SDAG-GFX9GEN and SDAG-VI checks expect an SDWA conversion reading
;     WORD_1 and the negation folded into v_mad_f32 (-v0).
;   - SDAG-CI checks expect a shift, a conversion with -v0 and v_mad_f32.
;   - The GISEL checks for GFX9GEN, VI and CI expect the vector negation to
;     remain an explicit v_xor_b32 with 0x80008000 on the packed register,
;     followed by plain conversions and v_mac_f32.
define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
|
|
; GFX1100-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
|
|
; GFX1100:       ; %bb.0:
|
|
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX1100-NEXT:    v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
|
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
|
|
; GFX900:       ; %bb.0:
|
|
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT:    v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
|
; GFX900-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX906-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
|
|
; GFX906:       ; %bb.0:
|
|
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT:    v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
|
; GFX906-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
|
|
; SDAG-GFX9GEN:       ; %bb.0:
|
|
; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v2
|
|
; SDAG-GFX9GEN-NEXT:    v_mad_f32 v0, -v0, v1, v2
|
|
; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
|
|
; SDAG-VI:       ; %bb.0:
|
|
; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
|
|
; SDAG-VI-NEXT:    v_mad_f32 v0, -v0, v1, v2
|
|
; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
|
|
; SDAG-CI:       ; %bb.0:
|
|
; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
|
|
; SDAG-CI-NEXT:    v_cvt_f32_f16_e64 v0, -v0
|
|
; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
|
|
; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
|
|
; GISEL-GFX9GEN:       ; %bb.0:
|
|
; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX9GEN-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
|
|
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
|
|
; GISEL-VI:       ; %bb.0:
|
|
; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-VI-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
|
|
; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; GISEL-VI-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
|
|
; GISEL-CI:       ; %bb.0:
|
|
; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-CI-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
|
|
; GISEL-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v3, v0
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
|
|
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
|
|
  ; Negate both lanes of the vector; only lane 1 is consumed below.
  %fneg = fneg <2 x half> %src0.arg.bc
|
|
  %src0 = extractelement <2 x half> %fneg, i32 1
|
|
  %src0.ext = fpext half %src0 to float
|
|
  %src1.ext = fpext half %src1 to float
|
|
  %src2.ext = fpext half %src2 to float
|
|
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
|
|
  ret float %result
|
|
}
|
|
|
|
; llvm.fabs.v2f16 is applied to the whole <2 x half> vector *before* its high
; element (element 1) is extracted and extended to float for llvm.fmuladd.
; Expected selection per check prefix:
;   - GFX1100, GFX900 and GFX906 checks expect the fabs folded into the mix
;     instruction as |v0| with op_sel:[1,0,0] selecting the high half.
;   - SDAG-GFX9GEN and SDAG-VI checks expect the fabs folded into an SDWA
;     conversion reading WORD_1, followed by v_mac_f32.
;   - SDAG-CI checks expect a shift, a conversion with |v0| and v_mad_f32.
;   - The GISEL checks for GFX9GEN, VI and CI expect the vector fabs to remain
;     an explicit v_and_b32 with 0x7fff7fff on the packed register, followed
;     by plain conversions and v_mac_f32.
define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
|
|
; GFX1100-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
|
|
; GFX1100:       ; %bb.0:
|
|
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX1100-NEXT:    v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
|
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
|
|
; GFX900:       ; %bb.0:
|
|
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT:    v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
|
; GFX900-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX906-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
|
|
; GFX906:       ; %bb.0:
|
|
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT:    v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
|
; GFX906-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
|
|
; SDAG-GFX9GEN:       ; %bb.0:
|
|
; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; SDAG-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
|
|
; SDAG-VI:       ; %bb.0:
|
|
; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; SDAG-VI-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
|
|
; SDAG-CI:       ; %bb.0:
|
|
; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
|
|
; SDAG-CI-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
|
|
; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
|
|
; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
|
|
; GISEL-GFX9GEN:       ; %bb.0:
|
|
; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX9GEN-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
|
|
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
|
|
; GISEL-VI:       ; %bb.0:
|
|
; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-VI-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
|
|
; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; GISEL-VI-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
|
|
; GISEL-CI:       ; %bb.0:
|
|
; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-CI-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
|
|
; GISEL-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v3, v0
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
|
|
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
|
|
  ; fabs on both lanes of the vector; only lane 1 is consumed below.
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
|
|
  %src0 = extractelement <2 x half> %fabs, i32 1
|
|
  %src0.ext = fpext half %src0 to float
|
|
  %src1.ext = fpext half %src1 to float
|
|
  %src2.ext = fpext half %src2 to float
|
|
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
|
|
  ret float %result
|
|
}
|
|
|
|
; llvm.fabs.v2f16 followed by a vector fneg is applied to the whole
; <2 x half> value *before* its high element (element 1) is extracted and
; extended to float for llvm.fmuladd.
; Expected selection per check prefix:
;   - GFX1100, GFX900 and GFX906 checks expect both modifiers folded into the
;     mix instruction as -|v0| with op_sel:[1,0,0] selecting the high half.
;   - SDAG-GFX9GEN and SDAG-VI checks expect the fabs on the SDWA conversion
;     (|v0|, WORD_1) and the negation on v_mad_f32 (-v0).
;   - SDAG-CI checks expect a shift, a conversion with -|v0| and v_mad_f32.
;   - The GISEL checks for GFX9GEN, VI and CI expect the fneg(fabs) pair to
;     become one explicit v_or_b32 with 0x80008000 on the packed register,
;     followed by plain conversions and v_mac_f32.
define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
|
|
; GFX1100-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
|
|
; GFX1100:       ; %bb.0:
|
|
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX1100-NEXT:    v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
|
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
|
|
; GFX900:       ; %bb.0:
|
|
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT:    v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
|
; GFX900-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX906-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
|
|
; GFX906:       ; %bb.0:
|
|
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT:    v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
|
; GFX906-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
|
|
; SDAG-GFX9GEN:       ; %bb.0:
|
|
; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v2
|
|
; SDAG-GFX9GEN-NEXT:    v_mad_f32 v0, -v0, v1, v2
|
|
; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
|
|
; SDAG-VI:       ; %bb.0:
|
|
; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
|
|
; SDAG-VI-NEXT:    v_mad_f32 v0, -v0, v1, v2
|
|
; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
|
|
; SDAG-CI:       ; %bb.0:
|
|
; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
|
|
; SDAG-CI-NEXT:    v_cvt_f32_f16_e64 v0, -|v0|
|
|
; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
|
|
; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
|
|
; GISEL-GFX9GEN:       ; %bb.0:
|
|
; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX9GEN-NEXT:    v_or_b32_e32 v0, 0x80008000, v0
|
|
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
|
|
; GISEL-VI:       ; %bb.0:
|
|
; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-VI-NEXT:    v_or_b32_e32 v0, 0x80008000, v0
|
|
; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; GISEL-VI-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
|
|
; GISEL-CI:       ; %bb.0:
|
|
; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-CI-NEXT:    v_or_b32_e32 v0, 0x80008000, v0
|
|
; GISEL-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v3, v0
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
|
|
; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v2
|
|
; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v3, v1
|
|
; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
|
|
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
|
|
  ; fabs then fneg on both lanes of the vector; only lane 1 is consumed below.
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
|
|
  %fneg.fabs = fneg <2 x half> %fabs
|
|
  %src0 = extractelement <2 x half> %fneg.fabs, i32 1
|
|
  %src0.ext = fpext half %src0 to float
|
|
  %src1.ext = fpext half %src1 to float
|
|
  %src2.ext = fpext half %src2 to float
|
|
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
|
|
  ret float %result
|
|
}
|
|
|
|
; Declarations of the LLVM intrinsics called by the tests in this file.
; Every declaration carries attribute group #2.
; fabs on scalar half, packed <2 x half>, and scalar float.
declare half @llvm.fabs.f16(half) #2
|
|
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
|
|
declare float @llvm.fabs.f32(float) #2
|
|
; f32 minnum / maxnum.
declare float @llvm.minnum.f32(float, float) #2
|
|
declare float @llvm.maxnum.f32(float, float) #2
|
|
; Multiply-add intrinsic, scalar and two-element vector forms.
declare float @llvm.fmuladd.f32(float, float, float) #2
|
|
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2
|
|
|
|
; Attribute groups referenced by the definitions and declarations above.
; #0 - nounwind, with the f32 denormal mode set to preserve-sign for both
;      components of the attribute value.
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
|
|
; #1 - nounwind, with the f32 denormal mode set to ieee for both components
;      (presumably used by the *_denormals tests - confirm against the rest
;      of the file).
attributes #1 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
|
|
; #2 - attached to the intrinsic declarations.
attributes #2 = { nounwind readnone speculatable }
|