SIInsertWaitcnts inserts waitcnt instructions to resolve data dependencies. The GFX10+ vscnt (VMEM store count) counter is never used in this way. It is only used to resolve memory dependencies, and that is handled by SIMemoryLegalizer. Hence there is no need to conservatively wait for vscnt to be 0 on function entry and before returns. Differential Revision: https://reviews.llvm.org/D153537
2348 lines
104 KiB
LLVM
2348 lines
104 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
|
|
|
; Test with buggy fract, shouldn't match
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-codegenprepare %s | FileCheck -check-prefixes=IR,GFX6-IR %s
|
|
|
|
; Working fract, but no f16
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdpal -mcpu=kaveri -amdgpu-codegenprepare %s | FileCheck -check-prefixes=IR,IR-FRACT,GFX7-IR %s
|
|
|
|
; Working fract and f16 support
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -amdgpu-codegenprepare %s | FileCheck -check-prefixes=IR,IR-FRACT,IR-LEGALF16 %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-codegenprepare %s | FileCheck -check-prefixes=IR,IR-FRACT,IR-LEGALF16 %s
|
|
|
|
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
|
|
|
|
; Test patterns to match v_fract_* instructions.
|
|
|
|
; Expansion as it appears in the library with the extra output for
|
|
; floor. We can fold the nan check into the instruction, but the
|
|
; inf check must remain.
|
|
; Full expansion: minnum(x - floor(x), 0x3f7fffff) wrapped in a NaN select
; and an infinity select, with floor(x) additionally stored through %ip.
; The GFX6-IR checks expect the expansion to be left untouched. The IR-FRACT
; checks expect the minnum and the NaN select to be replaced by
; llvm.amdgcn.fract.f32 while the infinity select and the floor used by the
; store remain.
define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly %ip) {
; GFX6-IR-LABEL: define float @safe_math_fract_f32
; GFX6-IR-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX6-IR-NEXT: entry:
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
; GFX6-IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
; GFX6-IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; GFX6-IR-NEXT: ret float [[COND6]]
;
; IR-FRACT-LABEL: define float @safe_math_fract_f32
; IR-FRACT-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0:[0-9]+]] {
; IR-FRACT-NEXT: entry:
; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
; IR-FRACT-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
; IR-FRACT-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
; IR-FRACT-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; IR-FRACT-NEXT: ret float [[COND6]]
;
; GFX6-LABEL: safe_math_fract_f32:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v3, v0
; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6
; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: safe_math_fract_f32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_fract_f32_e32 v4, v0
; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: safe_math_fract_f32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
; GFX8-NEXT: v_fract_f32_e32 v4, v0
; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
; GFX8-NEXT: v_floor_f32_e32 v3, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX8-NEXT: global_store_dword v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: safe_math_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v3, v0
; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX11-NEXT: v_floor_f32_e32 v4, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
; GFX11-NEXT: global_store_b32 v[1:2], v4, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  %uno = fcmp uno float %x, 0.000000e+00
  %cond = select i1 %uno, float %x, float %min
  %fabs = tail call float @llvm.fabs.f32(float %x)
  %cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000
  %cond6 = select i1 %cmpinf, float 0.000000e+00, float %cond
  store float %floor, ptr addrspace(1) %ip, align 4
  ret float %cond6
}
|
|
|
|
; Same expansion as the library form but without the infinity select:
; minnum(x - floor(x), 0x3f7fffff) guarded only by the NaN select, with
; floor(x) stored through %ip. The IR-FRACT checks expect the minnum and the
; NaN select to collapse into a single llvm.amdgcn.fract.f32 call; the GFX6-IR
; checks expect no change.
define float @safe_math_fract_f32_noinf_check(float %x, ptr addrspace(1) nocapture writeonly %ip) {
; GFX6-IR-LABEL: define float @safe_math_fract_f32_noinf_check
; GFX6-IR-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
; GFX6-IR-NEXT: entry:
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
; GFX6-IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; GFX6-IR-NEXT: ret float [[COND]]
;
; IR-FRACT-LABEL: define float @safe_math_fract_f32_noinf_check
; IR-FRACT-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
; IR-FRACT-NEXT: entry:
; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
; IR-FRACT-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; IR-FRACT-NEXT: ret float [[COND]]
;
; GFX6-LABEL: safe_math_fract_f32_noinf_check:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v3, v0
; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6
; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: safe_math_fract_f32_noinf_check:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: safe_math_fract_f32_noinf_check:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v3, v0
; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: global_store_dword v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: safe_math_fract_f32_noinf_check:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_floor_f32_e32 v3, v0
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: global_store_b32 v[1:2], v3, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  %uno = fcmp uno float %x, 0.000000e+00
  %cond = select i1 %uno, float %x, float %min
  store float %floor, ptr addrspace(1) %ip, align 4
  ret float %cond
}
|
|
|
|
; Cannot match fract without a nan check or no-nans.
|
|
; The input keeps the infinity select but has neither a NaN select nor any
; no-NaN information on %x. The shared IR checks expect the floor/fsub/minnum
; sequence to stay as written for every target, and the ISA checks expect the
; expanded v_floor/v_sub/v_min sequence everywhere.
define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly %ip) {
; IR-LABEL: define float @no_nan_check_math_fract_f32
; IR-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0:[0-9]+]] {
; IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
; IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
; IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[MIN]]
; IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; IR-NEXT: ret float [[COND6]]
;
; GFX6-LABEL: no_nan_check_math_fract_f32:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v3, v0
; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6
; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: no_nan_check_math_fract_f32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: no_nan_check_math_fract_f32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v3, v0
; GFX8-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
; GFX8-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX8-NEXT: global_store_dword v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: no_nan_check_math_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_floor_f32_e32 v3, v0
; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX11-NEXT: global_store_b32 v[1:2], v3, off
; GFX11-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  %fabs = tail call float @llvm.fabs.f32(float %x)
  %cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000
  %cond6 = select i1 %cmpinf, float 0.000000e+00, float %min
  store float %floor, ptr addrspace(1) %ip, align 4
  ret float %cond6
}
|
|
|
|
; Bare minnum(x - floor(x), 0x3f7fffff) with no selects; the only no-NaN
; information is nofpclass(nan) on the %x argument. The IR-FRACT checks expect
; a single `call nnan` to llvm.amdgcn.fract.f32; the GFX6-IR checks expect the
; input to be left unchanged.
define float @basic_fract_f32_nonans(float nofpclass(nan) %x) {
; GFX6-IR-LABEL: define float @basic_fract_f32_nonans
; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; GFX6-IR-NEXT: entry:
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; GFX6-IR-NEXT: ret float [[MIN]]
;
; IR-FRACT-LABEL: define float @basic_fract_f32_nonans
; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; IR-FRACT-NEXT: entry:
; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
; IR-FRACT-NEXT: ret float [[MIN]]
;
; GFX6-LABEL: basic_fract_f32_nonans:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: basic_fract_f32_nonans:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: basic_fract_f32_nonans:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: basic_fract_f32_nonans:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  ret float %min
}
|
|
|
|
; %x carries no nofpclass attribute and the minnum call carries only the nsz
; flag, so nothing here says the input is not a NaN. The shared IR checks
; expect the floor/fsub/minnum sequence to be preserved (nsz included) on
; every target, and every ISA prefix expects v_floor/v_sub/v_min.
define float @basic_fract_f32_flags_minnum(float %x) {
; IR-LABEL: define float @basic_fract_f32_flags_minnum
; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call nsz float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: ret float [[MIN]]
;
; GFX6-LABEL: basic_fract_f32_flags_minnum:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: basic_fract_f32_flags_minnum:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v1, v0
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: basic_fract_f32_flags_minnum:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v1, v0
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: basic_fract_f32_flags_minnum:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_floor_f32_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = tail call nsz float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  ret float %min
}
|
|
|
|
; Like the basic nofpclass(nan) case, but the fsub carries an nsz flag. The
; IR-FRACT checks still expect a single `call nnan` to llvm.amdgcn.fract.f32
; (the nsz flag does not appear on the replacement); the GFX6-IR checks expect
; the input, including `fsub nsz`, to be left unchanged.
define float @basic_fract_f32_flags_fsub(float nofpclass(nan) %x) {
; GFX6-IR-LABEL: define float @basic_fract_f32_flags_fsub
; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; GFX6-IR-NEXT: entry:
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub nsz float [[X]], [[FLOOR]]
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; GFX6-IR-NEXT: ret float [[MIN]]
;
; IR-FRACT-LABEL: define float @basic_fract_f32_flags_fsub
; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; IR-FRACT-NEXT: entry:
; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
; IR-FRACT-NEXT: ret float [[MIN]]
;
; GFX6-LABEL: basic_fract_f32_flags_fsub:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: basic_fract_f32_flags_fsub:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: basic_fract_f32_flags_fsub:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: basic_fract_f32_flags_fsub:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub nsz float %x, %floor
  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  ret float %min
}
|
|
|
|
; <2 x float> version of the basic nofpclass(nan) pattern, using a splat of
; the 0x3f7fffff clamp constant. The IR-FRACT checks expect the vector to be
; scalarized: both lanes are extracted, each goes through a `call nnan`
; llvm.amdgcn.fract.f32, and the results are reinserted into a poison vector.
; The GFX6-IR checks expect the vector floor/fsub/minnum to be left unchanged.
define <2 x float> @basic_fract_v2f32_nonans(<2 x float> nofpclass(nan) %x) {
; GFX6-IR-LABEL: define <2 x float> @basic_fract_v2f32_nonans
; GFX6-IR-SAME: (<2 x float> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; GFX6-IR-NEXT: entry:
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x float> [[X]], [[FLOOR]]
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>)
; GFX6-IR-NEXT: ret <2 x float> [[MIN]]
;
; IR-FRACT-LABEL: define <2 x float> @basic_fract_v2f32_nonans
; IR-FRACT-SAME: (<2 x float> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; IR-FRACT-NEXT: entry:
; IR-FRACT-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[X]], i64 0
; IR-FRACT-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 1
; IR-FRACT-NEXT: [[TMP2:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[TMP0]])
; IR-FRACT-NEXT: [[TMP3:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[TMP1]])
; IR-FRACT-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
; IR-FRACT-NEXT: [[MIN:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i64 1
; IR-FRACT-NEXT: ret <2 x float> [[MIN]]
;
; GFX6-LABEL: basic_fract_v2f32_nonans:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v2, v0
; GFX6-NEXT: v_floor_f32_e32 v3, v1
; GFX6-NEXT: v_sub_f32_e32 v1, v1, v3
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: basic_fract_v2f32_nonans:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: v_fract_f32_e32 v1, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: basic_fract_v2f32_nonans:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: v_fract_f32_e32 v1, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: basic_fract_v2f32_nonans:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: v_fract_f32_e32 v1, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
  %sub = fsub <2 x float> %x, %floor
  %min = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> %sub, <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>)
  ret <2 x float> %min
}
|
|
|
|
; The fsub result has a second use: it is stored through %ptr in addition to
; feeding the minnum. The IR-FRACT checks expect the minnum to become a
; `call nnan` llvm.amdgcn.fract.f32 while the floor and fsub are kept for the
; store; the GFX6-IR checks expect no change.
define float @basic_fract_f32_multi_use_fsub_nonans(float nofpclass(nan) %x, ptr addrspace(1) %ptr) {
; GFX6-IR-LABEL: define float @basic_fract_f32_multi_use_fsub_nonans
; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]], ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; GFX6-IR-NEXT: entry:
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; GFX6-IR-NEXT: store float [[SUB]], ptr addrspace(1) [[PTR]], align 4
; GFX6-IR-NEXT: ret float [[MIN]]
;
; IR-FRACT-LABEL: define float @basic_fract_f32_multi_use_fsub_nonans
; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]], ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; IR-FRACT-NEXT: entry:
; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-FRACT-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
; IR-FRACT-NEXT: store float [[SUB]], ptr addrspace(1) [[PTR]], align 4
; IR-FRACT-NEXT: ret float [[MIN]]
;
; GFX6-LABEL: basic_fract_f32_multi_use_fsub_nonans:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v3, v0
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: v_sub_f32_e32 v3, v0, v3
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v3
; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: basic_fract_f32_multi_use_fsub_nonans:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_sub_f32_e32 v3, v0, v3
; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: basic_fract_f32_multi_use_fsub_nonans:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v3, v0
; GFX8-NEXT: v_sub_f32_e32 v3, v0, v3
; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: global_store_dword v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: basic_fract_f32_multi_use_fsub_nonans:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_floor_f32_e32 v3, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v3, v0, v3
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: global_store_b32 v[1:2], v3, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  store float %sub, ptr addrspace(1) %ptr
  ret float %min
}
|
|
|
|
; %x has no nofpclass attribute; the only no-NaN information is the nnan flag
; on the minnum call itself. The IR-FRACT checks expect a single `call nnan`
; llvm.amdgcn.fract.f32; the GFX6-IR checks expect the input to be unchanged.
define float @nnan_minnum_fract_f32(float %x) {
; GFX6-IR-LABEL: define float @nnan_minnum_fract_f32
; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; GFX6-IR-NEXT: entry:
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call nnan float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; GFX6-IR-NEXT: ret float [[MIN]]
;
; IR-FRACT-LABEL: define float @nnan_minnum_fract_f32
; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; IR-FRACT-NEXT: entry:
; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
; IR-FRACT-NEXT: ret float [[MIN]]
;
; GFX6-LABEL: nnan_minnum_fract_f32:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: nnan_minnum_fract_f32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: nnan_minnum_fract_f32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: nnan_minnum_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = tail call nnan float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  ret float %min
}
|
|
|
|
; TODO: Could match if we checked isKnownNeverNaN on the minnum src
|
|
; instead of the pattern input source.
|
|
; Here the nnan flag sits on the fsub only; %x and the minnum call carry no
; no-NaN information. The shared IR checks expect the sequence to be left
; unchanged on every target, and every ISA prefix expects v_floor/v_sub/v_min.
define float @nnan_fsub_fract_f32(float %x) {
; IR-LABEL: define float @nnan_fsub_fract_f32
; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub nnan float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: ret float [[MIN]]
;
; GFX6-LABEL: nnan_fsub_fract_f32:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: nnan_fsub_fract_f32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v1, v0
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: nnan_fsub_fract_f32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v1, v0
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: nnan_fsub_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_floor_f32_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub nnan float %x, %floor
  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  ret float %min
}
|
|
|
|
; Here the nnan flag sits on the floor call only; %x, the fsub and the minnum
; carry no no-NaN information. The shared IR checks expect the sequence to be
; left unchanged on every target, and every ISA prefix expects
; v_floor/v_sub/v_min.
define float @nnan_floor_fract_f32(float %x) {
; IR-LABEL: define float @nnan_floor_fract_f32
; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call nnan float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: ret float [[MIN]]
;
; GFX6-LABEL: nnan_floor_fract_f32:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: nnan_floor_fract_f32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v1, v0
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: nnan_floor_fract_f32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v1, v0
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: nnan_floor_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_floor_f32_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call nnan float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  ret float %min
}
|
|
|
|
; The argument %x is marked nofpclass(nan). The GFX6-IR checks keep the
; floor/fsub/minnum sequence, while the IR-FRACT checks expect it to be
; replaced by a single "call nnan float @llvm.amdgcn.fract.f32". In the ISA
; checks GFX6 keeps v_floor/v_sub/v_min and GFX7, GFX8 and GFX11 select
; v_fract_f32.
define float @nnan_src_fract_f32(float nofpclass(nan) %x) {
|
|
; GFX6-IR-LABEL: define float @nnan_src_fract_f32
|
|
; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
|
|
; GFX6-IR-NEXT: entry:
|
|
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
|
|
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
|
|
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
|
|
; GFX6-IR-NEXT: ret float [[MIN]]
|
|
;
|
|
; IR-FRACT-LABEL: define float @nnan_src_fract_f32
|
|
; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
|
|
; IR-FRACT-NEXT: entry:
|
|
; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
|
|
; IR-FRACT-NEXT: ret float [[MIN]]
|
|
;
|
|
; GFX6-LABEL: nnan_src_fract_f32:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: nnan_src_fract_f32:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_fract_f32_e32 v0, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: nnan_src_fract_f32:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_fract_f32_e32 v0, v0
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: nnan_src_fract_f32:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_fract_f32_e32 v0, v0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call float @llvm.floor.f32(float %x)
|
|
%sub = fsub float %x, %floor
|
|
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
|
|
ret float %min
|
|
}
|
|
|
|
; Negative test: the minnum clamp constant is 0x3FEFFFFFC0000000 (0x3f7ffffe
; in the ISA checks) instead of the 0x3FEFFFFFE0000000 used by the matching
; tests in this file, so every run line keeps floor/fsub/minnum.
|
|
define float @not_fract_f32_wrong_const(float nofpclass(nan) %x) {
|
|
; IR-LABEL: define float @not_fract_f32_wrong_const
|
|
; IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
|
|
; IR-NEXT: entry:
|
|
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
|
|
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
|
|
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFC0000000)
|
|
; IR-NEXT: ret float [[MIN]]
|
|
;
|
|
; GFX6-LABEL: not_fract_f32_wrong_const:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7ffffe, v0
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: not_fract_f32_wrong_const:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7ffffe, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: not_fract_f32_wrong_const:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7ffffe, v0
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: not_fract_f32_wrong_const:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7ffffe, v0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call float @llvm.floor.f32(float %x)
|
|
%sub = fsub float %x, %floor
|
|
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFC0000000)
|
|
ret float %min
|
|
}
|
|
|
|
; Negative test: the fsub operands are swapped (floor(x) - x instead of
; x - floor(x)), so every run line keeps floor/fsub/minnum.
|
|
define float @not_fract_f32_swapped_fsub(float nofpclass(nan) %x) {
|
|
; IR-LABEL: define float @not_fract_f32_swapped_fsub
|
|
; IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
|
|
; IR-NEXT: entry:
|
|
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
|
|
; IR-NEXT: [[SUB:%.*]] = fsub float [[FLOOR]], [[X]]
|
|
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
|
|
; IR-NEXT: ret float [[MIN]]
|
|
;
|
|
; GFX6-LABEL: not_fract_f32_swapped_fsub:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX6-NEXT: v_sub_f32_e32 v0, v1, v0
|
|
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: not_fract_f32_swapped_fsub:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX7-NEXT: v_sub_f32_e32 v0, v1, v0
|
|
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: not_fract_f32_swapped_fsub:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX8-NEXT: v_sub_f32_e32 v0, v1, v0
|
|
; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: not_fract_f32_swapped_fsub:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-NEXT: v_sub_f32_e32 v0, v1, v0
|
|
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call float @llvm.floor.f32(float %x)
|
|
%sub = fsub float %floor, %x
|
|
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
|
|
ret float %min
|
|
}
|
|
|
|
; Negative test: the rounding call is llvm.trunc rather than llvm.floor (the
; value is still named %floor), so every run line keeps trunc/fsub/minnum.
|
|
define float @not_fract_f32_not_floor(float nofpclass(nan) %x) {
|
|
; IR-LABEL: define float @not_fract_f32_not_floor
|
|
; IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
|
|
; IR-NEXT: entry:
|
|
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.trunc.f32(float [[X]])
|
|
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
|
|
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
|
|
; IR-NEXT: ret float [[MIN]]
|
|
;
|
|
; GFX6-LABEL: not_fract_f32_not_floor:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_trunc_f32_e32 v1, v0
|
|
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: not_fract_f32_not_floor:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_trunc_f32_e32 v1, v0
|
|
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: not_fract_f32_not_floor:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_trunc_f32_e32 v1, v0
|
|
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: not_fract_f32_not_floor:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_trunc_f32_e32 v1, v0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call float @llvm.trunc.f32(float %x)
|
|
%sub = fsub float %x, %floor
|
|
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
|
|
ret float %min
|
|
}
|
|
|
|
; Negative test: the floor is taken of a different value (%y) than the one it
; is subtracted from (%x), so every run line keeps floor/fsub/minnum.
|
|
define float @not_fract_f32_different_floor(float %x, float %y) {
|
|
; IR-LABEL: define float @not_fract_f32_different_floor
|
|
; IR-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0]] {
|
|
; IR-NEXT: entry:
|
|
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[Y]])
|
|
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
|
|
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
|
|
; IR-NEXT: ret float [[MIN]]
|
|
;
|
|
; GFX6-LABEL: not_fract_f32_different_floor:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_floor_f32_e32 v1, v1
|
|
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: not_fract_f32_different_floor:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_floor_f32_e32 v1, v1
|
|
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: not_fract_f32_different_floor:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_floor_f32_e32 v1, v1
|
|
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: not_fract_f32_different_floor:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_floor_f32_e32 v1, v1
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call float @llvm.floor.f32(float %y)
|
|
%sub = fsub float %x, %floor
|
|
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
|
|
ret float %min
|
|
}
|
|
|
|
; Negative test: the clamp uses llvm.maxnum instead of llvm.minnum (the value
; is still named %min), so every run line keeps floor/fsub/maxnum and the ISA
; checks expect v_max_f32.
|
|
define float @not_fract_f32_maxnum(float nofpclass(nan) %x) {
|
|
; IR-LABEL: define float @not_fract_f32_maxnum
|
|
; IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
|
|
; IR-NEXT: entry:
|
|
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
|
|
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
|
|
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.maxnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
|
|
; IR-NEXT: ret float [[MIN]]
|
|
;
|
|
; GFX6-LABEL: not_fract_f32_maxnum:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX6-NEXT: v_max_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: not_fract_f32_maxnum:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX7-NEXT: v_max_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: not_fract_f32_maxnum:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX8-NEXT: v_max_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: not_fract_f32_maxnum:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX11-NEXT: v_max_f32_e32 v0, 0x3f7fffff, v0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call float @llvm.floor.f32(float %x)
|
|
%sub = fsub float %x, %floor
|
|
%min = tail call float @llvm.maxnum.f32(float %sub, float 0x3FEFFFFFE0000000)
|
|
ret float %min
|
|
}
|
|
|
|
; The select condition is "fcmp uno" of %x against a NaN constant
; (0x7FF8000000000000) rather than against an ordinary number. The
; codegenprepare checks expect the IR to be left unchanged on every run line.
; The shared GCN checks expect llc to emit no arithmetic at all, only the
; entry s_waitcnt and the return - presumably because an unordered compare
; with a NaN operand is always true, so the select always yields %x.
define float @fcmp_uno_check_is_nan_f32(float %x) {
|
|
; IR-LABEL: define float @fcmp_uno_check_is_nan_f32
|
|
; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
|
|
; IR-NEXT: entry:
|
|
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
|
|
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
|
|
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
|
|
; IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0x7FF8000000000000
|
|
; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
|
|
; IR-NEXT: ret float [[COND]]
|
|
;
|
|
; GCN-LABEL: fcmp_uno_check_is_nan_f32:
|
|
; GCN: ; %bb.0: ; %entry
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call float @llvm.floor.f32(float %x)
|
|
%sub = fsub float %x, %floor
|
|
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
|
|
%uno = fcmp uno float %x, 0x7FF8000000000000
|
|
%cond = select i1 %uno, float %x, float %min
|
|
ret float %cond
|
|
}
|
|
|
|
; No inf check: only a NaN guard wraps the floor/fsub/minnum sequence
; (select on "fcmp uno %x, 0.0" returning %x for NaN inputs). The GFX6-IR
; checks keep the IR unchanged; the IR-FRACT checks expect the whole
; sequence, including the compare and select, to become one call to
; llvm.amdgcn.fract.f32 without an nnan flag. In the ISA checks GFX6 keeps
; v_floor/v_sub/v_min plus v_cmp_u/v_cndmask, and GFX7, GFX8 and GFX11 select
; v_fract_f32.
|
|
define float @select_nan_fract_f32(float %x) {
|
|
; GFX6-IR-LABEL: define float @select_nan_fract_f32
|
|
; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
|
|
; GFX6-IR-NEXT: entry:
|
|
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
|
|
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
|
|
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
|
|
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
|
|
; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
|
|
; GFX6-IR-NEXT: ret float [[COND]]
|
|
;
|
|
; IR-FRACT-LABEL: define float @select_nan_fract_f32
|
|
; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
|
|
; IR-FRACT-NEXT: entry:
|
|
; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
|
|
; IR-FRACT-NEXT: ret float [[COND]]
|
|
;
|
|
; GFX6-LABEL: select_nan_fract_f32:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1
|
|
; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
|
|
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: select_nan_fract_f32:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_fract_f32_e32 v0, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: select_nan_fract_f32:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_fract_f32_e32 v0, v0
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: select_nan_fract_f32:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_fract_f32_e32 v0, v0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call float @llvm.floor.f32(float %x)
|
|
%sub = fsub float %x, %floor
|
|
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
|
|
%uno = fcmp uno float %x, 0.000000e+00
|
|
%cond = select i1 %uno, float %x, float %min
|
|
ret float %cond
|
|
}
|
|
|
|
; Commuted form of the NaN-guarded pattern: the compare is "fcmp ord" (the
; value is still named %uno) and the select arms are swapped, so %min is
; chosen for ordered inputs and %x otherwise. The expected results match the
; non-commuted form - the GFX6-IR checks keep the IR unchanged, the IR-FRACT
; checks expect one call to llvm.amdgcn.fract.f32, GFX6 keeps the expanded
; ISA sequence (with v_cmp_o), and GFX7, GFX8 and GFX11 select v_fract_f32.
define float @commuted_select_nan_fract_f32(float %x) {
|
|
; GFX6-IR-LABEL: define float @commuted_select_nan_fract_f32
|
|
; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
|
|
; GFX6-IR-NEXT: entry:
|
|
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
|
|
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
|
|
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
|
|
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp ord float [[X]], 0.000000e+00
|
|
; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[MIN]], float [[X]]
|
|
; GFX6-IR-NEXT: ret float [[COND]]
|
|
;
|
|
; IR-FRACT-LABEL: define float @commuted_select_nan_fract_f32
|
|
; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
|
|
; IR-FRACT-NEXT: entry:
|
|
; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
|
|
; IR-FRACT-NEXT: ret float [[COND]]
|
|
;
|
|
; GFX6-LABEL: commuted_select_nan_fract_f32:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1
|
|
; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
|
|
; GFX6-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: commuted_select_nan_fract_f32:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_fract_f32_e32 v0, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: commuted_select_nan_fract_f32:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_fract_f32_e32 v0, v0
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: commuted_select_nan_fract_f32:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_fract_f32_e32 v0, v0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call float @llvm.floor.f32(float %x)
|
|
%sub = fsub float %x, %floor
|
|
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
|
|
%uno = fcmp ord float %x, 0.000000e+00
|
|
%cond = select i1 %uno, float %min, float %x
|
|
ret float %cond
|
|
}
|
|
|
|
; Negative test: the compare is still "fcmp uno" but the select arms are
; swapped, so %min is chosen when %x is NaN and %x itself otherwise. Every run
; line keeps floor/fsub/minnum plus the compare and select, and the ISA checks
; expect v_cmp_u followed by v_cndmask on all targets.
define float @wrong_commuted_nan_select_f32(float %x) {
|
|
; IR-LABEL: define float @wrong_commuted_nan_select_f32
|
|
; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
|
|
; IR-NEXT: entry:
|
|
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
|
|
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
|
|
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
|
|
; IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
|
|
; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[MIN]], float [[X]]
|
|
; IR-NEXT: ret float [[COND]]
|
|
;
|
|
; GFX6-LABEL: wrong_commuted_nan_select_f32:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1
|
|
; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
|
|
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: wrong_commuted_nan_select_f32:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX7-NEXT: v_sub_f32_e32 v1, v0, v1
|
|
; GFX7-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
|
|
; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: wrong_commuted_nan_select_f32:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX8-NEXT: v_sub_f32_e32 v1, v0, v1
|
|
; GFX8-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
|
|
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: wrong_commuted_nan_select_f32:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-NEXT: v_sub_f32_e32 v1, v0, v1
|
|
; GFX11-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call float @llvm.floor.f32(float %x)
|
|
%sub = fsub float %x, %floor
|
|
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
|
|
%uno = fcmp uno float %x, 0.000000e+00
|
|
%cond = select i1 %uno, float %min, float %x
|
|
ret float %cond
|
|
}
|
|
|
|
; Half-precision variant with a nofpclass(nan) argument and the clamp constant
; 0xH3BFF. The GFX6-IR and GFX7-IR checks keep floor/fsub/minnum; the
; IR-LEGALF16 checks expect "call nnan half @llvm.amdgcn.fract.f16". In the
; ISA checks GFX6 and GFX7 convert through f32 (v_cvt, v_floor_f32, v_sub_f32,
; v_min_f32 with 0x3f7fe000), while GFX8 and GFX11 select v_fract_f16.
define half @basic_fract_f16_nonan(half nofpclass(nan) %x) {
|
|
; GFX6-IR-LABEL: define half @basic_fract_f16_nonan
|
|
; GFX6-IR-SAME: (half nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
|
|
; GFX6-IR-NEXT: entry:
|
|
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
|
|
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
|
|
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
|
|
; GFX6-IR-NEXT: ret half [[MIN]]
|
|
;
|
|
; GFX7-IR-LABEL: define half @basic_fract_f16_nonan
|
|
; GFX7-IR-SAME: (half nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
|
|
; GFX7-IR-NEXT: entry:
|
|
; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
|
|
; GFX7-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
|
|
; GFX7-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
|
|
; GFX7-IR-NEXT: ret half [[MIN]]
|
|
;
|
|
; IR-LEGALF16-LABEL: define half @basic_fract_f16_nonan
|
|
; IR-LEGALF16-SAME: (half nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
|
|
; IR-LEGALF16-NEXT: entry:
|
|
; IR-LEGALF16-NEXT: [[MIN:%.*]] = call nnan half @llvm.amdgcn.fract.f16(half [[X]])
|
|
; IR-LEGALF16-NEXT: ret half [[MIN]]
|
|
;
|
|
; GFX6-LABEL: basic_fract_f16_nonan:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX6-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fe000, v0
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: basic_fract_f16_nonan:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
|
|
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fe000, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: basic_fract_f16_nonan:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_fract_f16_e32 v0, v0
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: basic_fract_f16_nonan:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_fract_f16_e32 v0, v0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call half @llvm.floor.f16(half %x)
|
|
%sub = fsub half %x, %floor
|
|
%min = tail call half @llvm.minnum.f16(half %sub, half 0xH3BFF)
|
|
ret half %min
|
|
}
|
|
|
|
; <2 x half> variant with a nofpclass(nan) argument. The GFX6-IR and GFX7-IR
; checks keep the vector floor/fsub/minnum. The IR-LEGALF16 checks expect the
; pattern to be scalarized - both elements are extracted, each goes through
; "call nnan half @llvm.amdgcn.fract.f16", and the results are re-inserted
; into a vector. In the ISA checks GFX6 and GFX7 expand each element through
; f32, while GFX8 and GFX11 use two v_fract_f16 operations followed by
; v_pack_b32_f16.
define <2 x half> @basic_fract_v2f16_nonan(<2 x half> nofpclass(nan) %x) {
|
|
; GFX6-IR-LABEL: define <2 x half> @basic_fract_v2f16_nonan
|
|
; GFX6-IR-SAME: (<2 x half> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
|
|
; GFX6-IR-NEXT: entry:
|
|
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])
|
|
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]]
|
|
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> <half 0xH3BFF, half 0xH3BFF>)
|
|
; GFX6-IR-NEXT: ret <2 x half> [[MIN]]
|
|
;
|
|
; GFX7-IR-LABEL: define <2 x half> @basic_fract_v2f16_nonan
|
|
; GFX7-IR-SAME: (<2 x half> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
|
|
; GFX7-IR-NEXT: entry:
|
|
; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])
|
|
; GFX7-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]]
|
|
; GFX7-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> <half 0xH3BFF, half 0xH3BFF>)
|
|
; GFX7-IR-NEXT: ret <2 x half> [[MIN]]
|
|
;
|
|
; IR-LEGALF16-LABEL: define <2 x half> @basic_fract_v2f16_nonan
|
|
; IR-LEGALF16-SAME: (<2 x half> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
|
|
; IR-LEGALF16-NEXT: entry:
|
|
; IR-LEGALF16-NEXT: [[TMP0:%.*]] = extractelement <2 x half> [[X]], i64 0
|
|
; IR-LEGALF16-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[X]], i64 1
|
|
; IR-LEGALF16-NEXT: [[TMP2:%.*]] = call nnan half @llvm.amdgcn.fract.f16(half [[TMP0]])
|
|
; IR-LEGALF16-NEXT: [[TMP3:%.*]] = call nnan half @llvm.amdgcn.fract.f16(half [[TMP1]])
|
|
; IR-LEGALF16-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[TMP2]], i64 0
|
|
; IR-LEGALF16-NEXT: [[MIN:%.*]] = insertelement <2 x half> [[TMP4]], half [[TMP3]], i64 1
|
|
; IR-LEGALF16-NEXT: ret <2 x half> [[MIN]]
|
|
;
|
|
; GFX6-LABEL: basic_fract_v2f16_nonan:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX6-NEXT: v_floor_f32_e32 v2, v0
|
|
; GFX6-NEXT: v_floor_f32_e32 v3, v1
|
|
; GFX6-NEXT: v_sub_f32_e32 v1, v1, v3
|
|
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
|
|
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fe000, v0
|
|
; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fe000, v1
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: basic_fract_v2f16_nonan:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX7-NEXT: v_floor_f32_e32 v2, v0
|
|
; GFX7-NEXT: v_floor_f32_e32 v3, v1
|
|
; GFX7-NEXT: v_sub_f32_e32 v1, v1, v3
|
|
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v2
|
|
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fe000, v0
|
|
; GFX7-NEXT: v_min_f32_e32 v1, 0x3f7fe000, v1
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: basic_fract_v2f16_nonan:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_fract_f16_e32 v1, v0
|
|
; GFX8-NEXT: v_fract_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
; GFX8-NEXT: v_pack_b32_f16 v0, v1, v0
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: basic_fract_v2f16_nonan:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; GFX11-NEXT: v_fract_f16_e32 v0, v0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-NEXT: v_fract_f16_e32 v1, v1
|
|
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call <2 x half> @llvm.floor.v2f16(<2 x half> %x)
|
|
%sub = fsub <2 x half> %x, %floor
|
|
%min = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> %sub, <2 x half> <half 0xH3BFF, half 0xH3BFF>)
|
|
ret <2 x half> %min
|
|
}
|
|
|
|
; Double-precision variant with a nofpclass(nan) argument and the clamp
; constant 0x3FEFFFFFFFFFFFFF. The GFX6-IR checks keep floor/fsub/minnum; the
; IR-FRACT checks expect "call nnan double @llvm.amdgcn.fract.f64". In the ISA
; checks GFX7, GFX8 and GFX11 select a single v_fract_f64, while GFX6 emits a
; longer sequence (v_fract_f64, v_min_f64, v_cmp_class_f64, v_cndmask, two
; v_add_f64 and a final v_min_f64) - presumably the f64 floor expansion used
; on the target whose fract is treated as buggy by this test.
define double @basic_fract_f64_nanans(double nofpclass(nan) %x) {
|
|
; GFX6-IR-LABEL: define double @basic_fract_f64_nanans
|
|
; GFX6-IR-SAME: (double nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
|
|
; GFX6-IR-NEXT: entry:
|
|
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])
|
|
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub double [[X]], [[FLOOR]]
|
|
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call double @llvm.minnum.f64(double [[SUB]], double 0x3FEFFFFFFFFFFFFF)
|
|
; GFX6-IR-NEXT: ret double [[MIN]]
|
|
;
|
|
; IR-FRACT-LABEL: define double @basic_fract_f64_nanans
|
|
; IR-FRACT-SAME: (double nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
|
|
; IR-FRACT-NEXT: entry:
|
|
; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan double @llvm.amdgcn.fract.f64(double [[X]])
|
|
; IR-FRACT-NEXT: ret double [[MIN]]
|
|
;
|
|
; GFX6-LABEL: basic_fract_f64_nanans:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
|
|
; GFX6-NEXT: v_mov_b32_e32 v4, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
|
|
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
|
|
; GFX6-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 3
|
|
; GFX6-NEXT: s_mov_b32 s4, -1
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
|
|
; GFX6-NEXT: v_add_f64 v[2:3], v[0:1], -v[2:3]
|
|
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
|
|
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
|
|
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: basic_fract_f64_nanans:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: basic_fract_f64_nanans:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: basic_fract_f64_nanans:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call double @llvm.floor.f64(double %x)
|
|
%sub = fsub double %x, %floor
|
|
%min = tail call double @llvm.minnum.f64(double %sub, double 0x3FEFFFFFFFFFFFFF)
|
|
ret double %min
|
|
}
|
|
|
|
define half @safe_math_fract_f16_noinf_check(half %x, ptr addrspace(1) nocapture writeonly %ip) {
|
|
; GFX6-IR-LABEL: define half @safe_math_fract_f16_noinf_check
|
|
; GFX6-IR-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; GFX6-IR-NEXT: entry:
|
|
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
|
|
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
|
|
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
|
|
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000
|
|
; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]]
|
|
; GFX6-IR-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; GFX6-IR-NEXT: ret half [[COND]]
|
|
;
|
|
; GFX7-IR-LABEL: define half @safe_math_fract_f16_noinf_check
|
|
; GFX7-IR-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; GFX7-IR-NEXT: entry:
|
|
; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
|
|
; GFX7-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
|
|
; GFX7-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
|
|
; GFX7-IR-NEXT: [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000
|
|
; GFX7-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]]
|
|
; GFX7-IR-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; GFX7-IR-NEXT: ret half [[COND]]
|
|
;
|
|
; IR-LEGALF16-LABEL: define half @safe_math_fract_f16_noinf_check
|
|
; IR-LEGALF16-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; IR-LEGALF16-NEXT: entry:
|
|
; IR-LEGALF16-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
|
|
; IR-LEGALF16-NEXT: [[COND:%.*]] = call half @llvm.amdgcn.fract.f16(half [[X]])
|
|
; IR-LEGALF16-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; IR-LEGALF16-NEXT: ret half [[COND]]
|
|
;
|
|
; GFX6-LABEL: safe_math_fract_f16_noinf_check:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX6-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX6-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-NEXT: v_floor_f32_e32 v3, v0
|
|
; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
|
|
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fe000, v4
|
|
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
|
|
; GFX6-NEXT: buffer_store_short v3, v[1:2], s[4:7], 0 addr64
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: safe_math_fract_f16_noinf_check:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-NEXT: v_floor_f32_e32 v3, v0
|
|
; GFX7-NEXT: v_sub_f32_e32 v4, v0, v3
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; GFX7-NEXT: v_min_f32_e32 v4, 0x3f7fe000, v4
|
|
; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
|
|
; GFX7-NEXT: buffer_store_short v3, v[1:2], s[4:7], 0 addr64
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: safe_math_fract_f16_noinf_check:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_floor_f16_e32 v3, v0
|
|
; GFX8-NEXT: v_fract_f16_e32 v0, v0
|
|
; GFX8-NEXT: global_store_short v[1:2], v3, off
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: safe_math_fract_f16_noinf_check:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_floor_f16_e32 v3, v0
|
|
; GFX11-NEXT: v_fract_f16_e32 v0, v0
|
|
; GFX11-NEXT: global_store_b16 v[1:2], v3, off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call half @llvm.floor.f16(half %x)
|
|
%sub = fsub half %x, %floor
|
|
%min = tail call half @llvm.minnum.f16(half %sub, half 0xH3BFF)
|
|
%uno = fcmp uno half %x, 0.000000e+00
|
|
%cond = select i1 %uno, half %x, half %min
|
|
store half %floor, ptr addrspace(1) %ip, align 4
|
|
ret half %cond
|
|
}
|
|
|
|
; fract pattern on double WITHOUT the infinity check: floor(x), x - floor(x),
; minnum against 0x3FEFFFFFFFFFFFFF (the largest double below 1.0), and a
; select that returns %x itself when it is NaN (fcmp uno). floor(x) is also
; stored through %ip, so the floor call is still expected after the rewrite.
; Expected IR: GFX6-IR keeps the expansion unchanged; IR-FRACT replaces the
; fsub/minnum/fcmp/select chain with one call to @llvm.amdgcn.fract.f64.
; Expected ISA: GFX7/GFX8/GFX11 use v_floor_f64 + v_fract_f64. The GFX11
; checks have no s_waitcnt between the store and the return.
; NOTE(review): the longer GFX6 sequence (v_fract_f64, v_min_f64,
; v_cmp_class_f64, v_add_f64) looks like that target's expansion of floor -
; confirm against the backend before relying on this description.
define double @safe_math_fract_f64_noinf_check(double %x, ptr addrspace(1) nocapture writeonly %ip) {
|
|
; GFX6-IR-LABEL: define double @safe_math_fract_f64_noinf_check
|
|
; GFX6-IR-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; GFX6-IR-NEXT: entry:
|
|
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])
|
|
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub double [[X]], [[FLOOR]]
|
|
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call double @llvm.minnum.f64(double [[SUB]], double 0x3FEFFFFFFFFFFFFF)
|
|
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno double [[X]], 0.000000e+00
|
|
; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], double [[X]], double [[MIN]]
|
|
; GFX6-IR-NEXT: store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; GFX6-IR-NEXT: ret double [[COND]]
|
|
;
|
|
; IR-FRACT-LABEL: define double @safe_math_fract_f64_noinf_check
|
|
; IR-FRACT-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; IR-FRACT-NEXT: entry:
|
|
; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])
|
|
; IR-FRACT-NEXT: [[COND:%.*]] = call double @llvm.amdgcn.fract.f64(double [[X]])
|
|
; IR-FRACT-NEXT: store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; IR-FRACT-NEXT: ret double [[COND]]
|
|
;
|
|
; GFX6-LABEL: safe_math_fract_f64_noinf_check:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
|
|
; GFX6-NEXT: v_mov_b32_e32 v6, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v7, 0x3fefffff
|
|
; GFX6-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7]
|
|
; GFX6-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 3
|
|
; GFX6-NEXT: s_mov_b32 s8, -1
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc
|
|
; GFX6-NEXT: v_add_f64 v[4:5], v[0:1], -v[4:5]
|
|
; GFX6-NEXT: s_mov_b32 s9, 0x3fefffff
|
|
; GFX6-NEXT: v_add_f64 v[6:7], v[0:1], -v[4:5]
|
|
; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
|
|
; GFX6-NEXT: v_min_f64 v[6:7], v[6:7], s[8:9]
|
|
; GFX6-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
|
|
; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: safe_math_fract_f64_noinf_check:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
|
|
; GFX7-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
|
|
; GFX7-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: safe_math_fract_f64_noinf_check:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
|
|
; GFX8-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
|
|
; GFX8-NEXT: global_store_dwordx2 v[2:3], v[4:5], off
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: safe_math_fract_f64_noinf_check:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
|
|
; GFX11-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
|
|
; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call double @llvm.floor.f64(double %x)
|
|
%sub = fsub double %x, %floor
|
|
%min = tail call double @llvm.minnum.f64(double %sub, double 0x3FEFFFFFFFFFFFFF)
|
|
%uno = fcmp uno double %x, 0.000000e+00
|
|
%cond = select i1 %uno, double %x, double %min
|
|
store double %floor, ptr addrspace(1) %ip, align 4
|
|
ret double %cond
|
|
}
|
|
|
|
; NaN-guarded fract pattern on float with no infinity check and no floor
; output: floor(x), x - floor(x), minnum against 0x3FEFFFFFE0000000 (emitted
; as 0x3f7fffff in the GFX6 checks, the largest float below 1.0), then a
; select that returns %x when it is NaN.
; The fast-math flag under test is `nsz` on the final select. The IR-FRACT
; checks expect that flag to appear on the @llvm.amdgcn.fract.f32 call, while
; GFX6-IR keeps the original expansion unchanged.
define float @select_nan_fract_f32_flags_select(float %x) {
|
|
; GFX6-IR-LABEL: define float @select_nan_fract_f32_flags_select
|
|
; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
|
|
; GFX6-IR-NEXT: entry:
|
|
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
|
|
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
|
|
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
|
|
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
|
|
; GFX6-IR-NEXT: [[COND:%.*]] = select nsz i1 [[UNO]], float [[X]], float [[MIN]]
|
|
; GFX6-IR-NEXT: ret float [[COND]]
|
|
;
|
|
; IR-FRACT-LABEL: define float @select_nan_fract_f32_flags_select
|
|
; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
|
|
; IR-FRACT-NEXT: entry:
|
|
; IR-FRACT-NEXT: [[COND:%.*]] = call nsz float @llvm.amdgcn.fract.f32(float [[X]])
|
|
; IR-FRACT-NEXT: ret float [[COND]]
|
|
;
|
|
; GFX6-LABEL: select_nan_fract_f32_flags_select:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1
|
|
; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
|
|
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: select_nan_fract_f32_flags_select:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_fract_f32_e32 v0, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: select_nan_fract_f32_flags_select:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_fract_f32_e32 v0, v0
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: select_nan_fract_f32_flags_select:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_fract_f32_e32 v0, v0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call float @llvm.floor.f32(float %x)
|
|
%sub = fsub float %x, %floor
|
|
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
|
|
%uno = fcmp uno float %x, 0.000000e+00
|
|
%cond = select nsz i1 %uno, float %x, float %min
|
|
ret float %cond
|
|
}
|
|
|
|
; Same NaN-guarded float fract pattern as the `_flags_select` variant, but
; here the `nsz` fast-math flag sits on the minnum call instead of on the
; final select.
; The IR-FRACT checks expect a plain `call float @llvm.amdgcn.fract.f32` with
; no fast-math flags, so a flag on the minnum is not carried over to the
; intrinsic call. GFX6-IR keeps the original expansion unchanged.
define float @select_nan_fract_f32_flags_minnum(float %x) {
|
|
; GFX6-IR-LABEL: define float @select_nan_fract_f32_flags_minnum
|
|
; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
|
|
; GFX6-IR-NEXT: entry:
|
|
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
|
|
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
|
|
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call nsz float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
|
|
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
|
|
; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
|
|
; GFX6-IR-NEXT: ret float [[COND]]
|
|
;
|
|
; IR-FRACT-LABEL: define float @select_nan_fract_f32_flags_minnum
|
|
; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
|
|
; IR-FRACT-NEXT: entry:
|
|
; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
|
|
; IR-FRACT-NEXT: ret float [[COND]]
|
|
;
|
|
; GFX6-LABEL: select_nan_fract_f32_flags_minnum:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_floor_f32_e32 v1, v0
|
|
; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1
|
|
; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
|
|
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: select_nan_fract_f32_flags_minnum:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_fract_f32_e32 v0, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: select_nan_fract_f32_flags_minnum:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_fract_f32_e32 v0, v0
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: select_nan_fract_f32_flags_minnum:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_fract_f32_e32 v0, v0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call float @llvm.floor.f32(float %x)
|
|
%sub = fsub float %x, %floor
|
|
%min = tail call nsz float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
|
|
%uno = fcmp uno float %x, 0.000000e+00
|
|
%cond = select i1 %uno, float %x, float %min
|
|
ret float %cond
|
|
}
|
|
|
|
; Full "safe" fract expansion on <2 x float>: floor, x - floor, minnum against
; a splat of 0x3FEFFFFFE0000000 (0x3f7fffff in the ISA checks), a NaN select
; that returns %x, and a final select that yields zero when |x| compares
; ordered-equal to +infinity (0x7FF0000000000000). floor(x) is also stored
; through %ip.
; Expected IR: GFX6-IR keeps the expansion unchanged. IR-FRACT scalarizes the
; fract part - each element is extracted, passed to @llvm.amdgcn.fract.f32,
; and reinserted - while the fabs/fcmp/select infinity check stays as a
; vector operation.
define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocapture writeonly %ip) {
|
|
; GFX6-IR-LABEL: define <2 x float> @safe_math_fract_v2f32
|
|
; GFX6-IR-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; GFX6-IR-NEXT: entry:
|
|
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])
|
|
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x float> [[X]], [[FLOOR]]
|
|
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>)
|
|
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x float> [[X]], zeroinitializer
|
|
; GFX6-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x float> [[X]], <2 x float> [[MIN]]
|
|
; GFX6-IR-NEXT: [[FABS:%.*]] = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
|
|
; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], <float 0x7FF0000000000000, float 0x7FF0000000000000>
|
|
; GFX6-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x float> zeroinitializer, <2 x float> [[COND]]
|
|
; GFX6-IR-NEXT: store <2 x float> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; GFX6-IR-NEXT: ret <2 x float> [[COND6]]
|
|
;
|
|
; IR-FRACT-LABEL: define <2 x float> @safe_math_fract_v2f32
|
|
; IR-FRACT-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; IR-FRACT-NEXT: entry:
|
|
; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])
|
|
; IR-FRACT-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IR-FRACT-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IR-FRACT-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.fract.f32(float [[TMP0]])
|
|
; IR-FRACT-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.fract.f32(float [[TMP1]])
|
|
; IR-FRACT-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
|
|
; IR-FRACT-NEXT: [[COND:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i64 1
|
|
; IR-FRACT-NEXT: [[FABS:%.*]] = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
|
|
; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], <float 0x7FF0000000000000, float 0x7FF0000000000000>
|
|
; IR-FRACT-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x float> zeroinitializer, <2 x float> [[COND]]
|
|
; IR-FRACT-NEXT: store <2 x float> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; IR-FRACT-NEXT: ret <2 x float> [[COND6]]
|
|
;
|
|
; GFX6-LABEL: safe_math_fract_v2f32:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_floor_f32_e32 v5, v1
|
|
; GFX6-NEXT: v_floor_f32_e32 v4, v0
|
|
; GFX6-NEXT: v_sub_f32_e32 v6, v1, v5
|
|
; GFX6-NEXT: v_sub_f32_e32 v7, v0, v4
|
|
; GFX6-NEXT: v_min_f32_e32 v6, 0x3f7fffff, v6
|
|
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
|
|
; GFX6-NEXT: v_min_f32_e32 v7, 0x3f7fffff, v7
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc
|
|
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX6-NEXT: s_movk_i32 s10, 0x204
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc
|
|
; GFX6-NEXT: v_cmp_class_f32_e64 s[8:9], v0, s10
|
|
; GFX6-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-NEXT: v_cndmask_b32_e64 v0, v7, 0, s[8:9]
|
|
; GFX6-NEXT: v_cmp_class_f32_e64 s[8:9], v1, s10
|
|
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-NEXT: v_cndmask_b32_e64 v1, v6, 0, s[8:9]
|
|
; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: safe_math_fract_v2f32:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
|
|
; GFX7-NEXT: v_fract_f32_e32 v6, v0
|
|
; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
|
|
; GFX7-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-NEXT: v_floor_f32_e32 v4, v0
|
|
; GFX7-NEXT: v_fract_f32_e32 v7, v1
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
|
|
; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v1|, s8
|
|
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-NEXT: v_floor_f32_e32 v5, v1
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
|
|
; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: safe_math_fract_v2f32:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
|
|
; GFX8-NEXT: v_fract_f32_e32 v6, v0
|
|
; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
|
|
; GFX8-NEXT: v_floor_f32_e32 v4, v0
|
|
; GFX8-NEXT: v_fract_f32_e32 v7, v1
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
|
|
; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v1|, s4
|
|
; GFX8-NEXT: v_floor_f32_e32 v5, v1
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
|
|
; GFX8-NEXT: global_store_dwordx2 v[2:3], v[4:5], off
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: safe_math_fract_v2f32:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_fract_f32_e32 v6, v0
|
|
; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
|
|
; GFX11-NEXT: v_fract_f32_e32 v7, v1
|
|
; GFX11-NEXT: v_floor_f32_e32 v4, v0
|
|
; GFX11-NEXT: v_floor_f32_e32 v5, v1
|
|
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc_lo
|
|
; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v1|
|
|
; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off
|
|
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc_lo
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
|
|
%sub = fsub <2 x float> %x, %floor
|
|
%min = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> %sub, <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>)
|
|
%uno = fcmp uno <2 x float> %x, zeroinitializer
|
|
%cond = select <2 x i1> %uno, <2 x float> %x, <2 x float> %min
|
|
%fabs = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
|
|
%cmpinf = fcmp oeq <2 x float> %fabs, <float 0x7FF0000000000000, float 0x7FF0000000000000>
|
|
%cond6 = select <2 x i1> %cmpinf, <2 x float> zeroinitializer, <2 x float> %cond
|
|
store <2 x float> %floor, ptr addrspace(1) %ip, align 4
|
|
ret <2 x float> %cond6
|
|
}
|
|
|
|
; Full "safe" fract expansion on double: floor, x - floor, minnum against
; 0x3FEFFFFFFFFFFFFF (the largest double below 1.0), a NaN select that
; returns %x, and a final select that yields 0.0 when |x| compares
; ordered-equal to +infinity (0x7FF0000000000000). floor(x) is also stored
; through %ip.
; Expected IR: GFX6-IR keeps the expansion unchanged. IR-FRACT folds the
; fsub/minnum/NaN-select chain into @llvm.amdgcn.fract.f64 but keeps the
; fabs/fcmp/select infinity check.
; Expected ISA: GFX7/GFX8/GFX11 use v_fract_f64 plus a v_cmp_neq_f64 on |x|
; against the infinity bit pattern (high dword 0x7ff00000).
define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeonly %ip) {
|
|
; GFX6-IR-LABEL: define double @safe_math_fract_f64
|
|
; GFX6-IR-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; GFX6-IR-NEXT: entry:
|
|
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])
|
|
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub double [[X]], [[FLOOR]]
|
|
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call double @llvm.minnum.f64(double [[SUB]], double 0x3FEFFFFFFFFFFFFF)
|
|
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno double [[X]], 0.000000e+00
|
|
; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], double [[X]], double [[MIN]]
|
|
; GFX6-IR-NEXT: [[FABS:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
|
|
; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq double [[FABS]], 0x7FF0000000000000
|
|
; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], double 0.000000e+00, double [[COND]]
|
|
; GFX6-IR-NEXT: store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; GFX6-IR-NEXT: ret double [[COND6]]
|
|
;
|
|
; IR-FRACT-LABEL: define double @safe_math_fract_f64
|
|
; IR-FRACT-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; IR-FRACT-NEXT: entry:
|
|
; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])
|
|
; IR-FRACT-NEXT: [[COND:%.*]] = call double @llvm.amdgcn.fract.f64(double [[X]])
|
|
; IR-FRACT-NEXT: [[FABS:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
|
|
; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq double [[FABS]], 0x7FF0000000000000
|
|
; IR-FRACT-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], double 0.000000e+00, double [[COND]]
|
|
; IR-FRACT-NEXT: store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; IR-FRACT-NEXT: ret double [[COND6]]
|
|
;
|
|
; GFX6-LABEL: safe_math_fract_f64:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
|
|
; GFX6-NEXT: v_mov_b32_e32 v6, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v7, 0x3fefffff
|
|
; GFX6-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7]
|
|
; GFX6-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 3
|
|
; GFX6-NEXT: s_mov_b32 s8, -1
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc
|
|
; GFX6-NEXT: v_add_f64 v[4:5], v[0:1], -v[4:5]
|
|
; GFX6-NEXT: s_mov_b32 s9, 0x3fefffff
|
|
; GFX6-NEXT: v_add_f64 v[6:7], v[0:1], -v[4:5]
|
|
; GFX6-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-NEXT: v_min_f64 v[6:7], v[6:7], s[8:9]
|
|
; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
|
|
; GFX6-NEXT: s_mov_b32 s9, 0x7ff00000
|
|
; GFX6-NEXT: s_mov_b32 s8, s6
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc
|
|
; GFX6-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[8:9]
|
|
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
|
|
; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: safe_math_fract_f64:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-NEXT: s_mov_b32 s5, 0x7ff00000
|
|
; GFX7-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
|
|
; GFX7-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
|
|
; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
|
|
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
|
|
; GFX7-NEXT: buffer_store_dwordx2 v[6:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: safe_math_fract_f64:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: s_mov_b32 s4, 0
|
|
; GFX8-NEXT: s_mov_b32 s5, 0x7ff00000
|
|
; GFX8-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
|
|
; GFX8-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
|
|
; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
|
|
; GFX8-NEXT: global_store_dwordx2 v[2:3], v[6:7], off
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: safe_math_fract_f64:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s0, 0
|
|
; GFX11-NEXT: s_mov_b32 s1, 0x7ff00000
|
|
; GFX11-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
|
|
; GFX11-NEXT: v_cmp_neq_f64_e64 vcc_lo, |v[0:1]|, s[0:1]
|
|
; GFX11-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
|
|
; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
|
|
; GFX11-NEXT: global_store_b64 v[2:3], v[6:7], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call double @llvm.floor.f64(double %x)
|
|
%sub = fsub double %x, %floor
|
|
%min = tail call double @llvm.minnum.f64(double %sub, double 0x3FEFFFFFFFFFFFFF)
|
|
%uno = fcmp uno double %x, 0.000000e+00
|
|
%cond = select i1 %uno, double %x, double %min
|
|
%fabs = tail call double @llvm.fabs.f64(double %x)
|
|
%cmpinf = fcmp oeq double %fabs, 0x7FF0000000000000
|
|
%cond6 = select i1 %cmpinf, double 0.000000e+00, double %cond
|
|
store double %floor, ptr addrspace(1) %ip, align 4
|
|
ret double %cond6
|
|
}
|
|
|
|
; Full "safe" fract expansion on half: floor, x - floor, minnum against
; 0xH3BFF (the largest half below 1.0), a NaN select that returns %x, and a
; final select that yields zero when |x| compares ordered-equal to +infinity
; (0xH7C00). floor(x) is also stored through %ip.
; Expected IR: both GFX6-IR and GFX7-IR keep the expansion unchanged. Only
; the IR-LEGALF16 prefix folds the fsub/minnum/NaN-select chain into
; @llvm.amdgcn.fract.f16, and it keeps the fabs/fcmp/select infinity check.
; Expected ISA: GFX6/GFX7 convert through f32 (v_cvt_f16_f32 / v_cvt_f32_f16)
; and clamp with 0x3f7fe000; GFX8/GFX11 use v_fract_f16 directly.
define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly %ip) {
|
|
; GFX6-IR-LABEL: define half @safe_math_fract_f16
|
|
; GFX6-IR-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; GFX6-IR-NEXT: entry:
|
|
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
|
|
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
|
|
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
|
|
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000
|
|
; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]]
|
|
; GFX6-IR-NEXT: [[FABS:%.*]] = tail call half @llvm.fabs.f16(half [[X]])
|
|
; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq half [[FABS]], 0xH7C00
|
|
; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], half 0xH0000, half [[COND]]
|
|
; GFX6-IR-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; GFX6-IR-NEXT: ret half [[COND6]]
|
|
;
|
|
; GFX7-IR-LABEL: define half @safe_math_fract_f16
|
|
; GFX7-IR-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; GFX7-IR-NEXT: entry:
|
|
; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
|
|
; GFX7-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
|
|
; GFX7-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
|
|
; GFX7-IR-NEXT: [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000
|
|
; GFX7-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]]
|
|
; GFX7-IR-NEXT: [[FABS:%.*]] = tail call half @llvm.fabs.f16(half [[X]])
|
|
; GFX7-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq half [[FABS]], 0xH7C00
|
|
; GFX7-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], half 0xH0000, half [[COND]]
|
|
; GFX7-IR-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; GFX7-IR-NEXT: ret half [[COND6]]
|
|
;
|
|
; IR-LEGALF16-LABEL: define half @safe_math_fract_f16
|
|
; IR-LEGALF16-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; IR-LEGALF16-NEXT: entry:
|
|
; IR-LEGALF16-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
|
|
; IR-LEGALF16-NEXT: [[COND:%.*]] = call half @llvm.amdgcn.fract.f16(half [[X]])
|
|
; IR-LEGALF16-NEXT: [[FABS:%.*]] = tail call half @llvm.fabs.f16(half [[X]])
|
|
; IR-LEGALF16-NEXT: [[CMPINF:%.*]] = fcmp oeq half [[FABS]], 0xH7C00
|
|
; IR-LEGALF16-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], half 0xH0000, half [[COND]]
|
|
; IR-LEGALF16-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; IR-LEGALF16-NEXT: ret half [[COND6]]
|
|
;
|
|
; GFX6-LABEL: safe_math_fract_f16:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
|
|
; GFX6-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX6-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-NEXT: v_floor_f32_e32 v3, v0
|
|
; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
|
|
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fe000, v4
|
|
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
|
|
; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
|
|
; GFX6-NEXT: buffer_store_short v3, v[1:2], s[4:7], 0 addr64
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: safe_math_fract_f16:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
|
|
; GFX7-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-NEXT: v_floor_f32_e32 v3, v0
|
|
; GFX7-NEXT: v_sub_f32_e32 v4, v0, v3
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; GFX7-NEXT: v_min_f32_e32 v4, 0x3f7fe000, v4
|
|
; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
|
|
; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
|
|
; GFX7-NEXT: buffer_store_short v3, v[1:2], s[4:7], 0 addr64
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: safe_math_fract_f16:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: s_movk_i32 s4, 0x7c00
|
|
; GFX8-NEXT: v_fract_f16_e32 v4, v0
|
|
; GFX8-NEXT: v_cmp_neq_f16_e64 vcc, |v0|, s4
|
|
; GFX8-NEXT: v_floor_f16_e32 v3, v0
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
|
|
; GFX8-NEXT: global_store_short v[1:2], v3, off
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: safe_math_fract_f16:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_fract_f16_e32 v3, v0
|
|
; GFX11-NEXT: v_cmp_neq_f16_e64 vcc_lo, 0x7c00, |v0|
|
|
; GFX11-NEXT: v_floor_f16_e32 v4, v0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
|
|
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
|
|
; GFX11-NEXT: global_store_b16 v[1:2], v4, off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call half @llvm.floor.f16(half %x)
|
|
%sub = fsub half %x, %floor
|
|
%min = tail call half @llvm.minnum.f16(half %sub, half 0xH3BFF)
|
|
%uno = fcmp uno half %x, 0.000000e+00
|
|
%cond = select i1 %uno, half %x, half %min
|
|
%fabs = tail call half @llvm.fabs.f16(half %x)
|
|
%cmpinf = fcmp oeq half %fabs, 0xH7C00
|
|
%cond6 = select i1 %cmpinf, half 0.000000e+00, half %cond
|
|
store half %floor, ptr addrspace(1) %ip, align 4
|
|
ret half %cond6
|
|
}
|
|
|
|
define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocapture writeonly %ip) {
|
|
; GFX6-IR-LABEL: define <2 x half> @safe_math_fract_v2f16
|
|
; GFX6-IR-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; GFX6-IR-NEXT: entry:
|
|
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])
|
|
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]]
|
|
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> <half 0xH3BFF, half 0xH3BFF>)
|
|
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x half> [[X]], zeroinitializer
|
|
; GFX6-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x half> [[X]], <2 x half> [[MIN]]
|
|
; GFX6-IR-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])
|
|
; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], <half 0xH7C00, half 0xH7C00>
|
|
; GFX6-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]]
|
|
; GFX6-IR-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; GFX6-IR-NEXT: ret <2 x half> [[COND6]]
|
|
;
|
|
; GFX7-IR-LABEL: define <2 x half> @safe_math_fract_v2f16
|
|
; GFX7-IR-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; GFX7-IR-NEXT: entry:
|
|
; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])
|
|
; GFX7-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]]
|
|
; GFX7-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> <half 0xH3BFF, half 0xH3BFF>)
|
|
; GFX7-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x half> [[X]], zeroinitializer
|
|
; GFX7-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x half> [[X]], <2 x half> [[MIN]]
|
|
; GFX7-IR-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])
|
|
; GFX7-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], <half 0xH7C00, half 0xH7C00>
|
|
; GFX7-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]]
|
|
; GFX7-IR-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; GFX7-IR-NEXT: ret <2 x half> [[COND6]]
|
|
;
|
|
; IR-LEGALF16-LABEL: define <2 x half> @safe_math_fract_v2f16
|
|
; IR-LEGALF16-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; IR-LEGALF16-NEXT: entry:
|
|
; IR-LEGALF16-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])
|
|
; IR-LEGALF16-NEXT: [[TMP0:%.*]] = extractelement <2 x half> [[X]], i64 0
|
|
; IR-LEGALF16-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[X]], i64 1
|
|
; IR-LEGALF16-NEXT: [[TMP2:%.*]] = call half @llvm.amdgcn.fract.f16(half [[TMP0]])
|
|
; IR-LEGALF16-NEXT: [[TMP3:%.*]] = call half @llvm.amdgcn.fract.f16(half [[TMP1]])
|
|
; IR-LEGALF16-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[TMP2]], i64 0
|
|
; IR-LEGALF16-NEXT: [[COND:%.*]] = insertelement <2 x half> [[TMP4]], half [[TMP3]], i64 1
|
|
; IR-LEGALF16-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])
|
|
; IR-LEGALF16-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], <half 0xH7C00, half 0xH7C00>
|
|
; IR-LEGALF16-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]]
|
|
; IR-LEGALF16-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; IR-LEGALF16-NEXT: ret <2 x half> [[COND6]]
|
|
;
|
|
; GFX6-LABEL: safe_math_fract_v2f16:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
|
|
; GFX6-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v1
|
|
; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v0
|
|
; GFX6-NEXT: v_cvt_f32_f16_e64 v0, |v0|
|
|
; GFX6-NEXT: v_cvt_f32_f16_e64 v1, |v1|
|
|
; GFX6-NEXT: v_floor_f32_e32 v6, v4
|
|
; GFX6-NEXT: v_cvt_f16_f32_e32 v7, v6
|
|
; GFX6-NEXT: v_floor_f32_e32 v8, v5
|
|
; GFX6-NEXT: v_sub_f32_e32 v6, v4, v6
|
|
; GFX6-NEXT: v_cvt_f16_f32_e32 v9, v8
|
|
; GFX6-NEXT: v_sub_f32_e32 v8, v5, v8
|
|
; GFX6-NEXT: v_min_f32_e32 v6, 0x3f7fe000, v6
|
|
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
|
|
; GFX6-NEXT: v_min_f32_e32 v8, 0x3f7fe000, v8
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
|
|
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
|
|
; GFX6-NEXT: v_cmp_neq_f32_e32 vcc, s8, v0
|
|
; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc
|
|
; GFX6-NEXT: v_cmp_neq_f32_e32 vcc, s8, v1
|
|
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-NEXT: v_or_b32_e32 v7, v9, v7
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
|
|
; GFX6-NEXT: buffer_store_dword v7, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: safe_math_fract_v2f16:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
|
|
; GFX7-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v1
|
|
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v0
|
|
; GFX7-NEXT: v_cvt_f32_f16_e64 v0, |v0|
|
|
; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1|
|
|
; GFX7-NEXT: v_floor_f32_e32 v6, v4
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v6
|
|
; GFX7-NEXT: v_floor_f32_e32 v8, v5
|
|
; GFX7-NEXT: v_sub_f32_e32 v6, v4, v6
|
|
; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v8
|
|
; GFX7-NEXT: v_sub_f32_e32 v8, v5, v8
|
|
; GFX7-NEXT: v_min_f32_e32 v6, 0x3f7fe000, v6
|
|
; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
|
|
; GFX7-NEXT: v_min_f32_e32 v8, 0x3f7fe000, v8
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
|
|
; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
|
|
; GFX7-NEXT: v_cmp_neq_f32_e32 vcc, s8, v0
|
|
; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc
|
|
; GFX7-NEXT: v_cmp_neq_f32_e32 vcc, s8, v1
|
|
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-NEXT: v_or_b32_e32 v7, v9, v7
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
|
|
; GFX7-NEXT: buffer_store_dword v7, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: safe_math_fract_v2f16:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
|
|
; GFX8-NEXT: s_movk_i32 s6, 0x204
|
|
; GFX8-NEXT: v_floor_f16_e32 v4, v3
|
|
; GFX8-NEXT: v_floor_f16_e32 v5, v0
|
|
; GFX8-NEXT: v_fract_f16_e32 v6, v3
|
|
; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v3, s6
|
|
; GFX8-NEXT: v_pack_b32_f16 v4, v5, v4
|
|
; GFX8-NEXT: v_fract_f16_e32 v5, v0
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v3, v6, 0, s[4:5]
|
|
; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v0, s6
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[4:5]
|
|
; GFX8-NEXT: v_pack_b32_f16 v0, v0, v3
|
|
; GFX8-NEXT: global_store_dword v[1:2], v4, off
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: safe_math_fract_v2f16:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0
|
|
; GFX11-NEXT: v_fract_f16_e32 v6, v0
|
|
; GFX11-NEXT: v_floor_f16_e32 v5, v0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
|
|
; GFX11-NEXT: v_fract_f16_e32 v4, v3
|
|
; GFX11-NEXT: v_cmp_class_f16_e64 s0, v3, 0x204
|
|
; GFX11-NEXT: v_floor_f16_e32 v7, v3
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v3, v4, 0, s0
|
|
; GFX11-NEXT: v_cmp_class_f16_e64 s0, v0, 0x204
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-NEXT: v_pack_b32_f16 v4, v5, v7
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v6, 0, s0
|
|
; GFX11-NEXT: global_store_b32 v[1:2], v4, off
|
|
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v3
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call <2 x half> @llvm.floor.v2f16(<2 x half> %x)
|
|
%sub = fsub <2 x half> %x, %floor
|
|
%min = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> %sub, <2 x half> <half 0xH3BFF, half 0xH3BFF>)
|
|
%uno = fcmp uno <2 x half> %x, zeroinitializer
|
|
%cond = select <2 x i1> %uno, <2 x half> %x, <2 x half> %min
|
|
%fabs = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
|
|
%cmpinf = fcmp oeq <2 x half> %fabs, <half 0xH7C00, half 0xH7C00>
|
|
%cond6 = select <2 x i1> %cmpinf, <2 x half> zeroinitializer, <2 x half> %cond
|
|
store <2 x half> %floor, ptr addrspace(1) %ip, align 4
|
|
ret <2 x half> %cond6
|
|
}
|
|
|
|
; <2 x double> variant of the guarded fract expansion: x - floor(x) is clamped
; with minnum against 0x3FEFFFFFFFFFFFFF, x is passed through when it compares
; unordered (NaN), and zero is returned when |x| equals +infinity. floor(x) is
; additionally stored through %ip.
; Per the IR-FRACT checks, the fsub/minnum/uno-select part is replaced by one
; llvm.amdgcn.fract.f64 call per vector element while the infinity select is
; kept; the GFX6-IR checks expect the expansion to be left as written.
define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) nocapture writeonly %ip) {
|
|
; GFX6-IR-LABEL: define <2 x double> @safe_math_fract_v2f64
|
|
; GFX6-IR-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; GFX6-IR-NEXT: entry:
|
|
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[X]])
|
|
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x double> [[X]], [[FLOOR]]
|
|
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> [[SUB]], <2 x double> <double 0x3FEFFFFFFFFFFFFF, double 0x3FEFFFFFFFFFFFFF>)
|
|
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x double> [[X]], zeroinitializer
|
|
; GFX6-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x double> [[X]], <2 x double> [[MIN]]
|
|
; GFX6-IR-NEXT: [[FABS:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]])
|
|
; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], <double 0x7FF0000000000000, double 0x7FF0000000000000>
|
|
; GFX6-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x double> zeroinitializer, <2 x double> [[COND]]
|
|
; GFX6-IR-NEXT: store <2 x double> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; GFX6-IR-NEXT: ret <2 x double> [[COND6]]
|
|
;
|
|
; IR-FRACT-LABEL: define <2 x double> @safe_math_fract_v2f64
|
|
; IR-FRACT-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
|
|
; IR-FRACT-NEXT: entry:
|
|
; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[X]])
|
|
; IR-FRACT-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[X]], i64 0
|
|
; IR-FRACT-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[X]], i64 1
|
|
; IR-FRACT-NEXT: [[TMP2:%.*]] = call double @llvm.amdgcn.fract.f64(double [[TMP0]])
|
|
; IR-FRACT-NEXT: [[TMP3:%.*]] = call double @llvm.amdgcn.fract.f64(double [[TMP1]])
|
|
; IR-FRACT-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i64 0
|
|
; IR-FRACT-NEXT: [[COND:%.*]] = insertelement <2 x double> [[TMP4]], double [[TMP3]], i64 1
|
|
; IR-FRACT-NEXT: [[FABS:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]])
|
|
; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], <double 0x7FF0000000000000, double 0x7FF0000000000000>
|
|
; IR-FRACT-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x double> zeroinitializer, <2 x double> [[COND]]
|
|
; IR-FRACT-NEXT: store <2 x double> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
|
|
; IR-FRACT-NEXT: ret <2 x double> [[COND6]]
|
|
;
|
|
; GFX6-LABEL: safe_math_fract_v2f64:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: v_fract_f64_e32 v[6:7], v[2:3]
|
|
; GFX6-NEXT: v_mov_b32_e32 v10, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v11, 0x3fefffff
|
|
; GFX6-NEXT: v_min_f64 v[6:7], v[6:7], v[10:11]
|
|
; GFX6-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 3
|
|
; GFX6-NEXT: v_fract_f64_e32 v[12:13], v[0:1]
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v3, vcc
|
|
; GFX6-NEXT: v_add_f64 v[8:9], v[2:3], -v[6:7]
|
|
; GFX6-NEXT: v_min_f64 v[6:7], v[12:13], v[10:11]
|
|
; GFX6-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 3
|
|
; GFX6-NEXT: v_add_f64 v[10:11], v[2:3], -v[8:9]
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc
|
|
; GFX6-NEXT: v_add_f64 v[6:7], v[0:1], -v[6:7]
|
|
; GFX6-NEXT: s_mov_b32 s8, -1
|
|
; GFX6-NEXT: s_mov_b32 s9, 0x3fefffff
|
|
; GFX6-NEXT: v_add_f64 v[12:13], v[0:1], -v[6:7]
|
|
; GFX6-NEXT: v_min_f64 v[10:11], v[10:11], s[8:9]
|
|
; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
|
|
; GFX6-NEXT: v_min_f64 v[12:13], v[12:13], s[8:9]
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v11, v11, v3, vcc
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v10, v10, v2, vcc
|
|
; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
|
|
; GFX6-NEXT: s_movk_i32 s10, 0x204
|
|
; GFX6-NEXT: v_cmp_class_f64_e64 s[8:9], v[0:1], s10
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v13, v13, v1, vcc
|
|
; GFX6-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc
|
|
; GFX6-NEXT: v_cndmask_b32_e64 v0, v12, 0, s[8:9]
|
|
; GFX6-NEXT: v_cndmask_b32_e64 v1, v13, 0, s[8:9]
|
|
; GFX6-NEXT: v_cmp_class_f64_e64 s[8:9], v[2:3], s10
|
|
; GFX6-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-NEXT: v_cndmask_b32_e64 v2, v10, 0, s[8:9]
|
|
; GFX6-NEXT: v_cndmask_b32_e64 v3, v11, 0, s[8:9]
|
|
; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: safe_math_fract_v2f64:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: s_movk_i32 s4, 0x204
|
|
; GFX7-NEXT: v_fract_f64_e32 v[10:11], v[0:1]
|
|
; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[0:1], s4
|
|
; GFX7-NEXT: v_fract_f64_e32 v[12:13], v[2:3]
|
|
; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[2:3], s4
|
|
; GFX7-NEXT: v_floor_f64_e32 v[8:9], v[2:3]
|
|
; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
|
|
; GFX7-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v0, v10, 0, s[8:9]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v1, v11, 0, s[8:9]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[10:11]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[10:11]
|
|
; GFX7-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: safe_math_fract_v2f64:
|
|
; GFX8: ; %bb.0: ; %entry
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: s_movk_i32 s6, 0x204
|
|
; GFX8-NEXT: v_fract_f64_e32 v[10:11], v[0:1]
|
|
; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], s6
|
|
; GFX8-NEXT: v_fract_f64_e32 v[12:13], v[2:3]
|
|
; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], s6
|
|
; GFX8-NEXT: v_floor_f64_e32 v[8:9], v[2:3]
|
|
; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v10, 0, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v1, v11, 0, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[6:7]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[6:7]
|
|
; GFX8-NEXT: global_store_dwordx4 v[4:5], v[6:9], off
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: safe_math_fract_v2f64:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_fract_f64_e32 v[10:11], v[0:1]
|
|
; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x204
|
|
; GFX11-NEXT: v_fract_f64_e32 v[12:13], v[2:3]
|
|
; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[2:3], 0x204
|
|
; GFX11-NEXT: v_floor_f64_e32 v[8:9], v[2:3]
|
|
; GFX11-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v10, 0, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, v11, 0, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v2, v12, 0, s1
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v3, v13, 0, s1
|
|
; GFX11-NEXT: global_store_b128 v[4:5], v[6:9], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%floor = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %x)
|
|
%sub = fsub <2 x double> %x, %floor
|
|
%min = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> %sub, <2 x double> <double 0x3FEFFFFFFFFFFFFF, double 0x3FEFFFFFFFFFFFFF>)
|
|
%uno = fcmp uno <2 x double> %x, zeroinitializer
|
|
%cond = select <2 x i1> %uno, <2 x double> %x, <2 x double> %min
|
|
%fabs = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)
|
|
%cmpinf = fcmp oeq <2 x double> %fabs, <double 0x7FF0000000000000, double 0x7FF0000000000000>
|
|
%cond6 = select <2 x i1> %cmpinf, <2 x double> zeroinitializer, <2 x double> %cond
|
|
store <2 x double> %floor, ptr addrspace(1) %ip, align 4
|
|
ret <2 x double> %cond6
|
|
}
|
|
|
|
; Declarations of the floating-point intrinsics (floor, trunc, minnum, maxnum,
; fabs) in scalar and 2-element vector forms. Each declaration is tagged with
; attribute group #0, which is defined after the list.
declare half @llvm.floor.f16(half) #0
|
|
declare float @llvm.floor.f32(float) #0
|
|
declare double @llvm.floor.f64(double) #0
|
|
declare <2 x double> @llvm.floor.v2f64(<2 x double>) #0
|
|
declare <2 x float> @llvm.floor.v2f32(<2 x float>) #0
|
|
declare <2 x half> @llvm.floor.v2f16(<2 x half>) #0
|
|
declare float @llvm.trunc.f32(float) #0
|
|
declare float @llvm.minnum.f32(float, float) #0
|
|
declare half @llvm.minnum.f16(half, half) #0
|
|
declare double @llvm.minnum.f64(double, double) #0
|
|
declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0
|
|
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #0
|
|
declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0
|
|
declare float @llvm.maxnum.f32(float, float) #0
|
|
declare float @llvm.fabs.f32(float) #0
|
|
declare double @llvm.fabs.f64(double) #0
|
|
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #0
|
|
declare half @llvm.fabs.f16(half) #0
|
|
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
|
|
declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0
|
|
|
|
; Attribute group referenced as #0 by every intrinsic declaration above.
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|