Files
clang-p2996/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll
Kevin P. Neal d15c454bed [FPEnv][AMDGPU] Correct strictfp tests.
Correct AMDGPU strictfp tests to follow the rules documented in the
LangRef:
https://llvm.org/docs/LangRef.html#constrained-floating-point-intrinsics

These tests needed the strictfp attribute added to function calls and
some declarations.

Some of the tests now pass with D146845, others get farther along and
fail with D146845. The tests revealed that further work is required
in mostly AMDGPU atomics to get the tests passing.

Since I was here anyway I removed the strictfp attribute from some
constrained intrinsic declarations. They have this attribute by default.

Test changes verified with D146845.
2024-02-05 09:29:31 -05:00

698 lines
26 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s
; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
; Intrinsic under test. The functions below call it and pin the
; s_getreg_b32 / s_and_b32 sequence that llc is expected to emit for the call.
declare i32 @llvm.get.fpmode.i32()
; Baseline case, returning the unmodified result of llvm.get.fpmode.i32 from a
; non-kernel function. The expected code reads the mode register with
; s_getreg_b32 using hwreg(HW_REG_MODE, 0, 19) on the tahiti, hawaii and fiji
; runs and hwreg(HW_REG_MODE, 0, 24) on gfx900 and newer, masks the value with
; 0x7f3ff or 0x87f3ff respectively, and copies it to v0 before returning.
define i32 @func_fpmode_i32() {
; GFX678-LABEL: func_fpmode_i32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
ret i32 %fpmode
}
; Same shape as func_fpmode_i32, but with the strictfp attribute on both the
; function and the call site. The expected instruction sequence is identical
; to the non-strict variant on every run line.
define i32 @strictfp_func_fpmode_i32() strictfp {
; GFX678-LABEL: strictfp_func_fpmode_i32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: strictfp_func_fpmode_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: strictfp_func_fpmode_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: strictfp_func_fpmode_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32() strictfp
ret i32 %fpmode
}
; amdgpu_kernel variant, where the mode value is stored through the
; addrspace(1) pointer argument instead of being returned. The expected output
; differs per run line in how the pointer is loaded and which store is emitted
; (buffer_store_dword, flat_store_dword, global_store_dword, global_store_b32),
; so tahiti, hawaii and fiji each get their own check block here instead of
; the shared one used by the non-kernel functions.
define amdgpu_kernel void @kernel_fpmode_i32(ptr addrspace(1) %ptr) {
; GFX6-LABEL: kernel_fpmode_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX6-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: kernel_fpmode_i32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX7-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: kernel_fpmode_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 19)
; GFX8-NEXT: s_and_b32 s2, 0x7f3ff, s2
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: kernel_fpmode_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s2, 0x87f3ff, s2
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: kernel_fpmode_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_and_b32 s2, 0x87f3ff, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: kernel_fpmode_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s2, 0x87f3ff, s2
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fpmode = call i32 @llvm.get.fpmode.i32()
store i32 %fpmode, ptr addrspace(1) %ptr
ret void
}
; TODO: We should be able to reduce the demanded bits and ask for less
; from s_getreg_b32
;
; Masks the mode value with 240 (0xf0); the IR names the result denorm.only.
; The expected output currently keeps the full-width s_getreg_b32 and the
; per-target mask (0x7f3ff or 0x87f3ff), followed by a second s_and_b32 with
; 0xf0.
define i32 @func_fpmode_i32_denormonly() {
; GFX678-LABEL: func_fpmode_i32_denormonly:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0xf0
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_denormonly:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0xf0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_denormonly:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0xf0
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_denormonly:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0xf0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
%denorm.only = and i32 %fpmode, 240
ret i32 %denorm.only
}
; Masks the mode value with 15 (the low four bits); the IR names the result
; round.only. As in the other masked tests, the expected output keeps the
; full-width register read and adds a second s_and_b32 with the user mask.
define i32 @func_fpmode_i32_roundonly() {
; GFX678-LABEL: func_fpmode_i32_roundonly:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 15
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_roundonly:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 15
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_roundonly:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 15
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_roundonly:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 15
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
%round.only = and i32 %fpmode, 15
ret i32 %round.only
}
; Masks the mode value with 255 (0xff), the union of the 15 and 240 masks used
; by the roundonly and denormonly tests; the IR names the result
; round.denorm.only. The expected output adds a second s_and_b32 with 0xff.
define i32 @func_fpmode_i32_round_denorm_only() {
; GFX678-LABEL: func_fpmode_i32_round_denorm_only:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0xff
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_round_denorm_only:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0xff
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_round_denorm_only:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0xff
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_round_denorm_only:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0xff
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
%round.denorm.only = and i32 %fpmode, 255
ret i32 %round.denorm.only
}
; Masks the mode value with 1023 (0x3ff, the low ten bits); the IR names the
; result core.mode. This covers the 0xff mask of the round/denorm test plus
; the 0x100 and 0x200 bits exercised by the dx10_clamp_only and ieee_only
; tests. The expected output adds a second s_and_b32 with 0x3ff.
define i32 @func_fpmode_i32_round_denorm_dx10_ieee() {
; GFX678-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0x3ff
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0x3ff
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0x3ff
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0x3ff
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
%core.mode = and i32 %fpmode, 1023
ret i32 %core.mode
}
; Masks the mode value with 520192 (0x7f000, bits 12 through 18). Together
; with the 0x3ff mask of the round_denorm_dx10_ieee test this makes up the
; 0x7f3ff mask seen in the tahiti/hawaii/fiji checks. The expected output adds
; a second s_and_b32 with 0x7f000.
define i32 @func_fpmode_i32_excp_en() {
; GFX678-LABEL: func_fpmode_i32_excp_en:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0x7f000
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_excp_en:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0x7f000
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_excp_en:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0x7f000
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_excp_en:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0x7f000
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
%core.mode = and i32 %fpmode, 520192
ret i32 %core.mode
}
; Mask for all bits used on gfx6+
;
; 521215 is 0x7f3ff, the same constant the tahiti/hawaii/fiji checks already
; apply right after s_getreg_b32. The expected output nevertheless contains a
; second s_and_b32 with that constant, i.e. the redundant mask is not folded.
define i32 @func_fpmode_i32_environment_gfx6() {
; GFX678-LABEL: func_fpmode_i32_environment_gfx6:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0x7f3ff
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_environment_gfx6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0x7f3ff
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_environment_gfx6:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0x7f3ff
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_environment_gfx6:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0x7f3ff
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
%core.mode = and i32 %fpmode, 521215
ret i32 %core.mode
}
; Mask for all bits used on gfx9+
;
; 8909823 is 0x87f3ff, the same constant the gfx900 and newer checks already
; apply right after s_getreg_b32. The expected output nevertheless contains a
; second s_and_b32 with that constant on every run line, including the
; tahiti/hawaii/fiji ones whose first mask is the narrower 0x7f3ff.
define i32 @func_fpmode_i32_environment_gfx9() {
; GFX678-LABEL: func_fpmode_i32_environment_gfx9:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0x87f3ff
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_environment_gfx9:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0x87f3ff
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_environment_gfx9:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0x87f3ff
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_environment_gfx9:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0x87f3ff
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
%core.mode = and i32 %fpmode, 8909823
ret i32 %core.mode
}
; Masks the mode value with 48 (0x30, bits 4 and 5), the lower half of the
; 0xf0 mask used by func_fpmode_i32_denormonly. The expected output adds a
; second s_and_b32 with 48.
define i32 @func_fpmode_i32_denormf32only() {
; GFX678-LABEL: func_fpmode_i32_denormf32only:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 48
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_denormf32only:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 48
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_denormf32only:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 48
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_denormf32only:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 48
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
%denorm.only = and i32 %fpmode, 48
ret i32 %denorm.only
}
; Single-bit tests for the f32 denormal field. func_fpmode_i32_denormf32only
; covers that field with mask 48 (16 | 32), so the _0 test isolates bit 4
; (mask 16) and the _1 test isolates bit 5 (mask 32). The previous masks were
; 32 and 64; 64 lies inside the 192 mask used by
; func_fpmode_i32_denormf64f16only, so the "_1" test was not exercising an f32
; bit at all.
;
; The s_and_b32 immediates in the check lines below were updated by hand to
; match the new masks; rerun utils/update_llc_test_checks.py to confirm them.
define i32 @func_fpmode_i32_denormf32only_0() {
; GFX678-LABEL: func_fpmode_i32_denormf32only_0:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 16
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_denormf32only_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 16
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_denormf32only_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 16
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_denormf32only_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 16
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
%denorm.only = and i32 %fpmode, 16
ret i32 %denorm.only
}
; Upper bit of the f32 denormal field (mask 32, bit 5).
define i32 @func_fpmode_i32_denormf32only_1() {
; GFX678-LABEL: func_fpmode_i32_denormf32only_1:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 32
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_denormf32only_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 32
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_denormf32only_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 32
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_denormf32only_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 32
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
%denorm.only = and i32 %fpmode, 32
ret i32 %denorm.only
}
; Masks the mode value with 192 (0xc0, bits 6 and 7), the upper half of the
; 0xf0 mask used by func_fpmode_i32_denormonly. The expected output adds a
; second s_and_b32 with 0xc0.
define i32 @func_fpmode_i32_denormf64f16only() {
; GFX678-LABEL: func_fpmode_i32_denormf64f16only:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0xc0
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_denormf64f16only:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0xc0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_denormf64f16only:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0xc0
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_denormf64f16only:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0xc0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
%denorm.only = and i32 %fpmode, 192
ret i32 %denorm.only
}
; Masks the mode value with 256 (0x100, bit 8 alone); the IR names the result
; dx10.only. The expected output adds a second s_and_b32 with 0x100.
define i32 @func_fpmode_i32_dx10_clamp_only() {
; GFX678-LABEL: func_fpmode_i32_dx10_clamp_only:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0x100
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_dx10_clamp_only:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0x100
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_dx10_clamp_only:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0x100
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_dx10_clamp_only:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0x100
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
%dx10.only = and i32 %fpmode, 256
ret i32 %dx10.only
}
; Masks the mode value with 512 (0x200, bit 9 alone); the IR names the result
; ieee.only. The expected output adds a second s_and_b32 with 0x200.
define i32 @func_fpmode_i32_ieee_only() {
; GFX678-LABEL: func_fpmode_i32_ieee_only:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0x200
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_ieee_only:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0x200
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_ieee_only:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0x200
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_ieee_only:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0x200
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
%ieee.only = and i32 %fpmode, 512
ret i32 %ieee.only
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
; GFX1011: {{.*}}