1791 lines
65 KiB
LLVM
1791 lines
65 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
|
; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
|
|
|
|
declare void @llvm.set.rounding(i32)
|
|
declare i32 @llvm.get.rounding()
|
|
|
|
; Dynamic rounding mode passed as an inreg argument under the amdgpu_gfx
; calling convention. For every target the expected code takes the unsigned
; minimum of the argument and the argument minus 4, multiplies it by 4, and
; uses that as the right-shift amount into the 64-bit constant whose halves are
; 0x1c84a50f (low) and 0xb73e62d9 (high). The shifted value is written to the
; 4-bit field at offset 0 of HW_REG_MODE with s_setreg_b32.
define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) {
; GFX678-LABEL: s_set_rounding:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_add_i32 s34, s4, -4
; GFX678-NEXT: s_min_u32 s34, s4, s34
; GFX678-NEXT: s_lshl_b32 s36, s34, 2
; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_add_i32 s34, s4, -4
; GFX9-NEXT: s_min_u32 s34, s4, s34
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_add_i32 s34, s4, -4
; GFX10-NEXT: s_min_u32 s36, s4, s34
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_add_i32 s0, s4, -4
; GFX11-NEXT: s_min_u32 s2, s4, s0
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}
|
|
|
|
; Kernel entry point (amdgpu_kernel). The rounding mode is loaded from s[4:5]
; (s_load_dword, or s_load_b32 on gfx1100) and then goes through the same
; min / shift / 64-bit constant lookup as @s_set_rounding. The empty
; side-effecting inline asm follows the intrinsic call in the IR; in the
; expected output its ASMSTART/ASMEND markers appear ahead of the s_setreg_b32.
define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) {
; GFX6-LABEL: s_set_rounding_kernel:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dword s2, s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX6-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX6-NEXT: ;;#ASMSTART
; GFX6-NEXT: ;;#ASMEND
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s3, s2, -4
; GFX6-NEXT: s_min_u32 s2, s2, s3
; GFX6-NEXT: s_lshl_b32 s2, s2, 2
; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: s_set_rounding_kernel:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x9
; GFX7-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX7-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_i32 s3, s2, -4
; GFX7-NEXT: s_min_u32 s2, s2, s3
; GFX7-NEXT: s_lshl_b32 s2, s2, 2
; GFX7-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: s_set_rounding_kernel:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dword s2, s[4:5], 0x24
; GFX8-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX8-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_add_i32 s3, s2, -4
; GFX8-NEXT: s_min_u32 s2, s2, s3
; GFX8-NEXT: s_lshl_b32 s2, s2, 2
; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: s_set_rounding_kernel:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[4:5], 0x24
; GFX9-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_add_i32 s3, s2, -4
; GFX9-NEXT: s_min_u32 s2, s2, s3
; GFX9-NEXT: s_lshl_b32 s2, s2, 2
; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: s_set_rounding_kernel:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dword s2, s[4:5], 0x24
; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_i32 s3, s2, -4
; GFX10-NEXT: s_min_u32 s2, s2, s3
; GFX10-NEXT: s_lshl_b32 s2, s2, 2
; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: s_set_rounding_kernel:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x24
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_add_i32 s3, s2, -4
; GFX11-NEXT: s_min_u32 s2, s2, s3
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_endpgm
  call void @llvm.set.rounding(i32 %rounding)
  call void asm sideeffect "",""()
  ret void
}
|
|
|
|
; Default calling convention with a non-inreg argument, which the expected code
; reads from v0. The lookup is done with vector instructions (add of -4,
; v_min_u32, shift left by 2, 64-bit right shift of the constant pair), and the
; result is copied to an SGPR with v_readfirstlane_b32 before the s_setreg_b32
; write to the 4-bit field at offset 0 of HW_REG_MODE.
define void @v_set_rounding(i32 %rounding) {
; GFX6-LABEL: v_set_rounding:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_add_i32_e32 v1, vcc, -4, v0
; GFX6-NEXT: v_min_u32_e32 v0, v0, v1
; GFX6-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX6-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v0
; GFX6-NEXT: v_readfirstlane_b32 s4, v0
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_set_rounding:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_add_i32_e32 v1, vcc, -4, v0
; GFX7-NEXT: v_min_u32_e32 v0, v0, v1
; GFX7-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX7-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX7-NEXT: v_lshr_b64 v[0:1], s[4:5], v0
; GFX7-NEXT: v_readfirstlane_b32 s4, v0
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_set_rounding:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v1, vcc, -4, v0
; GFX8-NEXT: v_min_u32_e32 v0, v0, v1
; GFX8-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5]
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_set_rounding:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, -4, v0
; GFX9-NEXT: v_min_u32_e32 v0, v0, v1
; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5]
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_set_rounding:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_nc_u32_e32 v1, -4, v0
; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX10-NEXT: v_min_u32_e32 v0, v0, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5]
; GFX10-NEXT: v_readfirstlane_b32 s4, v0
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_set_rounding:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v1, -4, v0
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: v_min_u32_e32 v0, v0, v1
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1]
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}
|
|
|
|
; Round trip: the result of llvm.get.rounding is fed straight back into
; llvm.set.rounding. The expected code reads the 4-bit field at offset 0 of
; HW_REG_MODE with s_getreg_b32, translates it through the 64-bit constant
; 0xeb24da71 (low) / 0xc96f385 (high), masks the result to 4 bits and adds 4 to
; values that are not below 4. It then runs the full set-side sequence (add -4,
; unsigned min, shift, lookup in 0x1c84a50f / 0xb73e62d9, s_setreg_b32), so the
; checks record both conversions being emitted back to back.
define void @set_rounding_get_rounding() {
; GFX678-LABEL: set_rounding_get_rounding:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4)
; GFX678-NEXT: s_lshl_b32 s6, s4, 2
; GFX678-NEXT: s_mov_b32 s4, 0xeb24da71
; GFX678-NEXT: s_mov_b32 s5, 0xc96f385
; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX678-NEXT: s_and_b32 s4, s4, 15
; GFX678-NEXT: s_add_i32 s5, s4, 4
; GFX678-NEXT: s_cmp_lt_u32 s4, 4
; GFX678-NEXT: s_cselect_b32 s4, s4, s5
; GFX678-NEXT: s_add_i32 s5, s4, -4
; GFX678-NEXT: s_min_u32 s4, s4, s5
; GFX678-NEXT: s_lshl_b32 s6, s4, 2
; GFX678-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX678-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: set_rounding_get_rounding:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4)
; GFX9-NEXT: s_lshl_b32 s6, s4, 2
; GFX9-NEXT: s_mov_b32 s4, 0xeb24da71
; GFX9-NEXT: s_mov_b32 s5, 0xc96f385
; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX9-NEXT: s_and_b32 s4, s4, 15
; GFX9-NEXT: s_add_i32 s5, s4, 4
; GFX9-NEXT: s_cmp_lt_u32 s4, 4
; GFX9-NEXT: s_cselect_b32 s4, s4, s5
; GFX9-NEXT: s_add_i32 s5, s4, -4
; GFX9-NEXT: s_min_u32 s4, s4, s5
; GFX9-NEXT: s_lshl_b32 s6, s4, 2
; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: set_rounding_get_rounding:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4)
; GFX10-NEXT: s_mov_b32 s4, 0xeb24da71
; GFX10-NEXT: s_mov_b32 s5, 0xc96f385
; GFX10-NEXT: s_lshl_b32 s6, s6, 2
; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX10-NEXT: s_and_b32 s4, s4, 15
; GFX10-NEXT: s_add_i32 s5, s4, 4
; GFX10-NEXT: s_cmp_lt_u32 s4, 4
; GFX10-NEXT: s_cselect_b32 s4, s4, s5
; GFX10-NEXT: s_add_i32 s5, s4, -4
; GFX10-NEXT: s_min_u32 s6, s4, s5
; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s6, s6, 2
; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: set_rounding_get_rounding:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX11-NEXT: s_mov_b32 s1, 0xc96f385
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_and_b32 s0, s0, 15
; GFX11-NEXT: s_add_i32 s1, s0, 4
; GFX11-NEXT: s_cmp_lt_u32 s0, 4
; GFX11-NEXT: s_cselect_b32 s0, s0, s1
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %rounding = call i32 @llvm.get.rounding()
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}
|
|
|
|
; Constant mode 0. The lookup is expected to fold to an immediate: tahiti,
; hawaii, fiji and gfx900 write 15 to the 4-bit field at offset 0 of
; HW_REG_MODE with s_setreg_imm32_b32; gfx1030 and gfx1100 use s_round_mode 0xf.
define void @s_set_rounding_0() {
; GFX678-LABEL: s_set_rounding_0:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_0:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xf
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 0)
  ret void
}
|
|
|
|
; Constant mode 1. Expected to fold to immediate 0: s_setreg_imm32_b32 on
; tahiti, hawaii, fiji and gfx900; s_round_mode 0x0 on gfx1030 and gfx1100.
define void @s_set_rounding_1() {
; GFX678-LABEL: s_set_rounding_1:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_1:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x0
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 1)
  ret void
}
|
|
|
|
; Constant mode 2. Expected to fold to immediate 5: s_setreg_imm32_b32 on
; tahiti, hawaii, fiji and gfx900; s_round_mode 0x5 on gfx1030 and gfx1100.
define void @s_set_rounding_2() {
; GFX678-LABEL: s_set_rounding_2:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_2:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x5
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 2)
  ret void
}
|
|
|
|
; Constant mode 3. Expected to fold to immediate 10: s_setreg_imm32_b32 on
; tahiti, hawaii, fiji and gfx900; s_round_mode 0xa on gfx1030 and gfx1100.
define void @s_set_rounding_3() {
; GFX678-LABEL: s_set_rounding_3:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_3:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xa
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 3)
  ret void
}
|
|
|
|
; Unsupported mode.
; Constant mode 4. The checks expect the same immediate as for mode 0
; (15 / 0xf) on every target.
define void @s_set_rounding_4() {
; GFX678-LABEL: s_set_rounding_4:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_4:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xf
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 4)
  ret void
}
|
|
|
|
; undefined
; Constant mode 5. The checks expect the same immediate as for mode 1 (0) on
; every target.
define void @s_set_rounding_5() {
; GFX678-LABEL: s_set_rounding_5:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_5:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x0
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 5)
  ret void
}
|
|
|
|
; undefined
; Constant mode 6. The checks expect the same immediate as for mode 2 (5) on
; every target.
define void @s_set_rounding_6() {
; GFX678-LABEL: s_set_rounding_6:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_6:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x5
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 6)
  ret void
}
|
|
|
|
; "Dynamic"
|
|
; Constant mode 7. The checks expect the same immediate as for mode 3
; (10 / 0xa) on every target.
define void @s_set_rounding_7() {
; GFX678-LABEL: s_set_rounding_7:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_7:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xa
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 7)
  ret void
}
|
|
|
|
; Invalid
; Constant mode -1. The checks expect immediate 11 (0xb) on every target, the
; same value expected for mode 19 in @s_set_rounding_19.
define void @s_set_rounding_neg1() {
; GFX678-LABEL: s_set_rounding_neg1:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_neg1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_neg1:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xb
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 -1)
  ret void
}
|
|
|
|
; --------------------------------------------------------------------
|
|
; Test extended values
|
|
; --------------------------------------------------------------------
|
|
|
|
; NearestTiesToEvenF32_TowardPositiveF64 = 8
; Expected to fold to immediate 4: s_setreg_imm32_b32 on tahiti, hawaii, fiji
; and gfx900; s_round_mode 0x4 on gfx1030 and gfx1100.
define void @s_set_rounding_8() {
; GFX678-LABEL: s_set_rounding_8:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_8:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x4
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 8)
  ret void
}
|
|
|
|
; NearestTiesToEvenF32_TowardNegativeF64 = 9
; Expected to fold to immediate 8: s_setreg_imm32_b32 on tahiti, hawaii, fiji
; and gfx900; s_round_mode 0x8 on gfx1030 and gfx1100.
define void @s_set_rounding_9() {
; GFX678-LABEL: s_set_rounding_9:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_9:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_9:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x8
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 9)
  ret void
}
|
|
|
|
; NearestTiesToEvenF32_TowardZeroF64 = 10
; Expected to fold to immediate 12: s_setreg_imm32_b32 on tahiti, hawaii, fiji
; and gfx900; s_round_mode 0xc on gfx1030 and gfx1100.
define void @s_set_rounding_10() {
; GFX678-LABEL: s_set_rounding_10:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 12
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_10:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 12
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_10:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xc
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 10)
  ret void
}
|
|
|
|
; TowardPositiveF32_NearestTiesToEvenF64 = 11
; Expected to fold to immediate 1: s_setreg_imm32_b32 on tahiti, hawaii, fiji
; and gfx900; s_round_mode 0x1 on gfx1030 and gfx1100.
define void @s_set_rounding_11() {
; GFX678-LABEL: s_set_rounding_11:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_11:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_11:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x1
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 11)
  ret void
}
|
|
|
|
; TowardPositiveF32_TowardNegativeF64 = 12
; Expected to fold to immediate 9: s_setreg_imm32_b32 on tahiti, hawaii, fiji
; and gfx900; s_round_mode 0x9 on gfx1030 and gfx1100.
define void @s_set_rounding_12() {
; GFX678-LABEL: s_set_rounding_12:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_12:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_12:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x9
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 12)
  ret void
}
|
|
|
|
; TowardPositiveF32_TowardZeroF64 = 13
; Expected to fold to immediate 13: s_setreg_imm32_b32 on tahiti, hawaii, fiji
; and gfx900; s_round_mode 0xd on gfx1030 and gfx1100.
define void @s_set_rounding_13() {
; GFX678-LABEL: s_set_rounding_13:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 13
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_13:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 13
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_13:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xd
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 13)
  ret void
}
|
|
|
|
; TowardNegativeF32_NearestTiesToEvenF64 = 14
; Expected to fold to immediate 2: s_setreg_imm32_b32 on tahiti, hawaii, fiji
; and gfx900; s_round_mode 0x2 on gfx1030 and gfx1100.
define void @s_set_rounding_14() {
; GFX678-LABEL: s_set_rounding_14:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_14:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_14:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x2
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 14)
  ret void
}
|
|
|
|
; TowardNegativeF32_TowardPositiveF64 = 15
; Expected to fold to immediate 6: s_setreg_imm32_b32 on tahiti, hawaii, fiji
; and gfx900; s_round_mode 0x6 on gfx1030 and gfx1100.
define void @s_set_rounding_15() {
; GFX678-LABEL: s_set_rounding_15:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_15:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_15:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x6
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 15)
  ret void
}
|
|
|
|
|
|
; TowardNegativeF32_TowardZeroF64 = 16
; Expected to fold to immediate 14: s_setreg_imm32_b32 on tahiti, hawaii, fiji
; and gfx900; s_round_mode 0xe on gfx1030 and gfx1100.
define void @s_set_rounding_16() {
; GFX678-LABEL: s_set_rounding_16:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_16:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xe
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 16)
  ret void
}
|
|
|
|
; TowardZeroF32_NearestTiesToEvenF64 = 17
; Expected to fold to immediate 3: s_setreg_imm32_b32 on tahiti, hawaii, fiji
; and gfx900; s_round_mode 0x3 on gfx1030 and gfx1100.
define void @s_set_rounding_17() {
; GFX678-LABEL: s_set_rounding_17:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_17:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_17:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x3
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 17)
  ret void
}
|
|
|
|
; TowardZeroF32_TowardPositiveF64 = 18
; Expected to fold to immediate 7: s_setreg_imm32_b32 on tahiti, hawaii, fiji
; and gfx900; s_round_mode 0x7 on gfx1030 and gfx1100.
define void @s_set_rounding_18() {
; GFX678-LABEL: s_set_rounding_18:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_18:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_18:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x7
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 18)
  ret void
}
|
|
|
|
; TowardZeroF32_TowardNegativeF64 = 19
; Expected to fold to immediate 11: s_setreg_imm32_b32 on tahiti, hawaii, fiji
; and gfx900; s_round_mode 0xb on gfx1030 and gfx1100.
define void @s_set_rounding_19() {
; GFX678-LABEL: s_set_rounding_19:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_19:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_19:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xb
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 19)
  ret void
}
|
|
|
|
; Invalid, out of bounds
; Constant mode 20. The checks expect immediate 11 (0xb) on every target, the
; same value expected for mode 19 in @s_set_rounding_19.
define void @s_set_rounding_20() {
; GFX678-LABEL: s_set_rounding_20:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_20:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_20:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xb
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 20)
  ret void
}
|
|
|
|
; Constant mode 65535 (0xffff). The checks expect immediate 11 (0xb) on every
; target, the same value expected for mode 19 in @s_set_rounding_19.
define void @s_set_rounding_0xffff() {
; GFX678-LABEL: s_set_rounding_0xffff:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_0xffff:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_0xffff:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xb
; GFX1011-NEXT: s_setpc_b64 s[30:31]
  call void @llvm.set.rounding(i32 65535)
  ret void
}
|
|
|
|
; --------------------------------------------------------------------
|
|
; Test optimization knowing the value can only be in the standard
|
|
; range
|
|
; --------------------------------------------------------------------
|
|
|
|
; Rounding value is a zero-extended i2 (0-3). The checks expect the short
; form: the 32-bit constant 0xa50f shifted right by (value << 2), with no
; add/min sequence and no 64-bit constant.
define amdgpu_gfx void @s_set_rounding_i2_zeroext(i2 zeroext inreg %rounding) {
; GFX6-LABEL: s_set_rounding_i2_zeroext:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_lshl_b32 s34, s4, 2
; GFX6-NEXT: s_lshr_b32 s34, 0xa50f, s34
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_i2_zeroext:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_lshl_b32 s34, s4, 2
; GFX7-NEXT: s_lshr_b32 s34, 0xa50f, s34
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_i2_zeroext:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_and_b32 s34, 0xffff, s4
; GFX8-NEXT: s_lshl_b32 s34, s34, 2
; GFX8-NEXT: s_lshr_b32 s34, 0xa50f, s34
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_i2_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s34, 0xffff, s4
; GFX9-NEXT: s_lshl_b32 s34, s34, 2
; GFX9-NEXT: s_lshr_b32 s34, 0xa50f, s34
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_i2_zeroext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_and_b32 s34, 0xffff, s4
; GFX10-NEXT: s_lshl_b32 s34, s34, 2
; GFX10-NEXT: s_lshr_b32 s34, 0xa50f, s34
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_i2_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s0, 0xffff, s4
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_lshr_b32 s0, 0xa50f, s0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %zext.rounding = zext i2 %rounding to i32
  call void @llvm.set.rounding(i32 %zext.rounding)
  ret void
}
|
|
|
|
; Rounding value is a sign-extended i2. The checks expect the general
; sequence: add -4, unsigned min, then a 64-bit shift of the constant pair
; 0xb73e62d9:0x1c84a50f (presumably because the sign-extended value is not
; known to lie in 0-3).
define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) {
; GFX6-LABEL: s_set_rounding_i2_signext:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s34, s4, -4
; GFX6-NEXT: s_min_u32 s34, s4, s34
; GFX6-NEXT: s_lshl_b32 s36, s34, 2
; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_i2_signext:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_add_i32 s34, s4, -4
; GFX7-NEXT: s_min_u32 s34, s4, s34
; GFX7-NEXT: s_lshl_b32 s36, s34, 2
; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_i2_signext:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_sext_i32_i16 s34, s4
; GFX8-NEXT: s_add_i32 s35, s34, -4
; GFX8-NEXT: s_min_u32 s34, s34, s35
; GFX8-NEXT: s_lshl_b32 s36, s34, 2
; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_i2_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_sext_i32_i16 s34, s4
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_i2_signext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_sext_i32_i16 s34, s4
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_i2_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_sext_i32_i16 s0, s4
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %sext.rounding = sext i2 %rounding to i32
  call void @llvm.set.rounding(i32 %sext.rounding)
  ret void
}
|
|
|
|
; Rounding value is a sign-extended i3. The checks expect the general
; sequence: add -4, unsigned min, then a 64-bit shift of the constant pair
; 0xb73e62d9:0x1c84a50f.
define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) {
; GFX6-LABEL: s_set_rounding_i3_signext:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s34, s4, -4
; GFX6-NEXT: s_min_u32 s34, s4, s34
; GFX6-NEXT: s_lshl_b32 s36, s34, 2
; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_i3_signext:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_add_i32 s34, s4, -4
; GFX7-NEXT: s_min_u32 s34, s4, s34
; GFX7-NEXT: s_lshl_b32 s36, s34, 2
; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_i3_signext:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_sext_i32_i16 s34, s4
; GFX8-NEXT: s_add_i32 s35, s34, -4
; GFX8-NEXT: s_min_u32 s34, s34, s35
; GFX8-NEXT: s_lshl_b32 s36, s34, 2
; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_i3_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_sext_i32_i16 s34, s4
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_i3_signext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_sext_i32_i16 s34, s4
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_i3_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_sext_i32_i16 s0, s4
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %sext.rounding = sext i3 %rounding to i32
  call void @llvm.set.rounding(i32 %sext.rounding)
  ret void
}
|
|
|
|
; Rounding value is a zero-extended i3 (0-7). The checks expect the general
; sequence: add -4, unsigned min, then a 64-bit shift of the constant pair
; 0xb73e62d9:0x1c84a50f.
define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
; GFX6-LABEL: s_set_rounding_i3_zeroext:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s34, s4, -4
; GFX6-NEXT: s_min_u32 s34, s4, s34
; GFX6-NEXT: s_lshl_b32 s36, s34, 2
; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_i3_zeroext:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_add_i32 s34, s4, -4
; GFX7-NEXT: s_min_u32 s34, s4, s34
; GFX7-NEXT: s_lshl_b32 s36, s34, 2
; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_i3_zeroext:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_and_b32 s34, 0xffff, s4
; GFX8-NEXT: s_add_i32 s35, s34, -4
; GFX8-NEXT: s_min_u32 s34, s34, s35
; GFX8-NEXT: s_lshl_b32 s36, s34, 2
; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_i3_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s34, 0xffff, s4
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_i3_zeroext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_and_b32 s34, 0xffff, s4
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_i3_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s0, 0xffff, s4
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %zext.rounding = zext i3 %rounding to i32
  call void @llvm.set.rounding(i32 %zext.rounding)
  ret void
}
|
|
|
|
; Rounding value is a select between the constants 0 and 1. The checks expect
; a 0/1 value shifted left by 2 and used to shift the 32-bit constant 0xa50f.
define amdgpu_gfx void @s_set_rounding_select_0_1(i32 inreg %cond) {
; GFX6-LABEL: s_set_rounding_select_0_1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_cmp_lg_u32 s4, 0
; GFX6-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX6-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0
; GFX6-NEXT: v_readfirstlane_b32 s34, v0
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_select_0_1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_cmp_lg_u32 s4, 0
; GFX7-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX7-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0
; GFX7-NEXT: v_readfirstlane_b32 s34, v0
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_select_0_1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_cmp_lg_u32 s4, 0
; GFX8-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT: s_mov_b32 s34, 0xa50f
; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s34
; GFX8-NEXT: v_readfirstlane_b32 s34, v0
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_0_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_lg_u32 s4, 0
; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_mov_b32 s34, 0xa50f
; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s34
; GFX9-NEXT: v_readfirstlane_b32 s34, v0
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_0_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_lg_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, -1, 0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
; GFX10-NEXT: v_readfirstlane_b32 s34, v0
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_0_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_lg_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, -1, 0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %cond, 0
  %rounding = select i1 %cmp, i32 0, i32 1
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}
|
|
|
|
; Rounding value is a select between the constants 1 and 3 on an SGPR
; condition. The checks expect a single s_cselect_b32 between 0xa50 and 10
; feeding s_setreg_b32 directly, with no shift.
define amdgpu_gfx void @s_set_rounding_select_1_3(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_1_3:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_cselect_b32 s34, 0xa50, 10
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_1_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b32 s34, 0xa50, 10
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_1_3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, 0xa50, 10
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_1_3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, 0xa50, 10
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %cond, 0
  %rounding = select i1 %cmp, i32 1, i32 3
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}
|
|
|
|
; Same select between 1 and 3, but the condition is read from v0 in the
; checks. They expect a v_cndmask between 10 and 0xa50, then
; v_readfirstlane_b32 to move the result into an SGPR for s_setreg_b32.
define void @v_set_rounding_select_1_3(i32 %cond) {
; GFX678-LABEL: v_set_rounding_select_1_3:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: v_mov_b32_e32 v1, 0xa50
; GFX678-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX678-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc
; GFX678-NEXT: v_readfirstlane_b32 s4, v0
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_set_rounding_select_1_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, 0xa50
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_set_rounding_select_1_3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo
; GFX10-NEXT: v_readfirstlane_b32 s4, v0
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_set_rounding_select_1_3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %cond, 0
  %rounding = select i1 %cmp, i32 1, i32 3
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}
|
|
|
|
; Rounding value is a select between the constants 2 and 0. The checks expect
; a 0/1 value shifted left by 3 and used to shift the 32-bit constant 0xa50f.
define amdgpu_gfx void @s_set_rounding_select_2_0(i32 inreg %cond) {
; GFX6-LABEL: s_set_rounding_select_2_0:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_cmp_eq_u32 s4, 0
; GFX6-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX6-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0
; GFX6-NEXT: v_readfirstlane_b32 s34, v0
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_select_2_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_cmp_eq_u32 s4, 0
; GFX7-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX7-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0
; GFX7-NEXT: v_readfirstlane_b32 s34, v0
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_select_2_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_cmp_eq_u32 s4, 0
; GFX8-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX8-NEXT: s_mov_b32 s34, 0xa50f
; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s34
; GFX8-NEXT: v_readfirstlane_b32 s34, v0
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_2_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX9-NEXT: s_mov_b32 s34, 0xa50f
; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s34
; GFX9-NEXT: v_readfirstlane_b32 s34, v0
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_2_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, -1, 0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
; GFX10-NEXT: v_readfirstlane_b32 s34, v0
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_2_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, -1, 0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %cond, 0
  %rounding = select i1 %cmp, i32 2, i32 0
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}
|
|
|
|
; Rounding value is a select between the constants 2 and 1. The checks expect
; an s_cselect_b32 between 0xa5 and 0xa50 feeding s_setreg_b32 directly.
define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_2_1:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_movk_i32 s34, 0xa5
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa50
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_2_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_movk_i32 s34, 0xa5
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa50
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_2_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_movk_i32 s34, 0xa5
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa50
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_2_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_movk_i32 s0, 0xa5
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa50
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %cond, 0
  %rounding = select i1 %cmp, i32 2, i32 1
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}
|
|
|
|
; Rounding value is a select between the constants 1 and 2 (operands swapped
; relative to s_set_rounding_select_2_1). The checks expect an s_cselect_b32
; between 0xa50 and 0xa5 feeding s_setreg_b32 directly.
define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_1_2:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_movk_i32 s34, 0xa50
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa5
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_1_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_movk_i32 s34, 0xa50
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa5
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_1_2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_movk_i32 s34, 0xa50
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa5
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_1_2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_movk_i32 s0, 0xa50
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa5
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %cond, 0
  %rounding = select i1 %cmp, i32 1, i32 2
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}
|
|
|
|
; Rounding value is a select between the constants 3 and 0. The checks expect
; a single s_cselect_b32 between 10 and 0xa50f feeding s_setreg_b32 directly.
define amdgpu_gfx void @s_set_rounding_select_3_0(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_3_0:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_cselect_b32 s34, 10, 0xa50f
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_3_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b32 s34, 10, 0xa50f
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_3_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, 10, 0xa50f
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_3_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, 10, 0xa50f
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %cond, 0
  %rounding = select i1 %cmp, i32 3, i32 0
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}
|
|
|
|
; Rounding value is a select between the constants 4 and 0. The checks expect
; the general sequence: a 0/1 value shifted left by 2, add -4, unsigned min,
; then a 64-bit shift of the constant pair 0xb73e62d9:0x1c84a50f.
define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_4_0:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX678-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX678-NEXT: v_readfirstlane_b32 s34, v0
; GFX678-NEXT: s_lshl_b32 s34, s34, 2
; GFX678-NEXT: s_add_i32 s35, s34, -4
; GFX678-NEXT: s_min_u32 s34, s34, s35
; GFX678-NEXT: s_lshl_b32 s36, s34, 2
; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_4_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX9-NEXT: v_readfirstlane_b32 s34, v0
; GFX9-NEXT: s_lshl_b32 s34, s34, 2
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_4_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, -1, 0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34
; GFX10-NEXT: v_readfirstlane_b32 s34, v0
; GFX10-NEXT: s_lshl_b32 s34, s34, 2
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_4_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, -1, 0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %cond, 0
  %rounding = select i1 %cmp, i32 4, i32 0
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}
|
|
|
|
; Rounding value is a select between the constants 3 and 5. The checks expect
; an s_cselect_b32 between 3 and 5 followed by the general sequence: add -4,
; unsigned min, then a 64-bit shift of the constant pair
; 0xb73e62d9:0x1c84a50f.
define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_3_5:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_cselect_b32 s34, 3, 5
; GFX678-NEXT: s_add_i32 s35, s34, -4
; GFX678-NEXT: s_min_u32 s34, s34, s35
; GFX678-NEXT: s_lshl_b32 s36, s34, 2
; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_3_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b32 s34, 3, 5
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_3_5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, 3, 5
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_3_5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, 3, 5
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %cond, 0
  %rounding = select i1 %cmp, i32 3, i32 5
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}
|
|
|
|
; Kernel test: calls @llvm.set.rounding with the constant 1, immediately reads
; the mode back with @llvm.get.rounding, and volatile-stores the result to a
; null addrspace(1) pointer.
; The expected output for every listed target still contains an s_getreg_b32 of
; HW_REG_MODE after the mode write, followed by the shift/mask/select sequence,
; i.e. the read-back is not replaced by a constant.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand.
define amdgpu_kernel void @get_rounding_after_set_rounding_1() {
|
|
; GFX6-LABEL: get_rounding_after_set_rounding_1:
|
|
; GFX6: ; %bb.0:
|
|
; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
|
|
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
|
; GFX6-NEXT: s_nop 0
|
|
; GFX6-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
|
|
; GFX6-NEXT: s_lshl_b32 s2, s0, 2
|
|
; GFX6-NEXT: s_mov_b32 s0, 0xeb24da71
|
|
; GFX6-NEXT: s_mov_b32 s1, 0xc96f385
|
|
; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
|
|
; GFX6-NEXT: s_and_b32 s0, s0, 15
|
|
; GFX6-NEXT: s_add_i32 s1, s0, 4
|
|
; GFX6-NEXT: s_cmp_lt_u32 s0, 4
|
|
; GFX6-NEXT: s_cselect_b32 s4, s0, s1
|
|
; GFX6-NEXT: s_mov_b32 s0, 0
|
|
; GFX6-NEXT: s_mov_b32 s2, -1
|
|
; GFX6-NEXT: s_mov_b32 s1, s0
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: get_rounding_after_set_rounding_1:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
|
|
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
|
; GFX7-NEXT: s_nop 0
|
|
; GFX7-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
|
|
; GFX7-NEXT: s_lshl_b32 s2, s0, 2
|
|
; GFX7-NEXT: s_mov_b32 s0, 0xeb24da71
|
|
; GFX7-NEXT: s_mov_b32 s1, 0xc96f385
|
|
; GFX7-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
|
|
; GFX7-NEXT: s_and_b32 s0, s0, 15
|
|
; GFX7-NEXT: s_add_i32 s1, s0, 4
|
|
; GFX7-NEXT: s_cmp_lt_u32 s0, 4
|
|
; GFX7-NEXT: s_cselect_b32 s4, s0, s1
|
|
; GFX7-NEXT: s_mov_b32 s0, 0
|
|
; GFX7-NEXT: s_mov_b32 s2, -1
|
|
; GFX7-NEXT: s_mov_b32 s1, s0
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX8-LABEL: get_rounding_after_set_rounding_1:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
|
|
; GFX8-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX8-NEXT: v_mov_b32_e32 v1, 0
|
|
; GFX8-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
|
|
; GFX8-NEXT: s_lshl_b32 s2, s0, 2
|
|
; GFX8-NEXT: s_mov_b32 s0, 0xeb24da71
|
|
; GFX8-NEXT: s_mov_b32 s1, 0xc96f385
|
|
; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
|
|
; GFX8-NEXT: s_and_b32 s0, s0, 15
|
|
; GFX8-NEXT: s_add_i32 s1, s0, 4
|
|
; GFX8-NEXT: s_cmp_lt_u32 s0, 4
|
|
; GFX8-NEXT: s_cselect_b32 s0, s0, s1
|
|
; GFX8-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX8-NEXT: flat_store_dword v[0:1], v2
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_endpgm
|
|
;
|
|
; GFX9-LABEL: get_rounding_after_set_rounding_1:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
|
; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
|
|
; GFX9-NEXT: s_lshl_b32 s2, s0, 2
|
|
; GFX9-NEXT: s_mov_b32 s0, 0xeb24da71
|
|
; GFX9-NEXT: s_mov_b32 s1, 0xc96f385
|
|
; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
|
|
; GFX9-NEXT: s_and_b32 s0, s0, 15
|
|
; GFX9-NEXT: s_add_i32 s1, s0, 4
|
|
; GFX9-NEXT: s_cmp_lt_u32 s0, 4
|
|
; GFX9-NEXT: s_cselect_b32 s0, s0, s1
|
|
; GFX9-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX9-NEXT: global_store_dword v[0:1], v2, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_endpgm
|
|
;
|
|
; GFX10-LABEL: get_rounding_after_set_rounding_1:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_round_mode 0x0
|
|
; GFX10-NEXT: s_mov_b32 s0, 0xeb24da71
|
|
; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
|
|
; GFX10-NEXT: s_mov_b32 s1, 0xc96f385
|
|
; GFX10-NEXT: s_lshl_b32 s2, s2, 2
|
|
; GFX10-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
|
|
; GFX10-NEXT: v_mov_b32_e32 v1, 0
|
|
; GFX10-NEXT: s_and_b32 s0, s0, 15
|
|
; GFX10-NEXT: s_add_i32 s1, s0, 4
|
|
; GFX10-NEXT: s_cmp_lt_u32 s0, 4
|
|
; GFX10-NEXT: s_cselect_b32 s0, s0, s1
|
|
; GFX10-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX10-NEXT: global_store_dword v[0:1], v2, off
|
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: get_rounding_after_set_rounding_1:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_round_mode 0x0
|
|
; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71
|
|
; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
|
|
; GFX11-NEXT: s_mov_b32 s1, 0xc96f385
|
|
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
|
|
; GFX11-NEXT: s_and_b32 s0, s0, 15
|
|
; GFX11-NEXT: s_add_i32 s1, s0, 4
|
|
; GFX11-NEXT: s_cmp_lt_u32 s0, 4
|
|
; GFX11-NEXT: s_cselect_b32 s0, s0, s1
|
|
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_endpgm
|
|
; Write the rounding mode using the compile-time constant 1.
tail call void @llvm.set.rounding(i32 1)
|
|
; Read the mode back right after the write above.
%set.mode = tail call i32 @llvm.get.rounding()
|
|
; Volatile store of the read-back value so it shows up in the generated code.
store volatile i32 %set.mode, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
|
; GCN: {{.*}}
|