AMDGPU: Add mode register use to s_getreg_b32
This should fix s_getreg_b32 reading the wrong mode value after the mode register has been set. Ideally we would have separate pseudos for the cases that are known not to read MODE.
This commit is contained in:
committed by
Matt Arsenault
parent
9eb91f45fb
commit
f548c4d83c
@@ -1110,14 +1110,15 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo <
|
||||
|
||||
// This is hasSideEffects to allow its use in readcyclecounter selection.
|
||||
// FIXME: Need to truncate immediate to 16-bits.
|
||||
// FIXME: Missing mode register use. Should have separate pseudos for
|
||||
// known may read MODE and only read MODE.
|
||||
// FIXME: Should have separate pseudos for known may read MODE and
|
||||
// only read MODE.
|
||||
def S_GETREG_B32 : SOPK_Pseudo <
|
||||
"s_getreg_b32",
|
||||
(outs SReg_32:$sdst), (ins hwreg:$simm16),
|
||||
"$sdst, $simm16",
|
||||
[(set i32:$sdst, (int_amdgcn_s_getreg (i32 timm:$simm16)))]> {
|
||||
let hasSideEffects = 1;
|
||||
// Model an implicit read of MODE so that this register read stays ordered
// with respect to instructions that write MODE (e.g. s_setreg on
// HW_REG_MODE); otherwise the wrong mode could be read after it was set.
// This is conservative: it is applied to every s_getreg_b32, including
// reads of hardware registers other than MODE.
let Uses = [MODE];
|
||||
}
|
||||
|
||||
let Defs = [MODE], Uses = [MODE] in {
|
||||
|
||||
@@ -2417,12 +2417,12 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 {
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
|
||||
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
|
||||
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -2455,12 +2455,12 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 {
|
||||
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
|
||||
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
|
||||
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
|
||||
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
|
||||
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
|
||||
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
|
||||
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
|
||||
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -2727,12 +2727,12 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 {
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
|
||||
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
|
||||
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -2765,12 +2765,12 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 {
|
||||
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
|
||||
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
|
||||
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
|
||||
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
|
||||
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
|
||||
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
|
||||
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
|
||||
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -3294,12 +3294,12 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z)
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
|
||||
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v3, v4, 1.0
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v6, v4, v4
|
||||
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -3334,12 +3334,12 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z)
|
||||
; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
|
||||
; GFX7-NEXT: v_rcp_f32_e32 v4, v3
|
||||
; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX7-NEXT: v_fma_f32 v6, -v3, v4, 1.0
|
||||
; GFX7-NEXT: v_fma_f32 v4, v6, v4, v4
|
||||
; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4
|
||||
; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6
|
||||
; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5
|
||||
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -3868,12 +3868,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 {
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
|
||||
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
|
||||
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -3906,12 +3906,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 {
|
||||
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
|
||||
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
|
||||
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
|
||||
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
|
||||
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
|
||||
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
|
||||
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
|
||||
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -4434,12 +4434,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
|
||||
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v3, v4, 1.0
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v6, v4, v4
|
||||
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -4474,12 +4474,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y
|
||||
; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
|
||||
; GFX7-NEXT: v_rcp_f32_e32 v4, v3
|
||||
; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX7-NEXT: v_fma_f32 v6, -v3, v4, 1.0
|
||||
; GFX7-NEXT: v_fma_f32 v4, v6, v4, v4
|
||||
; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4
|
||||
; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6
|
||||
; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5
|
||||
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -5010,12 +5010,12 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 {
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0
|
||||
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
|
||||
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -5048,12 +5048,12 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 {
|
||||
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0
|
||||
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
|
||||
; GFX7-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
|
||||
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
|
||||
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
|
||||
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
|
||||
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
|
||||
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -5569,12 +5569,12 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 {
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
|
||||
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
|
||||
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -5607,12 +5607,12 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 {
|
||||
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
|
||||
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
|
||||
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
|
||||
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
|
||||
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
|
||||
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
|
||||
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
|
||||
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -6113,12 +6113,12 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 {
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0
|
||||
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2
|
||||
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -6153,12 +6153,12 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 {
|
||||
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0
|
||||
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
|
||||
; GFX7-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0
|
||||
; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2
|
||||
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
|
||||
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
|
||||
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
|
||||
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -6619,12 +6619,12 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 {
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6
|
||||
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2
|
||||
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -6659,12 +6659,12 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 {
|
||||
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6
|
||||
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
|
||||
; GFX7-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0
|
||||
; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2
|
||||
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
|
||||
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
|
||||
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
|
||||
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -7168,12 +7168,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
|
||||
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
|
||||
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -7206,12 +7206,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
|
||||
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
|
||||
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
|
||||
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
|
||||
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
|
||||
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
|
||||
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
|
||||
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
|
||||
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -7721,12 +7721,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
|
||||
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
|
||||
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
|
||||
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
|
||||
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
|
||||
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
@@ -7759,12 +7759,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
|
||||
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
|
||||
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
|
||||
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
|
||||
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
|
||||
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
|
||||
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
|
||||
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
|
||||
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
|
||||
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
|
||||
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
|
||||
|
||||
@@ -1661,5 +1661,132 @@ define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @get_rounding_after_set_rounding_1() {
|
||||
; GFX6-LABEL: get_rounding_after_set_rounding_1:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_nop 0
|
||||
; GFX6-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
|
||||
; GFX6-NEXT: s_lshl_b32 s2, s0, 2
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0xeb24da71
|
||||
; GFX6-NEXT: s_mov_b32 s1, 0xc96f385
|
||||
; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
|
||||
; GFX6-NEXT: s_and_b32 s0, s0, 15
|
||||
; GFX6-NEXT: s_add_i32 s1, s0, 4
|
||||
; GFX6-NEXT: s_cmp_lt_u32 s0, 4
|
||||
; GFX6-NEXT: s_cselect_b32 s4, s0, s1
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0
|
||||
; GFX6-NEXT: s_mov_b32 s2, -1
|
||||
; GFX6-NEXT: s_mov_b32 s1, s0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: get_rounding_after_set_rounding_1:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_nop 0
|
||||
; GFX7-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
|
||||
; GFX7-NEXT: s_lshl_b32 s2, s0, 2
|
||||
; GFX7-NEXT: s_mov_b32 s0, 0xeb24da71
|
||||
; GFX7-NEXT: s_mov_b32 s1, 0xc96f385
|
||||
; GFX7-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
|
||||
; GFX7-NEXT: s_and_b32 s0, s0, 15
|
||||
; GFX7-NEXT: s_add_i32 s1, s0, 4
|
||||
; GFX7-NEXT: s_cmp_lt_u32 s0, 4
|
||||
; GFX7-NEXT: s_cselect_b32 s4, s0, s1
|
||||
; GFX7-NEXT: s_mov_b32 s0, 0
|
||||
; GFX7-NEXT: s_mov_b32 s2, -1
|
||||
; GFX7-NEXT: s_mov_b32 s1, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX8-LABEL: get_rounding_after_set_rounding_1:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX8-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
|
||||
; GFX8-NEXT: s_lshl_b32 s2, s0, 2
|
||||
; GFX8-NEXT: s_mov_b32 s0, 0xeb24da71
|
||||
; GFX8-NEXT: s_mov_b32 s1, 0xc96f385
|
||||
; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
|
||||
; GFX8-NEXT: s_and_b32 s0, s0, 15
|
||||
; GFX8-NEXT: s_add_i32 s1, s0, 4
|
||||
; GFX8-NEXT: s_cmp_lt_u32 s0, 4
|
||||
; GFX8-NEXT: s_cselect_b32 s0, s0, s1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX8-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9-LABEL: get_rounding_after_set_rounding_1:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
|
||||
; GFX9-NEXT: s_lshl_b32 s2, s0, 2
|
||||
; GFX9-NEXT: s_mov_b32 s0, 0xeb24da71
|
||||
; GFX9-NEXT: s_mov_b32 s1, 0xc96f385
|
||||
; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
|
||||
; GFX9-NEXT: s_and_b32 s0, s0, 15
|
||||
; GFX9-NEXT: s_add_i32 s1, s0, 4
|
||||
; GFX9-NEXT: s_cmp_lt_u32 s0, 4
|
||||
; GFX9-NEXT: s_cselect_b32 s0, s0, s1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX9-NEXT: global_store_dword v[0:1], v2, off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-LABEL: get_rounding_after_set_rounding_1:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_round_mode 0x0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX10-NEXT: s_lshl_b32 s2, s0, 2
|
||||
; GFX10-NEXT: s_mov_b32 s0, 0xeb24da71
|
||||
; GFX10-NEXT: s_mov_b32 s1, 0xc96f385
|
||||
; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
|
||||
; GFX10-NEXT: s_and_b32 s0, s0, 15
|
||||
; GFX10-NEXT: s_add_i32 s1, s0, 4
|
||||
; GFX10-NEXT: s_cmp_lt_u32 s0, 4
|
||||
; GFX10-NEXT: s_cselect_b32 s0, s0, s1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX10-NEXT: global_store_dword v[0:1], v2, off
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-LABEL: get_rounding_after_set_rounding_1:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_round_mode 0x0
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
|
||||
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
|
||||
; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71
|
||||
; GFX11-NEXT: s_mov_b32 s1, 0xc96f385
|
||||
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
|
||||
; GFX11-NEXT: s_and_b32 s0, s0, 15
|
||||
; GFX11-NEXT: s_add_i32 s1, s0, 4
|
||||
; GFX11-NEXT: s_cmp_lt_u32 s0, 4
|
||||
; GFX11-NEXT: s_cselect_b32 s0, s0, s1
|
||||
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
|
||||
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-NEXT: s_nop 0
|
||||
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX11-NEXT: s_endpgm
|
||||
tail call void @llvm.set.rounding(i32 1)
|
||||
%set.mode = tail call i32 @llvm.get.rounding()
|
||||
store volatile i32 %set.mode, ptr addrspace(1) null
|
||||
ret void
|
||||
}
|
||||
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; GCN: {{.*}}
|
||||
|
||||
Reference in New Issue
Block a user