[AMDGPU][SIInsertWaitcnts] skip meta instructions early (#145720)
When iterating over a block, meta instructions have no effect on wait counts, but their presence causes the pass to drop its reference to earlier waitcnt instructions before those waitcnt instructions have been processed. This results in spurious (redundant) waitcnt instructions, which do not affect correctness but are not required in the resulting program. Skipping meta instructions as soon as they are seen avoids this.
This commit is contained in:
committed by
GitHub
parent
13e6ea7b4c
commit
a34a024812
@@ -1786,8 +1786,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
|
||||
bool FlushVmCnt) {
|
||||
setForceEmitWaitcnt();
|
||||
|
||||
if (MI.isMetaInstruction())
|
||||
return false;
|
||||
assert(!MI.isMetaInstruction());
|
||||
|
||||
AMDGPU::Waitcnt Wait;
|
||||
|
||||
@@ -2474,6 +2473,10 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
|
||||
E = Block.instr_end();
|
||||
Iter != E;) {
|
||||
MachineInstr &Inst = *Iter;
|
||||
if (Inst.isMetaInstruction()) {
|
||||
++Iter;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Track pre-existing waitcnts that were added in earlier iterations or by
|
||||
// the memory legalizer.
|
||||
|
||||
@@ -2250,7 +2250,6 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) {
|
||||
; GFX9-SDAG-NEXT: s_mov_b32 s32, s34
|
||||
; GFX9-SDAG-NEXT: s_mov_b32 s34, s12
|
||||
; GFX9-SDAG-NEXT: s_mov_b32 s33, s11
|
||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_control_flow:
|
||||
@@ -2317,7 +2316,6 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) {
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s32, s34
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s34, s12
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s33, s11
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_control_flow:
|
||||
|
||||
@@ -731,7 +731,6 @@ define <4 x i16> @vec_16xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; kill: killed $vgpr0 killed $vgpr1
|
||||
; GFX9-NEXT: .LBB3_3: ; %exit
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v5 op_sel_hi:[0,0]
|
||||
; GFX9-NEXT: s_movk_i32 s4, 0x8000
|
||||
; GFX9-NEXT: v_or_b32_e32 v1, 0xffff8000, v0
|
||||
@@ -973,7 +972,6 @@ define <4 x i16> @vec_16xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; kill: killed $vgpr0 killed $vgpr1
|
||||
; GFX9-NEXT: .LBB4_3: ; %exit
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v7 op_sel_hi:[0,1]
|
||||
; GFX9-NEXT: s_movk_i32 s4, 0x8000
|
||||
; GFX9-NEXT: v_or_b32_e32 v1, 0xffff8000, v0
|
||||
@@ -1217,7 +1215,6 @@ define <4 x half> @vec_16xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(
|
||||
; GFX9-NEXT: .LBB5_3: ; %exit
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3900
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0x3d00
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_cmp_ge_f16_e32 vcc, 0.5, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, 0x3800
|
||||
; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
|
||||
@@ -1595,7 +1592,6 @@ define amdgpu_gfx <8 x i16> @vec_16xi16_extract_8xi16_0(i1 inreg %cond, ptr addr
|
||||
; GFX9-NEXT: s_movk_i32 s34, 0x3800
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3900
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0x3d00
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_cmp_gt_u16_e32 vcc, s35, v7
|
||||
; GFX9-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc
|
||||
; GFX9-NEXT: v_cmp_gt_u16_sdwa vcc, v7, s34 src0_sel:WORD_1 src1_sel:DWORD
|
||||
@@ -1933,7 +1929,6 @@ define amdgpu_gfx <8 x half> @vec_16xf16_extract_8xf16_0(i1 inreg %cond, ptr add
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3800
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0x3900
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, 0x3d00
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_cmp_ge_f16_e32 vcc, 0.5, v7
|
||||
; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v2, vcc
|
||||
; GFX9-NEXT: v_cmp_nle_f16_sdwa vcc, v7, v0 src0_sel:WORD_1 src1_sel:DWORD
|
||||
|
||||
@@ -127,7 +127,6 @@ define <2 x i64> @extract_2xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %
|
||||
; GCN-NEXT: s_mov_b32 s11, 0xf000
|
||||
; GCN-NEXT: s_mov_b32 s8, s10
|
||||
; GCN-NEXT: s_mov_b32 s9, s10
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[8:11], 0 addr64 glc
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[8:11], 0 addr64 offset:16 glc
|
||||
@@ -138,7 +137,6 @@ define <2 x i64> @extract_2xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: .LBB1_4: ; %exit
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0xffff8000
|
||||
; GCN-NEXT: v_cmp_lt_i64_e32 vcc, -1, v[4:5]
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc
|
||||
@@ -197,7 +195,6 @@ define <4 x i64> @extract_4xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %
|
||||
; GCN-NEXT: s_mov_b32 s11, 0xf000
|
||||
; GCN-NEXT: s_mov_b32 s8, s10
|
||||
; GCN-NEXT: s_mov_b32 s9, s10
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[8:11], 0 addr64 glc
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[8:11], 0 addr64 offset:16 glc
|
||||
@@ -208,7 +205,6 @@ define <4 x i64> @extract_4xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: .LBB2_4: ; %exit
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0xffff8000
|
||||
; GCN-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[4:5]
|
||||
; GCN-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc
|
||||
@@ -305,7 +301,6 @@ define <8 x i64> @extract_8xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %
|
||||
; GCN-NEXT: v_cmp_gt_i64_e64 s[6:7], 0, v[10:11]
|
||||
; GCN-NEXT: v_cmp_gt_i64_e64 s[8:9], 0, v[12:13]
|
||||
; GCN-NEXT: v_cmp_gt_i64_e64 s[10:11], 0, v[14:15]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_cmp_gt_i64_e64 s[12:13], 0, v[16:17]
|
||||
; GCN-NEXT: v_cmp_gt_i64_e64 s[14:15], 0, v[18:19]
|
||||
; GCN-NEXT: v_cmp_gt_i64_e64 s[16:17], 0, v[4:5]
|
||||
@@ -376,7 +371,6 @@ define <2 x double> @extract_2xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i
|
||||
; GCN-NEXT: s_mov_b32 s11, 0xf000
|
||||
; GCN-NEXT: s_mov_b32 s8, s10
|
||||
; GCN-NEXT: s_mov_b32 s9, s10
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[8:11], 0 addr64 glc
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[8:11], 0 addr64 offset:16 glc
|
||||
@@ -387,7 +381,6 @@ define <2 x double> @extract_2xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: .LBB4_4: ; %exit
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0xbff00000
|
||||
; GCN-NEXT: v_cmp_lt_f64_e32 vcc, -1.0, v[4:5]
|
||||
; GCN-NEXT: v_cndmask_b32_e64 v1, v0, -2.0, vcc
|
||||
@@ -446,7 +439,6 @@ define <4 x double> @extract_4xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i
|
||||
; GCN-NEXT: s_mov_b32 s11, 0xf000
|
||||
; GCN-NEXT: s_mov_b32 s8, s10
|
||||
; GCN-NEXT: s_mov_b32 s9, s10
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[8:11], 0 addr64 glc
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[8:11], 0 addr64 offset:16 glc
|
||||
@@ -457,7 +449,6 @@ define <4 x double> @extract_4xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: .LBB5_4: ; %exit
|
||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0xbff00000
|
||||
; GCN-NEXT: v_cmp_nlt_f64_e32 vcc, -1.0, v[4:5]
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v1, -2.0, v0, vcc
|
||||
@@ -554,7 +545,6 @@ define <8 x double> @extract_8xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i
|
||||
; GCN-NEXT: v_cmp_nlt_f64_e64 s[6:7], -1.0, v[10:11]
|
||||
; GCN-NEXT: v_cmp_nlt_f64_e64 s[8:9], -1.0, v[12:13]
|
||||
; GCN-NEXT: v_cmp_nlt_f64_e64 s[10:11], -1.0, v[14:15]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_cmp_nlt_f64_e64 s[12:13], -1.0, v[16:17]
|
||||
; GCN-NEXT: v_cmp_nlt_f64_e64 s[14:15], -1.0, v[18:19]
|
||||
; GCN-NEXT: v_cmp_nlt_f64_e64 s[16:17], -1.0, v[4:5]
|
||||
|
||||
@@ -6171,13 +6171,12 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1)
|
||||
; NOOPT-NEXT: v_mov_b32_e32 v11, v14
|
||||
; NOOPT-NEXT: v_mov_b32_e32 v12, v13
|
||||
; NOOPT-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0 offset:32
|
||||
; NOOPT-NEXT: s_waitcnt vmcnt(0)
|
||||
; NOOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
||||
; NOOPT-NEXT: ; implicit-def: $sgpr1
|
||||
; NOOPT-NEXT: ; implicit-def: $sgpr1
|
||||
; NOOPT-NEXT: ; implicit-def: $sgpr1
|
||||
; NOOPT-NEXT: ; implicit-def: $sgpr1
|
||||
; NOOPT-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec
|
||||
; NOOPT-NEXT: s_waitcnt expcnt(0)
|
||||
; NOOPT-NEXT: v_mov_b32_e32 v9, v4
|
||||
; NOOPT-NEXT: v_mov_b32_e32 v10, v3
|
||||
; NOOPT-NEXT: v_mov_b32_e32 v11, v2
|
||||
@@ -7290,7 +7289,6 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) {
|
||||
; NOOPT-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc
|
||||
; NOOPT-NEXT: s_waitcnt vmcnt(0)
|
||||
; NOOPT-NEXT: ; implicit-def: $sgpr0
|
||||
; NOOPT-NEXT: s_waitcnt vmcnt(0)
|
||||
; NOOPT-NEXT: ;;#ASMSTART
|
||||
; NOOPT-NEXT: ; reg use v[0:3]
|
||||
; NOOPT-NEXT: ;;#ASMEND
|
||||
@@ -7313,7 +7311,6 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) {
|
||||
; NOOPT-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc
|
||||
; NOOPT-NEXT: s_waitcnt vmcnt(0)
|
||||
; NOOPT-NEXT: ; implicit-def: $sgpr0
|
||||
; NOOPT-NEXT: s_waitcnt vmcnt(0)
|
||||
; NOOPT-NEXT: ;;#ASMSTART
|
||||
; NOOPT-NEXT: ; reg use v[0:3]
|
||||
; NOOPT-NEXT: ;;#ASMEND
|
||||
@@ -7534,7 +7531,6 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) {
|
||||
; NOOPT-NEXT: s_waitcnt vmcnt(0)
|
||||
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; NOOPT-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; NOOPT-NEXT: s_waitcnt vmcnt(0)
|
||||
; NOOPT-NEXT: ;;#ASMSTART
|
||||
; NOOPT-NEXT: ; reg use v[0:3]
|
||||
; NOOPT-NEXT: ;;#ASMEND
|
||||
@@ -7558,7 +7554,6 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) {
|
||||
; NOOPT-NEXT: s_waitcnt vmcnt(0)
|
||||
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; NOOPT-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; NOOPT-NEXT: s_waitcnt vmcnt(0)
|
||||
; NOOPT-NEXT: ;;#ASMSTART
|
||||
; NOOPT-NEXT: ; reg use v[0:3]
|
||||
; NOOPT-NEXT: ;;#ASMEND
|
||||
|
||||
@@ -7367,7 +7367,6 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -7918,7 +7917,6 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -8215,7 +8213,6 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -8505,7 +8502,6 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -8777,7 +8773,6 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -9052,7 +9047,6 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -9349,7 +9343,6 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -9646,7 +9639,6 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -9943,7 +9935,6 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -10240,7 +10231,6 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -10533,7 +10523,6 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -10830,7 +10819,6 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -11127,7 +11115,6 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -18653,7 +18640,6 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -18928,7 +18914,6 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -19225,7 +19210,6 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -19515,7 +19499,6 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -19787,7 +19770,6 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -20062,7 +20044,6 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -20359,7 +20340,6 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -20656,7 +20636,6 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -20953,7 +20932,6 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -21250,7 +21228,6 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -21543,7 +21520,6 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -21840,7 +21816,6 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -22137,7 +22112,6 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
|
||||
@@ -6365,7 +6365,6 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -6642,7 +6641,6 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -6945,7 +6943,6 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -7241,7 +7238,6 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -7515,7 +7511,6 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -7792,7 +7787,6 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -8095,7 +8089,6 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -8398,7 +8391,6 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -8701,7 +8693,6 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -9004,7 +8995,6 @@ define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -9303,7 +9293,6 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -9606,7 +9595,6 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -9909,7 +9897,6 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -17555,7 +17542,6 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -18112,7 +18098,6 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -18415,7 +18400,6 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -18711,7 +18695,6 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -18985,7 +18968,6 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -19262,7 +19244,6 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -19565,7 +19546,6 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -19868,7 +19848,6 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -20171,7 +20150,6 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -20474,7 +20452,6 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -20773,7 +20750,6 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -21076,7 +21052,6 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
@@ -21379,7 +21354,6 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
|
||||
@@ -31,7 +31,6 @@ define amdgpu_kernel void @simple_barrier(ptr addrspace(1) %arg) {
|
||||
; GCN-NEXT: s_load_dword s2, s[0:1], 0x0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_barrier
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: ; wave barrier
|
||||
; GCN-NEXT: s_load_dword s3, s[0:1], 0x4
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
||||
@@ -773,7 +773,6 @@ define amdgpu_kernel void @mix_elt_types_op_sel(ptr addrspace(1) %out, ptr addrs
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ; kill: killed $vgpr0_vgpr1
|
||||
; GCN-NEXT: v_pk_add_f16 v0, v0, 2.0 op_sel_hi:[1,0]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GCN-NEXT: v_pk_fma_f16 v0, v1, v2, v0 op_sel:[0,0,1] op_sel_hi:[1,1,0]
|
||||
; GCN-NEXT: global_store_dword v3, v0, s[2:3]
|
||||
|
||||
20
llvm/test/CodeGen/AMDGPU/waitcnt-trailing.mir
Normal file
20
llvm/test/CodeGen/AMDGPU/waitcnt-trailing.mir
Normal file
@@ -0,0 +1,20 @@
|
||||
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck %s
|
||||
|
||||
# Check that a trivial soft waitcnt at the end of a block is deleted even if it
|
||||
# is followed by a meta instruction.
|
||||
|
||||
# CHECK-LABEL: name: waitcnt-no-redundant
|
||||
# CHECK: S_WAITCNT 0
|
||||
# CHECK: S_MOV_B32
|
||||
# CHECK-NOT: S_WAITCNT
|
||||
|
||||
---
|
||||
name: waitcnt-no-redundant
|
||||
body: |
|
||||
bb.1:
|
||||
S_WAITCNT_soft 53119
|
||||
$sgpr2 = S_MOV_B32 42
|
||||
S_WAITCNT_soft 53119
|
||||
$vgpr2 = IMPLICIT_DEF
|
||||
|
||||
...
|
||||
Reference in New Issue
Block a user