Files
clang-p2996/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
Stephen Thomas 2e3fa4ba9e [AMDGPU] Insert waits before and after instructions that always use GDS (#131338)
It is an architectural requirement that there must be no outstanding GDS
instructions when an "always GDS" instruction is issued, and also that
an always GDS instruction must be allowed to complete.

Insert waits on DScnt/LGKMcnt prior to (if necessary) and subsequent to
(unconditionally) any always GDS instruction, and an additional S_NOP if
the subsequent wait was followed by S_ENDPGM.

Always GDS instructions are GWS instructions, DS_ORDERED_COUNT,
DS_ADD_GS_REG_RTN, and DS_SUB_GS_REG_RTN (the latter two are considered
always GDS as of this patch).
2025-03-21 09:33:04 +00:00

50 lines
2.5 KiB
LLVM

; RUN: llc -global-isel=0 -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
; FUNC-LABEL: {{^}}ds_ordered_swap:
; GCN: s_mov_b32 m0, s0
; VIGFX9-NEXT: s_nop 0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
; Unconditional case: an amdgpu_cs function that issues exactly one ordered
; swap on %gds with %value and returns the intrinsic's i32 result as a float.
define amdgpu_cs float @ds_ordered_swap(ptr addrspace(2) inreg %gds, i32 %value) {
  ; The six trailing constants are the intrinsic's control operands.
  ; NOTE(review): their individual roles are not visible in this file;
  ; confirm them against the intrinsic's definition.
  %old = call i32 @llvm.amdgcn.ds.ordered.swap(
             ptr addrspace(2) %gds, i32 %value,
             i32 0, i32 0, i1 false,
             i32 1, i1 true, i1 true)

  ; Reinterpret the 32 result bits so they match the float return type.
  %as.float = bitcast i32 %old to float
  ret float %as.float
}
; FUNC-LABEL: {{^}}ds_ordered_swap_conditional:
; GCN: v_cmp_ne_u32_e32 vcc, 0, v[[VALUE:[0-9]+]]
; GCN: s_and_saveexec_b64 s[[SAVED:\[[0-9]+:[0-9]+\]]], vcc
; // We have to use s_cbranch, because ds_ordered_count has side effects with EXEC=0
; GCN: s_cbranch_execz [[BB:.LBB._.]]
; GCN: s_mov_b32 m0, s0
; VIGFX9-NEXT: s_nop 0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[VALUE]] offset:4868 gds
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: [[BB]]:
; // Wait for expcnt(0) before modifying EXEC
; GCN-NEXT: s_waitcnt expcnt(0)
; GCN-NEXT: s_or_b64 exec, exec, s[[SAVED]]
; Conditional case: the same ordered swap as in @ds_ordered_swap, but the call
; sits in a block that is only entered when %value is non-zero.
define amdgpu_cs float @ds_ordered_swap_conditional(ptr addrspace(2) inreg %gds, i32 %value) {
entry:
  ; Branch around the swap when %value == 0.
  %nonzero = icmp ne i32 %value, 0
  br i1 %nonzero, label %if-true, label %endif

if-true:
  ; Identical operands to the unconditional test.
  %old = call i32 @llvm.amdgcn.ds.ordered.swap(
             ptr addrspace(2) %gds, i32 %value,
             i32 0, i32 0, i1 false,
             i32 1, i1 true, i1 true)
  br label %endif

endif:
  ; The edge from %entry carries poison because no swap result exists on
  ; that path.
  %merged = phi i32 [ %old, %if-true ], [ poison, %entry ]
  %as.float = bitcast i32 %merged to float
  ret float %as.float
}
; Declaration of the intrinsic called by both test functions in this file.
; It takes a pointer in address space 2 (named %gds at the call sites), an i32
; value, and six further i32/i1 operands, and returns an i32.
; NOTE(review): the trailing operands are presumably ordering, scope,
; volatile, ordered-count index, wave-release and wave-done flags; confirm
; against the intrinsic's definition before relying on this.
declare i32 @llvm.amdgcn.ds.ordered.swap(ptr addrspace(2) nocapture, i32, i32, i32, i1, i32, i1, i1)