andorbitset.ll is interesting since it directly depends on the difference between poison and undef. Not sure it's useful to keep the version using poison; I assume none of this code makes it to codegen. si-spill-cf.ll was also a nasty case, which I doubt has been reproducing its original issue for a very long time. I had to reclaim an older version, replace some of the poison uses, and run simplify-cfg. There's a very slight change in the final CFG with this, but the final output is approximately the same as it used to be.
101 lines
5.2 KiB
YAML
101 lines
5.2 KiB
YAML
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -run-pass=none %s -o - | FileCheck --check-prefix=GCN %s
|
|
|
|
--- |
|
|
; ModuleID = '<stdin>'
|
|
source_filename = "<stdin>"
|
|
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
|
target triple = "amdgcn-amd-amdhsa"
|
|
|
|
; IR side of the test: one seq_cst atomic i32 store per synchronization scope
; ("agent", "workgroup", "wavefront"). Each store writes the matching i32
; argument through its paired addrspace(4) pointer argument and carries
; !nontemporal !0. The value names (%agent_out, %workgroup_out, %wavefront_out)
; are referenced as %ir.<name> by the machine memory operands in the MIR body
; later in this file, so they must not be renamed.
define void @syncscopes(i32 %agent, ptr addrspace(4) %agent_out, i32 %workgroup, ptr addrspace(4) %workgroup_out, i32 %wavefront, ptr addrspace(4) %wavefront_out) #0 {
|
|
entry:
|
|
store atomic i32 %agent, ptr addrspace(4) %agent_out syncscope("agent") seq_cst, align 4, !nontemporal !0
|
|
store atomic i32 %workgroup, ptr addrspace(4) %workgroup_out syncscope("workgroup") seq_cst, align 4, !nontemporal !0
|
|
store atomic i32 %wavefront, ptr addrspace(4) %wavefront_out syncscope("wavefront") seq_cst, align 4, !nontemporal !0
|
|
ret void
|
|
}
|
|
|
|
; Declarations of the AMDGPU control-flow intrinsics (if / else / break /
; if.break / else.break / loop / end.cf). @syncscopes in this file contains
; only stores and a ret, so none of these is called by the IR shown here.
; NOTE(review): presumably carried over from the tool output this input was
; generated from -- confirm before removing.
; Function Attrs: convergent nounwind
|
|
declare { i1, i64 } @llvm.amdgcn.if(i1) #1
|
|
|
|
; Function Attrs: convergent nounwind
|
|
declare { i1, i64 } @llvm.amdgcn.else(i64) #1
|
|
|
|
; Function Attrs: convergent nounwind readnone
|
|
declare i64 @llvm.amdgcn.break(i64) #2
|
|
|
|
; Function Attrs: convergent nounwind readnone
|
|
declare i64 @llvm.amdgcn.if.break(i1, i64) #2
|
|
|
|
; Function Attrs: convergent nounwind readnone
|
|
declare i64 @llvm.amdgcn.else.break(i64, i64) #2
|
|
|
|
; Function Attrs: convergent nounwind
|
|
declare i1 @llvm.amdgcn.loop(i64) #1
|
|
|
|
; Function Attrs: convergent nounwind
|
|
declare void @llvm.amdgcn.end.cf(i64) #1
|
|
|
|
attributes #0 = { "target-cpu"="gfx803" }
|
|
attributes #1 = { convergent nounwind }
|
|
attributes #2 = { convergent nounwind readnone }
|
|
|
|
!0 = !{i32 1}
|
|
|
|
# GCN-LABEL: name: syncscopes
|
|
# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, 19, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst (s32) into %ir.agent_out, addrspace 4)
|
|
# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, 19, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst (s32) into %ir.workgroup_out, addrspace 4)
|
|
# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, 19, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst (s32) into %ir.wavefront_out, addrspace 4)
|
|
...
|
|
# Machine function for @syncscopes. The RUN line uses -run-pass=none, so this
# document is only parsed and printed again; the check lines above match the
# three FLAT_STORE_DWORD memory operands (one per syncscope) in the output.
---
|
|
name: syncscopes
|
|
alignment: 1
|
|
exposesReturnsTwice: false
|
|
legalized: false
|
|
regBankSelected: false
|
|
selected: false
|
|
tracksRegLiveness: true
|
|
liveins:
|
|
- { reg: '$sgpr4_sgpr5' }
|
|
frameInfo:
|
|
isFrameAddressTaken: false
|
|
isReturnAddressTaken: false
|
|
hasStackMap: false
|
|
hasPatchPoint: false
|
|
stackSize: 0
|
|
offsetAdjustment: 0
|
|
maxAlignment: 0
|
|
adjustsStack: false
|
|
hasCalls: false
|
|
hasOpaqueSPAdjustment: false
|
|
hasVAStart: false
|
|
hasMustTailInVarArgFunc: false
|
|
body: |
|
|
bb.0.entry:
|
|
liveins: $sgpr4_sgpr5
|
|
|
|
S_WAITCNT 0
|
|
; Scalar loads relative to $sgpr4_sgpr5 at offsets 8, 0, 24, 16 and 32 (the
; offset-40 load is issued further down, overwriting $sgpr4_sgpr5 itself).
; Every load's memory operand is written against `ptr addrspace(4) poison`.
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) poison`)
|
|
$sgpr6 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load (s32) from `ptr addrspace(4) poison`)
|
|
$sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 24, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) poison`)
|
|
$sgpr7 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 16, 0 :: (non-temporal dereferenceable invariant load (s32) from `ptr addrspace(4) poison`)
|
|
$sgpr8 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 32, 0 :: (non-temporal dereferenceable invariant load (s32) from `ptr addrspace(4) poison`)
|
|
S_WAITCNT 127
|
|
; Store 1, syncscope("agent"): $sgpr0_sgpr1 is copied into $vgpr0_vgpr1 and
; $sgpr6 into $vgpr2; those registers are the FLAT_STORE_DWORD operands.
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
|
$sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 40, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) poison`)
|
|
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $sgpr0_sgpr1, implicit $exec
|
|
$vgpr2 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
|
|
FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, 19, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst (s32) into %ir.agent_out)
|
|
S_WAITCNT 112
|
|
; Store 2, syncscope("workgroup"): same sequence using $sgpr2_sgpr3 and $sgpr7.
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
|
|
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit killed $sgpr2_sgpr3, implicit $sgpr2_sgpr3, implicit $exec
|
|
$vgpr2 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
|
|
FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, 19, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst (s32) into %ir.workgroup_out)
|
|
S_WAITCNT 112
|
|
; Store 3, syncscope("wavefront"): same sequence using the $sgpr4_sgpr5 value
; reloaded from offset 40 above, and $sgpr8.
$vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr4_sgpr5
|
|
$vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit killed $sgpr4_sgpr5, implicit $sgpr4_sgpr5, implicit $exec
|
|
$vgpr2 = V_MOV_B32_e32 killed $sgpr8, implicit $exec, implicit $exec
|
|
FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, 19, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst (s32) into %ir.wavefront_out)
|
|
S_ENDPGM 0
|
|
|
|
...
|