Files
clang-p2996/llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir
Konstantin Zhuravlyov bb80d3e1d3 Enhance synchscope representation
OpenCL 2.0 introduces the notion of memory scopes in atomic operations to
  global and local memory. These scopes restrict how synchronization is
  achieved, which can result in improved performance.

  This change extends existing notion of synchronization scopes in LLVM to
  support arbitrary scopes expressed as target-specific strings, in addition to
  the already defined scopes (single thread, system).

  The LLVM IR and MIR syntax for expressing synchronization scopes has changed
  to use *syncscope("<scope>")*, where <scope> can be "singlethread" (this
  replaces *singlethread* keyword), or a target-specific name. As before, if
  the scope is not specified, it defaults to CrossThread/System scope.

  Implementation details:
    - Mapping from synchronization scope name/string to synchronization scope id
      is stored in LLVM context;
    - CrossThread/System and SingleThread scopes are pre-defined to efficiently
      check for known scopes without comparing strings;
    - Synchronization scope names are stored in SYNC_SCOPE_NAMES_BLOCK in
      the bitcode.

Differential Revision: https://reviews.llvm.org/D21723

llvm-svn: 307722
2017-07-11 22:23:00 +00:00

99 lines
5.1 KiB
YAML

# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -run-pass=none %s -o - | FileCheck --check-prefix=GCN %s
--- |
; ModuleID = '<stdin>'
source_filename = "<stdin>"
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn-amd-amdhsa"
define void @syncscopes(i32 %agent, i32 addrspace(4)* %agent_out, i32 %workgroup, i32 addrspace(4)* %workgroup_out, i32 %wavefront, i32 addrspace(4)* %wavefront_out) #0 {
entry:
store atomic i32 %agent, i32 addrspace(4)* %agent_out syncscope("agent") seq_cst, align 4
store atomic i32 %workgroup, i32 addrspace(4)* %workgroup_out syncscope("workgroup") seq_cst, align 4
store atomic i32 %wavefront, i32 addrspace(4)* %wavefront_out syncscope("wavefront") seq_cst, align 4
ret void
}
; Function Attrs: convergent nounwind
declare { i1, i64 } @llvm.amdgcn.if(i1) #1
; Function Attrs: convergent nounwind
declare { i1, i64 } @llvm.amdgcn.else(i64) #1
; Function Attrs: convergent nounwind readnone
declare i64 @llvm.amdgcn.break(i64) #2
; Function Attrs: convergent nounwind readnone
declare i64 @llvm.amdgcn.if.break(i1, i64) #2
; Function Attrs: convergent nounwind readnone
declare i64 @llvm.amdgcn.else.break(i64, i64) #2
; Function Attrs: convergent nounwind
declare i1 @llvm.amdgcn.loop(i64) #1
; Function Attrs: convergent nounwind
declare void @llvm.amdgcn.end.cf(i64) #1
attributes #0 = { "target-cpu"="gfx803" }
attributes #1 = { convergent nounwind }
attributes #2 = { convergent nounwind readnone }
# GCN-LABEL: name: syncscopes
# GCN: FLAT_STORE_DWORD killed %vgpr0_vgpr1, killed %vgpr2, 0, -1, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("agent") seq_cst 4 into %ir.agent_out)
# GCN: FLAT_STORE_DWORD killed %vgpr0_vgpr1, killed %vgpr2, 0, -1, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
# GCN: FLAT_STORE_DWORD killed %vgpr0_vgpr1, killed %vgpr2, 0, -1, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
...
---
name: syncscopes
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
- { reg: '%sgpr4_sgpr5' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
body: |
bb.0.entry:
liveins: %sgpr4_sgpr5
S_WAITCNT 0
%sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr4_sgpr5, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
%sgpr6 = S_LOAD_DWORD_IMM %sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
%sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM %sgpr4_sgpr5, 24, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
%sgpr7 = S_LOAD_DWORD_IMM %sgpr4_sgpr5, 16, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
%sgpr8 = S_LOAD_DWORD_IMM %sgpr4_sgpr5, 32, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
S_WAITCNT 127
%vgpr0 = V_MOV_B32_e32 %sgpr0, implicit %exec, implicit-def %vgpr0_vgpr1, implicit %sgpr0_sgpr1
%sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr4_sgpr5, 40, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
%vgpr1 = V_MOV_B32_e32 killed %sgpr1, implicit %exec, implicit killed %sgpr0_sgpr1, implicit %sgpr0_sgpr1, implicit %exec
%vgpr2 = V_MOV_B32_e32 killed %sgpr6, implicit %exec, implicit %exec
FLAT_STORE_DWORD killed %vgpr0_vgpr1, killed %vgpr2, 0, -1, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("agent") seq_cst 4 into %ir.agent_out)
S_WAITCNT 112
%vgpr0 = V_MOV_B32_e32 %sgpr2, implicit %exec, implicit-def %vgpr0_vgpr1, implicit %sgpr2_sgpr3
%vgpr1 = V_MOV_B32_e32 killed %sgpr3, implicit %exec, implicit killed %sgpr2_sgpr3, implicit %sgpr2_sgpr3, implicit %exec
%vgpr2 = V_MOV_B32_e32 killed %sgpr7, implicit %exec, implicit %exec
FLAT_STORE_DWORD killed %vgpr0_vgpr1, killed %vgpr2, 0, -1, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
S_WAITCNT 112
%vgpr0 = V_MOV_B32_e32 %sgpr4, implicit %exec, implicit-def %vgpr0_vgpr1, implicit %sgpr4_sgpr5
%vgpr1 = V_MOV_B32_e32 killed %sgpr5, implicit %exec, implicit killed %sgpr4_sgpr5, implicit %sgpr4_sgpr5, implicit %exec
%vgpr2 = V_MOV_B32_e32 killed %sgpr8, implicit %exec, implicit %exec
FLAT_STORE_DWORD killed %vgpr0_vgpr1, killed %vgpr2, 0, -1, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
S_ENDPGM
...