OpenCL 2.0 introduces the notion of memory scopes in atomic operations to
global and local memory. These scopes restrict how synchronization is
achieved, which can result in improved performance.
This change extends existing notion of synchronization scopes in LLVM to
support arbitrary scopes expressed as target-specific strings, in addition to
the already defined scopes (single thread, system).
The LLVM IR and MIR syntax for expressing synchronization scopes has changed
to use *syncscope("<scope>")*, where <scope> can be "singlethread" (this
replaces *singlethread* keyword), or a target-specific name. As before, if
the scope is not specified, it defaults to CrossThread/System scope.
Implementation details:
- Mapping from synchronization scope name/string to synchronization scope id
is stored in LLVM context;
- CrossThread/System and SingleThread scopes are pre-defined to efficiently
check for known scopes without comparing strings;
- Synchronization scope names are stored in SYNC_SCOPE_NAMES_BLOCK in
the bitcode.
Differential Revision: https://reviews.llvm.org/D21723
llvm-svn: 307722
20 lines
1.2 KiB
LLVM
20 lines
1.2 KiB
LLVM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-before=si-debugger-insert-nops < %s | FileCheck --check-prefix=GCN %s
|
|
|
|
; GCN-LABEL: name: syncscopes
|
|
; GCN: FLAT_STORE_DWORD killed %vgpr1_vgpr2, killed %vgpr0, 0, -1, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("agent") seq_cst 4 into %ir.agent_out)
|
|
; GCN: FLAT_STORE_DWORD killed %vgpr4_vgpr5, killed %vgpr3, 0, -1, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
|
|
; GCN: FLAT_STORE_DWORD killed %vgpr7_vgpr8, killed %vgpr6, 0, -1, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
|
|
define void @syncscopes(
|
|
i32 %agent,
|
|
i32 addrspace(4)* %agent_out,
|
|
i32 %workgroup,
|
|
i32 addrspace(4)* %workgroup_out,
|
|
i32 %wavefront,
|
|
i32 addrspace(4)* %wavefront_out) {
|
|
entry:
|
|
store atomic i32 %agent, i32 addrspace(4)* %agent_out syncscope("agent") seq_cst, align 4
|
|
store atomic i32 %workgroup, i32 addrspace(4)* %workgroup_out syncscope("workgroup") seq_cst, align 4
|
|
store atomic i32 %wavefront, i32 addrspace(4)* %wavefront_out syncscope("wavefront") seq_cst, align 4
|
|
ret void
|
|
}
|