Add support for !"cluster_dim_{x,y,z}" metadata, which allows specifying
the cluster dimensions of a kernel function in LLVM IR.
If any of these metadata entries is present, the `.explicitcluster` PTX
directive is emitted and the specified dimensions are lowered to the
`.reqnctapercluster` directive. For more details, see
[PTX ISA: 11.7. Cluster Dimension Directives](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cluster-dimension-directives).
30 lines
978 B
LLVM
30 lines
978 B
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
;
; Exercises lowering of the !"cluster_dim_{x,y,z}" NVVM annotations attached to
; a kernel. The module is compiled once for sm_80 and once for sm_90, each with
; its own FileCheck prefix, and the sm_90 output is additionally handed to
; ptxas when a suitable ptxas is available.
;
; The sm_90 invocations pin the PTX ISA version to 7.8, the version that
; introduced the cluster dimension directives, so the generated `.version`
; does not depend on llc's default PTX version.
;
; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck -check-prefixes=CHECK80 %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | FileCheck -check-prefixes=CHECK90 %s
; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}

; Kernel with an empty body. Its cluster dimensions come from the module-level
; !nvvm.annotations metadata rather than from anything in the function itself,
; so the assertions below only look at the directives printed ahead of the
; opening brace and at the trivial `ret`.
define void @kernel_func_clusterxyz() {
; CHECK80-LABEL: kernel_func_clusterxyz(
; CHECK80:       {
; CHECK80-EMPTY:
; CHECK80-EMPTY:
; CHECK80-NEXT:  // %bb.0:
; CHECK80-NEXT:    ret;
;
; CHECK90-LABEL: kernel_func_clusterxyz(
; CHECK90:       .explicitcluster
; CHECK90-NEXT:  .reqnctapercluster 3, 5, 7
; CHECK90-NEXT:  {
; CHECK90-EMPTY:
; CHECK90-EMPTY:
; CHECK90-NEXT:  // %bb.0:
; CHECK90-NEXT:    ret;
  ret void
}

; Module-level NVVM annotations consumed by the NVPTX backend.
!nvvm.annotations = !{!1, !2}

; Marks @kernel_func_clusterxyz as a kernel entry point.
!1 = !{ptr @kernel_func_clusterxyz, !"kernel", i32 1}
; Requests a 3 x 5 x 7 cluster for the kernel; all three dimensions are carried
; by a single annotation node as consecutive key/value pairs.
!2 = !{ptr @kernel_func_clusterxyz, !"cluster_dim_x", i32 3, !"cluster_dim_y", i32 5, !"cluster_dim_z", i32 7}