Files
clang-p2996/llvm/test/CodeGen/NVPTX/cluster-dim.ll
Alex MacLean 9bc26e9e8e [NVPTX] Support !"cluster_dim_{x,y,z}" metadata (#109548)
Add support for !"cluster_dim_{x,y,z}" metadata to allow specifying
cluster dimensions on a kernel function in LLVM.

If any of these metadata entries are present, the `.explicitcluster` PTX
directive is used and the specified dimensions are lowered with the
`.reqnctapercluster` directive. For more details see:
[PTX ISA: 11.7. Cluster Dimension Directives](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cluster-dimension-directives)
2024-09-25 16:49:02 -07:00

30 lines
978 B
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck -check-prefixes=CHECK80 %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck -check-prefixes=CHECK90 %s
; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90 | %ptxas-verify -arch=sm_90 %}
; Empty function that the !nvvm.annotations entries at the end of this file
; tag as a kernel with cluster dimensions x=3, y=5, z=7.
;
; The autogenerated assertions below record the expected PTX per target:
;   - sm_80 invocation: the function label, the opening brace and `ret;`;
;     no cluster directive appears in the expected output.
;   - sm_90 invocation: `.explicitcluster` immediately followed by
;     `.reqnctapercluster 3, 5, 7`, then the opening brace and `ret;`.
define void @kernel_func_clusterxyz() {
; CHECK80-LABEL: kernel_func_clusterxyz(
; CHECK80: {
; CHECK80-EMPTY:
; CHECK80-EMPTY:
; CHECK80-NEXT: // %bb.0:
; CHECK80-NEXT: ret;
;
; CHECK90-LABEL: kernel_func_clusterxyz(
; CHECK90: .explicitcluster
; CHECK90-NEXT: .reqnctapercluster 3, 5, 7
; CHECK90-NEXT: {
; CHECK90-EMPTY:
; CHECK90-EMPTY:
; CHECK90-NEXT: // %bb.0:
; CHECK90-NEXT: ret;
ret void
}
; Annotations attached to @kernel_func_clusterxyz:
;   !1 tags the function with "kernel" = 1.
;   !2 carries the three cluster dimensions in a single node
;      (cluster_dim_x = 3, cluster_dim_y = 5, cluster_dim_z = 7); these are
;      the values expected in the `.reqnctapercluster 3, 5, 7` line for sm_90.
!nvvm.annotations = !{!1, !2}
!1 = !{ptr @kernel_func_clusterxyz, !"kernel", i32 1}
!2 = !{ptr @kernel_func_clusterxyz, !"cluster_dim_x", i32 3, !"cluster_dim_y", i32 5, !"cluster_dim_z", i32 7}