Files
clang-p2996/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
Lewis Crawford 6d058317e6 Enable .ptr .global .align attributes for kernel attributes for CUDA (#114874)
Emit the .ptr, state-space (e.g. .global, .shared, .const, .local), and
.align attributes for pointer kernel args in CUDA (previously these were
emitted only for OpenCL).

This allows for more vectorization opportunities if the PTX consumer
is able to know about the pointer alignments.

If no alignment is explicitly specified, .align 1 will be emitted
to match the LLVM IR semantics in this case.

PTX ISA doc -
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#kernel-parameter-attribute-ptr

This is a rework of the original patch proposed in #79646

---------

Co-authored-by: Vandana <vandanak@nvidia.com>
2024-11-15 12:40:53 +00:00

43 lines
1.8 KiB
LLVM

; Test driver lines: compile this file with llc for nvptx64 / sm_60 and verify
; the emitted PTX kernel parameter declarations.
;
; First command: pipe the llc output into FileCheck, which matches it against
; the directives written in the comments of this file.
; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s
; Second command: guarded by the lit `ptxas` condition, so it only runs when
; that feature is available; the llc output is then handed to %ptxas-verify
; (presumably to confirm that ptxas accepts the generated PTX; confirm against
; the lit configuration).
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify %}
; NOTE(review): this struct type is not referenced by anything in the visible
; part of the file; it may be a leftover that can be removed.
%struct.Large = type { [16 x double] }
; Case 1: every pointer argument carries an explicit `align` attribute
; (1, 2, 4, 8, 16, 32 in argument order).
;
; Expected PTX for each argument N (named func_align_param_N):
;   - a `.param .u64 .ptr` declaration,
;   - the same alignment value as the IR `align` attribute,
;   - a state-space qualifier for the addrspace-qualified pointers:
;       addrspace(1) -> .global, addrspace(3) -> .shared,
;       addrspace(4) -> .const,  addrspace(5) -> .local,
;     and no state-space qualifier for the two plain `ptr` arguments.
; CHECK-LABEL: .entry func_align(
; CHECK: .param .u64 .ptr .align 1 func_align_param_0
; CHECK: .param .u64 .ptr .align 2 func_align_param_1
; CHECK: .param .u64 .ptr .global .align 4 func_align_param_2
; CHECK: .param .u64 .ptr .shared .align 8 func_align_param_3
; CHECK: .param .u64 .ptr .const .align 16 func_align_param_4
; CHECK: .param .u64 .ptr .local .align 32 func_align_param_5
define void @func_align(ptr nocapture readonly align 1 %input,
ptr nocapture align 2 %out,
ptr addrspace(1) align 4 %global,
ptr addrspace(3) align 8 %shared,
ptr addrspace(4) align 16 %const,
ptr addrspace(5) align 32 %local) {
; The body is intentionally empty: only the emitted parameter list is verified.
entry:
ret void
}
; Case 2: same argument list as @func_align (same pointer address spaces, in
; the same order) but with no `align` attribute on any argument.
;
; Expected PTX: every parameter is still declared as `.param .u64 .ptr` with
; the same state-space qualifiers as in case 1, and each one is given
; `.align 1` because no alignment was specified in the IR.
; CHECK-LABEL: .entry func_noalign(
; CHECK: .param .u64 .ptr .align 1 func_noalign_param_0
; CHECK: .param .u64 .ptr .align 1 func_noalign_param_1
; CHECK: .param .u64 .ptr .global .align 1 func_noalign_param_2
; CHECK: .param .u64 .ptr .shared .align 1 func_noalign_param_3
; CHECK: .param .u64 .ptr .const .align 1 func_noalign_param_4
; CHECK: .param .u64 .ptr .local .align 1 func_noalign_param_5
define void @func_noalign(ptr nocapture readonly %input,
ptr nocapture %out,
ptr addrspace(1) %global,
ptr addrspace(3) %shared,
ptr addrspace(4) %const,
ptr addrspace(5) %local) {
; The body is intentionally empty: only the emitted parameter list is verified.
entry:
ret void
}
; NVVM annotations: each entry is a (function, "kernel", 1) tuple, tagging
; @func_align and @func_noalign as kernels. The expectations above look for
; both functions as `.entry` symbols in the PTX output.
!nvvm.annotations = !{!0, !1}
!0 = !{ptr @func_align, !"kernel", i32 1}
!1 = !{ptr @func_noalign, !"kernel", i32 1}