Files
clang-p2996/llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll
Changpeng Fang 7f9868f9b7 AMDGPU: Align the implicit kernel argument segment to 8 bytes for v5
Summary:
  In emitting metadata for implicit kernel arguments, we need to be in sync with the actual loads
to align the implicit kernel argument segment to an 8-byte boundary. In this work, we simply force
this alignment through the first implicit argument.
In addition, we don't emit metadata for any implicit kernel argument if none of them is actually used.

Reviewers: arsenm, b-sumner

Differential Revision: https://reviews.llvm.org/D123346
2022-04-11 16:12:39 -07:00

59 lines
2.3 KiB
LLVM

; Compiles two kernels for gfx906 with code object version 5 and verifies that
; the hidden (implicit) kernel arguments are reported starting at offset 8,
; both when the explicit argument ends at byte 4 and when it ends at byte 8.
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=CHECK %s
; Kernel with a single 4-byte explicit argument, so the explicit arguments end
; at byte 4, which is not a multiple of 8. The expected kernarg size in the
; kernel descriptor is 264.
; CHECK-LABEL: test_unaligned_to_eight:
; CHECK: .amdhsa_kernarg_size 264
define amdgpu_kernel void @test_unaligned_to_eight(i32 %four) {
  ; Fetch the implicit-argument pointer and publish it with a volatile store so
  ; the call has an observable use (presumably this is what marks the implicit
  ; arguments as used -- confirm against the backend).
  %hidden.args = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  store volatile i8 addrspace(4)* %hidden.args, i8 addrspace(4)* addrspace(1)* undef
  ret void
}
; Kernel with a single 8-byte explicit argument, so the explicit arguments end
; exactly at byte 8. The expected kernarg size in the kernel descriptor is 264.
; CHECK-LABEL: test_aligned_to_eight:
; CHECK: .amdhsa_kernarg_size 264
define amdgpu_kernel void @test_aligned_to_eight(i64 %eight) {
  ; Fetch the implicit-argument pointer and publish it with a volatile store so
  ; the call has an observable use (presumably this is what marks the implicit
  ; arguments as used -- confirm against the backend).
  %hidden.args = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  store volatile i8 addrspace(4)* %hidden.args, i8 addrspace(4)* addrspace(1)* undef
  ret void
}
; Metadata expectations for the kernel whose only explicit argument is the
; 4-byte "four". That argument occupies offset 0 with size 4, yet the entry
; that must follow it immediately is the first hidden argument at offset 8
; (not 4). The three hidden block-count arguments are then expected
; back-to-back at offsets 8, 12 and 16, and the segment is expected to report
; alignment 8 and size 264.
; NOTE(review): the closing label directive anchors on this kernel's .name
; entry, so these expectations assume .name is printed after the .args list.
; CHECK-LABEL: amdhsa.kernels:
; CHECK: - .args:
; CHECK-NEXT: - .name: four
; CHECK-NEXT: .offset: 0
; CHECK-NEXT: .size: 4
; CHECK-NEXT: .value_kind: by_value
; CHECK-NEXT: - .offset: 8
; CHECK-NEXT: .size: 4
; CHECK-NEXT: .value_kind: hidden_block_count_x
; CHECK-NEXT: - .offset: 12
; CHECK-NEXT: .size: 4
; CHECK-NEXT: .value_kind: hidden_block_count_y
; CHECK-NEXT: - .offset: 16
; CHECK-NEXT: .size: 4
; CHECK-NEXT: .value_kind: hidden_block_count_z
; CHECK: .kernarg_segment_align: 8
; CHECK-NEXT: .kernarg_segment_size: 264
; CHECK-LABEL: .name: test_unaligned_to_eight
; Metadata expectations for the kernel whose only explicit argument is the
; 8-byte "eight". That argument occupies offset 0 with size 8, and the entry
; that must follow it immediately is the first hidden argument at offset 8.
; The three hidden block-count arguments are expected back-to-back at offsets
; 8, 12 and 16, and the segment is expected to report alignment 8 and size 264.
; NOTE(review): the closing label directive anchors on this kernel's .name
; entry, so these expectations assume .name is printed after the .args list.
; CHECK: - .args:
; CHECK-NEXT: - .name: eight
; CHECK-NEXT: .offset: 0
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: by_value
; CHECK-NEXT: - .offset: 8
; CHECK-NEXT: .size: 4
; CHECK-NEXT: .value_kind: hidden_block_count_x
; CHECK-NEXT: - .offset: 12
; CHECK-NEXT: .size: 4
; CHECK-NEXT: .value_kind: hidden_block_count_y
; CHECK-NEXT: - .offset: 16
; CHECK-NEXT: .size: 4
; CHECK-NEXT: .value_kind: hidden_block_count_z
; CHECK: .kernarg_segment_align: 8
; CHECK-NEXT: .kernarg_segment_size: 264
; CHECK-LABEL: .name: test_aligned_to_eight
; Intrinsic called by both kernels in this test; it takes no operands and
; yields an i8 addrspace(4)* value. Presumably that value is the address of the
; implicit kernel argument block (inferred from the name -- confirm against the
; AMDGPU backend documentation).
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()