Files
clang-p2996/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
Changpeng Fang 0f20a35b9e AMDGPU: Set up User SGPRs for queue_ptr only when necessary
Summary:
  In general, we need queue_ptr for aperture bases and trap handling,
and user SGPRs have to be set up to hold queue_ptr. In current implementation,
user SGPRs are set up unnecessarily for some cases. If the target has aperture
registers, queue_ptr is not needed to reference aperture bases. For trap
handling, if the target supports getDoorbellID, queue_ptr is also not necessary.
Further, code object version 5 introduces a new kernel ABI which passes queue_ptr
as an implicit kernel argument, so user SGPRs are no longer necessary for
queue_ptr. Based on the trap handling document:
https://llvm.org/docs/AMDGPUUsage.html#amdgpu-trap-handler-for-amdhsa-os-v4-onwards-table,
llvm.debugtrap does not need queue_ptr, so we remove queue_ptr support for llvm.debugtrap
in the backend.

Reviewers: sameerds, arsenm

Fixes: SWDEV-307189

Differential Revision: https://reviews.llvm.org/D119762
2022-03-09 10:14:05 -08:00

30 lines
1.2 KiB
LLVM

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=DOORBELL %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=DOORBELL %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=HSA %s
;
; Checks the .amdhsa_* kernel descriptor directives that llc prints for a
; gfx900 kernel which calls llvm.trap. The three llc invocations above differ
; only in --amdhsa-code-object-version (unset, 4, and 3). The first two are
; verified with the DOORBELL prefix, the third with the HSA prefix.
;
; Intrinsic called by @trap below.
declare void @llvm.trap() #0
; Expectations for the code object version 3 invocation: kernarg size 8 and
; eight user SGPRs.
; HSA: .amdhsa_kernel trap
; HSA-NEXT: .amdhsa_group_segment_fixed_size 0
; HSA-NEXT: .amdhsa_private_segment_fixed_size 0
; HSA-NEXT: .amdhsa_kernarg_size 8
; HSA-NEXT: .amdhsa_user_sgpr_count 8
; HSA-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .end_amdhsa_kernel
; Expectations for the default and version 4 invocations: the same directives
; as the group above, except that only six user SGPRs are expected.
; NOTE(review): the two-register difference presumably corresponds to
; queue_ptr no longer occupying user SGPRs in these configurations -- confirm
; against the AMDGPU backend.
; DOORBELL: .amdhsa_kernel trap
; DOORBELL-NEXT: .amdhsa_group_segment_fixed_size 0
; DOORBELL-NEXT: .amdhsa_private_segment_fixed_size 0
; DOORBELL-NEXT: .amdhsa_kernarg_size 8
; DOORBELL-NEXT: .amdhsa_user_sgpr_count 6
; DOORBELL-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; DOORBELL: .end_amdhsa_kernel
; Kernel under test. Its only argument is an addrspace(1) pointer; the checks
; for this kernel expect a kernarg size of 8. The body performs a volatile
; store through the pointer and then calls the trap intrinsic.
; NOTE(review): %arg0 is marked readonly but is stored through below --
; confirm the attribute is intentional for this test.
define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) {
; Volatile store issued before the trap.
store volatile i32 1, i32 addrspace(1)* %arg0
; Call the trap intrinsic; `unreachable` then terminates the entry block.
call void @llvm.trap()
unreachable
; The remaining instructions come after the terminator, so they form a
; separate block that nothing in this function branches to.
store volatile i32 2, i32 addrspace(1)* %arg0
ret void
}