Summary: In general, we need queue_ptr for aperture bases and trap handling, and user SGPRs have to be set up to hold queue_ptr. In the current implementation, user SGPRs are set up unnecessarily in some cases. If the target has aperture registers, queue_ptr is not needed to reference aperture bases. For trap handling, if the target supports getDoorbellID, queue_ptr is also not necessary. Further, code object version 5 introduces a new kernel ABI which passes queue_ptr as an implicit kernel argument, so user SGPRs are no longer necessary for queue_ptr. Based on the trap handling document: https://llvm.org/docs/AMDGPUUsage.html#amdgpu-trap-handler-for-amdhsa-os-v4-onwards-table, llvm.debugtrap does not need queue_ptr, so we remove queue_ptr support for llvm.debugtrap in the backend. Reviewers: sameerds, arsenm Fixes: SWDEV-307189 Differential Revision: https://reviews.llvm.org/D119762
30 lines
1.2 KiB
LLVM
30 lines
1.2 KiB
LLVM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=DOORBELL %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=DOORBELL %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=HSA %s

; Declaration of the llvm.trap intrinsic that the @trap kernel calls.
; NOTE(review): attribute group #0 is referenced here but is not defined in the
; visible part of this test -- confirm it is defined elsewhere or drop it.
declare void @llvm.trap() #0

; HSA: .amdhsa_kernel trap
; HSA-NEXT: .amdhsa_group_segment_fixed_size 0
; HSA-NEXT: .amdhsa_private_segment_fixed_size 0
; HSA-NEXT: .amdhsa_kernarg_size 8
; HSA-NEXT: .amdhsa_user_sgpr_count 8
; HSA-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .end_amdhsa_kernel

; DOORBELL: .amdhsa_kernel trap
; DOORBELL-NEXT: .amdhsa_group_segment_fixed_size 0
; DOORBELL-NEXT: .amdhsa_private_segment_fixed_size 0
; DOORBELL-NEXT: .amdhsa_kernarg_size 8
; DOORBELL-NEXT: .amdhsa_user_sgpr_count 6
; DOORBELL-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; DOORBELL: .end_amdhsa_kernel

; Kernel under test. It takes a single global (addrspace 1) pointer argument,
; performs a volatile store through it, and then calls llvm.trap. The checks in
; this file expect a kernarg size of 8 for every llc invocation, and a user SGPR
; count of 8 for the code-object v3 invocation versus 6 for the default and v4
; invocations.
;
; NOTE(review): %arg0 is marked `readonly` although the body stores through it;
; confirm this is intentional for the test before relying on the attribute.
define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) {
  ; Side effect placed before the trap; volatile so it cannot be removed.
  store volatile i32 1, i32 addrspace(1)* %arg0
  call void @llvm.trap()
  unreachable

  ; `unreachable` terminates the entry block, so the instructions below form a
  ; separate, unlabeled basic block that no branch in this function targets.
  store volatile i32 2, i32 addrspace(1)* %arg0
  ret void
}