Only select to a VGPR if it's trivally used in VGPR only contexts. This fixes mishandling frame indexes used in SGPR only contexts, like inline assembly constraints. This is suboptimal in the common case where the frame index is transitively used by only VALU ops. We make up for this by later folding the copy to VALU plus scalar op in SIFoldOperands.
74 lines
3.8 KiB
LLVM
74 lines
3.8 KiB
LLVM
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck --check-prefixes=GCN,CI,ALL %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=carrizo --show-mc-encoding < %s | FileCheck --check-prefixes=GCN,VI,ALL %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 --show-mc-encoding < %s | FileCheck --check-prefixes=GCN,GFX9,ALL %s
|
|
; RUN: llc -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s
|
|
; RUN: llc -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s
|
|
; RUN: llc -mcpu=gfx1010 -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s
|
|
; RUN: llc -mcpu=gfx1100 -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global,-architected-flat-scratch,-user-sgpr-init16-bug < %s | FileCheck --check-prefixes=GCNHSA,ALL %s
|
|
|
|
; FIXME: align on alloca seems to be ignored for private_segment_alignment
|
|
|
|
; ALL-LABEL: {{^}}large_alloca_compute_shader:
|
|
|
|
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
|
|
; GCN-DAG: ; fixup A - offset: 4, value: SCRATCH_RSRC_DWORD0
|
|
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
|
|
; GCN-DAG: ; fixup A - offset: 4, value: SCRATCH_RSRC_DWORD1
|
|
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -1
|
|
; CI-DAG: s_mov_b32 s{{[0-9]+}}, 0xe8f000
|
|
; VI-DAG: s_mov_b32 s{{[0-9]+}}, 0xe80000
|
|
; GFX9-DAG: s_mov_b32 s{{[0-9]+}}, 0xe00000
|
|
|
|
; GCNHSA: buffer_store_{{dword|b32}} {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], 0 offen
|
|
; GCNHSA: buffer_load_{{dword|b32}} {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], 0 offen
|
|
|
|
; GCNHSA: .amdhsa_kernel large_alloca_compute_shader
|
|
; GCNHSA: .amdhsa_group_segment_fixed_size 0
|
|
; GCNHSA: .amdhsa_private_segment_fixed_size 32772
|
|
; GCNHSA: .amdhsa_user_sgpr_private_segment_buffer 1
|
|
; GCNHSA: .amdhsa_user_sgpr_dispatch_ptr 1
|
|
; GCNHSA: .amdhsa_user_sgpr_queue_ptr 1
|
|
; GCNHSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1
|
|
; GCNHSA: .amdhsa_user_sgpr_dispatch_id 1
|
|
; GCNHSA: .amdhsa_user_sgpr_flat_scratch_init 1
|
|
; GCNHSA: .amdhsa_user_sgpr_private_segment_size 0
|
|
; GCNHSA: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
|
|
; GCNHSA: .amdhsa_system_sgpr_workgroup_id_x 1
|
|
; GCNHSA: .amdhsa_system_sgpr_workgroup_id_y 1
|
|
; GCNHSA: .amdhsa_system_sgpr_workgroup_id_z 1
|
|
; GCNHSA: .amdhsa_system_sgpr_workgroup_info 0
|
|
; GCNHSA: .amdhsa_system_vgpr_workitem_id 2
|
|
; GCNHSA: .amdhsa_next_free_vgpr {{2|3}}
|
|
; GCNHSA: .amdhsa_next_free_sgpr 18
|
|
; GCNHSA: .amdhsa_float_round_mode_32 0
|
|
; GCNHSA: .amdhsa_float_round_mode_16_64 0
|
|
; GCNHSA: .amdhsa_float_denorm_mode_32 3
|
|
; GCNHSA: .amdhsa_float_denorm_mode_16_64 3
|
|
; GCNHSA: .amdhsa_dx10_clamp 1
|
|
; GCNHSA: .amdhsa_ieee_mode 1
|
|
; GCNHSA: .amdhsa_exception_fp_ieee_invalid_op 0
|
|
; GCNHSA: .amdhsa_exception_fp_denorm_src 0
|
|
; GCNHSA: .amdhsa_exception_fp_ieee_div_zero 0
|
|
; GCNHSA: .amdhsa_exception_fp_ieee_overflow 0
|
|
; GCNHSA: .amdhsa_exception_fp_ieee_underflow 0
|
|
; GCNHSA: .amdhsa_exception_fp_ieee_inexact 0
|
|
; GCNHSA: .amdhsa_exception_int_div_zero 0
|
|
; GCNHSA: .end_amdhsa_kernel
|
|
|
|
; Scratch size = alloca size + emergency stack slot, align {{.*}}, addrspace(5)
|
|
; ALL: ; ScratchSize: 32772
|
|
define amdgpu_kernel void @large_alloca_compute_shader(i32 %x, i32 %y) #0 {
|
|
%large = alloca [8192 x i32], align 4, addrspace(5)
|
|
%gep = getelementptr [8192 x i32], ptr addrspace(5) %large, i32 0, i32 8191
|
|
store volatile i32 %x, ptr addrspace(5) %gep
|
|
%gep1 = getelementptr [8192 x i32], ptr addrspace(5) %large, i32 0, i32 %y
|
|
%val = load volatile i32, ptr addrspace(5) %gep1
|
|
store volatile i32 %val, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { nounwind }
|
|
|
|
!llvm.module.flags = !{!0}
|
|
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
|