Files
clang-p2996/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
Christudasan Devadasan b78b36e1a2 [AMDGPU] Implement whole wave register spill
To reduce the register pressure during allocation,
when the allocator spills a virtual register that
corresponds to a whole wave mode operation, the
spill loads and restores should be activated for
all lanes by temporarily flipping all bits in exec
register to one just before the spills. It is not
implemented in the compiler as of today and this
patch enables the necessary support.

This is a pre-patch before the SGPR spill to virtual
VGPR lanes that would eventually causes the whole
wave register spills during allocation.

Reviewed By: arsenm, cdevadas

Differential Revision: https://reviews.llvm.org/D143759
2023-07-07 22:51:45 +05:30

70 lines
2.2 KiB
LLVM

; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -amdgpu-s-branch-bits=4 -stop-after=branch-relaxation -verify-machineinstrs %s -o - | FileCheck %s
; Test that long branch reserved register is serialized through
; MIR.
; CHECK-LABEL: {{^}}name: uniform_long_forward_branch
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 12
; CHECK-NEXT: maxKernArgAlign: 8
; CHECK-NEXT: ldsSize: 0
; CHECK-NEXT: gdsSize: 0
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: true
; CHECK-NEXT: noSignedZerosFPMath: false
; CHECK-NEXT: memoryBound: false
; CHECK-NEXT: waveLimiter: false
; CHECK-NEXT: hasSpilledSGPRs: false
; CHECK-NEXT: hasSpilledVGPRs: false
; CHECK-NEXT: scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
; CHECK-NEXT: frameOffsetReg: '$fp_reg'
; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32'
; CHECK-NEXT: bytesInStackArgArea: 0
; CHECK-NEXT: returnsVoid: true
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
; CHECK-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
; CHECK-NEXT: workGroupIDX: { reg: '$sgpr6' }
; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
; CHECK-NEXT: workItemIDX: { reg: '$vgpr0' }
; CHECK-NEXT: psInputAddr: 0
; CHECK-NEXT: psInputEnable: 0
; CHECK-NEXT: mode:
; CHECK-NEXT: ieee: true
; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: fp32-input-denormals: true
; CHECK-NEXT: fp32-output-denormals: true
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: BitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3'
; CHECK-NEXT: body:
define amdgpu_kernel void @uniform_long_forward_branch(ptr addrspace(1) %arg, i32 %arg1) {
bb0:
%tmp = icmp ne i32 %arg1, 0
br i1 %tmp, label %bb2, label %bb3
bb2:
store volatile i32 17, ptr addrspace(1) undef
br label %bb4
bb3:
; 32 byte asm
call void asm sideeffect
"v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64", ""() #0
br label %bb4
bb4:
store volatile i32 63, ptr addrspace(1) %arg
ret void
}
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }