Files
clang-p2996/llvm/test/CodeGen/AMDGPU/spill-writelane-vgprs.ll
Christudasan Devadasan f9cd789658 [AMDGPU] Add pseudo instructions for SGPR spill to VGPR (#69923)
For a future patch, it is important that the lowered SGPR
spills remain recognizable as spill instructions during regalloc.
Directly lowering them into V_WRITELANE/V_READLANE won't allow
us to attach the SPILL flag to their instructions.

This patch introduces the pseudo instructions with the SGPRSpill
flag set in their Desc. They will get lowered to equivalent
instructions later during post RA pseudo expansion.
2023-10-27 17:24:10 +05:30

59 lines
2.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
; Callee must preserve the VGPR modified by writelane even if it is marked Caller-saved.
declare i32 @llvm.amdgcn.writelane(i32, i32, i32)
; Test: in a non-kernel function, an SGPR that is clobbered by inline asm is
; saved into a VGPR lane instead of memory. The empty asm statement at the end
; of the body lists s35 as a clobber; the expected code copies s35 into lane 0
; of v0 before the asm and reads it back afterwards. Because v0 is overwritten
; by that save, the expected prologue and epilogue also store v0 to the stack
; and reload it, each time under a temporarily replaced exec mask that is kept
; in s[4:5] and restored right after the memory access.
define void @sgpr_spill_writelane() {
; GCN-LABEL: sgpr_spill_writelane:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_writelane_b32 v0, s35, 0
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_readlane_b32 s35, v0, 0
; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; The asm string is empty, so nothing is emitted between the ASMSTART/ASMEND
; markers; the statement exists only to declare s35 as clobbered.
call void asm sideeffect "", "~{s35}"()
ret void
}
; Test: explicit use of the writelane intrinsic in a non-kernel function with
; constant lane select (23) and constant previous value (15). Unlike the
; inline-asm clobber case, the expected code here contains no stack save or
; reload of the VGPR that v_writelane_b32 modifies (v3). Per the expected
; output, %src arrives in v2 and is first moved to a scalar register with
; v_readfirstlane_b32, and the result is stored through the pointer in v[0:1].
define void @device_writelane_intrinsic(ptr addrspace(1) %out, i32 %src) {
; GCN-LABEL: device_writelane_intrinsic:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v3, 15
; GCN-NEXT: v_readfirstlane_b32 s4, v2
; GCN-NEXT: v_writelane_b32 v3, s4, 23
; GCN-NEXT: global_store_dword v[0:1], v3, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; Operands in call order: value to write, lane select, previous register value.
%writelane = call i32 @llvm.amdgcn.writelane(i32 %src, i32 23, i32 15)
; Store the result so the intrinsic call has an observable use.
store i32 %writelane, ptr addrspace(1) %out, align 4
ret void
}
; Test: explicit use of the writelane intrinsic inside an amdgpu_kernel entry
; point, with a lane select that is a runtime kernel argument (%src1) and a
; constant previous value (45). Per the expected output, the arguments are
; fetched with a single s_load_dwordx4 (s[0:1] = %out, s2 = %src0, s3 = %src1),
; the lane select is passed to v_writelane_b32 through m0, and no stack save or
; reload of the written VGPR (v1) is expected.
define amdgpu_kernel void @kernel_writelane_intrinsic(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GCN-LABEL: kernel_writelane_intrinsic:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-NEXT: v_mov_b32_e32 v1, 45
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b32 m0, s3
; GCN-NEXT: v_writelane_b32 v1, s2, m0
; GCN-NEXT: global_store_dword v0, v1, s[0:1]
; GCN-NEXT: s_endpgm
; Operands in call order: value to write, lane select, previous register value.
%writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 45)
; Store the result so the intrinsic call has an observable use.
store i32 %writelane, ptr addrspace(1) %out, align 4
ret void
}