Files
clang-p2996/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll
Christudasan Devadasan 229e118559 [AMDGPU] Codegen support for constrained multi-dword sloads (#96163)
For targets that support xnack replay feature (gfx8+), the
multi-dword scalar loads shouldn't clobber any register that
holds the src address. The constrained version of the scalar
loads have the early clobber flag attached to the dst operand
to restrict RA from re-allocating any of the src regs for its
dst operand.
2024-07-23 13:59:15 +05:30

26 lines
1.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -stop-after twoaddressinstruction < %s | FileCheck %s
; Check that %16 gets constrained to register class sgpr_96_with_sub0_sub1.
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(ptr addrspace(4) inreg %ptr) {
; CHECK-LABEL: name: s_load_constant_v3i32_align4
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr0, $sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY killed $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY killed $sgpr1
; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0:sreg_64 = COPY killed [[COPY]]
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub1:sreg_64 = COPY killed [[COPY1]]
; CHECK-NEXT: early-clobber %11:sreg_64_xexec = S_LOAD_DWORDX2_IMM_ec [[COPY2]], 0, 0 :: (invariant load (<2 x s32>) from %ir.ptr, align 4, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed [[COPY2]], 8, 0 :: (invariant load (s32) from %ir.ptr + 8, addrspace 4)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %11.sub0
; CHECK-NEXT: $sgpr0 = COPY killed [[COPY3]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY killed %11.sub1
; CHECK-NEXT: $sgpr1 = COPY killed [[COPY4]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]]
; CHECK-NEXT: $sgpr2 = COPY killed [[COPY5]]
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit killed $sgpr0, implicit killed $sgpr1, implicit killed $sgpr2
%load = load <3 x i32>, ptr addrspace(4) %ptr, align 4
ret <3 x i32> %load
}