[AMDGPU] S_SET_GPR_IDX_ON can be passed an immediate index (#125086)

Oversight found by the ISel fuzzing effort. The expansion code assumed the
index argument is always a register, but in some cases it can be an
immediate. Tablegen's operand type for the instruction is SSrc_b32, i.e.
either a register or an immediate is acceptable. Added the repro from the
bug reporter as a test case - prior to this patch llvm will assert in getReg.

Fixes SWDEV-508589
This commit is contained in:
Jon Chesterfield
2025-01-30 16:40:12 +00:00
committed by GitHub
parent 2428b6ec40
commit c39fba209c
2 changed files with 40 additions and 2 deletions

View File

@@ -2366,11 +2366,11 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
assert(ST.useVGPRIndexMode());
Register VecReg = MI.getOperand(0).getReg();
bool IsUndef = MI.getOperand(1).isUndef();
Register Idx = MI.getOperand(3).getReg();
MachineOperand Idx = MI.getOperand(3);
Register SubReg = MI.getOperand(4).getImm();
MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON))
.addReg(Idx)
.add(Idx)
.addImm(AMDGPU::VGPRIndexMode::DST_ENABLE);
SetOn->getOperand(3).setIsUndef();

View File

@@ -0,0 +1,38 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s
; Regression test (llvm-project #125086): a dynamically indexed store into a
; small addrspace(5) array whose index ends up being a compile-time constant.
; The expected assembly below shows s_set_gpr_idx_on taking the literal 0 as
; its index operand rather than an SGPR.
; NOTE(review): the assertion lines inside the function are autogenerated;
; regenerate them with the update script instead of editing them by hand.
define amdgpu_kernel void @copy_to_reg_frameindex(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: copy_to_reg_frameindex:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: .LBB0_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_cmp_lt_u32 0, 16
; CHECK-NEXT: s_set_gpr_idx_on 0, gpr_idx(DST)
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_set_gpr_idx_off
; CHECK-NEXT: s_cbranch_scc1 .LBB0_1
; CHECK-NEXT: ; %bb.2: ; %done
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dword v1, v0, s[0:1]
; CHECK-NEXT: s_endpgm
entry:
; Loop step. The expected assembly uses the literal 0 for both the compare and
; the index, which is consistent with this remainder-by-minus-one folding to 0.
%B = srem i32 %c, -1
; 16-element i32 array; the expected assembly contains no memory access for
; it, so it is presumably kept in VGPRs - TODO confirm.
%alloca = alloca [16 x i32], align 4, addrspace(5)
br label %loop
loop:
; %inc is both the value stored and the element index for the store.
%inc = phi i32 [ 0, %entry ], [ %inc.i, %loop ]
%ptr = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %inc
store i32 %inc, ptr addrspace(5) %ptr, align 4
%inc.i = add i32 %inc, %B
; Leave the loop once the stepped index reaches 16 (unsigned compare).
%cnd = icmp uge i32 %inc.i, 16
br i1 %cnd, label %done, label %loop
done:
; Read back element 0 of the array and write it to the output pointer.
%tmp1 = load i32, ptr addrspace(5) %alloca, align 4
store i32 %tmp1, ptr addrspace(1) %out, align 4
ret void
}