[AMDGPU] Fix to prevent sinking of PERMLANE_SWAP instruction (#144423)
The permlane_swap instructions depend on the exec mask, so this change adds the isConvergent flag to prevent them from being sunk. Fixes: SWDEV-537232
This commit is contained in:
committed by
GitHub
parent
e5559ca45f
commit
1b83f10072
@@ -774,7 +774,8 @@ defm V_PRNG_B32 : VOP1Inst <"v_prng_b32", VOP_I32_I32, int_amdgcn_prng_b32>;
|
||||
|
||||
let Constraints = "$vdst = $vdst_in, $src0_out = $src0",
|
||||
DisableEncoding="$vdst_in,$src0_out",
|
||||
SchedRW = [Write32Bit, Write32Bit] in {
|
||||
SchedRW = [Write32Bit, Write32Bit],
|
||||
isConvergent = 1 in {
|
||||
let SubtargetPredicate = HasPermlane16Swap in {
|
||||
defm V_PERMLANE16_SWAP_B32 : VOP1Inst<"v_permlane16_swap_b32", VOP_PERMLANE_SWAP>;
|
||||
}
|
||||
@@ -1549,8 +1550,11 @@ defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;
|
||||
defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
|
||||
|
||||
defm V_PRNG_B32 : VOP1_Real_gfx9 <0x58>;
|
||||
|
||||
let isConvergent = 1 in {
|
||||
defm V_PERMLANE16_SWAP_B32 : VOP1_OpSel_Real_e32e64_gfx9<0x059>;
|
||||
defm V_PERMLANE32_SWAP_B32 : VOP1_OpSel_Real_e32e64_gfx9<0x05a>;
|
||||
}
|
||||
|
||||
class MovDPP8Pattern<Predicate Pred, Instruction Inst, ValueType vt> : GCNPat <
|
||||
(vt (int_amdgcn_mov_dpp8 vt:$src, timm:$dpp8)),
|
||||
|
||||
@@ -15,6 +15,7 @@ class LetDummies {
|
||||
bit isConvertibleToThreeAddress;
|
||||
bit isMoveImm;
|
||||
bit isReMaterializable;
|
||||
bit isConvergent;
|
||||
bit isAsCheapAsAMove;
|
||||
bit FPDPRounding;
|
||||
Predicate SubtargetPredicate;
|
||||
|
||||
@@ -733,3 +733,70 @@ body: |
|
||||
liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
name: test_no_sink_permlane_swap
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: true
|
||||
body: |
|
||||
; GFX9-LABEL: name: test_no_sink_permlane_swap
|
||||
; GFX9: bb.0:
|
||||
; GFX9-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
; GFX9-NEXT: liveins: $vgpr0
|
||||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]]
|
||||
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX9-NEXT: [[V_PERMLANE32_SWAP_B32_e64_:%[0-9]+]]:vgpr_32, [[V_PERMLANE32_SWAP_B32_e64_1:%[0-9]+]]:vgpr_32 = V_PERMLANE32_SWAP_B32_e64 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], 0, 0, implicit $exec
|
||||
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX9-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[COPY2]], [[S_MOV_B32_]], implicit $exec
|
||||
; GFX9-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF [[V_CMP_LT_I32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
; GFX9-NEXT: S_BRANCH %bb.1
|
||||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: bb.1:
|
||||
; GFX9-NEXT: successors: %bb.2(0x80000000)
|
||||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_PERMLANE32_SWAP_B32_e64_]], [[V_PERMLANE32_SWAP_B32_e64_1]], implicit $exec
|
||||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: bb.2:
|
||||
; GFX9-NEXT: successors: %bb.3(0x80000000)
|
||||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B32_e32_]], %bb.0, [[V_MAX_I32_e64_]], %bb.1
|
||||
; GFX9-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: bb.3:
|
||||
; GFX9-NEXT: S_ENDPGM 0, implicit [[PHI]]
|
||||
bb.0:
|
||||
successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
liveins: $vgpr0
|
||||
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%2:sreg_64 = S_MOV_B64 0
|
||||
%3:vreg_64 = COPY %2
|
||||
%4:vgpr_32 = GLOBAL_LOAD_DWORD killed %3, 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
%5:vgpr_32, %6:vgpr_32 = V_PERMLANE32_SWAP_B32_e64 %4, %4, 0, 0, implicit $exec
|
||||
%7:vgpr_32 = COPY $vgpr0
|
||||
%8:sreg_32 = S_MOV_B32 1
|
||||
%9:sreg_64 = V_CMP_LT_I32_e64 %7, %8, implicit $exec
|
||||
%10:sreg_64 = COPY %9
|
||||
%11:sreg_64 = SI_IF %10, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2(0x80000000)
|
||||
|
||||
%12:vgpr_32 = V_MAX_I32_e64 %5, %6, implicit $exec
|
||||
|
||||
bb.2:
|
||||
successors: %bb.3(0x80000000)
|
||||
|
||||
%13:vgpr_32 = PHI %1, %bb.0, %12, %bb.1
|
||||
SI_END_CF %11, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
|
||||
bb.3:
|
||||
S_ENDPGM 0, implicit %13
|
||||
...
|
||||
|
||||
Reference in New Issue
Block a user