Files
clang-p2996/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
Baptiste Saleil 54c0f520c7 [VirtRegRewriter] Insert missing killed flags when tracking subregister liveness
VirtRegRewriter may sometimes fail to correctly apply the kill flag where necessary,
which causes unnecessary code gen on PowerPC. This patch fixes the way masks for
defined lanes are computed and the way the mask for used lanes is computed.

Contact albion.fung@ibm.com instead of author for problems related to this commit.

Differential Revision: https://reviews.llvm.org/D92405
2021-03-03 12:02:04 -05:00

170 lines
14 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -verify-machineinstrs -run-pass=si-form-memory-clauses,greedy,virtregrewriter -o - %s | FileCheck %s
# FIXME: -start-before=si-form-memory-clauses doesn't work correctly with -stop-after
# This used to produce a bundle that was impossible for the register
# allocator to handle.
---
# Regression test function. The RUN line feeds it through the
# si-form-memory-clauses, greedy and virtregrewriter passes; the autogenerated
# CHECK lines in the body pin the resulting physical-register assignment.
name: unallocatable_clause_bundle
tracksRegLiveness: true
# Target-specific function info. $sgpr32 (stackPtrOffsetReg) shows up as an
# implicit operand on the SI_SPILL_S128_SAVE / SI_SPILL_S128_RESTORE pair in
# the expected output.
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr100_sgpr101_sgpr102_sgpr103'
stackPtrOffsetReg: '$sgpr32'
# The private segment buffer is passed in $sgpr0-$sgpr3, which is also listed
# in the liveins of bb.0.
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
mode:
ieee: false
occupancy: 20
# The body lists the expected output (CHECK lines) first, then the input MIR.
# The input defines many sgpr_256 / sgpr_128 values that are all consumed later
# by the IMAGE_SAMPLE instructions; the expected output spills
# $sgpr0_sgpr1_sgpr2_sgpr3 to %stack.0 and reloads it just before the
# BUFFER_LOAD_FORMAT_XYZW_IDXEN that uses it.
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
; CHECK-LABEL: name: unallocatable_clause_bundle
; CHECK: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: renamable $sgpr4 = COPY $sgpr0
; CHECK: SI_SPILL_S128_SAVE $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr32 :: (store 16 into %stack.0, align 4, addrspace 5)
; CHECK: renamable $sgpr5 = S_MOV_B32 0
; CHECK: renamable $sgpr76 = COPY renamable $sgpr5
; CHECK: renamable $sgpr77 = COPY renamable $sgpr5
; CHECK: renamable $sgpr78 = COPY renamable $sgpr5
; CHECK: renamable $sgpr0 = S_MOV_B32 1056964608
; CHECK: renamable $sgpr79 = COPY renamable $sgpr5
; CHECK: renamable $sgpr1 = COPY renamable $sgpr0
; CHECK: renamable $sgpr8 = COPY renamable $sgpr5
; CHECK: renamable $sgpr9 = COPY renamable $sgpr5
; CHECK: renamable $sgpr10 = COPY renamable $sgpr5
; CHECK: renamable $sgpr11 = COPY renamable $sgpr5
; CHECK: renamable $sgpr12 = COPY renamable $sgpr5
; CHECK: renamable $sgpr13 = COPY renamable $sgpr5
; CHECK: renamable $sgpr14 = COPY renamable $sgpr5
; CHECK: renamable $sgpr15 = COPY renamable $sgpr5
; CHECK: renamable $vgpr5_vgpr6 = COPY killed renamable $sgpr0_sgpr1
; CHECK: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1088, 0, 0 :: (dereferenceable load 32, addrspace 6)
; CHECK: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0, 0 :: (load 16, addrspace 6)
; CHECK: renamable $sgpr0 = S_MOV_B32 1200
; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
; CHECK: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1152, 0, 0 :: (dereferenceable load 32, addrspace 6)
; CHECK: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1
; CHECK: renamable $sgpr0 = S_MOV_B32 1264
; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
; CHECK: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1216, 0, 0 :: (dereferenceable load 32, addrspace 6)
; CHECK: renamable $sgpr88_sgpr89_sgpr90_sgpr91 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1
; CHECK: renamable $sgpr0 = S_MOV_B32 1328
; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
; CHECK: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1280, 0, 0 :: (dereferenceable load 32, addrspace 6)
; CHECK: renamable $sgpr92_sgpr93_sgpr94_sgpr95 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1
; CHECK: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1344, 0, 0 :: (dereferenceable load 32, addrspace 6)
; CHECK: renamable $sgpr0 = S_MOV_B32 1392
; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
; CHECK: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 0, 0, 0 :: (load 32, addrspace 6)
; CHECK: renamable $sgpr2 = S_MOV_B32 1456
; CHECK: renamable $sgpr3 = COPY renamable $sgpr5
; CHECK: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1472, 0, 0 :: (dereferenceable load 32, addrspace 6)
; CHECK: renamable $sgpr4 = S_MOV_B32 1520
; CHECK: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 0, 0, 0 :: (load 16, addrspace 6)
; CHECK: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (load 16, addrspace 6)
; CHECK: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
; CHECK: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
; CHECK: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
; CHECK: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
; CHECK: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
; CHECK: renamable $vgpr11 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, renamable $sgpr92_sgpr93_sgpr94_sgpr95, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
; CHECK: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
; CHECK: renamable $vgpr13 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
; CHECK: renamable $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
; CHECK: renamable $sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 16 from %stack.0, align 4, addrspace 5)
; CHECK: renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_FORMAT_XYZW_IDXEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: KILL killed renamable $sgpr4_sgpr5_sgpr6_sgpr7
; CHECK: KILL killed renamable $sgpr92_sgpr93_sgpr94_sgpr95
; CHECK: KILL killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75
; CHECK: KILL killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
; CHECK: KILL killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43
; CHECK: KILL killed renamable $vgpr5_vgpr6
; CHECK: KILL killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: KILL killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59
; CHECK: KILL killed renamable $sgpr96_sgpr97_sgpr98_sgpr99
; CHECK: KILL killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51
; CHECK: KILL killed renamable $sgpr8_sgpr9_sgpr10_sgpr11
; CHECK: KILL killed renamable $sgpr88_sgpr89_sgpr90_sgpr91
; CHECK: KILL killed renamable $vgpr0
; CHECK: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 killed $vgpr7, killed $vgpr8, implicit $mode, implicit $exec
; CHECK: renamable $vgpr0 = V_MAX3_F32_e64 0, killed $vgpr0, 0, killed $vgpr9, 0, killed $vgpr10, 0, 0, implicit $mode, implicit $exec
; CHECK: renamable $vgpr1 = nofpexcept V_ADD_F32_e32 -1083321614, killed $vgpr12, implicit $mode, implicit $exec
; CHECK: renamable $vgpr3 = nofpexcept V_ADD_F32_e32 -1090988802, killed $vgpr13, implicit $mode, implicit $exec
; CHECK: renamable $vgpr0 = V_MAX3_F32_e64 0, killed $vgpr0, 0, killed $vgpr11, 0, killed $vgpr14, 0, 0, implicit $mode, implicit $exec
; CHECK: renamable $vgpr0 = V_MAX3_F32_e64 0, killed $vgpr0, 0, killed $vgpr1, 0, killed $vgpr3, 0, 0, implicit $mode, implicit $exec
; CHECK: renamable $sgpr0 = nofpexcept V_CMP_GT_F32_e64 0, 1028443341, 0, killed $vgpr0, 0, implicit $mode, implicit $exec
; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 0, 0, 1065353216, killed $sgpr0, implicit $exec
; CHECK: EXP_DONE 12, killed renamable $vgpr0, killed renamable $vgpr2, undef renamable $vgpr0, undef renamable $vgpr0, -1, 0, 15, implicit $exec
; CHECK: S_ENDPGM 0
%0:vgpr_32 = COPY $vgpr0
undef %1.sub0:sgpr_64 = COPY $sgpr0
%2:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1.sub1:sgpr_64 = S_MOV_B32 0
undef %3.sub0:sgpr_128 = COPY %1.sub1
%3.sub1:sgpr_128 = COPY %1.sub1
%3.sub2:sgpr_128 = COPY %1.sub1
undef %4.sub0:sgpr_64 = S_MOV_B32 1056964608
%3.sub3:sgpr_128 = COPY %1.sub1
%4.sub1:sgpr_64 = COPY %4.sub0
undef %5.sub0:sgpr_256 = COPY %1.sub1
%5.sub1:sgpr_256 = COPY %1.sub1
%5.sub2:sgpr_256 = COPY %1.sub1
%5.sub3:sgpr_256 = COPY %1.sub1
%5.sub4:sgpr_256 = COPY %1.sub1
%5.sub5:sgpr_256 = COPY %1.sub1
%5.sub6:sgpr_256 = COPY %1.sub1
%5.sub7:sgpr_256 = COPY %1.sub1
%6:vreg_64 = COPY %4
%7:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1088, 0, 0 :: (dereferenceable load 32, addrspace 6)
%8:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 0, 0, 0 :: (load 16, addrspace 6)
undef %9.sub0:sreg_64_xexec = S_MOV_B32 1200
%9.sub1:sreg_64_xexec = COPY %1.sub1
%10:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1152, 0, 0 :: (dereferenceable load 32, addrspace 6)
%11:sgpr_128 = S_LOAD_DWORDX4_IMM %9, 0, 0, 0 :: (load 16, addrspace 6)
undef %12.sub0:sreg_64_xexec = S_MOV_B32 1264
%12.sub1:sreg_64_xexec = COPY %1.sub1
%13:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1216, 0, 0 :: (dereferenceable load 32, addrspace 6)
%14:sgpr_128 = S_LOAD_DWORDX4_IMM %12, 0, 0, 0 :: (load 16, addrspace 6)
undef %15.sub0:sreg_64_xexec = S_MOV_B32 1328
%15.sub1:sreg_64_xexec = COPY %1.sub1
%16:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1280, 0, 0 :: (dereferenceable load 32, addrspace 6)
%17:sgpr_128 = S_LOAD_DWORDX4_IMM %15, 0, 0, 0 :: (load 16, addrspace 6)
%18:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1344, 0, 0 :: (dereferenceable load 32, addrspace 6)
undef %19.sub0:sreg_64_xexec = S_MOV_B32 1392
%19.sub1:sreg_64_xexec = COPY %1.sub1
%20:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 0, 0, 0 :: (load 32, addrspace 6)
undef %21.sub0:sreg_64_xexec = S_MOV_B32 1456
%21.sub1:sreg_64_xexec = COPY %1.sub1
%22:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1472, 0, 0 :: (dereferenceable load 32, addrspace 6)
%1.sub0:sgpr_64 = S_MOV_B32 1520
%23:sgpr_128 = S_LOAD_DWORDX4_IMM %21, 0, 0, 0 :: (load 16, addrspace 6)
%24:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 0, 0, 0 :: (load 16, addrspace 6)
%25:sgpr_128 = S_LOAD_DWORDX4_IMM %19, 0, 0, 0 :: (load 16, addrspace 6)
%26:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %5, %3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
%27:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %7, %8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
%28:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %10, %11, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
%29:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %13, %14, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
%30:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %16, %17, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
%31:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %20, %23, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
%32:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %22, %24, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
%33:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %18, %25, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
%34:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN %0, %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
%35:vgpr_32 = nofpexcept V_MAX_F32_e32 %26, %27, implicit $mode, implicit $exec
%36:vgpr_32 = V_MAX3_F32_e64 0, %35, 0, %28, 0, %29, 0, 0, implicit $mode, implicit $exec
%37:vgpr_32 = nofpexcept V_ADD_F32_e32 -1083321614, %31, implicit $mode, implicit $exec
%38:vgpr_32 = nofpexcept V_ADD_F32_e32 -1090988802, %32, implicit $mode, implicit $exec
%39:vgpr_32 = V_MAX3_F32_e64 0, %36, 0, %30, 0, %33, 0, 0, implicit $mode, implicit $exec
%40:vgpr_32 = V_MAX3_F32_e64 0, %39, 0, %37, 0, %38, 0, 0, implicit $mode, implicit $exec
%41:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F32_e64 0, 1028443341, 0, %40, 0, implicit $mode, implicit $exec
%42:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1065353216, %41, implicit $exec
EXP_DONE 12, %42, %34.sub1, undef %43:vgpr_32, undef %44:vgpr_32, -1, 0, 15, implicit $exec
S_ENDPGM 0
...