Files
clang-p2996/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
Matt Arsenault 7252787dd9 RegAllocGreedy: Fix detection of lanes read by a bundle
SplitKit creates questionably formed bundles of copies
when it needs to copy a subset of live lanes and can't do
it with a single subregister index. These are merely marked
as part of a bundle, and don't start with a BUNDLE instruction.
Queries for the slot index would give the first copy in the
bundle, and we need to inspect the operands of all the other
bundled copies.

Also fix and simplify detection of read lane subsets. This causes
some RISCV test regressions, but these look like accidentally beneficial
splits. I don't see a subrange-based reason to perform these splits.

Avoids some really ugly regressions in a future patch.

https://reviews.llvm.org/D146859
2023-10-01 11:37:48 +03:00

367 lines
23 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy -o - -verify-machineinstrs %s | FileCheck -check-prefix=RA %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy,virtregrewriter,post-RA-sched -o - -verify-machineinstrs %s | FileCheck -check-prefix=VR %s
# splitkit_copy_bundle: a three-block loop over two sgpr_1024 tuples.
#   bb.0 defines only %2.sub0/%2.sub1 and %3.sub0.
#   bb.1 copies those lanes into %2.sub2-sub29 and %3.sub1-sub31.
#   bb.2 carries a csr_amdgpu register mask, then branches to bb.1 or itself.
# The RA checks list the same defs and copies as the input, with no extra
# copies or spills. The VR checks show %2 and %3 rewritten to the physical
# tuples starting at $sgpr36 and $sgpr68 respectively.
---
name: splitkit_copy_bundle
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
body: |
; RA-LABEL: name: splitkit_copy_bundle
; RA: bb.0:
; RA-NEXT: successors: %bb.1(0x80000000)
; RA-NEXT: {{ $}}
; RA-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; RA-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; RA-NEXT: undef %2.sub1:sgpr_1024 = S_MOV_B32 -1
; RA-NEXT: %2.sub0:sgpr_1024 = S_MOV_B32 -1
; RA-NEXT: undef %3.sub0:sgpr_1024 = S_MOV_B32 0
; RA-NEXT: {{ $}}
; RA-NEXT: bb.1:
; RA-NEXT: successors: %bb.2(0x80000000)
; RA-NEXT: {{ $}}
; RA-NEXT: %2.sub2:sgpr_1024 = COPY %2.sub0
; RA-NEXT: %2.sub3:sgpr_1024 = COPY %2.sub1
; RA-NEXT: %2.sub4:sgpr_1024 = COPY %2.sub0
; RA-NEXT: %2.sub5:sgpr_1024 = COPY %2.sub1
; RA-NEXT: %2.sub6:sgpr_1024 = COPY %2.sub0
; RA-NEXT: %2.sub7:sgpr_1024 = COPY %2.sub1
; RA-NEXT: %2.sub8:sgpr_1024 = COPY %2.sub0
; RA-NEXT: %2.sub9:sgpr_1024 = COPY %2.sub1
; RA-NEXT: %2.sub10:sgpr_1024 = COPY %2.sub0
; RA-NEXT: %2.sub11:sgpr_1024 = COPY %2.sub1
; RA-NEXT: %2.sub12:sgpr_1024 = COPY %2.sub0
; RA-NEXT: %2.sub13:sgpr_1024 = COPY %2.sub1
; RA-NEXT: %2.sub14:sgpr_1024 = COPY %2.sub0
; RA-NEXT: %2.sub15:sgpr_1024 = COPY %2.sub1
; RA-NEXT: %2.sub16:sgpr_1024 = COPY %2.sub0
; RA-NEXT: %2.sub17:sgpr_1024 = COPY %2.sub1
; RA-NEXT: %2.sub18:sgpr_1024 = COPY %2.sub0
; RA-NEXT: %2.sub19:sgpr_1024 = COPY %2.sub1
; RA-NEXT: %2.sub20:sgpr_1024 = COPY %2.sub0
; RA-NEXT: %2.sub21:sgpr_1024 = COPY %2.sub1
; RA-NEXT: %2.sub22:sgpr_1024 = COPY %2.sub0
; RA-NEXT: %2.sub23:sgpr_1024 = COPY %2.sub1
; RA-NEXT: %2.sub24:sgpr_1024 = COPY %2.sub0
; RA-NEXT: %2.sub25:sgpr_1024 = COPY %2.sub1
; RA-NEXT: %2.sub26:sgpr_1024 = COPY %2.sub0
; RA-NEXT: %2.sub27:sgpr_1024 = COPY %2.sub1
; RA-NEXT: %2.sub28:sgpr_1024 = COPY %2.sub0
; RA-NEXT: %2.sub29:sgpr_1024 = COPY %2.sub1
; RA-NEXT: %3.sub1:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub2:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub3:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub4:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub5:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub6:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub7:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub8:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub9:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub10:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub11:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub12:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub13:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub14:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub15:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub16:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub17:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub18:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub19:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub20:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub21:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub22:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub23:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub24:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub25:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub26:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub27:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub28:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub29:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub30:sgpr_1024 = COPY %3.sub0
; RA-NEXT: %3.sub31:sgpr_1024 = COPY %3.sub0
; RA-NEXT: {{ $}}
; RA-NEXT: bb.2:
; RA-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; RA-NEXT: {{ $}}
; RA-NEXT: S_NOP 0, csr_amdgpu, implicit [[DEF]], implicit [[DEF1]]
; RA-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
; RA-NEXT: S_BRANCH %bb.2
;
; VR-LABEL: name: splitkit_copy_bundle
; VR: bb.0:
; VR-NEXT: successors: %bb.1(0x80000000)
; VR-NEXT: {{ $}}
; VR-NEXT: renamable $sgpr37 = S_MOV_B32 -1
; VR-NEXT: renamable $sgpr36 = S_MOV_B32 -1
; VR-NEXT: renamable $sgpr68 = S_MOV_B32 0
; VR-NEXT: renamable $sgpr30_sgpr31 = IMPLICIT_DEF
; VR-NEXT: renamable $sgpr34_sgpr35 = IMPLICIT_DEF
; VR-NEXT: {{ $}}
; VR-NEXT: bb.1:
; VR-NEXT: successors: %bb.2(0x80000000)
; VR-NEXT: liveins: $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003
; VR-NEXT: {{ $}}
; VR-NEXT: renamable $sgpr38 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr39 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr40 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr41 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr42 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr43 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr44 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr45 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr46 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr47 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr48 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr49 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr50 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr51 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr52 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr53 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr54 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr55 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr56 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr57 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr58 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr59 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr60 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr61 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr62 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr63 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr64 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr65 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr69 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr70 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr71 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr72 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr73 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr74 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr75 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr76 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr77 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr78 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr79 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr80 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr81 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr82 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr83 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr84 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr85 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr86 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr87 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr88 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr89 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr90 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr91 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr92 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr93 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr94 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr95 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr96 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr97 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr98 = COPY renamable $sgpr68
; VR-NEXT: renamable $sgpr99 = COPY renamable $sgpr68
; VR-NEXT: {{ $}}
; VR-NEXT: bb.2:
; VR-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; VR-NEXT: liveins: $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003
; VR-NEXT: {{ $}}
; VR-NEXT: S_NOP 0, implicit renamable $sgpr30_sgpr31, implicit renamable $sgpr34_sgpr35
; VR-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
; VR-NEXT: S_BRANCH %bb.2
; Input MIR starts here. Entry block: two 64-bit values that stay live until
; bb.2, plus partial definitions of the two 1024-bit tuples (%2: sub0 and
; sub1 only, %3: sub0 only).
bb.0:
%0:sreg_64 = IMPLICIT_DEF
%1:sreg_64 = IMPLICIT_DEF
undef %2.sub1:sgpr_1024 = S_MOV_B32 -1
%2.sub0:sgpr_1024 = S_MOV_B32 -1
undef %3.sub0:sgpr_1024 = S_MOV_B32 0
; Loop body: fill %2.sub2-sub29 by alternating copies of %2.sub0/%2.sub1, and
; %3.sub1-sub31 with copies of %3.sub0. %2.sub30 and %2.sub31 are never
; written in this function.
bb.1:
%2.sub2:sgpr_1024 = COPY %2.sub0
%2.sub3:sgpr_1024 = COPY %2.sub1
%2.sub4:sgpr_1024 = COPY %2.sub0
%2.sub5:sgpr_1024 = COPY %2.sub1
%2.sub6:sgpr_1024 = COPY %2.sub0
%2.sub7:sgpr_1024 = COPY %2.sub1
%2.sub8:sgpr_1024 = COPY %2.sub0
%2.sub9:sgpr_1024 = COPY %2.sub1
%2.sub10:sgpr_1024 = COPY %2.sub0
%2.sub11:sgpr_1024 = COPY %2.sub1
%2.sub12:sgpr_1024 = COPY %2.sub0
%2.sub13:sgpr_1024 = COPY %2.sub1
%2.sub14:sgpr_1024 = COPY %2.sub0
%2.sub15:sgpr_1024 = COPY %2.sub1
%2.sub16:sgpr_1024 = COPY %2.sub0
%2.sub17:sgpr_1024 = COPY %2.sub1
%2.sub18:sgpr_1024 = COPY %2.sub0
%2.sub19:sgpr_1024 = COPY %2.sub1
%2.sub20:sgpr_1024 = COPY %2.sub0
%2.sub21:sgpr_1024 = COPY %2.sub1
%2.sub22:sgpr_1024 = COPY %2.sub0
%2.sub23:sgpr_1024 = COPY %2.sub1
%2.sub24:sgpr_1024 = COPY %2.sub0
%2.sub25:sgpr_1024 = COPY %2.sub1
%2.sub26:sgpr_1024 = COPY %2.sub0
%2.sub27:sgpr_1024 = COPY %2.sub1
%2.sub28:sgpr_1024 = COPY %2.sub0
%2.sub29:sgpr_1024 = COPY %2.sub1
%3.sub1:sgpr_1024 = COPY %3.sub0
%3.sub2:sgpr_1024 = COPY %3.sub0
%3.sub3:sgpr_1024 = COPY %3.sub0
%3.sub4:sgpr_1024 = COPY %3.sub0
%3.sub5:sgpr_1024 = COPY %3.sub0
%3.sub6:sgpr_1024 = COPY %3.sub0
%3.sub7:sgpr_1024 = COPY %3.sub0
%3.sub8:sgpr_1024 = COPY %3.sub0
%3.sub9:sgpr_1024 = COPY %3.sub0
%3.sub10:sgpr_1024 = COPY %3.sub0
%3.sub11:sgpr_1024 = COPY %3.sub0
%3.sub12:sgpr_1024 = COPY %3.sub0
%3.sub13:sgpr_1024 = COPY %3.sub0
%3.sub14:sgpr_1024 = COPY %3.sub0
%3.sub15:sgpr_1024 = COPY %3.sub0
%3.sub16:sgpr_1024 = COPY %3.sub0
%3.sub17:sgpr_1024 = COPY %3.sub0
%3.sub18:sgpr_1024 = COPY %3.sub0
%3.sub19:sgpr_1024 = COPY %3.sub0
%3.sub20:sgpr_1024 = COPY %3.sub0
%3.sub21:sgpr_1024 = COPY %3.sub0
%3.sub22:sgpr_1024 = COPY %3.sub0
%3.sub23:sgpr_1024 = COPY %3.sub0
%3.sub24:sgpr_1024 = COPY %3.sub0
%3.sub25:sgpr_1024 = COPY %3.sub0
%3.sub26:sgpr_1024 = COPY %3.sub0
%3.sub27:sgpr_1024 = COPY %3.sub0
%3.sub28:sgpr_1024 = COPY %3.sub0
%3.sub29:sgpr_1024 = COPY %3.sub0
%3.sub30:sgpr_1024 = COPY %3.sub0
%3.sub31:sgpr_1024 = COPY %3.sub0
; Loop latch: the S_NOP uses %0 and %1 and carries the csr_amdgpu register
; mask. Control then returns to bb.1 (which reads %2.sub0, %2.sub1 and
; %3.sub0 again) or stays in bb.2.
bb.2:
S_NOP 0, implicit %0, implicit %1, csr_amdgpu
S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
S_BRANCH %bb.2
...
# splitkit_copy_unbundle_reorder: single-block function. Eight scattered lanes
# of the sgpr_512 value %2 (sub4, sub5, sub7, sub8, sub10, sub11, sub13, sub14)
# are written before an S_NOP that implicit-defs many SGPRs, and are read
# after it by S_BUFFER_LOAD_DWORD_SGPR instructions.
# The RA checks expect an SI_SPILL_S512_SAVE / SI_SPILL_S512_RESTORE pair
# around the S_NOP, with the live lanes moved by `COPY { internal ... }`
# bundles. The VR checks list individual COPYs with no bundle braces.
---
name: splitkit_copy_unbundle_reorder
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
; RA-LABEL: name: splitkit_copy_unbundle_reorder
; RA: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; RA-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; RA-NEXT: [[DEF2:%[0-9]+]]:sgpr_512 = IMPLICIT_DEF
; RA-NEXT: [[DEF2]].sub4:sgpr_512 = S_MOV_B32 -1
; RA-NEXT: [[DEF2]].sub5:sgpr_512 = S_MOV_B32 -1
; RA-NEXT: [[DEF2]].sub10:sgpr_512 = S_MOV_B32 -1
; RA-NEXT: [[DEF2]].sub11:sgpr_512 = S_MOV_B32 -1
; RA-NEXT: [[DEF2]].sub7:sgpr_512 = S_MOV_B32 -1
; RA-NEXT: [[DEF2]].sub8:sgpr_512 = S_MOV_B32 -1
; RA-NEXT: [[DEF2]].sub13:sgpr_512 = S_MOV_B32 -1
; RA-NEXT: [[DEF2]].sub14:sgpr_512 = S_MOV_B32 -1
; RA-NEXT: undef %16.sub4_sub5:sgpr_512 = COPY [[DEF2]].sub4_sub5 {
; RA-NEXT: internal %16.sub10_sub11:sgpr_512 = COPY [[DEF2]].sub10_sub11
; RA-NEXT: internal %16.sub7:sgpr_512 = COPY [[DEF2]].sub7
; RA-NEXT: internal %16.sub8:sgpr_512 = COPY [[DEF2]].sub8
; RA-NEXT: internal %16.sub13:sgpr_512 = COPY [[DEF2]].sub13
; RA-NEXT: internal %16.sub14:sgpr_512 = COPY [[DEF2]].sub14
; RA-NEXT: }
; RA-NEXT: undef %18.sub4_sub5:sgpr_512 = COPY %16.sub4_sub5 {
; RA-NEXT: internal %18.sub10_sub11:sgpr_512 = COPY %16.sub10_sub11
; RA-NEXT: internal %18.sub7:sgpr_512 = COPY %16.sub7
; RA-NEXT: internal %18.sub8:sgpr_512 = COPY %16.sub8
; RA-NEXT: internal %18.sub13:sgpr_512 = COPY %16.sub13
; RA-NEXT: internal %18.sub14:sgpr_512 = COPY %16.sub14
; RA-NEXT: }
; RA-NEXT: SI_SPILL_S512_SAVE %18, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5)
; RA-NEXT: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98
; RA-NEXT: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5)
; RA-NEXT: undef %17.sub4_sub5:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub4_sub5 {
; RA-NEXT: internal %17.sub10_sub11:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub10_sub11
; RA-NEXT: internal %17.sub7:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub7
; RA-NEXT: internal %17.sub8:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub8
; RA-NEXT: internal %17.sub13:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub13
; RA-NEXT: internal %17.sub14:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub14
; RA-NEXT: }
; RA-NEXT: undef %14.sub4_sub5:sgpr_512 = COPY %17.sub4_sub5 {
; RA-NEXT: internal %14.sub10_sub11:sgpr_512 = COPY %17.sub10_sub11
; RA-NEXT: internal %14.sub7:sgpr_512 = COPY %17.sub7
; RA-NEXT: internal %14.sub8:sgpr_512 = COPY %17.sub8
; RA-NEXT: internal %14.sub13:sgpr_512 = COPY %17.sub13
; RA-NEXT: internal %14.sub14:sgpr_512 = COPY %17.sub14
; RA-NEXT: }
; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub4, 0 :: (dereferenceable invariant load (s32))
; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub5, 0 :: (dereferenceable invariant load (s32))
; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub10, 0 :: (dereferenceable invariant load (s32))
; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub11, 0 :: (dereferenceable invariant load (s32))
; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub7, 0 :: (dereferenceable invariant load (s32))
; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub8, 0 :: (dereferenceable invariant load (s32))
; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub13, 0 :: (dereferenceable invariant load (s32))
; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub14, 0 :: (dereferenceable invariant load (s32))
; RA-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR]], implicit [[S_BUFFER_LOAD_DWORD_SGPR1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR2]], implicit [[S_BUFFER_LOAD_DWORD_SGPR3]], implicit [[S_BUFFER_LOAD_DWORD_SGPR4]], implicit [[S_BUFFER_LOAD_DWORD_SGPR5]], implicit [[S_BUFFER_LOAD_DWORD_SGPR6]], implicit [[S_BUFFER_LOAD_DWORD_SGPR7]]
;
; VR-LABEL: name: splitkit_copy_unbundle_reorder
; VR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
; VR-NEXT: renamable $sgpr16 = S_MOV_B32 -1
; VR-NEXT: renamable $sgpr17 = S_MOV_B32 -1
; VR-NEXT: renamable $sgpr22 = S_MOV_B32 -1
; VR-NEXT: renamable $sgpr23 = S_MOV_B32 -1
; VR-NEXT: renamable $sgpr19 = S_MOV_B32 -1
; VR-NEXT: renamable $sgpr20 = S_MOV_B32 -1
; VR-NEXT: renamable $sgpr25 = S_MOV_B32 -1
; VR-NEXT: renamable $sgpr26 = S_MOV_B32 -1
; VR-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5)
; VR-NEXT: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98
; VR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5)
; VR-NEXT: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr16_sgpr17
; VR-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr19
; VR-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr22_sgpr23
; VR-NEXT: renamable $sgpr16 = COPY killed renamable $sgpr20
; VR-NEXT: renamable $sgpr21 = COPY killed renamable $sgpr25
; VR-NEXT: renamable $sgpr22 = COPY killed renamable $sgpr26
; VR-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = IMPLICIT_DEF
; VR-NEXT: renamable $sgpr8 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr12, 0 :: (dereferenceable invariant load (s32))
; VR-NEXT: renamable $sgpr9 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr13, 0 :: (dereferenceable invariant load (s32))
; VR-NEXT: renamable $sgpr14 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr15, 0 :: (dereferenceable invariant load (s32))
; VR-NEXT: renamable $sgpr10_sgpr11 = IMPLICIT_DEF
; VR-NEXT: renamable $sgpr17 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr22, 0 :: (dereferenceable invariant load (s32))
; VR-NEXT: renamable $sgpr15 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr16, 0 :: (dereferenceable invariant load (s32))
; VR-NEXT: renamable $sgpr12 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr18, 0 :: (dereferenceable invariant load (s32))
; VR-NEXT: renamable $sgpr13 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr19, 0 :: (dereferenceable invariant load (s32))
; VR-NEXT: renamable $sgpr16 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr21, 0 :: (dereferenceable invariant load (s32))
; VR-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr10_sgpr11, implicit killed renamable $sgpr8, implicit killed renamable $sgpr9, implicit killed renamable $sgpr12, implicit killed renamable $sgpr13, implicit killed renamable $sgpr14, implicit killed renamable $sgpr15, implicit killed renamable $sgpr16, implicit killed renamable $sgpr17
; Input MIR starts here. %0 and %1 are only used by the final S_NOP (%0 is
; also the first operand of every load); %2 is the 512-bit value under test.
%0:sgpr_128 = IMPLICIT_DEF
%1:sreg_64 = IMPLICIT_DEF
%2:sgpr_512 = IMPLICIT_DEF
; The whole of %2 is defined by the IMPLICIT_DEF above. The eight lanes read
; later are then overwritten one by one, in a non-contiguous pattern.
%2.sub4:sgpr_512 = S_MOV_B32 -1
%2.sub5:sgpr_512 = S_MOV_B32 -1
%2.sub10:sgpr_512 = S_MOV_B32 -1
%2.sub11:sgpr_512 = S_MOV_B32 -1
%2.sub7:sgpr_512 = S_MOV_B32 -1
%2.sub8:sgpr_512 = S_MOV_B32 -1
%2.sub13:sgpr_512 = S_MOV_B32 -1
%2.sub14:sgpr_512 = S_MOV_B32 -1
; Clobber registers
; The implicit-defs are spread across $sgpr8-$sgpr98 while %2 is still live
; (it is read by the loads below).
S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98
; Each load reads exactly one of the eight lanes written above.
%5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub4:sgpr_512, 0 :: (dereferenceable invariant load (s32))
%6:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub5:sgpr_512, 0 :: (dereferenceable invariant load (s32))
%7:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub10:sgpr_512, 0 :: (dereferenceable invariant load (s32))
%8:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub11:sgpr_512, 0 :: (dereferenceable invariant load (s32))
%9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub7:sgpr_512, 0 :: (dereferenceable invariant load (s32))
%10:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub8:sgpr_512, 0 :: (dereferenceable invariant load (s32))
%11:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub13:sgpr_512, 0 :: (dereferenceable invariant load (s32))
%12:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub14:sgpr_512, 0 :: (dereferenceable invariant load (s32))
; Keep %0, %1 and all eight load results live to the end of the block.
S_NOP 0, implicit %0, implicit %1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12
...