clang-p2996/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir
Commit 7dcb9c0f09 (Matt Arsenault): InlineSpiller: Consider copy bundles when looking for snippet copies
The snippet-copy scan was looking for full copies produced by SplitKit,
but SplitKit introduces copy bundles when not all lanes are live. The
scan for uses needs to look at bundles, not individual instructions.

This is a prerequisite for avoiding some redundant spills caused by
subregisters, which will help avoid an allocation failure in a future
patch.
2023-06-20 12:26:27 -04:00
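
The change the commit describes lives in LLVM's InlineSpiller, where the search for snippet copies has to recognize the copy bundles SplitKit emits when only some lanes of a register are live. Below is a minimal C++ sketch of a bundle-aware check, under the assumption that such a bundle is a BUNDLE header followed by plain per-subregister COPYs; the helper name isCopyOfBundle and its exact shape here are illustrative, not the verbatim upstream implementation.

// Sketch only: walk every instruction in a (possibly bundled) copy instead of
// inspecting a single MachineInstr. Assumed shape, not the upstream code.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/Register.h"

using namespace llvm;

// Returns the register paired with Reg by the copy (or copy bundle) starting
// at FirstMI, or an invalid Register if this is not such a copy.
static Register isCopyOfBundle(const MachineInstr &FirstMI, Register Reg) {
  Register Other;
  const MachineInstr *MI = &FirstMI;
  // Skip an explicit BUNDLE header, if present; the copies follow it.
  if (MI->isBundle())
    MI = MI->getNextNode();
  for (;;) {
    if (!MI->isCopy())
      return Register();
    Register Dst = MI->getOperand(0).getReg();
    Register Src = MI->getOperand(1).getReg();
    Register Found = Dst == Reg ? Src : (Src == Reg ? Dst : Register());
    // Every copy in the bundle must pair Reg with the same other register.
    if (!Found.isValid() || (Other.isValid() && Other != Found))
      return Register();
    Other = Found;
    if (!MI->isBundledWithSucc())
      return Other;
    MI = MI->getNextNode();
  }
}

Walking to the end of the bundle rather than stopping at the first instruction is what lets the spiller treat the whole bundle as a single snippet copy, which in turn avoids the redundant subregister spills mentioned in the commit message.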

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-regalloc -stress-regalloc=3 -start-before=greedy,1 -stop-before=virtregrewriter,1 -o - %s | FileCheck %s
---
name: split_instruction_subranges
alignment: 1
tracksRegLiveness: true
frameInfo:
  maxAlignment: 1
  hasCalls: true
machineFunctionInfo:
  maxKernArgAlign: 1
  isEntryFunction: true
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  argumentInfo:
    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
    privateSegmentWaveByteOffset: { reg: '$sgpr17' }
  occupancy: 8
body: |
  bb.0:
    ; CHECK-LABEL: name: split_instruction_subranges
    ; CHECK: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %1:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1)
    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %3:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1)
    ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR1]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1)
    ; CHECK-NEXT: undef %9.sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
    ; CHECK-NEXT: S_NOP 0, implicit %9.sub1
    ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
    ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V64_RESTORE]].sub0
    ; CHECK-NEXT: undef %7.sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1
    ; CHECK-NEXT: S_NOP 0, implicit %7.sub1
    ; CHECK-NEXT: S_ENDPGM 0
    %1:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %4:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1)
    %2:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1)
    %3:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %6:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1)
    S_NOP 0, implicit %1.sub1
    S_NOP 0, implicit %2.sub0
    S_NOP 0, implicit %3.sub1
    S_ENDPGM 0
...
---
name: split_instruction_subranges_use_is_subreg_def
alignment: 1
tracksRegLiveness: true
frameInfo:
  maxAlignment: 1
  hasCalls: true
machineFunctionInfo:
  maxKernArgAlign: 1
  isEntryFunction: true
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  argumentInfo:
    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
    privateSegmentWaveByteOffset: { reg: '$sgpr17' }
  occupancy: 8
body: |
  bb.0:
    ; CHECK-LABEL: name: split_instruction_subranges_use_is_subreg_def
    ; CHECK: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %1:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1)
    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %3:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1)
    ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR1]], %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5)
    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1)
    ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR2]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
    ; CHECK-NEXT: S_NOP 0, implicit-def [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0
    ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
    ; CHECK-NEXT: undef %13.sub0:vreg_64 = COPY [[SI_SPILL_V64_RESTORE]].sub0
    ; CHECK-NEXT: S_NOP 0, implicit-def %13.sub1
    ; CHECK-NEXT: undef %15.sub0:vreg_64 = COPY %13.sub0
    ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE1:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
    ; CHECK-NEXT: undef %7.sub1:vreg_64 = COPY [[SI_SPILL_V64_RESTORE1]].sub1
    ; CHECK-NEXT: S_NOP 0, implicit-def %7.sub0
    ; CHECK-NEXT: undef %9.sub1:vreg_64 = COPY %7.sub1
    ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
    ; CHECK-NEXT: undef %14.sub0:vreg_64 = COPY %15.sub0
    ; CHECK-NEXT: S_NOP 0, implicit %14.sub0
    ; CHECK-NEXT: undef %8.sub1:vreg_64 = COPY %9.sub1
    ; CHECK-NEXT: S_NOP 0, implicit %8.sub1
    ; CHECK-NEXT: S_ENDPGM 0
    %1:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %4:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1)
    %2:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1)
    %3:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %6:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1)
    S_NOP 0, implicit-def %1.sub0
    S_NOP 0, implicit-def %2.sub1
    S_NOP 0, implicit-def %3.sub0
    S_NOP 0, implicit %1.sub1
    S_NOP 0, implicit %2.sub0
    S_NOP 0, implicit %3.sub1
    S_ENDPGM 0
...