Summary:
I encountered some problems with SIFixWWMLiveness when WWM is in a loop:

1. It sometimes produced invalid MIR, in which some control-flow path to the new implicit use of a register on EXIT_WWM does not pass through any def.
2. There were many false positives: registers wrongly identified as needing an implicit use added to EXIT_WWM.
3. Adding an implicit use to EXIT_WWM (and adding an implicit def just before the WWM code, which I tried in order to fix (1)) caused many of the values to be spilled and reloaded unnecessarily.

This commit is a rework of SIFixWWMLiveness, with the following changes:

1. Instead of considering any register that has a def that can reach the WWM code and a def that can be reached from the WWM code, it now considers three specific cases that need to be handled.
2. A register that needs liveness over WWM to be synthesized now has it done by adding itself as an implicit use to its defs other than the dominant one.

Also added the following FIXMEs:

FIXME: We should detect whether a register in one of the above categories is already live at the WWM code before deciding to add the implicit uses to synthesize its liveness.

FIXME: I believe this whole scheme may be flawed due to the possibility of the register allocator doing live interval splitting.

Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D46756

Change-Id: Ie7fba0ede0378849181df3f1a9a7a39ed1a94a94

llvm-svn: 338783
186 lines
7.2 KiB
YAML
186 lines
7.2 KiB
YAML
# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fix-wwm-liveness -o - %s | FileCheck %s
|
|
|
|
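# Test a "then phi" value: %21 is defined in bb.0, redefined in the then-block bb.1 (which contains the WWM code), and used after the join in bb.2.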
|
|
#CHECK: test_wwm_liveness_then_phi
|
|
#CHECK: %21:vgpr_32 = V_MOV_B32_e32 1, implicit $exec, implicit %21
|
|
|
|
---
|
|
name: test_wwm_liveness_then_phi
|
|
alignment: 0
|
|
exposesReturnsTwice: false
|
|
legalized: false
|
|
regBankSelected: false
|
|
selected: false
|
|
tracksRegLiveness: true
|
|
registers:
|
|
- { id: 0, class: sreg_64, preferred-register: '' }
|
|
- { id: 1, class: sgpr_32, preferred-register: '' }
|
|
- { id: 2, class: sgpr_32, preferred-register: '' }
|
|
- { id: 3, class: vgpr_32, preferred-register: '' }
|
|
- { id: 4, class: vgpr_32, preferred-register: '' }
|
|
- { id: 5, class: vgpr_32, preferred-register: '' }
|
|
- { id: 6, class: vgpr_32, preferred-register: '' }
|
|
- { id: 7, class: vgpr_32, preferred-register: '' }
|
|
- { id: 8, class: sreg_64, preferred-register: '$vcc' }
|
|
- { id: 9, class: sreg_64, preferred-register: '' }
|
|
- { id: 10, class: sreg_32_xm0, preferred-register: '' }
|
|
- { id: 11, class: sreg_64, preferred-register: '' }
|
|
- { id: 12, class: sreg_32_xm0, preferred-register: '' }
|
|
- { id: 13, class: sreg_32_xm0, preferred-register: '' }
|
|
- { id: 14, class: sreg_32_xm0, preferred-register: '' }
|
|
- { id: 15, class: sreg_128, preferred-register: '' }
|
|
- { id: 16, class: vgpr_32, preferred-register: '' }
|
|
- { id: 17, class: vgpr_32, preferred-register: '' }
|
|
- { id: 18, class: vgpr_32, preferred-register: '' }
|
|
- { id: 19, class: sreg_64, preferred-register: '' }
|
|
- { id: 20, class: sreg_64, preferred-register: '' }
|
|
- { id: 21, class: vgpr_32, preferred-register: '' }
|
|
- { id: 22, class: sreg_64, preferred-register: '' }
|
|
- { id: 23, class: sreg_64, preferred-register: '' }
|
|
liveins:
|
|
body: |
|
|
bb.0:
|
|
successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
|
|
|
%21 = V_MOV_B32_e32 0, implicit $exec
|
|
%5 = V_MBCNT_LO_U32_B32_e64 -1, 0, implicit $exec
|
|
%6 = V_MBCNT_HI_U32_B32_e32 -1, killed %5, implicit $exec
|
|
%8 = V_CMP_GT_U32_e64 32, killed %6, implicit $exec
|
|
%22 = COPY $exec, implicit-def $exec
|
|
%23 = S_AND_B64 %22, %8, implicit-def dead $scc
|
|
%0 = S_XOR_B64 %23, %22, implicit-def dead $scc
|
|
$exec = S_MOV_B64_term killed %23
|
|
SI_MASK_BRANCH %bb.2, implicit $exec
|
|
S_BRANCH %bb.1
|
|
|
|
bb.1:
|
|
successors: %bb.2(0x80000000)
|
|
|
|
%13 = S_MOV_B32 61440
|
|
%14 = S_MOV_B32 -1
|
|
%15 = REG_SEQUENCE undef %12, 1, undef %10, 2, killed %14, 3, killed %13, 4
|
|
%19 = COPY $exec
|
|
$exec = S_MOV_B64 -1
|
|
%16 = BUFFER_LOAD_DWORD_OFFSET %15, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4)
|
|
%17 = V_ADD_F32_e32 1065353216, killed %16, implicit $exec
|
|
$exec = EXIT_WWM killed %19
|
|
%21 = V_MOV_B32_e32 1, implicit $exec
|
|
early-clobber %18 = WWM killed %17, implicit $exec
|
|
BUFFER_STORE_DWORD_OFFSET killed %18, killed %15, 0, 0, 0, 0, 0, implicit $exec :: (store 4)
|
|
|
|
bb.2:
|
|
$exec = S_OR_B64 $exec, killed %0, implicit-def $scc
|
|
$vgpr0 = COPY killed %21
|
|
SI_RETURN_TO_EPILOG killed $vgpr0
|
|
|
|
...
|
|
|
|
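# Test a loop with a loop-exit value (%4: defined in the loop block bb.1, used in bb.2) and a loop phi (%27: defined in bb.0 and again in bb.1, used at the top of bb.1).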
|
|
#CHECK: test_wwm_liveness_loop
|
|
#CHECK: %4:vgpr_32 = IMPLICIT_DEF
|
|
#CHECK: bb.1:
|
|
#CHECK: %4:vgpr_32 = FLAT_LOAD_DWORD{{.*}}, implicit %4
|
|
#CHECK: %27:vgpr_32 = COPY killed %21, implicit %27
|
|
|
|
---
|
|
name: test_wwm_liveness_loop
|
|
alignment: 0
|
|
exposesReturnsTwice: false
|
|
legalized: false
|
|
regBankSelected: false
|
|
selected: false
|
|
failedISel: false
|
|
tracksRegLiveness: true
|
|
registers:
|
|
- { id: 0, class: vgpr_32, preferred-register: '' }
|
|
- { id: 1, class: sreg_32_xm0, preferred-register: '' }
|
|
- { id: 2, class: sreg_64, preferred-register: '' }
|
|
- { id: 3, class: sreg_32_xm0, preferred-register: '' }
|
|
- { id: 4, class: vgpr_32, preferred-register: '' }
|
|
- { id: 5, class: sreg_32_xm0, preferred-register: '' }
|
|
- { id: 6, class: sreg_64, preferred-register: '' }
|
|
- { id: 7, class: sreg_64, preferred-register: '' }
|
|
- { id: 8, class: sreg_64, preferred-register: '' }
|
|
- { id: 9, class: vreg_64, preferred-register: '' }
|
|
- { id: 10, class: vgpr_32, preferred-register: '' }
|
|
- { id: 11, class: vgpr_32, preferred-register: '' }
|
|
- { id: 12, class: vgpr_32, preferred-register: '' }
|
|
- { id: 13, class: sreg_64, preferred-register: '' }
|
|
- { id: 14, class: vreg_64, preferred-register: '' }
|
|
- { id: 15, class: sreg_32_xm0, preferred-register: '' }
|
|
- { id: 16, class: vgpr_32, preferred-register: '' }
|
|
- { id: 17, class: sreg_64, preferred-register: '$vcc' }
|
|
- { id: 18, class: vgpr_32, preferred-register: '' }
|
|
- { id: 19, class: vgpr_32, preferred-register: '' }
|
|
- { id: 20, class: vgpr_32, preferred-register: '' }
|
|
- { id: 21, class: vgpr_32, preferred-register: '' }
|
|
- { id: 22, class: vgpr_32, preferred-register: '' }
|
|
- { id: 23, class: sreg_64, preferred-register: '' }
|
|
- { id: 24, class: sreg_64, preferred-register: '' }
|
|
- { id: 25, class: sreg_64, preferred-register: '' }
|
|
- { id: 26, class: sreg_64, preferred-register: '' }
|
|
- { id: 27, class: vgpr_32, preferred-register: '' }
|
|
liveins:
|
|
frameInfo:
|
|
isFrameAddressTaken: false
|
|
isReturnAddressTaken: false
|
|
hasStackMap: false
|
|
hasPatchPoint: false
|
|
stackSize: 0
|
|
offsetAdjustment: 0
|
|
maxAlignment: 0
|
|
adjustsStack: false
|
|
hasCalls: false
|
|
stackProtector: ''
|
|
maxCallFrameSize: 4294967295
|
|
hasOpaqueSPAdjustment: false
|
|
hasVAStart: false
|
|
hasMustTailInVarArgFunc: false
|
|
localFrameSize: 0
|
|
savePoint: ''
|
|
restorePoint: ''
|
|
fixedStack:
|
|
stack:
|
|
constants:
|
|
body: |
|
|
bb.0:
|
|
successors: %bb.1(0x80000000)
|
|
|
|
%25:sreg_64 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
|
%0:vgpr_32 = FLAT_LOAD_DWORD undef %9:vreg_64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* undef`, addrspace 1)
|
|
$exec = EXIT_WWM killed %25
|
|
%12:vgpr_32 = V_MBCNT_LO_U32_B32_e64 -1, 0, implicit $exec
|
|
%7:sreg_64 = S_MOV_B64 0
|
|
%26:sreg_64 = COPY killed %7
|
|
%27:vgpr_32 = COPY killed %12
|
|
|
|
bb.1:
|
|
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
|
|
|
|
%24:sreg_64 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
|
%20:vgpr_32 = COPY killed %27
|
|
%2:sreg_64 = COPY killed %26
|
|
%4:vgpr_32 = FLAT_LOAD_DWORD undef %14:vreg_64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* undef`, addrspace 1)
|
|
$exec = EXIT_WWM killed %24
|
|
%22:vgpr_32 = V_ADD_I32_e32 -1, killed %20, implicit-def dead $vcc, implicit $exec
|
|
%17:sreg_64 = V_CMP_EQ_U32_e64 0, %22, implicit $exec
|
|
%6:sreg_64 = S_OR_B64 killed %17, killed %2, implicit-def $scc
|
|
%21:vgpr_32 = COPY killed %22
|
|
%26:sreg_64 = COPY %6
|
|
%27:vgpr_32 = COPY killed %21
|
|
$exec = S_ANDN2_B64_term $exec, %6
|
|
S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
|
S_BRANCH %bb.2
|
|
|
|
bb.2:
|
|
$exec = S_OR_B64 $exec, killed %6, implicit-def $scc
|
|
%23:sreg_64 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
|
%18:vgpr_32 = V_ADD_F32_e32 killed %0, killed %4, implicit $exec
|
|
$exec = EXIT_WWM killed %23
|
|
early-clobber %19:vgpr_32 = COPY killed %18, implicit $exec
|
|
$vgpr0 = COPY killed %19
|
|
SI_RETURN_TO_EPILOG killed $vgpr0
|
|
|
|
...
|
|
|