SCC was not correctly preserved when entering WWM. The existing lit test was unable to detect this because the entry block is handled differently. Additionally, fix an issue where SCC was unnecessarily preserved when exiting from WWM to Exact mode. Reviewed By: foad Differential Revision: https://reviews.llvm.org/D95500
236 lines
8.7 KiB
YAML
236 lines
8.7 KiB
YAML
# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
|
|
|
|
---
|
|
# Check for awareness that s_or_saveexec_b64 clobbers SCC
|
|
#
|
|
#CHECK: ENTER_WWM
|
|
#CHECK: S_CMP_LT_I32
|
|
#CHECK: S_CSELECT_B32
|
|
name: test_wwm_scc
|
|
alignment: 1
|
|
exposesReturnsTwice: false
|
|
legalized: false
|
|
regBankSelected: false
|
|
selected: false
|
|
tracksRegLiveness: true
|
|
registers:
|
|
- { id: 0, class: sgpr_32, preferred-register: '' }
|
|
- { id: 1, class: sgpr_32, preferred-register: '' }
|
|
- { id: 2, class: sgpr_32, preferred-register: '' }
|
|
- { id: 3, class: vgpr_32, preferred-register: '' }
|
|
- { id: 4, class: vgpr_32, preferred-register: '' }
|
|
- { id: 5, class: sgpr_32, preferred-register: '' }
|
|
- { id: 6, class: vgpr_32, preferred-register: '' }
|
|
- { id: 7, class: vgpr_32, preferred-register: '' }
|
|
- { id: 8, class: sreg_32_xm0, preferred-register: '' }
|
|
- { id: 9, class: sreg_32, preferred-register: '' }
|
|
- { id: 10, class: sreg_32, preferred-register: '' }
|
|
- { id: 11, class: vgpr_32, preferred-register: '' }
|
|
- { id: 12, class: vgpr_32, preferred-register: '' }
|
|
liveins:
|
|
- { reg: '$sgpr0', virtual-reg: '%0' }
|
|
- { reg: '$sgpr1', virtual-reg: '%1' }
|
|
- { reg: '$sgpr2', virtual-reg: '%2' }
|
|
- { reg: '$vgpr0', virtual-reg: '%3' }
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
|
|
|
|
%3 = COPY $vgpr0
|
|
%2 = COPY $sgpr2
|
|
%1 = COPY $sgpr1
|
|
%0 = COPY $sgpr0
|
|
S_CMP_LT_I32 0, %0, implicit-def $scc
|
|
%12 = V_ADD_CO_U32_e32 %3, %3, implicit-def $vcc, implicit $exec
|
|
%5 = S_CSELECT_B32 %2, %1, implicit $scc
|
|
%11 = V_ADD_CO_U32_e32 %5, %12, implicit-def $vcc, implicit $exec
|
|
$vgpr0 = WWM %11, implicit $exec
|
|
SI_RETURN_TO_EPILOG $vgpr0
|
|
|
|
...
|
|
|
|
---
|
|
# Second test for awareness that s_or_saveexec_b64 clobbers SCC
|
|
# This test is needed because the entry block is treated differently.
|
|
#
|
|
#CHECK: %bb.1
|
|
#CHECK: S_CMP_LT_I32
|
|
#CHECK: COPY $scc
|
|
#CHECK: ENTER_WWM
|
|
#CHECK: $scc = COPY
|
|
#CHECK: S_CSELECT_B32
|
|
name: test_wwm_scc2
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
|
|
|
|
%3:vgpr_32 = COPY $vgpr0
|
|
%2:sgpr_32 = COPY $sgpr2
|
|
%1:sgpr_32 = COPY $sgpr1
|
|
%0:sgpr_32 = COPY $sgpr0
|
|
%13:sgpr_128 = IMPLICIT_DEF
|
|
|
|
bb.1:
|
|
S_CMP_LT_I32 0, %0:sgpr_32, implicit-def $scc
|
|
%10:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %3:vgpr_32, %13:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
%12:vgpr_32 = V_ADD_CO_U32_e32 %3:vgpr_32, %3:vgpr_32, implicit-def $vcc, implicit $exec
|
|
%5:sgpr_32 = S_CSELECT_B32 %2:sgpr_32, %1:sgpr_32, implicit $scc
|
|
%11:vgpr_32 = V_ADD_CO_U32_e32 %5:sgpr_32, %12:vgpr_32, implicit-def $vcc, implicit $exec
|
|
$vgpr0 = WWM %11:vgpr_32, implicit $exec
|
|
$vgpr1 = COPY %10:vgpr_32
|
|
SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
|
|
|
|
...
|
|
|
|
---
|
|
# V_SET_INACTIVE, when its second operand is undef, is replaced by a
|
|
# COPY by si-wqm. Ensure the instruction is removed.
|
|
#CHECK-NOT: V_SET_INACTIVE
|
|
name: no_cfg
|
|
alignment: 1
|
|
exposesReturnsTwice: false
|
|
legalized: false
|
|
regBankSelected: false
|
|
selected: false
|
|
failedISel: false
|
|
tracksRegLiveness: true
|
|
hasWinCFI: false
|
|
registers:
|
|
- { id: 0, class: sgpr_32, preferred-register: '' }
|
|
- { id: 1, class: sgpr_32, preferred-register: '' }
|
|
- { id: 2, class: sgpr_32, preferred-register: '' }
|
|
- { id: 3, class: sgpr_32, preferred-register: '' }
|
|
- { id: 4, class: sgpr_32, preferred-register: '' }
|
|
- { id: 5, class: sgpr_128, preferred-register: '' }
|
|
- { id: 6, class: sgpr_128, preferred-register: '' }
|
|
- { id: 7, class: sreg_32, preferred-register: '' }
|
|
- { id: 8, class: vreg_64, preferred-register: '' }
|
|
- { id: 9, class: sreg_32, preferred-register: '' }
|
|
- { id: 10, class: vgpr_32, preferred-register: '' }
|
|
- { id: 11, class: vgpr_32, preferred-register: '' }
|
|
- { id: 12, class: sreg_32, preferred-register: '' }
|
|
- { id: 13, class: vgpr_32, preferred-register: '' }
|
|
- { id: 14, class: vgpr_32, preferred-register: '' }
|
|
- { id: 15, class: vgpr_32, preferred-register: '' }
|
|
- { id: 16, class: vgpr_32, preferred-register: '' }
|
|
liveins:
|
|
- { reg: '$sgpr0', virtual-reg: '%0' }
|
|
- { reg: '$sgpr1', virtual-reg: '%1' }
|
|
- { reg: '$sgpr2', virtual-reg: '%2' }
|
|
- { reg: '$sgpr3', virtual-reg: '%3' }
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
|
|
|
%3:sgpr_32 = COPY $sgpr3
|
|
%2:sgpr_32 = COPY $sgpr2
|
|
%1:sgpr_32 = COPY $sgpr1
|
|
%0:sgpr_32 = COPY $sgpr0
|
|
%6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
|
|
%5:sgpr_128 = COPY %6
|
|
%7:sreg_32 = S_MOV_B32 0
|
|
%8:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %6, %7, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
%16:vgpr_32 = COPY %8.sub1
|
|
%11:vgpr_32 = COPY %16
|
|
%10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec, implicit-def $scc
|
|
%14:vgpr_32 = COPY %7
|
|
%13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec
|
|
early-clobber %15:vgpr_32 = WWM killed %13, implicit $exec
|
|
BUFFER_STORE_DWORD_OFFSET_exact killed %15, %6, %7, 4, 0, 0, 0, 0, 0, implicit $exec
|
|
S_ENDPGM 0
|
|
|
|
...
|
|
|
|
---
|
|
# Ensure that wwm is not put around an EXEC copy
|
|
#CHECK-LABEL: name: copy_exec
|
|
#CHECK: %7:sreg_64 = COPY $exec
|
|
#CHECK-NEXT: %14:sreg_64 = ENTER_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
|
#CHECK-NEXT: %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
#CHECK-NEXT: $exec = EXIT_WWM %14
|
|
#CHECK-NEXT: %9:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %7.sub0, 0, implicit $exec
|
|
name: copy_exec
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
|
|
|
%3:sgpr_32 = COPY $sgpr3
|
|
%2:sgpr_32 = COPY $sgpr2
|
|
%1:sgpr_32 = COPY $sgpr1
|
|
%0:sgpr_32 = COPY $sgpr0
|
|
%4:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
|
|
%5:sreg_32 = S_MOV_B32 0
|
|
%6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %4, %5, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
|
|
%8:sreg_64 = COPY $exec
|
|
%9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
%10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec
|
|
%11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
|
|
%12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63
|
|
early-clobber %13:sreg_32 = WWM %9:vgpr_32, implicit $exec
|
|
|
|
%14:vgpr_32 = COPY %13
|
|
BUFFER_STORE_DWORD_OFFSET_exact killed %14, %4, %5, 4, 0, 0, 0, 0, 0, implicit $exec
|
|
S_ENDPGM 0
|
|
|
|
...
|
|
|
|
---
|
|
# Check that the exit of WQM is still inserted correctly when SCC is live until block end.
|
|
# Critically, this tests that compilation does not fail.
|
|
#CHECK-LABEL: name: scc_always_live
|
|
#CHECK: %8:vreg_128 = IMAGE_SAMPLE_V4_V2 %7
|
|
#CHECK-NEXT: S_CMP_EQ_U32 %2, 0, implicit-def $scc
|
|
#CHECK-NEXT: undef %9.sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64
|
|
#CHECK-NEXT: %9.sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32
|
|
#CHECK-NEXT: %14:sreg_32_xm0 = COPY $scc
|
|
#CHECK-NEXT: $exec = S_AND_B64 $exec, %13, implicit-def $scc
|
|
#CHECK-NEXT: $scc = COPY %14
|
|
#CHECK-NEXT: %10:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64
|
|
#CHECK-NEXT: %11:vreg_128 = IMAGE_SAMPLE_V4_V2
|
|
#CHECK-NEXT: S_CBRANCH_SCC0 %bb.2
|
|
name: scc_always_live
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr1, $sgpr2, $vgpr1, $vgpr2
|
|
|
|
$m0 = COPY $sgpr1
|
|
%0:vgpr_32 = COPY $vgpr1
|
|
%1:vgpr_32 = COPY $vgpr2
|
|
%8:sgpr_32 = COPY $sgpr2
|
|
%100:sgpr_256 = IMPLICIT_DEF
|
|
%101:sgpr_128 = IMPLICIT_DEF
|
|
|
|
%2:vgpr_32 = V_INTERP_P1_F32 %0:vgpr_32, 3, 2, implicit $mode, implicit $m0, implicit $exec
|
|
%3:vgpr_32 = V_INTERP_P1_F32 %1:vgpr_32, 3, 2, implicit $mode, implicit $m0, implicit $exec
|
|
|
|
undef %7.sub0:vreg_64 = COPY %2:vgpr_32
|
|
%7.sub1:vreg_64 = COPY %3:vgpr_32
|
|
|
|
%4:vreg_128 = IMAGE_SAMPLE_V4_V2 %7:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
|
|
S_CMP_EQ_U32 %8:sgpr_32, 0, implicit-def $scc
|
|
|
|
undef %5.sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64 0, %4.sub0:vreg_128, 0, %3:vgpr_32, 1, 0, implicit $mode, implicit $exec
|
|
%5.sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32 %2, %3, implicit $mode, implicit $exec
|
|
%6:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64 0, %2:vgpr_32, 0, %3:vgpr_32, 1, 0, implicit $mode, implicit $exec
|
|
|
|
%9:vreg_128 = IMAGE_SAMPLE_V4_V2 %5:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
|
|
|
|
S_CBRANCH_SCC0 %bb.2, implicit $scc
|
|
|
|
bb.1:
|
|
%10:sreg_32 = S_MOV_B32 0
|
|
BUFFER_STORE_DWORD_OFFSET_exact %6:vgpr_32, %101:sgpr_128, %10:sreg_32, 4, 0, 0, 0, 0, 0, implicit $exec
|
|
S_ENDPGM 0
|
|
|
|
bb.2:
|
|
$vgpr0 = COPY %4.sub0:vreg_128
|
|
$vgpr1 = COPY %4.sub1:vreg_128
|
|
$vgpr2 = COPY %9.sub0:vreg_128
|
|
$vgpr3 = COPY %9.sub1:vreg_128
|
|
SI_RETURN_TO_EPILOG $vgpr0, $vgpr1, $vgpr2, $vgpr3
|
|
...
|