This reverts commit 7792b4ae79. The problem was a conflict with e55d6f5ea2 "[AMDGPU] Simplify and improve codegen for llvm.amdgcn.set.inactive (https://github.com/llvm/llvm-project/pull/107889)", which changed the syntax of V_SET_INACTIVE (and thus made my MIR test crash). ...if only we had a merge queue.
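For context, the re-landed test below exercises V_SET_INACTIVE in the operand form produced after that change; a rough sketch of the shape it takes in the MIR (register names here are illustrative, taken from the test body):

  %19:vgpr_32 = V_SET_INACTIVE_B32 0, %36:vgpr_32, 0, 71, undef %20, implicit $exec, implicit-def $scc

The CHECK lines were regenerated against this form so the test no longer conflicts with the new lowering.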
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s

---
# Test that we don't do silly things when there is no whole wave mode in the
# shader (aka bb.1).
#
name: test_no_wwm
alignment: 1
exposesReturnsTwice: false
tracksRegLiveness: true
body: |
  ; CHECK-LABEL: name: test_no_wwm
  ; CHECK: bb.0:
  ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:ccr_sgpr_64 = COPY $sgpr1
  ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:ccr_sgpr_64 = COPY $sgpr2
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
  ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[S_OR_SAVEEXEC_B32_]], implicit-def dead $scc
  ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
  ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; CHECK-NEXT: S_BRANCH %bb.1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT: successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 5, [[COPY2]], 0, implicit $exec
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY3]], implicit-def $scc
  ; CHECK-NEXT: $vgpr8 = COPY [[COPY2]]
  ; CHECK-NEXT: $sgpr0 = COPY [[COPY]]
  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; CHECK-NEXT: SI_CS_CHAIN_TC_W32 [[COPY1]], 0, 0, [[COPY4]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8
    %9:sreg_32 = COPY $sgpr0
    undef %1.sub0:ccr_sgpr_64 = COPY $sgpr1
    %1.sub1:ccr_sgpr_64 = COPY $sgpr2
    %37:vgpr_32 = COPY $vgpr8
    %14:sreg_32_xm0_xexec = SI_INIT_WHOLE_WAVE implicit-def $exec, implicit $exec
    %16:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
    %38:sreg_32 = S_AND_B32 %16:sreg_32_xm0_xexec, %14:sreg_32_xm0_xexec, implicit-def dead $scc
    $exec_lo = S_MOV_B32_term %38:sreg_32
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %37:vgpr_32 = V_ADD_U32_e64 5, %37:vgpr_32, 0, implicit $exec

  bb.2:
    $exec_lo = S_OR_B32 $exec_lo, %16:sreg_32_xm0_xexec, implicit-def $scc
    $vgpr8 = COPY %37:vgpr_32
    $sgpr0 = COPY %9:sreg_32
    %2:sreg_32 = COPY $sgpr0
    SI_CS_CHAIN_TC_W32 %1:ccr_sgpr_64, 0, 0, %2:sreg_32, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
...

---
# Test that we handle WWM in the shader correctly.
#
name: test_wwm_bb1
alignment: 1
exposesReturnsTwice: false
tracksRegLiveness: true
body: |
  ; CHECK-LABEL: name: test_wwm_bb1
  ; CHECK: bb.0:
  ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:ccr_sgpr_64 = COPY $sgpr1
  ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:ccr_sgpr_64 = COPY $sgpr2
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9
  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
  ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY4]], [[S_OR_SAVEEXEC_B32_]], implicit-def dead $scc
  ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
  ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; CHECK-NEXT: S_BRANCH %bb.1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT: successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 24, [[COPY3]], 0, implicit $exec
  ; CHECK-NEXT: [[ENTER_STRICT_WWM:%[0-9]+]]:sreg_32_xm0_xexec = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT: dead [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
  ; CHECK-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY3]], 0, 71, undef [[ENTER_STRICT_WWM]], implicit $exec, implicit-def $scc
  ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 42, [[V_SET_INACTIVE_B32_]], 0, implicit $exec
  ; CHECK-NEXT: $exec_lo = EXIT_STRICT_WWM [[ENTER_STRICT_WWM]]
  ; CHECK-NEXT: early-clobber [[COPY2]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e64_]], implicit $exec
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY4]], implicit-def $scc
  ; CHECK-NEXT: $vgpr8 = COPY [[COPY2]]
  ; CHECK-NEXT: $vgpr9 = COPY [[COPY3]]
  ; CHECK-NEXT: $sgpr0 = COPY [[COPY]]
  ; CHECK-NEXT: SI_CS_CHAIN_TC_W32 [[COPY1]], 0, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9
    %9:sreg_32 = COPY $sgpr0
    undef %1.sub0:ccr_sgpr_64 = COPY $sgpr1
    %1.sub1:ccr_sgpr_64 = COPY $sgpr2
    %40:vgpr_32 = COPY $vgpr9
    %36:vgpr_32 = COPY $vgpr8
    %14:sreg_32_xm0_xexec = SI_INIT_WHOLE_WAVE implicit-def $exec, implicit $exec
    %16:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
    %38:sreg_32 = S_AND_B32 %16:sreg_32_xm0_xexec, %14:sreg_32_xm0_xexec, implicit-def dead $scc
    $exec_lo = S_MOV_B32_term %38:sreg_32
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %36:vgpr_32 = V_ADD_U32_e64 24, %36:vgpr_32, 0, implicit $exec
    %20:sreg_32_xm0_xexec = IMPLICIT_DEF
    %19:vgpr_32 = V_SET_INACTIVE_B32 0, %36:vgpr_32, 0, 71, undef %20, implicit $exec, implicit-def $scc
    %18:vgpr_32 = V_ADD_U32_e64 42, %19:vgpr_32, 0, implicit $exec
    %40:vgpr_32 = STRICT_WWM %18:vgpr_32, implicit $exec

  bb.2:
    $exec_lo = S_OR_B32 $exec_lo, %16:sreg_32_xm0_xexec, implicit-def $scc
    $vgpr8 = COPY %40:vgpr_32
    $vgpr9 = COPY %36:vgpr_32
    $sgpr0 = COPY %9:sreg_32
    SI_CS_CHAIN_TC_W32 %1:ccr_sgpr_64, 0, 0, %9:sreg_32, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
...