Files
clang-p2996/llvm/test/CodeGen/AMDGPU/licm-valu.mir
Carl Ritson a1fb307b4b [AMDGPU] Allow hoisting of some VALU compare instructions
Conservatively allow hoisting/sinking of VALU comparisons.
If the result of a comparison is masked with exec, narrowing the
set of active lanes, then it is safe to hoist it as the masking
instruction will never be hoisted.

Heuristically this is also true for sinking, as we do not expect
the result of a sunk comparison that is masked with exec to be
used outside of the loop.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D118975
2022-02-08 11:27:23 +09:00

134 lines
3.8 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machinelicm -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
---
# Loop bb.1 defines a VGPR with V_MOV_B32_e32 of an immediate and rewrites
# $exec with S_OR_B64 on every iteration.
# Expected result (see GCN checks): the V_MOV_B32_e32 is moved out of the
# loop into bb.0, while the $exec update and the branches stay in bb.1.
name: hoist_move
tracksRegLiveness: true
body: |
; GCN-LABEL: name: hoist_move
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: S_BRANCH %bb.1
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: $exec = S_OR_B64 $exec, 1, implicit-def $scc
; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: S_ENDPGM 0
bb.0:
S_BRANCH %bb.1
bb.1:
%0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
$exec = S_OR_B64 $exec, 1, implicit-def $scc
S_CBRANCH_EXECNZ %bb.1, implicit $exec
S_BRANCH %bb.2
bb.2:
S_ENDPGM 0
...
---
# Loop bb.1 computes V_CMP_EQ_U32_e64 on two immediates and combines the
# result into $exec with S_OR_B64.
# Expected result (see GCN checks): the compare stays inside bb.1 and nothing
# is added to bb.0.
# NOTE(review): presumably the compare must stay because OR-ing into $exec is
# not a masking (lane-narrowing) use of its result - confirm against the
# target's hoisting rules.
name: no_hoist_cmp
tracksRegLiveness: true
body: |
; GCN-LABEL: name: no_hoist_cmp
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: S_BRANCH %bb.1
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
; GCN-NEXT: $exec = S_OR_B64 $exec, [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: S_ENDPGM 0
bb.0:
S_BRANCH %bb.1
bb.1:
%0:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
$exec = S_OR_B64 $exec, %0:sreg_64, implicit-def $scc
S_CBRANCH_EXECNZ %bb.1, implicit $exec
S_BRANCH %bb.2
bb.2:
S_ENDPGM 0
...
---
# Loop bb.1 computes V_CMP_EQ_U32_e64 on two immediates and combines the
# result into $exec with S_AND_B64 (the only difference from no_hoist_cmp,
# which uses S_OR_B64).
# Expected result (see GCN checks): the compare is moved out of the loop into
# bb.0, while the S_AND_B64 that writes $exec stays in bb.1.
name: allowable_hoist_cmp
tracksRegLiveness: true
body: |
; GCN-LABEL: name: allowable_hoist_cmp
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
; GCN-NEXT: S_BRANCH %bb.1
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: $exec = S_AND_B64 $exec, [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: S_ENDPGM 0
bb.0:
S_BRANCH %bb.1
bb.1:
%0:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
$exec = S_AND_B64 $exec, %0:sreg_64, implicit-def $scc
S_CBRANCH_EXECNZ %bb.1, implicit $exec
S_BRANCH %bb.2
bb.2:
S_ENDPGM 0
...
---
# bb.0 defines a VGPR with IMPLICIT_DEF; loop bb.1 reads it with
# V_READFIRSTLANE_B32 and rewrites $exec with S_OR_B64 on every iteration.
# Expected result (see GCN checks): the V_READFIRSTLANE_B32 stays inside bb.1
# even though its only register operand is defined outside the loop.
# NOTE(review): presumably because its result depends on the current $exec
# value, which changes inside the loop - confirm.
name: no_hoist_readfirstlane
tracksRegLiveness: true
body: |
; GCN-LABEL: name: no_hoist_readfirstlane
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: S_BRANCH %bb.1
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[DEF]], implicit $exec
; GCN-NEXT: $exec = S_OR_B64 $exec, 1, implicit-def $scc
; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: S_ENDPGM 0
bb.0:
%0:vgpr_32 = IMPLICIT_DEF
S_BRANCH %bb.1
bb.1:
%1:sgpr_32 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec
$exec = S_OR_B64 $exec, 1, implicit-def $scc
S_CBRANCH_EXECNZ %bb.1, implicit $exec
S_BRANCH %bb.2
bb.2:
S_ENDPGM 0
...