Files
clang-p2996/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir
Justin Bogner a81c7dbf0d [AMDGPU] Drop _oneuse checks from med3 patterns
We use _oneuse checks to make sure combines won't accidentally
increase code size, but this prevents the optimization in cases where
we happen to want to clamp multiple values to the same range.

It's safe to drop these checks for two reasons:

1. The pattern of max/min operations for med3 is complicated enough
   that it's unlikely to come up by accident, so this will still only
   fire when appropriate to do so.
2. Even if every intermediate is used and we don't save a single
   operation, we still won't end up with more operations since the
   med3 replaces the final max/min.

In pathological cases we could potentially end up with a larger
encoding size or possibly slightly increased vgpr pressure, but the
risk of that is low, especially considering the upside.

Differential Revision: https://reviews.llvm.org/D132621
2022-09-07 16:31:49 -07:00

178 lines
6.3 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX6 %s
---
# Baseline case: every intermediate G_UMAX/G_UMIN has a single use and all
# three inputs are VGPRs. The whole min/max chain is expected to select to
# one V_MED3_U32_e64.
name: umed3_s32_vvv
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2

    ; GFX6-LABEL: name: umed3_s32_vvv
    ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2
    ; GFX6-NEXT: {{ $}}
    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s32) = COPY $vgpr2
    %3:vgpr(s32) = G_UMAX %0, %1
    %4:vgpr(s32) = G_UMIN %0, %1
    %5:vgpr(s32) = G_UMAX %4, %2
    %6:vgpr(s32) = G_UMIN %3, %5
    S_ENDPGM 0, implicit %6
...
---
# Same min/max chain as umed3_s32_vvv, but on the SGPR bank. The checks
# expect no med3 here: each G_UMAX/G_UMIN is selected individually to
# S_MAX_U32 / S_MIN_U32.
name: umed3_s32_sss
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $sgpr0, $sgpr1, $sgpr2

    ; GFX6-LABEL: name: umed3_s32_sss
    ; GFX6: liveins: $sgpr0, $sgpr1, $sgpr2
    ; GFX6-NEXT: {{ $}}
    ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
    ; GFX6-NEXT: [[S_MAX_U32_:%[0-9]+]]:sreg_32 = S_MAX_U32 [[COPY]], [[COPY1]], implicit-def $scc
    ; GFX6-NEXT: [[S_MIN_U32_:%[0-9]+]]:sreg_32 = S_MIN_U32 [[COPY]], [[COPY1]], implicit-def $scc
    ; GFX6-NEXT: [[S_MAX_U32_1:%[0-9]+]]:sreg_32 = S_MAX_U32 [[S_MIN_U32_]], [[COPY2]], implicit-def $scc
    ; GFX6-NEXT: [[S_MIN_U32_1:%[0-9]+]]:sreg_32 = S_MIN_U32 [[S_MAX_U32_]], [[S_MAX_U32_1]], implicit-def $scc
    ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_MIN_U32_1]]
    %0:sgpr(s32) = COPY $sgpr0
    %1:sgpr(s32) = COPY $sgpr1
    %2:sgpr(s32) = COPY $sgpr2
    %3:sgpr(s32) = G_UMAX %0, %1
    %4:sgpr(s32) = G_UMIN %0, %1
    %5:sgpr(s32) = G_UMAX %4, %2
    %6:sgpr(s32) = G_UMIN %3, %5
    S_ENDPGM 0, implicit %6
...
---
# Multi-use variant: the inner G_UMAX %0, %1 (%3) is also used directly by
# S_ENDPGM. The checks expect V_MED3_U32_e64 to still be formed, with a
# separate V_MAX_U32_e64 kept alive for the extra use.
name: umed3_s32_vvv_multiuse0
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2

    ; GFX6-LABEL: name: umed3_s32_vvv_multiuse0
    ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2
    ; GFX6-NEXT: {{ $}}
    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec
    ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MAX_U32_e64_]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s32) = COPY $vgpr2
    %3:vgpr(s32) = G_UMAX %0, %1
    %4:vgpr(s32) = G_UMIN %0, %1
    %5:vgpr(s32) = G_UMAX %4, %2
    %6:vgpr(s32) = G_UMIN %3, %5
    S_ENDPGM 0, implicit %6, implicit %3
...
---
# Multi-use variant: the inner G_UMIN %0, %1 (%4) is also used directly by
# S_ENDPGM. The checks expect V_MED3_U32_e64 to still be formed, with a
# separate V_MIN_U32_e64 kept alive for the extra use.
name: umed3_s32_vvv_multiuse1
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2

    ; GFX6-LABEL: name: umed3_s32_vvv_multiuse1
    ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2
    ; GFX6-NEXT: {{ $}}
    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; GFX6-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec
    ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MIN_U32_e64_]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s32) = COPY $vgpr2
    %3:vgpr(s32) = G_UMAX %0, %1
    %4:vgpr(s32) = G_UMIN %0, %1
    %5:vgpr(s32) = G_UMAX %4, %2
    %6:vgpr(s32) = G_UMIN %3, %5
    S_ENDPGM 0, implicit %6, implicit %4
...
---
# Multi-use variant: the outer G_UMAX %4, %2 (%5) is also used directly by
# S_ENDPGM. The checks expect V_MED3_U32_e64 to still be formed, with the
# V_MIN_U32_e64 / V_MAX_U32_e64 pair that computes %5 kept alive for the
# extra use.
name: umed3_s32_vvv_multiuse2
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2

    ; GFX6-LABEL: name: umed3_s32_vvv_multiuse2
    ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2
    ; GFX6-NEXT: {{ $}}
    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; GFX6-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec
    ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[COPY2]], implicit $exec
    ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MAX_U32_e64_]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s32) = COPY $vgpr2
    %3:vgpr(s32) = G_UMAX %0, %1
    %4:vgpr(s32) = G_UMIN %0, %1
    %5:vgpr(s32) = G_UMAX %4, %2
    %6:vgpr(s32) = G_UMIN %3, %5
    S_ENDPGM 0, implicit %6, implicit %5
...
---
# Two values (%2 and %3) are clamped against the same pair of bounds:
# %4 = G_UMAX %0, %1 and %5 = G_UMIN %0, %1 are each used by both clamp
# chains. The checks expect both chains to select to V_MED3_U32_e64, with no
# standalone min/max instructions left over.
#
# NOTE(review): despite the "smed3" prefix, this function uses the unsigned
# opcodes (G_UMAX/G_UMIN) and expects V_MED3_U32_e64; the name looks like a
# carry-over from a signed counterpart. It is left unchanged so the LABEL
# check and any external references stay valid. Consider renaming it to
# umed3_s32_vvv_reuse_bounds.
name: smed3_s32_vvv_reuse_bounds
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3

    ; GFX6-LABEL: name: smed3_s32_vvv_reuse_bounds
    ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
    ; GFX6-NEXT: {{ $}}
    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
    ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
    ; GFX6-NEXT: [[V_MED3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY3]], implicit $exec
    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MED3_U32_e64_1]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s32) = COPY $vgpr2
    %3:vgpr(s32) = COPY $vgpr3
    %4:vgpr(s32) = G_UMAX %0, %1
    %5:vgpr(s32) = G_UMIN %0, %1
    %6:vgpr(s32) = G_UMIN %2, %4
    %7:vgpr(s32) = G_UMAX %6, %5
    %8:vgpr(s32) = G_UMIN %3, %4
    %9:vgpr(s32) = G_UMAX %8, %5
    S_ENDPGM 0, implicit %7, implicit %9
...