Note: only src0 and src1 are commuted when the isCommutable flag
is set. This patch does not change that; it just makes it possible
to commute src0 and src1 of some U/I/B VOP3 instructions.
This patch revises d35d8da7d6.
It contains the commute opportunities, excluding floating-point instructions.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D101474
Change-Id: I62938173d750453839f2457a3851661a29135faf
58 lines
3.7 KiB
YAML
58 lines
3.7 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -run-pass=machine-cse -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
|
# RUN: llc -run-pass=machine-cse -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
|
|
|
---
|
|
|
|
name: commute_vop3
|
|
tracksRegLiveness: true
|
|
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0, $vgpr1, $vgpr2
|
|
; GFX9-LABEL: name: commute_vop3
|
|
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
|
|
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
|
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
|
; GFX9: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
|
|
; GFX9: [[V_MED3_F16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
|
|
; GFX9: [[V_MED3_F16_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
|
|
; GFX9: [[V_MAX3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
|
|
; GFX9: [[V_SAD_HI_U8_e64_:%[0-9]+]]:vgpr_32 = V_SAD_HI_U8_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec
|
|
; GFX9: [[V_XAD_U32_e64_:%[0-9]+]]:vgpr_32 = V_XAD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
|
|
; GFX9: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
|
|
; GFX9: [[V_SUB_I32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_I32_e64 [[COPY1]], [[COPY]], 0, implicit $exec
|
|
; GFX10-LABEL: name: commute_vop3
|
|
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
|
|
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
|
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
|
; GFX10: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
|
|
; GFX10: [[V_MED3_F16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
|
|
; GFX10: [[V_MED3_F16_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
|
|
; GFX10: [[V_MAX3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
|
|
; GFX10: [[V_SAD_HI_U8_e64_:%[0-9]+]]:vgpr_32 = V_SAD_HI_U8_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec
|
|
; GFX10: [[V_XAD_U32_e64_:%[0-9]+]]:vgpr_32 = V_XAD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
|
|
; GFX10: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
|
|
; GFX10: [[V_SUB_I32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_I32_e64 [[COPY1]], [[COPY]], 0, implicit $exec
|
|
%0:vgpr_32 = COPY $vgpr0
|
|
%1:vgpr_32 = COPY $vgpr1
|
|
%2:vgpr_32 = COPY $vgpr2
|
|
%3:vgpr_32 = V_XOR3_B32_e64 %0, %1, %2, implicit $exec
|
|
%4:vgpr_32 = V_XOR3_B32_e64 %1, %0, %2, implicit $exec
|
|
; Instructions with MayRaiseFPException are not CSE'd, so both V_MED3_F16 must remain
|
|
%5:vgpr_32 = V_MED3_F16_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
|
|
%6:vgpr_32 = V_MED3_F16_e64 0, %1, 0, %0, 0, %2, 0, 0, implicit $mode, implicit $exec
|
|
|
|
%7:vgpr_32 = V_MAX3_I32_e64 %0, %1, %2, implicit $exec
|
|
%8:vgpr_32 = V_MAX3_I32_e64 %1, %0, %2, implicit $exec
|
|
%11:vgpr_32 = V_SAD_HI_U8_e64 %0, %1, %2, 0, implicit $exec
|
|
%12:vgpr_32 = V_SAD_HI_U8_e64 %1, %0, %2, 0, implicit $exec
|
|
%13:vgpr_32 = V_XAD_U32_e64 %0, %1, 0, implicit $exec
|
|
%14:vgpr_32 = V_XAD_U32_e64 %1, %0, 0, implicit $exec
|
|
; Sub must not be commuted, so both V_SUB_I32 instructions must remain
|
|
%15:vgpr_32 = V_SUB_I32_e64 %0, %1, 0, implicit $exec
|
|
%16:vgpr_32 = V_SUB_I32_e64 %1, %0, 0, implicit $exec
|
|
...
|