V_FMAC_F32 and V_DOT2C_F32_F16 have a dummy src2 operand tied to vdst to inform passes that the instructions read the dst operand. The VOPD versions of these instructions lacked the dummy operand, which was a problem for inserting s_delay_alu. Introduce the dummy src2 operand on the VOPD versions, and fix the VOPD operand tracking logic to account for it. Reviewed By: dp Differential Revision: https://reviews.llvm.org/D136629
52 lines
2.6 KiB
YAML
52 lines
2.6 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=gcn-create-vopd,amdgpu-insert-delay-alu %s -o - | FileCheck %s
|
|
|
|
# vopd_fmac_fmac: the input is four consecutive V_FMAC_F32_e32 instructions
# that alternate between $vgpr0 and $vgpr1 as destination; each one also lists
# its destination register as its third source operand.
# The CHECK lines expect each adjacent pair to be replaced by one
# V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 that defines $vgpr0 and $vgpr1 and also
# lists both registers among its source operands, and expect an S_DELAY_ALU 1
# between the two dual instructions (the second one uses the $vgpr0/$vgpr1
# values that the first one defines).
---
|
|
name: vopd_fmac_fmac
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: vopd_fmac_fmac
|
|
; CHECK: $vgpr0 = IMPLICIT_DEF
|
|
; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
|
|
; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF
|
|
; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF
|
|
; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF
|
|
; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: S_DELAY_ALU 1
|
|
; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
|
|
$vgpr0 = IMPLICIT_DEF
|
|
$vgpr1 = IMPLICIT_DEF
|
|
$vgpr2 = IMPLICIT_DEF
|
|
$vgpr3 = IMPLICIT_DEF
|
|
$vgpr4 = IMPLICIT_DEF
|
|
$vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
|
|
$vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
|
|
$vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
|
|
$vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
|
|
...
|
|
# vopd_dot2c_dot2c: the input is four consecutive V_DOT2C_F32_F16_e32
# instructions that alternate between $vgpr0 and $vgpr1 as destination; each
# one also lists its destination register as its third source operand.
# The CHECK lines expect each adjacent pair to be replaced by one
# V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32 that defines $vgpr0 and $vgpr1
# and also lists both registers among its source operands, and expect an
# S_DELAY_ALU 1 between the two dual instructions (the second one uses the
# $vgpr0/$vgpr1 values that the first one defines).
---
|
|
name: vopd_dot2c_dot2c
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: vopd_dot2c_dot2c
|
|
; CHECK: $vgpr0 = IMPLICIT_DEF
|
|
; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
|
|
; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF
|
|
; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF
|
|
; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF
|
|
; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: S_DELAY_ALU 1
|
|
; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
|
|
$vgpr0 = IMPLICIT_DEF
|
|
$vgpr1 = IMPLICIT_DEF
|
|
$vgpr2 = IMPLICIT_DEF
|
|
$vgpr3 = IMPLICIT_DEF
|
|
$vgpr4 = IMPLICIT_DEF
|
|
$vgpr0 = V_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
|
|
$vgpr1 = V_DOT2C_F32_F16_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
|
|
$vgpr0 = V_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
|
|
$vgpr1 = V_DOT2C_F32_F16_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
|
|
...
|