Files
clang-p2996/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
Stanislav Mekhanoshin 08682dcc86 [AMDGPU] Define 16 bit VGPR subregs
We have loads preserving low and high 16 bits of their
destinations. However, we always use a whole 32 bit register
for these. The same happens with 16 bit stores, we have to
use full 32 bit register so if high bits are clobbered the
register needs to be copied. One example of such code is
added to the load-hi16.ll.

The proper solution to the problem is to define 16 bit subregs
and use them in the operations which do not read another half
of a VGPR or preserve it if the VGPR is written.

This patch simply defines subregisters and register classes.
At the moment there should be no difference in code generation.
A lot more work is needed to actually use these new register
classes. Therefore, there are no new tests at this time.

Register weight calculation has changed with new subregs so
appropriate changes were made to keep all calculations just
as they are now, especially calculations of register pressure.

Differential Revision: https://reviews.llvm.org/D74873
2020-03-31 11:49:06 -07:00

115 lines
9.0 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-post-ra-bundler %s -o - | FileCheck -check-prefix=GCN %s
---
name: bundle_memops
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: bundle_memops
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
; GCN: S_NOP 0
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit undef $vgpr3_vgpr4, implicit $exec {
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
; GCN: }
; GCN: S_NOP 0
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
; GCN: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr5, implicit-def $vgpr5_lo16, implicit-def $vgpr5_hi16, implicit undef $vgpr0_vgpr1, implicit $exec, implicit undef $vgpr3_vgpr4 {
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 4, 0, 0, 0, implicit $exec
; GCN: $vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
; GCN: $vgpr5 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
; GCN: }
; GCN: BUNDLE implicit undef $vgpr3_vgpr4, implicit $vgpr1, implicit $exec, implicit $vgpr0 {
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
; GCN: }
; GCN: S_NOP 0
; GCN: BUNDLE implicit undef $vgpr3_vgpr4, implicit $vgpr1, implicit $exec, implicit $vgpr0 {
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
; GCN: }
; GCN: S_NOP 0
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
; GCN: S_NOP 0
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit $exec, implicit $vgpr1 {
; GCN: $vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
; GCN: $vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
; GCN: }
; GCN: BUNDLE implicit $vgpr0, implicit $vgpr2, implicit killed $m0, implicit $exec, implicit $vgpr3 {
; GCN: DS_WRITE_B32_gfx9 $vgpr0, $vgpr2, 0, 0, implicit killed $m0, implicit $exec
; GCN: DS_WRITE_B32_gfx9 $vgpr0, $vgpr3, 4, 0, implicit killed $m0, implicit $exec
; GCN: }
; GCN: S_NOP 0
; GCN: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit undef $sgpr0_sgpr1, implicit undef $sgpr10 {
; GCN: $sgpr2 = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0
; GCN: $sgpr3 = S_LOAD_DWORD_SGPR undef $sgpr0_sgpr1, undef $sgpr10, 0, 0
; GCN: }
; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr2, implicit $exec, implicit $vgpr1 {
; GCN: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: }
; GCN: BUNDLE implicit $vgpr0, implicit $vgpr2_vgpr3, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GCN: BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: }
; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit undef $vgpr4_vgpr5_vgpr6_vgpr7, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
; GCN: $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: }
; GCN: BUNDLE implicit undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: }
; GCN: S_NOP 0
; GCN: $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 464, 0, 0
; GCN: $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 128, 0, 0
; GCN: S_NOP 0
; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit $exec, implicit $vgpr1 {
; GCN: $vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
; GCN: $vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
; GCN: }
$vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
S_NOP 0
$vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
$vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
S_NOP 0
$vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
$vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 4, 0, 0, 0, implicit $exec
$vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
$vgpr5 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
S_NOP 0
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
S_NOP 0
$vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
S_NOP 0
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
$vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
$vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
DS_WRITE_B32_gfx9 $vgpr0, $vgpr2, 0, 0, implicit killed $m0, implicit $exec
DS_WRITE_B32_gfx9 $vgpr0, $vgpr3, 4, 0, implicit killed $m0, implicit $exec
S_NOP 0
$sgpr2 = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0
$sgpr3 = S_LOAD_DWORD_SGPR undef $sgpr0_sgpr1, undef $sgpr10, 0, 0
$vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
S_NOP 0
$sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 464, 0, 0
$sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 128, 0, 0
S_NOP 0
$vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
$vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
...