We are relying on atrificial DAG edges inserted by the MemOpClusterMutation to keep loads and stores together in the post-RA scheduler. This does not work all the time since it allows to schedule a completely independent instruction in the middle of the cluster. Removed the DAG mutation and added pass to bundle already clustered instructions. These bundles are unpacked before the memory legalizer because it does not work with bundles but also because it allows to insert waitcounts in the middle of a store cluster. Removing artificial edges also allows a more relaxed scheduling. Differential Revision: https://reviews.llvm.org/D72737
115 lines
8.3 KiB
YAML
115 lines
8.3 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-post-ra-bundler %s -o - | FileCheck -check-prefix=GCN %s
|
|
|
|
---
|
|
name: bundle_memops
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: bundle_memops
|
|
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
|
; GCN: S_NOP 0
|
|
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr1, implicit undef $vgpr3_vgpr4, implicit $exec {
|
|
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
|
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
|
|
; GCN: }
|
|
; GCN: S_NOP 0
|
|
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
|
; GCN: BUNDLE implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr5, implicit undef $vgpr0_vgpr1, implicit $exec, implicit undef $vgpr3_vgpr4 {
|
|
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 4, 0, 0, 0, implicit $exec
|
|
; GCN: $vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
|
|
; GCN: $vgpr5 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
|
; GCN: }
|
|
; GCN: BUNDLE implicit undef $vgpr3_vgpr4, implicit $vgpr1, implicit $exec, implicit $vgpr0 {
|
|
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
|
|
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
|
|
; GCN: }
|
|
; GCN: S_NOP 0
|
|
; GCN: BUNDLE implicit undef $vgpr3_vgpr4, implicit $vgpr1, implicit $exec, implicit $vgpr0 {
|
|
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
|
|
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
|
|
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
|
|
; GCN: }
|
|
; GCN: S_NOP 0
|
|
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
|
; GCN: S_NOP 0
|
|
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
|
|
; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit $vgpr0, implicit $exec, implicit $vgpr1 {
|
|
; GCN: $vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
|
|
; GCN: $vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
|
|
; GCN: }
|
|
; GCN: BUNDLE implicit $vgpr0, implicit $vgpr2, implicit killed $m0, implicit $exec, implicit $vgpr3 {
|
|
; GCN: DS_WRITE_B32_gfx9 $vgpr0, $vgpr2, 0, 0, implicit killed $m0, implicit $exec
|
|
; GCN: DS_WRITE_B32_gfx9 $vgpr0, $vgpr3, 4, 0, implicit killed $m0, implicit $exec
|
|
; GCN: }
|
|
; GCN: S_NOP 0
|
|
; GCN: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit undef $sgpr0_sgpr1, implicit undef $sgpr10 {
|
|
; GCN: $sgpr2 = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0
|
|
; GCN: $sgpr3 = S_LOAD_DWORD_SGPR undef $sgpr0_sgpr1, undef $sgpr10, 0, 0
|
|
; GCN: }
|
|
; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit $vgpr0, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr2, implicit $exec, implicit $vgpr1 {
|
|
; GCN: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
; GCN: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
; GCN: }
|
|
; GCN: BUNDLE implicit $vgpr0, implicit $vgpr2_vgpr3, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
|
|
; GCN: BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
; GCN: BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
; GCN: }
|
|
; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit undef $vgpr4_vgpr5_vgpr6_vgpr7, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
|
|
; GCN: $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
; GCN: $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
; GCN: }
|
|
; GCN: BUNDLE implicit undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
|
|
; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
; GCN: }
|
|
; GCN: S_NOP 0
|
|
; GCN: $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 464, 0, 0
|
|
; GCN: $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 128, 0, 0
|
|
; GCN: S_NOP 0
|
|
; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit $vgpr0, implicit $exec, implicit $vgpr1 {
|
|
; GCN: $vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
|
|
; GCN: $vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
|
|
; GCN: }
|
|
$vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
|
S_NOP 0
|
|
$vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
|
$vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
|
|
S_NOP 0
|
|
$vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
|
$vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 4, 0, 0, 0, implicit $exec
|
|
$vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
|
|
$vgpr5 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
|
|
S_NOP 0
|
|
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
|
|
S_NOP 0
|
|
$vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
|
S_NOP 0
|
|
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
|
|
$vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
|
|
$vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
|
|
DS_WRITE_B32_gfx9 $vgpr0, $vgpr2, 0, 0, implicit killed $m0, implicit $exec
|
|
DS_WRITE_B32_gfx9 $vgpr0, $vgpr3, 4, 0, implicit killed $m0, implicit $exec
|
|
S_NOP 0
|
|
$sgpr2 = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0
|
|
$sgpr3 = S_LOAD_DWORD_SGPR undef $sgpr0_sgpr1, undef $sgpr10, 0, 0
|
|
$vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
$vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
$vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
$vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
|
|
S_NOP 0
|
|
$sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 464, 0, 0
|
|
$sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 128, 0, 0
|
|
S_NOP 0
|
|
$vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
|
|
$vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
|
|
...
|