Files
clang-p2996/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir
Joe Nash c87c61c52c [AMDGPU] Fix AGPR offset for waitcnt
An enum value stores the offset between AGPR ranges and VGPR
ranges in the internal storage of SIInsertWaitcnts. It said 226 when
it should say 256, causing some portion of the ranges to overlap. That
in turn causes 'aliasing' between the registers, potentially inserting
waitcnts that are not required.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D119749
2022-02-14 15:16:21 -05:00

358 lines
12 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
--- |
define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
<4 x i32> addrspace(1)* %global16,
i32* %flat4,
<4 x i32>* %flat16) {
ret void
}
define amdgpu_kernel void @single_fallthrough_successor_no_end_block_wait() {
ret void
}
define amdgpu_kernel void @single_branch_successor_not_next_block() {
ret void
}
define amdgpu_kernel void @preexisting_waitcnt() {
ret void
}
define amdgpu_kernel void @bundle_no_waitcnt() {
ret void
}
define amdgpu_kernel void @preexisting_waitcnt_in_bundle() {
ret void
}
define amdgpu_kernel void @insert_in_bundle() {
ret void
}
define amdgpu_kernel void @exit_bundle() {
ret void
}
define amdgpu_kernel void @cross_bundle() {
ret void
}
define amdgpu_kernel void @high_register_collision() {
ret void
}
...
---
# Global loads will return in order so we should:
# s_waitcnt vmcnt(1)
# s_waitcnt vmcnt(0)
# s_waitcnt vmcnt(0)
name: flat_zero_waitcnt
body: |
; GCN-LABEL: name: flat_zero_waitcnt
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: S_WAITCNT 0
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4, addrspace 1)
; GCN-NEXT: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
; GCN-NEXT: S_WAITCNT 3953
; GCN-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
; GCN-NEXT: S_BRANCH %bb.1
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_WAITCNT 3952
; GCN-NEXT: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: S_WAITCNT 49279
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
; GCN-NEXT: S_WAITCNT 3952
; GCN-NEXT: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
; GCN-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4)
$agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16)
$agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
S_BRANCH %bb.1
bb.1:
successors: %bb.2
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16)
$vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
S_BRANCH %bb.2
bb.2:
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
$agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
$vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
S_ENDPGM 0
...
---
# There is only a single fallthrough successor block, so there's no
# need to wait immediately.
name: single_fallthrough_successor_no_end_block_wait
body: |
; GCN-LABEL: name: single_fallthrough_successor_no_end_block_wait
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: S_WAITCNT 0
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
; GCN-NEXT: S_WAITCNT 112
; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
bb.1:
$vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
# The block has a single predecessor with a single successor, but it
# is not the next block so it's non-obvious that the wait is not needed.
name: single_branch_successor_not_next_block
body: |
; GCN-LABEL: name: single_branch_successor_not_next_block
; GCN: bb.0:
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: S_WAITCNT 0
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
; GCN-NEXT: S_WAITCNT 112
; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.2
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
S_BRANCH %bb.2
bb.1:
FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
bb.2:
$vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
# GCN-LABEL: name: preexisting_waitcnt{{$}}
# GCN: FLAT_LOAD_DWORD
# GCN-NEXT: S_WAITCNT 0
# GCN-NOT: S_WAITCNT
name: preexisting_waitcnt
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr1_vgpr2
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
S_WAITCNT 0
FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
...
---
name: bundle_no_waitcnt
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr1_vgpr2
; GCN-LABEL: name: bundle_no_waitcnt
; GCN: liveins: $vgpr1_vgpr2
; GCN-NEXT: {{ $}}
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: BUNDLE {
; GCN-NEXT: S_NOP 0
; GCN-NEXT: S_NOP 0
; GCN-NEXT: }
; GCN-NEXT: S_WAITCNT 112
; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
BUNDLE {
S_NOP 0
S_NOP 0
}
FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
...
---
# See the waitcnt inside the bundle and don't insert an extra
name: preexisting_waitcnt_in_bundle
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr1_vgpr2
; GCN-LABEL: name: preexisting_waitcnt_in_bundle
; GCN: liveins: $vgpr1_vgpr2
; GCN-NEXT: {{ $}}
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: BUNDLE {
; GCN-NEXT: S_NOP 0
; GCN-NEXT: S_WAITCNT 0
; GCN-NEXT: }
; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
BUNDLE {
S_NOP 0
S_WAITCNT 0
}
FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
...
---
# Def and use inside bundle
name: insert_in_bundle
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr1_vgpr2
; GCN-LABEL: name: insert_in_bundle
; GCN: liveins: $vgpr1_vgpr2
; GCN-NEXT: {{ $}}
; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_WAITCNT 112
; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: }
BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
}
...
---
# Def is last instruction in bundle, use is outside bundle
name: exit_bundle
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr1_vgpr2
; GCN-LABEL: name: exit_bundle
; GCN: liveins: $vgpr1_vgpr2
; GCN-NEXT: {{ $}}
; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: }
; GCN-NEXT: S_WAITCNT 112
; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
}
FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
...
---
# Def is in bundle, use is in another bundle
name: cross_bundle
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr1_vgpr2
; GCN-LABEL: name: cross_bundle
; GCN: liveins: $vgpr1_vgpr2
; GCN-NEXT: {{ $}}
; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: }
; GCN-NEXT: S_WAITCNT 112
; GCN-NEXT: BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: }
BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
}
BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
}
...
---
# agpr should be disjoint and tracked separately from vgpr
name: high_register_collision
body: |
bb.0:
; GCN-LABEL: name: high_register_collision
; GCN: S_WAITCNT 0
; GCN-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
; GCN-NEXT: $vgpr226 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
$vgpr226 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
$vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr1, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...