An enum value stores the offset between the AGPR and VGPR ranges in the internal storage of SIInsertWaitcnts. It was 226 when it should have been 256, so part of the two ranges overlapped. That overlap makes AGPRs and VGPRs 'alias' each other in the tracking, potentially inserting waitcnts that are not required. Reviewed By: rampitec. Differential Revision: https://reviews.llvm.org/D119749
358 lines
12 KiB
YAML
358 lines
12 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
|
|
|
--- |
; Stub IR module: one empty kernel per MIR function in this file.
; Only @flat_zero_waitcnt takes arguments; the memory operands in its MIR
; body refer to them as %ir.global4, %ir.global16, %ir.flat4 and %ir.flat16.
|
|
define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
|
|
<4 x i32> addrspace(1)* %global16,
|
|
i32* %flat4,
|
|
<4 x i32>* %flat16) {
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @single_fallthrough_successor_no_end_block_wait() {
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @single_branch_successor_not_next_block() {
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @preexisting_waitcnt() {
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @bundle_no_waitcnt() {
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @preexisting_waitcnt_in_bundle() {
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @insert_in_bundle() {
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @exit_bundle() {
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @cross_bundle() {
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @high_register_collision() {
|
|
ret void
|
|
}
|
|
|
|
...
|
|
---
|
|
|
|
|
|
# Each block issues two FLAT loads into AGPRs and then reads $agpr1.
# The immediates in the expected output use the gfx9 S_WAITCNT layout
# (vmcnt in bits [3:0] and [15:14], expcnt in [6:4], lgkmcnt in [11:8]):
#   3953  = 0x0F71 -> vmcnt(1)
#   3952  = 0x0F70 -> vmcnt(0)
#   49279 = 0xC07F -> lgkmcnt(0)
# NOTE(review): the three "s_waitcnt" lines below presumably describe bb.0,
# bb.1 and bb.2 in that order -- confirm against the non-AGPR waitcnt test.
# Global loads will return in order so we should:
|
|
# s_waitcnt vmcnt(1)   -- bb.0: checked as S_WAITCNT 3953 before the V_ACCVGPR_MOV
|
|
|
|
# s_waitcnt vmcnt(0)   -- bb.1: checked as S_WAITCNT 3952 before the second load
|
|
|
|
# s_waitcnt vmcnt(0)   -- bb.2: checked as S_WAITCNT 3952 before the second load
|
|
|
|
name: flat_zero_waitcnt
|
|
|
|
body: |
|
|
; GCN-LABEL: name: flat_zero_waitcnt
|
|
; GCN: bb.0:
|
|
; GCN-NEXT: successors: %bb.1(0x80000000)
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: S_WAITCNT 0
|
|
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4, addrspace 1)
|
|
; GCN-NEXT: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
|
|
; GCN-NEXT: S_WAITCNT 3953
|
|
; GCN-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
|
|
; GCN-NEXT: S_BRANCH %bb.1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.1:
|
|
; GCN-NEXT: successors: %bb.2(0x80000000)
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: S_WAITCNT 3952
|
|
; GCN-NEXT: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
|
|
; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
|
|
; GCN-NEXT: S_BRANCH %bb.2
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.2:
|
|
; GCN-NEXT: S_WAITCNT 49279
|
|
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
|
|
; GCN-NEXT: S_WAITCNT 3952
|
|
; GCN-NEXT: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
|
|
; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
bb.0:
|
|
successors: %bb.1
|
|
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4)
|
|
$agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16)
|
|
$agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
|
|
S_BRANCH %bb.1
|
|
|
|
bb.1:
|
|
successors: %bb.2
|
|
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
$agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16)
|
|
$vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
|
|
S_BRANCH %bb.2
|
|
|
|
bb.2:
|
|
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
|
|
$agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
|
|
$vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|
|
---
|
|
# There is only a single fallthrough successor block, so there's no
|
|
# need to wait immediately.
# The expected output therefore has no wait at the end of bb.0; instead
# S_WAITCNT 112 (gfx9 encoding of vmcnt(0) lgkmcnt(0)) appears in bb.1,
# directly before the store that reads $agpr0.
|
|
|
|
|
|
name: single_fallthrough_successor_no_end_block_wait
|
|
|
|
body: |
|
|
; GCN-LABEL: name: single_fallthrough_successor_no_end_block_wait
|
|
; GCN: bb.0:
|
|
; GCN-NEXT: successors: %bb.1(0x80000000)
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: S_WAITCNT 0
|
|
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.1:
|
|
; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
|
|
; GCN-NEXT: S_WAITCNT 112
|
|
; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
bb.0:
|
|
successors: %bb.1
|
|
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
|
|
bb.1:
|
|
$vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
|
|
FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
S_ENDPGM 0
|
|
...
|
|
---
|
|
# The block has a single predecessor with a single successor, but it
|
|
# is not the next block so it's non-obvious that the wait is not needed.
# bb.0 branches over bb.1 (which has no predecessor here) to bb.2. The
# expected output has no wait at the end of bb.0; S_WAITCNT 112 (gfx9
# encoding of vmcnt(0) lgkmcnt(0)) appears in bb.2 directly before the
# store that reads $agpr0. The store of $agpr10 in bb.1 gets no wait.
|
|
|
|
|
|
|
|
|
|
name: single_branch_successor_not_next_block
|
|
|
|
body: |
|
|
; GCN-LABEL: name: single_branch_successor_not_next_block
|
|
; GCN: bb.0:
|
|
; GCN-NEXT: successors: %bb.2(0x80000000)
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: S_WAITCNT 0
|
|
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: S_BRANCH %bb.2
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.1:
|
|
; GCN-NEXT: FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.2:
|
|
; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
|
|
; GCN-NEXT: S_WAITCNT 112
|
|
; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
bb.0:
|
|
successors: %bb.2
|
|
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
S_BRANCH %bb.2
|
|
|
|
bb.1:
|
|
FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
S_ENDPGM 0
|
|
|
|
bb.2:
|
|
$vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
|
|
FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# The input already has an S_WAITCNT 0 between the load into $agpr0 and the
# store that reads it. The hand-written checks below require that this wait
# directly follows the load and that no further wait appears after it.
# GCN-LABEL: name: preexisting_waitcnt{{$}}
|
|
# GCN: FLAT_LOAD_DWORD
|
|
# GCN-NEXT: S_WAITCNT 0
|
|
# GCN-NOT: S_WAITCNT
|
|
name: preexisting_waitcnt
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr1_vgpr2
|
|
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
S_WAITCNT 0
|
|
FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
|
|
...
|
|
|
|
---
|
|
|
|
# A bundle holding only S_NOPs sits between the load into $agpr0 and the
# store that reads it. The expected output keeps the bundle unchanged and
# places S_WAITCNT 112 (gfx9 encoding of vmcnt(0) lgkmcnt(0)) after it,
# directly before the store.
name: bundle_no_waitcnt
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr1_vgpr2
|
|
; GCN-LABEL: name: bundle_no_waitcnt
|
|
; GCN: liveins: $vgpr1_vgpr2
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: BUNDLE {
|
|
; GCN-NEXT: S_NOP 0
|
|
; GCN-NEXT: S_NOP 0
|
|
; GCN-NEXT: }
|
|
; GCN-NEXT: S_WAITCNT 112
|
|
; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
BUNDLE {
|
|
S_NOP 0
|
|
S_NOP 0
|
|
}
|
|
FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
|
|
...
|
|
|
|
---
|
|
|
|
# See the waitcnt inside the bundle and don't insert an extra one: the
# S_WAITCNT 0 already present in the bundle covers the store that reads
# $agpr0, so the expected output is identical to the input.
|
|
name: preexisting_waitcnt_in_bundle
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr1_vgpr2
|
|
; GCN-LABEL: name: preexisting_waitcnt_in_bundle
|
|
; GCN: liveins: $vgpr1_vgpr2
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: BUNDLE {
|
|
; GCN-NEXT: S_NOP 0
|
|
; GCN-NEXT: S_WAITCNT 0
|
|
; GCN-NEXT: }
|
|
; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
BUNDLE {
|
|
S_NOP 0
|
|
S_WAITCNT 0
|
|
}
|
|
FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
|
|
...
|
|
|
|
---
|
|
|
|
# Def and use inside bundle
# The load into $agpr0 and the store that reads it are in the same bundle.
# The expected output places S_WAITCNT 112 (gfx9 encoding of vmcnt(0)
# lgkmcnt(0)) inside that bundle, between the two instructions.
|
|
|
|
name: insert_in_bundle
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr1_vgpr2
|
|
; GCN-LABEL: name: insert_in_bundle
|
|
; GCN: liveins: $vgpr1_vgpr2
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
|
|
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: S_WAITCNT 112
|
|
; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: }
|
|
BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
|
|
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
}
|
|
...
|
|
|
|
---
|
|
|
|
# Def is last instruction in bundle, use is outside bundle
# The expected output leaves the bundle unchanged and places S_WAITCNT 112
# (gfx9 encoding of vmcnt(0) lgkmcnt(0)) after it, directly before the
# store that reads $agpr0.
|
|
|
|
|
|
name: exit_bundle
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr1_vgpr2
|
|
; GCN-LABEL: name: exit_bundle
|
|
; GCN: liveins: $vgpr1_vgpr2
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
|
|
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: }
|
|
; GCN-NEXT: S_WAITCNT 112
|
|
; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
|
|
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
}
|
|
|
|
FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
|
|
...
|
|
|
|
---
|
|
|
|
# Def is in bundle, use is in another bundle
# The expected output leaves both bundles unchanged and places
# S_WAITCNT 112 (gfx9 encoding of vmcnt(0) lgkmcnt(0)) between them.
|
|
|
|
|
|
name: cross_bundle
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr1_vgpr2
|
|
; GCN-LABEL: name: cross_bundle
|
|
; GCN: liveins: $vgpr1_vgpr2
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
|
|
; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: }
|
|
; GCN-NEXT: S_WAITCNT 112
|
|
; GCN-NEXT: BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
|
|
; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: }
|
|
BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
|
|
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
}
|
|
BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
|
|
FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
}
|
|
...
|
|
|
|
---
|
|
# agpr should be disjoint and tracked separately from vgpr
# The only outstanding load here writes $vgpr226; $agpr0 and $agpr1 are
# never the destination of a load. The expected output therefore contains
# no wait other than the leading S_WAITCNT 0: both stores that read AGPRs
# must follow the shift directly.
# NOTE(review): $vgpr226 was presumably chosen because 226 was the former
# AGPR offset in SIInsertWaitcnts, which would have made $vgpr226 collide
# with $agpr0 in the tracking -- confirm against the pass.
|
|
|
|
name: high_register_collision
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: high_register_collision
|
|
; GCN: S_WAITCNT 0
|
|
; GCN-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
|
|
; GCN-NEXT: $vgpr226 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
|
|
; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr1, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
|
|
$vgpr226 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
|
|
$vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
|
|
FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr1, 0, 0, implicit $exec, implicit $flat_scr
|
|
FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
S_ENDPGM 0
|
|
...
|