Uniformity analysis needs to be the fundamental basis for regbank decisions. The considerations of the default pass are secondary, but potentially useful in some edge cases (e.g. selecting AGPRs when arbitrary loads and stores can use them directly). This needs to be a separate pass since it requires new analysis dependencies. This change is just the boilerplate to subclass the existing pass; it does nothing different yet.
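For orientation, here is a minimal sketch of what such a boilerplate subclass could look like. It assumes the generic RegBankSelect pass can be constructed with the subclass's own pass ID, and the uniformity-analysis wrapper name and pass string are illustrative, not verbatim from this change; pass-registration boilerplate is omitted.

```cpp
// AMDGPURegBankSelect.cpp -- sketch only, not the actual patch contents.
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"

using namespace llvm;

namespace {

class AMDGPURegBankSelect final : public RegBankSelect {
public:
  static char ID;

  // Assumes the generic pass grows a constructor taking the subclass's
  // pass ID alongside the existing Fast/Greedy mode selection.
  AMDGPURegBankSelect(Mode RunningMode = Fast)
      : RegBankSelect(ID, RunningMode) {}

  StringRef getPassName() const override {
    return "AMDGPU Register Bank Select";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // The new analysis dependency that forces this to be a separate
    // pass; the exact wrapper-pass name here is an assumption.
    AU.addRequired<MachineUniformityAnalysisPass>();
    RegBankSelect::getAnalysisUsage(AU);
  }

  // runOnMachineFunction is inherited unchanged for now;
  // uniformity-driven bank assignment would hook in later.
};

} // end anonymous namespace

char AMDGPURegBankSelect::ID = 0;
```

Because the subclass changes no behavior yet, both the fast and greedy modes of the generic pass keep working under the new pass name, which is what the two RUN lines in the test below exercise.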
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=amdgpu-regbankselect -regbankselect-fast -o - %s | FileCheck -check-prefix=FAST %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=amdgpu-regbankselect -regbankselect-greedy -o - %s | FileCheck -check-prefix=GREEDY %s

# We see the offset is a VGPR, but this is due to a constant for some
# reason ending up in a VGPR. This shouldn't really ever happen, but
# make sure this doesn't break when looking through copies for the add
# operands.

---
name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset
legalized: true
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0

    ; FAST-LABEL: name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset
    ; FAST: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0
    ; FAST-NEXT: {{  $}}
    ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
    ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 256
    ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
    ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY3]], [[COPY2]]
    ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
    ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY3]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32))
    ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
    ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset
    ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0
    ; GREEDY-NEXT: {{  $}}
    ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
    ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 256
    ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
    ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[C]]
    ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
    ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY2]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32))
    ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:_(s32) = COPY $sgpr0
    %2:vgpr(s32) = G_CONSTANT i32 256
    %3:_(s32) = G_ADD %1, %2
    %4:_(s32) = G_AMDGPU_S_BUFFER_LOAD %0, %3, 0
    S_ENDPGM 0, implicit %4

...

---
name: s_buffer_load_negative_offset
legalized: true
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0

    ; FAST-LABEL: name: s_buffer_load_negative_offset
    ; FAST: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
    ; FAST-NEXT: {{  $}}
    ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60
    ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
    ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
    ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32))
    ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
    ; GREEDY-LABEL: name: s_buffer_load_negative_offset
    ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
    ; GREEDY-NEXT: {{  $}}
    ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60
    ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
    ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
    ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32))
    ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:_(s32) = COPY $vgpr0
    %2:_(s32) = G_CONSTANT i32 -60
    %3:_(s32) = G_ADD %1, %2
    %4:_(s32) = G_AMDGPU_S_BUFFER_LOAD %0, %3, 0
    S_ENDPGM 0, implicit %4

...