# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=amdgpu-regbankselect -regbankselect-fast -o - %s | FileCheck -check-prefix=FAST %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=amdgpu-regbankselect -regbankselect-greedy -o - %s | FileCheck -check-prefix=GREEDY %s
# We see the offset is a VGPR, but this is due to a constant for some
# reason ending up in a VGPR. This shouldn't really ever happen, but
# make sure this doesn't break when looking through copies for the add
# operands.
---
name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset
legalized: true
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0
    ; FAST-LABEL: name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset
    ; FAST: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0
    ; FAST-NEXT: {{ $}}
    ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
    ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 256
    ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
    ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY3]], [[COPY2]]
    ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
    ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY3]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32))
    ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
    ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset
    ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0
    ; GREEDY-NEXT: {{ $}}
    ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
    ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 256
    ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
    ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[C]]
    ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
    ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY2]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32))
    ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:_(s32) = COPY $sgpr0
    %2:vgpr(s32) = G_CONSTANT i32 256
    %3:_(s32) = G_ADD %1, %2
    %4:_(s32) = G_AMDGPU_S_BUFFER_LOAD %0, %3, 0
    S_ENDPGM 0, implicit %4
...
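
# The -60 offset is negative, so (as the checks below show) it is not folded
# into the immediate offset field of the buffer load: the G_ADD is kept and
# the load's immediate offset stays 0.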
---
name: s_buffer_load_negative_offset
legalized: true
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
    ; FAST-LABEL: name: s_buffer_load_negative_offset
    ; FAST: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
    ; FAST-NEXT: {{ $}}
    ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60
    ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
    ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
    ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32))
    ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
    ; GREEDY-LABEL: name: s_buffer_load_negative_offset
    ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
    ; GREEDY-NEXT: {{ $}}
    ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60
    ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
    ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
    ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32))
    ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:_(s32) = COPY $vgpr0
    %2:_(s32) = G_CONSTANT i32 -60
    %3:_(s32) = G_ADD %1, %2
    %4:_(s32) = G_AMDGPU_S_BUFFER_LOAD %0, %3, 0
    S_ENDPGM 0, implicit %4
...