Files
clang-p2996/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir
Aaditya 0bd1c87996 [AMDGPU] Support divergent sized dynamic alloca (#121148)
Currently, the AMDGPU backend can handle uniform-sized dynamic allocas.
This patch extends support to divergent-sized dynamic allocas.
When the size argument of a dynamic alloca is divergent, 
a wave-wide reduction is performed to get the required stack space. 
`@llvm.amdgcn.wave.reduce.umax` is used to perform the 
wave reduction.

Dynamic allocas are not completely supported yet, 
as the stack is not properly restored on function exit.
This patch doesn't attempt to address the aforementioned issue.

Note: The compiler already zero-extends or truncates all other
types (of the alloca size argument) to i32.
2025-01-06 12:28:24 +07:00

623 lines
27 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck -check-prefix=WAVE64 %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck -check-prefix=WAVE64 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck -check-prefix=WAVE32 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck -check-prefix=WAVE32 %s
---
# G_DYN_STACKALLOC with the size read from $sgpr0 and a requested alignment of 1.
# Expected output for both check prefixes: the size is shifted left by a
# constant (6 in the wave64 checks, 5 in the wave32 checks; presumably log2 of
# the wave size -- confirm), $sp_reg is copied, the shifted size is added to
# that copy, and the sum is written back to $sp_reg. The copy of the previous
# $sp_reg value is what S_ENDPGM uses as the allocation result. No G_PTRMASK
# realignment is expected. All values are assigned to the sgpr bank.
# NOTE(review): frameInfo maxAlignment is 2 while the stack object and the
# G_DYN_STACKALLOC operand both say 1 -- confirm the mismatch is intentional.
name: test_dyn_stackalloc_sgpr_align1
legalized: true
frameInfo:
maxAlignment: 2
stack:
- { id: 0, type: variable-sized, alignment: 1 }
body: |
bb.0:
liveins: $sgpr0
; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align1
; WAVE64: liveins: $sgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
;
; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align1
; WAVE32: liveins: $sgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
%0:_(s32) = COPY $sgpr0
%1:_(p5) = G_DYN_STACKALLOC %0, 1
S_ENDPGM 0, implicit %1
...
---
# G_DYN_STACKALLOC with the size read from $sgpr0 and a requested alignment of 2.
# Expected output for both check prefixes: the size is shifted left by a
# constant (6 in the wave64 checks, 5 in the wave32 checks), $sp_reg is copied,
# the shifted size is added to that copy, and the sum is written back to
# $sp_reg. The copy of the previous $sp_reg value is the allocation result
# used by S_ENDPGM. No G_PTRMASK realignment is expected at this alignment.
name: test_dyn_stackalloc_sgpr_align2
legalized: true
frameInfo:
maxAlignment: 2
stack:
- { id: 0, type: variable-sized, alignment: 2 }
body: |
bb.0:
liveins: $sgpr0
; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align2
; WAVE64: liveins: $sgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
;
; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align2
; WAVE32: liveins: $sgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
%0:_(s32) = COPY $sgpr0
%1:_(p5) = G_DYN_STACKALLOC %0, 2
S_ENDPGM 0, implicit %1
...
---
# G_DYN_STACKALLOC with the size read from $sgpr0 and a requested alignment of 4.
# Expected output for both check prefixes: the size is shifted left by a
# constant (6 in the wave64 checks, 5 in the wave32 checks), $sp_reg is copied,
# the shifted size is added to that copy, and the sum is written back to
# $sp_reg. The copy of the previous $sp_reg value is the allocation result
# used by S_ENDPGM. No G_PTRMASK realignment is expected at this alignment.
name: test_dyn_stackalloc_sgpr_align4
legalized: true
frameInfo:
maxAlignment: 4
stack:
- { id: 0, type: variable-sized, alignment: 4 }
body: |
bb.0:
liveins: $sgpr0
; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align4
; WAVE64: liveins: $sgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
;
; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align4
; WAVE32: liveins: $sgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
%0:_(s32) = COPY $sgpr0
%1:_(p5) = G_DYN_STACKALLOC %0, 4
S_ENDPGM 0, implicit %1
...
---
# G_DYN_STACKALLOC with the size read from $sgpr0 and a requested alignment of 8.
# Expected output for both check prefixes: the size is shifted left by a
# constant (6 in the wave64 checks, 5 in the wave32 checks), $sp_reg is copied,
# the shifted size is added to that copy, and the sum is written back to
# $sp_reg. The copy of the previous $sp_reg value is the allocation result
# used by S_ENDPGM. No G_PTRMASK realignment is expected at this alignment.
name: test_dyn_stackalloc_sgpr_align8
legalized: true
frameInfo:
maxAlignment: 8
stack:
- { id: 0, type: variable-sized, alignment: 8 }
body: |
bb.0:
liveins: $sgpr0
; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align8
; WAVE64: liveins: $sgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
;
; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align8
; WAVE32: liveins: $sgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
%0:_(s32) = COPY $sgpr0
%1:_(p5) = G_DYN_STACKALLOC %0, 8
S_ENDPGM 0, implicit %1
...
---
# G_DYN_STACKALLOC with the size read from $sgpr0 and a requested alignment of 16.
# Expected output for both check prefixes: the size is shifted left by a
# constant (6 in the wave64 checks, 5 in the wave32 checks), $sp_reg is copied,
# the shifted size is added to that copy, and the sum is written back to
# $sp_reg. The copy of the previous $sp_reg value is the allocation result
# used by S_ENDPGM. No G_PTRMASK realignment is expected at this alignment.
name: test_dyn_stackalloc_sgpr_align16
legalized: true
frameInfo:
maxAlignment: 16
stack:
- { id: 0, type: variable-sized, alignment: 16 }
body: |
bb.0:
liveins: $sgpr0
; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align16
; WAVE64: liveins: $sgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
;
; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align16
; WAVE32: liveins: $sgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
%0:_(s32) = COPY $sgpr0
%1:_(p5) = G_DYN_STACKALLOC %0, 16
S_ENDPGM 0, implicit %1
...
---
# G_DYN_STACKALLOC with the size read from $sgpr0 and a requested alignment of 32.
# Expected output: the size is shifted left by 6 (wave64 checks) or 5 (wave32
# checks). Before the size is added, the copied $sp_reg value is rounded up
# with a G_PTR_ADD followed by a G_PTRMASK: constants 2047 / -2048 in the
# wave64 checks (2048 == 32 << 6) and 1023 / -1024 in the wave32 checks
# (1024 == 32 << 5). The masked pointer is the allocation result used by
# S_ENDPGM; masked pointer plus shifted size is written back to $sp_reg.
name: test_dyn_stackalloc_sgpr_align32
legalized: true
frameInfo:
maxAlignment: 32
stack:
- { id: 0, type: variable-sized, alignment: 32 }
body: |
bb.0:
liveins: $sgpr0
; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align32
; WAVE64: liveins: $sgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2047
; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32)
; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048
; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32)
; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32)
; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5)
; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5)
;
; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align32
; WAVE32: liveins: $sgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1023
; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32)
; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024
; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32)
; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32)
; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5)
; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5)
%0:_(s32) = COPY $sgpr0
%1:_(p5) = G_DYN_STACKALLOC %0, 32
S_ENDPGM 0, implicit %1
...
---
# G_DYN_STACKALLOC with the size read from $sgpr0 and a requested alignment of 64.
# Expected output: the size is shifted left by 6 (wave64 checks) or 5 (wave32
# checks). Before the size is added, the copied $sp_reg value is rounded up
# with a G_PTR_ADD followed by a G_PTRMASK: constants 4095 / -4096 in the
# wave64 checks (4096 == 64 << 6) and 2047 / -2048 in the wave32 checks
# (2048 == 64 << 5). The masked pointer is the allocation result used by
# S_ENDPGM; masked pointer plus shifted size is written back to $sp_reg.
name: test_dyn_stackalloc_sgpr_align64
legalized: true
frameInfo:
maxAlignment: 64
stack:
- { id: 0, type: variable-sized, alignment: 64 }
body: |
bb.0:
liveins: $sgpr0
; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align64
; WAVE64: liveins: $sgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095
; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32)
; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096
; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32)
; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32)
; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5)
; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5)
;
; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align64
; WAVE32: liveins: $sgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2047
; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32)
; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048
; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32)
; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32)
; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5)
; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5)
%0:_(s32) = COPY $sgpr0
%1:_(p5) = G_DYN_STACKALLOC %0, 64
S_ENDPGM 0, implicit %1
...
---
# G_DYN_STACKALLOC with the size read from $sgpr0 and a requested alignment of 128.
# Expected output: the size is shifted left by 6 (wave64 checks) or 5 (wave32
# checks). Before the size is added, the copied $sp_reg value is rounded up
# with a G_PTR_ADD followed by a G_PTRMASK: constants 8191 / -8192 in the
# wave64 checks (8192 == 128 << 6) and 4095 / -4096 in the wave32 checks
# (4096 == 128 << 5). The masked pointer is the allocation result used by
# S_ENDPGM; masked pointer plus shifted size is written back to $sp_reg.
# NOTE(review): frameInfo maxAlignment is 64 while the stack object and the
# G_DYN_STACKALLOC operand both say 128 -- confirm the mismatch is intentional.
name: test_dyn_stackalloc_sgpr_align128
legalized: true
frameInfo:
maxAlignment: 64
stack:
- { id: 0, type: variable-sized, alignment: 128 }
body: |
bb.0:
liveins: $sgpr0
; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align128
; WAVE64: liveins: $sgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8191
; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32)
; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -8192
; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32)
; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32)
; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5)
; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5)
;
; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align128
; WAVE32: liveins: $sgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32)
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095
; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32)
; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096
; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32)
; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32)
; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5)
; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5)
%0:_(s32) = COPY $sgpr0
%1:_(p5) = G_DYN_STACKALLOC %0, 128
S_ENDPGM 0, implicit %1
...
---
# G_DYN_STACKALLOC whose size is the constant 32 (G_CONSTANT i32 32) with a
# requested alignment of 4. The function declares no liveins.
# Expected output for both check prefixes: the constant is assigned to the sgpr
# bank and shifted left by 6 (wave64 checks) or 5 (wave32 checks); $sp_reg is
# copied, the shifted size is added to that copy, and the sum is written back
# to $sp_reg. The copy of the previous $sp_reg value is the allocation result
# used by S_ENDPGM. No G_PTRMASK realignment is expected at this alignment.
name: test_dyn_stackalloc_sgpr_constant_align4
legalized: true
frameInfo:
maxAlignment: 4
stack:
- { id: 0, type: variable-sized, alignment: 4 }
body: |
bb.0:
; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_constant_align4
; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32
; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32)
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY [[COPY]](p5)
; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32)
; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]](p5)
;
; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align4
; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32
; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32)
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY [[COPY]](p5)
; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32)
; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]](p5)
%0:_(s32) = G_CONSTANT i32 32
%1:_(p5) = G_DYN_STACKALLOC %0, 4
S_ENDPGM 0, implicit %1
...
---
# G_DYN_STACKALLOC whose size is the constant 32 (G_CONSTANT i32 32) with a
# requested alignment of 8.
# Expected output for both check prefixes: the constant is assigned to the sgpr
# bank and shifted left by 6 (wave64 checks) or 5 (wave32 checks); $sp_reg is
# copied, the shifted size is added to that copy, and the sum is written back
# to $sp_reg. The copy of the previous $sp_reg value is the allocation result
# used by S_ENDPGM. No G_PTRMASK realignment is expected at this alignment.
# NOTE(review): $sgpr0 is declared as a livein but no instruction in the body
# reads it -- confirm whether the livein is needed.
name: test_dyn_stackalloc_sgpr_constant_align8
legalized: true
frameInfo:
maxAlignment: 8
stack:
- { id: 0, type: variable-sized, alignment: 8 }
body: |
bb.0:
liveins: $sgpr0
; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_constant_align8
; WAVE64: liveins: $sgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32
; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32)
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY [[COPY]](p5)
; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32)
; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]](p5)
;
; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align8
; WAVE32: liveins: $sgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32
; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32)
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY [[COPY]](p5)
; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32)
; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]](p5)
%0:_(s32) = G_CONSTANT i32 32
%1:_(p5) = G_DYN_STACKALLOC %0, 8
S_ENDPGM 0, implicit %1
...
---
# G_DYN_STACKALLOC whose size is the constant 32 (G_CONSTANT i32 32) with a
# requested alignment of 16.
# Expected output for both check prefixes: the constant is assigned to the sgpr
# bank and shifted left by 6 (wave64 checks) or 5 (wave32 checks); $sp_reg is
# copied, the shifted size is added to that copy, and the sum is written back
# to $sp_reg. The copy of the previous $sp_reg value is the allocation result
# used by S_ENDPGM. No G_PTRMASK realignment is expected at this alignment.
# NOTE(review): $sgpr0 is declared as a livein but no instruction in the body
# reads it -- confirm whether the livein is needed.
name: test_dyn_stackalloc_sgpr_constant_align16
legalized: true
frameInfo:
maxAlignment: 16
stack:
- { id: 0, type: variable-sized, alignment: 16 }
body: |
bb.0:
liveins: $sgpr0
; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_constant_align16
; WAVE64: liveins: $sgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32
; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32)
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY [[COPY]](p5)
; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32)
; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]](p5)
;
; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align16
; WAVE32: liveins: $sgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32
; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32)
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY [[COPY]](p5)
; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32)
; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]](p5)
%0:_(s32) = G_CONSTANT i32 32
%1:_(p5) = G_DYN_STACKALLOC %0, 16
S_ENDPGM 0, implicit %1
...
---
# G_DYN_STACKALLOC whose size is the constant 32 (G_CONSTANT i32 32) with a
# requested alignment of 32.
# Expected output: the constant is assigned to the sgpr bank and shifted left
# by 6 (wave64 checks) or 5 (wave32 checks). Before the size is added, the
# copied $sp_reg value is rounded up with a G_PTR_ADD followed by a G_PTRMASK:
# constants 2047 / -2048 in the wave64 checks (2048 == 32 << 6) and
# 1023 / -1024 in the wave32 checks (1024 == 32 << 5). The masked pointer is
# the allocation result used by S_ENDPGM; masked pointer plus shifted size is
# written back to $sp_reg.
# NOTE(review): $sgpr0 is declared as a livein but no instruction in the body
# reads it -- confirm whether the livein is needed.
name: test_dyn_stackalloc_sgpr_constant_align32
legalized: true
frameInfo:
maxAlignment: 32
stack:
- { id: 0, type: variable-sized, alignment: 32 }
body: |
bb.0:
liveins: $sgpr0
; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_constant_align32
; WAVE64: liveins: $sgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32
; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32)
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2047
; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048
; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C3]](s32)
; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32)
; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5)
; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5)
;
; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align32
; WAVE32: liveins: $sgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32
; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32)
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1023
; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024
; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C3]](s32)
; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32)
; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5)
; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5)
%0:_(s32) = G_CONSTANT i32 32
%1:_(p5) = G_DYN_STACKALLOC %0, 32
S_ENDPGM 0, implicit %1
...
---
# G_DYN_STACKALLOC with the size read from $vgpr0 and a requested alignment of 4.
# Expected output for both check prefixes: the size copy is assigned to the
# vgpr bank, then a G_INTRINSIC_CONVERGENT call to
# llvm.amdgcn.wave.reduce.umax produces an sgpr value from it. That sgpr value
# is shifted left by 6 (wave64 checks) or 5 (wave32 checks); $sp_reg is copied,
# the shifted size is added to that copy, and the sum is written back to
# $sp_reg. The copy of the previous $sp_reg value is the allocation result
# used by S_ENDPGM. No G_PTRMASK realignment is expected at this alignment.
name: test_dyn_stackalloc_vgpr_align4
legalized: true
frameInfo:
maxAlignment: 4
stack:
- { id: 0, type: variable-sized, alignment: 4 }
body: |
bb.0:
liveins: $vgpr0
; WAVE64-LABEL: name: test_dyn_stackalloc_vgpr_align4
; WAVE64: liveins: $vgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; WAVE64-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0
; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32)
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
;
; WAVE32-LABEL: name: test_dyn_stackalloc_vgpr_align4
; WAVE32: liveins: $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; WAVE32-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0
; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32)
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
%0:_(s32) = COPY $vgpr0
%1:_(p5) = G_DYN_STACKALLOC %0, 4
S_ENDPGM 0, implicit %1
...
---
# G_DYN_STACKALLOC with the size read from $vgpr0 and a requested alignment of 16.
# Expected output for both check prefixes: the size copy is assigned to the
# vgpr bank, then a G_INTRINSIC_CONVERGENT call to
# llvm.amdgcn.wave.reduce.umax produces an sgpr value from it. That sgpr value
# is shifted left by 6 (wave64 checks) or 5 (wave32 checks); $sp_reg is copied,
# the shifted size is added to that copy, and the sum is written back to
# $sp_reg. The copy of the previous $sp_reg value is the allocation result
# used by S_ENDPGM. No G_PTRMASK realignment is expected at this alignment.
name: test_dyn_stackalloc_vgpr_align16
legalized: true
frameInfo:
maxAlignment: 16
stack:
- { id: 0, type: variable-sized, alignment: 16 }
body: |
bb.0:
liveins: $vgpr0
; WAVE64-LABEL: name: test_dyn_stackalloc_vgpr_align16
; WAVE64: liveins: $vgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; WAVE64-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0
; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32)
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
;
; WAVE32-LABEL: name: test_dyn_stackalloc_vgpr_align16
; WAVE32: liveins: $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; WAVE32-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0
; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32)
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
%0:_(s32) = COPY $vgpr0
%1:_(p5) = G_DYN_STACKALLOC %0, 16
S_ENDPGM 0, implicit %1
...
---
# G_DYN_STACKALLOC with the size read from $vgpr0 and a requested alignment of 64.
# Expected output: the size copy is assigned to the vgpr bank, then a
# G_INTRINSIC_CONVERGENT call to llvm.amdgcn.wave.reduce.umax produces an sgpr
# value from it, which is shifted left by 6 (wave64 checks) or 5 (wave32
# checks). Before the size is added, the copied $sp_reg value is rounded up
# with a G_PTR_ADD followed by a G_PTRMASK: constants 4095 / -4096 in the
# wave64 checks (4096 == 64 << 6) and 2047 / -2048 in the wave32 checks
# (2048 == 64 << 5). The masked pointer is the allocation result used by
# S_ENDPGM; masked pointer plus shifted size is written back to $sp_reg.
name: test_dyn_stackalloc_vgpr_align64
legalized: true
frameInfo:
maxAlignment: 64
stack:
- { id: 0, type: variable-sized, alignment: 64 }
body: |
bb.0:
liveins: $vgpr0
; WAVE64-LABEL: name: test_dyn_stackalloc_vgpr_align64
; WAVE64: liveins: $vgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; WAVE64-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0
; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32)
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095
; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32)
; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096
; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32)
; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32)
; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5)
; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5)
;
; WAVE32-LABEL: name: test_dyn_stackalloc_vgpr_align64
; WAVE32: liveins: $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; WAVE32-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0
; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32)
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2047
; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32)
; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048
; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32)
; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32)
; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5)
; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5)
%0:_(s32) = COPY $vgpr0
%1:_(p5) = G_DYN_STACKALLOC %0, 64
S_ENDPGM 0, implicit %1
...