Files
clang-p2996/llvm/test/CodeGen/AMDGPU/sroa-before-unroll.ll
pvanhout 3890a3b113 [AMDGPU] Use SSAUpdater in PromoteAlloca
This allows PromoteAlloca to not be reliant on a second SROA run to remove the alloca completely. It just does the full transformation directly.

Note PromoteAlloca is still reliant on SROA running first to
canonicalize the IR. For instance, PromoteAlloca will no longer handle aggregate types because those should be simplified by SROA before reaching the pass.

Reviewed By: #amdgpu, arsenm

Differential Revision: https://reviews.llvm.org/D152706
2023-07-25 07:44:47 +02:00

50 lines
1.7 KiB
LLVM

; Test invocations. Each configuration is exercised twice, once spelled as
; -O1 and once as -passes='default<O1>'. The first pair is checked against the
; FUNC and LOOP prefixes; the second pair adds
; -disable-promote-alloca-to-vector and is checked against the FUNC and
; FULL-UNROLL prefixes instead.
; RUN: opt -mtriple=amdgcn-- -O1 -S < %s | FileCheck %s --check-prefixes=FUNC,LOOP
; RUN: opt -mtriple=amdgcn-- -passes='default<O1>' -S < %s | FileCheck %s --check-prefixes=FUNC,LOOP
; RUN: opt -mtriple=amdgcn-- -O1 -S -disable-promote-alloca-to-vector < %s | FileCheck %s --check-prefixes=FUNC,FULL-UNROLL
; RUN: opt -mtriple=amdgcn-- -passes='default<O1>' -S -disable-promote-alloca-to-vector < %s | FileCheck %s --check-prefixes=FUNC,FULL-UNROLL
; The "A5" data layout component selects address space 5 for allocas, which
; matches the addrspace(5) alloca used in @private_memory.
target datalayout = "A5"
; This test contains a simple loop that initializes an array declared in
; private memory. This loop would be fully unrolled if the alloca could not
; be eliminated (SROA'd). Check that in the default pipeline the alloca is
; gone before unrolling, so the loop is kept, and that with
; -disable-promote-alloca-to-vector the alloca remains and the loop is
; fully unrolled into 256 stores.
; FUNC-LABEL: @private_memory
; LOOP-NOT: = alloca
; LOOP: loop.header:
; LOOP: br i1 %{{[^,]+}}, label %exit, label %loop.header
; FULL-UNROLL: alloca
; FULL-UNROLL-COUNT-256: store i32 {{[0-9]+}}, ptr addrspace(5)
; FULL-UNROLL-NOT: br
; FUNC: store i32 %{{[^,]+}}, ptr addrspace(1) %out
; Kernel under test. A 256-iteration loop stores the loop counter into a
; 16-element i32 array in addrspace(5) at a data-dependent index; after the
; loop one element is loaded back and written to %out.
define amdgpu_kernel void @private_memory(ptr addrspace(1) %out, i32 %n) {
entry:
; The 16 x i32 array in address space 5; this is the alloca the checks above
; expect to be either removed or kept, depending on the configuration.
%alloca = alloca [16 x i32], addrspace(5)
br label %loop.header
loop.header:
; %counter is 0 on entry and takes %inc from %loop.inc on every back edge.
%counter = phi i32 [0, %entry], [%inc, %loop.inc]
br label %loop.body
loop.body:
; The store index is (%counter xor %n) masked with 15, so it depends on the
; kernel argument %n but always falls within the 16 array elements.
%salt = xor i32 %counter, %n
%idx = and i32 %salt, 15
%ptr = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
store i32 %counter, ptr addrspace(5) %ptr
br label %loop.inc
loop.inc:
%inc = add i32 %counter, 1
; The comparison uses the pre-increment %counter, so the loop exits after the
; iteration with %counter == 255, i.e. the body runs 256 times.
%cmp = icmp sge i32 %counter, 255
br i1 %cmp, label %exit, label %loop.header
exit:
; NOTE(review): %n indexes the array directly here, without the "and 15" mask
; used in the loop body - presumably intentional to keep the read dynamic;
; confirm.
%gep = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %n
%load = load i32, ptr addrspace(5) %gep
store i32 %load, ptr addrspace(1) %out
ret void
}