This removes PromoteAlloca's reliance on a second SROA run to eliminate the alloca completely; the pass now performs the full transformation directly. Note that PromoteAlloca still relies on SROA running first to canonicalize the IR. For instance, PromoteAlloca no longer handles aggregate types, because those should already have been simplified by SROA before reaching the pass. Reviewed By: #amdgpu, arsenm. Differential Revision: https://reviews.llvm.org/D152706
50 lines
1.7 KiB
LLVM
50 lines
1.7 KiB
LLVM
; RUN: opt -mtriple=amdgcn-- -O1 -S < %s | FileCheck %s --check-prefixes=FUNC,LOOP
|
|
; RUN: opt -mtriple=amdgcn-- -passes='default<O1>' -S < %s | FileCheck %s --check-prefixes=FUNC,LOOP
|
|
; RUN: opt -mtriple=amdgcn-- -O1 -S -disable-promote-alloca-to-vector < %s | FileCheck %s --check-prefixes=FUNC,FULL-UNROLL
|
|
; RUN: opt -mtriple=amdgcn-- -passes='default<O1>' -S -disable-promote-alloca-to-vector < %s | FileCheck %s --check-prefixes=FUNC,FULL-UNROLL
|
|
|
|
; The "A5" datalayout component makes address space 5 the alloca address
; space, which the addrspace(5) alloca in @private_memory requires.
target datalayout = "A5"
|
|
|
|
; This test contains a simple loop that initializes an array declared in
; private memory. This loop would be fully unrolled if we could not SROA
; the alloca. Check that we successfully eliminate it before the unroll,
; so that we do not need to fully unroll it.
|
|
|
|
; FUNC-LABEL: @private_memory
|
|
; LOOP-NOT: = alloca
|
|
; LOOP: loop.header:
|
|
; LOOP: br i1 %{{[^,]+}}, label %exit, label %loop.header
|
|
|
|
; FULL-UNROLL: alloca
|
|
; FULL-UNROLL-COUNT-256: store i32 {{[0-9]+}}, ptr addrspace(5)
|
|
; FULL-UNROLL-NOT: br
|
|
|
|
; FUNC: store i32 %{{[^,]+}}, ptr addrspace(1) %out
|
|
; Kernel under test. It stores the loop counter into a 16-element i32 array
; allocated in addrspace(5) on each of 256 iterations, then loads the element
; selected by %n and writes it to %out.
;
;   %out - addrspace(1) pointer that receives the loaded value.
;   %n   - runtime value mixed into the store index and used as the load index.
define amdgpu_kernel void @private_memory(ptr addrspace(1) %out, i32 %n) {
entry:
  %alloca = alloca [16 x i32], addrspace(5)
  br label %loop.header

loop.header:
  ; %counter is 0 when entered from %entry and %inc on the back edge.
  %counter = phi i32 [0, %entry], [%inc, %loop.inc]
  br label %loop.body

loop.body:
  ; Mix in %n so the slot is not a compile-time constant, then mask with 15
  ; so the store index always stays within the 16 array elements.
  %salt = xor i32 %counter, %n
  %idx = and i32 %salt, 15
  %ptr = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
  store i32 %counter, ptr addrspace(5) %ptr
  br label %loop.inc

loop.inc:
  %inc = add i32 %counter, 1
  ; The comparison uses %counter, not %inc: the loop exits after the
  ; iteration in which %counter is 255, giving 256 iterations in total.
  %cmp = icmp sge i32 %counter, 255
  br i1 %cmp, label %exit, label %loop.header

exit:
  ; Unlike the stores above, the load index is %n itself, without masking.
  %gep = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %n
  %load = load i32, ptr addrspace(5) %gep
  store i32 %load, ptr addrspace(1) %out
  ret void
}
|