Files
clang-p2996/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
Matt Arsenault f0a88dbaab LoadStoreVectorizer: Split even sized illegal chains properly
Implement isLegalToVectorizeLoadChain for AMDGPU to avoid
producing private address spaces accesses that will need to be
split up later. This was doing the wrong thing in the case
where the queried chain was an even number of elements.

A possible <4 x i32> store was being split into
store <2 x i32>
store i32
store i32

rather than
store <2 x i32>
store <2 x i32>

when legal.

llvm-svn: 295933
2017-02-23 03:58:53 +00:00

133 lines
5.4 KiB
LLVM

; RUN: opt -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
; RUN: opt -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
; ALL-LABEL: @load_unknown_offset_align1_i8(
; ALL: alloca [128 x i8], align 1
; UNALIGNED: load <2 x i8>, <2 x i8>* %{{[0-9]+}}, align 1{{$}}
; ALIGNED: load i8, i8* %ptr0, align 1{{$}}
; ALIGNED: load i8, i8* %ptr1, align 1{{$}}
define void @load_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 {
%alloca = alloca [128 x i8], align 1
%ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset
%val0 = load i8, i8* %ptr0, align 1
%ptr1 = getelementptr inbounds i8, i8* %ptr0, i32 1
%val1 = load i8, i8* %ptr1, align 1
%add = add i8 %val0, %val1
store i8 %add, i8 addrspace(1)* %out
ret void
}
; ALL-LABEL: @load_unknown_offset_align1_i16(
; ALL: alloca [128 x i16], align 1{{$}}
; UNALIGNED: load <2 x i16>, <2 x i16>* %{{[0-9]+}}, align 1{{$}}
; ALIGNED: load i16, i16* %ptr0, align 1{{$}}
; ALIGNED: load i16, i16* %ptr1, align 1{{$}}
define void @load_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 {
%alloca = alloca [128 x i16], align 1
%ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset
%val0 = load i16, i16* %ptr0, align 1
%ptr1 = getelementptr inbounds i16, i16* %ptr0, i32 1
%val1 = load i16, i16* %ptr1, align 1
%add = add i16 %val0, %val1
store i16 %add, i16 addrspace(1)* %out
ret void
}
; FIXME: Although the offset is unknown here, we know it is a multiple
; of the element size, so should still be align 4
; ALL-LABEL: @load_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 1
; UNALIGNED: load <2 x i32>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}
; ALIGNED: load i32, i32* %ptr0, align 1
; ALIGNED: load i32, i32* %ptr1, align 1
define void @load_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
%alloca = alloca [128 x i32], align 1
%ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
%val0 = load i32, i32* %ptr0, align 1
%ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
%val1 = load i32, i32* %ptr1, align 1
%add = add i32 %val0, %val1
store i32 %add, i32 addrspace(1)* %out
ret void
}
; FIXME: Should always increase alignment of the load
; Make sure alloca alignment isn't decreased
; ALL-LABEL: @load_alloca16_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 16
; UNALIGNED: load <2 x i32>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}
; FIXME: Should change alignment
; ALIGNED: load i32
; ALIGNED: load i32
define void @load_alloca16_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
%alloca = alloca [128 x i32], align 16
%ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
%val0 = load i32, i32* %ptr0, align 1
%ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
%val1 = load i32, i32* %ptr1, align 1
%add = add i32 %val0, %val1
store i32 %add, i32 addrspace(1)* %out
ret void
}
; ALL-LABEL: @store_unknown_offset_align1_i8(
; ALL: alloca [128 x i8], align 1
; UNALIGNED: store <2 x i8> <i8 9, i8 10>, <2 x i8>* %{{[0-9]+}}, align 1{{$}}
; ALIGNED: store i8 9, i8* %ptr0, align 1{{$}}
; ALIGNED: store i8 10, i8* %ptr1, align 1{{$}}
define void @store_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 {
%alloca = alloca [128 x i8], align 1
%ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset
store i8 9, i8* %ptr0, align 1
%ptr1 = getelementptr inbounds i8, i8* %ptr0, i32 1
store i8 10, i8* %ptr1, align 1
ret void
}
; ALL-LABEL: @store_unknown_offset_align1_i16(
; ALL: alloca [128 x i16], align 1
; UNALIGNED: store <2 x i16> <i16 9, i16 10>, <2 x i16>* %{{[0-9]+}}, align 1{{$}}
; ALIGNED: store i16 9, i16* %ptr0, align 1{{$}}
; ALIGNED: store i16 10, i16* %ptr1, align 1{{$}}
define void @store_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 {
%alloca = alloca [128 x i16], align 1
%ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset
store i16 9, i16* %ptr0, align 1
%ptr1 = getelementptr inbounds i16, i16* %ptr0, i32 1
store i16 10, i16* %ptr1, align 1
ret void
}
; FIXME: Although the offset is unknown here, we know it is a multiple
; of the element size, so it still should be align 4.
; ALL-LABEL: @store_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 1
; UNALIGNED: store <2 x i32> <i32 9, i32 10>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}
; ALIGNED: store i32 9, i32* %ptr0, align 1
; ALIGNED: store i32 10, i32* %ptr1, align 1
define void @store_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
%alloca = alloca [128 x i32], align 1
%ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
store i32 9, i32* %ptr0, align 1
%ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
store i32 10, i32* %ptr1, align 1
ret void
}
attributes #0 = { nounwind }