Summary: TargetBaseAlign is no longer required since LSV checks whether the target allows misaligned accesses. A constant defining a base alignment is still needed for stack accesses where alignment can be adjusted. The previous patch (D22936) was reverted because tests were failing. This patch also fixes the cause of those failures: - x86 failing tests either did not have the right target, or the right alignment. - NVPTX failing tests did not have the right alignment. - AMDGPU failing test (merge-stores) should allow vectorization with the given alignment, but the target info considers <3 x i32> a non-standard type and gives up early. This patch removes the condition and only checks for a maximum size allowed and relies on the next condition checking for %4 for correctness. This should be revisited to include 3xi32 as an MVT (on arsenm's non-immediate todo list). Note that checking the sizeInBits for an MVT is undefined (leads to an assertion failure), so we need to create an EVT, hence the interface change in allowsMisaligned to include the Context. Reviewers: arsenm, jlebar, tstellarAMD Subscribers: jholewinski, arsenm, mzolotukhin, llvm-commits Differential Revision: https://reviews.llvm.org/D23068 llvm-svn: 277735
88 lines
2.5 KiB
LLVM
88 lines
2.5 KiB
LLVM
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s

; Module-level data layout string.
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Vectorized subsets of the load/store chains in the presence of
; interleaved loads/stores

; CHECK-LABEL: @interleave_2L_2S(
; CHECK: load <2 x i32>
; CHECK: load i32
; CHECK: store <2 x i32>
; CHECK: load i32
;
; Input access order on %ptr (i32 elements):
;   load [1], load [0], store [1], store [0], load [1], load [2].
; The FileCheck directives above expect, in output order, a <2 x i32> load,
; a scalar i32 load, a <2 x i32> store and another scalar i32 load.
define void @interleave_2L_2S(i32* noalias %ptr) {
  ; Addresses of elements 0, 1 and 2 of %ptr.
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2

  ; The instruction order below is the test input; do not reorder.
  %l1 = load i32, i32* %next.gep1, align 4
  %l2 = load i32, i32* %next.gep, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4

  ret void
}

; CHECK-LABEL: @interleave_3L_2S_1L(
; CHECK: load <3 x i32>
; CHECK: store <2 x i32>
; CHECK: load i32
;
; Input access order on %ptr (i32 elements):
;   load [0], load [1], store [1], store [0], load [1], load [2].
; The FileCheck directives above expect, in output order, a <3 x i32> load,
; a <2 x i32> store and a scalar i32 load.

define void @interleave_3L_2S_1L(i32* noalias %ptr) {
  ; Addresses of elements 0, 1 and 2 of %ptr.
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2

  ; The instruction order below is the test input; do not reorder.
  %l2 = load i32, i32* %next.gep, align 4
  %l1 = load i32, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4

  ret void
}

; CHECK-LABEL: @chain_suffix(
; CHECK: load i32
; CHECK: store <2 x i32>
; CHECK: load <2 x i32>
;
; Input access order on %ptr (i32 elements):
;   load [0], store [1], store [0], load [1], load [2].
; The FileCheck directives above expect, in output order, a scalar i32 load,
; a <2 x i32> store and a <2 x i32> load.
define void @chain_suffix(i32* noalias %ptr) {
  ; Addresses of elements 0, 1 and 2 of %ptr.
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2

  ; The instruction order below is the test input; do not reorder.
  %l2 = load i32, i32* %next.gep, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4

  ret void
}

; CHECK-LABEL: @chain_prefix_suffix(
; CHECK: load <2 x i32>
; CHECK: store <2 x i32>
; CHECK: load <3 x i32>
;
; Input access order on %ptr (i32 elements):
;   load [0], load [1], store [1], store [2], load [1], load [2], load [3].
; The FileCheck directives above expect, in output order, a <2 x i32> load,
; a <2 x i32> store and a <3 x i32> load.
define void @chain_prefix_suffix(i32* noalias %ptr) {
  ; Addresses of elements 0, 1, 2 and 3 of %ptr.
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2
  %next.gep3 = getelementptr i32, i32* %ptr, i64 3

  ; The instruction order below is the test input; do not reorder.
  %l1 = load i32, i32* %next.gep, align 4
  %l2 = load i32, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep2, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4
  %l5 = load i32, i32* %next.gep3, align 4

  ret void
}
