clang-p2996/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll
Alexey Bataev b51195dece [SLP]Fix PR63854: Add proper sorting of pointers for masked gathers.
If the masked gathers can be reordered, they may produce a strided access
pattern. When this reordering does not affect the common reordering, it is
better to try to reorder the masked gathers for better performance.

Differential Revision: https://reviews.llvm.org/D157009
2023-08-22 06:14:01 -07:00
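
For reference, the scalar IR in the test below roughly corresponds to a source loop of the following shape. This is a hypothetical C sketch, not taken from the commit: after the eight-iteration loop is fully unrolled, the stride-64 loads become candidates for a single masked gather with strided pointer offsets.

#include <stdlib.h>

/* Hypothetical source sketch: eight i8 elements read at a constant
 * 64-byte stride, passed through abs() and summed, mirroring the
 * scalar IR in the test body below. */
int sum_of_abs(const signed char *a) {
  int sum = 0;
  for (int i = 0; i < 8; ++i)   /* fully unrolled, then SLP-vectorized */
    sum += abs(a[i * 64]);
  return sum;
}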


; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=riscv64-unknown-linux -mattr=+v | FileCheck %s
define i32 @sum_of_abs(ptr noalias %a, ptr noalias %b) {
; CHECK-LABEL: define i32 @sum_of_abs
; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[A]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> <i64 0, i64 64, i64 128, i64 192, i64 256, i64 320, i64 384, i64 448>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP2]], i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> poison)
; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP3]], i1 false)
; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i8> [[TMP4]] to <8 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]])
; CHECK-NEXT: ret i32 [[TMP6]]
;
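; Scalar form below: eight i8 loads at a constant 64-byte stride, each run
; through llvm.abs.i8 and accumulated into a single i32 sum, which SLP turns
; into the masked gather checked above.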
entry:
%0 = load i8, ptr %a, align 1
%spec.select.i = tail call i8 @llvm.abs.i8(i8 %0, i1 false)
%conv = sext i8 %spec.select.i to i32
%arrayidx.1 = getelementptr inbounds i8, ptr %a, i64 64
%1 = load i8, ptr %arrayidx.1, align 1
%spec.select.i.1 = tail call i8 @llvm.abs.i8(i8 %1, i1 false)
%conv.1 = sext i8 %spec.select.i.1 to i32
%add.1 = add nsw i32 %conv, %conv.1
%arrayidx.2 = getelementptr inbounds i8, ptr %a, i64 128
%2 = load i8, ptr %arrayidx.2, align 1
%spec.select.i.2 = tail call i8 @llvm.abs.i8(i8 %2, i1 false)
%conv.2 = sext i8 %spec.select.i.2 to i32
%add.2 = add nsw i32 %add.1, %conv.2
%arrayidx.3 = getelementptr inbounds i8, ptr %a, i64 192
%3 = load i8, ptr %arrayidx.3, align 1
%spec.select.i.3 = tail call i8 @llvm.abs.i8(i8 %3, i1 false)
%conv.3 = sext i8 %spec.select.i.3 to i32
%add.3 = add nsw i32 %add.2, %conv.3
%arrayidx.4 = getelementptr inbounds i8, ptr %a, i64 256
%4 = load i8, ptr %arrayidx.4, align 1
%spec.select.i.4 = tail call i8 @llvm.abs.i8(i8 %4, i1 false)
%conv.4 = sext i8 %spec.select.i.4 to i32
%add.4 = add nsw i32 %add.3, %conv.4
%arrayidx.5 = getelementptr inbounds i8, ptr %a, i64 320
%5 = load i8, ptr %arrayidx.5, align 1
%spec.select.i.5 = tail call i8 @llvm.abs.i8(i8 %5, i1 false)
%conv.5 = sext i8 %spec.select.i.5 to i32
%add.5 = add nsw i32 %add.4, %conv.5
%arrayidx.6 = getelementptr inbounds i8, ptr %a, i64 384
%6 = load i8, ptr %arrayidx.6, align 1
%spec.select.i.6 = tail call i8 @llvm.abs.i8(i8 %6, i1 false)
%conv.6 = sext i8 %spec.select.i.6 to i32
%add.6 = add nsw i32 %add.5, %conv.6
%arrayidx.7 = getelementptr inbounds i8, ptr %a, i64 448
%7 = load i8, ptr %arrayidx.7, align 1
%spec.select.i.7 = tail call i8 @llvm.abs.i8(i8 %7, i1 false)
%conv.7 = sext i8 %spec.select.i.7 to i32
%add.7 = add nsw i32 %add.6, %conv.7
ret i32 %add.7
}
declare i8 @llvm.abs.i8(i8, i1 immarg)