Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
Alexey Bataev a65a5feb1a [SLP]Improve masked loads vectorization, attempting gathered loads
If a vector of loads can be vectorized as a masked gather and there are
several other masked gather nodes, the compiler can check whether it is
possible to combine such nodes into one big consecutive/strided load
node, which provides better performance.

Reviewers: RKSimon

Reviewed By: RKSimon

Pull Request: https://github.com/llvm/llvm-project/pull/110151
2024-10-08 16:43:10 -04:00

50 lines
2.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=riscv64 -mattr=+v < %s | FileCheck %s
; SLP vectorizer test: two adjacent double stores, to a[%0] and a[%0 | 1],
; whose stored values are built from a mix of non-consecutive loads
; (a[0] and a[%0 | 1]) and consecutive loads (a[0], a[1]).
;
; Expected output, as pinned by the autogenerated assertions below:
;   * the two non-consecutive loads become one 2-lane masked gather whose
;     lane 0 reads a[%0 | 1] and lane 1 reads a[0];
;   * each a[0]/a[1] pair of loads becomes one <2 x double> load from %a;
;   * the two stores become a single strided store based at the scalar GEP
;     for a[%0 | 1] with a byte stride of -8, so lane 0 is written to
;     a[%0 | 1] and lane 1 to the double immediately below it.
; The scalar GEP for a[%0 | 1] is still emitted in the output because it is
; the base pointer operand of that strided store.
; (Presumably this is the "external ptr" of the "reversed strided node" the
; test file is named after; confirm against the originating commit.)
define void @test(ptr %a, i64 %0) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[A:%.*]], i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[BB:.*]]
; CHECK: [[BB]]:
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x double> poison)
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = fsub <2 x double> [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = fsub <2 x double> [[TMP7]], [[TMP10]]
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP11]], ptr align 8 [[ARRAYIDX17_I28_1]], i64 -8, <2 x i1> <i1 true, i1 true>, i32 2)
; CHECK-NEXT: br label %[[BB]]
;
entry:
br label %bb
; This block branches back to itself unconditionally, so the function never
; returns; only the straight-line body matters to the test.
bb:
; The index evaluates to 0, so %arrayidx.i.1 addresses a[0].
%indvars.iv.next239.i = add i64 0, 0
%arrayidx.i.1 = getelementptr double, ptr %a, i64 %indvars.iv.next239.i
%1 = load double, ptr %arrayidx.i.1, align 8
; Destination of the first store: a[%0].
%arrayidx10.i.1 = getelementptr double, ptr %a, i64 %0
; The disjoint flag asserts that %0 and 1 share no set bits, so %2 == %0 + 1
; and %arrayidx17.i28.1 is the element right after %arrayidx10.i.1.
; It is both loaded from (%3) and stored to (second store).
%2 = or disjoint i64 %0, 1
%arrayidx17.i28.1 = getelementptr double, ptr %a, i64 %2
%3 = load double, ptr %arrayidx17.i28.1, align 8
; Two separate loads of a[0].
%4 = load double, ptr %a, align 8
%5 = load double, ptr %a, align 8
; Two separate loads of a[1], each through its own (identical) GEP.
%arrayidx38.i.1 = getelementptr double, ptr %a, i64 1
%6 = load double, ptr %arrayidx38.i.1, align 8
%arrayidx41.i.1 = getelementptr double, ptr %a, i64 1
%7 = load double, ptr %arrayidx41.i.1, align 8
; Inner differences: %sub47.i.1 uses the a[0] loads, %sub54.i.1 the a[1] loads.
%sub47.i.1 = fsub double %4, %5
%sub54.i.1 = fsub double %6, %7
; First store, to a[%0]: the a[0] load minus the a[1]-based difference.
%sub69.i.1 = fsub double %1, %sub54.i.1
store double %sub69.i.1, ptr %arrayidx10.i.1, align 8
; Second store, to a[%0 | 1]: the a[%0 | 1] load minus the a[0]-based
; difference.
%sub72.i.1 = fsub double %3, %sub47.i.1
store double %sub72.i.1, ptr %arrayidx17.i28.1, align 8
br label %bb
}