If a vector of loads can be vectorized as a masked gather and there are several other masked gather nodes, the compiler can check whether it is possible to combine such nodes into one big consecutive/strided load node, which provides better performance. Reviewers: RKSimon Reviewed By: RKSimon Pull Request: https://github.com/llvm/llvm-project/pull/110151
50 lines
2.6 KiB
LLVM
50 lines
2.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=riscv64 -mattr=+v < %s | FileCheck %s
|
|
|
|
; Test input for the SLP vectorizer: @test loads doubles from %a at several
; indices, combines them with scalar fsub instructions, and stores the two
; results to a[%0] and a[%0 | 1].
; The autogenerated assertions inside the function expect the scalar code to be
; rewritten into one masked gather of <2 x double>, two <2 x double> loads from
; %a, two vector fsubs, and a single strided store with a stride of -8 bytes.
define void @test(ptr %a, i64 %0) {
|
|
; CHECK-LABEL: define void @test(
|
|
; CHECK-SAME: ptr [[A:%.*]], i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
|
|
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label %[[BB:.*]]
|
|
; CHECK: [[BB]]:
|
|
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
|
|
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
|
|
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
|
|
; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x double> poison)
|
|
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8
|
|
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8
|
|
; CHECK-NEXT: [[TMP10:%.*]] = fsub <2 x double> [[TMP8]], [[TMP9]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = fsub <2 x double> [[TMP7]], [[TMP10]]
|
|
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP11]], ptr align 8 [[ARRAYIDX17_I28_1]], i64 -8, <2 x i1> <i1 true, i1 true>, i32 2)
|
|
; CHECK-NEXT: br label %[[BB]]
|
|
;
|
|
entry:
|
|
br label %bb
|
|
|
|
bb:
|
|
; The index below is the constant sum 0 + 0, so %1 is loaded from a[0].
%indvars.iv.next239.i = add i64 0, 0
|
|
%arrayidx.i.1 = getelementptr double, ptr %a, i64 %indvars.iv.next239.i
|
|
%1 = load double, ptr %arrayidx.i.1, align 8
|
|
; Addresses of the two stores: a[%0] and a[%0 | 1] (the 'or' is marked disjoint).
%arrayidx10.i.1 = getelementptr double, ptr %a, i64 %0
|
|
%2 = or disjoint i64 %0, 1
|
|
%arrayidx17.i28.1 = getelementptr double, ptr %a, i64 %2
|
|
%3 = load double, ptr %arrayidx17.i28.1, align 8
|
|
; Two pairs of repeated loads: %4 and %5 both read %a, %6 and %7 both read a[1].
%4 = load double, ptr %a, align 8
|
|
%5 = load double, ptr %a, align 8
|
|
%arrayidx38.i.1 = getelementptr double, ptr %a, i64 1
|
|
%6 = load double, ptr %arrayidx38.i.1, align 8
|
|
%arrayidx41.i.1 = getelementptr double, ptr %a, i64 1
|
|
%7 = load double, ptr %arrayidx41.i.1, align 8
|
|
; Scalar subtraction chains that feed the two stores.
%sub47.i.1 = fsub double %4, %5
|
|
%sub54.i.1 = fsub double %6, %7
|
|
%sub69.i.1 = fsub double %1, %sub54.i.1
|
|
store double %sub69.i.1, ptr %arrayidx10.i.1, align 8
|
|
%sub72.i.1 = fsub double %3, %sub47.i.1
|
|
store double %sub72.i.1, ptr %arrayidx17.i28.1, align 8
|
|
; Unconditional branch back to the same block; the function has no return.
br label %bb
|
|
}
|