; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s

define void @test(ptr %p, ptr noalias %s) {
; CHECK-LABEL: @test(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast <8 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    store <8 x float> [[TMP2]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i = load float, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 4
  %i2 = load float, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 8
  %i4 = load float, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 12
  %i6 = load float, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 16
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 20
  %i10 = load float, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}

define void @test1(ptr %p, ptr noalias %s, i32 %stride) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = mul i64 [[STR]], 4
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 [[TMP0]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %str = zext i32 %stride to i64
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i = load float, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %str
  %i2 = load float, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  %st1 = mul i64 %str, 2
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st1
  %i4 = load float, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  %st2 = mul i64 %str, 3
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st2
  %i6 = load float, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  %st3 = mul i64 %str, 4
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st3
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  %st4 = mul i64 %str, 5
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st4
  %i10 = load float, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  %st5 = mul i64 %str, 6
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st5
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  %st6 = mul i64 %str, 7
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st6
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}

define void @test2(ptr %p, ptr noalias %s, i32 %stride) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 2
; CHECK-NEXT:    [[ST6:%.*]] = mul i64 [[STR]], 7
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]]
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[STR]], -4
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 [[TMP1]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]]
; CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %str = zext i32 %stride to i64
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2
  %i = load float, ptr %arrayidx, align 4
  %st6 = mul i64 %str, 7
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st6
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6
  %i2 = load float, ptr %arrayidx4, align 4
  %st5 = mul i64 %str, 6
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st5
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10
  %i4 = load float, ptr %arrayidx11, align 4
  %st4 = mul i64 %str, 5
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st4
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14
  %i6 = load float, ptr %arrayidx18, align 4
  %st3 = mul i64 %str, 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st3
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18
  %st2 = mul i64 %str, 3
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st2
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22
  %i10 = load float, ptr %arrayidx32, align 4
  %st1 = mul i64 %str, 2
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st1
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %str
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}

define void @test3(ptr %p, ptr noalias %s) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 23
; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX48]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]]
; CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i = load float, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 4
  %i2 = load float, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 29
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 8
  %i4 = load float, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 12
  %i6 = load float, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 27
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 16
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 20
  %i10 = load float, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 25
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 23
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}