Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll
2024-02-01 09:09:02 -08:00

483 lines
30 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s
; @test: eight scalar lanes computing s[k] = p[30 - 4*k] - p[4*k], k = 0..7.
; The subtrahend loads walk %p forward in steps of 4 elements (0, 4, ..., 28);
; the minuend loads walk it backward in steps of 4 elements (30, 26, ..., 2).
; The expected output recorded below is still fully scalar for this pattern.
; The input uses opaque pointers (ptr), matching the expected output.
define void @test(ptr %p, ptr noalias %s) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT: [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
; CHECK-NEXT: [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[ADD:%.*]] = fsub fast float [[I1]], [[I]]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 4
; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26
; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]]
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1
; CHECK-NEXT: store float [[ADD7]], ptr [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 8
; CHECK-NEXT: [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22
; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
; CHECK-NEXT: [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]]
; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2
; CHECK-NEXT: store float [[ADD14]], ptr [[ARRAYIDX16]], align 4
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 12
; CHECK-NEXT: [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4
; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18
; CHECK-NEXT: [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
; CHECK-NEXT: [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]]
; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3
; CHECK-NEXT: store float [[ADD21]], ptr [[ARRAYIDX23]], align 4
; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 16
; CHECK-NEXT: [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4
; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14
; CHECK-NEXT: [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
; CHECK-NEXT: [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]]
; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4
; CHECK-NEXT: store float [[ADD28]], ptr [[ARRAYIDX30]], align 4
; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 20
; CHECK-NEXT: [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4
; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10
; CHECK-NEXT: [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
; CHECK-NEXT: [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]]
; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5
; CHECK-NEXT: store float [[ADD35]], ptr [[ARRAYIDX37]], align 4
; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 24
; CHECK-NEXT: [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4
; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6
; CHECK-NEXT: [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
; CHECK-NEXT: [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]]
; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6
; CHECK-NEXT: store float [[ADD42]], ptr [[ARRAYIDX44]], align 4
; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 28
; CHECK-NEXT: [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 2
; CHECK-NEXT: [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
; CHECK-NEXT: [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]]
; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7
; CHECK-NEXT: store float [[ADD49]], ptr [[ARRAYIDX51]], align 4
; CHECK-NEXT: ret void
;
entry:
  ; lane 0: s[0] = p[30] - p[0]
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i = load float, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4

  ; lane 1: s[1] = p[26] - p[4]
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 4
  %i2 = load float, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4

  ; lane 2: s[2] = p[22] - p[8]
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 8
  %i4 = load float, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4

  ; lane 3: s[3] = p[18] - p[12]
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 12
  %i6 = load float, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4

  ; lane 4: s[4] = p[14] - p[16]
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 16
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4

  ; lane 5: s[5] = p[10] - p[20]
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 20
  %i10 = load float, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4

  ; lane 6: s[6] = p[6] - p[24]
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4

  ; lane 7: s[7] = p[2] - p[28]
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}
; @test1: eight scalar lanes computing s[k] = p[30 - 4*k] - p[k * str], k = 0..7,
; where str is the i32 argument %stride zero-extended to i64.
; The subtrahend index is a runtime multiple of the stride (0, str, 2*str, ...,
; 7*str); the minuend loads use constant indices 30, 26, ..., 2.
; The expected output recorded below is still fully scalar for this pattern.
; The input uses opaque pointers (ptr), matching the expected output.
define void @test1(ptr %p, ptr noalias %s, i32 %stride) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT: [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
; CHECK-NEXT: [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[ADD:%.*]] = fsub fast float [[I1]], [[I]]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[STR]]
; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26
; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]]
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1
; CHECK-NEXT: store float [[ADD7]], ptr [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[ST1:%.*]] = mul i64 [[STR]], 2
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST1]]
; CHECK-NEXT: [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22
; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
; CHECK-NEXT: [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]]
; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2
; CHECK-NEXT: store float [[ADD14]], ptr [[ARRAYIDX16]], align 4
; CHECK-NEXT: [[ST2:%.*]] = mul i64 [[STR]], 3
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST2]]
; CHECK-NEXT: [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4
; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18
; CHECK-NEXT: [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
; CHECK-NEXT: [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]]
; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3
; CHECK-NEXT: store float [[ADD21]], ptr [[ARRAYIDX23]], align 4
; CHECK-NEXT: [[ST3:%.*]] = mul i64 [[STR]], 4
; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST3]]
; CHECK-NEXT: [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4
; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14
; CHECK-NEXT: [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
; CHECK-NEXT: [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]]
; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4
; CHECK-NEXT: store float [[ADD28]], ptr [[ARRAYIDX30]], align 4
; CHECK-NEXT: [[ST4:%.*]] = mul i64 [[STR]], 5
; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST4]]
; CHECK-NEXT: [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4
; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10
; CHECK-NEXT: [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
; CHECK-NEXT: [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]]
; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5
; CHECK-NEXT: store float [[ADD35]], ptr [[ARRAYIDX37]], align 4
; CHECK-NEXT: [[ST5:%.*]] = mul i64 [[STR]], 6
; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST5]]
; CHECK-NEXT: [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4
; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6
; CHECK-NEXT: [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
; CHECK-NEXT: [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]]
; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6
; CHECK-NEXT: store float [[ADD42]], ptr [[ARRAYIDX44]], align 4
; CHECK-NEXT: [[ST6:%.*]] = mul i64 [[STR]], 7
; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]]
; CHECK-NEXT: [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 2
; CHECK-NEXT: [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
; CHECK-NEXT: [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]]
; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7
; CHECK-NEXT: store float [[ADD49]], ptr [[ARRAYIDX51]], align 4
; CHECK-NEXT: ret void
;
entry:
  %str = zext i32 %stride to i64

  ; lane 0: s[0] = p[30] - p[0]
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i = load float, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4

  ; lane 1: s[1] = p[26] - p[str]
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %str
  %i2 = load float, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4

  ; lane 2: s[2] = p[22] - p[2*str]
  %st1 = mul i64 %str, 2
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st1
  %i4 = load float, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4

  ; lane 3: s[3] = p[18] - p[3*str]
  %st2 = mul i64 %str, 3
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st2
  %i6 = load float, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4

  ; lane 4: s[4] = p[14] - p[4*str]
  %st3 = mul i64 %str, 4
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st3
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4

  ; lane 5: s[5] = p[10] - p[5*str]
  %st4 = mul i64 %str, 5
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st4
  %i10 = load float, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4

  ; lane 6: s[6] = p[6] - p[6*str]
  %st5 = mul i64 %str, 6
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st5
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4

  ; lane 7: s[7] = p[2] - p[7*str]
  %st6 = mul i64 %str, 7
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st6
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}
; @test2: eight scalar lanes computing s[k] = p[(7 - k) * str] - p[2 + 4*k],
; k = 0..7, where str is the i32 argument %stride zero-extended to i64.
; Here the minuend index is the runtime multiple of the stride, descending
; from 7*str to 0, and the subtrahend loads use constant indices 2, 6, ..., 30.
; The expected output recorded below is still fully scalar for this pattern.
; The input uses opaque pointers (ptr), matching the expected output.
define void @test2(ptr %p, ptr noalias %s, i32 %stride) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 2
; CHECK-NEXT: [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ST6:%.*]] = mul i64 [[STR]], 7
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]]
; CHECK-NEXT: [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[ADD:%.*]] = fsub fast float [[I1]], [[I]]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6
; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[ST5:%.*]] = mul i64 [[STR]], 6
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST5]]
; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]]
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1
; CHECK-NEXT: store float [[ADD7]], ptr [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10
; CHECK-NEXT: [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
; CHECK-NEXT: [[ST4:%.*]] = mul i64 [[STR]], 5
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST4]]
; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
; CHECK-NEXT: [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]]
; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2
; CHECK-NEXT: store float [[ADD14]], ptr [[ARRAYIDX16]], align 4
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14
; CHECK-NEXT: [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4
; CHECK-NEXT: [[ST3:%.*]] = mul i64 [[STR]], 4
; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST3]]
; CHECK-NEXT: [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
; CHECK-NEXT: [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]]
; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3
; CHECK-NEXT: store float [[ADD21]], ptr [[ARRAYIDX23]], align 4
; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18
; CHECK-NEXT: [[ST2:%.*]] = mul i64 [[STR]], 3
; CHECK-NEXT: [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4
; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST2]]
; CHECK-NEXT: [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
; CHECK-NEXT: [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]]
; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4
; CHECK-NEXT: store float [[ADD28]], ptr [[ARRAYIDX30]], align 4
; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22
; CHECK-NEXT: [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4
; CHECK-NEXT: [[ST1:%.*]] = mul i64 [[STR]], 2
; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST1]]
; CHECK-NEXT: [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
; CHECK-NEXT: [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]]
; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5
; CHECK-NEXT: store float [[ADD35]], ptr [[ARRAYIDX37]], align 4
; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26
; CHECK-NEXT: [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4
; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[STR]]
; CHECK-NEXT: [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
; CHECK-NEXT: [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]]
; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6
; CHECK-NEXT: store float [[ADD42]], ptr [[ARRAYIDX44]], align 4
; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
; CHECK-NEXT: [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 0
; CHECK-NEXT: [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
; CHECK-NEXT: [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]]
; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7
; CHECK-NEXT: store float [[ADD49]], ptr [[ARRAYIDX51]], align 4
; CHECK-NEXT: ret void
;
entry:
  %str = zext i32 %stride to i64

  ; lane 0: s[0] = p[7*str] - p[2]
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2
  %i = load float, ptr %arrayidx, align 4
  %st6 = mul i64 %str, 7
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st6
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4

  ; lane 1: s[1] = p[6*str] - p[6]
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6
  %i2 = load float, ptr %arrayidx4, align 4
  %st5 = mul i64 %str, 6
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st5
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4

  ; lane 2: s[2] = p[5*str] - p[10]
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10
  %i4 = load float, ptr %arrayidx11, align 4
  %st4 = mul i64 %str, 5
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st4
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4

  ; lane 3: s[3] = p[4*str] - p[14]
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14
  %i6 = load float, ptr %arrayidx18, align 4
  %st3 = mul i64 %str, 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st3
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4

  ; lane 4: s[4] = p[3*str] - p[18]
  ; Unlike the other lanes, the multiply sits between the address computation
  ; and the load of the subtrahend; the expected output keeps this order.
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18
  %st2 = mul i64 %str, 3
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st2
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4

  ; lane 5: s[5] = p[2*str] - p[22]
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22
  %i10 = load float, ptr %arrayidx32, align 4
  %st1 = mul i64 %str, 2
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st1
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4

  ; lane 6: s[6] = p[str] - p[26]
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %str
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4

  ; lane 7: s[7] = p[0] - p[30]
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}
; @test3: eight scalar lanes computing s[k] = p[30 - k] - p[4*k], k = 0..7.
; The subtrahend loads walk %p forward in steps of 4 elements (0, 4, ..., 28);
; the minuend loads are consecutive elements read in descending order
; (30, 29, ..., 23).
; The expected output recorded below is vectorized: the eight subtrahend
; addresses are packed into an <8 x ptr> and read with a masked gather, the
; minuend is a single <8 x float> load starting at element 23 followed by a
; lane-reversing shuffle, and the result is written with one vector store.
; The input uses opaque pointers (ptr), matching the expected output.
define void @test3(ptr %p, ptr noalias %s) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 4
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 8
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 12
; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 16
; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 20
; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 24
; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 28
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 23
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ARRAYIDX]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> [[TMP0]], ptr [[ARRAYIDX4]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x ptr> [[TMP1]], ptr [[ARRAYIDX11]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> [[TMP2]], ptr [[ARRAYIDX18]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x ptr> [[TMP3]], ptr [[ARRAYIDX25]], i32 4
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x ptr> [[TMP4]], ptr [[ARRAYIDX32]], i32 5
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x ptr> [[TMP5]], ptr [[ARRAYIDX39]], i32 6
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x ptr> [[TMP6]], ptr [[ARRAYIDX46]], i32 7
; CHECK-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison)
; CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, ptr [[ARRAYIDX48]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP11:%.*]] = fsub fast <8 x float> [[TMP10]], [[TMP8]]
; CHECK-NEXT: store <8 x float> [[TMP11]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: ret void
;
entry:
  ; lane 0: s[0] = p[30] - p[0]
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i = load float, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4

  ; lane 1: s[1] = p[29] - p[4]
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 4
  %i2 = load float, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 29
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4

  ; lane 2: s[2] = p[28] - p[8]
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 8
  %i4 = load float, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4

  ; lane 3: s[3] = p[27] - p[12]
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 12
  %i6 = load float, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 27
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4

  ; lane 4: s[4] = p[26] - p[16]
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 16
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4

  ; lane 5: s[5] = p[25] - p[20]
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 20
  %i10 = load float, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 25
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4

  ; lane 6: s[6] = p[24] - p[24] (both operands read the same element)
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4

  ; lane 7: s[7] = p[23] - p[28]
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 23
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}