D68667 introduced a tighter limit on the number of GEPs to simplify together. The limit was based on the vector element size of the pointer, but the pointers themselves are not actually put in vectors.

IIUC we try to vectorize the index computations here, so we should base the limit on the vector element size of the index computation instead.

This fixes the test regression on AArch64 and also restores the vectorization of an important pattern in SPEC2006/464.h264ref on AArch64 (@test_i16_extend). We get a large benefit from doing a single load up front and then processing the index computations in vectors.

Note that we could probably improve the AArch64 codegen even further if we did the zexts to i32 instead of i64 for the sub operands and then performed a single vector sext on the result of the subtractions. AArch64 provides dedicated vector instructions for this. Sketch of proof in Alive: https://alive2.llvm.org/ce/z/A4xYAB

Reviewers: craig.topper, RKSimon, xbolva00, ABataev, spatel

Reviewed By: ABataev, spatel

Differential Revision: https://reviews.llvm.org/D82418
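To illustrate the codegen note above, a hand-written sketch of the narrower pattern (illustrative IR only, not taken from this patch; %a and %b stand for the two loaded <8 x i16> vectors):

  %a.ext = zext <8 x i16> %a to <8 x i32>    ; zext to i32 instead of i64
  %b.ext = zext <8 x i16> %b to <8 x i32>
  %sub = sub nsw <8 x i32> %a.ext, %b.ext
  %idx = sext <8 x i32> %sub to <8 x i64>    ; single vector sext of the result
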
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -slp-vectorizer -slp-threshold=-18 -dce -instcombine -pass-remarks-output=%t < %s | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=YAML %s
; RUN: opt -S -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-18 -pass-remarks-output=%t < %s | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=YAML %s

target datalayout = "e-m:e-i32:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; These tests check that we remove from consideration pairs of seed
; getelementptrs when they are known to have a constant difference. Such pairs
; are likely not good candidates for vectorization since one can be computed
; from the other. We use an unprofitable threshold to force vectorization.
;
; int getelementptr(int *g, int n, int w, int x, int y, int z) {
;   int sum = 0;
;   for (int i = 0; i < n; ++i) {
;     sum += g[2*i + w]; sum += g[2*i + x];
;     sum += g[2*i + y]; sum += g[2*i + z];
;   }
;   return sum;
; }
;
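; As a concrete illustration (hand-written, not part of the checks): in
; @getelementptr_2x32 below, the seed GEPs %arrayidx and %arrayidx5 address
; g[2*i + 0] and g[2*i + 1], whose indices differ by the constant 1, so the
; second address is simply the first plus one i32:
;
;   %t5 = add nsw i32 %t4, 0
;   %t7 = add nsw i32 %t4, 1
;   %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
;   %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
;
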
; YAML-LABEL: Function: getelementptr_4x32
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '11'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '5'

; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_4x32
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '16'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '3'

define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @getelementptr_4x32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP31]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, i32 [[X:%.*]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[Y:%.*]], i32 2
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[Z:%.*]], i32 3
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP21:%.*]], i32 1
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
; CHECK: for.body:
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP21]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP5]], 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> undef, i32 [[T4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[G:%.*]], i64 [[TMP10]]
; CHECK-NEXT: [[T6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1
; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], [[TMP11]]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1
; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP13]]
; CHECK-NEXT: [[T8:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]]
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2
; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP15]]
; CHECK-NEXT: [[T10:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4
; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP17]]
; CHECK-NEXT: [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[ADD11]], i32 1
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> <i32 1, i32 undef>, i32 [[T12]], i32 1
; CHECK-NEXT: [[TMP21]] = add nsw <2 x i32> [[TMP19]], [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP22]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
;
entry:
  %cmp31 = icmp sgt i32 %n, 0
  br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

for.cond.cleanup.loopexit:
  br label %for.cond.cleanup

for.cond.cleanup:
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ]
  ret i32 %sum.0.lcssa

for.body:
  %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
  %t4 = shl nsw i32 %indvars.iv, 1
  %t5 = add nsw i32 %t4, 0
  %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
  %t6 = load i32, i32* %arrayidx, align 4
  %add1 = add nsw i32 %t6, %sum.032
  %t7 = add nsw i32 %t4, %x
  %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
  %t8 = load i32, i32* %arrayidx5, align 4
  %add6 = add nsw i32 %add1, %t8
  %t9 = add nsw i32 %t4, %y
  %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
  %t10 = load i32, i32* %arrayidx10, align 4
  %add11 = add nsw i32 %add6, %t10
  %t11 = add nsw i32 %t4, %z
  %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
  %t12 = load i32, i32* %arrayidx15, align 4
  %add16 = add nsw i32 %add11, %t12
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

; YAML-LABEL: Function: getelementptr_2x32
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '11'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '5'

; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_2x32
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '6'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '3'

define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @getelementptr_2x32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP31]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP18:%.*]], i32 1
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
; CHECK: for.body:
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP18]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP4]], 1
; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[T4]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[G:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[T6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], [[TMP6]]
; CHECK-NEXT: [[T7:%.*]] = or i32 [[T4]], 1
; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[T7]] to i64
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP7]]
; CHECK-NEXT: [[T8:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> undef, i32 [[T4]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[TMP9]], [[TMP1]]
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP12]]
; CHECK-NEXT: [[T10:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4
; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]]
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP14]]
; CHECK-NEXT: [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> undef, i32 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[ADD11]], i32 1
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> <i32 1, i32 undef>, i32 [[T12]], i32 1
; CHECK-NEXT: [[TMP18]] = add nsw <2 x i32> [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP18]], i32 0
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP19]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
;
entry:
  %cmp31 = icmp sgt i32 %n, 0
  br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

for.cond.cleanup.loopexit:
  br label %for.cond.cleanup

for.cond.cleanup:
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ]
  ret i32 %sum.0.lcssa

for.body:
  %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
  %t4 = shl nsw i32 %indvars.iv, 1
  %t5 = add nsw i32 %t4, 0
  %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
  %t6 = load i32, i32* %arrayidx, align 4
  %add1 = add nsw i32 %t6, %sum.032
  %t7 = add nsw i32 %t4, 1
  %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
  %t8 = load i32, i32* %arrayidx5, align 4
  %add6 = add nsw i32 %add1, %t8
  %t9 = add nsw i32 %t4, %y
  %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
  %t10 = load i32, i32* %arrayidx10, align 4
  %add11 = add nsw i32 %add6, %t10
  %t11 = add nsw i32 %t4, %z
  %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
  %t12 = load i32, i32* %arrayidx15, align 4
  %add16 = add nsw i32 %add11, %t12
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

@global = internal global { i32* } zeroinitializer, align 8

; Make sure we vectorize to maximize the load width when loading i16 and
; extending it for compute operations.
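; The profitable shape checked for below is a single wide load per operand
; followed by a vector extend, e.g. (sketch with illustrative names):
;
;   %v = load <8 x i16>, <8 x i16>* %v.ptr, align 2
;   %v.ext = zext <8 x i16> %v to <8 x i32>
;
; rather than eight scalar i16 loads that are each extended individually.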
define void @test_i16_extend(i16* %p.1, i16* %p.2, i32 %idx.i32) {
; CHECK-LABEL: @test_i16_extend(
; CHECK-NEXT: [[P_0:%.*]] = load i32*, i32** getelementptr inbounds ({ i32* }, { i32* }* @global, i64 0, i32 0), align 8
; CHECK-NEXT: [[IDX_0:%.*]] = zext i32 [[IDX_I32:%.*]] to i64
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i16, i16* [[P_1:%.*]], i64 [[IDX_0]]
; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds i16, i16* [[P_2:%.*]], i64 [[IDX_0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP53]] to <8 x i16>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP56]] to <8 x i16>*
; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT: [[TMP6:%.*]] = zext <8 x i16> [[TMP5]] to <8 x i32>
; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <8 x i32> [[TMP3]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP7]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP9]]
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[TMP60]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP7]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP11]]
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[TMP71]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP7]], i32 2
; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
; CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP13]]
; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[TMP82]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP7]], i32 3
; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
; CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP15]]
; CHECK-NEXT: [[L_4:%.*]] = load i32, i32* [[TMP93]], align 4
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP7]], i32 4
; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
; CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP17]]
; CHECK-NEXT: [[L_5:%.*]] = load i32, i32* [[TMP104]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP7]], i32 5
; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP18]] to i64
; CHECK-NEXT: [[TMP115:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP19]]
; CHECK-NEXT: [[L_6:%.*]] = load i32, i32* [[TMP115]], align 4
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP7]], i32 6
; CHECK-NEXT: [[TMP21:%.*]] = sext i32 [[TMP20]] to i64
; CHECK-NEXT: [[TMP126:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP21]]
; CHECK-NEXT: [[L_7:%.*]] = load i32, i32* [[TMP126]], align 4
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP7]], i32 7
; CHECK-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64
; CHECK-NEXT: [[TMP137:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP23]]
; CHECK-NEXT: [[L_8:%.*]] = load i32, i32* [[TMP137]], align 4
; CHECK-NEXT: call void @use(i32 [[L_1]], i32 [[L_2]], i32 [[L_3]], i32 [[L_4]], i32 [[L_5]], i32 [[L_6]], i32 [[L_7]], i32 [[L_8]])
; CHECK-NEXT: ret void
;
  %g = getelementptr inbounds { i32* }, { i32* }* @global, i64 0, i32 0
  %p.0 = load i32*, i32** %g, align 8

  %idx.0 = zext i32 %idx.i32 to i64
  %idx.1 = add nsw i64 %idx.0, 1
  %idx.2 = add nsw i64 %idx.0, 2
  %idx.3 = add nsw i64 %idx.0, 3
  %idx.4 = add nsw i64 %idx.0, 4
  %idx.5 = add nsw i64 %idx.0, 5
  %idx.6 = add nsw i64 %idx.0, 6
  %idx.7 = add nsw i64 %idx.0, 7

  %tmp53 = getelementptr inbounds i16, i16* %p.1, i64 %idx.0
  %op1.l = load i16, i16* %tmp53, align 2
  %op1.ext = zext i16 %op1.l to i64
  %tmp56 = getelementptr inbounds i16, i16* %p.2, i64 %idx.0
  %op2.l = load i16, i16* %tmp56, align 2
  %op2.ext = zext i16 %op2.l to i64
  %sub.1 = sub nsw i64 %op1.ext, %op2.ext

  %tmp60 = getelementptr inbounds i32, i32* %p.0, i64 %sub.1
  %l.1 = load i32, i32* %tmp60, align 4

  %tmp64 = getelementptr inbounds i16, i16* %p.1, i64 %idx.1
  %tmp65 = load i16, i16* %tmp64, align 2
  %tmp66 = zext i16 %tmp65 to i64
  %tmp67 = getelementptr inbounds i16, i16* %p.2, i64 %idx.1
  %tmp68 = load i16, i16* %tmp67, align 2
  %tmp69 = zext i16 %tmp68 to i64
  %sub.2 = sub nsw i64 %tmp66, %tmp69

  %tmp71 = getelementptr inbounds i32, i32* %p.0, i64 %sub.2
  %l.2 = load i32, i32* %tmp71, align 4

  %tmp75 = getelementptr inbounds i16, i16* %p.1, i64 %idx.2
  %tmp76 = load i16, i16* %tmp75, align 2
  %tmp77 = zext i16 %tmp76 to i64
  %tmp78 = getelementptr inbounds i16, i16* %p.2, i64 %idx.2
  %tmp79 = load i16, i16* %tmp78, align 2
  %tmp80 = zext i16 %tmp79 to i64
  %sub.3 = sub nsw i64 %tmp77, %tmp80

  %tmp82 = getelementptr inbounds i32, i32* %p.0, i64 %sub.3
  %l.3 = load i32, i32* %tmp82, align 4

  %tmp86 = getelementptr inbounds i16, i16* %p.1, i64 %idx.3
  %tmp87 = load i16, i16* %tmp86, align 2
  %tmp88 = zext i16 %tmp87 to i64
  %tmp89 = getelementptr inbounds i16, i16* %p.2, i64 %idx.3
  %tmp90 = load i16, i16* %tmp89, align 2
  %tmp91 = zext i16 %tmp90 to i64
  %sub.4 = sub nsw i64 %tmp88, %tmp91

  %tmp93 = getelementptr inbounds i32, i32* %p.0, i64 %sub.4
  %l.4 = load i32, i32* %tmp93, align 4

  %tmp97 = getelementptr inbounds i16, i16* %p.1, i64 %idx.4
  %tmp98 = load i16, i16* %tmp97, align 2
  %tmp99 = zext i16 %tmp98 to i64
  %tmp100 = getelementptr inbounds i16, i16* %p.2, i64 %idx.4
  %tmp101 = load i16, i16* %tmp100, align 2
  %tmp102 = zext i16 %tmp101 to i64
  %sub.5 = sub nsw i64 %tmp99, %tmp102

  %tmp104 = getelementptr inbounds i32, i32* %p.0, i64 %sub.5
  %l.5 = load i32, i32* %tmp104, align 4

  %tmp108 = getelementptr inbounds i16, i16* %p.1, i64 %idx.5
  %tmp109 = load i16, i16* %tmp108, align 2
  %tmp110 = zext i16 %tmp109 to i64
  %tmp111 = getelementptr inbounds i16, i16* %p.2, i64 %idx.5
  %tmp112 = load i16, i16* %tmp111, align 2
  %tmp113 = zext i16 %tmp112 to i64
  %sub.6 = sub nsw i64 %tmp110, %tmp113

  %tmp115 = getelementptr inbounds i32, i32* %p.0, i64 %sub.6
  %l.6 = load i32, i32* %tmp115, align 4

  %tmp119 = getelementptr inbounds i16, i16* %p.1, i64 %idx.6
  %tmp120 = load i16, i16* %tmp119, align 2
  %tmp121 = zext i16 %tmp120 to i64
  %tmp122 = getelementptr inbounds i16, i16* %p.2, i64 %idx.6
  %tmp123 = load i16, i16* %tmp122, align 2
  %tmp124 = zext i16 %tmp123 to i64
  %sub.7 = sub nsw i64 %tmp121, %tmp124

  %tmp126 = getelementptr inbounds i32, i32* %p.0, i64 %sub.7
  %l.7 = load i32, i32* %tmp126, align 4

  %tmp130 = getelementptr inbounds i16, i16* %p.1, i64 %idx.7
  %tmp131 = load i16, i16* %tmp130, align 2
  %tmp132 = zext i16 %tmp131 to i64
  %tmp133 = getelementptr inbounds i16, i16* %p.2, i64 %idx.7
  %tmp134 = load i16, i16* %tmp133, align 2
  %tmp135 = zext i16 %tmp134 to i64
  %sub.8 = sub nsw i64 %tmp132, %tmp135

  %tmp137 = getelementptr inbounds i32, i32* %p.0, i64 %sub.8
  %l.8 = load i32, i32* %tmp137, align 4

  call void @use(i32 %l.1, i32 %l.2, i32 %l.3, i32 %l.4, i32 %l.5, i32 %l.6, i32 %l.7, i32 %l.8)
  ret void
}

declare void @use(i32, i32, i32, i32, i32, i32, i32, i32)