Follow-up as discussed when using VPInstruction::ResumePhi for all resume values (#112147). This patch explicitly adds incoming values for each predecessor in VPlan. This simplifies codegen and allows transformations that adjust the predecessors of blocks. The change is NFC modulo the order of incoming blocks in phis.
125 lines
6.7 KiB
LLVM
125 lines
6.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -S -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true < %s | FileCheck %s
|
|
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
|
|
; Check that the interleaved-mem-access analysis currently does not create an
|
|
; interleave group for access 'a' due to the possible pointer wrap-around.
|
|
;
|
|
; To begin with, in this test the candidate interleave group can be created
|
|
; only when getPtrStride is called with Assume=true. Next, because
|
|
; the interleave-group of the loads is not full (has gaps), we also need to check
|
|
; for possible pointer wrapping. Here we currently use Assume=false and as a
|
|
; result cannot prove the transformation is safe and therefore invalidate the
|
|
; candidate interleave group.
|
|
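;
; NOTE(review): the autogenerated CHECK lines below show a wide <8 x i32> load
; followed by a stride-2 shufflevector, guarded by a vector.scevcheck block.
; That suggests the group is now formed under a runtime wrap check, so the
; description above may be stale -- confirm and update if so.
;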
|
|
|
|
; void func(unsigned * __restrict a, unsigned * __restrict b, unsigned char x, unsigned char y) {
|
|
; int i = 0;
|
|
; for (unsigned char index = x; i < y; index +=2, ++i)
|
|
; b[i] = a[index] * 2;
|
|
;
|
|
; }
|
|
|
|
; Test input: scalar loop that loads from %a at an i8 index advancing by 2,
; doubles the loaded value, and stores it to %b at an i64 counter advancing by 1.
;   %a - source pointer (noalias, readonly)
;   %b - destination pointer (noalias)
;   %x - initial value of the i8 index into %a
;   %y - i8 trip count (zero-extended to i64 for the exit test)
; The CHECK lines inside the body are autogenerated; do not edit them by hand.
define void @_Z4funcPjS_hh(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i8 zeroext %x, i8 zeroext %y) local_unnamed_addr {
|
|
; CHECK-LABEL: define void @_Z4funcPjS_hh(
|
|
; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i8 zeroext [[X:%.*]], i8 zeroext [[Y:%.*]]) local_unnamed_addr {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[CMP9:%.*]] = icmp eq i8 [[Y]], 0
|
|
; CHECK-NEXT: br i1 [[CMP9]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
|
|
; CHECK: [[FOR_BODY_PREHEADER]]:
|
|
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i8 [[Y]] to i64
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i8 [[Y]], 5
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
|
|
; CHECK: [[VECTOR_SCEVCHECK]]:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
|
|
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8
|
|
; CHECK-NEXT: [[MUL_RESULT:%.*]] = shl i8 [[TMP1]], 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[X]], -1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i8 [[MUL_RESULT]], [[TMP2]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP0]], 127
|
|
; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
|
|
; CHECK-NEXT: br i1 [[TMP5]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 3
|
|
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
|
|
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP8]], i64 4, i64 [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[WIDE_TRIP_COUNT]], [[TMP7]]
|
|
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i8
|
|
; CHECK-NEXT: [[TMP6:%.*]] = shl i8 [[DOTCAST]], 1
|
|
; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[X]], [[TMP6]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[DOTCAST3:%.*]] = trunc i64 [[INDEX]] to i8
|
|
; CHECK-NEXT: [[TMP9:%.*]] = shl i8 [[DOTCAST3]], 1
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[X]], [[TMP9]]
|
|
; CHECK-NEXT: [[TMP14:%.*]] = zext i8 [[OFFSET_IDX]] to i64
|
|
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP14]]
|
|
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP15]], align 4
|
|
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
|
; CHECK-NEXT: [[TMP24:%.*]] = shl <4 x i32> [[TMP23]], splat (i32 1)
|
|
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
|
|
; CHECK-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP25]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: br label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
|
|
; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i8 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[X]], %[[FOR_BODY_PREHEADER]] ], [ [[X]], %[[VECTOR_SCEVCHECK]] ]
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
|
|
; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
|
|
; CHECK: [[FOR_COND_CLEANUP]]:
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
|
|
; CHECK-NEXT: [[INDEX_011:%.*]] = phi i8 [ [[ADD:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ]
|
|
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[INDEX_011]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IDXPROM]]
|
|
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[TMP27]], 1
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[ADD]] = add i8 [[INDEX_011]], 2
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
;
|
|
; Guard: when the trip count %y is zero, branch straight to the return block.
entry:
|
|
%cmp9 = icmp eq i8 %y, 0
|
|
br i1 %cmp9, label %for.cond.cleanup, label %for.body.preheader
|
|
|
|
; Preheader: widen the i8 trip count to i64 for the loop's exit comparison.
for.body.preheader:
|
|
%wide.trip.count = zext i8 %y to i64
|
|
br label %for.body
|
|
|
|
; Loop exit block; only forwards to the shared return block.
for.cond.cleanup.loopexit:
|
|
br label %for.cond.cleanup
|
|
|
|
; Shared return block, reached from entry (zero trip count) or from the loop exit.
for.cond.cleanup:
|
|
ret void
|
|
|
|
; Loop body. Two induction variables are carried:
;   %indvars.iv - i64 counter starting at 0, step 1, used to index %b
;   %index.011  - i8 index starting at %x, step 2, used to index %a
for.body:
|
|
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
|
|
%index.011 = phi i8 [ %add, %for.body ], [ %x, %for.body.preheader ]
|
|
; The i8 index is zero-extended before addressing %a.
%idxprom = zext i8 %index.011 to i64
|
|
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
|
|
%0 = load i32, ptr %arrayidx, align 4
|
|
; Multiply the loaded value by 2, expressed as a left shift by one.
%mul = shl i32 %0, 1
|
|
%arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
|
|
store i32 %mul, ptr %arrayidx2, align 4
|
|
; This i8 add carries no nuw/nsw flags, so the index into %a may wrap around.
%add = add i8 %index.011, 2
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
; Leave the loop once the i64 counter reaches the widened trip count.
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
|
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
|
|
}
|
|
;.
|
|
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
|
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
|
|
;.
|