A set of microbenchmarks (https://github.com/llvm/llvm-test-suite/pull/26) showed that loop interleaving can also be beneficial for loops with a low trip count. Prior patches updated the interleaving-count computation accordingly; this patch removes the loop trip-count threshold for interleaving.
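For illustration, a minimal C sketch (hypothetical, not taken from the linked microbenchmark suite) of the kind of low-trip-count loop affected: with the threshold removed, whether the vectorizer interleaves such a loop is decided by the interleave-count heuristics and cost model alone.

```c
/* Hypothetical example, not from the linked benchmarks: a loop whose trip
 * count (16) is small and known at compile time. Previously the loop
 * vectorizer rejected interleaving outright for loops below a trip-count
 * threshold; with the threshold removed, interleaving is decided purely by
 * the interleave-count computation updated in the prior patches. */
void axpy_small(float *restrict y, const float *restrict x, float a) {
  for (int i = 0; i < 16; ++i) /* low, compile-time-known trip count */
    y[i] = a * x[i] + y[i];
}
```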
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='default<O3>' -unroll-runtime -S %s | FileCheck %s

target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx"

@b = global [58 x double] zeroinitializer, align 16
@c = global [58 x double] zeroinitializer, align 16
@a = global [58 x double] zeroinitializer, align 16

; Test case for #42332, showing excessive unrolling of vector loop.
define void @test_known_trip_count() {
; CHECK-LABEL: @test_known_trip_count(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr @b, align 16
; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 2), align 16
; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <2 x double>, ptr @c, align 16
; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 2), align 16
; CHECK-NEXT:    [[TMP0:%.*]] = fadd <2 x double> [[WIDE_LOAD]], [[WIDE_LOAD4]]
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x double> [[WIDE_LOAD3]], [[WIDE_LOAD5]]
; CHECK-NEXT:    store <2 x double> [[TMP0]], ptr @a, align 16
; CHECK-NEXT:    store <2 x double> [[TMP1]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 2), align 16
; CHECK-NEXT:    [[WIDE_LOAD_1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 4), align 16
; CHECK-NEXT:    [[WIDE_LOAD3_1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 6), align 16
; CHECK-NEXT:    [[WIDE_LOAD4_1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 4), align 16
; CHECK-NEXT:    [[WIDE_LOAD5_1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 6), align 16
; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[WIDE_LOAD_1]], [[WIDE_LOAD4_1]]
; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[WIDE_LOAD3_1]], [[WIDE_LOAD5_1]]
; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 4), align 16
; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 6), align 16
; CHECK-NEXT:    [[WIDE_LOAD_2:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 8), align 16
; CHECK-NEXT:    [[WIDE_LOAD3_2:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 10), align 16
; CHECK-NEXT:    [[WIDE_LOAD4_2:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 8), align 16
; CHECK-NEXT:    [[WIDE_LOAD5_2:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 10), align 16
; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[WIDE_LOAD_2]], [[WIDE_LOAD4_2]]
; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[WIDE_LOAD3_2]], [[WIDE_LOAD5_2]]
; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 8), align 16
; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 10), align 16
; CHECK-NEXT:    [[WIDE_LOAD_3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 12), align 16
; CHECK-NEXT:    [[WIDE_LOAD3_3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 14), align 16
; CHECK-NEXT:    [[WIDE_LOAD4_3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 12), align 16
; CHECK-NEXT:    [[WIDE_LOAD5_3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 14), align 16
; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[WIDE_LOAD_3]], [[WIDE_LOAD4_3]]
; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[WIDE_LOAD3_3]], [[WIDE_LOAD5_3]]
; CHECK-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 12), align 16
; CHECK-NEXT:    store <2 x double> [[TMP7]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 14), align 16
; CHECK-NEXT:    [[WIDE_LOAD_4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 16), align 16
; CHECK-NEXT:    [[WIDE_LOAD3_4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 18), align 16
; CHECK-NEXT:    [[WIDE_LOAD4_4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 16), align 16
; CHECK-NEXT:    [[WIDE_LOAD5_4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 18), align 16
; CHECK-NEXT:    [[TMP8:%.*]] = fadd <2 x double> [[WIDE_LOAD_4]], [[WIDE_LOAD4_4]]
; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[WIDE_LOAD3_4]], [[WIDE_LOAD5_4]]
; CHECK-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 16), align 16
; CHECK-NEXT:    store <2 x double> [[TMP9]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 18), align 16
; CHECK-NEXT:    [[WIDE_LOAD_5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 20), align 16
; CHECK-NEXT:    [[WIDE_LOAD3_5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 22), align 16
; CHECK-NEXT:    [[WIDE_LOAD4_5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 20), align 16
; CHECK-NEXT:    [[WIDE_LOAD5_5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 22), align 16
; CHECK-NEXT:    [[TMP10:%.*]] = fadd <2 x double> [[WIDE_LOAD_5]], [[WIDE_LOAD4_5]]
; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[WIDE_LOAD3_5]], [[WIDE_LOAD5_5]]
; CHECK-NEXT:    store <2 x double> [[TMP10]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 20), align 16
; CHECK-NEXT:    store <2 x double> [[TMP11]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 22), align 16
; CHECK-NEXT:    [[WIDE_LOAD_6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 24), align 16
; CHECK-NEXT:    [[WIDE_LOAD3_6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 26), align 16
; CHECK-NEXT:    [[WIDE_LOAD4_6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 24), align 16
; CHECK-NEXT:    [[WIDE_LOAD5_6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 26), align 16
; CHECK-NEXT:    [[TMP12:%.*]] = fadd <2 x double> [[WIDE_LOAD_6]], [[WIDE_LOAD4_6]]
; CHECK-NEXT:    [[TMP13:%.*]] = fadd <2 x double> [[WIDE_LOAD3_6]], [[WIDE_LOAD5_6]]
; CHECK-NEXT:    store <2 x double> [[TMP12]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 24), align 16
; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 26), align 16
; CHECK-NEXT:    [[WIDE_LOAD_7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 28), align 16
; CHECK-NEXT:    [[WIDE_LOAD3_7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 30), align 16
; CHECK-NEXT:    [[WIDE_LOAD4_7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 28), align 16
; CHECK-NEXT:    [[WIDE_LOAD5_7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 30), align 16
; CHECK-NEXT:    [[TMP14:%.*]] = fadd <2 x double> [[WIDE_LOAD_7]], [[WIDE_LOAD4_7]]
; CHECK-NEXT:    [[TMP15:%.*]] = fadd <2 x double> [[WIDE_LOAD3_7]], [[WIDE_LOAD5_7]]
; CHECK-NEXT:    store <2 x double> [[TMP14]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 28), align 16
; CHECK-NEXT:    store <2 x double> [[TMP15]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 30), align 16
; CHECK-NEXT:    [[WIDE_LOAD_8:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 32), align 16
; CHECK-NEXT:    [[WIDE_LOAD3_8:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 34), align 16
; CHECK-NEXT:    [[WIDE_LOAD4_8:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 32), align 16
; CHECK-NEXT:    [[WIDE_LOAD5_8:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 34), align 16
; CHECK-NEXT:    [[TMP16:%.*]] = fadd <2 x double> [[WIDE_LOAD_8]], [[WIDE_LOAD4_8]]
; CHECK-NEXT:    [[TMP17:%.*]] = fadd <2 x double> [[WIDE_LOAD3_8]], [[WIDE_LOAD5_8]]
; CHECK-NEXT:    store <2 x double> [[TMP16]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 32), align 16
; CHECK-NEXT:    store <2 x double> [[TMP17]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 34), align 16
; CHECK-NEXT:    [[WIDE_LOAD_9:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 36), align 16
; CHECK-NEXT:    [[WIDE_LOAD3_9:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 38), align 16
; CHECK-NEXT:    [[WIDE_LOAD4_9:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 36), align 16
; CHECK-NEXT:    [[WIDE_LOAD5_9:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 38), align 16
; CHECK-NEXT:    [[TMP18:%.*]] = fadd <2 x double> [[WIDE_LOAD_9]], [[WIDE_LOAD4_9]]
; CHECK-NEXT:    [[TMP19:%.*]] = fadd <2 x double> [[WIDE_LOAD3_9]], [[WIDE_LOAD5_9]]
; CHECK-NEXT:    store <2 x double> [[TMP18]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 36), align 16
; CHECK-NEXT:    store <2 x double> [[TMP19]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 38), align 16
; CHECK-NEXT:    [[WIDE_LOAD_10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 40), align 16
; CHECK-NEXT:    [[WIDE_LOAD3_10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 42), align 16
; CHECK-NEXT:    [[WIDE_LOAD4_10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 40), align 16
; CHECK-NEXT:    [[WIDE_LOAD5_10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 42), align 16
; CHECK-NEXT:    [[TMP20:%.*]] = fadd <2 x double> [[WIDE_LOAD_10]], [[WIDE_LOAD4_10]]
; CHECK-NEXT:    [[TMP21:%.*]] = fadd <2 x double> [[WIDE_LOAD3_10]], [[WIDE_LOAD5_10]]
; CHECK-NEXT:    store <2 x double> [[TMP20]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 40), align 16
; CHECK-NEXT:    store <2 x double> [[TMP21]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 42), align 16
; CHECK-NEXT:    [[WIDE_LOAD_11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 44), align 16
; CHECK-NEXT:    [[WIDE_LOAD3_11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 46), align 16
; CHECK-NEXT:    [[WIDE_LOAD4_11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 44), align 16
; CHECK-NEXT:    [[WIDE_LOAD5_11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 46), align 16
; CHECK-NEXT:    [[TMP22:%.*]] = fadd <2 x double> [[WIDE_LOAD_11]], [[WIDE_LOAD4_11]]
; CHECK-NEXT:    [[TMP23:%.*]] = fadd <2 x double> [[WIDE_LOAD3_11]], [[WIDE_LOAD5_11]]
; CHECK-NEXT:    store <2 x double> [[TMP22]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 44), align 16
; CHECK-NEXT:    store <2 x double> [[TMP23]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 46), align 16
; CHECK-NEXT:    [[WIDE_LOAD_12:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 48), align 16
; CHECK-NEXT:    [[WIDE_LOAD3_12:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 50), align 16
; CHECK-NEXT:    [[WIDE_LOAD4_12:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 48), align 16
; CHECK-NEXT:    [[WIDE_LOAD5_12:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 50), align 16
; CHECK-NEXT:    [[TMP24:%.*]] = fadd <2 x double> [[WIDE_LOAD_12]], [[WIDE_LOAD4_12]]
; CHECK-NEXT:    [[TMP25:%.*]] = fadd <2 x double> [[WIDE_LOAD3_12]], [[WIDE_LOAD5_12]]
; CHECK-NEXT:    store <2 x double> [[TMP24]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 48), align 16
; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 50), align 16
; CHECK-NEXT:    [[WIDE_LOAD_13:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 52), align 16
; CHECK-NEXT:    [[WIDE_LOAD3_13:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 54), align 16
; CHECK-NEXT:    [[WIDE_LOAD4_13:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 52), align 16
; CHECK-NEXT:    [[WIDE_LOAD5_13:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 54), align 16
; CHECK-NEXT:    [[TMP26:%.*]] = fadd <2 x double> [[WIDE_LOAD_13]], [[WIDE_LOAD4_13]]
; CHECK-NEXT:    [[TMP27:%.*]] = fadd <2 x double> [[WIDE_LOAD3_13]], [[WIDE_LOAD5_13]]
; CHECK-NEXT:    store <2 x double> [[TMP26]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 52), align 16
; CHECK-NEXT:    store <2 x double> [[TMP27]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 54), align 16
; CHECK-NEXT:    [[WIDE_LOAD_14:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 56), align 16
; CHECK-NEXT:    [[WIDE_LOAD3_14:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 1, i64 0), align 16
; CHECK-NEXT:    [[WIDE_LOAD4_14:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 56), align 16
; CHECK-NEXT:    [[WIDE_LOAD5_14:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 1, i64 0), align 16
; CHECK-NEXT:    [[TMP28:%.*]] = fadd <2 x double> [[WIDE_LOAD_14]], [[WIDE_LOAD4_14]]
; CHECK-NEXT:    [[TMP29:%.*]] = fadd <2 x double> [[WIDE_LOAD3_14]], [[WIDE_LOAD5_14]]
; CHECK-NEXT:    store <2 x double> [[TMP28]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 56), align 16
; CHECK-NEXT:    store <2 x double> [[TMP29]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 1, i64 0), align 16
; CHECK-NEXT:    [[TMP30:%.*]] = load double, ptr getelementptr inbounds ([58 x double], ptr @b, i64 1, i64 2), align 16
; CHECK-NEXT:    [[TMP31:%.*]] = load double, ptr getelementptr inbounds ([58 x double], ptr @c, i64 1, i64 2), align 16
; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP30]], [[TMP31]]
; CHECK-NEXT:    store double [[ADD]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 1, i64 2), align 16
; CHECK-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, 61
  br i1 %cmp, label %for.body, label %exit

for.body:
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 %idxprom
  %0 = load double, ptr %arrayidx, align 8
  %idxprom1 = sext i32 %i.0 to i64
  %arrayidx2 = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 %idxprom1
  %1 = load double, ptr %arrayidx2, align 8
  %add = fadd double %0, %1
  %idxprom3 = sext i32 %i.0 to i64
  %arrayidx4 = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 %idxprom3
  store double %add, ptr %arrayidx4, align 8
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

exit:
  ret void
}

define void @test_runtime_trip_count(i32 %N) {
; CHECK-LABEL: @test_runtime_trip_count(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER7:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDEX]]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 16
; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <2 x double>, ptr [[TMP1]], align 16
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDEX]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP2]], align 16
; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP3]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[WIDE_LOAD]], [[WIDE_LOAD5]]
; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[WIDE_LOAD4]], [[WIDE_LOAD6]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDEX]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 16
; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[TMP6]], align 16
; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[TMP7]], align 16
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT]], label [[FOR_BODY_PREHEADER7]]
; CHECK:       for.body.preheader7:
; CHECK-NEXT:    [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER7]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP9:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP10:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP9]], [[TMP10]]
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT:    store double [[ADD]], ptr [[ARRAYIDX4]], align 8
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, %N
  br i1 %cmp, label %for.body, label %exit

for.body:
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 %idxprom
  %0 = load double, ptr %arrayidx, align 8
  %idxprom1 = sext i32 %i.0 to i64
  %arrayidx2 = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 %idxprom1
  %1 = load double, ptr %arrayidx2, align 8
  %add = fadd double %0, %1
  %idxprom3 = sext i32 %i.0 to i64
  %arrayidx4 = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 %idxprom3
  store double %add, ptr %arrayidx4, align 8
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

exit:
  ret void
}