Files
clang-p2996/polly/test/ScheduleOptimizer/full_partial_tile_separation.ll
Tobias Grosser d0b9173caa IslAst: always use the context during ast generation
Providing the context to the ast generator allows for additional simplifcations
and -- more importantly -- allows to generate loops with only partially bounded
domains, assuming the domains are bounded for all parameter configurations
that are valid as defined by the context.

This change fixes the crash reported in http://llvm.org/PR30956

The original reason why we did not include the context when generating an
AST was that CLooG and later isl used to sometimes transfer some of the
constraints that bound the size of parameters from the context into the
generated AST. This resulted in operations with very large constants, which
sometimes introduced problematic integer overflows. The latest versions of
the isl AST generator are careful to not introduce such constants.

Reported-by: Eli Friedman <efriedma@codeaurora.org>
llvm-svn: 286442
2016-11-10 09:39:58 +00:00

90 lines
4.7 KiB
LLVM

; RUN: opt -S %loadPolly -polly-vectorizer=stripmine -polly-opt-isl -polly-ast -analyze < %s | FileCheck %s
; CHECK: // 1st level tiling - Tiles
; CHECK-NEXT: #pragma known-parallel
; CHECK-NEXT: for (int c0 = 0; c0 <= floord(ni - 1, 32); c0 += 1)
; CHECK-NEXT: for (int c1 = 0; c1 <= floord(nj - 1, 32); c1 += 1)
; CHECK-NEXT: for (int c2 = 0; c2 <= floord(nk - 1, 32); c2 += 1) {
; CHECK-NEXT: // 1st level tiling - Points
; CHECK-NEXT: for (int c3 = 0; c3 <= min(31, ni - 32 * c0 - 1); c3 += 1) {
; CHECK-NEXT: for (int c4 = 0; c4 <= min(7, -8 * c1 + nj / 4 - 1); c4 += 1)
; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1) {
; CHECK-NEXT: // SIMD
; CHECK-NEXT: for (int c6 = 0; c6 <= 3; c6 += 1)
; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, 32 * c1 + 4 * c4 + c6, 32 * c2 + c5);
; CHECK-NEXT: }
; CHECK-NEXT: if (32 * c1 + 31 >= nj)
; CHECK-NEXT: #pragma minimal dependence distance: 1
; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1) {
; CHECK-NEXT: // SIMD
; CHECK-NEXT: for (int c6 = 0; c6 < nj % 4; c6 += 1)
; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, -(nj % 4) + nj + c6, 32 * c2 + c5);
; CHECK-NEXT: }
; CHECK-NEXT: }
; CHECK-NEXT: }
; Function Attrs: nounwind uwtable
define void @kernel_gemm(i32 %ni, i32 %nj, i32 %nk, double %alpha, double %beta, [1024 x double]* %C, [1024 x double]* %A, [1024 x double]* %B) #0 {
entry:
%cmp.27 = icmp sgt i32 %ni, 0
br i1 %cmp.27, label %for.cond.1.preheader.lr.ph, label %for.end.22
for.cond.1.preheader.lr.ph: ; preds = %entry
br label %for.cond.1.preheader
for.cond.1.preheader: ; preds = %for.cond.1.preheader.lr.ph, %for.inc.20
%indvars.iv33 = phi i64 [ 0, %for.cond.1.preheader.lr.ph ], [ %indvars.iv.next34, %for.inc.20 ]
%cmp2.25 = icmp sgt i32 %nj, 0
br i1 %cmp2.25, label %for.cond.4.preheader.lr.ph, label %for.inc.20
for.cond.4.preheader.lr.ph: ; preds = %for.cond.1.preheader
br label %for.cond.4.preheader
for.cond.4.preheader: ; preds = %for.cond.4.preheader.lr.ph, %for.inc.17
%indvars.iv29 = phi i64 [ 0, %for.cond.4.preheader.lr.ph ], [ %indvars.iv.next30, %for.inc.17 ]
%cmp5.23 = icmp sgt i32 %nk, 0
br i1 %cmp5.23, label %for.body.6.lr.ph, label %for.inc.17
for.body.6.lr.ph: ; preds = %for.cond.4.preheader
br label %for.body.6
for.body.6: ; preds = %for.body.6.lr.ph, %for.body.6
%indvars.iv = phi i64 [ 0, %for.body.6.lr.ph ], [ %indvars.iv.next, %for.body.6 ]
%arrayidx8 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv33, i64 %indvars.iv
%0 = load double, double* %arrayidx8, align 8
%arrayidx12 = getelementptr inbounds [1024 x double], [1024 x double]* %B, i64 %indvars.iv, i64 %indvars.iv29
%1 = load double, double* %arrayidx12, align 8
%mul = fmul double %0, %1
%arrayidx16 = getelementptr inbounds [1024 x double], [1024 x double]* %C, i64 %indvars.iv33, i64 %indvars.iv29
%2 = load double, double* %arrayidx16, align 8
%add = fadd double %2, %mul
store double %add, double* %arrayidx16, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp ne i32 %lftr.wideiv, %nk
br i1 %exitcond, label %for.body.6, label %for.cond.4.for.inc.17_crit_edge
for.cond.4.for.inc.17_crit_edge: ; preds = %for.body.6
br label %for.inc.17
for.inc.17: ; preds = %for.cond.4.for.inc.17_crit_edge, %for.cond.4.preheader
%indvars.iv.next30 = add nuw nsw i64 %indvars.iv29, 1
%lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32
%exitcond32 = icmp ne i32 %lftr.wideiv31, %nj
br i1 %exitcond32, label %for.cond.4.preheader, label %for.cond.1.for.inc.20_crit_edge
for.cond.1.for.inc.20_crit_edge: ; preds = %for.inc.17
br label %for.inc.20
for.inc.20: ; preds = %for.cond.1.for.inc.20_crit_edge, %for.cond.1.preheader
%indvars.iv.next34 = add nuw nsw i64 %indvars.iv33, 1
%lftr.wideiv35 = trunc i64 %indvars.iv.next34 to i32
%exitcond36 = icmp ne i32 %lftr.wideiv35, %ni
br i1 %exitcond36, label %for.cond.1.preheader, label %for.cond.for.end.22_crit_edge
for.cond.for.end.22_crit_edge: ; preds = %for.inc.20
br label %for.end.22
for.end.22: ; preds = %for.cond.for.end.22_crit_edge, %entry
ret void
}