The -polly-flatten-schedule pass reduces the number of scattering
dimensions in a schedule's isl_union_map form to make it easier to understand.
It is not meant to be used in production, only for debugging and
regression tests.
To illustrate how it can make sets simpler, here is a lifetime set
computed by the proposed DeLICM pass without flattening:
{ Stmt_reduction_for[0, 4] -> [0, 2, o2, o3] : o2 < 0;
Stmt_reduction_for[0, 4] -> [0, 1, o2, o3] : o2 >= 5;
Stmt_reduction_for[0, 4] -> [0, 1, 4, o3] : o3 > 0;
Stmt_reduction_for[0, i1] -> [0, 1, i1, 1] : 0 <= i1 <= 3;
Stmt_reduction_for[0, 4] -> [0, 2, 0, o3] : o3 <= 0 }
And here is the same lifetime for a semantically identical one-dimensional
schedule:
{ Stmt_reduction_for[0, i1] -> [2 + 3i1] : 0 <= i1 <= 4 }
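For the gemm test below, the flattened timestamps can be computed directly
from the trip counts: each (i, j) iteration of the two outer loops covers
four timepoints, one for Stmt_nj_beta and three for Stmt_nk_alpha (nk = 3),
and each value of i spans nj = 7 values of j. A minimal sketch of this
arithmetic in C (the helper names are illustrative, not part of Polly):

    /* Trip counts taken from the test below. */
    enum { NJ = 7, NK = 3 };

    /* Stmt_nj_beta[i, j] -> [28i + 4j] */
    static int beta_timestamp(int i, int j) {
      return (NK + 1) * (NJ * i + j); /* 4 * (7i + j) = 28i + 4j */
    }

    /* Stmt_nk_alpha[i, j, k] -> [1 + 28i + 4j + k]: the k-loop body starts
     * one timepoint after the beta statement of the same (i, j). */
    static int alpha_timestamp(int i, int j, int k) {
      return beta_timestamp(i, j) + 1 + k;
    }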
Differential Revision: https://reviews.llvm.org/D24310
llvm-svn: 280948
; RUN: opt %loadPolly -polly-flatten-schedule -analyze < %s | FileCheck %s
;
; dgemm kernel
; C := alpha*A*B + beta*C
; C[ni][nj]
; A[ni][nk]
; B[nk][nj]
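;
; Equivalent C of the kernel above (a reference sketch; the statement names
; match the basic-block names Polly derives its statements from):
;
;   for (int i = 0; i < ni; i += 1)
;     for (int j = 0; j < nj; j += 1) {
;       C[i][j] *= beta;                        /* Stmt_nj_beta */
;       for (int k = 0; k < nk; k += 1)
;         C[i][j] += alpha * A[i][k] * B[k][j]; /* Stmt_nk_alpha */
;     }
;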
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"

define void @gemm(i32 %ni, i32 %nj, i32 %nk, double %alpha, double %beta, double* noalias nonnull %C, double* noalias nonnull %A, double* noalias nonnull %B) {
entry:
  br label %ni.for

ni.for:
  %i = phi i32 [0, %entry], [%i.inc, %ni.inc]
  %i.cmp = icmp slt i32 %i, 3
  br i1 %i.cmp, label %nj.for, label %ni.exit

nj.for:
  %j = phi i32 [0, %ni.for], [%j.inc, %nj.inc]
  %j.cmp = icmp slt i32 %j, 7
  br i1 %j.cmp, label %nj_beta, label %nj.exit

nj_beta:
  %c_stride = mul nsw i32 %i, 7 ; %nj
  %c_idx_i = getelementptr inbounds double, double* %C, i32 %c_stride
  %c_idx_ij = getelementptr inbounds double, double* %c_idx_i, i32 %j

  ; C[i][j] *= beta
  %c = load double, double* %c_idx_ij
  %c_beta = fmul double %c, %beta
  store double %c_beta, double* %c_idx_ij

  br label %nk.for

nk.for:
  %k = phi i32 [0, %nj_beta], [%k.inc, %nk.inc]
  %k.cmp = icmp slt i32 %k, 3 ; %nk
  br i1 %k.cmp, label %nk_alpha, label %nk.exit

nk_alpha:
  %a_stride = mul nsw i32 %i, 3 ; %nk
  %a_idx_i = getelementptr inbounds double, double* %A, i32 %a_stride
  %a_idx_ik = getelementptr inbounds double, double* %a_idx_i, i32 %k

  %b_stride = mul nsw i32 %k, 7 ; %nj
  %b_idx_k = getelementptr inbounds double, double* %B, i32 %b_stride
  %b_idx_kj = getelementptr inbounds double, double* %b_idx_k, i32 %j

  ; C[i][j] += alpha * A[i][k] * B[k][j]
  %a = load double, double* %a_idx_ik
  %b = load double, double* %b_idx_kj
  %beta_c = load double, double* %c_idx_ij

  %alpha_a = fmul double %a, %alpha
  %alpha_a_b = fmul double %alpha_a, %b
  %beta_c_alpha_a_b = fadd double %beta_c, %alpha_a_b

  store double %beta_c_alpha_a_b, double* %c_idx_ij

  br label %nk.inc

nk.inc:
  %k.inc = add nuw nsw i32 %k, 1
  br label %nk.for

nk.exit:
  ; store double %c, double* %c_idx_ij
  br label %nj.inc

nj.inc:
  %j.inc = add nuw nsw i32 %j, 1
  br label %nj.for

nj.exit:
  br label %ni.inc

ni.inc:
  %i.inc = add nuw nsw i32 %i, 1
  br label %ni.for

ni.exit:
  br label %return

return:
  ret void
}


; CHECK: Schedule before flattening {
; CHECK-NEXT: { Stmt_nk_alpha[i0, i1, i2] -> [i0, i1, 1, i2] }
; CHECK-NEXT: { Stmt_nj_beta[i0, i1] -> [i0, i1, 0, 0] }
; CHECK-NEXT: }
; CHECK: Schedule after flattening {
; CHECK-NEXT: { Stmt_nj_beta[i0, i1] -> [28i0 + 4i1] }
; CHECK-NEXT: { Stmt_nk_alpha[i0, i1, i2] -> [1 + 28i0 + 4i1 + i2] }
; CHECK-NEXT: }
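;
; The strides above follow from the trip counts: each (i0, i1) iteration
; covers 1 + nk = 4 timepoints (one Stmt_nj_beta, three Stmt_nk_alpha),
; and each i0 iteration spans nj = 7 values of i1, giving 7 * 4 = 28.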