The PreInits of a loop transformation (at the moment only tile) include the computation of the trip count. The trip count is needed by any loop-associated directive that consumes the transformation-generated loop. Hence, we must ensure that the PreInits of consumed loop transformations are emitted with the consuming directive. This is done by adding the inner loop transformation's PreInits to the outer loop-directive's PreInits. The outer loop-directive will consume the de-sugared AST such that the inner PreInits are not emitted twice. The PreInits of a loop transformation are still emitted directly if its generated loop(s) are not associated with another loop-associated directive. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D102180
194 lines
10 KiB
C++
194 lines
10 KiB
C++
// Check code generation
|
|
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
|
|
|
|
// Check same results after serialization round-trip
|
|
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
|
|
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
|
|
// expected-no-diagnostics
|
|
|
|
// The loop trip count used by #pragma omp for depends on code generated
|
|
// by #pragma omp tile. Check that these PreInits are emitted before
|
|
// the code generated by #pragma omp for.
|
|
|
|
#ifndef HEADER
|
|
#define HEADER
|
|
|
|
// Placeholder for the loop body code. The function is variadic, so the test
// can pass it any set of values, and its definition is empty. It has C
// linkage, so it is referenced by the plain name "body".
|
|
extern "C" void body(...) {}
|
|
|
|
|
|
// IR-LABEL: @func(
|
|
// IR-NEXT: [[ENTRY:.*]]:
|
|
// IR-NEXT: %[[START_ADDR:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[END_ADDR:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[STEP_ADDR:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[DOTOMP_IV:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[TMP:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[I:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[DOTFLOOR_0_IV_I:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[DOTOMP_LB:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[DOTOMP_UB:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[DOTFLOOR_0_IV_I12:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[DOTTILE_0_IV_I:.+]] = alloca i32, align 4
|
|
// IR-NEXT: %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
|
|
// IR-NEXT: store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
|
|
// IR-NEXT: store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
|
|
// IR-NEXT: store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
|
|
// IR-NEXT: %[[TMP1:.+]] = load i32, i32* %[[START_ADDR]], align 4
|
|
// IR-NEXT: store i32 %[[TMP1]], i32* %[[I]], align 4
|
|
// IR-NEXT: %[[TMP2:.+]] = load i32, i32* %[[START_ADDR]], align 4
|
|
// IR-NEXT: store i32 %[[TMP2]], i32* %[[DOTCAPTURE_EXPR_]], align 4
|
|
// IR-NEXT: %[[TMP3:.+]] = load i32, i32* %[[END_ADDR]], align 4
|
|
// IR-NEXT: store i32 %[[TMP3]], i32* %[[DOTCAPTURE_EXPR_1]], align 4
|
|
// IR-NEXT: %[[TMP4:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
|
|
// IR-NEXT: store i32 %[[TMP4]], i32* %[[DOTCAPTURE_EXPR_2]], align 4
|
|
// IR-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_1]], align 4
|
|
// IR-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
|
|
// IR-NEXT: %[[SUB:.+]] = sub i32 %[[TMP5]], %[[TMP6]]
|
|
// IR-NEXT: %[[SUB4:.+]] = sub i32 %[[SUB]], 1
|
|
// IR-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
|
|
// IR-NEXT: %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP7]]
|
|
// IR-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
|
|
// IR-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP8]]
|
|
// IR-NEXT: %[[SUB5:.+]] = sub i32 %[[DIV]], 1
|
|
// IR-NEXT: store i32 %[[SUB5]], i32* %[[DOTCAPTURE_EXPR_3]], align 4
|
|
// IR-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
|
|
// IR-NEXT: %[[ADD7:.+]] = add i32 %[[TMP9]], 1
|
|
// IR-NEXT: store i32 %[[ADD7]], i32* %[[DOTCAPTURE_EXPR_6]], align 4
|
|
// IR-NEXT: %[[TMP10:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
|
|
// IR-NEXT: %[[SUB9:.+]] = sub i32 %[[TMP10]], -3
|
|
// IR-NEXT: %[[DIV10:.+]] = udiv i32 %[[SUB9]], 4
|
|
// IR-NEXT: %[[SUB11:.+]] = sub i32 %[[DIV10]], 1
|
|
// IR-NEXT: store i32 %[[SUB11]], i32* %[[DOTCAPTURE_EXPR_8]], align 4
|
|
// IR-NEXT: store i32 0, i32* %[[DOTFLOOR_0_IV_I]], align 4
|
|
// IR-NEXT: %[[TMP11:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
|
|
// IR-NEXT: %[[CMP:.+]] = icmp ult i32 0, %[[TMP11]]
|
|
// IR-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[OMP_PRECOND_THEN]]:
|
|
// IR-NEXT: store i32 0, i32* %[[DOTOMP_LB]], align 4
|
|
// IR-NEXT: %[[TMP12:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
|
|
// IR-NEXT: store i32 %[[TMP12]], i32* %[[DOTOMP_UB]], align 4
|
|
// IR-NEXT: store i32 1, i32* %[[DOTOMP_STRIDE]], align 4
|
|
// IR-NEXT: store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
|
|
// IR-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[TMP0]], i32 34, i32* %[[DOTOMP_IS_LAST]], i32* %[[DOTOMP_LB]], i32* %[[DOTOMP_UB]], i32* %[[DOTOMP_STRIDE]], i32 1, i32 1)
|
|
// IR-NEXT: %[[TMP13:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
|
|
// IR-NEXT: %[[TMP14:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
|
|
// IR-NEXT: %[[CMP13:.+]] = icmp ugt i32 %[[TMP13]], %[[TMP14]]
|
|
// IR-NEXT: br i1 %[[CMP13]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[COND_TRUE]]:
|
|
// IR-NEXT: %[[TMP15:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
|
|
// IR-NEXT: br label %[[COND_END:.+]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[COND_FALSE]]:
|
|
// IR-NEXT: %[[TMP16:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
|
|
// IR-NEXT: br label %[[COND_END]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[COND_END]]:
|
|
// IR-NEXT: %[[COND:.+]] = phi i32 [ %[[TMP15]], %[[COND_TRUE]] ], [ %[[TMP16]], %[[COND_FALSE]] ]
|
|
// IR-NEXT: store i32 %[[COND]], i32* %[[DOTOMP_UB]], align 4
|
|
// IR-NEXT: %[[TMP17:.+]] = load i32, i32* %[[DOTOMP_LB]], align 4
|
|
// IR-NEXT: store i32 %[[TMP17]], i32* %[[DOTOMP_IV]], align 4
|
|
// IR-NEXT: br label %[[OMP_INNER_FOR_COND:.+]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[OMP_INNER_FOR_COND]]:
|
|
// IR-NEXT: %[[TMP18:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
|
|
// IR-NEXT: %[[TMP19:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
|
|
// IR-NEXT: %[[ADD14:.+]] = add i32 %[[TMP19]], 1
|
|
// IR-NEXT: %[[CMP15:.+]] = icmp ult i32 %[[TMP18]], %[[ADD14]]
|
|
// IR-NEXT: br i1 %[[CMP15]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[OMP_INNER_FOR_BODY]]:
|
|
// IR-NEXT: %[[TMP20:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
|
|
// IR-NEXT: %[[MUL:.+]] = mul i32 %[[TMP20]], 4
|
|
// IR-NEXT: %[[ADD16:.+]] = add i32 0, %[[MUL]]
|
|
// IR-NEXT: store i32 %[[ADD16]], i32* %[[DOTFLOOR_0_IV_I12]], align 4
|
|
// IR-NEXT: %[[TMP21:.+]] = load i32, i32* %[[DOTFLOOR_0_IV_I12]], align 4
|
|
// IR-NEXT: store i32 %[[TMP21]], i32* %[[DOTTILE_0_IV_I]], align 4
|
|
// IR-NEXT: br label %[[FOR_COND:.+]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[FOR_COND]]:
|
|
// IR-NEXT: %[[TMP22:.+]] = load i32, i32* %[[DOTTILE_0_IV_I]], align 4
|
|
// IR-NEXT: %[[TMP23:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
|
|
// IR-NEXT: %[[ADD17:.+]] = add i32 %[[TMP23]], 1
|
|
// IR-NEXT: %[[TMP24:.+]] = load i32, i32* %[[DOTFLOOR_0_IV_I12]], align 4
|
|
// IR-NEXT: %[[ADD18:.+]] = add nsw i32 %[[TMP24]], 4
|
|
// IR-NEXT: %[[CMP19:.+]] = icmp ult i32 %[[ADD17]], %[[ADD18]]
|
|
// IR-NEXT: br i1 %[[CMP19]], label %[[COND_TRUE20:.+]], label %[[COND_FALSE22:.+]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[COND_TRUE20]]:
|
|
// IR-NEXT: %[[TMP25:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
|
|
// IR-NEXT: %[[ADD21:.+]] = add i32 %[[TMP25]], 1
|
|
// IR-NEXT: br label %[[COND_END24:.+]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[COND_FALSE22]]:
|
|
// IR-NEXT: %[[TMP26:.+]] = load i32, i32* %[[DOTFLOOR_0_IV_I12]], align 4
|
|
// IR-NEXT: %[[ADD23:.+]] = add nsw i32 %[[TMP26]], 4
|
|
// IR-NEXT: br label %[[COND_END24]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[COND_END24]]:
|
|
// IR-NEXT: %[[COND25:.+]] = phi i32 [ %[[ADD21]], %[[COND_TRUE20]] ], [ %[[ADD23]], %[[COND_FALSE22]] ]
|
|
// IR-NEXT: %[[CMP26:.+]] = icmp ult i32 %[[TMP22]], %[[COND25]]
|
|
// IR-NEXT: br i1 %[[CMP26]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[FOR_BODY]]:
|
|
// IR-NEXT: %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
|
|
// IR-NEXT: %[[TMP28:.+]] = load i32, i32* %[[DOTTILE_0_IV_I]], align 4
|
|
// IR-NEXT: %[[TMP29:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
|
|
// IR-NEXT: %[[MUL27:.+]] = mul i32 %[[TMP28]], %[[TMP29]]
|
|
// IR-NEXT: %[[ADD28:.+]] = add i32 %[[TMP27]], %[[MUL27]]
|
|
// IR-NEXT: store i32 %[[ADD28]], i32* %[[I]], align 4
|
|
// IR-NEXT: %[[TMP30:.+]] = load i32, i32* %[[START_ADDR]], align 4
|
|
// IR-NEXT: %[[TMP31:.+]] = load i32, i32* %[[END_ADDR]], align 4
|
|
// IR-NEXT: %[[TMP32:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
|
|
// IR-NEXT: %[[TMP33:.+]] = load i32, i32* %[[I]], align 4
|
|
// IR-NEXT: call void (...) @body(i32 %[[TMP30]], i32 %[[TMP31]], i32 %[[TMP32]], i32 %[[TMP33]])
|
|
// IR-NEXT: br label %[[FOR_INC:.+]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[FOR_INC]]:
|
|
// IR-NEXT: %[[TMP34:.+]] = load i32, i32* %[[DOTTILE_0_IV_I]], align 4
|
|
// IR-NEXT: %[[INC:.+]] = add nsw i32 %[[TMP34]], 1
|
|
// IR-NEXT: store i32 %[[INC]], i32* %[[DOTTILE_0_IV_I]], align 4
|
|
// IR-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[FOR_END]]:
|
|
// IR-NEXT: br label %[[OMP_BODY_CONTINUE:.+]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[OMP_BODY_CONTINUE]]:
|
|
// IR-NEXT: br label %[[OMP_INNER_FOR_INC:.+]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[OMP_INNER_FOR_INC]]:
|
|
// IR-NEXT: %[[TMP35:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
|
|
// IR-NEXT: %[[ADD29:.+]] = add i32 %[[TMP35]], 1
|
|
// IR-NEXT: store i32 %[[ADD29]], i32* %[[DOTOMP_IV]], align 4
|
|
// IR-NEXT: br label %[[OMP_INNER_FOR_COND]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[OMP_INNER_FOR_END]]:
|
|
// IR-NEXT: br label %[[OMP_LOOP_EXIT:.+]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[OMP_LOOP_EXIT]]:
|
|
// IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[TMP0]])
|
|
// IR-NEXT: br label %[[OMP_PRECOND_END]]
|
|
// IR-EMPTY:
|
|
// IR-NEXT: [[OMP_PRECOND_END]]:
|
|
// IR-NEXT: call void @__kmpc_barrier(%struct.ident_t* @3, i32 %[[TMP0]])
|
|
// IR-NEXT: ret void
|
|
// IR-NEXT: }
|
|
// Function under test: a worksharing-loop directive stacked on top of a tile
// directive. The loop's start, end and step all come from the function
// parameters rather than from constants.
extern "C" void func(int start, int end, int step) {
|
|
// Outer directive; it is applied to the loop produced by the tile directive
// that follows.
#pragma omp for
|
|
// Inner loop transformation: tile the following loop with a tile size of 4.
#pragma omp tile sizes(4)
|
|
for (int i = start; i < end; i += step)
|
|
// Forward every parameter and the iteration variable to the placeholder.
body(start, end, step, i);
|
|
}
|
|
|
|
#endif /* HEADER */
|