The device runtime contains several calls to __kmpc_get_hardware_num_threads_in_block and __kmpc_get_hardware_num_blocks. If thread_limit and num_teams are compile-time constants, these calls can be folded to the constant value. Commit D106033 added the optimization phase that performs this folding; this commit adds the grid-size attributes to the outlined function. The two attributes are `omp_target_num_teams` and `omp_target_thread_limit`, and they are attached only when the corresponding values are constant. Two new functions are introduced, `getNumThreadsExprForTargetDirective` and `getNumTeamsExprForTargetDirective`. The original functions, `emitNumTeamsForTargetDirective` and `emitNumThreadsForTargetDirective`, both identify the expression and emit code for it; however, for the device version of the outlined function we cannot emit any code, so this is a first step toward separating the emission of code from the deduction of the values.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D106298
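For illustration only, a minimal sketch of the pattern this enables (the function, array, and clause values below are hypothetical, not taken from this patch or the test that follows): a target construct with constant clauses such as

  // hypothetical example, not part of this test
  void foo(int *a, int N) {
  #pragma omp target teams distribute num_teams(4) thread_limit(64)
    for (int i = 0; i < N; ++i)
      a[i] += i; // constant num_teams/thread_limit reach the outlined kernel
  }

would be expected to yield an outlined target function annotated roughly as

  attributes #0 = { ... "omp_target_num_teams"="4" "omp_target_thread_limit"="64" }

so that the device-side folding from D106033 can replace __kmpc_get_hardware_num_blocks and __kmpc_get_hardware_num_threads_in_block with the constants 4 and 64, respectively.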
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK3
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK4

// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK5
// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK6
// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK7
// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK8

// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK9
// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK10

// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK11
// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK12

// expected-no-diagnostics
#ifndef HEADER
#define HEADER

template <typename T>
T tmain() {
  T t_var = T();
  T vec[] = {1, 2};
#pragma omp target teams distribute simd reduction(+: t_var)
  for (int i = 0; i < 2; ++i) {
    t_var += (T) i;
  }
  return T();
}

int main() {
  static int sivar;
#ifdef LAMBDA

  [&]() {
#pragma omp target teams distribute simd reduction(+: sivar)
    for (int i = 0; i < 2; ++i) {

      // Skip global and bound tid vars

      sivar += i;

      [&]() {

        sivar += 4;

      }();
    }
  }();
  return 0;
#else
#pragma omp target teams distribute simd reduction(+: sivar)
  for (int i = 0; i < 2; ++i) {
    sivar += i;
  }
  return tmain<int>();
#endif
}

// Skip global and bound tid vars

// Skip global and bound tid vars

#endif
// CHECK1-LABEL: define {{[^@]+}}@main
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP1:%.*]] = bitcast i8** [[TMP0]] to i32**
// CHECK1-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
// CHECK1-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP3]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT: store i8* null, i8** [[TMP4]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 2)
// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
// CHECK1-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
// CHECK1-NEXT: br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(i32* @_ZZ4mainE5sivar) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
// CHECK1-NEXT: [[CALL:%.*]] = call signext i32 @_Z5tmainIiET_v()
// CHECK1-NEXT: ret i32 [[CALL]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60
// CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[SIVAR1]], align 4
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !5
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !5
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
// CHECK1-NEXT: store i32 2, i32* [[I]], align 4
// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8*
// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK1-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
// CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8*
// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[TMP1]] to i32**
// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP4]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT: store i8* null, i8** [[TMP5]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2)
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.3, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK1-NEXT: br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32* [[T_VAR]]) #[[ATTR3]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
// CHECK1-NEXT: ret i32 0
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32
// CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[TMP0]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[T_VAR1]], align 4
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !11
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4, !llvm.access.group !11
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
// CHECK1-NEXT: store i32 2, i32* [[I]], align 4
// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8*
// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
// CHECK1-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK1-SAME: () #[[ATTR8:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK1-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@main
// CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP1:%.*]] = bitcast i8** [[TMP0]] to i32**
// CHECK2-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP1]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
// CHECK2-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP3]], align 8
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK2-NEXT: store i8* null, i8** [[TMP4]], align 8
// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 2)
// CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
// CHECK2-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
// CHECK2-NEXT: br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2: omp_offload.failed:
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(i32* @_ZZ4mainE5sivar) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK2: omp_offload.cont:
// CHECK2-NEXT: [[CALL:%.*]] = call signext i32 @_Z5tmainIiET_v()
// CHECK2-NEXT: ret i32 [[CALL]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60
// CHECK2-SAME: (i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[SIVAR1]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5
// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !5
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !5
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5
// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK2-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK2: .omp.final.then:
// CHECK2-NEXT: store i32 2, i32* [[I]], align 4
// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK2: .omp.final.done:
// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8*
// CHECK2-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8
// CHECK2-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK2-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK2-NEXT: ]
// CHECK2: .omp.reduction.case1:
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK2-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4
// CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.case2:
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK2-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.default:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK2-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
// CHECK2-SAME: () #[[ATTR6:[0-9]+]] comdat {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 0, i32* [[T_VAR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8*
// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[TMP1]] to i32**
// CHECK2-NEXT: store i32* [[T_VAR]], i32** [[TMP2]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
// CHECK2-NEXT: store i32* [[T_VAR]], i32** [[TMP4]], align 8
// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK2-NEXT: store i8* null, i8** [[TMP5]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2)
// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.3, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
// CHECK2-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK2-NEXT: br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2: omp_offload.failed:
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32* [[T_VAR]]) #[[ATTR3]]
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK2: omp_offload.cont:
// CHECK2-NEXT: ret i32 0
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32
// CHECK2-SAME: (i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[TMP0]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR2]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[T_VAR1]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11
// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !11
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4, !llvm.access.group !11
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11
// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK2-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK2: .omp.final.then:
// CHECK2-NEXT: store i32 2, i32* [[I]], align 4
// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK2: .omp.final.done:
// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8*
// CHECK2-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8
// CHECK2-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK2-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK2-NEXT: ]
// CHECK2: .omp.reduction.case1:
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK2-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4
// CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.case2:
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK2-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.default:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
// CHECK2-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK2-SAME: () #[[ATTR8:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK2-NEXT: ret void
//
//
|
|
// CHECK3-LABEL: define {{[^@]+}}@main
|
|
// CHECK3-SAME: () #[[ATTR0:[0-9]+]] {
|
|
// CHECK3-NEXT: entry:
|
|
// CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
|
|
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 4
|
|
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 4
|
|
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 4
|
|
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4
|
|
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK3-NEXT: [[TMP1:%.*]] = bitcast i8** [[TMP0]] to i32**
|
|
// CHECK3-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP1]], align 4
|
|
// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
|
|
// CHECK3-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP3]], align 4
|
|
// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
|
|
// CHECK3-NEXT: store i8* null, i8** [[TMP4]], align 4
|
|
// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK3-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 2)
|
|
// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
|
|
// CHECK3-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
|
|
// CHECK3-NEXT: br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
|
// CHECK3: omp_offload.failed:
|
|
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(i32* @_ZZ4mainE5sivar) #[[ATTR3:[0-9]+]]
|
|
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
|
|
// CHECK3: omp_offload.cont:
|
|
// CHECK3-NEXT: [[CALL:%.*]] = call i32 @_Z5tmainIiET_v()
|
|
// CHECK3-NEXT: ret i32 [[CALL]]
|
|
//
|
|
//
|
|
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60
|
|
// CHECK3-SAME: (i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
// CHECK3-NEXT: entry:
|
|
// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4
|
|
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4
|
|
// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]])
|
|
// CHECK3-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined.
|
|
// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] {
|
|
// CHECK3-NEXT: entry:
|
|
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4
|
|
// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
|
// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
|
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
|
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
|
|
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4
// CHECK3-NEXT: store i32 0, i32* [[SIVAR1]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !6
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !6
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK3: .omp.final.then:
// CHECK3-NEXT: store i32 2, i32* [[I]], align 4
// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK3: .omp.final.done:
// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8*
// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4
// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 4
// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4
// CHECK3-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
// CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 4
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8*
// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false)
// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP2:%.*]] = bitcast i8** [[TMP1]] to i32**
// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[TMP2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[TMP4]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-NEXT: store i8* null, i8** [[TMP5]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2)
// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.3, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
// CHECK3-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK3-NEXT: br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32* [[T_VAR]]) #[[ATTR3]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3: omp_offload.cont:
// CHECK3-NEXT: ret i32 0
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32
// CHECK3-SAME: (i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4
// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[TMP0]])
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR2]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4
// CHECK3-NEXT: store i32 0, i32* [[T_VAR1]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !12
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4, !llvm.access.group !12
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK3: .omp.final.then:
// CHECK3-NEXT: store i32 2, i32* [[I]], align 4
// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK3: .omp.final.done:
// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8*
// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4
// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
// CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 4
// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4
// CHECK3-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK3-SAME: () #[[ATTR8:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK3-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@main
// CHECK4-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK4-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK4-NEXT: [[TMP1:%.*]] = bitcast i8** [[TMP0]] to i32**
// CHECK4-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP1]], align 4
// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK4-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
// CHECK4-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP3]], align 4
// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK4-NEXT: store i8* null, i8** [[TMP4]], align 4
// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK4-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 2)
// CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
// CHECK4-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
// CHECK4-NEXT: br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK4: omp_offload.failed:
// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(i32* @_ZZ4mainE5sivar) #[[ATTR3:[0-9]+]]
// CHECK4-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK4: omp_offload.cont:
// CHECK4-NEXT: [[CALL:%.*]] = call i32 @_Z5tmainIiET_v()
// CHECK4-NEXT: ret i32 [[CALL]]
//
//
// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60
// CHECK4-SAME: (i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4
// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]])
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK4-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4
// CHECK4-NEXT: store i32 0, i32* [[SIVAR1]], align 4
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK4: cond.true:
// CHECK4-NEXT: br label [[COND_END:%.*]]
// CHECK4: cond.false:
// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: br label [[COND_END]]
// CHECK4: cond.end:
// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
// CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !6
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !6
// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4: omp.body.continue:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK4-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK4: .omp.final.then:
// CHECK4-NEXT: store i32 2, i32* [[I]], align 4
// CHECK4-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK4: .omp.final.done:
// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK4-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8*
// CHECK4-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4
// CHECK4-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK4-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK4-NEXT: ]
// CHECK4: .omp.reduction.case1:
// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK4-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4
// CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4: .omp.reduction.case2:
// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK4-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4: .omp.reduction.default:
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK4-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4
// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 4
// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4
// CHECK4-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
// CHECK4-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK4-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
// CHECK4-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
// CHECK4-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
// CHECK4-SAME: () #[[ATTR6:[0-9]+]] comdat {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
// CHECK4-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK4-NEXT: store i32 0, i32* [[T_VAR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8*
// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false)
// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK4-NEXT: [[TMP2:%.*]] = bitcast i8** [[TMP1]] to i32**
// CHECK4-NEXT: store i32* [[T_VAR]], i32** [[TMP2]], align 4
// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
// CHECK4-NEXT: store i32* [[T_VAR]], i32** [[TMP4]], align 4
// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK4-NEXT: store i8* null, i8** [[TMP5]], align 4
// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK4-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2)
// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.3, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
// CHECK4-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK4-NEXT: br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK4: omp_offload.failed:
// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32* [[T_VAR]]) #[[ATTR3]]
// CHECK4-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK4: omp_offload.cont:
// CHECK4-NEXT: ret i32 0
//
//
// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32
// CHECK4-SAME: (i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4
// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[TMP0]])
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR2]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK4-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4
// CHECK4-NEXT: store i32 0, i32* [[T_VAR1]], align 4
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK4: cond.true:
// CHECK4-NEXT: br label [[COND_END:%.*]]
// CHECK4: cond.false:
// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: br label [[COND_END]]
// CHECK4: cond.end:
// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12
// CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12
// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !12
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4, !llvm.access.group !12
// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4: omp.body.continue:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK4-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK4: .omp.final.then:
// CHECK4-NEXT: store i32 2, i32* [[I]], align 4
// CHECK4-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK4: .omp.final.done:
// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK4-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8*
// CHECK4-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4
// CHECK4-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK4-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK4-NEXT: ]
// CHECK4: .omp.reduction.case1:
// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK4-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4
// CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4: .omp.reduction.case2:
// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK4-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4: .omp.reduction.default:
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
// CHECK4-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4
// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 4
// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4
// CHECK4-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
// CHECK4-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK4-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
// CHECK4-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
// CHECK4-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK4-SAME: () #[[ATTR8:[0-9]+]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK4-NEXT: ret void
//
//
// CHECK5-LABEL: define {{[^@]+}}@main
// CHECK5-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
// CHECK5-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: store i32 0, i32* [[SIVAR]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2
// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2
// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !2
// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !2
// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
// CHECK5-NEXT: store i32 [[ADD1]], i32* [[SIVAR]], align 4, !llvm.access.group !2
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: store i32 2, i32* [[I]], align 4
// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4
// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[SIVAR]], align 4
// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], [[TMP8]]
// CHECK5-NEXT: store i32 [[ADD3]], i32* @_ZZ4mainE5sivar, align 4
// CHECK5-NEXT: [[CALL:%.*]] = call signext i32 @_Z5tmainIiET_v()
// CHECK5-NEXT: ret i32 [[CALL]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
// CHECK5-SAME: () #[[ATTR1:[0-9]+]] comdat {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4
// CHECK5-NEXT: store i32 0, i32* [[T_VAR]], align 4
// CHECK5-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8*
// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false)
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: store i32 0, i32* [[T_VAR1]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6
// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !6
// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
// CHECK5-NEXT: store i32 [[ADD2]], i32* [[T_VAR1]], align 4, !llvm.access.group !6
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: store i32 2, i32* [[I]], align 4
// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4
// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK5-NEXT: store i32 [[ADD4]], i32* [[T_VAR]], align 4
// CHECK5-NEXT: ret i32 0
//
//
// CHECK6-LABEL: define {{[^@]+}}@main
// CHECK6-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK6-NEXT: entry:
// CHECK6-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
// CHECK6-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK6-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: store i32 0, i32* [[SIVAR]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK6: omp.inner.for.cond:
// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2
// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK6: omp.inner.for.body:
// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2
// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !2
// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !2
// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
// CHECK6-NEXT: store i32 [[ADD1]], i32* [[SIVAR]], align 4, !llvm.access.group !2
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK6: omp.body.continue:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6: omp.inner.for.inc:
// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK6: omp.inner.for.end:
// CHECK6-NEXT: store i32 2, i32* [[I]], align 4
// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4
// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[SIVAR]], align 4
// CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], [[TMP8]]
// CHECK6-NEXT: store i32 [[ADD3]], i32* @_ZZ4mainE5sivar, align 4
// CHECK6-NEXT: [[CALL:%.*]] = call signext i32 @_Z5tmainIiET_v()
// CHECK6-NEXT: ret i32 [[CALL]]
//
//
// CHECK6-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
// CHECK6-SAME: () #[[ATTR1:[0-9]+]] comdat {
// CHECK6-NEXT: entry:
// CHECK6-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
// CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4
// CHECK6-NEXT: store i32 0, i32* [[T_VAR]], align 4
// CHECK6-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8*
// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false)
// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK6-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: store i32 0, i32* [[T_VAR1]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK6: omp.inner.for.cond:
// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]]
// CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK6: omp.inner.for.body:
// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6
// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !6
// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
// CHECK6-NEXT: store i32 [[ADD2]], i32* [[T_VAR1]], align 4, !llvm.access.group !6
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK6: omp.body.continue:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6: omp.inner.for.inc:
// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK6: omp.inner.for.end:
// CHECK6-NEXT: store i32 2, i32* [[I]], align 4
// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4
// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK6-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK6-NEXT: store i32 [[ADD4]], i32* [[T_VAR]], align 4
// CHECK6-NEXT: ret i32 0
//
//
// CHECK7-LABEL: define {{[^@]+}}@main
// CHECK7-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK7-NEXT: entry:
// CHECK7-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK7-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK7-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
// CHECK7-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK7-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: store i32 0, i32* [[SIVAR]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3
// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !3
// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3
// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !3
// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !3
// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !3
|
|
// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
|
|
// CHECK7-NEXT: store i32 [[ADD1]], i32* [[SIVAR]], align 4, !llvm.access.group !3
|
|
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
|
// CHECK7: omp.body.continue:
|
|
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// CHECK7: omp.inner.for.inc:
|
|
// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3
|
|
// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
|
|
// CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3
|
|
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
// CHECK7: omp.inner.for.end:
|
|
// CHECK7-NEXT: store i32 2, i32* [[I]], align 4
|
|
// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4
|
|
// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[SIVAR]], align 4
|
|
// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], [[TMP8]]
|
|
// CHECK7-NEXT: store i32 [[ADD3]], i32* @_ZZ4mainE5sivar, align 4
|
|
// CHECK7-NEXT: [[CALL:%.*]] = call i32 @_Z5tmainIiET_v()
|
|
// CHECK7-NEXT: ret i32 [[CALL]]
|
|
//
|
|
//
|
|
// CHECK7-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
|
|
// CHECK7-SAME: () #[[ATTR1:[0-9]+]] comdat {
|
|
// CHECK7-NEXT: entry:
|
|
// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
|
|
// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
|
|
// CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
|
// CHECK7-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
|
// CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK7-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4
|
|
// CHECK7-NEXT: store i32 0, i32* [[T_VAR]], align 4
|
|
// CHECK7-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8*
|
|
// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false)
|
|
// CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
|
|
// CHECK7-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
|
|
// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
|
|
// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_IV]], align 4
|
|
// CHECK7-NEXT: store i32 0, i32* [[T_VAR1]], align 4
|
|
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
|
// CHECK7: omp.inner.for.cond:
|
|
// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7
|
|
// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
|
|
// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]]
|
|
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
|
// CHECK7: omp.inner.for.body:
|
|
// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7
|
|
// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
|
|
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
|
// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !7
|
|
// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7
|
|
// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !7
|
|
// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
|
|
// CHECK7-NEXT: store i32 [[ADD2]], i32* [[T_VAR1]], align 4, !llvm.access.group !7
|
|
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
|
// CHECK7: omp.body.continue:
|
|
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// CHECK7: omp.inner.for.inc:
|
|
// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7
|
|
// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
|
|
// CHECK7-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7
|
|
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
// CHECK7: omp.inner.for.end:
|
|
// CHECK7-NEXT: store i32 2, i32* [[I]], align 4
|
|
// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4
|
|
// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR1]], align 4
|
|
// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
|
|
// CHECK7-NEXT: store i32 [[ADD4]], i32* [[T_VAR]], align 4
|
|
// CHECK7-NEXT: ret i32 0
|
|
//
|
|
//
|
|
// CHECK8-LABEL: define {{[^@]+}}@main
|
|
// CHECK8-SAME: () #[[ATTR0:[0-9]+]] {
|
|
// CHECK8-NEXT: entry:
|
|
// CHECK8-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
|
|
// CHECK8-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CHECK8-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
|
// CHECK8-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
|
// CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK8-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
|
|
// CHECK8-NEXT: store i32 0, i32* [[RETVAL]], align 4
|
|
// CHECK8-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
|
|
// CHECK8-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
|
|
// CHECK8-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
|
|
// CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
|
|
// CHECK8-NEXT: store i32 0, i32* [[SIVAR]], align 4
|
|
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
|
// CHECK8: omp.inner.for.cond:
|
|
// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3
|
|
// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !3
|
|
// CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
|
|
// CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
|
// CHECK8: omp.inner.for.body:
|
|
// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3
|
|
// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
|
|
// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
|
// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !3
|
|
// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !3
|
|
// CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !3
|
|
// CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
|
|
// CHECK8-NEXT: store i32 [[ADD1]], i32* [[SIVAR]], align 4, !llvm.access.group !3
|
|
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
|
// CHECK8: omp.body.continue:
|
|
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// CHECK8: omp.inner.for.inc:
|
|
// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3
|
|
// CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
|
|
// CHECK8-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3
|
|
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
// CHECK8: omp.inner.for.end:
|
|
// CHECK8-NEXT: store i32 2, i32* [[I]], align 4
|
|
// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4
|
|
// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[SIVAR]], align 4
|
|
// CHECK8-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], [[TMP8]]
|
|
// CHECK8-NEXT: store i32 [[ADD3]], i32* @_ZZ4mainE5sivar, align 4
|
|
// CHECK8-NEXT: [[CALL:%.*]] = call i32 @_Z5tmainIiET_v()
|
|
// CHECK8-NEXT: ret i32 [[CALL]]
|
|
//
|
|
//
|
|
// CHECK8-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
|
|
// CHECK8-SAME: () #[[ATTR1:[0-9]+]] comdat {
|
|
// CHECK8-NEXT: entry:
|
|
// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
|
|
// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
|
|
// CHECK8-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CHECK8-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
|
// CHECK8-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
|
// CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK8-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4
|
|
// CHECK8-NEXT: store i32 0, i32* [[T_VAR]], align 4
|
|
// CHECK8-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8*
|
|
// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false)
|
|
// CHECK8-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
|
|
// CHECK8-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
|
|
// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
|
|
// CHECK8-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_IV]], align 4
|
|
// CHECK8-NEXT: store i32 0, i32* [[T_VAR1]], align 4
|
|
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
|
// CHECK8: omp.inner.for.cond:
|
|
// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7
|
|
// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
|
|
// CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]]
|
|
// CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
|
// CHECK8: omp.inner.for.body:
|
|
// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7
|
|
// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
|
|
// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
|
// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !7
|
|
// CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7
|
|
// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !7
|
|
// CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
|
|
// CHECK8-NEXT: store i32 [[ADD2]], i32* [[T_VAR1]], align 4, !llvm.access.group !7
|
|
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
|
// CHECK8: omp.body.continue:
|
|
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// CHECK8: omp.inner.for.inc:
|
|
// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7
|
|
// CHECK8-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
|
|
// CHECK8-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7
|
|
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
// CHECK8: omp.inner.for.end:
|
|
// CHECK8-NEXT: store i32 2, i32* [[I]], align 4
|
|
// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4
|
|
// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR1]], align 4
|
|
// CHECK8-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
|
|
// CHECK8-NEXT: store i32 [[ADD4]], i32* [[T_VAR]], align 4
|
|
// CHECK8-NEXT: ret i32 0
|
|
//
|
|
//
|
|
// CHECK9-LABEL: define {{[^@]+}}@main
|
|
// CHECK9-SAME: () #[[ATTR0:[0-9]+]] {
|
|
// CHECK9-NEXT: entry:
|
|
// CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
|
|
// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1
|
|
// CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4
|
|
// CHECK9-NEXT: call void @"_ZZ4mainENK3$_0clEv"(%class.anon* nonnull align 1 dereferenceable(1) [[REF_TMP]])
|
|
// CHECK9-NEXT: ret i32 0
|
|
//
|
|
//
|
|
// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44
|
|
// CHECK9-SAME: (i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] {
|
|
// CHECK9-NEXT: entry:
|
|
// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
|
|
// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
|
|
// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]])
|
|
// CHECK9-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined.
|
|
// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] {
|
|
// CHECK9-NEXT: entry:
|
|
// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4
|
|
// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
|
// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
|
// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
|
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
|
|
// CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
|
|
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
|
|
// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
|
|
// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
|
|
// CHECK9-NEXT: store i32 0, i32* [[SIVAR1]], align 4
|
|
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
|
|
// CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
|
|
// CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
|
|
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
|
|
// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
|
|
// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
|
|
// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
|
|
// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
|
|
// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
|
// CHECK9: cond.true:
|
|
// CHECK9-NEXT: br label [[COND_END:%.*]]
|
|
// CHECK9: cond.false:
|
|
// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
|
|
// CHECK9-NEXT: br label [[COND_END]]
|
|
// CHECK9: cond.end:
|
|
// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
|
|
// CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
|
|
// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
|
|
// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
|
|
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
|
// CHECK9: omp.inner.for.cond:
|
|
// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4
|
|
// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4
|
|
// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
|
|
// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
|
// CHECK9: omp.inner.for.body:
|
|
// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4
|
|
// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
|
|
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
|
// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4
|
|
// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4
|
|
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !4
|
|
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
|
|
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !4
|
|
// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0
|
|
// CHECK9-NEXT: store i32* [[SIVAR1]], i32** [[TMP11]], align 8, !llvm.access.group !4
|
|
// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !4
|
|
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
|
// CHECK9: omp.body.continue:
|
|
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// CHECK9: omp.inner.for.inc:
|
|
// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4
|
|
// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1
|
|
// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4
|
|
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
// CHECK9: omp.inner.for.end:
|
|
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
|
// CHECK9: omp.loop.exit:
|
|
// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
|
|
// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
|
|
// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
|
|
// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
|
|
// CHECK9: .omp.final.then:
|
|
// CHECK9-NEXT: store i32 2, i32* [[I]], align 4
|
|
// CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]]
|
|
// CHECK9: .omp.final.done:
|
|
// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
|
|
// CHECK9-NEXT: [[TMP16:%.*]] = bitcast i32* [[SIVAR1]] to i8*
|
|
// CHECK9-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8
|
|
// CHECK9-NEXT: [[TMP17:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
|
|
// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP17]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK9-NEXT: ]
|
|
// CHECK9: .omp.reduction.case1:
|
|
// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP0]], align 4
|
|
// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4
|
|
// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
|
|
// CHECK9-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4
|
|
// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK9: .omp.reduction.case2:
|
|
// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR1]], align 4
|
|
// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP21]] monotonic, align 4
|
|
// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK9: .omp.reduction.default:
|
|
// CHECK9-NEXT: ret void
|
|
//
|
|
//
|
|
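// Hedged C-style sketch (illustration only; the runtime entry points named
// here are the ones that actually appear in the IR above) of the reduction
// protocol the .omp_outlined. checks exercise: each thread packs the address
// of its private copy into a list, hands it to __kmpc_reduce, and combines
// according to the returned case:
//
//   void *red_list[1] = { &sivar1 };
//   switch (__kmpc_reduce(loc, tid, 1, sizeof(red_list), red_list,
//                         reduction_func, &reduction_lock)) {
//   case 1:                                  // non-atomic path, [[ADD5]]
//     *sivar += sivar1;
//     __kmpc_end_reduce(loc, tid, &reduction_lock);
//     break;
//   case 2:                                  // atomic path (atomicrmw monotonic)
//     __atomic_fetch_add(sivar, sivar1, __ATOMIC_RELAXED);
//     __kmpc_end_reduce(loc, tid, &reduction_lock);
//     break;
//   default:                                 // 0: another thread combined already
//     break;
//   }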
// CHECK9-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK9-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK9-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK9-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK9-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK9-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK9-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK9-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK9-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK9-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK9-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK9-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK9-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK9-NEXT: ret void
//
//
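// Hedged sketch of what the checked reduction_func body does: both i8*
// arguments are really '[1 x i8*]*' lists of private copies, and element 0 of
// the LHS list is updated in place with element 0 of the RHS list:
//
//   static void reduction_func(void *lhs, void *rhs) {
//     int *out = (int *)((void **)lhs)[0];   // [[TMP11]]
//     int *in  = (int *)((void **)rhs)[0];   // [[TMP8]]
//     *out += *in;                           // [[ADD]]
//   }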
// CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK9-SAME: () #[[ATTR7:[0-9]+]] {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK9-NEXT: ret void
//
//
// CHECK10-LABEL: define {{[^@]+}}@main
// CHECK10-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK10-NEXT: entry:
// CHECK10-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1
// CHECK10-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK10-NEXT: call void @"_ZZ4mainENK3$_0clEv"(%class.anon* nonnull align 1 dereferenceable(1) [[REF_TMP]])
// CHECK10-NEXT: ret i32 0
//
//
// CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44
// CHECK10-SAME: (i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK10-NEXT: entry:
// CHECK10-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK10-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
// CHECK10-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]])
// CHECK10-NEXT: ret void
//
//
// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK10-NEXT: entry:
// CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK10-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK10-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
// CHECK10-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
// CHECK10-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[SIVAR1]], align 4
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK10-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK10: cond.true:
// CHECK10-NEXT: br label [[COND_END:%.*]]
// CHECK10: cond.false:
// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT: br label [[COND_END]]
// CHECK10: cond.end:
// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4
// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4
// CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4
// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4
// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !4
// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0
// CHECK10-NEXT: store i32* [[SIVAR1]], i32** [[TMP11]], align 8, !llvm.access.group !4
// CHECK10-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !4
// CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK10: omp.body.continue:
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4
// CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1
// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK10-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
// CHECK10-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK10: .omp.final.then:
// CHECK10-NEXT: store i32 2, i32* [[I]], align 4
// CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK10: .omp.final.done:
// CHECK10-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK10-NEXT: [[TMP16:%.*]] = bitcast i32* [[SIVAR1]] to i8*
// CHECK10-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8
// CHECK10-NEXT: [[TMP17:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK10-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP17]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK10-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK10-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK10-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK10-NEXT: ]
// CHECK10: .omp.reduction.case1:
// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK10-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
// CHECK10-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4
// CHECK10-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK10-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK10: .omp.reduction.case2:
// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK10-NEXT: [[TMP22:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP21]] monotonic, align 4
// CHECK10-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK10-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK10: .omp.reduction.default:
// CHECK10-NEXT: ret void
//
//
// CHECK10-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK10-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] {
// CHECK10-NEXT: entry:
// CHECK10-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK10-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK10-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK10-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK10-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK10-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK10-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK10-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK10-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK10-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK10-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK10-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK10-NEXT: ret void
//
//
// CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK10-SAME: () #[[ATTR7:[0-9]+]] {
// CHECK10-NEXT: entry:
// CHECK10-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK10-NEXT: ret void
//
//
// CHECK11-LABEL: define {{[^@]+}}@main
// CHECK11-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK11-NEXT: entry:
// CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1
// CHECK11-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK11-NEXT: call void @"_ZZ4mainENK3$_0clEv"(%class.anon* nonnull align 1 dereferenceable(1) [[REF_TMP]])
// CHECK11-NEXT: ret i32 0
//
//
// CHECK12-LABEL: define {{[^@]+}}@main
// CHECK12-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK12-NEXT: entry:
// CHECK12-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1
// CHECK12-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK12-NEXT: call void @"_ZZ4mainENK3$_0clEv"(%class.anon* nonnull align 1 dereferenceable(1) [[REF_TMP]])
// CHECK12-NEXT: ret i32 0
//