The KernelEnvironment is for compile-time information about a kernel. It allows the compiler to feed information to the runtime. The KernelLaunchEnvironment is for dynamic information *per* kernel launch. It allows the runtime to feed information to the kernel that is not shared with other invocations of the kernel. The first use case is to replace the globals that synchronize teams reductions with per-launch versions. This allows concurrent teams reductions. More use cases will follow, e.g., per-launch memory pools. Fixes: https://github.com/llvm/llvm-project/issues/70249
99 lines
6.2 KiB
C++
99 lines
6.2 KiB
C++
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_ size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 2
|
|
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck --check-prefix=OMP51 %s
|
|
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -fopenmp-version=51 -x c++ -std=c++11 -emit-pch -o %t %s
|
|
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -fopenmp-version=51 -x c++ -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=OMP51
|
|
|
|
// expected-no-diagnostics
|
|
|
|
#ifndef HEADER
|
|
#define HEADER
|
|
|
|
int thread_limit_target_simd() { // Test input: one `target simd` region carrying a thread_limit(2) clause; always returns 0.
|
|
|
|
// Check that the task entry function calls __kmpc_set_thread_limit with the clause value (2) before it calls the offloading function. NOTE(review): the CHECK lines name the offloading function with an `_l14` suffix, presumably this pragma's line number in the test file - keep the line layout unchanged or regenerate the assertions.
|
|
#pragma omp target simd thread_limit(2)
|
|
for(int i=0; i<2; i++) {} // Empty body on purpose: the CHECK lines match only the generated loop scaffolding.
|
|
|
|
return 0;
|
|
}
|
|
|
|
#endif
|
|
// OMP51-LABEL: define dso_local noundef i32 @_Z24thread_limit_target_simdv
|
|
// OMP51-SAME: () #[[ATTR0:[0-9]+]] {
|
|
// OMP51-NEXT: entry:
|
|
// OMP51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
|
|
// OMP51-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
|
|
// OMP51-NEXT: [[TMP1:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.)
|
|
// OMP51-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP1]], i32 0, i32 0
|
|
// OMP51-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]])
|
|
// OMP51-NEXT: [[TMP3:%.*]] = call i32 @.omp_task_entry.(i32 [[TMP0]], ptr [[TMP1]]) #[[ATTR2:[0-9]+]]
|
|
// OMP51-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]])
|
|
// OMP51-NEXT: ret i32 0
|
|
//
|
|
//
|
|
// OMP51-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24thread_limit_target_simdv_l14
|
|
// OMP51-SAME: () #[[ATTR1:[0-9]+]] {
|
|
// OMP51-NEXT: entry:
|
|
// OMP51-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// OMP51-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// OMP51-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// OMP51-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
|
|
// OMP51-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
|
// OMP51: omp.inner.for.cond:
|
|
// OMP51-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
|
|
// OMP51-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 2
|
|
// OMP51-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
|
// OMP51: omp.inner.for.body:
|
|
// OMP51-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
|
|
// OMP51-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP1]], 1
|
|
// OMP51-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
|
// OMP51-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
|
|
// OMP51-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
|
// OMP51: omp.body.continue:
|
|
// OMP51-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// OMP51: omp.inner.for.inc:
|
|
// OMP51-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
|
|
// OMP51-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP2]], 1
|
|
// OMP51-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
|
|
// OMP51-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
// OMP51: omp.inner.for.end:
|
|
// OMP51-NEXT: store i32 2, ptr [[I]], align 4
|
|
// OMP51-NEXT: ret void
|
|
//
|
|
//
|
|
// OMP51-LABEL: define internal noundef i32 @.omp_task_entry.
|
|
// OMP51-SAME: (i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
|
|
// OMP51-NEXT: entry:
|
|
// OMP51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
|
|
// OMP51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca ptr, align 8
|
|
// OMP51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca ptr, align 8
|
|
// OMP51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca ptr, align 8
|
|
// OMP51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca ptr, align 8
|
|
// OMP51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca ptr, align 8
|
|
// OMP51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
|
|
// OMP51-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
|
|
// OMP51-NEXT: store i32 [[TMP0]], ptr [[DOTADDR]], align 4
|
|
// OMP51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
|
|
// OMP51-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4
|
|
// OMP51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
|
|
// OMP51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP3]], i32 0, i32 0
|
|
// OMP51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
|
|
// OMP51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
|
|
// OMP51-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
|
|
// OMP51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]])
|
|
// OMP51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
|
|
// OMP51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
|
|
// OMP51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]])
|
|
// OMP51-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !16
|
|
// OMP51-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !16
|
|
// OMP51-NEXT: store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !16
|
|
// OMP51-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !16
|
|
// OMP51-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !16
|
|
// OMP51-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !16
|
|
// OMP51-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !16
|
|
// OMP51-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !16
|
|
// OMP51-NEXT: call void @__kmpc_set_thread_limit(ptr @[[GLOB1]], i32 [[TMP9]], i32 2)
|
|
// OMP51-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24thread_limit_target_simdv_l14() #[[ATTR2]]
|
|
// OMP51-NEXT: ret i32 0
|
|
//
|