Files
clang-p2996/clang/test/OpenMP/target_parallel_tl_codegen.cpp
Johannes Doerfert b8cbc5c02c [OpenMP] Introduce the KernelLaunchEnvironment as implicit argument (#70401)
The KernelEnvironment is for compile time information about a kernel. It
allows the compiler to feed information to the runtime. The
KernelLaunchEnvironment is for dynamic information *per* kernel launch.
It allows the runtime to feed information to the kernel that is not
shared with other invocations of the kernel. The first use case is to
replace the globals that synchronize teams reductions with per-launch
versions. This allows concurrent teams reductions. More use cases will
follow, e.g., per launch memory pools.

Fixes: https://github.com/llvm/llvm-project/issues/70249
2023-10-31 19:38:43 -07:00

86 lines
5.4 KiB
C++

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 2
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck --check-prefix=OMP51 %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -fopenmp-version=51 -x c++ -std=c++11 -emit-pch -o %t %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -fopenmp-version=51 -x c++ -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=OMP51
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
int thread_limit_target_parallel() {
// Check that .omp_task_entry. calls __kmpc_set_thread_limit with the clause value (2) before it calls the offloading function.
#pragma omp target parallel thread_limit(2)
{} // Empty region body; the OMP51 checks expect the outlined function to just return.
return 0;
}
#endif
// OMP51-LABEL: define dso_local noundef i32 @_Z28thread_limit_target_parallelv
// OMP51-SAME: () #[[ATTR0:[0-9]+]] {
// OMP51-NEXT: entry:
// OMP51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
// OMP51-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// OMP51-NEXT: [[TMP1:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.)
// OMP51-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP1]], i32 0, i32 0
// OMP51-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]])
// OMP51-NEXT: [[TMP3:%.*]] = call i32 @.omp_task_entry.(i32 [[TMP0]], ptr [[TMP1]]) #[[ATTR2:[0-9]+]]
// OMP51-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]])
// OMP51-NEXT: ret i32 0
//
//
// OMP51-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28thread_limit_target_parallelv_l14
// OMP51-SAME: () #[[ATTR1:[0-9]+]] {
// OMP51-NEXT: entry:
// OMP51-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28thread_limit_target_parallelv_l14.omp_outlined)
// OMP51-NEXT: ret void
//
//
// OMP51-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28thread_limit_target_parallelv_l14.omp_outlined
// OMP51-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// OMP51-NEXT: entry:
// OMP51-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// OMP51-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// OMP51-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// OMP51-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// OMP51-NEXT: ret void
//
//
// OMP51-LABEL: define internal noundef i32 @.omp_task_entry.
// OMP51-SAME: (i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// OMP51-NEXT: entry:
// OMP51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// OMP51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca ptr, align 8
// OMP51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca ptr, align 8
// OMP51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca ptr, align 8
// OMP51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca ptr, align 8
// OMP51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca ptr, align 8
// OMP51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
// OMP51-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// OMP51-NEXT: store i32 [[TMP0]], ptr [[DOTADDR]], align 4
// OMP51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// OMP51-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4
// OMP51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
// OMP51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP3]], i32 0, i32 0
// OMP51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
// OMP51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
// OMP51-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
// OMP51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]])
// OMP51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// OMP51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
// OMP51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
// OMP51-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
// OMP51-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
// OMP51-NEXT: store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
// OMP51-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
// OMP51-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
// OMP51-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// OMP51-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// OMP51-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
// OMP51-NEXT: call void @__kmpc_set_thread_limit(ptr @[[GLOB1]], i32 [[TMP9]], i32 2)
// OMP51-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28thread_limit_target_parallelv_l14() #[[ATTR2]]
// OMP51-NEXT: ret i32 0
//