Files
clang-p2996/llvm/test/Transforms/OpenMP/global_constructor.ll
Shilei Tian 10068cd654 [OpenMP] Introduce kernel environment
This patch introduces per kernel environment. Previously, flags such as execution mode are set through global variables with name like `__kernel_name_exec_mode`. They are accessible on the host by reading the corresponding global variable, but not from the device. Besides, some assumptions, such as no nested parallelism, are not per kernel basis, preventing us applying per kernel optimization in the device runtime.

This is a combination and refinement of patch series D116908, D116909, and D116910.

Depend on D155886.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D142569
2023-07-26 13:35:14 -04:00

111 lines
4.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@_ZL6Device = internal global double 0.000000e+00, align 8
@__omp_offloading_fd02_85283c04_main_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
define weak void @__omp_offloading_fd02_85283c04_main_l11(ptr nonnull align 8 dereferenceable(8) %X) local_unnamed_addr "kernel" {
entry:
%0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment) #0
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
common.ret:
ret void
user_code.entry:
%1 = load double, ptr @_ZL6Device, align 8, !tbaa !11
%2 = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #0
%3 = icmp eq i32 %2, 0
br i1 %3, label %region.guarded, label %region.barrier
region.guarded:
store double %1, ptr %X, align 8, !tbaa !11
br label %region.barrier
region.barrier:
tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @1, i32 %2)
tail call void @__kmpc_target_deinit() #0
br label %common.ret
}
declare i32 @__kmpc_target_init(ptr) local_unnamed_addr
declare void @__kmpc_target_deinit() local_unnamed_addr
define weak void @__omp_offloading__fd02_85283c04_Device_l6_ctor() "kernel" {
entry:
%call.i = tail call double @__nv_log(double noundef 2.000000e+00) #1
%call.i2 = tail call double @__nv_log(double noundef 2.000000e+00) #1
%div = fdiv double %call.i, %call.i2
store double %div, ptr @_ZL6Device, align 8, !tbaa !11
ret void
}
declare double @__nv_log(double)
declare i32 @__kmpc_get_hardware_thread_id_in_block()
declare void @__kmpc_barrier_simple_spmd(ptr, i32)
attributes #0 = { nounwind }
attributes #1 = { convergent nounwind }
!omp_offload.info = !{!0, !1, !2}
!nvvm.annotations = !{!3, !4}
!llvm.module.flags = !{!5, !6, !7, !8, !9}
!llvm.ident = !{!10}
!0 = !{i32 0, i32 64770, i32 -2060960764, !"__omp_offloading__fd02_85283c04_Device_l6_ctor", i32 6, i32 1}
!1 = !{i32 0, i32 64770, i32 -2060960764, !"main", i32 11, i32 2}
!2 = !{i32 1, !"_ZL6Device", i32 0, i32 0}
!3 = !{ptr @__omp_offloading__fd02_85283c04_Device_l6_ctor, !"kernel", i32 1}
!4 = !{ptr @__omp_offloading_fd02_85283c04_main_l11, !"kernel", i32 1}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{i32 7, !"openmp", i32 50}
!7 = !{i32 7, !"openmp-device", i32 50}
!8 = !{i32 7, !"PIC Level", i32 2}
!9 = !{i32 7, !"frame-pointer", i32 2}
!10 = !{!"clang version 14.0.0"}
!11 = !{!12, !12, i64 0}
!12 = !{!"double", !13, i64 0}
!13 = !{!"omnipotent char", !14, i64 0}
!14 = !{!"Simple C++ TBAA"}
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11
; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: user_code.entry:
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA11]]
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1:[0-9]+]], i32 [[TMP2]]) #[[ATTR1]]
; CHECK-NEXT: tail call void @__kmpc_target_deinit() #[[ATTR1]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
;
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR2]]
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]]
; CHECK-NEXT: store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA11]]
; CHECK-NEXT: ret void
;