This patch expands SPMDization (converting generic execution mode to SPMD for target regions) by guarding code regions that should be executed only by the main thread. Specifically, it generates guarded regions, which only the main thread executes, and synchronizes them with the worker threads using simple barriers. For correctness, the patch aborts SPMDization of a target region if the same code also executes in a parallel region and thus must not be guarded. This check is implemented using the ParallelLevels AA.

Reviewed By: jhuber6

Differential Revision: https://reviews.llvm.org/D106892
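For illustration, a minimal C/OpenMP sketch of the pattern the guarding targets; the names (G, leaf, example) are placeholders for this note and are not taken from the test below:

int G;
void leaf(void) { G = 0; }   /* side effect on team-visible state */
void example(void) {
#pragma omp target teams
  {
    leaf();                  /* reached only by the initial thread: under SPMDization this
                                call would be placed in a guarded region followed by a
                                simple barrier so worker threads wait for the main thread */
#pragma omp parallel
    { leaf(); }              /* the same code is also reached from a parallel region, so
                                guarding it would be incorrect; the ParallelLevels AA
                                detects this and SPMDization of the target region is
                                aborted, keeping generic execution mode */
  }
}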
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
; RUN: opt -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=CHECK-DISABLED

;; void p0(void);
;; void p1(void);
;; int unknown(void);
;; void unknown_pure(void) __attribute__((pure));
;; void unknown_no_openmp(void) __attribute__((assume("omp_no_openmp")));
;;
;; int G;
;; void no_parallel_region_in_here(void) {
;; #pragma omp single
;; G = 0;
;; }
;;
;; void no_state_machine_needed() {
;; #pragma omp target teams
;; {
;; no_parallel_region_in_here();
;; unknown_no_openmp();
;; }
;; }
;;
;; void simple_state_machine() {
;; #pragma omp target teams
;; {
;; unknown_no_openmp();
;; #pragma omp parallel
;; { p0(); }
;; no_parallel_region_in_here();
;; #pragma omp parallel
;; { p1(); }
;; }
;; }
;;
;; void simple_state_machine_interprocedural_after(void);
;; void simple_state_machine_interprocedural_before(void) {
;; #pragma omp parallel
;; { p0(); }
;; }
;; void simple_state_machine_interprocedural() {
;; #pragma omp target teams
;; {
;; unknown_no_openmp();
;; simple_state_machine_interprocedural_before();
;; no_parallel_region_in_here();
;; #pragma omp parallel
;; { p1(); }
;; simple_state_machine_interprocedural_after();
;; }
;; }
;; void simple_state_machine_interprocedural_after(void) {
;; #pragma omp parallel
;; { p0(); }
;; }
;;
;; void simple_state_machine_with_fallback() {
;; #pragma omp target teams
;; {
;; #pragma omp parallel
;; { p0(); }
;; unknown();
;; #pragma omp parallel
;; { p1(); }
;; }
;; }
;;
;; void simple_state_machine_no_openmp_attr() {
;; #pragma omp target teams
;; {
;; #pragma omp parallel
;; { p0(); }
;; unknown_no_openmp();
;; #pragma omp parallel
;; { p1(); }
;; }
;; }
;;
;; void simple_state_machine_pure() {
;; #pragma omp target teams
;; {
;; unknown_no_openmp();
;; #pragma omp parallel
;; { p0(); }
;; unknown_pure();
;; #pragma omp parallel
;; { p1(); }
;; }
;; }
;;
;; int omp_get_thread_num();
;; void simple_state_machine_interprocedural_nested_recursive_after(int);
;; void simple_state_machine_interprocedural_nested_recursive_after_after(void);
;; void simple_state_machine_interprocedural_nested_recursive() {
;; #pragma omp target teams
;; {
;; simple_state_machine_interprocedural_nested_recursive_after(
;; omp_get_thread_num());
;; }
;; }
;;
;; void simple_state_machine_interprocedural_nested_recursive_after(int a) {
;; if (a == 0)
;; return;
;; simple_state_machine_interprocedural_nested_recursive_after(a - 1);
;; simple_state_machine_interprocedural_nested_recursive_after_after();
;; }
;; void simple_state_machine_interprocedural_nested_recursive_after_after(void) {
;; #pragma omp parallel
;; { p0(); }
;; }
;;
;; __attribute__((weak)) void weak_callee_empty(void) {}
;; void no_state_machine_weak_callee() {
;; #pragma omp target teams
;; { weak_callee_empty(); }
;; }

target triple = "nvptx64"

%struct.ident_t = type { i32, i32, i32, i32, i8* }

@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
@__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode = weak constant i8 1
@__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode = weak constant i8 1
@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode = weak constant i8 1
@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode = weak constant i8 1
@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode = weak constant i8 1
@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode = weak constant i8 1
@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode = weak constant i8 1
@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode = weak constant i8 1
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
@G = external global i32, align 4
@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
@llvm.compiler.used = appending global [8 x i8*] [i8* @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, i8* @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata"

define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14() #0 {
|
|
entry:
|
|
%.zero.addr = alloca i32, align 4
|
|
%.threadid_temp. = alloca i32, align 4
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
|
|
%exec_user_code = icmp eq i32 %0, -1
|
|
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
|
|
|
|
user_code.entry: ; preds = %entry
|
|
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
|
store i32 %1, i32* %.threadid_temp., align 4
|
|
call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #3
|
|
call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
|
|
ret void
|
|
|
|
worker.exit: ; preds = %entry
|
|
ret void
|
|
}
|
|
|
|
declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
|
|
|
|
define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @no_parallel_region_in_here() #7
|
|
call void @unknown_no_openmp() #8
|
|
ret void
|
|
}
|
|
|
|
define hidden void @no_parallel_region_in_here() #1 {
|
|
entry:
|
|
%0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
|
|
%1 = call i32 @__kmpc_single(%struct.ident_t* @2, i32 %0)
|
|
%2 = icmp ne i32 %1, 0
|
|
br i1 %2, label %omp_if.then, label %omp_if.end
|
|
|
|
omp_if.then: ; preds = %entry
|
|
store i32 0, i32* @G, align 4
|
|
call void @__kmpc_end_single(%struct.ident_t* @2, i32 %0)
|
|
br label %omp_if.end
|
|
|
|
omp_if.end: ; preds = %omp_if.then, %entry
|
|
call void @__kmpc_barrier(%struct.ident_t* @3, i32 %0)
|
|
ret void
|
|
}
|
|
|
|
declare void @unknown_no_openmp() #2
|
|
|
|
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3
|
|
|
|
declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
|
|
|
|
define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 {
|
|
entry:
|
|
%.zero.addr = alloca i32, align 4
|
|
%.threadid_temp. = alloca i32, align 4
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
|
|
%exec_user_code = icmp eq i32 %0, -1
|
|
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
|
|
|
|
user_code.entry: ; preds = %entry
|
|
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
|
store i32 %1, i32* %.threadid_temp., align 4
|
|
call void @__omp_outlined__1(i32* %.threadid_temp., i32* %.zero.addr) #3
|
|
call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
|
|
ret void
|
|
|
|
worker.exit: ; preds = %entry
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
%captured_vars_addrs = alloca [0 x i8*], align 8
|
|
%captured_vars_addrs1 = alloca [0 x i8*], align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @unknown_no_openmp() #8
|
|
%0 = load i32*, i32** %.global_tid..addr, align 8
|
|
%1 = load i32, i32* %0, align 4
|
|
%2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
|
|
call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** %2, i64 0)
|
|
call void @no_parallel_region_in_here() #7
|
|
%3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
|
|
call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %3, i64 0)
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @p0() #7
|
|
ret void
|
|
}
|
|
|
|
declare void @p0() #4
|
|
|
|
define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 {
|
|
entry:
|
|
%.addr = alloca i16, align 2
|
|
%.addr1 = alloca i32, align 4
|
|
%.zero.addr = alloca i32, align 4
|
|
%global_args = alloca i8**, align 8
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
store i16 %0, i16* %.addr, align 2
|
|
store i32 %1, i32* %.addr1, align 4
|
|
call void @__kmpc_get_shared_variables(i8*** %global_args)
|
|
call void @__omp_outlined__2(i32* %.addr1, i32* %.zero.addr) #3
|
|
ret void
|
|
}
|
|
|
|
declare void @__kmpc_get_shared_variables(i8***)
|
|
|
|
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
|
|
|
|
define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @p1() #7
|
|
ret void
|
|
}
|
|
|
|
declare void @p1() #4
|
|
|
|
define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #0 {
|
|
entry:
|
|
%.addr = alloca i16, align 2
|
|
%.addr1 = alloca i32, align 4
|
|
%.zero.addr = alloca i32, align 4
|
|
%global_args = alloca i8**, align 8
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
store i16 %0, i16* %.addr, align 2
|
|
store i32 %1, i32* %.addr1, align 4
|
|
call void @__kmpc_get_shared_variables(i8*** %global_args)
|
|
call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr) #3
|
|
ret void
|
|
}
|
|
|
|
define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39() #0 {
|
|
entry:
|
|
%.zero.addr = alloca i32, align 4
|
|
%.threadid_temp. = alloca i32, align 4
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
|
|
%exec_user_code = icmp eq i32 %0, -1
|
|
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
|
|
|
|
user_code.entry: ; preds = %entry
|
|
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
|
store i32 %1, i32* %.threadid_temp., align 4
|
|
call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #3
|
|
call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
|
|
ret void
|
|
|
|
worker.exit: ; preds = %entry
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
%captured_vars_addrs = alloca [0 x i8*], align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @unknown_no_openmp() #8
|
|
call void @simple_state_machine_interprocedural_before() #7
|
|
call void @no_parallel_region_in_here() #7
|
|
%0 = load i32*, i32** %.global_tid..addr, align 8
|
|
%1 = load i32, i32* %0, align 4
|
|
%2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
|
|
call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** %2, i64 0)
|
|
call void @simple_state_machine_interprocedural_after() #7
|
|
ret void
|
|
}
|
|
|
|
define hidden void @simple_state_machine_interprocedural_before() #1 {
|
|
entry:
|
|
%captured_vars_addrs = alloca [0 x i8*], align 8
|
|
%0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
|
|
%1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
|
|
call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** %1, i64 0)
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__5(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @p1() #7
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #0 {
|
|
entry:
|
|
%.addr = alloca i16, align 2
|
|
%.addr1 = alloca i32, align 4
|
|
%.zero.addr = alloca i32, align 4
|
|
%global_args = alloca i8**, align 8
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
store i16 %0, i16* %.addr, align 2
|
|
store i32 %1, i32* %.addr1, align 4
|
|
call void @__kmpc_get_shared_variables(i8*** %global_args)
|
|
call void @__omp_outlined__5(i32* %.addr1, i32* %.zero.addr) #3
|
|
ret void
|
|
}
|
|
|
|
define hidden void @simple_state_machine_interprocedural_after() #1 {
|
|
entry:
|
|
%captured_vars_addrs = alloca [0 x i8*], align 8
|
|
%0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
|
|
%1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
|
|
call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** %1, i64 0)
|
|
ret void
|
|
}
|
|
|
|
define weak void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55() #0 {
|
|
entry:
|
|
%.zero.addr = alloca i32, align 4
|
|
%.threadid_temp. = alloca i32, align 4
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
|
|
%exec_user_code = icmp eq i32 %0, -1
|
|
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
|
|
|
|
user_code.entry: ; preds = %entry
|
|
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
|
store i32 %1, i32* %.threadid_temp., align 4
|
|
call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #3
|
|
call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
|
|
ret void
|
|
|
|
worker.exit: ; preds = %entry
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
%captured_vars_addrs = alloca [0 x i8*], align 8
|
|
%captured_vars_addrs1 = alloca [0 x i8*], align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
%0 = load i32*, i32** %.global_tid..addr, align 8
|
|
%1 = load i32, i32* %0, align 4
|
|
%2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
|
|
call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** %2, i64 0)
|
|
%call = call i32 @unknown() #7
|
|
%3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
|
|
call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** %3, i64 0)
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__7(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @p0() #7
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #0 {
|
|
entry:
|
|
%.addr = alloca i16, align 2
|
|
%.addr1 = alloca i32, align 4
|
|
%.zero.addr = alloca i32, align 4
|
|
%global_args = alloca i8**, align 8
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
store i16 %0, i16* %.addr, align 2
|
|
store i32 %1, i32* %.addr1, align 4
|
|
call void @__kmpc_get_shared_variables(i8*** %global_args)
|
|
call void @__omp_outlined__7(i32* %.addr1, i32* %.zero.addr) #3
|
|
ret void
|
|
}
|
|
|
|
declare i32 @unknown() #4
|
|
|
|
define internal void @__omp_outlined__8(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @p1() #7
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__8_wrapper(i16 zeroext %0, i32 %1) #0 {
|
|
entry:
|
|
%.addr = alloca i16, align 2
|
|
%.addr1 = alloca i32, align 4
|
|
%.zero.addr = alloca i32, align 4
|
|
%global_args = alloca i8**, align 8
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
store i16 %0, i16* %.addr, align 2
|
|
store i32 %1, i32* %.addr1, align 4
|
|
call void @__kmpc_get_shared_variables(i8*** %global_args)
|
|
call void @__omp_outlined__8(i32* %.addr1, i32* %.zero.addr) #3
|
|
ret void
|
|
}
|
|
|
|
define weak void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66() #0 {
|
|
entry:
|
|
%.zero.addr = alloca i32, align 4
|
|
%.threadid_temp. = alloca i32, align 4
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
|
|
%exec_user_code = icmp eq i32 %0, -1
|
|
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
|
|
|
|
user_code.entry: ; preds = %entry
|
|
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
|
store i32 %1, i32* %.threadid_temp., align 4
|
|
call void @__omp_outlined__9(i32* %.threadid_temp., i32* %.zero.addr) #3
|
|
call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
|
|
ret void
|
|
|
|
worker.exit: ; preds = %entry
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__9(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
%captured_vars_addrs = alloca [0 x i8*], align 8
|
|
%captured_vars_addrs1 = alloca [0 x i8*], align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
%0 = load i32*, i32** %.global_tid..addr, align 8
|
|
%1 = load i32, i32* %0, align 4
|
|
%2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
|
|
call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** %2, i64 0)
|
|
call void @unknown_no_openmp() #8
|
|
%3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
|
|
call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** %3, i64 0)
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__10(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @p0() #7
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__10_wrapper(i16 zeroext %0, i32 %1) #0 {
|
|
entry:
|
|
%.addr = alloca i16, align 2
|
|
%.addr1 = alloca i32, align 4
|
|
%.zero.addr = alloca i32, align 4
|
|
%global_args = alloca i8**, align 8
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
store i16 %0, i16* %.addr, align 2
|
|
store i32 %1, i32* %.addr1, align 4
|
|
call void @__kmpc_get_shared_variables(i8*** %global_args)
|
|
call void @__omp_outlined__10(i32* %.addr1, i32* %.zero.addr) #3
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__11(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @p1() #7
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__11_wrapper(i16 zeroext %0, i32 %1) #0 {
|
|
entry:
|
|
%.addr = alloca i16, align 2
|
|
%.addr1 = alloca i32, align 4
|
|
%.zero.addr = alloca i32, align 4
|
|
%global_args = alloca i8**, align 8
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
store i16 %0, i16* %.addr, align 2
|
|
store i32 %1, i32* %.addr1, align 4
|
|
call void @__kmpc_get_shared_variables(i8*** %global_args)
|
|
call void @__omp_outlined__11(i32* %.addr1, i32* %.zero.addr) #3
|
|
ret void
|
|
}
|
|
|
|
define weak void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77() #0 {
|
|
entry:
|
|
%.zero.addr = alloca i32, align 4
|
|
%.threadid_temp. = alloca i32, align 4
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
|
|
%exec_user_code = icmp eq i32 %0, -1
|
|
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
|
|
|
|
user_code.entry: ; preds = %entry
|
|
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
|
store i32 %1, i32* %.threadid_temp., align 4
|
|
call void @__omp_outlined__12(i32* %.threadid_temp., i32* %.zero.addr) #3
|
|
call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
|
|
ret void
|
|
|
|
worker.exit: ; preds = %entry
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__12(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
%captured_vars_addrs = alloca [0 x i8*], align 8
|
|
%captured_vars_addrs1 = alloca [0 x i8*], align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @unknown_no_openmp() #8
|
|
%0 = load i32*, i32** %.global_tid..addr, align 8
|
|
%1 = load i32, i32* %0, align 4
|
|
%2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
|
|
call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** %2, i64 0)
|
|
call void @unknown_pure() #9
|
|
%3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
|
|
call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** %3, i64 0)
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__13(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @p0() #7
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__13_wrapper(i16 zeroext %0, i32 %1) #0 {
|
|
entry:
|
|
%.addr = alloca i16, align 2
|
|
%.addr1 = alloca i32, align 4
|
|
%.zero.addr = alloca i32, align 4
|
|
%global_args = alloca i8**, align 8
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
store i16 %0, i16* %.addr, align 2
|
|
store i32 %1, i32* %.addr1, align 4
|
|
call void @__kmpc_get_shared_variables(i8*** %global_args)
|
|
call void @__omp_outlined__13(i32* %.addr1, i32* %.zero.addr) #3
|
|
ret void
|
|
}
|
|
|
|
declare void @unknown_pure() #5
|
|
|
|
define internal void @__omp_outlined__14(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @p1() #7
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__14_wrapper(i16 zeroext %0, i32 %1) #0 {
|
|
entry:
|
|
%.addr = alloca i16, align 2
|
|
%.addr1 = alloca i32, align 4
|
|
%.zero.addr = alloca i32, align 4
|
|
%global_args = alloca i8**, align 8
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
store i16 %0, i16* %.addr, align 2
|
|
store i32 %1, i32* %.addr1, align 4
|
|
call void @__kmpc_get_shared_variables(i8*** %global_args)
|
|
call void @__omp_outlined__14(i32* %.addr1, i32* %.zero.addr) #3
|
|
ret void
|
|
}
|
|
|
|
define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92() #0 {
|
|
entry:
|
|
%.zero.addr = alloca i32, align 4
|
|
%.threadid_temp. = alloca i32, align 4
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
|
|
%exec_user_code = icmp eq i32 %0, -1
|
|
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
|
|
|
|
user_code.entry: ; preds = %entry
|
|
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
|
store i32 %1, i32* %.threadid_temp., align 4
|
|
call void @__omp_outlined__15(i32* %.threadid_temp., i32* %.zero.addr) #3
|
|
call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
|
|
ret void
|
|
|
|
worker.exit: ; preds = %entry
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__15(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
%call = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #7
|
|
call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %call) #7
|
|
ret void
|
|
}
|
|
|
|
define hidden void @simple_state_machine_interprocedural_nested_recursive_after(i32 %a) #1 {
|
|
entry:
|
|
%a.addr = alloca i32, align 4
|
|
store i32 %a, i32* %a.addr, align 4
|
|
%0 = load i32, i32* %a.addr, align 4
|
|
%cmp = icmp eq i32 %0, 0
|
|
br i1 %cmp, label %if.then, label %if.end
|
|
|
|
if.then: ; preds = %entry
|
|
br label %return
|
|
|
|
if.end: ; preds = %entry
|
|
%1 = load i32, i32* %a.addr, align 4
|
|
%sub = sub nsw i32 %1, 1
|
|
call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %sub) #7
|
|
call void @simple_state_machine_interprocedural_nested_recursive_after_after() #7
|
|
br label %return
|
|
|
|
return: ; preds = %if.end, %if.then
|
|
ret void
|
|
}
|
|
|
|
declare i32 @omp_get_thread_num(...) #4
|
|
|
|
define weak void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112() #0 {
|
|
entry:
|
|
%.zero.addr = alloca i32, align 4
|
|
%.threadid_temp. = alloca i32, align 4
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
|
|
%exec_user_code = icmp eq i32 %0, -1
|
|
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
|
|
|
|
user_code.entry: ; preds = %entry
|
|
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
|
store i32 %1, i32* %.threadid_temp., align 4
|
|
call void @__omp_outlined__16(i32* %.threadid_temp., i32* %.zero.addr) #3
|
|
call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
|
|
ret void
|
|
|
|
worker.exit: ; preds = %entry
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__16(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @weak_callee_empty() #7
|
|
ret void
|
|
}
|
|
|
|
define weak hidden void @weak_callee_empty() #1 {
|
|
entry:
|
|
ret void
|
|
}
|
|
|
|
declare i32 @__kmpc_single(%struct.ident_t*, i32) #6
|
|
|
|
declare void @__kmpc_end_single(%struct.ident_t*, i32) #6
|
|
|
|
declare void @__kmpc_barrier(%struct.ident_t*, i32) #6
|
|
|
|
define internal void @__omp_outlined__17(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @p0() #7
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__17_wrapper(i16 zeroext %0, i32 %1) #0 {
|
|
entry:
|
|
%.addr = alloca i16, align 2
|
|
%.addr1 = alloca i32, align 4
|
|
%.zero.addr = alloca i32, align 4
|
|
%global_args = alloca i8**, align 8
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
store i16 %0, i16* %.addr, align 2
|
|
store i32 %1, i32* %.addr1, align 4
|
|
call void @__kmpc_get_shared_variables(i8*** %global_args)
|
|
call void @__omp_outlined__17(i32* %.addr1, i32* %.zero.addr) #3
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__18(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @p0() #7
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__18_wrapper(i16 zeroext %0, i32 %1) #0 {
|
|
entry:
|
|
%.addr = alloca i16, align 2
|
|
%.addr1 = alloca i32, align 4
|
|
%.zero.addr = alloca i32, align 4
|
|
%global_args = alloca i8**, align 8
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
store i16 %0, i16* %.addr, align 2
|
|
store i32 %1, i32* %.addr1, align 4
|
|
call void @__kmpc_get_shared_variables(i8*** %global_args)
|
|
call void @__omp_outlined__18(i32* %.addr1, i32* %.zero.addr) #3
|
|
ret void
|
|
}
|
|
|
|
define hidden void @simple_state_machine_interprocedural_nested_recursive_after_after() #1 {
|
|
entry:
|
|
%captured_vars_addrs = alloca [0 x i8*], align 8
|
|
%0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
|
|
%1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
|
|
call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** %1, i64 0)
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__19(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
|
|
entry:
|
|
%.global_tid..addr = alloca i32*, align 8
|
|
%.bound_tid..addr = alloca i32*, align 8
|
|
store i32* %.global_tid., i32** %.global_tid..addr, align 8
|
|
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
|
|
call void @p0() #7
|
|
ret void
|
|
}
|
|
|
|
define internal void @__omp_outlined__19_wrapper(i16 zeroext %0, i32 %1) #0 {
|
|
entry:
|
|
%.addr = alloca i16, align 2
|
|
%.addr1 = alloca i32, align 4
|
|
%.zero.addr = alloca i32, align 4
|
|
%global_args = alloca i8**, align 8
|
|
store i32 0, i32* %.zero.addr, align 4
|
|
store i16 %0, i16* %.addr, align 2
|
|
store i32 %1, i32* %.addr1, align 4
|
|
call void @__kmpc_get_shared_variables(i8*** %global_args)
|
|
call void @__omp_outlined__19(i32* %.addr1, i32* %.zero.addr) #3
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
|
|
attributes #1 = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
|
|
attributes #2 = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
|
|
attributes #3 = { nounwind }
|
|
attributes #4 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
|
|
attributes #5 = { convergent nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
|
|
attributes #6 = { convergent nounwind }
|
|
attributes #7 = { convergent }
|
|
attributes #8 = { convergent "llvm.assume"="omp_no_openmp" }
|
|
attributes #9 = { convergent nounwind readonly willreturn }
|
|
|
|
!omp_offload.info = !{!0, !1, !2, !3, !4, !5, !6, !7}
|
|
!nvvm.annotations = !{!8, !9, !10, !11, !12, !13, !14, !15}
|
|
!llvm.module.flags = !{!16, !17, !18}
|
|
|
|
!0 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
|
|
!1 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
|
|
!2 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
|
|
!3 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
|
|
!4 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
|
|
!5 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
|
|
!6 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
|
|
!7 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
|
|
!8 = !{void ()* @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
|
|
!9 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
|
|
!10 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
|
|
!11 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
|
|
!12 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
|
|
!13 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
|
|
!14 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
|
|
!15 = !{void ()* @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
|
|
!16 = !{i32 1, !"wchar_size", i32 4}
|
|
!17 = !{i32 7, !"openmp", i32 50}
|
|
!18 = !{i32 7, !"openmp-device", i32 50}
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
|
|
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 false, i1 true)
|
|
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
|
|
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
|
; CHECK: user_code.entry:
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
|
|
; CHECK-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: worker.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
|
|
; CHECK-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7:[0-9]+]]
|
|
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8:[0-9]+]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
|
|
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2:[0-9]+]]) #[[ATTR3]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
|
|
; CHECK: omp_if.then:
|
|
; CHECK-NEXT: store i32 0, i32* @G, align 4
|
|
; CHECK-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
|
|
; CHECK-NEXT: br label [[OMP_IF_END]]
|
|
; CHECK: omp_if.end:
|
|
; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@no_parallel_region_in_here
|
|
; CHECK-SAME: () #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
|
|
; CHECK: omp_if.then:
|
|
; CHECK-NEXT: store i32 0, i32* @G, align 4
|
|
; CHECK-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[OMP_IF_END]]
|
|
; CHECK: omp_if.end:
|
|
; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
|
|
; CHECK-SAME: () #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
|
|
; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
|
|
; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
|
|
; CHECK: worker_state_machine.begin:
|
|
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
|
|
; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
|
|
; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
|
|
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
|
|
; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
|
|
; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
|
|
; CHECK: worker_state_machine.finished:
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: worker_state_machine.is_active.check:
|
|
; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.check:
|
|
; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*)
|
|
; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.execute:
|
|
; CHECK-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.check1:
|
|
; CHECK-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.execute2:
|
|
; CHECK-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
|
|
; CHECK: worker_state_machine.parallel_region.check3:
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
|
|
; CHECK: worker_state_machine.parallel_region.end:
|
|
; CHECK-NEXT: call void @__kmpc_kernel_end_parallel()
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
|
|
; CHECK: worker_state_machine.done.barrier:
|
|
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
|
|
; CHECK: thread.user_code.check:
|
|
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
|
|
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
|
; CHECK: user_code.entry:
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
|
|
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
|
|
; CHECK-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: worker.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1
|
|
; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
|
|
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
|
|
; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
|
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
|
|
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0)
|
|
; CHECK-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
|
|
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2
|
|
; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: call void @p0() #[[ATTR9:[0-9]+]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
|
|
; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
|
|
; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
|
|
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
|
|
; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
|
|
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
|
|
; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
|
|
; CHECK-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__3
|
|
; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: call void @p1() #[[ATTR9]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
|
|
; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
|
|
; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
|
|
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
|
|
; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
|
|
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
|
|
; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
|
|
; CHECK-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
|
|
; CHECK-SAME: () #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
|
|
; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
|
|
; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
|
|
; CHECK: worker_state_machine.begin:
|
|
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
|
|
; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
|
|
; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
|
|
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
|
|
; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
|
|
; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
|
|
; CHECK: worker_state_machine.finished:
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: worker_state_machine.is_active.check:
|
|
; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.check:
|
|
; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper
|
|
; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.execute:
|
|
; CHECK-NEXT: call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.check1:
|
|
; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*)
|
|
; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.execute2:
|
|
; CHECK-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
|
|
; CHECK: worker_state_machine.parallel_region.check3:
|
|
; CHECK-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE5:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK6:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.execute5:
|
|
; CHECK-NEXT: call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
|
|
; CHECK: worker_state_machine.parallel_region.check6:
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
|
|
; CHECK: worker_state_machine.parallel_region.end:
|
|
; CHECK-NEXT: call void @__kmpc_kernel_end_parallel()
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
|
|
; CHECK: worker_state_machine.done.barrier:
|
|
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
|
|
; CHECK: thread.user_code.check:
|
|
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
|
|
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
|
; CHECK: user_code.entry:
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
|
|
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
|
|
; CHECK-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: worker.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__4
|
|
; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
|
|
; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]]
|
|
; CHECK-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR7]]
|
|
; CHECK-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
|
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
|
|
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0)
|
|
; CHECK-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR7]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
|
|
; CHECK-SAME: () #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
|
|
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline nounwind
; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0)
; CHECK-NEXT: ret void
;
;
; CHECK: Function Attrs: convergent noinline norecurse nounwind
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__5
; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-NEXT: call void @p1() #[[ATTR9]]
; CHECK-NEXT: ret void
;
;
; CHECK: Function Attrs: convergent noinline norecurse nounwind
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; CHECK-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-NEXT: ret void
;
;
; CHECK: Function Attrs: convergent noinline nounwind
; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
; CHECK-NEXT: ret void
;
;
; CHECK: Function Attrs: convergent noinline nounwind
; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0)
; CHECK-NEXT: ret void
;
;
; CHECK: Function Attrs: convergent noinline norecurse nounwind
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK: worker_state_machine.begin:
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; CHECK: worker_state_machine.finished:
; CHECK-NEXT: ret void
; CHECK: worker_state_machine.is_active.check:
; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; CHECK: worker_state_machine.parallel_region.check:
; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*)
; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; CHECK: worker_state_machine.parallel_region.execute:
; CHECK-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; CHECK: worker_state_machine.parallel_region.check1:
; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__8_wrapper.ID to void (i16, i32)*)
; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; CHECK: worker_state_machine.parallel_region.execute2:
; CHECK-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]])
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; CHECK: worker_state_machine.parallel_region.fallback.execute:
; CHECK-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; CHECK: worker_state_machine.parallel_region.end:
; CHECK-NEXT: call void @__kmpc_kernel_end_parallel()
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; CHECK: worker_state_machine.done.barrier:
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; CHECK: thread.user_code.check:
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; CHECK-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
;
;
; CHECK: Function Attrs: convergent noinline norecurse nounwind
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__6
; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0)
; CHECK-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR9]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
; CHECK-NEXT: ret void
;
;
; CHECK: Function Attrs: convergent noinline norecurse nounwind
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__7
; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-NEXT: call void @p0() #[[ATTR9]]
; CHECK-NEXT: ret void
;
;
; CHECK: Function Attrs: convergent noinline norecurse nounwind
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; CHECK-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-NEXT: ret void
;
;
; CHECK: Function Attrs: convergent noinline norecurse nounwind
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__8
; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-NEXT: call void @p1() #[[ATTR9]]
; CHECK-NEXT: ret void
;
;
; CHECK: Function Attrs: convergent noinline norecurse nounwind
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; CHECK-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-NEXT: ret void
;
;
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
|
|
; CHECK-SAME: () #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
|
|
; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
|
|
; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
|
|
; CHECK: worker_state_machine.begin:
|
|
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
|
|
; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
|
|
; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
|
|
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
|
|
; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
|
|
; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
|
|
; CHECK: worker_state_machine.finished:
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: worker_state_machine.is_active.check:
|
|
; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.check:
|
|
; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__10_wrapper.ID to void (i16, i32)*)
|
|
; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.execute:
|
|
; CHECK-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.check1:
|
|
; CHECK-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.execute2:
|
|
; CHECK-NEXT: call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
|
|
; CHECK: worker_state_machine.parallel_region.check3:
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
|
|
; CHECK: worker_state_machine.parallel_region.end:
|
|
; CHECK-NEXT: call void @__kmpc_kernel_end_parallel()
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
|
|
; CHECK: worker_state_machine.done.barrier:
|
|
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
|
|
; CHECK: thread.user_code.check:
|
|
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
|
|
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
|
; CHECK: user_code.entry:
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
|
|
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
|
|
; CHECK-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: worker.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__9
|
|
; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
|
|
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
|
|
; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
|
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
|
|
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0)
|
|
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
|
|
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__10
|
|
; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: call void @p0() #[[ATTR9]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
|
|
; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
|
|
; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
|
|
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
|
|
; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
|
|
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
|
|
; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
|
|
; CHECK-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__11
|
|
; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: call void @p1() #[[ATTR9]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
|
|
; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
|
|
; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
|
|
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
|
|
; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
|
|
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
|
|
; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
|
|
; CHECK-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
|
|
; CHECK-SAME: () #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
|
|
; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
|
|
; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
|
|
; CHECK: worker_state_machine.begin:
|
|
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
|
|
; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
|
|
; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
|
|
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
|
|
; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
|
|
; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
|
|
; CHECK: worker_state_machine.finished:
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: worker_state_machine.is_active.check:
|
|
; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.check:
|
|
; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__13_wrapper.ID to void (i16, i32)*)
|
|
; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.execute:
|
|
; CHECK-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.check1:
|
|
; CHECK-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.execute2:
|
|
; CHECK-NEXT: call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
|
|
; CHECK: worker_state_machine.parallel_region.check3:
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
|
|
; CHECK: worker_state_machine.parallel_region.end:
|
|
; CHECK-NEXT: call void @__kmpc_kernel_end_parallel()
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
|
|
; CHECK: worker_state_machine.done.barrier:
|
|
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
|
|
; CHECK: thread.user_code.check:
|
|
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
|
|
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
|
; CHECK: user_code.entry:
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
|
|
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
|
|
; CHECK-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: worker.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__12
|
|
; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
|
|
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
|
|
; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
|
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
|
|
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0)
|
|
; CHECK-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
|
|
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__13
|
|
; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: call void @p0() #[[ATTR9]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
|
|
; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
|
|
; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
|
|
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
|
|
; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
|
|
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
|
|
; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
|
|
; CHECK-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__14
|
|
; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: call void @p1() #[[ATTR9]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
|
|
; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
|
|
; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
|
|
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
|
|
; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
|
|
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
|
|
; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
|
|
; CHECK-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
|
|
; CHECK-SAME: () #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
|
|
; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
|
|
; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
|
|
; CHECK: worker_state_machine.begin:
|
|
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
|
|
; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
|
|
; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
|
|
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
|
|
; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
|
|
; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
|
|
; CHECK: worker_state_machine.finished:
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: worker_state_machine.is_active.check:
|
|
; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.check:
|
|
; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper
|
|
; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.execute:
|
|
; CHECK-NEXT: call void @__omp_outlined__19_wrapper(i16 0, i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.fallback.execute:
|
|
; CHECK-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
|
|
; CHECK: worker_state_machine.parallel_region.end:
|
|
; CHECK-NEXT: call void @__kmpc_kernel_end_parallel()
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
|
|
; CHECK: worker_state_machine.done.barrier:
|
|
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
|
|
; CHECK: thread.user_code.check:
|
|
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
|
|
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
|
; CHECK: user_code.entry:
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
|
|
; CHECK-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: worker.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__15
|
|
; CHECK-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR9]]
|
|
; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR7]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
|
|
; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
|
|
; CHECK: if.then:
|
|
; CHECK-NEXT: br label [[RETURN:%.*]]
|
|
; CHECK: if.end:
|
|
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1
|
|
; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR7]]
|
|
; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR7]]
|
|
; CHECK-NEXT: br label [[RETURN]]
|
|
; CHECK: return:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
|
|
; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
|
|
; CHECK: if.then:
|
|
; CHECK-NEXT: br label [[RETURN:%.*]]
|
|
; CHECK: if.end:
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
|
|
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
|
|
; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR9]]
|
|
; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR9]]
|
|
; CHECK-NEXT: br label [[RETURN]]
|
|
; CHECK: return:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
|
|
; CHECK-SAME: () #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
|
|
; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
|
|
; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
|
|
; CHECK: worker_state_machine.begin:
|
|
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
|
|
; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
|
|
; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
|
|
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
|
|
; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
|
|
; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
|
|
; CHECK: worker_state_machine.finished:
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: worker_state_machine.is_active.check:
|
|
; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.fallback.execute:
|
|
; CHECK-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
|
|
; CHECK: worker_state_machine.parallel_region.end:
|
|
; CHECK-NEXT: call void @__kmpc_kernel_end_parallel()
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
|
|
; CHECK: worker_state_machine.done.barrier:
|
|
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
|
|
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
|
|
; CHECK: thread.user_code.check:
|
|
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
|
|
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
|
; CHECK: user_code.entry:
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
|
|
; CHECK-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: worker.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__16
|
|
; CHECK-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: call void @weak_callee_empty() #[[ATTR7]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@weak_callee_empty
|
|
; CHECK-SAME: () #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__17
|
|
; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: call void @p0() #[[ATTR9]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
|
|
; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
|
|
; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
|
|
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
|
|
; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
|
|
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
|
|
; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
|
|
; CHECK-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__18
|
|
; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: call void @p0() #[[ATTR9]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
|
|
; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
|
|
; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
|
|
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
|
|
; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
|
|
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
|
|
; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
|
|
; CHECK-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
|
|
; CHECK-SAME: () #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
|
|
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
|
|
; CHECK-SAME: () #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
|
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
|
|
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__19
|
|
; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-NEXT: call void @p0() #[[ATTR9]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
|
|
; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
|
|
; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
|
|
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
|
|
; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
|
|
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
|
|
; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
|
|
; CHECK-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
|
|
; CHECK-DISABLED-SAME: () #[[ATTR0:[0-9]+]] {
|
|
; CHECK-DISABLED-NEXT: entry:
|
|
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
|
|
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true)
|
|
; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
|
|
; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
|
; CHECK-DISABLED: user_code.entry:
|
|
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
|
|
; CHECK-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
|
|
; CHECK-DISABLED-NEXT: ret void
|
|
; CHECK-DISABLED: worker.exit:
|
|
; CHECK-DISABLED-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
|
|
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-DISABLED-NEXT: entry:
|
|
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7:[0-9]+]]
|
|
; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8:[0-9]+]]
|
|
; CHECK-DISABLED-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK-DISABLED: Function Attrs: convergent noinline nounwind
|
|
; CHECK-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
|
|
; CHECK-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
|
|
; CHECK-DISABLED-NEXT: entry:
|
|
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2:[0-9]+]]) #[[ATTR3]]
|
|
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
|
|
; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
|
|
; CHECK-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
|
|
; CHECK-DISABLED: omp_if.then:
|
|
; CHECK-DISABLED-NEXT: store i32 0, i32* @G, align 4
|
|
; CHECK-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
|
|
; CHECK-DISABLED-NEXT: br label [[OMP_IF_END]]
|
|
; CHECK-DISABLED: omp_if.end:
|
|
; CHECK-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
|
|
; CHECK-DISABLED-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK-DISABLED: Function Attrs: convergent noinline nounwind
|
|
; CHECK-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here
|
|
; CHECK-DISABLED-SAME: () #[[ATTR1]] {
|
|
; CHECK-DISABLED-NEXT: entry:
|
|
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
|
|
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
|
|
; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
|
|
; CHECK-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
|
|
; CHECK-DISABLED: omp_if.then:
|
|
; CHECK-DISABLED-NEXT: store i32 0, i32* @G, align 4
|
|
; CHECK-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
|
|
; CHECK-DISABLED-NEXT: br label [[OMP_IF_END]]
|
|
; CHECK-DISABLED: omp_if.end:
|
|
; CHECK-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
|
|
; CHECK-DISABLED-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
|
|
; CHECK-DISABLED-SAME: () #[[ATTR0]] {
|
|
; CHECK-DISABLED-NEXT: entry:
|
|
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
|
|
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true)
|
|
; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
|
|
; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
|
; CHECK-DISABLED: user_code.entry:
|
|
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
|
|
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
|
|
; CHECK-DISABLED-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
|
|
; CHECK-DISABLED-NEXT: ret void
|
|
; CHECK-DISABLED: worker.exit:
|
|
; CHECK-DISABLED-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
|
|
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-DISABLED-NEXT: entry:
|
|
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
|
|
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
|
|
; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]]
|
|
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
|
|
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
|
|
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
|
|
; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7]]
|
|
; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
|
|
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
|
|
; CHECK-DISABLED-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
|
|
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-DISABLED-NEXT: entry:
|
|
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9:[0-9]+]]
|
|
; CHECK-DISABLED-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
|
|
; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
|
|
; CHECK-DISABLED-NEXT: entry:
|
|
; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
|
|
; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
|
|
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
|
|
; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
|
|
; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
|
|
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
|
|
; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
|
|
; CHECK-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-DISABLED-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
|
|
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
|
|
; CHECK-DISABLED-NEXT: entry:
|
|
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]]
|
|
; CHECK-DISABLED-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
|
|
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
|
|
; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
|
|
; CHECK-DISABLED-NEXT: entry:
|
|
; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
|
|
; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
|
|
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
|
; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
|
|
; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
|
|
; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
|
|
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
|
|
; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
|
|
; CHECK-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
|
|
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
; CHECK-DISABLED-SAME: () #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true)
; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLED: user_code.entry:
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; CHECK-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
; CHECK-DISABLED-NEXT: ret void
; CHECK-DISABLED: worker.exit:
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]]
; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR7]]
; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7]]
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR7]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
; CHECK-DISABLED-SAME: () #[[ATTR1]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
; CHECK-DISABLED-SAME: () #[[ATTR1]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0)
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; CHECK-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
; CHECK-DISABLED-SAME: () #[[ATTR1]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
; CHECK-DISABLED-SAME: () #[[ATTR1]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0)
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
; CHECK-DISABLED-SAME: () #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true)
; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLED: user_code.entry:
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; CHECK-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
; CHECK-DISABLED-NEXT: ret void
; CHECK-DISABLED: worker.exit:
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR9]]
; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; CHECK-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; CHECK-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
; CHECK-DISABLED-SAME: () #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true)
; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLED: user_code.entry:
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; CHECK-DISABLED-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
; CHECK-DISABLED-NEXT: ret void
; CHECK-DISABLED: worker.exit:
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]]
; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; CHECK-DISABLED-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; CHECK-DISABLED-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
; CHECK-DISABLED-SAME: () #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true)
; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLED: user_code.entry:
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; CHECK-DISABLED-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
; CHECK-DISABLED-NEXT: ret void
; CHECK-DISABLED: worker.exit:
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]]
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; CHECK-DISABLED-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; CHECK-DISABLED-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
; CHECK-DISABLED-SAME: () #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true)
; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLED: user_code.entry:
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
; CHECK-DISABLED-NEXT: ret void
; CHECK-DISABLED: worker.exit:
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR9]]
; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR7]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
; CHECK-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK-DISABLED: if.then:
; CHECK-DISABLED-NEXT: br label [[RETURN:%.*]]
; CHECK-DISABLED: if.end:
; CHECK-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1
; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR7]]
; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR7]]
; CHECK-DISABLED-NEXT: br label [[RETURN]]
; CHECK-DISABLED: return:
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
; CHECK-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK-DISABLED: if.then:
; CHECK-DISABLED-NEXT: br label [[RETURN:%.*]]
; CHECK-DISABLED: if.end:
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
; CHECK-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR9]]
; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR9]]
; CHECK-DISABLED-NEXT: br label [[RETURN]]
; CHECK-DISABLED: return:
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
; CHECK-DISABLED-SAME: () #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true)
; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLED: user_code.entry:
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
; CHECK-DISABLED-NEXT: ret void
; CHECK-DISABLED: worker.exit:
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR7]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@weak_callee_empty
; CHECK-DISABLED-SAME: () #[[ATTR1]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; CHECK-DISABLED-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; CHECK-DISABLED-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
; CHECK-DISABLED-SAME: () #[[ATTR1]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
; CHECK-DISABLED-SAME: () #[[ATTR1]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0)
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19
; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]]
; CHECK-DISABLED-NEXT: ret void
;
;
; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; CHECK-DISABLED-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLED-NEXT: ret void
;