Files
clang-p2996/clang/test/OpenMP/reduction_implicit_map.cpp
dhruvachak b5d02bbd0d [OpenMP] Increment kernel args version, used by runtime for detecting dyn_ptr. (#85363)
A kernel implicit parameter (dyn_ptr) was introduced some time back.
This patch increments the kernel args version for a compiler supporting
dyn_ptr. The version will be used by the runtime to determine whether
the implicit parameter is generated by the compiler. The versioning is
required to support use cases where code generated by an older compiler
is linked with a newer runtime.

If approved, this patch should be backported to release 18.
2024-03-19 16:40:22 -07:00

2103 lines
150 KiB
C++

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
// RUN: -triple powerpc64le-unknown-unknown -DCUDA \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o \
// RUN: %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
// RUN: -triple nvptx64-unknown-unknown -DCUA \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -DCUDA -emit-llvm %s \
// RUN: -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc \
// RUN: -o - | FileCheck %s --check-prefix CHECK
// RUN: %clang_cc1 -verify -fopenmp -x c++ \
// RUN: -triple powerpc64le-unknown-unknown -DDIAG\
// RUN: -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm \
// RUN: %s -o - | FileCheck %s \
// RUN: --check-prefix=CHECK1
// RUN: %clang_cc1 -verify -fopenmp -x c++ \
// RUN: -triple i386-unknown-unknown \
// RUN: -fopenmp-targets=i386-pc-linux-gnu -emit-llvm \
// RUN: %s -o - | FileCheck %s \
// RUN: --check-prefix=CHECK2
#if defined(CUDA)
// expected-no-diagnostics
int foo(int n) {
double *e;
//no error and no implicit map generated for e[:1]
#pragma omp target parallel reduction(+: e[:1])
*e=10;
;
return 0;
}
// CHECK-NOT @.offload_maptypes
#elif defined(DIAG)
class S2 {
mutable int a;
public:
S2():a(0) { }
S2(S2 &s2):a(s2.a) { }
S2 &operator +(S2 &s);
};
int bar() {
S2 o[5];
//warnig "copyable and not guaranteed to be mapped correctly" and
//implicit map generated.
#pragma omp target parallel reduction(+:o[0]) //expected-warning {{Type 'S2' is not trivially copyable and not guaranteed to be mapped correctly}}
for (int i = 0; i < 10; i++);
double b[10][10][10];
//no error no implicit map generated, the map for b is generated but not
//for b[0:2][2:4][1].
#pragma omp target parallel for reduction(task, +: b[0:2][2:4][1])
for (long long i = 0; i < 10; ++i);
return 0;
}
// map for variable o
// map for b:
#else
// expected-no-diagnostics
// generate implicit map for array elements or array sections in reduction
// clause. In following case: the implicit map is generate for output[0]
// with map size 4 and output[:3] with map size 12.
void sum(int* input, int size, int* output)
{
#pragma omp target teams distribute parallel for reduction(+: output[0]) \
map(to: input [0:size])
for (int i = 0; i < size; i++)
output[0] += input[i];
#pragma omp target teams distribute parallel for reduction(+: output[:3]) \
map(to: input [0:size])
for (int i = 0; i < size; i++)
output[0] += input[i];
int a[10];
#pragma omp target parallel reduction(+: a[:2])
for (int i = 0; i < size; i++)
;
#pragma omp target parallel reduction(+: a[3])
for (int i = 0; i < size; i++)
;
}
#endif
int main()
{
#if defined(CUDA)
int a = foo(10);
#elif defined(DIAG)
int a = bar();
#else
const int size = 100;
int *array = new int[size];
int result = 0;
sum(array, size, &result);
#endif
return 0;
}
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32
// CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_kernel_environment, ptr [[DYN_PTR]])
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK-NEXT: call void @__kmpc_target_deinit()
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[E:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[E2:%.*]] = alloca double, align 8
// CHECK-NEXT: [[TMP:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 8
// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i64 0
// CHECK-NEXT: store double 0.000000e+00, ptr [[E2]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64
// CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
// CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
// CHECK-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64)
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[E2]], i64 [[TMP6]]
// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP]], align 8
// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK-NEXT: store double 1.000000e+01, ptr [[TMP8]], align 8
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK-NEXT: store ptr [[E2]], ptr [[TMP9]], align 8
// CHECK-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
// CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1
// CHECK-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK: .omp.reduction.then:
// CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX]], align 8
// CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[E2]], align 8
// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]]
// CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX]], align 8
// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK: .omp.reduction.done:
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8
// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
// CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
// CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i64 1
// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8
// CHECK-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
// CHECK-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i64 1
// CHECK-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
// CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
// CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]]
// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2
// CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1
// CHECK-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0
// CHECK-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]]
// CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0
// CHECK-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]]
// CHECK-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]]
// CHECK-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
// CHECK-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK: then:
// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: br label [[IFCONT:%.*]]
// CHECK: else:
// CHECK-NEXT: br label [[IFCONT]]
// CHECK: ifcont:
// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
// CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
// CHECK-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
// CHECK: then4:
// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
// CHECK-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8
// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8
// CHECK-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP34]], align 8
// CHECK-NEXT: store double [[TMP37]], ptr [[TMP36]], align 8
// CHECK-NEXT: br label [[IFCONT6:%.*]]
// CHECK: else5:
// CHECK-NEXT: br label [[IFCONT6]]
// CHECK: ifcont6:
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func
// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
// CHECK-NEXT: br label [[PRECOND:%.*]]
// CHECK: precond:
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2
// CHECK-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]]
// CHECK: body:
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
// CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK: then:
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]]
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4
// CHECK-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4
// CHECK-NEXT: br label [[IFCONT:%.*]]
// CHECK: else:
// CHECK-NEXT: br label [[IFCONT]]
// CHECK: ifcont:
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4
// CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]]
// CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// CHECK: then2:
// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
// CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]]
// CHECK-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4
// CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
// CHECK-NEXT: br label [[IFCONT4:%.*]]
// CHECK: else3:
// CHECK-NEXT: br label [[IFCONT4]]
// CHECK: ifcont4:
// CHECK-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4
// CHECK-NEXT: br label [[PRECOND]]
// CHECK: exit:
// CHECK-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z3barv
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[O:%.*]] = alloca [5 x %class.S2], align 4
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: [[B:%.*]] = alloca [10 x [10 x [10 x double]]], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %class.S2], ptr [[O]], i32 0, i32 0
// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_S2:%.*]], ptr [[ARRAY_BEGIN]], i64 5
// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
// CHECK1: arrayctor.loop:
// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
// CHECK1-NEXT: call void @_ZN2S2C1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_S2]], ptr [[ARRAYCTOR_CUR]], i64 1
// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
// CHECK1: arrayctor.cont:
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %class.S2], ptr [[O]], i64 0, i64 0
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[O]], ptr [[TMP0]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[ARRAYIDX]], ptr [[TMP1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT: store i32 3, ptr [[TMP5]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-NEXT: store i64 0, ptr [[TMP13]], align 8
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-NEXT: store i64 0, ptr [[TMP14]], align 8
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP15]], align 4
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-NEXT: store i32 0, ptr [[TMP17]], align 4
// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.region_id, ptr [[KERNEL_ARGS]])
// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK1-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50(ptr [[O]]) #[[ATTR6:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[B]], ptr [[TMP20]], align 8
// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[B]], ptr [[TMP21]], align 8
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0
// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
// CHECK1-NEXT: store i32 3, ptr [[TMP25]], align 4
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
// CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4
// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8
// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP29]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP30]], align 8
// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
// CHECK1-NEXT: store ptr null, ptr [[TMP31]], align 8
// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8
// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
// CHECK1-NEXT: store i64 0, ptr [[TMP33]], align 8
// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9
// CHECK1-NEXT: store i64 0, ptr [[TMP34]], align 8
// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10
// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP35]], align 4
// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP36]], align 4
// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12
// CHECK1-NEXT: store i32 0, ptr [[TMP37]], align 4
// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.region_id, ptr [[KERNEL_ARGS4]])
// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0
// CHECK1-NEXT: br i1 [[TMP39]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
// CHECK1: omp_offload.failed5:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55(ptr [[B]]) #[[ATTR6]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT6]]
// CHECK1: omp_offload.cont6:
// CHECK1-NEXT: ret i32 0
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZN2S2C1Ev
// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: call void @_ZN2S2C2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50
// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(20) [[O:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8
// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.omp_outlined, ptr [[TMP0]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(20) [[O:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[O1:%.*]] = alloca [[CLASS_S2:%.*]], align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %class.S2], ptr [[TMP0]], i64 0, i64 0
// CHECK1-NEXT: call void @_ZN2S2C1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[O1]])
// CHECK1-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
// CHECK1-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]]
// CHECK1-NEXT: [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], ptrtoint (ptr getelementptr ([[CLASS_S2]], ptr null, i32 1) to i64)
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[CLASS_S2]], ptr [[O1]], i64 [[TMP4]]
// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
// CHECK1-NEXT: br label [[FOR_COND:%.*]]
// CHECK1: for.cond:
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], 10
// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
// CHECK1: for.body:
// CHECK1-NEXT: br label [[FOR_INC:%.*]]
// CHECK1: for.inc:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4
// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK1: for.end:
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[O1]], ptr [[TMP8]], align 8
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: switch i32 [[TMP11]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], ptr noundef nonnull align 4 dereferenceable(4) [[O1]])
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX]], ptr align 4 [[CALL]], i64 4, i1 false)
// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var)
// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], ptr noundef nonnull align 4 dereferenceable(4) [[O1]])
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX]], ptr align 4 [[CALL2]], i64 4, i1 false)
// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.omp_outlined.omp.reduction.reduction_func
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP5]])
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[CALL]], i64 4, i1 false)
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55
// CHECK1-SAME: (ptr noundef nonnull align 8 dereferenceable(8000) [[B:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.omp_outlined, ptr [[TMP0]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8000) [[B:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [1 x %struct.kmp_taskred_input_t], align 8
// CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
// CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8
// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0
// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX]], i64 0, i64 0
// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY]], i64 2
// CHECK1-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX1]], i64 0, i64 0
// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY2]], i64 1
// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1
// CHECK1-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX4]], i64 0, i64 0
// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY5]], i64 5
// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX6]], i64 0, i64 0
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY7]], i64 1
// CHECK1-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64
// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64
// CHECK1-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]]
// CHECK1-NEXT: [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64)
// CHECK1-NEXT: [[TMP5:%.*]] = add nuw i64 [[TMP4]], 1
// CHECK1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64)
// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8
// CHECK1-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP5]], align 8
// CHECK1-NEXT: store i64 [[TMP5]], ptr [[__VLA_EXPR0]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[VLA]], i64 [[TMP5]]
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store double 0.000000e+00, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK1: omp.arrayinit.done:
// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP0]] to i64
// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64
// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]]
// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64)
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[VLA]], i64 [[TMP12]]
// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [1 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0
// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX9]], i64 0, i64 0
// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY10]], i64 2
// CHECK1-NEXT: [[ARRAYDECAY12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 0
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY12]], i64 1
// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1
// CHECK1-NEXT: [[ARRAYDECAY15:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX14]], i64 0, i64 0
// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY15]], i64 5
// CHECK1-NEXT: [[ARRAYDECAY17:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX16]], i64 0, i64 0
// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY17]], i64 1
// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP14]], align 8
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
// CHECK1-NEXT: store ptr [[ARRAYIDX13]], ptr [[TMP15]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX18]] to i64
// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARRAYIDX13]] to i64
// CHECK1-NEXT: [[TMP18:%.*]] = sub i64 [[TMP16]], [[TMP17]]
// CHECK1-NEXT: [[TMP19:%.*]] = sdiv exact i64 [[TMP18]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64)
// CHECK1-NEXT: [[TMP20:%.*]] = add nuw i64 [[TMP19]], 1
// CHECK1-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64)
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2
// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP22]], align 8
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3
// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP23]], align 8
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4
// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5
// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP25]], align 8
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
// CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 8
// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
// CHECK1-NEXT: [[TMP29:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i32 1, ptr [[DOTRD_INPUT_]])
// CHECK1-NEXT: store ptr [[TMP29]], ptr [[DOTTASK_RED_]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP31]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP32]], 9
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP33]], [[COND_FALSE]] ]
// CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
// CHECK1-NEXT: store i64 [[TMP34]], ptr [[DOTOMP_IV]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
// CHECK1-NEXT: [[CMP19:%.*]] = icmp sle i64 [[TMP35]], [[TMP36]]
// CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
// CHECK1: omp.inner.for.cond.cleanup:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP37]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]]
// CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP38]], 1
// CHECK1-NEXT: store i64 [[ADD20]], ptr [[DOTOMP_IV]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP40]])
// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4
// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB2]], i32 [[TMP42]], i32 1)
// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP43]], align 8
// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP45:%.*]] = inttoptr i64 [[TMP5]] to ptr
// CHECK1-NEXT: store ptr [[TMP45]], ptr [[TMP44]], align 8
// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4
// CHECK1-NEXT: [[TMP48:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP47]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: switch i32 [[TMP48]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr double, ptr [[ARRAYIDX3]], i64 [[TMP5]]
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP49]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK1-NEXT: [[TMP50:%.*]] = load double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8
// CHECK1-NEXT: [[TMP51:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8
// CHECK1-NEXT: [[ADD22:%.*]] = fadd double [[TMP50]], [[TMP51]]
// CHECK1-NEXT: store double [[ADD22]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP49]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done25:
// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP47]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr double, ptr [[ARRAYIDX3]], i64 [[TMP5]]
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY26:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP52]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY26]], label [[OMP_ARRAYCPY_DONE33:%.*]], label [[OMP_ARRAYCPY_BODY27:%.*]]
// CHECK1: omp.arraycpy.body27:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST28:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT31:%.*]], [[OMP_ARRAYCPY_BODY27]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST29:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY27]] ]
// CHECK1-NEXT: [[TMP53:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8
// CHECK1-NEXT: [[TMP54:%.*]] = atomicrmw fadd ptr [[OMP_ARRAYCPY_DESTELEMENTPAST29]], double [[TMP53]] monotonic, align 8
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT30]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST29]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT31]] = getelementptr double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST28]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE32:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT30]], [[TMP52]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_BODY27]]
// CHECK1: omp.arraycpy.done33:
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// CHECK1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP55]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.red_init.
// CHECK1-SAME: (ptr noalias noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}})
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[TMP2]], i64 [[TMP4]]
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store double 0.000000e+00, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK1: omp.arrayinit.done:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.red_comb.
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}})
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP4]], i64 [[TMP3]]
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP6]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8
// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP7]], [[TMP8]]
// CHECK1-NEXT: store double [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done2:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.omp_outlined.omp.reduction.reduction_func
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP7]], i64 [[TMP10]]
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP11]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8
// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]]
// CHECK1-NEXT: store double [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done2:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZN2S2C2Ev
// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[CLASS_S2:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: store i32 0, ptr [[A]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@main
// CHECK1-SAME: () #[[ATTR9:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3barv()
// CHECK1-NEXT: store i32 [[CALL]], ptr [[A]], align 4
// CHECK1-NEXT: ret i32 0
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z3sumPiiS_
// CHECK2-SAME: (ptr noundef [[INPUT:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-NEXT: [[SIZE_CASTED4:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [3 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [3 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [3 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_SIZES10:%.*]] = alloca [3 x i64], align 4
// CHECK2-NEXT: [[_TMP11:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_13:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK2-NEXT: [[A:%.*]] = alloca [10 x i32], align 4
// CHECK2-NEXT: [[SIZE_CASTED21:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS23:%.*]] = alloca [2 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS24:%.*]] = alloca [2 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS25:%.*]] = alloca [2 x ptr], align 4
// CHECK2-NEXT: [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK2-NEXT: [[SIZE_CASTED29:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS31:%.*]] = alloca [2 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS32:%.*]] = alloca [2 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS33:%.*]] = alloca [2 x ptr], align 4
// CHECK2-NEXT: [[KERNEL_ARGS34:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP0]], ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
// CHECK2-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 24, i1 false)
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP12]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-NEXT: store ptr null, ptr [[TMP13]], align 4
// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP14]], align 4
// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[ARRAYIDX]], ptr [[TMP15]], align 4
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-NEXT: store ptr null, ptr [[TMP16]], align 4
// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP17]], align 4
// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[ARRAYIDX1]], ptr [[TMP18]], align 4
// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 2
// CHECK2-NEXT: store i64 [[TMP10]], ptr [[TMP19]], align 4
// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK2-NEXT: store ptr null, ptr [[TMP20]], align 4
// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP25]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], 1
// CHECK2-NEXT: [[TMP27:%.*]] = zext i32 [[ADD]] to i64
// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-NEXT: store i32 3, ptr [[TMP28]], align 4
// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-NEXT: store i32 3, ptr [[TMP29]], align 4
// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP30]], align 4
// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-NEXT: store ptr [[TMP22]], ptr [[TMP31]], align 4
// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP32]], align 4
// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-NEXT: store ptr @.offload_maptypes, ptr [[TMP33]], align 4
// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-NEXT: store ptr null, ptr [[TMP34]], align 4
// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-NEXT: store ptr null, ptr [[TMP35]], align 4
// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-NEXT: store i64 [[TMP27]], ptr [[TMP36]], align 8
// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-NEXT: store i64 0, ptr [[TMP37]], align 8
// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4
// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP39]], align 4
// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-NEXT: store i32 0, ptr [[TMP40]], align 4
// CHECK2-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.region_id, ptr [[KERNEL_ARGS]])
// CHECK2-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
// CHECK2-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2: omp_offload.failed:
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69(i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK2: omp_offload.cont:
// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP43]], ptr [[SIZE_CASTED4]], align 4
// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[SIZE_CASTED4]], align 4
// CHECK2-NEXT: [[TMP45:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP47:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP48]], i32 0
// CHECK2-NEXT: [[TMP49:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP50:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP50]], i32 0
// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: [[TMP52:%.*]] = mul nuw i32 [[TMP51]], 4
// CHECK2-NEXT: [[TMP53:%.*]] = sext i32 [[TMP52]] to i64
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES10]], ptr align 4 @.offload_sizes.1, i32 24, i1 false)
// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP44]], ptr [[TMP54]], align 4
// CHECK2-NEXT: [[TMP55:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP44]], ptr [[TMP55]], align 4
// CHECK2-NEXT: [[TMP56:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 0
// CHECK2-NEXT: store ptr null, ptr [[TMP56]], align 4
// CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[TMP47]], ptr [[TMP57]], align 4
// CHECK2-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[ARRAYIDX5]], ptr [[TMP58]], align 4
// CHECK2-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 1
// CHECK2-NEXT: store ptr null, ptr [[TMP59]], align 4
// CHECK2-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP49]], ptr [[TMP60]], align 4
// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[ARRAYIDX6]], ptr [[TMP61]], align 4
// CHECK2-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES10]], i32 0, i32 2
// CHECK2-NEXT: store i64 [[TMP53]], ptr [[TMP62]], align 4
// CHECK2-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 2
// CHECK2-NEXT: store ptr null, ptr [[TMP63]], align 4
// CHECK2-NEXT: [[TMP64:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
// CHECK2-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
// CHECK2-NEXT: [[TMP66:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES10]], i32 0, i32 0
// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP67]], ptr [[DOTCAPTURE_EXPR_12]], align 4
// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4
// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP68]], 0
// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
// CHECK2-NEXT: [[SUB16:%.*]] = sub nsw i32 [[DIV15]], 1
// CHECK2-NEXT: store i32 [[SUB16]], ptr [[DOTCAPTURE_EXPR_13]], align 4
// CHECK2-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4
// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP69]], 1
// CHECK2-NEXT: [[TMP70:%.*]] = zext i32 [[ADD17]] to i64
// CHECK2-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0
// CHECK2-NEXT: store i32 3, ptr [[TMP71]], align 4
// CHECK2-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1
// CHECK2-NEXT: store i32 3, ptr [[TMP72]], align 4
// CHECK2-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP64]], ptr [[TMP73]], align 4
// CHECK2-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 3
// CHECK2-NEXT: store ptr [[TMP65]], ptr [[TMP74]], align 4
// CHECK2-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 4
// CHECK2-NEXT: store ptr [[TMP66]], ptr [[TMP75]], align 4
// CHECK2-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 5
// CHECK2-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP76]], align 4
// CHECK2-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 6
// CHECK2-NEXT: store ptr null, ptr [[TMP77]], align 4
// CHECK2-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 7
// CHECK2-NEXT: store ptr null, ptr [[TMP78]], align 4
// CHECK2-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 8
// CHECK2-NEXT: store i64 [[TMP70]], ptr [[TMP79]], align 8
// CHECK2-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 9
// CHECK2-NEXT: store i64 0, ptr [[TMP80]], align 8
// CHECK2-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 10
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP81]], align 4
// CHECK2-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 11
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP82]], align 4
// CHECK2-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 12
// CHECK2-NEXT: store i32 0, ptr [[TMP83]], align 4
// CHECK2-NEXT: [[TMP84:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.region_id, ptr [[KERNEL_ARGS18]])
// CHECK2-NEXT: [[TMP85:%.*]] = icmp ne i32 [[TMP84]], 0
// CHECK2-NEXT: br i1 [[TMP85]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]]
// CHECK2: omp_offload.failed19:
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73(i32 [[TMP44]], ptr [[TMP45]], ptr [[TMP46]]) #[[ATTR2]]
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT20]]
// CHECK2: omp_offload.cont20:
// CHECK2-NEXT: [[TMP86:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP86]], ptr [[SIZE_CASTED21]], align 4
// CHECK2-NEXT: [[TMP87:%.*]] = load i32, ptr [[SIZE_CASTED21]], align 4
// CHECK2-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[A]], i32 0, i32 0
// CHECK2-NEXT: [[TMP88:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP87]], ptr [[TMP88]], align 4
// CHECK2-NEXT: [[TMP89:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP87]], ptr [[TMP89]], align 4
// CHECK2-NEXT: [[TMP90:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 0
// CHECK2-NEXT: store ptr null, ptr [[TMP90]], align 4
// CHECK2-NEXT: [[TMP91:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[A]], ptr [[TMP91]], align 4
// CHECK2-NEXT: [[TMP92:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[ARRAYIDX22]], ptr [[TMP92]], align 4
// CHECK2-NEXT: [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 1
// CHECK2-NEXT: store ptr null, ptr [[TMP93]], align 4
// CHECK2-NEXT: [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
// CHECK2-NEXT: [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
// CHECK2-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0
// CHECK2-NEXT: store i32 3, ptr [[TMP96]], align 4
// CHECK2-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1
// CHECK2-NEXT: store i32 2, ptr [[TMP97]], align 4
// CHECK2-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP94]], ptr [[TMP98]], align 4
// CHECK2-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 3
// CHECK2-NEXT: store ptr [[TMP95]], ptr [[TMP99]], align 4
// CHECK2-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 4
// CHECK2-NEXT: store ptr @.offload_sizes.3, ptr [[TMP100]], align 4
// CHECK2-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 5
// CHECK2-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP101]], align 4
// CHECK2-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 6
// CHECK2-NEXT: store ptr null, ptr [[TMP102]], align 4
// CHECK2-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 7
// CHECK2-NEXT: store ptr null, ptr [[TMP103]], align 4
// CHECK2-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 8
// CHECK2-NEXT: store i64 0, ptr [[TMP104]], align 8
// CHECK2-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 9
// CHECK2-NEXT: store i64 0, ptr [[TMP105]], align 8
// CHECK2-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 10
// CHECK2-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP106]], align 4
// CHECK2-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 11
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP107]], align 4
// CHECK2-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 12
// CHECK2-NEXT: store i32 0, ptr [[TMP108]], align 4
// CHECK2-NEXT: [[TMP109:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78.region_id, ptr [[KERNEL_ARGS26]])
// CHECK2-NEXT: [[TMP110:%.*]] = icmp ne i32 [[TMP109]], 0
// CHECK2-NEXT: br i1 [[TMP110]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]]
// CHECK2: omp_offload.failed27:
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78(i32 [[TMP87]], ptr [[A]]) #[[ATTR2]]
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT28]]
// CHECK2: omp_offload.cont28:
// CHECK2-NEXT: [[TMP111:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP111]], ptr [[SIZE_CASTED29]], align 4
// CHECK2-NEXT: [[TMP112:%.*]] = load i32, ptr [[SIZE_CASTED29]], align 4
// CHECK2-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds [10 x i32], ptr [[A]], i32 0, i32 3
// CHECK2-NEXT: [[TMP113:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP112]], ptr [[TMP113]], align 4
// CHECK2-NEXT: [[TMP114:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS32]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP112]], ptr [[TMP114]], align 4
// CHECK2-NEXT: [[TMP115:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS33]], i32 0, i32 0
// CHECK2-NEXT: store ptr null, ptr [[TMP115]], align 4
// CHECK2-NEXT: [[TMP116:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[A]], ptr [[TMP116]], align 4
// CHECK2-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS32]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[ARRAYIDX30]], ptr [[TMP117]], align 4
// CHECK2-NEXT: [[TMP118:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS33]], i32 0, i32 1
// CHECK2-NEXT: store ptr null, ptr [[TMP118]], align 4
// CHECK2-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 0
// CHECK2-NEXT: [[TMP120:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS32]], i32 0, i32 0
// CHECK2-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 0
// CHECK2-NEXT: store i32 3, ptr [[TMP121]], align 4
// CHECK2-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 1
// CHECK2-NEXT: store i32 2, ptr [[TMP122]], align 4
// CHECK2-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP119]], ptr [[TMP123]], align 4
// CHECK2-NEXT: [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 3
// CHECK2-NEXT: store ptr [[TMP120]], ptr [[TMP124]], align 4
// CHECK2-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 4
// CHECK2-NEXT: store ptr @.offload_sizes.5, ptr [[TMP125]], align 4
// CHECK2-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 5
// CHECK2-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP126]], align 4
// CHECK2-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 6
// CHECK2-NEXT: store ptr null, ptr [[TMP127]], align 4
// CHECK2-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 7
// CHECK2-NEXT: store ptr null, ptr [[TMP128]], align 4
// CHECK2-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 8
// CHECK2-NEXT: store i64 0, ptr [[TMP129]], align 8
// CHECK2-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 9
// CHECK2-NEXT: store i64 0, ptr [[TMP130]], align 8
// CHECK2-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 10
// CHECK2-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP131]], align 4
// CHECK2-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 11
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP132]], align 4
// CHECK2-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 12
// CHECK2-NEXT: store i32 0, ptr [[TMP133]], align 4
// CHECK2-NEXT: [[TMP134:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81.region_id, ptr [[KERNEL_ARGS34]])
// CHECK2-NEXT: [[TMP135:%.*]] = icmp ne i32 [[TMP134]], 0
// CHECK2-NEXT: br i1 [[TMP135]], label [[OMP_OFFLOAD_FAILED35:%.*]], label [[OMP_OFFLOAD_CONT36:%.*]]
// CHECK2: omp_offload.failed35:
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81(i32 [[TMP112]], ptr [[A]]) #[[ATTR2]]
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT36]]
// CHECK2: omp_offload.cont36:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69
// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP0]], ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined, i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[OUTPUT1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0
// CHECK2-NEXT: store i32 0, ptr [[OUTPUT1]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
// CHECK2-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
// CHECK2-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
// CHECK2-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64)
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[OUTPUT1]], i64 [[TMP5]]
// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]]
// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]]
// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP22]], ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 4
// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined.omp_outlined, i32 [[TMP20]], i32 [[TMP21]], i32 [[TMP23]], ptr [[TMP24]], ptr [[TMP25]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[OUTPUT1]], ptr [[TMP30]], align 4
// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
// CHECK2-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP32]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: switch i32 [[TMP33]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK2-NEXT: ]
// CHECK2: .omp.reduction.case1:
// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[OUTPUT1]], align 4
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]]
// CHECK2-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP32]], ptr @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.case2:
// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[OUTPUT1]], align 4
// CHECK2-NEXT: [[TMP37:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP36]] monotonic, align 4
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.default:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined.omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[OUTPUT3:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
// CHECK2-NEXT: store i32 0, ptr [[OUTPUT3]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
// CHECK2-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
// CHECK2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]]
// CHECK2-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64)
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[OUTPUT3]], i64 [[TMP11]]
// CHECK2-NEXT: store ptr [[TMP12]], ptr [[_TMP4]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]]
// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]]
// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK2-NEXT: store i32 [[ADD]], ptr [[I5]], align 4
// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4
// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]]
// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4
// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 4
// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4
// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP25]]
// CHECK2-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX9]], align 4
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP28]], 1
// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[OUTPUT3]], ptr [[TMP31]], align 4
// CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
// CHECK2-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP33]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: switch i32 [[TMP34]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK2-NEXT: ]
// CHECK2: .omp.reduction.case1:
// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[OUTPUT3]], align 4
// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
// CHECK2-NEXT: store i32 [[ADD12]], ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP33]], ptr @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.case2:
// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[OUTPUT3]], align 4
// CHECK2-NEXT: [[TMP38:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP37]] monotonic, align 4
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.default:
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined.omp_outlined.omp.reduction.reduction_func
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined.omp.reduction.reduction_func
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73
// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP0]], ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined, i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[OUTPUT2:%.*]] = alloca [3 x i32], align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT2]], i32 0, i32 0
// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 3
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK2: omp.arrayinit.body:
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK2-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK2: omp.arrayinit.done:
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64
// CHECK2-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
// CHECK2-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
// CHECK2-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64)
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[OUTPUT2]], i64 [[TMP7]]
// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK2-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
// CHECK2-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]]
// CHECK2-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]]
// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP24]], ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP]], align 4
// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined.omp_outlined, i32 [[TMP22]], i32 [[TMP23]], i32 [[TMP25]], ptr [[TMP26]], ptr [[TMP27]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]]
// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[OUTPUT2]], ptr [[TMP32]], align 4
// CHECK2-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
// CHECK2-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP34]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK2-NEXT: ]
// CHECK2: .omp.reduction.case1:
// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP36]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK2: omp.arraycpy.body:
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[OUTPUT2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], align 4
// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
// CHECK2-NEXT: store i32 [[ADD10]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], align 4
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE12:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP36]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]]
// CHECK2: omp.arraycpy.done13:
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP34]], ptr @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.case2:
// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY14:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP39]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY14]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY15:%.*]]
// CHECK2: omp.arraycpy.body15:
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST16:%.*]] = phi ptr [ [[OUTPUT2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY15]] ]
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST17:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY15]] ]
// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], align 4
// CHECK2-NEXT: [[TMP41:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 [[TMP40]] monotonic, align 4
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP39]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY15]]
// CHECK2: omp.arraycpy.done21:
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.default:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined.omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[OUTPUT4:%.*]] = alloca [3 x i32], align 4
// CHECK2-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 2
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT4]], i32 0, i32 0
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 3
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK2: omp.arrayinit.body:
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_PRECOND_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK2-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK2: omp.arrayinit.done:
// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
// CHECK2-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
// CHECK2-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]]
// CHECK2-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64)
// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[OUTPUT4]], i64 [[TMP13]]
// CHECK2-NEXT: store ptr [[TMP14]], ptr [[_TMP5]], align 4
// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
// CHECK2-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ]
// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]]
// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK2-NEXT: store i32 [[ADD]], ptr [[I6]], align 4
// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[I6]], align 4
// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 [[TMP26]]
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4
// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP5]], align 4
// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 0
// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4
// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], [[TMP27]]
// CHECK2-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX10]], align 4
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP30]], 1
// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP32]])
// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[OUTPUT4]], ptr [[TMP33]], align 4
// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
// CHECK2-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP35]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK2-NEXT: ]
// CHECK2: .omp.reduction.case1:
// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP37]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK2: omp.arraycpy.body:
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[OUTPUT4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4
// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP38]], [[TMP39]]
// CHECK2-NEXT: store i32 [[ADD14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP37]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]]
// CHECK2: omp.arraycpy.done17:
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.case2:
// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP40]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]]
// CHECK2: omp.arraycpy.body19:
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[OUTPUT4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY19]] ]
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY19]] ]
// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 4
// CHECK2-NEXT: [[TMP42:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 [[TMP41]] monotonic, align 4
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT22]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT23]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT22]], [[TMP40]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY19]]
// CHECK2: omp.arraycpy.done25:
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.default:
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined.omp_outlined.omp.reduction.reduction_func
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 3
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK2: omp.arraycpy.body:
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK2-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// CHECK2: omp.arraycpy.done2:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined.omp.reduction.reduction_func
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 3
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK2: omp.arraycpy.body:
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK2-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// CHECK2: omp.arraycpy.done2:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78
// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78.omp_outlined, i32 [[TMP2]], ptr [[TMP0]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78.omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[A2:%.*]] = alloca [2 x i32], align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 0
// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 1
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x i32], ptr [[A2]], i32 0, i32 0
// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 2
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK2: omp.arrayinit.body:
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK2-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK2: omp.arrayinit.done:
// CHECK2-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64
// CHECK2-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
// CHECK2-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
// CHECK2-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64)
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A2]], i64 [[TMP5]]
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
// CHECK2-NEXT: br label [[FOR_COND:%.*]]
// CHECK2: for.cond:
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
// CHECK2: for.body:
// CHECK2-NEXT: br label [[FOR_INC:%.*]]
// CHECK2: for.inc:
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4
// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK2: for.end:
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[A2]], ptr [[TMP10]], align 4
// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP12]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK2-NEXT: ]
// CHECK2: .omp.reduction.case1:
// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 2
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP14]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK2: omp.arraycpy.body:
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[A2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
// CHECK2-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP14]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]]
// CHECK2: omp.arraycpy.done6:
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP12]], ptr @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.case2:
// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 2
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP17]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]]
// CHECK2: omp.arraycpy.body8:
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[A2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
// CHECK2-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP18]] monotonic, align 4
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP17]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]]
// CHECK2: omp.arraycpy.done14:
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.default:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78.omp_outlined.omp.reduction.reduction_func
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 2
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK2: omp.arraycpy.body:
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK2-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// CHECK2: omp.arraycpy.done2:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81
// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81.omp_outlined, i32 [[TMP2]], ptr [[TMP0]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81.omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 3
// CHECK2-NEXT: store i32 0, ptr [[A1]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
// CHECK2-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
// CHECK2-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]]
// CHECK2-NEXT: [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64)
// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[A1]], i64 [[TMP4]]
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
// CHECK2-NEXT: br label [[FOR_COND:%.*]]
// CHECK2: for.cond:
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]]
// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
// CHECK2: for.body:
// CHECK2-NEXT: br label [[FOR_INC:%.*]]
// CHECK2: for.inc:
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4
// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK2: for.end:
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP9]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
// CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP11]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: switch i32 [[TMP12]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK2-NEXT: ]
// CHECK2: .omp.reduction.case1:
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[A1]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
// CHECK2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP11]], ptr @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.case2:
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[A1]], align 4
// CHECK2-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP15]] monotonic, align 4
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.default:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81.omp_outlined.omp.reduction.reduction_func
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@main
// CHECK2-SAME: () #[[ATTR6:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[ARRAY:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[RESULT:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK2-NEXT: store i32 100, ptr [[SIZE]], align 4
// CHECK2-NEXT: [[CALL:%.*]] = call noalias noundef nonnull ptr @_Znaj(i32 noundef 400) #[[ATTR8:[0-9]+]]
// CHECK2-NEXT: store ptr [[CALL]], ptr [[ARRAY]], align 4
// CHECK2-NEXT: store i32 0, ptr [[RESULT]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAY]], align 4
// CHECK2-NEXT: call void @_Z3sumPiiS_(ptr noundef [[TMP0]], i32 noundef 100, ptr noundef [[RESULT]])
// CHECK2-NEXT: ret i32 0
//