Previously we added the `push_target_tripcount` function to send the loop tripcount to the device runtime so we knew how to configure the teams / threads for execute the loop for a teams distribute construct. This was implemented as a separate function mostly to avoid changing the interface for backwards compatbility. Now that we've changed it anyway and the new interface can take an arbitrary number of arguments via the struct without changing the ABI, we can move this to the new interface. This will simplify the runtime by removing unnecessary state between calls. Depends on D128550 Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D128816
2186 lines
160 KiB
C++
2186 lines
160 KiB
C++
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
|
|
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ \
|
|
// RUN: -triple powerpc64le-unknown-unknown -DCUDA \
|
|
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o \
|
|
// RUN: %t-ppc-host.bc
|
|
|
|
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ \
|
|
// RUN: -triple nvptx64-unknown-unknown -DCUA \
|
|
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -DCUDA -emit-llvm %s \
|
|
// RUN: -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc \
|
|
// RUN: -o - | FileCheck %s --check-prefix CHECK
|
|
|
|
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ \
|
|
// RUN: -triple powerpc64le-unknown-unknown -DDIAG\
|
|
// RUN: -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm \
|
|
// RUN: %s -o - | FileCheck %s \
|
|
// RUN: --check-prefix=CHECK1
|
|
|
|
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ \
|
|
// RUN: -triple i386-unknown-unknown \
|
|
// RUN: -fopenmp-targets=i386-pc-linux-gnu -emit-llvm \
|
|
// RUN: %s -o - | FileCheck %s \
|
|
// RUN: --check-prefix=CHECK2
|
|
|
|
|
|
#if defined(CUDA)
|
|
// expected-no-diagnostics
|
|
|
|
int foo(int n) {
|
|
double *e;
|
|
//no error and no implicit map generated for e[:1]
|
|
#pragma omp target parallel reduction(+: e[:1])
|
|
*e=10;
|
|
;
|
|
return 0;
|
|
}
|
|
// CHECK-NOT @.offload_maptypes
|
|
#elif defined(DIAG)
|
|
class S2 {
|
|
mutable int a;
|
|
public:
|
|
S2():a(0) { }
|
|
S2(S2 &s2):a(s2.a) { }
|
|
S2 &operator +(S2 &s);
|
|
};
|
|
int bar() {
|
|
S2 o[5];
|
|
//warnig "copyable and not guaranteed to be mapped correctly" and
|
|
//implicit map generated.
|
|
#pragma omp target parallel reduction(+:o[0]) //expected-warning {{Type 'S2' is not trivially copyable and not guaranteed to be mapped correctly}}
|
|
for (int i = 0; i < 10; i++);
|
|
double b[10][10][10];
|
|
//no error no implicit map generated, the map for b is generated but not
|
|
//for b[0:2][2:4][1].
|
|
#pragma omp target parallel for reduction(task, +: b[0:2][2:4][1])
|
|
for (long long i = 0; i < 10; ++i);
|
|
return 0;
|
|
}
|
|
// map for variable o
|
|
// map for b:
|
|
#else
|
|
// expected-no-diagnostics
|
|
|
|
// generate implicit map for array elements or array sections in reduction
|
|
// clause. In following case: the implicit map is generate for output[0]
|
|
// with map size 4 and output[:3] with map size 12.
|
|
void sum(int* input, int size, int* output)
|
|
{
|
|
#pragma omp target teams distribute parallel for reduction(+: output[0]) \
|
|
map(to: input [0:size])
|
|
for (int i = 0; i < size; i++)
|
|
output[0] += input[i];
|
|
#pragma omp target teams distribute parallel for reduction(+: output[:3]) \
|
|
map(to: input [0:size])
|
|
for (int i = 0; i < size; i++)
|
|
output[0] += input[i];
|
|
int a[10];
|
|
#pragma omp target parallel reduction(+: a[:2])
|
|
for (int i = 0; i < size; i++)
|
|
;
|
|
#pragma omp target parallel reduction(+: a[3])
|
|
for (int i = 0; i < size; i++)
|
|
;
|
|
}
|
|
#endif
|
|
int main()
|
|
{
|
|
#if defined(CUDA)
|
|
int a = foo(10);
|
|
#elif defined(DIAG)
|
|
int a = bar();
|
|
#else
|
|
const int size = 100;
|
|
int *array = new int[size];
|
|
int result = 0;
|
|
sum(array, size, &result);
|
|
#endif
|
|
return 0;
|
|
}
|
|
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32
|
|
// CHECK-SAME: (double* noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca double*, align 8
|
|
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
|
|
// CHECK-NEXT: store double* [[E]], double** [[E_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
|
|
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
|
|
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
|
// CHECK: user_code.entry:
|
|
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load double*, double** [[E_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP2]] to i8*
|
|
// CHECK-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8
|
|
// CHECK-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
|
|
// CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1)
|
|
// CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
|
|
// CHECK-NEXT: ret void
|
|
// CHECK: worker.exit:
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
|
|
// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef [[E:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca double*, align 8
|
|
// CHECK-NEXT: [[E2:%.*]] = alloca double, align 8
|
|
// CHECK-NEXT: [[TMP:%.*]] = alloca double*, align 8
|
|
// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
|
|
// CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
|
|
// CHECK-NEXT: store double* [[E]], double** [[E_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8
|
|
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 0
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load double*, double** [[E_ADDR]], align 8
|
|
// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 0
|
|
// CHECK-NEXT: store double 0.000000e+00, double* [[E2]], align 8
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load double*, double** [[E_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint double* [[TMP2]] to i64
|
|
// CHECK-NEXT: [[TMP4:%.*]] = ptrtoint double* [[ARRAYIDX]] to i64
|
|
// CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
|
|
// CHECK-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64)
|
|
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, double* [[E2]], i64 [[TMP6]]
|
|
// CHECK-NEXT: store double* [[TMP7]], double** [[TMP]], align 8
|
|
// CHECK-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP]], align 8
|
|
// CHECK-NEXT: store double 1.000000e+01, double* [[TMP8]], align 8
|
|
// CHECK-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
|
|
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[E2]] to i8*
|
|
// CHECK-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8
|
|
// CHECK-NEXT: [[TMP13:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
|
|
// CHECK-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP13]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func)
|
|
// CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1
|
|
// CHECK-NEXT: br i1 [[TMP15]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
|
|
// CHECK: .omp.reduction.then:
|
|
// CHECK-NEXT: [[TMP16:%.*]] = load double, double* [[ARRAYIDX]], align 8
|
|
// CHECK-NEXT: [[TMP17:%.*]] = load double, double* [[E2]], align 8
|
|
// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP16]], [[TMP17]]
|
|
// CHECK-NEXT: store double [[ADD]], double* [[ARRAYIDX]], align 8
|
|
// CHECK-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP10]])
|
|
// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
|
|
// CHECK: .omp.reduction.done:
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
|
|
// CHECK-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
|
|
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
|
|
// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
|
|
// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
|
|
// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8
|
|
// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8
|
|
// CHECK-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
|
|
// CHECK-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2
|
|
// CHECK-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2
|
|
// CHECK-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8
|
|
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
|
|
// CHECK-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2
|
|
// CHECK-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2
|
|
// CHECK-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2
|
|
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to double**
|
|
// CHECK-NEXT: [[TMP11:%.*]] = load double*, double** [[TMP10]], align 8
|
|
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP11]], i64 1
|
|
// CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8*
|
|
// CHECK-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP11]] to i64*
|
|
// CHECK-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64*
|
|
// CHECK-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8
|
|
// CHECK-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size()
|
|
// CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16
|
|
// CHECK-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]])
|
|
// CHECK-NEXT: store i64 [[TMP20]], i64* [[TMP16]], align 8
|
|
// CHECK-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1
|
|
// CHECK-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1
|
|
// CHECK-NEXT: [[TMP23:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8*
|
|
// CHECK-NEXT: store i8* [[TMP23]], i8** [[TMP12]], align 8
|
|
// CHECK-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0
|
|
// CHECK-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1
|
|
// CHECK-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]]
|
|
// CHECK-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]]
|
|
// CHECK-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2
|
|
// CHECK-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1
|
|
// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0
|
|
// CHECK-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]]
|
|
// CHECK-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0
|
|
// CHECK-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]]
|
|
// CHECK-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]]
|
|
// CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]]
|
|
// CHECK-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]]
|
|
// CHECK: then:
|
|
// CHECK-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8*
|
|
// CHECK-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8*
|
|
// CHECK-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP36]], i8* [[TMP37]]) #[[ATTR3:[0-9]+]]
|
|
// CHECK-NEXT: br label [[IFCONT:%.*]]
|
|
// CHECK: else:
|
|
// CHECK-NEXT: br label [[IFCONT]]
|
|
// CHECK: ifcont:
|
|
// CHECK-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1
|
|
// CHECK-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]]
|
|
// CHECK-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]]
|
|
// CHECK-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
|
|
// CHECK: then4:
|
|
// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to double**
|
|
// CHECK-NEXT: [[TMP43:%.*]] = load double*, double** [[TMP42]], align 8
|
|
// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to double**
|
|
// CHECK-NEXT: [[TMP46:%.*]] = load double*, double** [[TMP45]], align 8
|
|
// CHECK-NEXT: [[TMP47:%.*]] = load double, double* [[TMP43]], align 8
|
|
// CHECK-NEXT: store double [[TMP47]], double* [[TMP46]], align 8
|
|
// CHECK-NEXT: br label [[IFCONT6:%.*]]
|
|
// CHECK: else5:
|
|
// CHECK-NEXT: br label [[IFCONT6]]
|
|
// CHECK: ifcont6:
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func
|
|
// CHECK-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
|
|
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
|
|
// CHECK-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
|
|
// CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
|
|
// CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
|
|
// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
|
|
// CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
|
|
// CHECK-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
|
|
// CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]*
|
|
// CHECK-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4
|
|
// CHECK-NEXT: br label [[PRECOND:%.*]]
|
|
// CHECK: precond:
|
|
// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2
|
|
// CHECK-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]]
|
|
// CHECK: body:
|
|
// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]])
|
|
// CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
|
|
// CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
|
|
// CHECK: then:
|
|
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8
|
|
// CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32*
|
|
// CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]]
|
|
// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
|
|
// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4
|
|
// CHECK-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4
|
|
// CHECK-NEXT: br label [[IFCONT:%.*]]
|
|
// CHECK: else:
|
|
// CHECK-NEXT: br label [[IFCONT]]
|
|
// CHECK: ifcont:
|
|
// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
|
|
// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4
|
|
// CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]]
|
|
// CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
|
|
// CHECK: then2:
|
|
// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
|
|
// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8
|
|
// CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32*
|
|
// CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]]
|
|
// CHECK-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4
|
|
// CHECK-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4
|
|
// CHECK-NEXT: br label [[IFCONT4:%.*]]
|
|
// CHECK: else3:
|
|
// CHECK-NEXT: br label [[IFCONT4]]
|
|
// CHECK: ifcont4:
|
|
// CHECK-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1
|
|
// CHECK-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4
|
|
// CHECK-NEXT: br label [[PRECOND]]
|
|
// CHECK: exit:
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@_Z3barv
|
|
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[O:%.*]] = alloca [5 x %class.S2], align 4
|
|
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
|
|
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
|
|
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 8
|
|
// CHECK1-NEXT: [[B:%.*]] = alloca [10 x [10 x [10 x double]]], align 8
|
|
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x i8*], align 8
|
|
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x i8*], align 8
|
|
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x i8*], align 8
|
|
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %class.S2], [5 x %class.S2]* [[O]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_S2:%.*]], %class.S2* [[ARRAY_BEGIN]], i64 5
|
|
// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
|
|
// CHECK1: arrayctor.loop:
|
|
// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %class.S2* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
|
|
// CHECK1-NEXT: call void @_ZN2S2C1Ev(%class.S2* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
|
|
// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_S2]], %class.S2* [[ARRAYCTOR_CUR]], i64 1
|
|
// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.S2* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
|
|
// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
|
|
// CHECK1: arrayctor.cont:
|
|
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %class.S2], [5 x %class.S2]* [[O]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = bitcast i8** [[TMP0]] to [5 x %class.S2]**
|
|
// CHECK1-NEXT: store [5 x %class.S2]* [[O]], [5 x %class.S2]** [[TMP1]], align 8
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to %class.S2**
|
|
// CHECK1-NEXT: store %class.S2* [[ARRAYIDX]], %class.S2** [[TMP3]], align 8
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
|
// CHECK1-NEXT: store i8* null, i8** [[TMP4]], align 8
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CHECK1-NEXT: store i32 1, i32* [[TMP7]], align 4
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CHECK1-NEXT: store i32 1, i32* [[TMP8]], align 4
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CHECK1-NEXT: store i8** [[TMP5]], i8*** [[TMP9]], align 8
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CHECK1-NEXT: store i8** [[TMP6]], i8*** [[TMP10]], align 8
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CHECK1-NEXT: store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP11]], align 8
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CHECK1-NEXT: store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP12]], align 8
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CHECK1-NEXT: store i8** null, i8*** [[TMP13]], align 8
|
|
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CHECK1-NEXT: store i8** null, i8*** [[TMP14]], align 8
|
|
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CHECK1-NEXT: store i64 0, i64* [[TMP15]], align 8
|
|
// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i32 1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
|
|
// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
|
|
// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
|
// CHECK1: omp_offload.failed:
|
|
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50([5 x %class.S2]* [[O]]) #[[ATTR8:[0-9]+]]
|
|
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
|
|
// CHECK1: omp_offload.cont:
|
|
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to [10 x [10 x [10 x double]]]**
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x double]]]* [[B]], [10 x [10 x [10 x double]]]** [[TMP19]], align 8
|
|
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to [10 x [10 x [10 x double]]]**
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x double]]]* [[B]], [10 x [10 x [10 x double]]]** [[TMP21]], align 8
|
|
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0
|
|
// CHECK1-NEXT: store i8* null, i8** [[TMP22]], align 8
|
|
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
|
|
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS4]], i32 0, i32 0
|
|
// CHECK1-NEXT: store i32 1, i32* [[TMP25]], align 4
|
|
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS4]], i32 0, i32 1
|
|
// CHECK1-NEXT: store i32 1, i32* [[TMP26]], align 4
|
|
// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS4]], i32 0, i32 2
|
|
// CHECK1-NEXT: store i8** [[TMP23]], i8*** [[TMP27]], align 8
|
|
// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS4]], i32 0, i32 3
|
|
// CHECK1-NEXT: store i8** [[TMP24]], i8*** [[TMP28]], align 8
|
|
// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS4]], i32 0, i32 4
|
|
// CHECK1-NEXT: store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP29]], align 8
|
|
// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS4]], i32 0, i32 5
|
|
// CHECK1-NEXT: store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP30]], align 8
|
|
// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS4]], i32 0, i32 6
|
|
// CHECK1-NEXT: store i8** null, i8*** [[TMP31]], align 8
|
|
// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS4]], i32 0, i32 7
|
|
// CHECK1-NEXT: store i8** null, i8*** [[TMP32]], align 8
|
|
// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS4]], i32 0, i32 8
|
|
// CHECK1-NEXT: store i64 0, i64* [[TMP33]], align 8
|
|
// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS4]])
|
|
// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
|
|
// CHECK1-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
|
|
// CHECK1: omp_offload.failed5:
|
|
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55([10 x [10 x [10 x double]]]* [[B]]) #[[ATTR8]]
|
|
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT6]]
|
|
// CHECK1: omp_offload.cont6:
|
|
// CHECK1-NEXT: ret i32 0
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@_ZN2S2C1Ev
|
|
// CHECK1-SAME: (%class.S2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %class.S2*, align 8
|
|
// CHECK1-NEXT: store %class.S2* [[THIS]], %class.S2** [[THIS_ADDR]], align 8
|
|
// CHECK1-NEXT: [[THIS1:%.*]] = load %class.S2*, %class.S2** [[THIS_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @_ZN2S2C2Ev(%class.S2* noundef nonnull align 4 dereferenceable(4) [[THIS1]])
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50
|
|
// CHECK1-SAME: ([5 x %class.S2]* noundef nonnull align 4 dereferenceable(20) [[O:%.*]]) #[[ATTR2:[0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[O_ADDR:%.*]] = alloca [5 x %class.S2]*, align 8
|
|
// CHECK1-NEXT: store [5 x %class.S2]* [[O]], [5 x %class.S2]** [[O_ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [5 x %class.S2]*, [5 x %class.S2]** [[O_ADDR]], align 8
|
|
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [5 x %class.S2]*)* @.omp_outlined. to void (i32*, i32*, ...)*), [5 x %class.S2]* [[TMP0]])
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
|
|
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [5 x %class.S2]* noundef nonnull align 4 dereferenceable(20) [[O:%.*]]) #[[ATTR3:[0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[O_ADDR:%.*]] = alloca [5 x %class.S2]*, align 8
|
|
// CHECK1-NEXT: [[O1:%.*]] = alloca [[CLASS_S2:%.*]], align 4
|
|
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
|
|
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: store [5 x %class.S2]* [[O]], [5 x %class.S2]** [[O_ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [5 x %class.S2]*, [5 x %class.S2]** [[O_ADDR]], align 8
|
|
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %class.S2], [5 x %class.S2]* [[TMP0]], i64 0, i64 0
|
|
// CHECK1-NEXT: call void @_ZN2S2C1Ev(%class.S2* noundef nonnull align 4 dereferenceable(4) [[O1]])
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = bitcast [5 x %class.S2]* [[TMP0]] to %class.S2*
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint %class.S2* [[TMP1]] to i64
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint %class.S2* [[ARRAYIDX]] to i64
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (%class.S2* getelementptr ([[CLASS_S2]], %class.S2* null, i32 1) to i64)
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[CLASS_S2]], %class.S2* [[O1]], i64 [[TMP5]]
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = bitcast %class.S2* [[TMP6]] to [5 x %class.S2]*
|
|
// CHECK1-NEXT: store i32 0, i32* [[I]], align 4
|
|
// CHECK1-NEXT: br label [[FOR_COND:%.*]]
|
|
// CHECK1: for.cond:
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4
|
|
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], 10
|
|
// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
|
|
// CHECK1: for.body:
|
|
// CHECK1-NEXT: br label [[FOR_INC:%.*]]
|
|
// CHECK1: for.inc:
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4
|
|
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1
|
|
// CHECK1-NEXT: store i32 [[INC]], i32* [[I]], align 4
|
|
// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
// CHECK1: for.end:
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %class.S2* [[O1]] to i8*
|
|
// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
|
|
// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
|
|
// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 1, i64 8, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK1-NEXT: ]
|
|
// CHECK1: .omp.reduction.case1:
|
|
// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %class.S2* @_ZN2S2plERS_(%class.S2* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], %class.S2* noundef nonnull align 4 dereferenceable(4) [[O1]])
|
|
// CHECK1-NEXT: [[TMP16:%.*]] = bitcast %class.S2* [[ARRAYIDX]] to i8*
|
|
// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %class.S2* [[CALL]] to i8*
|
|
// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i64 4, i1 false)
|
|
// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK1: .omp.reduction.case2:
|
|
// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4
|
|
// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP19]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
|
|
// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %class.S2* @_ZN2S2plERS_(%class.S2* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], %class.S2* noundef nonnull align 4 dereferenceable(4) [[O1]])
|
|
// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %class.S2* [[ARRAYIDX]] to i8*
|
|
// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %class.S2* [[CALL2]] to i8*
|
|
// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false)
|
|
// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP19]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
|
|
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK1: .omp.reduction.default:
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
|
|
// CHECK1-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
|
|
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %class.S2*
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %class.S2*
|
|
// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %class.S2* @_ZN2S2plERS_(%class.S2* noundef nonnull align 4 dereferenceable(4) [[TMP11]], %class.S2* noundef nonnull align 4 dereferenceable(4) [[TMP8]])
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %class.S2* [[TMP11]] to i8*
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %class.S2* [[CALL]] to i8*
|
|
// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false)
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55
|
|
// CHECK1-SAME: ([10 x [10 x [10 x double]]]* noundef nonnull align 8 dereferenceable(8000) [[B:%.*]]) #[[ATTR2]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x [10 x double]]]*, align 8
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x double]]]* [[B]], [10 x [10 x [10 x double]]]** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x double]]]*, [10 x [10 x [10 x double]]]** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [10 x [10 x double]]]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x [10 x [10 x double]]]* [[TMP0]])
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1
|
|
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x double]]]* noundef nonnull align 8 dereferenceable(8000) [[B:%.*]]) #[[ATTR3]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x [10 x double]]]*, align 8
|
|
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [1 x %struct.kmp_taskred_input_t], align 8
|
|
// CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8
|
|
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x double]]]* [[B]], [10 x [10 x [10 x double]]]** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x double]]]*, [10 x [10 x [10 x double]]]** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8
|
|
// CHECK1-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8
|
|
// CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
|
|
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
|
|
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP0]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x [10 x double]], [10 x [10 x double]]* [[ARRAYIDX]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYDECAY]], i64 2
|
|
// CHECK1-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX1]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ARRAYDECAY2]], i64 1
|
|
// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP0]], i64 0, i64 1
|
|
// CHECK1-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [10 x [10 x double]], [10 x [10 x double]]* [[ARRAYIDX4]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYDECAY5]], i64 5
|
|
// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX6]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[ARRAYDECAY7]], i64 1
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = ptrtoint double* [[ARRAYIDX8]] to i64
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint double* [[ARRAYIDX3]] to i64
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64)
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = add nuw i64 [[TMP4]], 1
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64)
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = call i8* @llvm.stacksave()
|
|
// CHECK1-NEXT: store i8* [[TMP7]], i8** [[SAVED_STACK]], align 8
|
|
// CHECK1-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP5]], align 8
|
|
// CHECK1-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR0]], align 8
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr double, double* [[VLA]], i64 [[TMP5]]
|
|
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq double* [[VLA]], [[TMP8]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
|
|
// CHECK1: omp.arrayinit.body:
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
|
|
// CHECK1-NEXT: store double 0.000000e+00, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
|
|
// CHECK1: omp.arrayinit.done:
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [10 x [10 x [10 x double]]]* [[TMP0]] to double*
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint double* [[TMP9]] to i64
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint double* [[ARRAYIDX3]] to i64
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]]
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64)
|
|
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[VLA]], i64 [[TMP13]]
|
|
// CHECK1-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to [10 x [10 x [10 x double]]]*
|
|
// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [1 x %struct.kmp_taskred_input_t], [1 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP0]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [10 x [10 x double]], [10 x [10 x double]]* [[ARRAYIDX9]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYDECAY10]], i64 2
|
|
// CHECK1-NEXT: [[ARRAYDECAY12:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX11]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[ARRAYDECAY12]], i64 1
|
|
// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP0]], i64 0, i64 1
|
|
// CHECK1-NEXT: [[ARRAYDECAY15:%.*]] = getelementptr inbounds [10 x [10 x double]], [10 x [10 x double]]* [[ARRAYIDX14]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYDECAY15]], i64 5
|
|
// CHECK1-NEXT: [[ARRAYDECAY17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYDECAY17]], i64 1
|
|
// CHECK1-NEXT: [[TMP17:%.*]] = bitcast double* [[VLA]] to i8*
|
|
// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8
|
|
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
|
|
// CHECK1-NEXT: [[TMP19:%.*]] = bitcast double* [[ARRAYIDX13]] to i8*
|
|
// CHECK1-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 8
|
|
// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint double* [[ARRAYIDX18]] to i64
|
|
// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint double* [[ARRAYIDX13]] to i64
|
|
// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]]
|
|
// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64)
|
|
// CHECK1-NEXT: [[TMP24:%.*]] = add nuw i64 [[TMP23]], 1
|
|
// CHECK1-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP24]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64)
|
|
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
|
|
// CHECK1-NEXT: store i64 [[TMP25]], i64* [[TMP26]], align 8
|
|
// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
|
|
// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP27]], align 8
|
|
// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
|
|
// CHECK1-NEXT: store i8* null, i8** [[TMP28]], align 8
|
|
// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
|
|
// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP29]], align 8
|
|
// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6
|
|
// CHECK1-NEXT: store i32 1, i32* [[TMP30]], align 8
|
|
// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4
|
|
// CHECK1-NEXT: [[TMP33:%.*]] = bitcast [1 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8*
|
|
// CHECK1-NEXT: [[TMP34:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB2]], i32 [[TMP32]], i32 1, i32 1, i8* [[TMP33]])
|
|
// CHECK1-NEXT: store i8* [[TMP34]], i8** [[DOTTASK_RED_]], align 8
|
|
// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
|
|
// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP36]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1)
|
|
// CHECK1-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8
|
|
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP37]], 9
|
|
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
|
// CHECK1: cond.true:
|
|
// CHECK1-NEXT: br label [[COND_END:%.*]]
|
|
// CHECK1: cond.false:
|
|
// CHECK1-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8
|
|
// CHECK1-NEXT: br label [[COND_END]]
|
|
// CHECK1: cond.end:
|
|
// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP38]], [[COND_FALSE]] ]
|
|
// CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8
|
|
// CHECK1-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8
|
|
// CHECK1-NEXT: store i64 [[TMP39]], i64* [[DOTOMP_IV]], align 8
|
|
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
|
// CHECK1: omp.inner.for.cond:
|
|
// CHECK1-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
|
|
// CHECK1-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8
|
|
// CHECK1-NEXT: [[CMP19:%.*]] = icmp sle i64 [[TMP40]], [[TMP41]]
|
|
// CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
|
|
// CHECK1: omp.inner.for.cond.cleanup:
|
|
// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
|
|
// CHECK1: omp.inner.for.body:
|
|
// CHECK1-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
|
|
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP42]], 1
|
|
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]]
|
|
// CHECK1-NEXT: store i64 [[ADD]], i64* [[I]], align 8
|
|
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
|
// CHECK1: omp.body.continue:
|
|
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// CHECK1: omp.inner.for.inc:
|
|
// CHECK1-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
|
|
// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP43]], 1
|
|
// CHECK1-NEXT: store i64 [[ADD20]], i64* [[DOTOMP_IV]], align 8
|
|
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
|
|
// CHECK1: omp.inner.for.end:
|
|
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
|
// CHECK1: omp.loop.exit:
|
|
// CHECK1-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4
|
|
// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP45]])
|
|
// CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4
|
|
// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]], i32 1)
|
|
// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[TMP49:%.*]] = bitcast double* [[VLA]] to i8*
|
|
// CHECK1-NEXT: store i8* [[TMP49]], i8** [[TMP48]], align 8
|
|
// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
|
|
// CHECK1-NEXT: [[TMP51:%.*]] = inttoptr i64 [[TMP5]] to i8*
|
|
// CHECK1-NEXT: store i8* [[TMP51]], i8** [[TMP50]], align 8
|
|
// CHECK1-NEXT: [[TMP52:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[TMP52]], align 4
|
|
// CHECK1-NEXT: [[TMP54:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
|
|
// CHECK1-NEXT: [[TMP55:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP53]], i32 1, i64 16, i8* [[TMP54]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK1-NEXT: switch i32 [[TMP55]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK1-NEXT: ]
|
|
// CHECK1: .omp.reduction.case1:
|
|
// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr double, double* [[ARRAYIDX3]], i64 [[TMP5]]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq double* [[ARRAYIDX3]], [[TMP56]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK1: omp.arraycpy.body:
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi double* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi double* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK1-NEXT: [[TMP57:%.*]] = load double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8
|
|
// CHECK1-NEXT: [[TMP58:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[ADD22:%.*]] = fadd double [[TMP57]], [[TMP58]]
|
|
// CHECK1-NEXT: store double [[ADD22]], double* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP56]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK1: omp.arraycpy.done25:
|
|
// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP53]], [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK1: .omp.reduction.case2:
|
|
// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr double, double* [[ARRAYIDX3]], i64 [[TMP5]]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY26:%.*]] = icmp eq double* [[ARRAYIDX3]], [[TMP59]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY26]], label [[OMP_ARRAYCPY_DONE33:%.*]], label [[OMP_ARRAYCPY_BODY27:%.*]]
|
|
// CHECK1: omp.arraycpy.body27:
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST28:%.*]] = phi double* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT31:%.*]], [[OMP_ARRAYCPY_BODY27]] ]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST29:%.*]] = phi double* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY27]] ]
|
|
// CHECK1-NEXT: [[TMP60:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8
|
|
// CHECK1-NEXT: [[TMP61:%.*]] = atomicrmw fadd double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]], double [[TMP60]] monotonic, align 8
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT30]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT31]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE32:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT30]], [[TMP59]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_BODY27]]
|
|
// CHECK1: omp.arraycpy.done33:
|
|
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK1: .omp.reduction.default:
|
|
// CHECK1-NEXT: [[TMP62:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
|
|
// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP62]])
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@.red_init.
|
|
// CHECK1-SAME: (i8* noalias noundef [[TMP0:%.*]], i8* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
|
|
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to double**
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP2]], align 8
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr double, double* [[TMP3]], i64 [[TMP4]]
|
|
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq double* [[TMP3]], [[TMP5]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
|
|
// CHECK1: omp.arrayinit.body:
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
|
|
// CHECK1-NEXT: store double 0.000000e+00, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
|
|
// CHECK1: omp.arrayinit.done:
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@.red_comb.
|
|
// CHECK1-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR4]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
|
|
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to double**
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8** [[DOTADDR1]] to double**
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr double, double* [[TMP4]], i64 [[TMP2]]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq double* [[TMP4]], [[TMP7]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK1: omp.arraycpy.body:
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi double* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = load double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP8]], [[TMP9]]
|
|
// CHECK1-NEXT: store double [[ADD]], double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK1: omp.arraycpy.done2:
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
|
|
// CHECK1-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR4]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
|
|
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x i8*]*
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]*
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to double*
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP3]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to double*
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP3]], i64 0, i64 1
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8
|
|
// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint i8* [[TMP13]] to i64
|
|
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr double, double* [[TMP11]], i64 [[TMP14]]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq double* [[TMP11]], [[TMP15]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK1: omp.arraycpy.body:
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi double* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK1-NEXT: [[TMP16:%.*]] = load double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[TMP17:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP16]], [[TMP17]]
|
|
// CHECK1-NEXT: store double [[ADD]], double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK1: omp.arraycpy.done2:
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@_ZN2S2C2Ev
|
|
// CHECK1-SAME: (%class.S2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %class.S2*, align 8
|
|
// CHECK1-NEXT: store %class.S2* [[THIS]], %class.S2** [[THIS_ADDR]], align 8
|
|
// CHECK1-NEXT: [[THIS1:%.*]] = load %class.S2*, %class.S2** [[THIS_ADDR]], align 8
|
|
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[CLASS_S2:%.*]], %class.S2* [[THIS1]], i32 0, i32 0
|
|
// CHECK1-NEXT: store i32 0, i32* [[A]], align 4
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@main
|
|
// CHECK1-SAME: () #[[ATTR10:[0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4
|
|
// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3barv()
|
|
// CHECK1-NEXT: store i32 [[CALL]], i32* [[A]], align 4
|
|
// CHECK1-NEXT: ret i32 0
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
|
|
// CHECK1-SAME: () #[[ATTR11:[0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: call void @__tgt_register_requires(i64 1)
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@_Z3sumPiiS_
|
|
// CHECK2-SAME: (i32* noundef [[INPUT:%.*]], i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 4
|
|
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED4:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [3 x i8*], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [3 x i8*], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [3 x i8*], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_SIZES10:%.*]] = alloca [3 x i64], align 4
|
|
// CHECK2-NEXT: [[_TMP11:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_13:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[A:%.*]] = alloca [10 x i32], align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED21:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS23:%.*]] = alloca [2 x i8*], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS24:%.*]] = alloca [2 x i8*], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS25:%.*]] = alloca [2 x i8*], align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED29:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS31:%.*]] = alloca [2 x i8*], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS32:%.*]] = alloca [2 x i8*], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS33:%.*]] = alloca [2 x i8*], align 4
|
|
// CHECK2-NEXT: store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 0
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
|
|
// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
|
|
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP13]], align 4
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
|
|
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP15]], align 4
|
|
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i8* null, i8** [[TMP16]], align 4
|
|
// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CHECK2-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
|
|
// CHECK2-NEXT: store i32* [[TMP4]], i32** [[TMP18]], align 4
|
|
// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
|
|
// CHECK2-NEXT: store i32* [[ARRAYIDX]], i32** [[TMP20]], align 4
|
|
// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
|
|
// CHECK2-NEXT: store i8* null, i8** [[TMP21]], align 4
|
|
// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
|
|
// CHECK2-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32**
|
|
// CHECK2-NEXT: store i32* [[TMP6]], i32** [[TMP23]], align 4
|
|
// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
|
|
// CHECK2-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32**
|
|
// CHECK2-NEXT: store i32* [[ARRAYIDX1]], i32** [[TMP25]], align 4
|
|
// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
|
|
// CHECK2-NEXT: store i64 [[TMP10]], i64* [[TMP26]], align 4
|
|
// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
|
|
// CHECK2-NEXT: store i8* null, i8** [[TMP27]], align 4
|
|
// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0
|
|
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
|
|
// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
|
|
// CHECK2-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4
|
|
// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], 1
|
|
// CHECK2-NEXT: [[TMP34:%.*]] = zext i32 [[ADD]] to i64
|
|
// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 1, i32* [[TMP35]], align 4
|
|
// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CHECK2-NEXT: store i32 3, i32* [[TMP36]], align 4
|
|
// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CHECK2-NEXT: store i8** [[TMP28]], i8*** [[TMP37]], align 4
|
|
// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CHECK2-NEXT: store i8** [[TMP29]], i8*** [[TMP38]], align 4
|
|
// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CHECK2-NEXT: store i64* [[TMP30]], i64** [[TMP39]], align 4
|
|
// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CHECK2-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP40]], align 4
|
|
// CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CHECK2-NEXT: store i8** null, i8*** [[TMP41]], align 4
|
|
// CHECK2-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CHECK2-NEXT: store i8** null, i8*** [[TMP42]], align 4
|
|
// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CHECK2-NEXT: store i64 [[TMP34]], i64* [[TMP43]], align 8
|
|
// CHECK2-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
|
|
// CHECK2-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
|
|
// CHECK2-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
|
// CHECK2: omp_offload.failed:
|
|
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69(i32 [[TMP1]], i32* [[TMP2]], i32* [[TMP3]]) #[[ATTR2:[0-9]+]]
|
|
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]]
|
|
// CHECK2: omp_offload.cont:
|
|
// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP46]], i32* [[SIZE_CASTED4]], align 4
|
|
// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[SIZE_CASTED4]], align 4
|
|
// CHECK2-NEXT: [[TMP48:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP49:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP50:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP51:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[TMP51]], i32 0
|
|
// CHECK2-NEXT: [[TMP52:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP53:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP53]], i32 0
|
|
// CHECK2-NEXT: [[TMP54:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP55:%.*]] = mul nuw i32 [[TMP54]], 4
|
|
// CHECK2-NEXT: [[TMP56:%.*]] = sext i32 [[TMP55]] to i64
|
|
// CHECK2-NEXT: [[TMP57:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES10]] to i8*
|
|
// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP57]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.7 to i8*), i32 24, i1 false)
|
|
// CHECK2-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to i32*
|
|
// CHECK2-NEXT: store i32 [[TMP47]], i32* [[TMP59]], align 4
|
|
// CHECK2-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i32*
|
|
// CHECK2-NEXT: store i32 [[TMP47]], i32* [[TMP61]], align 4
|
|
// CHECK2-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i8* null, i8** [[TMP62]], align 4
|
|
// CHECK2-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 1
|
|
// CHECK2-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32**
|
|
// CHECK2-NEXT: store i32* [[TMP50]], i32** [[TMP64]], align 4
|
|
// CHECK2-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 1
|
|
// CHECK2-NEXT: [[TMP66:%.*]] = bitcast i8** [[TMP65]] to i32**
|
|
// CHECK2-NEXT: store i32* [[ARRAYIDX5]], i32** [[TMP66]], align 4
|
|
// CHECK2-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 1
|
|
// CHECK2-NEXT: store i8* null, i8** [[TMP67]], align 4
|
|
// CHECK2-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 2
|
|
// CHECK2-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i32**
|
|
// CHECK2-NEXT: store i32* [[TMP52]], i32** [[TMP69]], align 4
|
|
// CHECK2-NEXT: [[TMP70:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 2
|
|
// CHECK2-NEXT: [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32**
|
|
// CHECK2-NEXT: store i32* [[ARRAYIDX6]], i32** [[TMP71]], align 4
|
|
// CHECK2-NEXT: [[TMP72:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES10]], i32 0, i32 2
|
|
// CHECK2-NEXT: store i64 [[TMP56]], i64* [[TMP72]], align 4
|
|
// CHECK2-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 2
|
|
// CHECK2-NEXT: store i8* null, i8** [[TMP73]], align 4
|
|
// CHECK2-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP75:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP76:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES10]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP77:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP77]], i32* [[DOTCAPTURE_EXPR_12]], align 4
|
|
// CHECK2-NEXT: [[TMP78:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_12]], align 4
|
|
// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP78]], 0
|
|
// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
|
|
// CHECK2-NEXT: [[SUB16:%.*]] = sub nsw i32 [[DIV15]], 1
|
|
// CHECK2-NEXT: store i32 [[SUB16]], i32* [[DOTCAPTURE_EXPR_13]], align 4
|
|
// CHECK2-NEXT: [[TMP79:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_13]], align 4
|
|
// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP79]], 1
|
|
// CHECK2-NEXT: [[TMP80:%.*]] = zext i32 [[ADD17]] to i64
|
|
// CHECK2-NEXT: [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
|
|
// CHECK2-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS18]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 1, i32* [[TMP81]], align 4
|
|
// CHECK2-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS18]], i32 0, i32 1
|
|
// CHECK2-NEXT: store i32 3, i32* [[TMP82]], align 4
|
|
// CHECK2-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS18]], i32 0, i32 2
|
|
// CHECK2-NEXT: store i8** [[TMP74]], i8*** [[TMP83]], align 4
|
|
// CHECK2-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS18]], i32 0, i32 3
|
|
// CHECK2-NEXT: store i8** [[TMP75]], i8*** [[TMP84]], align 4
|
|
// CHECK2-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS18]], i32 0, i32 4
|
|
// CHECK2-NEXT: store i64* [[TMP76]], i64** [[TMP85]], align 4
|
|
// CHECK2-NEXT: [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS18]], i32 0, i32 5
|
|
// CHECK2-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP86]], align 4
|
|
// CHECK2-NEXT: [[TMP87:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS18]], i32 0, i32 6
|
|
// CHECK2-NEXT: store i8** null, i8*** [[TMP87]], align 4
|
|
// CHECK2-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS18]], i32 0, i32 7
|
|
// CHECK2-NEXT: store i8** null, i8*** [[TMP88]], align 4
|
|
// CHECK2-NEXT: [[TMP89:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS18]], i32 0, i32 8
|
|
// CHECK2-NEXT: store i64 [[TMP80]], i64* [[TMP89]], align 8
|
|
// CHECK2-NEXT: [[TMP90:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB4]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS18]])
|
|
// CHECK2-NEXT: [[TMP91:%.*]] = icmp ne i32 [[TMP90]], 0
|
|
// CHECK2-NEXT: br i1 [[TMP91]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]]
|
|
// CHECK2: omp_offload.failed19:
|
|
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73(i32 [[TMP47]], i32* [[TMP48]], i32* [[TMP49]]) #[[ATTR2]]
|
|
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT20]]
|
|
// CHECK2: omp_offload.cont20:
|
|
// CHECK2-NEXT: [[TMP92:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP92]], i32* [[SIZE_CASTED21]], align 4
|
|
// CHECK2-NEXT: [[TMP93:%.*]] = load i32, i32* [[SIZE_CASTED21]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[A]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP94:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP95:%.*]] = bitcast i8** [[TMP94]] to i32*
|
|
// CHECK2-NEXT: store i32 [[TMP93]], i32* [[TMP95]], align 4
|
|
// CHECK2-NEXT: [[TMP96:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
|
|
// CHECK2-NEXT: store i32 [[TMP93]], i32* [[TMP97]], align 4
|
|
// CHECK2-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i8* null, i8** [[TMP98]], align 4
|
|
// CHECK2-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 1
|
|
// CHECK2-NEXT: [[TMP100:%.*]] = bitcast i8** [[TMP99]] to [10 x i32]**
|
|
// CHECK2-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[TMP100]], align 4
|
|
// CHECK2-NEXT: [[TMP101:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 1
|
|
// CHECK2-NEXT: [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i32**
|
|
// CHECK2-NEXT: store i32* [[ARRAYIDX22]], i32** [[TMP102]], align 4
|
|
// CHECK2-NEXT: [[TMP103:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 1
|
|
// CHECK2-NEXT: store i8* null, i8** [[TMP103]], align 4
|
|
// CHECK2-NEXT: [[TMP104:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP105:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
|
|
// CHECK2-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS26]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 1, i32* [[TMP106]], align 4
|
|
// CHECK2-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS26]], i32 0, i32 1
|
|
// CHECK2-NEXT: store i32 2, i32* [[TMP107]], align 4
|
|
// CHECK2-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS26]], i32 0, i32 2
|
|
// CHECK2-NEXT: store i8** [[TMP104]], i8*** [[TMP108]], align 4
|
|
// CHECK2-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS26]], i32 0, i32 3
|
|
// CHECK2-NEXT: store i8** [[TMP105]], i8*** [[TMP109]], align 4
|
|
// CHECK2-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS26]], i32 0, i32 4
|
|
// CHECK2-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.11, i32 0, i32 0), i64** [[TMP110]], align 4
|
|
// CHECK2-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS26]], i32 0, i32 5
|
|
// CHECK2-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.12, i32 0, i32 0), i64** [[TMP111]], align 4
|
|
// CHECK2-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS26]], i32 0, i32 6
|
|
// CHECK2-NEXT: store i8** null, i8*** [[TMP112]], align 4
|
|
// CHECK2-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS26]], i32 0, i32 7
|
|
// CHECK2-NEXT: store i8** null, i8*** [[TMP113]], align 4
|
|
// CHECK2-NEXT: [[TMP114:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS26]], i32 0, i32 8
|
|
// CHECK2-NEXT: store i64 0, i64* [[TMP114]], align 8
|
|
// CHECK2-NEXT: [[TMP115:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB4]], i64 -1, i32 1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS26]])
|
|
// CHECK2-NEXT: [[TMP116:%.*]] = icmp ne i32 [[TMP115]], 0
|
|
// CHECK2-NEXT: br i1 [[TMP116]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]]
|
|
// CHECK2: omp_offload.failed27:
|
|
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78(i32 [[TMP93]], [10 x i32]* [[A]]) #[[ATTR2]]
|
|
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT28]]
|
|
// CHECK2: omp_offload.cont28:
|
|
// CHECK2-NEXT: [[TMP117:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP117]], i32* [[SIZE_CASTED29]], align 4
|
|
// CHECK2-NEXT: [[TMP118:%.*]] = load i32, i32* [[SIZE_CASTED29]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[A]], i32 0, i32 3
|
|
// CHECK2-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32*
|
|
// CHECK2-NEXT: store i32 [[TMP118]], i32* [[TMP120]], align 4
|
|
// CHECK2-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS32]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32*
|
|
// CHECK2-NEXT: store i32 [[TMP118]], i32* [[TMP122]], align 4
|
|
// CHECK2-NEXT: [[TMP123:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS33]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i8* null, i8** [[TMP123]], align 4
|
|
// CHECK2-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 1
|
|
// CHECK2-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to [10 x i32]**
|
|
// CHECK2-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[TMP125]], align 4
|
|
// CHECK2-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS32]], i32 0, i32 1
|
|
// CHECK2-NEXT: [[TMP127:%.*]] = bitcast i8** [[TMP126]] to i32**
|
|
// CHECK2-NEXT: store i32* [[ARRAYIDX30]], i32** [[TMP127]], align 4
|
|
// CHECK2-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS33]], i32 0, i32 1
|
|
// CHECK2-NEXT: store i8* null, i8** [[TMP128]], align 4
|
|
// CHECK2-NEXT: [[TMP129:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP130:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS32]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[KERNEL_ARGS34:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
|
|
// CHECK2-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS34]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 1, i32* [[TMP131]], align 4
|
|
// CHECK2-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS34]], i32 0, i32 1
|
|
// CHECK2-NEXT: store i32 2, i32* [[TMP132]], align 4
|
|
// CHECK2-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS34]], i32 0, i32 2
|
|
// CHECK2-NEXT: store i8** [[TMP129]], i8*** [[TMP133]], align 4
|
|
// CHECK2-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS34]], i32 0, i32 3
|
|
// CHECK2-NEXT: store i8** [[TMP130]], i8*** [[TMP134]], align 4
|
|
// CHECK2-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS34]], i32 0, i32 4
|
|
// CHECK2-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.15, i32 0, i32 0), i64** [[TMP135]], align 4
|
|
// CHECK2-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS34]], i32 0, i32 5
|
|
// CHECK2-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.16, i32 0, i32 0), i64** [[TMP136]], align 4
|
|
// CHECK2-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS34]], i32 0, i32 6
|
|
// CHECK2-NEXT: store i8** null, i8*** [[TMP137]], align 4
|
|
// CHECK2-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS34]], i32 0, i32 7
|
|
// CHECK2-NEXT: store i8** null, i8*** [[TMP138]], align 4
|
|
// CHECK2-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS34]], i32 0, i32 8
|
|
// CHECK2-NEXT: store i64 0, i64* [[TMP139]], align 8
|
|
// CHECK2-NEXT: [[TMP140:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB4]], i64 -1, i32 1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS34]])
|
|
// CHECK2-NEXT: [[TMP141:%.*]] = icmp ne i32 [[TMP140]], 0
|
|
// CHECK2-NEXT: br i1 [[TMP141]], label [[OMP_OFFLOAD_FAILED35:%.*]], label [[OMP_OFFLOAD_CONT36:%.*]]
|
|
// CHECK2: omp_offload.failed35:
|
|
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81(i32 [[TMP118]], [10 x i32]* [[A]]) #[[ATTR2]]
|
|
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT36]]
|
|
// CHECK2: omp_offload.cont36:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69
|
|
// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP1]], i32* [[TMP2]], i32* [[TMP3]])
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
|
|
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[OUTPUT1:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
|
|
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 0
|
|
// CHECK2-NEXT: store i32 0, i32* [[OUTPUT1]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[TMP1]] to i64
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64)
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr i32, i32* [[OUTPUT1]], i64 [[TMP5]]
|
|
// CHECK2-NEXT: store i32* [[TMP6]], i32** [[TMP]], align 4
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0
|
|
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
|
|
// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
|
|
// CHECK2-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4
|
|
// CHECK2-NEXT: store i32 0, i32* [[I]], align 4
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]]
|
|
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
|
|
// CHECK2: omp.precond.then:
|
|
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
|
|
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
|
|
// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]]
|
|
// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
|
// CHECK2: cond.true:
|
|
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END:%.*]]
|
|
// CHECK2: cond.false:
|
|
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END]]
|
|
// CHECK2: cond.end:
|
|
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
|
|
// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
|
// CHECK2: omp.inner.for.cond:
|
|
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]]
|
|
// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
|
// CHECK2: omp.inner.for.body:
|
|
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
|
|
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP22]], i32* [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP]], align 4
|
|
// CHECK2-NEXT: [[TMP25:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP20]], i32 [[TMP21]], i32 [[TMP23]], i32* [[TMP24]], i32* [[TMP25]])
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// CHECK2: omp.inner.for.inc:
|
|
// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
|
|
// CHECK2: omp.inner.for.end:
|
|
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
|
// CHECK2: omp.loop.exit:
|
|
// CHECK2-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]])
|
|
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
|
|
// CHECK2: omp.precond.end:
|
|
// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP31:%.*]] = bitcast i32* [[OUTPUT1]] to i8*
|
|
// CHECK2-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 4
|
|
// CHECK2-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
|
|
// CHECK2-NEXT: [[TMP34:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
|
|
// CHECK2-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP33]], i32 1, i32 4, i8* [[TMP34]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK2-NEXT: ]
|
|
// CHECK2: .omp.reduction.case1:
|
|
// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
|
|
// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[OUTPUT1]], align 4
|
|
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]]
|
|
// CHECK2-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.case2:
|
|
// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[OUTPUT1]], align 4
|
|
// CHECK2-NEXT: [[TMP39:%.*]] = atomicrmw add i32* [[ARRAYIDX]], i32 [[TMP38]] monotonic, align 4
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.default:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1
|
|
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT3:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[_TMP4:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
|
|
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
|
|
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
|
|
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
|
|
// CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: store i32 0, i32* [[I]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
|
|
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
|
|
// CHECK2: omp.precond.then:
|
|
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
|
|
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 0
|
|
// CHECK2-NEXT: store i32 0, i32* [[OUTPUT3]], align 4
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = ptrtoint i32* [[TMP7]] to i64
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]]
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64)
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[OUTPUT3]], i64 [[TMP11]]
|
|
// CHECK2-NEXT: store i32* [[TMP12]], i32** [[_TMP4]], align 4
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
|
|
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]]
|
|
// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
|
// CHECK2: cond.true:
|
|
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END:%.*]]
|
|
// CHECK2: cond.false:
|
|
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END]]
|
|
// CHECK2: cond.end:
|
|
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
|
|
// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
|
// CHECK2: omp.inner.for.cond:
|
|
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]]
|
|
// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
|
// CHECK2: omp.inner.for.body:
|
|
// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4
|
|
// CHECK2-NEXT: [[TMP23:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I5]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]]
|
|
// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4
|
|
// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[_TMP4]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 0
|
|
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
|
|
// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP25]]
|
|
// CHECK2-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX9]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
|
// CHECK2: omp.body.continue:
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// CHECK2: omp.inner.for.inc:
|
|
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP28]], 1
|
|
// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
|
|
// CHECK2: omp.inner.for.end:
|
|
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
|
// CHECK2: omp.loop.exit:
|
|
// CHECK2-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]])
|
|
// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP32:%.*]] = bitcast i32* [[OUTPUT3]] to i8*
|
|
// CHECK2-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4
|
|
// CHECK2-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4
|
|
// CHECK2-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
|
|
// CHECK2-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], i32 1, i32 4, i8* [[TMP35]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK2-NEXT: ]
|
|
// CHECK2: .omp.reduction.case1:
|
|
// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
|
|
// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[OUTPUT3]], align 4
|
|
// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
|
|
// CHECK2-NEXT: store i32 [[ADD12]], i32* [[ARRAYIDX]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.case2:
|
|
// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[OUTPUT3]], align 4
|
|
// CHECK2-NEXT: [[TMP40:%.*]] = atomicrmw add i32* [[ARRAYIDX]], i32 [[TMP39]] monotonic, align 4
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.default:
|
|
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
|
|
// CHECK2: omp.precond.end:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
|
|
// CHECK2-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4
|
|
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 4
|
|
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
|
|
// CHECK2-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR3]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4
|
|
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 4
|
|
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73
|
|
// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32* [[TMP2]], i32* [[TMP3]])
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3
|
|
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[OUTPUT2:%.*]] = alloca [3 x i32], align 4
|
|
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
|
|
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 0
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 2
|
|
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[OUTPUT2]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP2]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
|
|
// CHECK2: omp.arrayinit.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
|
|
// CHECK2-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
|
|
// CHECK2: omp.arrayinit.done:
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP3]] to i64
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64)
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = bitcast [3 x i32]* [[OUTPUT2]] to i32*
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP8]], i64 [[TMP7]]
|
|
// CHECK2-NEXT: store i32* [[TMP9]], i32** [[TMP]], align 4
|
|
// CHECK2-NEXT: [[RHS_BEGIN:%.*]] = bitcast [3 x i32]* [[OUTPUT2]] to i32*
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0
|
|
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
|
|
// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1
|
|
// CHECK2-NEXT: store i32 [[SUB5]], i32* [[DOTCAPTURE_EXPR_4]], align 4
|
|
// CHECK2-NEXT: store i32 0, i32* [[I]], align 4
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]]
|
|
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
|
|
// CHECK2: omp.precond.then:
|
|
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
|
|
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
|
|
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4
|
|
// CHECK2-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]]
|
|
// CHECK2-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
|
// CHECK2: cond.true:
|
|
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END:%.*]]
|
|
// CHECK2: cond.false:
|
|
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END]]
|
|
// CHECK2: cond.end:
|
|
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ]
|
|
// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
|
// CHECK2: omp.inner.for.cond:
|
|
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]]
|
|
// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
|
// CHECK2: omp.inner.for.body:
|
|
// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
|
|
// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP25]], i32* [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP]], align 4
|
|
// CHECK2-NEXT: [[TMP28:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP23]], i32 [[TMP24]], i32 [[TMP26]], i32* [[TMP27]], i32* [[TMP28]])
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// CHECK2: omp.inner.for.inc:
|
|
// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
|
|
// CHECK2: omp.inner.for.end:
|
|
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
|
// CHECK2: omp.loop.exit:
|
|
// CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]])
|
|
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
|
|
// CHECK2: omp.precond.end:
|
|
// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP34:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8*
|
|
// CHECK2-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4
|
|
// CHECK2-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
|
|
// CHECK2-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
|
|
// CHECK2-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], i32 1, i32 4, i8* [[TMP37]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: switch i32 [[TMP38]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK2-NEXT: ]
|
|
// CHECK2: .omp.reduction.case1:
|
|
// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP39]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK2: omp.arraycpy.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], align 4
|
|
// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP40]], [[TMP41]]
|
|
// CHECK2-NEXT: store i32 [[ADD10]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE12:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP39]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK2: omp.arraycpy.done13:
|
|
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.case2:
|
|
// CHECK2-NEXT: [[TMP42:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY14:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP42]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY14]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY15:%.*]]
|
|
// CHECK2: omp.arraycpy.body15:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST16:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY15]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST17:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY15]] ]
|
|
// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST16]], align 4
|
|
// CHECK2-NEXT: [[TMP44:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 [[TMP43]] monotonic, align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP42]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY15]]
|
|
// CHECK2: omp.arraycpy.done21:
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.default:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4
|
|
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT4:%.*]] = alloca [3 x i32], align 4
|
|
// CHECK2-NEXT: [[_TMP5:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
|
|
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
|
|
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
|
|
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
|
|
// CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: store i32 0, i32* [[I]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
|
|
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
|
|
// CHECK2: omp.precond.then:
|
|
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
|
|
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 0
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 2
|
|
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[OUTPUT4]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP8]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
|
|
// CHECK2: omp.arrayinit.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[OMP_PRECOND_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
|
|
// CHECK2-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
|
|
// CHECK2: omp.arrayinit.done:
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = ptrtoint i32* [[TMP9]] to i64
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]]
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64)
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = bitcast [3 x i32]* [[OUTPUT4]] to i32*
|
|
// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[TMP14]], i64 [[TMP13]]
|
|
// CHECK2-NEXT: store i32* [[TMP15]], i32** [[_TMP5]], align 4
|
|
// CHECK2-NEXT: [[RHS_BEGIN:%.*]] = bitcast [3 x i32]* [[OUTPUT4]] to i32*
|
|
// CHECK2-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
|
|
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
|
|
// CHECK2-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
|
// CHECK2: cond.true:
|
|
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END:%.*]]
|
|
// CHECK2: cond.false:
|
|
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END]]
|
|
// CHECK2: cond.end:
|
|
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ]
|
|
// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
|
// CHECK2: omp.inner.for.cond:
|
|
// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
|
|
// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
|
// CHECK2: omp.inner.for.body:
|
|
// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], i32* [[I6]], align 4
|
|
// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]]
|
|
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
|
|
// CHECK2-NEXT: [[TMP29:%.*]] = load i32*, i32** [[_TMP5]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 0
|
|
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4
|
|
// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP30]], [[TMP28]]
|
|
// CHECK2-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX10]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
|
// CHECK2: omp.body.continue:
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// CHECK2: omp.inner.for.inc:
|
|
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP31]], 1
|
|
// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
|
|
// CHECK2: omp.inner.for.end:
|
|
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
|
// CHECK2: omp.loop.exit:
|
|
// CHECK2-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]])
|
|
// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP35:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8*
|
|
// CHECK2-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 4
|
|
// CHECK2-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4
|
|
// CHECK2-NEXT: [[TMP38:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
|
|
// CHECK2-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP37]], i32 1, i32 4, i8* [[TMP38]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: switch i32 [[TMP39]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK2-NEXT: ]
|
|
// CHECK2: .omp.reduction.case1:
|
|
// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP40]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK2: omp.arraycpy.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4
|
|
// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP41]], [[TMP42]]
|
|
// CHECK2-NEXT: store i32 [[ADD14]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP40]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK2: omp.arraycpy.done17:
|
|
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP37]], [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.case2:
|
|
// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP43]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]]
|
|
// CHECK2: omp.arraycpy.body19:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY19]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY19]] ]
|
|
// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 4
|
|
// CHECK2-NEXT: [[TMP45:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 [[TMP44]] monotonic, align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT22]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT23]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT22]], [[TMP43]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY19]]
|
|
// CHECK2: omp.arraycpy.done25:
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.default:
|
|
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
|
|
// CHECK2: omp.precond.end:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.5
|
|
// CHECK2-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR3]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4
|
|
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 4
|
|
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP11]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[TMP11]], [[TMP12]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK2: omp.arraycpy.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK2: omp.arraycpy.done2:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.6
|
|
// CHECK2-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR3]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4
|
|
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 4
|
|
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP11]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[TMP11]], [[TMP12]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK2: omp.arraycpy.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK2: omp.arraycpy.done2:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78
|
|
// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR6:[0-9]+]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP2]], [10 x i32]* [[TMP0]])
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9
|
|
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
|
|
// CHECK2-NEXT: [[A2:%.*]] = alloca [2 x i32], align 4
|
|
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
|
|
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 1
|
|
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A2]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 2
|
|
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP1]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
|
|
// CHECK2: omp.arrayinit.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
|
|
// CHECK2-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
|
|
// CHECK2: omp.arrayinit.done:
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i32*
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP2]] to i64
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64)
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[A2]] to i32*
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP7]], i64 [[TMP6]]
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to [10 x i32]*
|
|
// CHECK2-NEXT: [[RHS_BEGIN:%.*]] = bitcast [2 x i32]* [[A2]] to i32*
|
|
// CHECK2-NEXT: store i32 0, i32* [[I]], align 4
|
|
// CHECK2-NEXT: br label [[FOR_COND:%.*]]
|
|
// CHECK2: for.cond:
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
|
|
// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
|
|
// CHECK2: for.body:
|
|
// CHECK2-NEXT: br label [[FOR_INC:%.*]]
|
|
// CHECK2: for.inc:
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4
|
|
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1
|
|
// CHECK2-NEXT: store i32 [[INC]], i32* [[I]], align 4
|
|
// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
// CHECK2: for.end:
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8*
|
|
// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4
|
|
// CHECK2-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
|
|
// CHECK2-NEXT: [[TMP17:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
|
|
// CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32 1, i32 4, i8* [[TMP17]], void (i8*, i8*)* @.omp.reduction.reduction_func.10, [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK2-NEXT: ]
|
|
// CHECK2: .omp.reduction.case1:
|
|
// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 2
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP19]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK2: omp.arraycpy.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
|
|
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP19]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK2: omp.arraycpy.done6:
|
|
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.case2:
|
|
// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 2
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP22]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]]
|
|
// CHECK2: omp.arraycpy.body8:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
|
|
// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
|
|
// CHECK2-NEXT: [[TMP24:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP23]] monotonic, align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP22]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]]
|
|
// CHECK2: omp.arraycpy.done14:
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.default:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.10
|
|
// CHECK2-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR3]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4
|
|
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 4
|
|
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP11]], i32 2
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[TMP11]], [[TMP12]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK2: omp.arraycpy.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK2: omp.arraycpy.done2:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81
|
|
// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR6]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], [10 x i32]* [[TMP0]])
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13
|
|
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
|
|
// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
|
|
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 3
|
|
// CHECK2-NEXT: store i32 0, i32* [[A1]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[TMP0]] to i32*
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[TMP1]] to i64
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64)
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr i32, i32* [[A1]], i64 [[TMP5]]
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to [10 x i32]*
|
|
// CHECK2-NEXT: store i32 0, i32* [[I]], align 4
|
|
// CHECK2-NEXT: br label [[FOR_COND:%.*]]
|
|
// CHECK2: for.cond:
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
|
|
// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
|
|
// CHECK2: for.body:
|
|
// CHECK2-NEXT: br label [[FOR_INC:%.*]]
|
|
// CHECK2: for.inc:
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4
|
|
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1
|
|
// CHECK2-NEXT: store i32 [[INC]], i32* [[I]], align 4
|
|
// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
|
|
// CHECK2: for.end:
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i32* [[A1]] to i8*
|
|
// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
|
|
// CHECK2-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
|
|
// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32 1, i32 4, i8* [[TMP15]], void (i8*, i8*)* @.omp.reduction.reduction_func.14, [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK2-NEXT: ]
|
|
// CHECK2: .omp.reduction.case1:
|
|
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
|
|
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[A1]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.case2:
|
|
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[A1]], align 4
|
|
// CHECK2-NEXT: [[TMP20:%.*]] = atomicrmw add i32* [[ARRAYIDX]], i32 [[TMP19]] monotonic, align 4
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.default:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.14
|
|
// CHECK2-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR3]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4
|
|
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 4
|
|
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@main
|
|
// CHECK2-SAME: () #[[ATTR7:[0-9]+]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[ARRAY:%.*]] = alloca i32*, align 4
|
|
// CHECK2-NEXT: [[RESULT:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4
|
|
// CHECK2-NEXT: store i32 100, i32* [[SIZE]], align 4
|
|
// CHECK2-NEXT: [[CALL:%.*]] = call noalias noundef nonnull i8* @_Znaj(i32 noundef 400) #[[ATTR10:[0-9]+]]
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
|
|
// CHECK2-NEXT: store i32* [[TMP0]], i32** [[ARRAY]], align 4
|
|
// CHECK2-NEXT: store i32 0, i32* [[RESULT]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[ARRAY]], align 4
|
|
// CHECK2-NEXT: call void @_Z3sumPiiS_(i32* noundef [[TMP1]], i32 noundef 100, i32* noundef [[RESULT]])
|
|
// CHECK2-NEXT: ret i32 0
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
|
|
// CHECK2-SAME: () #[[ATTR9:[0-9]+]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: call void @__tgt_register_requires(i64 1)
|
|
// CHECK2-NEXT: ret void
|
|
//
|