Files
clang-p2996/clang/test/OpenMP/target_codegen_global_capture.cpp
Joseph Huber 5300263c70 [OpenMP] Add loop tripcount argument to kernel launch and remove push function
Previously we added the `push_target_tripcount` function to send the
loop tripcount to the device runtime so we knew how to configure the
teams / threads to execute the loop for a teams distribute construct.
This was implemented as a separate function mostly to avoid changing the
interface for backwards compatibility. Now that we've changed it anyway
and the new interface can take an arbitrary number of arguments via the
struct without changing the ABI, we can move this to the new interface.
This will simplify the runtime by removing unnecessary state between
calls.

Depends on D128550

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D128816
2022-07-08 14:44:16 -04:00

2148 lines
151 KiB
C++

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK3
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK3
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
double Ga = 1.0;
double Gb = 2.0;
double Gc = 3.0;
double Gd = 4.0;
int foo(short a, short b, short c, short d){ // NOTE(review): checks below are autogenerated; kernel names encode source line numbers (e.g. _l49), so do not insert/remove lines here.
static float Sa = 5.0;
static float Sb = 6.0;
static float Sc = 7.0;
static float Sd = 8.0;
// 3 local vars being captured.
// 3 static vars being captured.
// 3 static global vars being captured.
// Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd
#pragma omp target if(Ga>0.0 && a>0 && Sa>0.0) // if-clause false => host fallback (omp_if.else path in the generated IR)
{
b += 1;
Gb += 1.0;
Sb += 1.0;
// The parallel region only uses 3 captures.
// Capture d, Gd, Sd,
#pragma omp parallel if(Gc>0.0 && c>0 && Sc>0.0) // if-clause false => serialized parallel (__kmpc_serialized_parallel)
{
d += 1;
Gd += 1.0;
Sd += 1.0;
}
}
return a + b + c + d + (int)Sa + (int)Sb + (int)Sc + (int)Sd; // sums params and statics so no capture is optimized away
}
int bar(short a, short b, short c, short d){ // same as foo() but the target region is nested inside a parallel region; checks are autogenerated — keep line numbers stable
static float Sa = 9.0;
static float Sb = 10.0;
static float Sc = 11.0;
static float Sd = 12.0;
// Capture a, b, c, d
#pragma omp parallel // outer parallel captures all four params by reference
{
// 3 local vars being captured.
// 3 static vars being captured.
// 3 static global vars being captured.
// Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd
#pragma omp target if(Ga>0.0 && a>0 && Sa>0.0) // target inside parallel: captures re-materialized per the list above
{
b += 1;
Gb += 1.0;
Sb += 1.0;
// Capture d, Gd, Sd
#pragma omp parallel if(Gc>0.0 && c>0 && Sc>0.0) // inner parallel on the device side; serialized when the condition is false
{
d += 1;
Gd += 1.0;
Sd += 1.0;
}
}
}
return a + b + c + d + (int)Sa + (int)Sb + (int)Sc + (int)Sd; // keeps every captured variable live
}
///
/// Tests with template functions.
///
template<typename T>
int tbar(T a, T b, T c, T d){ // template mirror of bar(); instantiated with T=short via tbar2() so codegen matches the non-template case
static float Sa = 17.0;
static float Sb = 18.0;
static float Sc = 19.0;
static float Sd = 20.0;
// Capture a, b, c, d
#pragma omp parallel // outer parallel captures all four params by reference
{
// 3 local vars being captured.
// 3 static vars being captured.
// 3 static global vars being captured.
// Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd
#pragma omp target if(Ga>0.0 && a>0 && Sa>0.0) // checks for the instantiation are autogenerated — keep line numbers stable
{
b += 1;
Gb += 1.0;
Sb += 1.0;
// Capture d, Gd, Sd
#pragma omp parallel if(Gc>0.0 && c>0 && Sc>0.0) // inner device-side parallel; serialized when the condition is false
{
d += 1;
Gd += 1.0;
Sd += 1.0;
}
}
}
return a + b + c + d + (int)Sa + (int)Sb + (int)Sc + (int)Sd; // keeps every captured variable live
}
int tbar2(short a, short b, short c, short d){ // forces the T=short instantiation of tbar so its IR is emitted and checked
return tbar(a, b, c, d);
}
#endif
// CHECK1-LABEL: define {{[^@]+}}@_Z3foossss
// CHECK1-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GB_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SB_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GC_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SC_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[D_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GD_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SD_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [9 x i8*], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [9 x i8*], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x i8*], align 8
// CHECK1-NEXT: store i16 [[A]], i16* [[A_ADDR]], align 2
// CHECK1-NEXT: store i16 [[B]], i16* [[B_ADDR]], align 2
// CHECK1-NEXT: store i16 [[C]], i16* [[C_ADDR]], align 2
// CHECK1-NEXT: store i16 [[D]], i16* [[D_ADDR]], align 2
// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[B_ADDR]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i16*
// CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV]], align 2
// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load double, double* @Gb, align 8
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[GB_CASTED]] to double*
// CHECK1-NEXT: store double [[TMP2]], double* [[CONV1]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[GB_CASTED]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load float, float* @_ZZ3foossssE2Sb, align 4
// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SB_CASTED]] to float*
// CHECK1-NEXT: store float [[TMP4]], float* [[CONV2]], align 4
// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[SB_CASTED]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load double, double* @Gc, align 8
// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[GC_CASTED]] to double*
// CHECK1-NEXT: store double [[TMP6]], double* [[CONV3]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[GC_CASTED]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[C_ADDR]], align 2
// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[C_CASTED]] to i16*
// CHECK1-NEXT: store i16 [[TMP8]], i16* [[CONV4]], align 2
// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[C_CASTED]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = load float, float* @_ZZ3foossssE2Sc, align 4
// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[SC_CASTED]] to float*
// CHECK1-NEXT: store float [[TMP10]], float* [[CONV5]], align 4
// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[SC_CASTED]], align 8
// CHECK1-NEXT: [[TMP12:%.*]] = load i16, i16* [[D_ADDR]], align 2
// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_CASTED]] to i16*
// CHECK1-NEXT: store i16 [[TMP12]], i16* [[CONV6]], align 2
// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[D_CASTED]], align 8
// CHECK1-NEXT: [[TMP14:%.*]] = load double, double* @Gd, align 8
// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_CASTED]] to double*
// CHECK1-NEXT: store double [[TMP14]], double* [[CONV7]], align 8
// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[GD_CASTED]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* @_ZZ3foossssE2Sd, align 4
// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_CASTED]] to float*
// CHECK1-NEXT: store float [[TMP16]], float* [[CONV8]], align 4
// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[SD_CASTED]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = load double, double* @Ga, align 8
// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP18]], 0.000000e+00
// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK1: land.lhs.true:
// CHECK1-NEXT: [[TMP19:%.*]] = load i16, i16* [[A_ADDR]], align 2
// CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP19]] to i32
// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[CONV9]], 0
// CHECK1-NEXT: br i1 [[CMP10]], label [[LAND_LHS_TRUE11:%.*]], label [[OMP_IF_ELSE]]
// CHECK1: land.lhs.true11:
// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* @_ZZ3foossssE2Sa, align 4
// CHECK1-NEXT: [[CONV12:%.*]] = fpext float [[TMP20]] to double
// CHECK1-NEXT: [[CMP13:%.*]] = fcmp ogt double [[CONV12]], 0.000000e+00
// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]]
// CHECK1: omp_if.then:
// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP22]], align 8
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP24]], align 8
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT: store i8* null, i8** [[TMP25]], align 8
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
// CHECK1-NEXT: store i64 [[TMP3]], i64* [[TMP27]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
// CHECK1-NEXT: store i64 [[TMP3]], i64* [[TMP29]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-NEXT: store i8* null, i8** [[TMP30]], align 8
// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i64*
// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP32]], align 8
// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i64*
// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP34]], align 8
// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK1-NEXT: store i8* null, i8** [[TMP35]], align 8
// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK1-NEXT: [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
// CHECK1-NEXT: store i64 [[TMP7]], i64* [[TMP37]], align 8
// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK1-NEXT: [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
// CHECK1-NEXT: store i64 [[TMP7]], i64* [[TMP39]], align 8
// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
// CHECK1-NEXT: store i8* null, i8** [[TMP40]], align 8
// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP42]], align 8
// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK1-NEXT: [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP44]], align 8
// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK1-NEXT: store i8* null, i8** [[TMP45]], align 8
// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
// CHECK1-NEXT: [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i64*
// CHECK1-NEXT: store i64 [[TMP11]], i64* [[TMP47]], align 8
// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
// CHECK1-NEXT: [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i64*
// CHECK1-NEXT: store i64 [[TMP11]], i64* [[TMP49]], align 8
// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
// CHECK1-NEXT: store i8* null, i8** [[TMP50]], align 8
// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
// CHECK1-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP52]], align 8
// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
// CHECK1-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64*
// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP54]], align 8
// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
// CHECK1-NEXT: store i8* null, i8** [[TMP55]], align 8
// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
// CHECK1-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to i64*
// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP57]], align 8
// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
// CHECK1-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to i64*
// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP59]], align 8
// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7
// CHECK1-NEXT: store i8* null, i8** [[TMP60]], align 8
// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
// CHECK1-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP62]], align 8
// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8
// CHECK1-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP64]], align 8
// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8
// CHECK1-NEXT: store i8* null, i8** [[TMP65]], align 8
// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT: store i32 1, i32* [[TMP68]], align 4
// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-NEXT: store i32 9, i32* [[TMP69]], align 4
// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-NEXT: store i8** [[TMP66]], i8*** [[TMP70]], align 8
// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-NEXT: store i8** [[TMP67]], i8*** [[TMP71]], align 8
// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP72]], align 8
// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP73]], align 8
// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-NEXT: store i8** null, i8*** [[TMP74]], align 8
// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-NEXT: store i8** null, i8*** [[TMP75]], align 8
// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-NEXT: store i64 0, i64* [[TMP76]], align 8
// CHECK1-NEXT: [[TMP77:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foossss_l49.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK1-NEXT: [[TMP78:%.*]] = icmp ne i32 [[TMP77]], 0
// CHECK1-NEXT: br i1 [[TMP78]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foossss_l49(i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
// CHECK1-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foossss_l49(i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: [[TMP79:%.*]] = load i16, i16* [[A_ADDR]], align 2
// CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP79]] to i32
// CHECK1-NEXT: [[TMP80:%.*]] = load i16, i16* [[B_ADDR]], align 2
// CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP80]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV14]], [[CONV15]]
// CHECK1-NEXT: [[TMP81:%.*]] = load i16, i16* [[C_ADDR]], align 2
// CHECK1-NEXT: [[CONV16:%.*]] = sext i16 [[TMP81]] to i32
// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD]], [[CONV16]]
// CHECK1-NEXT: [[TMP82:%.*]] = load i16, i16* [[D_ADDR]], align 2
// CHECK1-NEXT: [[CONV18:%.*]] = sext i16 [[TMP82]] to i32
// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[ADD17]], [[CONV18]]
// CHECK1-NEXT: [[TMP83:%.*]] = load float, float* @_ZZ3foossssE2Sa, align 4
// CHECK1-NEXT: [[CONV20:%.*]] = fptosi float [[TMP83]] to i32
// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD19]], [[CONV20]]
// CHECK1-NEXT: [[TMP84:%.*]] = load float, float* @_ZZ3foossssE2Sb, align 4
// CHECK1-NEXT: [[CONV22:%.*]] = fptosi float [[TMP84]] to i32
// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[ADD21]], [[CONV22]]
// CHECK1-NEXT: [[TMP85:%.*]] = load float, float* @_ZZ3foossssE2Sc, align 4
// CHECK1-NEXT: [[CONV24:%.*]] = fptosi float [[TMP85]] to i32
// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[ADD23]], [[CONV24]]
// CHECK1-NEXT: [[TMP86:%.*]] = load float, float* @_ZZ3foossssE2Sd, align 4
// CHECK1-NEXT: [[CONV26:%.*]] = fptosi float [[TMP86]] to i32
// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD25]], [[CONV26]]
// CHECK1-NEXT: ret i32 [[ADD27]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foossss_l49
// CHECK1-SAME: (i64 noundef [[B:%.*]], i64 noundef [[GB:%.*]], i64 noundef [[SB:%.*]], i64 noundef [[GC:%.*]], i64 noundef [[C:%.*]], i64 noundef [[SC:%.*]], i64 noundef [[D:%.*]], i64 noundef [[GD:%.*]], i64 noundef [[SD:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GB_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SB_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GC_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SC_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK1-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8
// CHECK1-NEXT: store i64 [[SB]], i64* [[SB_ADDR]], align 8
// CHECK1-NEXT: store i64 [[GC]], i64* [[GC_ADDR]], align 8
// CHECK1-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[SC]], i64* [[SC_ADDR]], align 8
// CHECK1-NEXT: store i64 [[D]], i64* [[D_ADDR]], align 8
// CHECK1-NEXT: store i64 [[GD]], i64* [[GD_ADDR]], align 8
// CHECK1-NEXT: store i64 [[SD]], i64* [[SD_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i16*
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[GB_ADDR]] to double*
// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SB_ADDR]] to float*
// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[GC_ADDR]] to double*
// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[C_ADDR]] to i16*
// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[SC_ADDR]] to float*
// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_ADDR]] to i16*
// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_ADDR]] to double*
// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_ADDR]] to float*
// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1
// CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
// CHECK1-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2
// CHECK1-NEXT: [[TMP2:%.*]] = load double, double* [[CONV1]], align 8
// CHECK1-NEXT: [[ADD11:%.*]] = fadd double [[TMP2]], 1.000000e+00
// CHECK1-NEXT: store double [[ADD11]], double* [[CONV1]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 4
// CHECK1-NEXT: [[CONV12:%.*]] = fpext float [[TMP3]] to double
// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00
// CHECK1-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float
// CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8
// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00
// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK1: land.lhs.true:
// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2
// CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32
// CHECK1-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0
// CHECK1-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]]
// CHECK1: land.lhs.true17:
// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4
// CHECK1-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double
// CHECK1-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00
// CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]]
// CHECK1: omp_if.then:
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, double*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]])
// CHECK1-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[GD:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca double*, align 8
// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca float*, align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8
// CHECK1-NEXT: store double* [[GD]], double** [[GD_ADDR]], align 8
// CHECK1-NEXT: store float* [[SD]], float** [[SD_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[D_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[GD_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[SD_ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP0]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
// CHECK1-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2
// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[TMP1]], align 8
// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00
// CHECK1-NEXT: store double [[ADD2]], double* [[TMP1]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4
// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double
// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00
// CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float
// CHECK1-NEXT: store float [[CONV5]], float* [[TMP2]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z3barssss
// CHECK1-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: store i16 [[A]], i16* [[A_ADDR]], align 2
// CHECK1-NEXT: store i16 [[B]], i16* [[B_ADDR]], align 2
// CHECK1-NEXT: store i16 [[C]], i16* [[C_ADDR]], align 2
// CHECK1-NEXT: store i16 [[D]], i16* [[D_ADDR]], align 2
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, i16*, i16*, i16*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i16* [[A_ADDR]], i16* [[B_ADDR]], i16* [[C_ADDR]], i16* [[D_ADDR]])
// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[A_ADDR]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[B_ADDR]], align 2
// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]]
// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[C_ADDR]], align 2
// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]]
// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[D_ADDR]], align 2
// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32
// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]]
// CHECK1-NEXT: [[TMP4:%.*]] = load float, float* @_ZZ3barssssE2Sa, align 4
// CHECK1-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]]
// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* @_ZZ3barssssE2Sb, align 4
// CHECK1-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32
// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]]
// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* @_ZZ3barssssE2Sc, align 4
// CHECK1-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32
// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]]
// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* @_ZZ3barssssE2Sd, align 4
// CHECK1-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32
// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]]
// CHECK1-NEXT: ret i32 [[ADD13]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GB_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SB_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GC_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SC_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[D_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GD_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SD_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [9 x i8*], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [9 x i8*], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x i8*], align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i16* [[A]], i16** [[A_ADDR]], align 8
// CHECK1-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8
// CHECK1-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8
// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[D_ADDR]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i16*
// CHECK1-NEXT: store i16 [[TMP4]], i16* [[CONV]], align 2
// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load double, double* @Gb, align 8
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[GB_CASTED]] to double*
// CHECK1-NEXT: store double [[TMP6]], double* [[CONV1]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[GB_CASTED]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load float, float* @_ZZ3barssssE2Sb, align 4
// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SB_CASTED]] to float*
// CHECK1-NEXT: store float [[TMP8]], float* [[CONV2]], align 4
// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[SB_CASTED]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = load double, double* @Gc, align 8
// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[GC_CASTED]] to double*
// CHECK1-NEXT: store double [[TMP10]], double* [[CONV3]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[GC_CASTED]], align 8
// CHECK1-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP2]], align 2
// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[C_CASTED]] to i16*
// CHECK1-NEXT: store i16 [[TMP12]], i16* [[CONV4]], align 2
// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[C_CASTED]], align 8
// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* @_ZZ3barssssE2Sc, align 4
// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[SC_CASTED]] to float*
// CHECK1-NEXT: store float [[TMP14]], float* [[CONV5]], align 4
// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[SC_CASTED]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_CASTED]] to i16*
// CHECK1-NEXT: store i16 [[TMP16]], i16* [[CONV6]], align 2
// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[D_CASTED]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = load double, double* @Gd, align 8
// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_CASTED]] to double*
// CHECK1-NEXT: store double [[TMP18]], double* [[CONV7]], align 8
// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[GD_CASTED]], align 8
// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* @_ZZ3barssssE2Sd, align 4
// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_CASTED]] to float*
// CHECK1-NEXT: store float [[TMP20]], float* [[CONV8]], align 4
// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SD_CASTED]], align 8
// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* @Ga, align 8
// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP22]], 0.000000e+00
// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK1: land.lhs.true:
// CHECK1-NEXT: [[TMP23:%.*]] = load i16, i16* [[TMP0]], align 2
// CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP23]] to i32
// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[CONV9]], 0
// CHECK1-NEXT: br i1 [[CMP10]], label [[LAND_LHS_TRUE11:%.*]], label [[OMP_IF_ELSE]]
// CHECK1: land.lhs.true11:
// CHECK1-NEXT: [[TMP24:%.*]] = load float, float* @_ZZ3barssssE2Sa, align 4
// CHECK1-NEXT: [[CONV12:%.*]] = fpext float [[TMP24]] to double
// CHECK1-NEXT: [[CMP13:%.*]] = fcmp ogt double [[CONV12]], 0.000000e+00
// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]]
// CHECK1: omp_if.then:
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP26]], align 8
// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP28]], align 8
// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT: store i8* null, i8** [[TMP29]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
// CHECK1-NEXT: store i64 [[TMP7]], i64* [[TMP31]], align 8
// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
// CHECK1-NEXT: store i64 [[TMP7]], i64* [[TMP33]], align 8
// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-NEXT: store i8* null, i8** [[TMP34]], align 8
// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK1-NEXT: [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i64*
// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP36]], align 8
// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK1-NEXT: [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i64*
// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP38]], align 8
// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK1-NEXT: store i8* null, i8** [[TMP39]], align 8
// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK1-NEXT: [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i64*
// CHECK1-NEXT: store i64 [[TMP11]], i64* [[TMP41]], align 8
// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK1-NEXT: [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
// CHECK1-NEXT: store i64 [[TMP11]], i64* [[TMP43]], align 8
// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
// CHECK1-NEXT: store i8* null, i8** [[TMP44]], align 8
// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK1-NEXT: [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i64*
// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP46]], align 8
// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK1-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i64*
// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP48]], align 8
// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK1-NEXT: store i8* null, i8** [[TMP49]], align 8
// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
// CHECK1-NEXT: [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i64*
// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP51]], align 8
// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
// CHECK1-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i64*
// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP53]], align 8
// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
// CHECK1-NEXT: store i8* null, i8** [[TMP54]], align 8
// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
// CHECK1-NEXT: [[TMP56:%.*]] = bitcast i8** [[TMP55]] to i64*
// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP56]], align 8
// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
// CHECK1-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i64*
// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP58]], align 8
// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
// CHECK1-NEXT: store i8* null, i8** [[TMP59]], align 8
// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
// CHECK1-NEXT: [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i64*
// CHECK1-NEXT: store i64 [[TMP19]], i64* [[TMP61]], align 8
// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
// CHECK1-NEXT: [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i64*
// CHECK1-NEXT: store i64 [[TMP19]], i64* [[TMP63]], align 8
// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7
// CHECK1-NEXT: store i8* null, i8** [[TMP64]], align 8
// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
// CHECK1-NEXT: [[TMP66:%.*]] = bitcast i8** [[TMP65]] to i64*
// CHECK1-NEXT: store i64 [[TMP21]], i64* [[TMP66]], align 8
// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8
// CHECK1-NEXT: [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
// CHECK1-NEXT: store i64 [[TMP21]], i64* [[TMP68]], align 8
// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8
// CHECK1-NEXT: store i8* null, i8** [[TMP69]], align 8
// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT: store i32 1, i32* [[TMP72]], align 4
// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-NEXT: store i32 9, i32* [[TMP73]], align 4
// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-NEXT: store i8** [[TMP70]], i8*** [[TMP74]], align 8
// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-NEXT: store i8** [[TMP71]], i8*** [[TMP75]], align 8
// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP76]], align 8
// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP77]], align 8
// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-NEXT: store i8** null, i8*** [[TMP78]], align 8
// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-NEXT: store i8** null, i8*** [[TMP79]], align 8
// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-NEXT: store i64 0, i64* [[TMP80]], align 8
// CHECK1-NEXT: [[TMP81:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK1-NEXT: [[TMP82:%.*]] = icmp ne i32 [[TMP81]], 0
// CHECK1-NEXT: br i1 [[TMP82]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
// CHECK1-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94
// CHECK1-SAME: (i64 noundef [[B:%.*]], i64 noundef [[GB:%.*]], i64 noundef [[SB:%.*]], i64 noundef [[GC:%.*]], i64 noundef [[C:%.*]], i64 noundef [[SC:%.*]], i64 noundef [[D:%.*]], i64 noundef [[GD:%.*]], i64 noundef [[SD:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GB_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SB_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GC_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SC_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK1-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8
// CHECK1-NEXT: store i64 [[SB]], i64* [[SB_ADDR]], align 8
// CHECK1-NEXT: store i64 [[GC]], i64* [[GC_ADDR]], align 8
// CHECK1-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[SC]], i64* [[SC_ADDR]], align 8
// CHECK1-NEXT: store i64 [[D]], i64* [[D_ADDR]], align 8
// CHECK1-NEXT: store i64 [[GD]], i64* [[GD_ADDR]], align 8
// CHECK1-NEXT: store i64 [[SD]], i64* [[SD_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i16*
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[GB_ADDR]] to double*
// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SB_ADDR]] to float*
// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[GC_ADDR]] to double*
// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[C_ADDR]] to i16*
// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[SC_ADDR]] to float*
// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_ADDR]] to i16*
// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_ADDR]] to double*
// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_ADDR]] to float*
// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1
// CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
// CHECK1-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2
// CHECK1-NEXT: [[TMP2:%.*]] = load double, double* [[CONV1]], align 8
// CHECK1-NEXT: [[ADD11:%.*]] = fadd double [[TMP2]], 1.000000e+00
// CHECK1-NEXT: store double [[ADD11]], double* [[CONV1]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 4
// CHECK1-NEXT: [[CONV12:%.*]] = fpext float [[TMP3]] to double
// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00
// CHECK1-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float
// CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8
// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00
// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK1: land.lhs.true:
// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2
// CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32
// CHECK1-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0
// CHECK1-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]]
// CHECK1: land.lhs.true17:
// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4
// CHECK1-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double
// CHECK1-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00
// CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]]
// CHECK1: omp_if.then:
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, double*, float*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]])
// CHECK1-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[GD:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca double*, align 8
// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca float*, align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8
// CHECK1-NEXT: store double* [[GD]], double** [[GD_ADDR]], align 8
// CHECK1-NEXT: store float* [[SD]], float** [[SD_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[D_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[GD_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[SD_ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP0]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
// CHECK1-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2
// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[TMP1]], align 8
// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00
// CHECK1-NEXT: store double [[ADD2]], double* [[TMP1]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4
// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double
// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00
// CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float
// CHECK1-NEXT: store float [[CONV5]], float* [[TMP2]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z5tbar2ssss
// CHECK1-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: store i16 [[A]], i16* [[A_ADDR]], align 2
// CHECK1-NEXT: store i16 [[B]], i16* [[B_ADDR]], align 2
// CHECK1-NEXT: store i16 [[C]], i16* [[C_ADDR]], align 2
// CHECK1-NEXT: store i16 [[D]], i16* [[D_ADDR]], align 2
// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[A_ADDR]], align 2
// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[B_ADDR]], align 2
// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[C_ADDR]], align 2
// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[D_ADDR]], align 2
// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z4tbarIsEiT_S0_S0_S0_(i16 noundef signext [[TMP0]], i16 noundef signext [[TMP1]], i16 noundef signext [[TMP2]], i16 noundef signext [[TMP3]])
// CHECK1-NEXT: ret i32 [[CALL]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z4tbarIsEiT_S0_S0_S0_
// CHECK1-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] comdat {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: store i16 [[A]], i16* [[A_ADDR]], align 2
// CHECK1-NEXT: store i16 [[B]], i16* [[B_ADDR]], align 2
// CHECK1-NEXT: store i16 [[C]], i16* [[C_ADDR]], align 2
// CHECK1-NEXT: store i16 [[D]], i16* [[D_ADDR]], align 2
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, i16*, i16*, i16*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i16* [[A_ADDR]], i16* [[B_ADDR]], i16* [[C_ADDR]], i16* [[D_ADDR]])
// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[A_ADDR]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[B_ADDR]], align 2
// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]]
// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[C_ADDR]], align 2
// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]]
// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[D_ADDR]], align 2
// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32
// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]]
// CHECK1-NEXT: [[TMP4:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4
// CHECK1-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]]
// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4
// CHECK1-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32
// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]]
// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4
// CHECK1-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32
// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]]
// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4
// CHECK1-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32
// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]]
// CHECK1-NEXT: ret i32 [[ADD13]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GB_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SB_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GC_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SC_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[D_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GD_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SD_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [9 x i8*], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [9 x i8*], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x i8*], align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i16* [[A]], i16** [[A_ADDR]], align 8
// CHECK1-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8
// CHECK1-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8
// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[D_ADDR]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i16*
// CHECK1-NEXT: store i16 [[TMP4]], i16* [[CONV]], align 2
// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load double, double* @Gb, align 8
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[GB_CASTED]] to double*
// CHECK1-NEXT: store double [[TMP6]], double* [[CONV1]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[GB_CASTED]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4
// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SB_CASTED]] to float*
// CHECK1-NEXT: store float [[TMP8]], float* [[CONV2]], align 4
// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[SB_CASTED]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = load double, double* @Gc, align 8
// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[GC_CASTED]] to double*
// CHECK1-NEXT: store double [[TMP10]], double* [[CONV3]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[GC_CASTED]], align 8
// CHECK1-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP2]], align 2
// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[C_CASTED]] to i16*
// CHECK1-NEXT: store i16 [[TMP12]], i16* [[CONV4]], align 2
// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[C_CASTED]], align 8
// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4
// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[SC_CASTED]] to float*
// CHECK1-NEXT: store float [[TMP14]], float* [[CONV5]], align 4
// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[SC_CASTED]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_CASTED]] to i16*
// CHECK1-NEXT: store i16 [[TMP16]], i16* [[CONV6]], align 2
// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[D_CASTED]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = load double, double* @Gd, align 8
// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_CASTED]] to double*
// CHECK1-NEXT: store double [[TMP18]], double* [[CONV7]], align 8
// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[GD_CASTED]], align 8
// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4
// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_CASTED]] to float*
// CHECK1-NEXT: store float [[TMP20]], float* [[CONV8]], align 4
// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SD_CASTED]], align 8
// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* @Ga, align 8
// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP22]], 0.000000e+00
// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK1: land.lhs.true:
// CHECK1-NEXT: [[TMP23:%.*]] = load i16, i16* [[TMP0]], align 2
// CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP23]] to i32
// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[CONV9]], 0
// CHECK1-NEXT: br i1 [[CMP10]], label [[LAND_LHS_TRUE11:%.*]], label [[OMP_IF_ELSE]]
// CHECK1: land.lhs.true11:
// CHECK1-NEXT: [[TMP24:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4
// CHECK1-NEXT: [[CONV12:%.*]] = fpext float [[TMP24]] to double
// CHECK1-NEXT: [[CMP13:%.*]] = fcmp ogt double [[CONV12]], 0.000000e+00
// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]]
// CHECK1: omp_if.then:
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP26]], align 8
// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP28]], align 8
// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT: store i8* null, i8** [[TMP29]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
// CHECK1-NEXT: store i64 [[TMP7]], i64* [[TMP31]], align 8
// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
// CHECK1-NEXT: store i64 [[TMP7]], i64* [[TMP33]], align 8
// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-NEXT: store i8* null, i8** [[TMP34]], align 8
// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK1-NEXT: [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i64*
// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP36]], align 8
// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK1-NEXT: [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i64*
// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP38]], align 8
// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK1-NEXT: store i8* null, i8** [[TMP39]], align 8
// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK1-NEXT: [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i64*
// CHECK1-NEXT: store i64 [[TMP11]], i64* [[TMP41]], align 8
// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK1-NEXT: [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
// CHECK1-NEXT: store i64 [[TMP11]], i64* [[TMP43]], align 8
// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
// CHECK1-NEXT: store i8* null, i8** [[TMP44]], align 8
// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK1-NEXT: [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i64*
// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP46]], align 8
// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK1-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i64*
// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP48]], align 8
// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK1-NEXT: store i8* null, i8** [[TMP49]], align 8
// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
// CHECK1-NEXT: [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i64*
// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP51]], align 8
// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
// CHECK1-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i64*
// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP53]], align 8
// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
// CHECK1-NEXT: store i8* null, i8** [[TMP54]], align 8
// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
// CHECK1-NEXT: [[TMP56:%.*]] = bitcast i8** [[TMP55]] to i64*
// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP56]], align 8
// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
// CHECK1-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i64*
// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP58]], align 8
// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
// CHECK1-NEXT: store i8* null, i8** [[TMP59]], align 8
// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
// CHECK1-NEXT: [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i64*
// CHECK1-NEXT: store i64 [[TMP19]], i64* [[TMP61]], align 8
// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
// CHECK1-NEXT: [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i64*
// CHECK1-NEXT: store i64 [[TMP19]], i64* [[TMP63]], align 8
// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7
// CHECK1-NEXT: store i8* null, i8** [[TMP64]], align 8
// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
// CHECK1-NEXT: [[TMP66:%.*]] = bitcast i8** [[TMP65]] to i64*
// CHECK1-NEXT: store i64 [[TMP21]], i64* [[TMP66]], align 8
// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8
// CHECK1-NEXT: [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
// CHECK1-NEXT: store i64 [[TMP21]], i64* [[TMP68]], align 8
// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8
// CHECK1-NEXT: store i8* null, i8** [[TMP69]], align 8
// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT: store i32 1, i32* [[TMP72]], align 4
// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-NEXT: store i32 9, i32* [[TMP73]], align 4
// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-NEXT: store i8** [[TMP70]], i8*** [[TMP74]], align 8
// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-NEXT: store i8** [[TMP71]], i8*** [[TMP75]], align 8
// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP76]], align 8
// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP77]], align 8
// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-NEXT: store i8** null, i8*** [[TMP78]], align 8
// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-NEXT: store i8** null, i8*** [[TMP79]], align 8
// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-NEXT: store i64 0, i64* [[TMP80]], align 8
// CHECK1-NEXT: [[TMP81:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK1-NEXT: [[TMP82:%.*]] = icmp ne i32 [[TMP81]], 0
// CHECK1-NEXT: br i1 [[TMP82]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
// CHECK1-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145
// CHECK1-SAME: (i64 noundef [[B:%.*]], i64 noundef [[GB:%.*]], i64 noundef [[SB:%.*]], i64 noundef [[GC:%.*]], i64 noundef [[C:%.*]], i64 noundef [[SC:%.*]], i64 noundef [[D:%.*]], i64 noundef [[GD:%.*]], i64 noundef [[SD:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GB_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SB_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GC_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SC_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK1-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8
// CHECK1-NEXT: store i64 [[SB]], i64* [[SB_ADDR]], align 8
// CHECK1-NEXT: store i64 [[GC]], i64* [[GC_ADDR]], align 8
// CHECK1-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[SC]], i64* [[SC_ADDR]], align 8
// CHECK1-NEXT: store i64 [[D]], i64* [[D_ADDR]], align 8
// CHECK1-NEXT: store i64 [[GD]], i64* [[GD_ADDR]], align 8
// CHECK1-NEXT: store i64 [[SD]], i64* [[SD_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i16*
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[GB_ADDR]] to double*
// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SB_ADDR]] to float*
// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[GC_ADDR]] to double*
// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[C_ADDR]] to i16*
// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[SC_ADDR]] to float*
// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_ADDR]] to i16*
// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_ADDR]] to double*
// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_ADDR]] to float*
// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1
// CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
// CHECK1-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2
// CHECK1-NEXT: [[TMP2:%.*]] = load double, double* [[CONV1]], align 8
// CHECK1-NEXT: [[ADD11:%.*]] = fadd double [[TMP2]], 1.000000e+00
// CHECK1-NEXT: store double [[ADD11]], double* [[CONV1]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 4
// CHECK1-NEXT: [[CONV12:%.*]] = fpext float [[TMP3]] to double
// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00
// CHECK1-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float
// CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8
// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00
// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK1: land.lhs.true:
// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2
// CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32
// CHECK1-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0
// CHECK1-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]]
// CHECK1: land.lhs.true17:
// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4
// CHECK1-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double
// CHECK1-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00
// CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]]
// CHECK1: omp_if.then:
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, double*, float*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]])
// CHECK1-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[GD:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca double*, align 8
// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca float*, align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8
// CHECK1-NEXT: store double* [[GD]], double** [[GD_ADDR]], align 8
// CHECK1-NEXT: store float* [[SD]], float** [[SD_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[D_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[GD_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[SD_ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP0]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
// CHECK1-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2
// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[TMP1]], align 8
// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00
// CHECK1-NEXT: store double [[ADD2]], double* [[TMP1]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4
// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double
// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00
// CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float
// CHECK1-NEXT: store float [[CONV5]], float* [[TMP2]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK1-SAME: () #[[ATTR3:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK1-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z3foossss
// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[SB_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[SC_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[D_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[SD_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [9 x i8*], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [9 x i8*], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x i8*], align 4
// CHECK3-NEXT: store i16 [[A]], i16* [[A_ADDR]], align 2
// CHECK3-NEXT: store i16 [[B]], i16* [[B_ADDR]], align 2
// CHECK3-NEXT: store i16 [[C]], i16* [[C_ADDR]], align 2
// CHECK3-NEXT: store i16 [[D]], i16* [[D_ADDR]], align 2
// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[B_ADDR]], align 2
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_CASTED]] to i16*
// CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV]], align 2
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_CASTED]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load float, float* @_ZZ3foossssE2Sb, align 4
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[SB_CASTED]] to float*
// CHECK3-NEXT: store float [[TMP2]], float* [[CONV1]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[SB_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[C_ADDR]], align 2
// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[C_CASTED]] to i16*
// CHECK3-NEXT: store i16 [[TMP4]], i16* [[CONV2]], align 2
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[C_CASTED]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load float, float* @_ZZ3foossssE2Sc, align 4
// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[SC_CASTED]] to float*
// CHECK3-NEXT: store float [[TMP6]], float* [[CONV3]], align 4
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[SC_CASTED]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[D_ADDR]], align 2
// CHECK3-NEXT: [[CONV4:%.*]] = bitcast i32* [[D_CASTED]] to i16*
// CHECK3-NEXT: store i16 [[TMP8]], i16* [[CONV4]], align 2
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[D_CASTED]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = load float, float* @_ZZ3foossssE2Sd, align 4
// CHECK3-NEXT: [[CONV5:%.*]] = bitcast i32* [[SD_CASTED]] to float*
// CHECK3-NEXT: store float [[TMP10]], float* [[CONV5]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[SD_CASTED]], align 4
// CHECK3-NEXT: [[TMP12:%.*]] = load double, double* @Ga, align 8
// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP12]], 0.000000e+00
// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK3: land.lhs.true:
// CHECK3-NEXT: [[TMP13:%.*]] = load i16, i16* [[A_ADDR]], align 2
// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP13]] to i32
// CHECK3-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[CONV6]], 0
// CHECK3-NEXT: br i1 [[CMP7]], label [[LAND_LHS_TRUE8:%.*]], label [[OMP_IF_ELSE]]
// CHECK3: land.lhs.true8:
// CHECK3-NEXT: [[TMP14:%.*]] = load float, float* @_ZZ3foossssE2Sa, align 4
// CHECK3-NEXT: [[CONV9:%.*]] = fpext float [[TMP14]] to double
// CHECK3-NEXT: [[CMP10:%.*]] = fcmp ogt double [[CONV9]], 0.000000e+00
// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]]
// CHECK3: omp_if.then:
// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32*
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP16]], align 4
// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32*
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP18]], align 4
// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-NEXT: store i8* null, i8** [[TMP19]], align 4
// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to double**
// CHECK3-NEXT: store double* @Gb, double** [[TMP21]], align 4
// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to double**
// CHECK3-NEXT: store double* @Gb, double** [[TMP23]], align 4
// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-NEXT: store i8* null, i8** [[TMP24]], align 4
// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK3-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
// CHECK3-NEXT: store i32 [[TMP3]], i32* [[TMP26]], align 4
// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK3-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
// CHECK3-NEXT: store i32 [[TMP3]], i32* [[TMP28]], align 4
// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK3-NEXT: store i8* null, i8** [[TMP29]], align 4
// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK3-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to double**
// CHECK3-NEXT: store double* @Gc, double** [[TMP31]], align 4
// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK3-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to double**
// CHECK3-NEXT: store double* @Gc, double** [[TMP33]], align 4
// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
// CHECK3-NEXT: store i8* null, i8** [[TMP34]], align 4
// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK3-NEXT: [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32*
// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP36]], align 4
// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK3-NEXT: [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP38]], align 4
// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK3-NEXT: store i8* null, i8** [[TMP39]], align 4
// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
// CHECK3-NEXT: [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i32*
// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP41]], align 4
// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
// CHECK3-NEXT: [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP43]], align 4
// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
// CHECK3-NEXT: store i8* null, i8** [[TMP44]], align 4
// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
// CHECK3-NEXT: [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP46]], align 4
// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
// CHECK3-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32*
// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP48]], align 4
// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
// CHECK3-NEXT: store i8* null, i8** [[TMP49]], align 4
// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
// CHECK3-NEXT: [[TMP51:%.*]] = bitcast i8** [[TMP50]] to double**
// CHECK3-NEXT: store double* @Gd, double** [[TMP51]], align 4
// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
// CHECK3-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to double**
// CHECK3-NEXT: store double* @Gd, double** [[TMP53]], align 4
// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7
// CHECK3-NEXT: store i8* null, i8** [[TMP54]], align 4
// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
// CHECK3-NEXT: [[TMP56:%.*]] = bitcast i8** [[TMP55]] to i32*
// CHECK3-NEXT: store i32 [[TMP11]], i32* [[TMP56]], align 4
// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8
// CHECK3-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i32*
// CHECK3-NEXT: store i32 [[TMP11]], i32* [[TMP58]], align 4
// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8
// CHECK3-NEXT: store i8* null, i8** [[TMP59]], align 4
// CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-NEXT: store i32 1, i32* [[TMP62]], align 4
// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-NEXT: store i32 9, i32* [[TMP63]], align 4
// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-NEXT: store i8** [[TMP60]], i8*** [[TMP64]], align 4
// CHECK3-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-NEXT: store i8** [[TMP61]], i8*** [[TMP65]], align 4
// CHECK3-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP66]], align 4
// CHECK3-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP67]], align 4
// CHECK3-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-NEXT: store i8** null, i8*** [[TMP68]], align 4
// CHECK3-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-NEXT: store i8** null, i8*** [[TMP69]], align 4
// CHECK3-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-NEXT: store i64 0, i64* [[TMP70]], align 8
// CHECK3-NEXT: [[TMP71:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foossss_l49.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK3-NEXT: [[TMP72:%.*]] = icmp ne i32 [[TMP71]], 0
// CHECK3-NEXT: br i1 [[TMP72]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foossss_l49(i32 [[TMP1]], double* @Gb, i32 [[TMP3]], double* @Gc, i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP9]], double* @Gd, i32 [[TMP11]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3: omp_offload.cont:
// CHECK3-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foossss_l49(i32 [[TMP1]], double* @Gb, i32 [[TMP3]], double* @Gc, i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP9]], double* @Gd, i32 [[TMP11]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: [[TMP73:%.*]] = load i16, i16* [[A_ADDR]], align 2
// CHECK3-NEXT: [[CONV11:%.*]] = sext i16 [[TMP73]] to i32
// CHECK3-NEXT: [[TMP74:%.*]] = load i16, i16* [[B_ADDR]], align 2
// CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP74]] to i32
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV11]], [[CONV12]]
// CHECK3-NEXT: [[TMP75:%.*]] = load i16, i16* [[C_ADDR]], align 2
// CHECK3-NEXT: [[CONV13:%.*]] = sext i16 [[TMP75]] to i32
// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[ADD]], [[CONV13]]
// CHECK3-NEXT: [[TMP76:%.*]] = load i16, i16* [[D_ADDR]], align 2
// CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP76]] to i32
// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD14]], [[CONV15]]
// CHECK3-NEXT: [[TMP77:%.*]] = load float, float* @_ZZ3foossssE2Sa, align 4
// CHECK3-NEXT: [[CONV17:%.*]] = fptosi float [[TMP77]] to i32
// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[ADD16]], [[CONV17]]
// CHECK3-NEXT: [[TMP78:%.*]] = load float, float* @_ZZ3foossssE2Sb, align 4
// CHECK3-NEXT: [[CONV19:%.*]] = fptosi float [[TMP78]] to i32
// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD18]], [[CONV19]]
// CHECK3-NEXT: [[TMP79:%.*]] = load float, float* @_ZZ3foossssE2Sc, align 4
// CHECK3-NEXT: [[CONV21:%.*]] = fptosi float [[TMP79]] to i32
// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[ADD20]], [[CONV21]]
// CHECK3-NEXT: [[TMP80:%.*]] = load float, float* @_ZZ3foossssE2Sd, align 4
// CHECK3-NEXT: [[CONV23:%.*]] = fptosi float [[TMP80]] to i32
// CHECK3-NEXT: [[ADD24:%.*]] = add nsw i32 [[ADD22]], [[CONV23]]
// CHECK3-NEXT: ret i32 [[ADD24]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foossss_l49
// CHECK3-SAME: (i32 noundef [[B:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[GB:%.*]], i32 noundef [[SB:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[GC:%.*]], i32 noundef [[C:%.*]], i32 noundef [[SC:%.*]], i32 noundef [[D:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[GD:%.*]], i32 noundef [[SD:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GB_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT: [[SB_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GC_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[SC_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GD_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT: [[SD_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GB6:%.*]] = alloca double, align 8
// CHECK3-NEXT: [[GC7:%.*]] = alloca double, align 8
// CHECK3-NEXT: [[GD8:%.*]] = alloca double, align 8
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK3-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4
// CHECK3-NEXT: store i32 [[SB]], i32* [[SB_ADDR]], align 4
// CHECK3-NEXT: store double* [[GC]], double** [[GC_ADDR]], align 4
// CHECK3-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4
// CHECK3-NEXT: store i32 [[SC]], i32* [[SC_ADDR]], align 4
// CHECK3-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4
// CHECK3-NEXT: store double* [[GD]], double** [[GD_ADDR]], align 4
// CHECK3-NEXT: store i32 [[SD]], i32* [[SD_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16*
// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[GB_ADDR]], align 4
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[SB_ADDR]] to float*
// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[GC_ADDR]], align 4
// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[C_ADDR]] to i16*
// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[SC_ADDR]] to float*
// CHECK3-NEXT: [[CONV4:%.*]] = bitcast i32* [[D_ADDR]] to i16*
// CHECK3-NEXT: [[TMP3:%.*]] = load double*, double** [[GD_ADDR]], align 4
// CHECK3-NEXT: [[CONV5:%.*]] = bitcast i32* [[SD_ADDR]] to float*
// CHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[TMP1]], align 8
// CHECK3-NEXT: store double [[TMP4]], double* [[GB6]], align 8
// CHECK3-NEXT: [[TMP5:%.*]] = load double, double* [[TMP2]], align 8
// CHECK3-NEXT: store double [[TMP5]], double* [[GC7]], align 8
// CHECK3-NEXT: [[TMP6:%.*]] = load double, double* [[TMP3]], align 8
// CHECK3-NEXT: store double [[TMP6]], double* [[GD8]], align 8
// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP7]] to i32
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1
// CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
// CHECK3-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2
// CHECK3-NEXT: [[TMP8:%.*]] = load double, double* [[GB6]], align 8
// CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[TMP8]], 1.000000e+00
// CHECK3-NEXT: store double [[ADD11]], double* [[GB6]], align 8
// CHECK3-NEXT: [[TMP9:%.*]] = load float, float* [[CONV1]], align 4
// CHECK3-NEXT: [[CONV12:%.*]] = fpext float [[TMP9]] to double
// CHECK3-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00
// CHECK3-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float
// CHECK3-NEXT: store float [[CONV14]], float* [[CONV1]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = load double, double* [[GC7]], align 8
// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00
// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK3: land.lhs.true:
// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2
// CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32
// CHECK3-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0
// CHECK3-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]]
// CHECK3: land.lhs.true17:
// CHECK3-NEXT: [[TMP12:%.*]] = load float, float* [[CONV3]], align 4
// CHECK3-NEXT: [[CONV18:%.*]] = fpext float [[TMP12]] to double
// CHECK3-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00
// CHECK3-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]]
// CHECK3: omp_if.then:
// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, double*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), i16* [[CONV4]], double* [[GD8]], float* [[CONV5]])
// CHECK3-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[GD:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT: [[GD_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT: [[SD_ADDR:%.*]] = alloca float*, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 4
// CHECK3-NEXT: store double* [[GD]], double** [[GD_ADDR]], align 4
// CHECK3-NEXT: store float* [[SD]], float** [[SD_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[D_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[GD_ADDR]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[SD_ADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP0]], align 2
// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
// CHECK3-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2
// CHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[TMP1]], align 8
// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00
// CHECK3-NEXT: store double [[ADD2]], double* [[TMP1]], align 8
// CHECK3-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4
// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double
// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00
// CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float
// CHECK3-NEXT: store float [[CONV5]], float* [[TMP2]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z3barssss
// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: store i16 [[A]], i16* [[A_ADDR]], align 2
// CHECK3-NEXT: store i16 [[B]], i16* [[B_ADDR]], align 2
// CHECK3-NEXT: store i16 [[C]], i16* [[C_ADDR]], align 2
// CHECK3-NEXT: store i16 [[D]], i16* [[D_ADDR]], align 2
// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, i16*, i16*, i16*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i16* [[A_ADDR]], i16* [[B_ADDR]], i16* [[C_ADDR]], i16* [[D_ADDR]])
// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[A_ADDR]], align 2
// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[B_ADDR]], align 2
// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]]
// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[C_ADDR]], align 2
// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]]
// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[D_ADDR]], align 2
// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32
// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]]
// CHECK3-NEXT: [[TMP4:%.*]] = load float, float* @_ZZ3barssssE2Sa, align 4
// CHECK3-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32
// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]]
// CHECK3-NEXT: [[TMP5:%.*]] = load float, float* @_ZZ3barssssE2Sb, align 4
// CHECK3-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32
// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]]
// CHECK3-NEXT: [[TMP6:%.*]] = load float, float* @_ZZ3barssssE2Sc, align 4
// CHECK3-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32
// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]]
// CHECK3-NEXT: [[TMP7:%.*]] = load float, float* @_ZZ3barssssE2Sd, align 4
// CHECK3-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32
// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]]
// CHECK3-NEXT: ret i32 [[ADD13]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[SB_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[SC_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[D_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[SD_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [9 x i8*], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [9 x i8*], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x i8*], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i16* [[A]], i16** [[A_ADDR]], align 4
// CHECK3-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4
// CHECK3-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4
// CHECK3-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[A_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[D_ADDR]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_CASTED]] to i16*
// CHECK3-NEXT: store i16 [[TMP4]], i16* [[CONV]], align 2
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load float, float* @_ZZ3barssssE2Sb, align 4
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[SB_CASTED]] to float*
// CHECK3-NEXT: store float [[TMP6]], float* [[CONV1]], align 4
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[SB_CASTED]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[TMP2]], align 2
// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[C_CASTED]] to i16*
// CHECK3-NEXT: store i16 [[TMP8]], i16* [[CONV2]], align 2
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[C_CASTED]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = load float, float* @_ZZ3barssssE2Sc, align 4
// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[SC_CASTED]] to float*
// CHECK3-NEXT: store float [[TMP10]], float* [[CONV3]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[SC_CASTED]], align 4
// CHECK3-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK3-NEXT: [[CONV4:%.*]] = bitcast i32* [[D_CASTED]] to i16*
// CHECK3-NEXT: store i16 [[TMP12]], i16* [[CONV4]], align 2
// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[D_CASTED]], align 4
// CHECK3-NEXT: [[TMP14:%.*]] = load float, float* @_ZZ3barssssE2Sd, align 4
// CHECK3-NEXT: [[CONV5:%.*]] = bitcast i32* [[SD_CASTED]] to float*
// CHECK3-NEXT: store float [[TMP14]], float* [[CONV5]], align 4
// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[SD_CASTED]], align 4
// CHECK3-NEXT: [[TMP16:%.*]] = load double, double* @Ga, align 8
// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP16]], 0.000000e+00
// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK3: land.lhs.true:
// CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP0]], align 2
// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP17]] to i32
// CHECK3-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[CONV6]], 0
// CHECK3-NEXT: br i1 [[CMP7]], label [[LAND_LHS_TRUE8:%.*]], label [[OMP_IF_ELSE]]
// CHECK3: land.lhs.true8:
// CHECK3-NEXT: [[TMP18:%.*]] = load float, float* @_ZZ3barssssE2Sa, align 4
// CHECK3-NEXT: [[CONV9:%.*]] = fpext float [[TMP18]] to double
// CHECK3-NEXT: [[CMP10:%.*]] = fcmp ogt double [[CONV9]], 0.000000e+00
// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]]
// CHECK3: omp_if.then:
// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP20]], align 4
// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP22]], align 4
// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-NEXT: store i8* null, i8** [[TMP23]], align 4
// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to double**
// CHECK3-NEXT: store double* @Gb, double** [[TMP25]], align 4
// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-NEXT: [[TMP27:%.*]] = bitcast i8** [[TMP26]] to double**
// CHECK3-NEXT: store double* @Gb, double** [[TMP27]], align 4
// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-NEXT: store i8* null, i8** [[TMP28]], align 4
// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK3-NEXT: [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP30]], align 4
// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK3-NEXT: [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP32]], align 4
// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK3-NEXT: store i8* null, i8** [[TMP33]], align 4
// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK3-NEXT: [[TMP35:%.*]] = bitcast i8** [[TMP34]] to double**
// CHECK3-NEXT: store double* @Gc, double** [[TMP35]], align 4
// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK3-NEXT: [[TMP37:%.*]] = bitcast i8** [[TMP36]] to double**
// CHECK3-NEXT: store double* @Gc, double** [[TMP37]], align 4
// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
// CHECK3-NEXT: store i8* null, i8** [[TMP38]], align 4
// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK3-NEXT: [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP40]], align 4
// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK3-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i32*
// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP42]], align 4
// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK3-NEXT: store i8* null, i8** [[TMP43]], align 4
// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
// CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
// CHECK3-NEXT: store i32 [[TMP11]], i32* [[TMP45]], align 4
// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
// CHECK3-NEXT: [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32*
// CHECK3-NEXT: store i32 [[TMP11]], i32* [[TMP47]], align 4
// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
// CHECK3-NEXT: store i8* null, i8** [[TMP48]], align 4
// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
// CHECK3-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32*
// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP50]], align 4
// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
// CHECK3-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP52]], align 4
// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
// CHECK3-NEXT: store i8* null, i8** [[TMP53]], align 4
// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
// CHECK3-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to double**
// CHECK3-NEXT: store double* @Gd, double** [[TMP55]], align 4
// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
// CHECK3-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to double**
// CHECK3-NEXT: store double* @Gd, double** [[TMP57]], align 4
// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7
// CHECK3-NEXT: store i8* null, i8** [[TMP58]], align 4
// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
// CHECK3-NEXT: [[TMP60:%.*]] = bitcast i8** [[TMP59]] to i32*
// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP60]], align 4
// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8
// CHECK3-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP62]], align 4
// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8
// CHECK3-NEXT: store i8* null, i8** [[TMP63]], align 4
// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-NEXT: store i32 1, i32* [[TMP66]], align 4
// CHECK3-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-NEXT: store i32 9, i32* [[TMP67]], align 4
// CHECK3-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-NEXT: store i8** [[TMP64]], i8*** [[TMP68]], align 4
// CHECK3-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-NEXT: store i8** [[TMP65]], i8*** [[TMP69]], align 4
// CHECK3-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP70]], align 4
// CHECK3-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP71]], align 4
// CHECK3-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-NEXT: store i8** null, i8*** [[TMP72]], align 4
// CHECK3-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-NEXT: store i8** null, i8*** [[TMP73]], align 4
// CHECK3-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-NEXT: store i64 0, i64* [[TMP74]], align 8
// CHECK3-NEXT: [[TMP75:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK3-NEXT: [[TMP76:%.*]] = icmp ne i32 [[TMP75]], 0
// CHECK3-NEXT: br i1 [[TMP76]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i32 [[TMP5]], double* @Gb, i32 [[TMP7]], double* @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], double* @Gd, i32 [[TMP15]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3: omp_offload.cont:
// CHECK3-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i32 [[TMP5]], double* @Gb, i32 [[TMP7]], double* @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], double* @Gd, i32 [[TMP15]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94
// CHECK3-SAME: (i32 noundef [[B:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[GB:%.*]], i32 noundef [[SB:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[GC:%.*]], i32 noundef [[C:%.*]], i32 noundef [[SC:%.*]], i32 noundef [[D:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[GD:%.*]], i32 noundef [[SD:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GB_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT: [[SB_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GC_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[SC_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GD_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT: [[SD_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GB6:%.*]] = alloca double, align 8
// CHECK3-NEXT: [[GC7:%.*]] = alloca double, align 8
// CHECK3-NEXT: [[GD8:%.*]] = alloca double, align 8
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK3-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4
// CHECK3-NEXT: store i32 [[SB]], i32* [[SB_ADDR]], align 4
// CHECK3-NEXT: store double* [[GC]], double** [[GC_ADDR]], align 4
// CHECK3-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4
// CHECK3-NEXT: store i32 [[SC]], i32* [[SC_ADDR]], align 4
// CHECK3-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4
// CHECK3-NEXT: store double* [[GD]], double** [[GD_ADDR]], align 4
// CHECK3-NEXT: store i32 [[SD]], i32* [[SD_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16*
// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[GB_ADDR]], align 4
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[SB_ADDR]] to float*
// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[GC_ADDR]], align 4
// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[C_ADDR]] to i16*
// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[SC_ADDR]] to float*
// CHECK3-NEXT: [[CONV4:%.*]] = bitcast i32* [[D_ADDR]] to i16*
// CHECK3-NEXT: [[TMP3:%.*]] = load double*, double** [[GD_ADDR]], align 4
// CHECK3-NEXT: [[CONV5:%.*]] = bitcast i32* [[SD_ADDR]] to float*
// CHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[TMP1]], align 8
// CHECK3-NEXT: store double [[TMP4]], double* [[GB6]], align 8
// CHECK3-NEXT: [[TMP5:%.*]] = load double, double* [[TMP2]], align 8
// CHECK3-NEXT: store double [[TMP5]], double* [[GC7]], align 8
// CHECK3-NEXT: [[TMP6:%.*]] = load double, double* [[TMP3]], align 8
// CHECK3-NEXT: store double [[TMP6]], double* [[GD8]], align 8
// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP7]] to i32
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1
// CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
// CHECK3-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2
// CHECK3-NEXT: [[TMP8:%.*]] = load double, double* [[GB6]], align 8
// CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[TMP8]], 1.000000e+00
// CHECK3-NEXT: store double [[ADD11]], double* [[GB6]], align 8
// CHECK3-NEXT: [[TMP9:%.*]] = load float, float* [[CONV1]], align 4
// CHECK3-NEXT: [[CONV12:%.*]] = fpext float [[TMP9]] to double
// CHECK3-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00
// CHECK3-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float
// CHECK3-NEXT: store float [[CONV14]], float* [[CONV1]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = load double, double* [[GC7]], align 8
// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00
// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK3: land.lhs.true:
// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2
// CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32
// CHECK3-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0
// CHECK3-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]]
// CHECK3: land.lhs.true17:
// CHECK3-NEXT: [[TMP12:%.*]] = load float, float* [[CONV3]], align 4
// CHECK3-NEXT: [[CONV18:%.*]] = fpext float [[TMP12]] to double
// CHECK3-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00
// CHECK3-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]]
// CHECK3: omp_if.then:
// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, double*, float*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i16* [[CONV4]], double* [[GD8]], float* [[CONV5]])
// CHECK3-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[GD:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT: [[GD_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT: [[SD_ADDR:%.*]] = alloca float*, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 4
// CHECK3-NEXT: store double* [[GD]], double** [[GD_ADDR]], align 4
// CHECK3-NEXT: store float* [[SD]], float** [[SD_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[D_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[GD_ADDR]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[SD_ADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP0]], align 2
// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
// CHECK3-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2
// CHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[TMP1]], align 8
// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00
// CHECK3-NEXT: store double [[ADD2]], double* [[TMP1]], align 8
// CHECK3-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4
// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double
// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00
// CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float
// CHECK3-NEXT: store float [[CONV5]], float* [[TMP2]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z5tbar2ssss
// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: store i16 [[A]], i16* [[A_ADDR]], align 2
// CHECK3-NEXT: store i16 [[B]], i16* [[B_ADDR]], align 2
// CHECK3-NEXT: store i16 [[C]], i16* [[C_ADDR]], align 2
// CHECK3-NEXT: store i16 [[D]], i16* [[D_ADDR]], align 2
// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[A_ADDR]], align 2
// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[B_ADDR]], align 2
// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[C_ADDR]], align 2
// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[D_ADDR]], align 2
// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_Z4tbarIsEiT_S0_S0_S0_(i16 noundef signext [[TMP0]], i16 noundef signext [[TMP1]], i16 noundef signext [[TMP2]], i16 noundef signext [[TMP3]])
// CHECK3-NEXT: ret i32 [[CALL]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z4tbarIsEiT_S0_S0_S0_
// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] comdat {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: store i16 [[A]], i16* [[A_ADDR]], align 2
// CHECK3-NEXT: store i16 [[B]], i16* [[B_ADDR]], align 2
// CHECK3-NEXT: store i16 [[C]], i16* [[C_ADDR]], align 2
// CHECK3-NEXT: store i16 [[D]], i16* [[D_ADDR]], align 2
// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, i16*, i16*, i16*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i16* [[A_ADDR]], i16* [[B_ADDR]], i16* [[C_ADDR]], i16* [[D_ADDR]])
// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[A_ADDR]], align 2
// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[B_ADDR]], align 2
// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]]
// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[C_ADDR]], align 2
// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]]
// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[D_ADDR]], align 2
// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32
// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]]
// CHECK3-NEXT: [[TMP4:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4
// CHECK3-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32
// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]]
// CHECK3-NEXT: [[TMP5:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4
// CHECK3-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32
// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]]
// CHECK3-NEXT: [[TMP6:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4
// CHECK3-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32
// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]]
// CHECK3-NEXT: [[TMP7:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4
// CHECK3-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32
// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]]
// CHECK3-NEXT: ret i32 [[ADD13]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[SB_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[SC_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[D_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[SD_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [9 x i8*], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [9 x i8*], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x i8*], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i16* [[A]], i16** [[A_ADDR]], align 4
// CHECK3-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4
// CHECK3-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4
// CHECK3-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[A_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[D_ADDR]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_CASTED]] to i16*
// CHECK3-NEXT: store i16 [[TMP4]], i16* [[CONV]], align 2
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[SB_CASTED]] to float*
// CHECK3-NEXT: store float [[TMP6]], float* [[CONV1]], align 4
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[SB_CASTED]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[TMP2]], align 2
// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[C_CASTED]] to i16*
// CHECK3-NEXT: store i16 [[TMP8]], i16* [[CONV2]], align 2
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[C_CASTED]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4
// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[SC_CASTED]] to float*
// CHECK3-NEXT: store float [[TMP10]], float* [[CONV3]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[SC_CASTED]], align 4
// CHECK3-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK3-NEXT: [[CONV4:%.*]] = bitcast i32* [[D_CASTED]] to i16*
// CHECK3-NEXT: store i16 [[TMP12]], i16* [[CONV4]], align 2
// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[D_CASTED]], align 4
// CHECK3-NEXT: [[TMP14:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4
// CHECK3-NEXT: [[CONV5:%.*]] = bitcast i32* [[SD_CASTED]] to float*
// CHECK3-NEXT: store float [[TMP14]], float* [[CONV5]], align 4
// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[SD_CASTED]], align 4
// CHECK3-NEXT: [[TMP16:%.*]] = load double, double* @Ga, align 8
// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP16]], 0.000000e+00
// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK3: land.lhs.true:
// CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP0]], align 2
// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP17]] to i32
// CHECK3-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[CONV6]], 0
// CHECK3-NEXT: br i1 [[CMP7]], label [[LAND_LHS_TRUE8:%.*]], label [[OMP_IF_ELSE]]
// CHECK3: land.lhs.true8:
// CHECK3-NEXT: [[TMP18:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4
// CHECK3-NEXT: [[CONV9:%.*]] = fpext float [[TMP18]] to double
// CHECK3-NEXT: [[CMP10:%.*]] = fcmp ogt double [[CONV9]], 0.000000e+00
// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]]
// CHECK3: omp_if.then:
// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP20]], align 4
// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP22]], align 4
// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-NEXT: store i8* null, i8** [[TMP23]], align 4
// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to double**
// CHECK3-NEXT: store double* @Gb, double** [[TMP25]], align 4
// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-NEXT: [[TMP27:%.*]] = bitcast i8** [[TMP26]] to double**
// CHECK3-NEXT: store double* @Gb, double** [[TMP27]], align 4
// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-NEXT: store i8* null, i8** [[TMP28]], align 4
// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK3-NEXT: [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP30]], align 4
// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK3-NEXT: [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP32]], align 4
// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK3-NEXT: store i8* null, i8** [[TMP33]], align 4
// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK3-NEXT: [[TMP35:%.*]] = bitcast i8** [[TMP34]] to double**
// CHECK3-NEXT: store double* @Gc, double** [[TMP35]], align 4
// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK3-NEXT: [[TMP37:%.*]] = bitcast i8** [[TMP36]] to double**
// CHECK3-NEXT: store double* @Gc, double** [[TMP37]], align 4
// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
// CHECK3-NEXT: store i8* null, i8** [[TMP38]], align 4
// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK3-NEXT: [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP40]], align 4
// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK3-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i32*
// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP42]], align 4
// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK3-NEXT: store i8* null, i8** [[TMP43]], align 4
// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
// CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
// CHECK3-NEXT: store i32 [[TMP11]], i32* [[TMP45]], align 4
// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
// CHECK3-NEXT: [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32*
// CHECK3-NEXT: store i32 [[TMP11]], i32* [[TMP47]], align 4
// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
// CHECK3-NEXT: store i8* null, i8** [[TMP48]], align 4
// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
// CHECK3-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32*
// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP50]], align 4
// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
// CHECK3-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP52]], align 4
// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
// CHECK3-NEXT: store i8* null, i8** [[TMP53]], align 4
// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
// CHECK3-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to double**
// CHECK3-NEXT: store double* @Gd, double** [[TMP55]], align 4
// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
// CHECK3-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to double**
// CHECK3-NEXT: store double* @Gd, double** [[TMP57]], align 4
// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7
// CHECK3-NEXT: store i8* null, i8** [[TMP58]], align 4
// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
// CHECK3-NEXT: [[TMP60:%.*]] = bitcast i8** [[TMP59]] to i32*
// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP60]], align 4
// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8
// CHECK3-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP62]], align 4
// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8
// CHECK3-NEXT: store i8* null, i8** [[TMP63]], align 4
// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-NEXT: store i32 1, i32* [[TMP66]], align 4
// CHECK3-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-NEXT: store i32 9, i32* [[TMP67]], align 4
// CHECK3-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-NEXT: store i8** [[TMP64]], i8*** [[TMP68]], align 4
// CHECK3-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-NEXT: store i8** [[TMP65]], i8*** [[TMP69]], align 4
// CHECK3-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP70]], align 4
// CHECK3-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP71]], align 4
// CHECK3-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-NEXT: store i8** null, i8*** [[TMP72]], align 4
// CHECK3-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-NEXT: store i8** null, i8*** [[TMP73]], align 4
// CHECK3-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-NEXT: store i64 0, i64* [[TMP74]], align 8
// CHECK3-NEXT: [[TMP75:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK3-NEXT: [[TMP76:%.*]] = icmp ne i32 [[TMP75]], 0
// CHECK3-NEXT: br i1 [[TMP76]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i32 [[TMP5]], double* @Gb, i32 [[TMP7]], double* @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], double* @Gd, i32 [[TMP15]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3: omp_offload.cont:
// CHECK3-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i32 [[TMP5]], double* @Gb, i32 [[TMP7]], double* @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], double* @Gd, i32 [[TMP15]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145
// CHECK3-SAME: (i32 noundef [[B:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[GB:%.*]], i32 noundef [[SB:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[GC:%.*]], i32 noundef [[C:%.*]], i32 noundef [[SC:%.*]], i32 noundef [[D:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[GD:%.*]], i32 noundef [[SD:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GB_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT: [[SB_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GC_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[SC_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GD_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT: [[SD_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GB6:%.*]] = alloca double, align 8
// CHECK3-NEXT: [[GC7:%.*]] = alloca double, align 8
// CHECK3-NEXT: [[GD8:%.*]] = alloca double, align 8
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK3-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4
// CHECK3-NEXT: store i32 [[SB]], i32* [[SB_ADDR]], align 4
// CHECK3-NEXT: store double* [[GC]], double** [[GC_ADDR]], align 4
// CHECK3-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4
// CHECK3-NEXT: store i32 [[SC]], i32* [[SC_ADDR]], align 4
// CHECK3-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4
// CHECK3-NEXT: store double* [[GD]], double** [[GD_ADDR]], align 4
// CHECK3-NEXT: store i32 [[SD]], i32* [[SD_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16*
// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[GB_ADDR]], align 4
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[SB_ADDR]] to float*
// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[GC_ADDR]], align 4
// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[C_ADDR]] to i16*
// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[SC_ADDR]] to float*
// CHECK3-NEXT: [[CONV4:%.*]] = bitcast i32* [[D_ADDR]] to i16*
// CHECK3-NEXT: [[TMP3:%.*]] = load double*, double** [[GD_ADDR]], align 4
// CHECK3-NEXT: [[CONV5:%.*]] = bitcast i32* [[SD_ADDR]] to float*
// CHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[TMP1]], align 8
// CHECK3-NEXT: store double [[TMP4]], double* [[GB6]], align 8
// CHECK3-NEXT: [[TMP5:%.*]] = load double, double* [[TMP2]], align 8
// CHECK3-NEXT: store double [[TMP5]], double* [[GC7]], align 8
// CHECK3-NEXT: [[TMP6:%.*]] = load double, double* [[TMP3]], align 8
// CHECK3-NEXT: store double [[TMP6]], double* [[GD8]], align 8
// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP7]] to i32
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1
// CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
// CHECK3-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2
// CHECK3-NEXT: [[TMP8:%.*]] = load double, double* [[GB6]], align 8
// CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[TMP8]], 1.000000e+00
// CHECK3-NEXT: store double [[ADD11]], double* [[GB6]], align 8
// CHECK3-NEXT: [[TMP9:%.*]] = load float, float* [[CONV1]], align 4
// CHECK3-NEXT: [[CONV12:%.*]] = fpext float [[TMP9]] to double
// CHECK3-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00
// CHECK3-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float
// CHECK3-NEXT: store float [[CONV14]], float* [[CONV1]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = load double, double* [[GC7]], align 8
// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00
// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK3: land.lhs.true:
// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2
// CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32
// CHECK3-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0
// CHECK3-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]]
// CHECK3: land.lhs.true17:
// CHECK3-NEXT: [[TMP12:%.*]] = load float, float* [[CONV3]], align 4
// CHECK3-NEXT: [[CONV18:%.*]] = fpext float [[TMP12]] to double
// CHECK3-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00
// CHECK3-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]]
// CHECK3: omp_if.then:
// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, double*, float*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i16* [[CONV4]], double* [[GD8]], float* [[CONV5]])
// CHECK3-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[GD:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT: [[GD_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT: [[SD_ADDR:%.*]] = alloca float*, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 4
// CHECK3-NEXT: store double* [[GD]], double** [[GD_ADDR]], align 4
// CHECK3-NEXT: store float* [[SD]], float** [[SD_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[D_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[GD_ADDR]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[SD_ADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP0]], align 2
// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
// CHECK3-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2
// CHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[TMP1]], align 8
// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00
// CHECK3-NEXT: store double [[ADD2]], double* [[TMP1]], align 8
// CHECK3-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4
// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double
// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00
// CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float
// CHECK3-NEXT: store float [[CONV5]], float* [[TMP2]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK3-SAME: () #[[ATTR3:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK3-NEXT: ret void
//