// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test host codegen.
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK0
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK1
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK2
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK3
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY01 %s
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY02 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY03 %s
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK1
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK2
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK3
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY11 %s
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY12 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY13 %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
template <typename tx, typename ty>
struct TT {
  tx X;
  ty Y;
};
#pragma omp declare target
int ga = 5;
#pragma omp end declare target
int foo(int n, double *ptr) {
  int a = 0;
  short aa = 0;
  float b[10];
  float bn[n];
  double c[5][10];
  double cn[5][n];
  TT<long long, char> d;
  const TT<int, int> e = {n, n};
  int *p __attribute__ ((aligned (64))) = &a;
#pragma omp target firstprivate(a, p, ga)
  {
  }
// a is passed by value to tgt_target
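// A minimal sketch of what "by value" means here (illustrative only and kept
// out of compilation by the #if 0 guard; a_casted/arg are hypothetical names):
// the 32-bit value is widened into a pointer-sized slot, and that same slot is
// what lands in both the base-pointer and pointer argument arrays.
#if 0
  long long a_casted = 0;
  __builtin_memcpy(&a_casted, &a, sizeof(a)); // mirrors the store through A_CASTED
  void *arg = (void *)a_casted;               // used as both base_ptr and ptr
#endif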
#pragma omp target firstprivate(aa, b, bn, c, cn, d)
  {
    aa += 1;
    b[2] = 1.0;
    bn[3] = 1.0;
    c[1][2] = 1.0;
    cn[1][3] = 1.0;
    d.X = 1;
    d.Y = 1;
  }
// firstprivate(aa) --> base_ptr = aa, ptr = aa, size = 2 (short)
// firstprivate(b): base_ptr = &b[0], ptr = &b[0], size = 40 (sizeof(float)*10)
// firstprivate(bn), 2 entries, n and bn: (1) base_ptr = n, ptr = n, size = 8; (2) base_ptr = &bn[0], ptr = &bn[0], size = n*sizeof(float)
// firstprivate(c): base_ptr = &c[0], ptr = &c[0], size = 400 (5*10*sizeof(double))
// firstprivate(cn), 3 entries, 5, n, cn: (1) base_ptr = 5, ptr = 5, size = 8; (2) base_ptr = n, ptr = n, size = 8; (3) base_ptr = &cn[0], ptr = &cn[0], size = 5*n*sizeof(double)
// firstprivate(d): base_ptr = &d, ptr = &d, size = 16
// make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the
// target region
// firstprivate(aa): aa_priv = aa_in
// firstprivate(b): memcpy(b_priv,b_in)
// firstprivate(bn)
// firstprivate(c)
// firstprivate(cn)
// firstprivate(d)
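// Illustrative expansion of the comment block above: a hypothetical
// hand-written equivalent of the argument arrays the compiler emits (not
// compiled, see the #if 0 guard). Scalars and VLA bounds travel widened to
// pointer width; the VLA size slots are computed at run time.
#if 0
  void *base_ptrs[9] = {(void *)(long long)aa, &b, (void *)(long long)n,
                        &bn[0], &c, (void *)5LL, (void *)(long long)n,
                        &cn[0], &d};
  long long sizes[9] = {2, 40, 8, n * (long long)sizeof(float), 400, 8, 8,
                        5 * n * (long long)sizeof(double), 16};
#endif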
#pragma omp target firstprivate(ptr, e)
  {
    ptr[0] = e.X;
    ptr[0]++;
  }
  return a;
}
template <typename tx>
tx ftemplate(int n) {
  tx a = 0;
  tx b[10];
#pragma omp target firstprivate(a, b)
  {
    a += 1;
    b[2] += 1;
  }
  return a;
}
static int fstatic(int n) {
  int a = 0;
  char aaa = 0;
  int b[10];
#pragma omp target firstprivate(a, aaa, b)
  {
    a += 1;
    aaa += 1;
    b[2] += 1;
  }
  return a;
}
// firstprivate(a): a_priv = a_in
// firstprivate(aaa)
// firstprivate(b)
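// Hypothetical sketch of the char case above (not compiled): as with the int,
// the 8-bit value is widened into a pointer-sized slot, mirroring the store
// through AAA_CASTED in the IR below; only the low byte is meaningful.
#if 0
static void widen_char_arg(char aaa) {
  long long aaa_casted = 0;
  __builtin_memcpy(&aaa_casted, &aaa, sizeof(char)); // low byte carries aaa
  void *arg = (void *)aaa_casted;
  (void)arg;
}
#endif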
struct S1 {
  double a;
  int r1(int n) {
    int b = n + 1;
    short int c[2][n];
#pragma omp target firstprivate(b, c)
    {
      this->a = (double)b + 1.5;
      c[1][1] = ++a;
    }
    return c[1][1] + (int)b;
  }
// on the host side, we first generate r1, then the static function and the template above
// map(this): the this pointer is implicitly captured (not a firstprivate matter)
// firstprivate(b): base_ptr = b, ptr = b, size = 4 (passed by value)
// firstprivate(c), 3 entries: 2, n, c (sketched after this struct)
// only check that we use the map types stored in the global variable
// firstprivate(b)
// firstprivate(c)
// finish
// static host function
// firstprivate(a): by value
// firstprivate(aaa): by value
// firstprivate(b): base_ptr = &b[0], ptr= &b[0]
// only check that the right sizes and map types are used
};
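// Hypothetical sketch of the five entries r1's comments describe (not
// compiled; this_ptr, b_val, n_val and c_vla stand in for the real values):
// this is mapped with &this->a as the pointer, b travels by value, and the
// member VLA contributes its two bounds plus the data itself.
#if 0
static void r1_args(S1 *this_ptr, long long b_val, long long n_val,
                    short *c_vla) {
  void *base_ptrs[5] = {this_ptr, (void *)b_val, (void *)2LL,
                        (void *)n_val, c_vla};
  long long sizes[5] = {(long long)sizeof(double), 4, 8, 8,
                        2 * n_val * (long long)sizeof(short)};
  (void)base_ptrs;
  (void)sizes;
}
#endif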
int bar(int n, double *ptr) {
  int a = 0;
  a += foo(n, ptr);
  S1 S;
  a += S.r1(n);
  a += fstatic(n);
  a += ftemplate<int>(n);
  return a;
}
// template host and device
// firstprivate(a): by value
// firstprivate(b): pointer
// firstprivate(a)
// firstprivate(b)
#endif
// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[AA:%.*]] = alloca i16, align 2
// CHECK-64-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// CHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK-64-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK-64-NEXT: [[P:%.*]] = alloca i32*, align 64
// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[GA_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8
// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK-64-NEXT: store i32 0, i32* [[A]], align 4
// CHECK-64-NEXT: store i16 0, i16* [[AA]], align 2
// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// CHECK-64-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK-64-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-64-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK-64-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[X]], align 4
// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4
// CHECK-64-NEXT: store i32* [[A]], i32** [[P]], align 64
// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK-64-NEXT: store i32 [[TMP8]], i32* [[CONV]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK-64-NEXT: [[TMP10:%.*]] = load i32*, i32** [[P]], align 64
// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* @ga, align 4
// CHECK-64-NEXT: [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32*
// CHECK-64-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4
// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8
// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
// CHECK-64-NEXT: store i64 [[TMP9]], i64* [[TMP14]], align 8
// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK-64-NEXT: store i64 [[TMP9]], i64* [[TMP16]], align 8
// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK-64-NEXT: store i8* null, i8** [[TMP17]], align 8
// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK-64-NEXT: store i32* [[TMP10]], i32** [[TMP19]], align 8
// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
// CHECK-64-NEXT: store i32* [[TMP10]], i32** [[TMP21]], align 8
// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK-64-NEXT: store i8* null, i8** [[TMP22]], align 8
// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP24]], align 8
// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP26]], align 8
// CHECK-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK-64-NEXT: store i8* null, i8** [[TMP27]], align 8
// CHECK-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-64-NEXT: store i32 2, i32* [[TMP30]], align 4
// CHECK-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-64-NEXT: store i32 3, i32* [[TMP31]], align 4
// CHECK-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-64-NEXT: store i8** [[TMP28]], i8*** [[TMP32]], align 8
// CHECK-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-64-NEXT: store i8** [[TMP29]], i8*** [[TMP33]], align 8
// CHECK-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8
// CHECK-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8
// CHECK-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-64-NEXT: store i8** null, i8*** [[TMP36]], align 8
// CHECK-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-64-NEXT: store i8** null, i8*** [[TMP37]], align 8
// CHECK-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-64-NEXT: store i64 0, i64* [[TMP38]], align 8
// CHECK-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-64-NEXT: store i64 0, i64* [[TMP39]], align 8
// CHECK-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4
// CHECK-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4
// CHECK-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-64-NEXT: store i32 0, i32* [[TMP42]], align 4
// CHECK-64-NEXT: [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-64-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
// CHECK-64-NEXT: br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-64: omp_offload.failed:
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK-64: omp_offload.cont:
// CHECK-64-NEXT: [[TMP45:%.*]] = load i16, i16* [[AA]], align 2
// CHECK-64-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
// CHECK-64-NEXT: store i16 [[TMP45]], i16* [[CONV3]], align 2
// CHECK-64-NEXT: [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8
// CHECK-64-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK-64-NEXT: [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK-64-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8
// CHECK-64-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false)
// CHECK-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
// CHECK-64-NEXT: store i64 [[TMP46]], i64* [[TMP52]], align 8
// CHECK-64-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64*
// CHECK-64-NEXT: store i64 [[TMP46]], i64* [[TMP54]], align 8
// CHECK-64-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0
// CHECK-64-NEXT: store i8* null, i8** [[TMP55]], align 8
// CHECK-64-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8
// CHECK-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8
// CHECK-64-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1
// CHECK-64-NEXT: store i8* null, i8** [[TMP60]], align 8
// CHECK-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP62]], align 8
// CHECK-64-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP64]], align 8
// CHECK-64-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2
// CHECK-64-NEXT: store i8* null, i8** [[TMP65]], align 8
// CHECK-64-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
// CHECK-64-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK-64-NEXT: store float* [[VLA]], float** [[TMP67]], align 8
// CHECK-64-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
// CHECK-64-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK-64-NEXT: store float* [[VLA]], float** [[TMP69]], align 8
// CHECK-64-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK-64-NEXT: store i64 [[TMP47]], i64* [[TMP70]], align 8
// CHECK-64-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3
// CHECK-64-NEXT: store i8* null, i8** [[TMP71]], align 8
// CHECK-64-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4
// CHECK-64-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8
// CHECK-64-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4
// CHECK-64-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8
// CHECK-64-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4
// CHECK-64-NEXT: store i8* null, i8** [[TMP76]], align 8
// CHECK-64-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5
// CHECK-64-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
// CHECK-64-NEXT: store i64 5, i64* [[TMP78]], align 8
// CHECK-64-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5
// CHECK-64-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
// CHECK-64-NEXT: store i64 5, i64* [[TMP80]], align 8
// CHECK-64-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5
// CHECK-64-NEXT: store i8* null, i8** [[TMP81]], align 8
// CHECK-64-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6
// CHECK-64-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[TMP83]], align 8
// CHECK-64-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6
// CHECK-64-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[TMP85]], align 8
// CHECK-64-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6
// CHECK-64-NEXT: store i8* null, i8** [[TMP86]], align 8
// CHECK-64-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7
// CHECK-64-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK-64-NEXT: store double* [[VLA1]], double** [[TMP88]], align 8
// CHECK-64-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7
// CHECK-64-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK-64-NEXT: store double* [[VLA1]], double** [[TMP90]], align 8
// CHECK-64-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK-64-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 8
// CHECK-64-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7
// CHECK-64-NEXT: store i8* null, i8** [[TMP92]], align 8
// CHECK-64-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8
// CHECK-64-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8
// CHECK-64-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8
// CHECK-64-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8
// CHECK-64-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8
// CHECK-64-NEXT: store i8* null, i8** [[TMP97]], align 8
// CHECK-64-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK-64-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK-64-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0
// CHECK-64-NEXT: store i32 2, i32* [[TMP101]], align 4
// CHECK-64-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1
// CHECK-64-NEXT: store i32 9, i32* [[TMP102]], align 4
// CHECK-64-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2
// CHECK-64-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 8
// CHECK-64-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3
// CHECK-64-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 8
// CHECK-64-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4
// CHECK-64-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 8
// CHECK-64-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5
// CHECK-64-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8
// CHECK-64-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6
// CHECK-64-NEXT: store i8** null, i8*** [[TMP107]], align 8
// CHECK-64-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7
// CHECK-64-NEXT: store i8** null, i8*** [[TMP108]], align 8
// CHECK-64-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8
// CHECK-64-NEXT: store i64 0, i64* [[TMP109]], align 8
// CHECK-64-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9
// CHECK-64-NEXT: store i64 0, i64* [[TMP110]], align 8
// CHECK-64-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10
// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK-64-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11
// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK-64-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12
// CHECK-64-NEXT: store i32 0, i32* [[TMP113]], align 4
// CHECK-64-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]])
// CHECK-64-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK-64-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
// CHECK-64: omp_offload.failed8:
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT9]]
// CHECK-64: omp_offload.cont9:
// CHECK-64-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK-64-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK-64-NEXT: store double* [[TMP116]], double** [[TMP118]], align 8
// CHECK-64-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK-64-NEXT: store double* [[TMP116]], double** [[TMP120]], align 8
// CHECK-64-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0
// CHECK-64-NEXT: store i8* null, i8** [[TMP121]], align 8
// CHECK-64-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8
// CHECK-64-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8
// CHECK-64-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1
// CHECK-64-NEXT: store i8* null, i8** [[TMP126]], align 8
// CHECK-64-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK-64-NEXT: [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK-64-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0
// CHECK-64-NEXT: store i32 2, i32* [[TMP129]], align 4
// CHECK-64-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1
// CHECK-64-NEXT: store i32 2, i32* [[TMP130]], align 4
// CHECK-64-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2
// CHECK-64-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 8
// CHECK-64-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3
// CHECK-64-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 8
// CHECK-64-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4
// CHECK-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8
// CHECK-64-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5
// CHECK-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8
// CHECK-64-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6
// CHECK-64-NEXT: store i8** null, i8*** [[TMP135]], align 8
// CHECK-64-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7
// CHECK-64-NEXT: store i8** null, i8*** [[TMP136]], align 8
// CHECK-64-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8
// CHECK-64-NEXT: store i64 0, i64* [[TMP137]], align 8
// CHECK-64-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9
// CHECK-64-NEXT: store i64 0, i64* [[TMP138]], align 8
// CHECK-64-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10
// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK-64-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11
// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK-64-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12
// CHECK-64-NEXT: store i32 0, i32* [[TMP141]], align 4
// CHECK-64-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]])
// CHECK-64-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK-64-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
// CHECK-64: omp_offload.failed14:
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT15]]
// CHECK-64: omp_offload.cont15:
// CHECK-64-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK-64-NEXT: ret i32 [[TMP144]]
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK-64-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
// CHECK-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8
// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
// CHECK-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8
// CHECK-64-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
// CHECK-64-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// CHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK-64-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8
// CHECK-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK-64-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8
// CHECK-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK-64-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
// CHECK-64-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8
// CHECK-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// CHECK-64-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
// CHECK-64-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
// CHECK-64-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK-64-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
// CHECK-64-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK-64-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK-64-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK-64-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
// CHECK-64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
// CHECK-64-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK-64-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK-64-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
// CHECK-64-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK-64-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK-64-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK-64-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
// CHECK-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
// CHECK-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK-64-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
// CHECK-64-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
// CHECK-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK-64-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
// CHECK-64-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
// CHECK-64-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
// CHECK-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK-64-NEXT: store i64 1, i64* [[X]], align 8
// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK-64-NEXT: store i8 1, i8* [[Y]], align 8
// CHECK-64-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK-64-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
// CHECK-64-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false)
// CHECK-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK-64-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0
// CHECK-64-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0
// CHECK-64-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8
// CHECK-64-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK-64-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 8
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK-64-NEXT: store i32 0, i32* [[A]], align 4
// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK-64-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK-64-NEXT: store i32 [[ADD]], i32* [[A]], align 4
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[A]], align 4
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[A]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK-64-NEXT: store i32 [[ADD6]], i32* [[A]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT: ret i32 [[TMP9]]
// CHECK-64-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK-64-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[B:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
// CHECK-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK-64-NEXT: store i32 [[ADD]], i32* [[B]], align 4
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-64-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave()
// CHECK-64-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK-64-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[CONV]], align 4
// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK-64-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
// CHECK-64-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false)
// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK-64-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK-64-NEXT: store double* [[A]], double** [[TMP13]], align 8
// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK-64-NEXT: store i8* null, i8** [[TMP14]], align 8
// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK-64-NEXT: store i64 [[TMP6]], i64* [[TMP16]], align 8
// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
// CHECK-64-NEXT: store i64 [[TMP6]], i64* [[TMP18]], align 8
// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK-64-NEXT: store i8* null, i8** [[TMP19]], align 8
// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
// CHECK-64-NEXT: store i64 2, i64* [[TMP21]], align 8
// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
// CHECK-64-NEXT: store i64 2, i64* [[TMP23]], align 8
// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK-64-NEXT: store i8* null, i8** [[TMP24]], align 8
// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[TMP26]], align 8
// CHECK-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK-64-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[TMP28]], align 8
// CHECK-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
// CHECK-64-NEXT: store i8* null, i8** [[TMP29]], align 8
// CHECK-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK-64-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK-64-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 8
// CHECK-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK-64-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK-64-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 8
// CHECK-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 8
// CHECK-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK-64-NEXT: store i8* null, i8** [[TMP35]], align 8
// CHECK-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-64-NEXT: store i32 2, i32* [[TMP39]], align 4
// CHECK-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-64-NEXT: store i32 5, i32* [[TMP40]], align 4
// CHECK-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-64-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 8
// CHECK-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-64-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 8
// CHECK-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-64-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 8
// CHECK-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-64-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8
// CHECK-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-64-NEXT: store i8** null, i8*** [[TMP45]], align 8
// CHECK-64-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-64-NEXT: store i8** null, i8*** [[TMP46]], align 8
// CHECK-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-64-NEXT: store i64 0, i64* [[TMP47]], align 8
// CHECK-64-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-64-NEXT: store i64 0, i64* [[TMP48]], align 8
// CHECK-64-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK-64-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-64-NEXT: store i32 0, i32* [[TMP51]], align 4
// CHECK-64-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-64-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK-64-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-64: omp_offload.failed:
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK-64: omp_offload.cont:
// CHECK-64-NEXT: [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]]
// CHECK-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK-64-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK-64-NEXT: [[CONV3:%.*]] = sext i16 [[TMP55]] to i32
// CHECK-64-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]]
// CHECK-64-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK-64-NEXT: ret i32 [[ADD4]]
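// Host side of the internal helper fstatic(int), checked next: the scalars
// 'a' (i32) and 'aaa' (i8) are forwarded by value through 64-bit casted
// slots, and the array 'b' is mapped by address, giving three offload entries.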
// CHECK-64-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[AAA:%.*]] = alloca i8, align 1
// CHECK-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: store i32 0, i32* [[A]], align 4
// CHECK-64-NEXT: store i8 0, i8* [[AAA]], align 1
// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
// CHECK-64-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1
// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8
// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK-64-NEXT: store i8* null, i8** [[TMP8]], align 8
// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
// CHECK-64-NEXT: store i64 [[TMP3]], i64* [[TMP10]], align 8
// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
// CHECK-64-NEXT: store i64 [[TMP3]], i64* [[TMP12]], align 8
// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK-64-NEXT: store i8* null, i8** [[TMP13]], align 8
// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8
// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8
// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK-64-NEXT: store i8* null, i8** [[TMP18]], align 8
// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-64-NEXT: store i32 2, i32* [[TMP21]], align 4
// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-64-NEXT: store i32 3, i32* [[TMP22]], align 4
// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-64-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 8
// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-64-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 8
// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8
// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8
// CHECK-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-64-NEXT: store i8** null, i8*** [[TMP27]], align 8
// CHECK-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-64-NEXT: store i8** null, i8*** [[TMP28]], align 8
// CHECK-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-64-NEXT: store i64 0, i64* [[TMP29]], align 8
// CHECK-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-64-NEXT: store i64 0, i64* [[TMP30]], align 8
// CHECK-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-64-NEXT: store i32 0, i32* [[TMP33]], align 4
// CHECK-64-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-64-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK-64-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-64: omp_offload.failed:
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK-64: omp_offload.cont:
// CHECK-64-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT: ret i32 [[TMP36]]
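// Host stub for ftemplate<int>(int): only 'a' (by value through A_CASTED)
// and the array 'b' are mapped, so the offload argument arrays hold two
// entries and the constant sizes array @.offload_sizes.9 can be used as-is.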
// CHECK-64-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8
// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: store i32 0, i32* [[A]], align 4
// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64*
// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8
// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK-64-NEXT: store i8* null, i8** [[TMP6]], align 8
// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8
// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8
// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK-64-NEXT: store i8* null, i8** [[TMP11]], align 8
// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-64-NEXT: store i32 2, i32* [[TMP14]], align 4
// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-64-NEXT: store i32 2, i32* [[TMP15]], align 4
// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-64-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 8
// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-64-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 8
// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8
// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8
// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-64-NEXT: store i8** null, i8*** [[TMP20]], align 8
// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-64-NEXT: store i8** null, i8*** [[TMP21]], align 8
// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-64-NEXT: store i64 0, i64* [[TMP22]], align 8
// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-64-NEXT: store i64 0, i64* [[TMP23]], align 8
// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-64-NEXT: store i32 0, i32* [[TMP26]], align 4
// CHECK-64-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK-64-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-64: omp_offload.failed:
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK-64: omp_offload.cont:
// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT: ret i32 [[TMP29]]
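// Device-entry body for the region in S1::r1(int) (the _l167 suffix encodes
// the source line): the VLA bounds arrive as i64 values, a private copy of
// the i16 VLA 'c' is rebuilt with stacksave + alloca and initialized by
// memcpy, and 'this->a' is computed from the by-value 'b' before the copy of
// 'c' is updated.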
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8
// CHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK-64-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK-64-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK-64-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
// CHECK-64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK-64-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK-64-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK-64-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK-64-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
// CHECK-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK-64-NEXT: store double [[ADD]], double* [[A]], align 8
// CHECK-64-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8
// CHECK-64-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK-64-NEXT: store double [[INC]], double* [[A5]], align 8
// CHECK-64-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16
// CHECK-64-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
// CHECK-64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK-64-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
// CHECK-64-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK-64-NEXT: ret void
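// Device-entry body for the fstatic region (line 142): 'a' and 'aaa' are
// modified in place through their casted parameter slots, and element 2 of
// the local copy of 'b' is incremented.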
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK-64-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4
// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
// CHECK-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK-64-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
// CHECK-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// CHECK-64-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
// CHECK-64-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK-64-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
// CHECK-64-NEXT: ret void
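// Device-entry body for the ftemplate<int> region (line 128): the same
// pattern as the fstatic region above, minus the i8 'aaa' parameter.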
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK-64-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK-64-NEXT: ret void
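// Module constructor that registers the encoded OpenMP 'requires' flags
// (here the value 1) with the runtime via __tgt_register_requires.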
// CHECK-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK-64-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK-64-NEXT: ret void
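// From here the CHECK-32 lines replay the same scenarios for the 32-bit
// targets: pointers are 4-byte aligned and scalar firstprivates travel
// through i32 rather than i64 casted slots.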
// CHECK-32-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[AA:%.*]] = alloca i16, align 2
// CHECK-32-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// CHECK-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK-32-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK-32-NEXT: [[P:%.*]] = alloca i32*, align 64
// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[GA_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4
// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[A]], align 4
// CHECK-32-NEXT: store i16 0, i16* [[AA]], align 2
// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave()
// CHECK-32-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK-32-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[X]], align 4
// CHECK-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4
// CHECK-32-NEXT: store i32* [[A]], i32** [[P]], align 64
// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[A_CASTED]], align 4
// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK-32-NEXT: [[TMP8:%.*]] = load i32*, i32** [[P]], align 64
// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* @ga, align 4
// CHECK-32-NEXT: store i32 [[TMP9]], i32* [[GA_CASTED]], align 4
// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4
// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[TMP14]], align 4
// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK-32-NEXT: store i8* null, i8** [[TMP15]], align 4
// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
// CHECK-32-NEXT: store i32* [[TMP8]], i32** [[TMP17]], align 4
// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK-32-NEXT: store i32* [[TMP8]], i32** [[TMP19]], align 4
// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK-32-NEXT: store i8* null, i8** [[TMP20]], align 4
// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP22]], align 4
// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP24]], align 4
// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK-32-NEXT: store i8* null, i8** [[TMP25]], align 4
// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-32-NEXT: store i32 2, i32* [[TMP28]], align 4
// CHECK-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-32-NEXT: store i32 3, i32* [[TMP29]], align 4
// CHECK-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-32-NEXT: store i8** [[TMP26]], i8*** [[TMP30]], align 4
// CHECK-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-32-NEXT: store i8** [[TMP27]], i8*** [[TMP31]], align 4
// CHECK-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4
// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4
// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-32-NEXT: store i8** null, i8*** [[TMP34]], align 4
// CHECK-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-32-NEXT: store i8** null, i8*** [[TMP35]], align 4
// CHECK-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-32-NEXT: store i64 0, i64* [[TMP36]], align 8
// CHECK-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-32-NEXT: store i64 0, i64* [[TMP37]], align 8
// CHECK-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4
// CHECK-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4
// CHECK-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-32-NEXT: store i32 0, i32* [[TMP40]], align 4
// CHECK-32-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-32-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
// CHECK-32-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-32: omp_offload.failed:
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK-32: omp_offload.cont:
// CHECK-32-NEXT: [[TMP43:%.*]] = load i16, i16* [[AA]], align 2
// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK-32-NEXT: store i16 [[TMP43]], i16* [[CONV]], align 2
// CHECK-32-NEXT: [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4
// CHECK-32-NEXT: [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4
// CHECK-32-NEXT: [[TMP46:%.*]] = sext i32 [[TMP45]] to i64
// CHECK-32-NEXT: [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK-32-NEXT: [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8
// CHECK-32-NEXT: [[TMP49:%.*]] = sext i32 [[TMP48]] to i64
// CHECK-32-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false)
// CHECK-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
// CHECK-32-NEXT: store i32 [[TMP44]], i32* [[TMP52]], align 4
// CHECK-32-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32*
// CHECK-32-NEXT: store i32 [[TMP44]], i32* [[TMP54]], align 4
// CHECK-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
// CHECK-32-NEXT: store i8* null, i8** [[TMP55]], align 4
// CHECK-32-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4
// CHECK-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4
// CHECK-32-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
// CHECK-32-NEXT: store i8* null, i8** [[TMP60]], align 4
// CHECK-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[TMP62]], align 4
// CHECK-32-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[TMP64]], align 4
// CHECK-32-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
// CHECK-32-NEXT: store i8* null, i8** [[TMP65]], align 4
// CHECK-32-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
// CHECK-32-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK-32-NEXT: store float* [[VLA]], float** [[TMP67]], align 4
// CHECK-32-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
// CHECK-32-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK-32-NEXT: store float* [[VLA]], float** [[TMP69]], align 4
// CHECK-32-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK-32-NEXT: store i64 [[TMP46]], i64* [[TMP70]], align 4
// CHECK-32-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
// CHECK-32-NEXT: store i8* null, i8** [[TMP71]], align 4
// CHECK-32-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
// CHECK-32-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4
// CHECK-32-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
// CHECK-32-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4
// CHECK-32-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
// CHECK-32-NEXT: store i8* null, i8** [[TMP76]], align 4
// CHECK-32-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
// CHECK-32-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
// CHECK-32-NEXT: store i32 5, i32* [[TMP78]], align 4
// CHECK-32-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
// CHECK-32-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
// CHECK-32-NEXT: store i32 5, i32* [[TMP80]], align 4
// CHECK-32-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
// CHECK-32-NEXT: store i8* null, i8** [[TMP81]], align 4
// CHECK-32-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
// CHECK-32-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[TMP83]], align 4
// CHECK-32-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
// CHECK-32-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[TMP85]], align 4
// CHECK-32-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
// CHECK-32-NEXT: store i8* null, i8** [[TMP86]], align 4
// CHECK-32-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
// CHECK-32-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK-32-NEXT: store double* [[VLA1]], double** [[TMP88]], align 4
// CHECK-32-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
// CHECK-32-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK-32-NEXT: store double* [[VLA1]], double** [[TMP90]], align 4
// CHECK-32-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK-32-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 4
// CHECK-32-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
// CHECK-32-NEXT: store i8* null, i8** [[TMP92]], align 4
// CHECK-32-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
// CHECK-32-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4
// CHECK-32-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
// CHECK-32-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4
// CHECK-32-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
// CHECK-32-NEXT: store i8* null, i8** [[TMP97]], align 4
// CHECK-32-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK-32-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK-32-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0
// CHECK-32-NEXT: store i32 2, i32* [[TMP101]], align 4
// CHECK-32-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1
// CHECK-32-NEXT: store i32 9, i32* [[TMP102]], align 4
// CHECK-32-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2
// CHECK-32-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 4
// CHECK-32-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3
// CHECK-32-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 4
// CHECK-32-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4
// CHECK-32-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 4
// CHECK-32-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5
// CHECK-32-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4
// CHECK-32-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6
// CHECK-32-NEXT: store i8** null, i8*** [[TMP107]], align 4
// CHECK-32-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7
// CHECK-32-NEXT: store i8** null, i8*** [[TMP108]], align 4
// CHECK-32-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8
// CHECK-32-NEXT: store i64 0, i64* [[TMP109]], align 8
// CHECK-32-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9
// CHECK-32-NEXT: store i64 0, i64* [[TMP110]], align 8
// CHECK-32-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10
// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK-32-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11
// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK-32-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12
// CHECK-32-NEXT: store i32 0, i32* [[TMP113]], align 4
// CHECK-32-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]])
// CHECK-32-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK-32-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
// CHECK-32: omp_offload.failed6:
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT7]]
// CHECK-32: omp_offload.cont7:
// CHECK-32-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK-32-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK-32-NEXT: store double* [[TMP116]], double** [[TMP118]], align 4
// CHECK-32-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK-32-NEXT: store double* [[TMP116]], double** [[TMP120]], align 4
// CHECK-32-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
// CHECK-32-NEXT: store i8* null, i8** [[TMP121]], align 4
// CHECK-32-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4
// CHECK-32-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4
// CHECK-32-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
// CHECK-32-NEXT: store i8* null, i8** [[TMP126]], align 4
// CHECK-32-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK-32-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK-32-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0
// CHECK-32-NEXT: store i32 2, i32* [[TMP129]], align 4
// CHECK-32-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1
// CHECK-32-NEXT: store i32 2, i32* [[TMP130]], align 4
// CHECK-32-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2
// CHECK-32-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 4
// CHECK-32-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3
// CHECK-32-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 4
// CHECK-32-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4
// CHECK-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4
// CHECK-32-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5
// CHECK-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4
// CHECK-32-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6
// CHECK-32-NEXT: store i8** null, i8*** [[TMP135]], align 4
// CHECK-32-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7
// CHECK-32-NEXT: store i8** null, i8*** [[TMP136]], align 4
// CHECK-32-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8
// CHECK-32-NEXT: store i64 0, i64* [[TMP137]], align 8
// CHECK-32-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9
// CHECK-32-NEXT: store i64 0, i64* [[TMP138]], align 8
// CHECK-32-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10
// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK-32-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11
// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK-32-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12
// CHECK-32-NEXT: store i32 0, i32* [[TMP141]], align 4
// CHECK-32-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]])
// CHECK-32-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK-32-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
// CHECK-32: omp_offload.failed12:
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT13]]
// CHECK-32: omp_offload.cont13:
// CHECK-32-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT: call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK-32-NEXT: ret i32 [[TMP144]]
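// Device-entry body for the first region in foo (line 63): the three
// by-value arguments 'a', 'p' and 'ga' are only spilled to local slots.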
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK-32-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4
// CHECK-32-NEXT: ret void
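// Device-entry body for the second region in foo (line 70): every mapped
// aggregate and VLA gets a local copy initialized by memcpy (the VLAs via
// stacksave + alloca), and the copies, not the originals, are then written.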
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
// CHECK-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4
// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
// CHECK-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4
// CHECK-32-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
// CHECK-32-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// CHECK-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK-32-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK-32-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4
// CHECK-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK-32-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
// CHECK-32-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4
// CHECK-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK-32-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
// CHECK-32-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
// CHECK-32-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK-32-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
// CHECK-32-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK-32-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
// CHECK-32-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
// CHECK-32-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
// CHECK-32-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
// CHECK-32-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK-32-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
// CHECK-32-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK-32-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
// CHECK-32-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK-32-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK-32-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK-32-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
// CHECK-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK-32-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
// CHECK-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK-32-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
// CHECK-32-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
// CHECK-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK-32-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
// CHECK-32-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
// CHECK-32-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
// CHECK-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK-32-NEXT: store i64 1, i64* [[X]], align 4
// CHECK-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK-32-NEXT: store i8 1, i8* [[Y]], align 4
// CHECK-32-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK-32-NEXT: ret void
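// The checks below cover the target region outlined from source line 111
// (encoded in the `_l111` suffix of the entry name): `ptr` is passed by
// value, while the two-field struct `e` is firstprivatized with an 8-byte
// llvm.memcpy before its `x` member is read.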
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK-32-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
// CHECK-32-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false)
// CHECK-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK-32-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0
// CHECK-32-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0
// CHECK-32-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4
// CHECK-32-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK-32-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 4
// CHECK-32-NEXT: ret void
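// Host-side checks for bar() follow: it simply calls foo, S1::r1, the
// static helper and the template instantiation, accumulating each result
// into the local `a`; no offloading is emitted in bar itself.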
// CHECK-32-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[A]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK-32-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK-32-NEXT: store i32 [[ADD]], i32* [[A]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[A]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[A]], align 4
// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[A]], align 4
// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT: ret i32 [[TMP9]]
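// S1::r1 host checks: a 2*n i16 VLA is allocated under llvm.stacksave,
// five offload arguments are marshalled (this, b, the two VLA bounds and
// the VLA data, whose byte size is computed at run time and stored into
// the sizes array copied from @.offload_sizes.5), and __tgt_target_kernel
// is invoked with a host fallback when it returns non-zero.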
// CHECK-32-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK-32-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[B:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
// CHECK-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK-32-NEXT: store i32 [[ADD]], i32* [[B]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK-32-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK-32-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4
// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
// CHECK-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK-32-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
// CHECK-32-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false)
// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK-32-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK-32-NEXT: store double* [[A]], double** [[TMP13]], align 4
// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK-32-NEXT: store i8* null, i8** [[TMP14]], align 4
// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32*
// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[TMP16]], align 4
// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32*
// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[TMP18]], align 4
// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK-32-NEXT: store i8* null, i8** [[TMP19]], align 4
// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32*
// CHECK-32-NEXT: store i32 2, i32* [[TMP21]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
// CHECK-32-NEXT: store i32 2, i32* [[TMP23]], align 4
// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK-32-NEXT: store i8* null, i8** [[TMP24]], align 4
// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK-32-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP26]], align 4
// CHECK-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK-32-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP28]], align 4
// CHECK-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
// CHECK-32-NEXT: store i8* null, i8** [[TMP29]], align 4
// CHECK-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK-32-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK-32-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 4
// CHECK-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK-32-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK-32-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 4
// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK-32-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 4
// CHECK-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK-32-NEXT: store i8* null, i8** [[TMP35]], align 4
// CHECK-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-32-NEXT: store i32 2, i32* [[TMP39]], align 4
// CHECK-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-32-NEXT: store i32 5, i32* [[TMP40]], align 4
// CHECK-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-32-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 4
// CHECK-32-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-32-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 4
// CHECK-32-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-32-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 4
// CHECK-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-32-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4
// CHECK-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-32-NEXT: store i8** null, i8*** [[TMP45]], align 4
// CHECK-32-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-32-NEXT: store i8** null, i8*** [[TMP46]], align 4
// CHECK-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-32-NEXT: store i64 0, i64* [[TMP47]], align 8
// CHECK-32-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-32-NEXT: store i64 0, i64* [[TMP48]], align 8
// CHECK-32-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-32-NEXT: store i32 0, i32* [[TMP51]], align 4
// CHECK-32-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-32-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK-32-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-32: omp_offload.failed:
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK-32: omp_offload.cont:
// CHECK-32-NEXT: [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]]
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]]
// CHECK-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK-32-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP55]] to i32
// CHECK-32-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]]
// CHECK-32-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT: call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK-32-NEXT: ret i32 [[ADD3]]
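// fstatic host checks: `a` and `aaa` are passed by value (the i8 `aaa`
// travels through an i32 slot), `b` is mapped with the constant sizes in
// @.offload_sizes.7, and the usual offload-failure fallback path follows.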
// CHECK-32-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[AAA:%.*]] = alloca i8, align 1
// CHECK-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[A]], align 4
// CHECK-32-NEXT: store i8 0, i8* [[AAA]], align 1
// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
// CHECK-32-NEXT: store i8 [[TMP2]], i8* [[CONV]], align 1
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4
// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK-32-NEXT: store i8* null, i8** [[TMP8]], align 4
// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[TMP10]], align 4
// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[TMP12]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK-32-NEXT: store i8* null, i8** [[TMP13]], align 4
// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4
// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4
// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK-32-NEXT: store i8* null, i8** [[TMP18]], align 4
// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-32-NEXT: store i32 2, i32* [[TMP21]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-32-NEXT: store i32 3, i32* [[TMP22]], align 4
// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-32-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 4
// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-32-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 4
// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4
// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4
// CHECK-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-32-NEXT: store i8** null, i8*** [[TMP27]], align 4
// CHECK-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-32-NEXT: store i8** null, i8*** [[TMP28]], align 4
// CHECK-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-32-NEXT: store i64 0, i64* [[TMP29]], align 8
// CHECK-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-32-NEXT: store i64 0, i64* [[TMP30]], align 8
// CHECK-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-32-NEXT: store i32 0, i32* [[TMP33]], align 4
// CHECK-32-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-32-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK-32-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-32: omp_offload.failed:
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK-32: omp_offload.cont:
// CHECK-32-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT: ret i32 [[TMP36]]
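// ftemplate<int> host checks: the same marshalling pattern with two
// arguments (`a` by value, `b` mapped with the constant sizes in
// @.offload_sizes.9).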
// CHECK-32-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4
// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[A]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32*
// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP3]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK-32-NEXT: store i8* null, i8** [[TMP6]], align 4
// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4
// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4
// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK-32-NEXT: store i8* null, i8** [[TMP11]], align 4
// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-32-NEXT: store i32 2, i32* [[TMP14]], align 4
// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-32-NEXT: store i32 2, i32* [[TMP15]], align 4
// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-32-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 4
// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-32-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 4
// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4
// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4
// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-32-NEXT: store i8** null, i8*** [[TMP20]], align 4
// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-32-NEXT: store i8** null, i8*** [[TMP21]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-32-NEXT: store i64 0, i64* [[TMP22]], align 8
// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-32-NEXT: store i64 0, i64* [[TMP23]], align 8
// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-32-NEXT: store i32 0, i32* [[TMP26]], align 4
// CHECK-32-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK-32-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-32: omp_offload.failed:
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK-32: omp_offload.cont:
// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT: ret i32 [[TMP29]]
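// Outlined target region for S1::r1 (line 167): the firstprivate i16 VLA
// is re-allocated under llvm.stacksave and initialized with an llvm.memcpy
// of its runtime byte size; `b` is converted to double to update this->a,
// and the incremented value is truncated back into the private VLA.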
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4
// CHECK-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK-32-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK-32-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK-32-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK-32-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK-32-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
// CHECK-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK-32-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// CHECK-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK-32-NEXT: store double [[ADD]], double* [[A]], align 4
// CHECK-32-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4
// CHECK-32-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK-32-NEXT: store double [[INC]], double* [[A4]], align 4
// CHECK-32-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
// CHECK-32-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
// CHECK-32-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK-32-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
// CHECK-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK-32-NEXT: ret void
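// Outlined region for fstatic (line 142): `b` is firstprivatized via a
// 40-byte llvm.memcpy, and the body increments `a`, the i8 view of `aaa`,
// and b[2] in the private copy.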
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
// CHECK-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
// CHECK-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
// CHECK-32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// CHECK-32-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
// CHECK-32-NEXT: ret void
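// Outlined region for ftemplate<int> (line 128): identical shape without
// the `aaa` argument.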
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK-32-NEXT: ret void
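// Registration helper that forwards the encoded `requires` flags (here
// i64 1) to the runtime via __tgt_register_requires.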
// CHECK-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK-32-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK-32-NEXT: ret void
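// The CHECK0-64 lines below re-verify the same host codegen for the
// 64-bit configuration: VLA bounds and casted scalars widen to i64 and
// pointer slots use align 8 rather than align 4.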
// CHECK0-64-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK0-64-NEXT: entry:
// CHECK0-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK0-64-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT: [[AA:%.*]] = alloca i16, align 2
// CHECK0-64-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// CHECK0-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK0-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK0-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK0-64-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK0-64-NEXT: [[P:%.*]] = alloca i32*, align 64
// CHECK0-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[GA_CASTED:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8
// CHECK0-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK0-64-NEXT: store i32 0, i32* [[A]], align 4
// CHECK0-64-NEXT: store i16 0, i16* [[AA]], align 2
// CHECK0-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// CHECK0-64-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK0-64-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK0-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK0-64-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK0-64-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// CHECK0-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK0-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT: store i32 [[TMP6]], i32* [[X]], align 4
// CHECK0-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK0-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4
// CHECK0-64-NEXT: store i32* [[A]], i32** [[P]], align 64
// CHECK0-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK0-64-NEXT: store i32 [[TMP8]], i32* [[CONV]], align 4
// CHECK0-64-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK0-64-NEXT: [[TMP10:%.*]] = load i32*, i32** [[P]], align 64
// CHECK0-64-NEXT: [[TMP11:%.*]] = load i32, i32* @ga, align 4
// CHECK0-64-NEXT: [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32*
// CHECK0-64-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4
// CHECK0-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8
// CHECK0-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP9]], i64* [[TMP14]], align 8
// CHECK0-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP9]], i64* [[TMP16]], align 8
// CHECK0-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-64-NEXT: store i8* null, i8** [[TMP17]], align 8
// CHECK0-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-64-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK0-64-NEXT: store i32* [[TMP10]], i32** [[TMP19]], align 8
// CHECK0-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
// CHECK0-64-NEXT: store i32* [[TMP10]], i32** [[TMP21]], align 8
// CHECK0-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-64-NEXT: store i8* null, i8** [[TMP22]], align 8
// CHECK0-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK0-64-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP12]], i64* [[TMP24]], align 8
// CHECK0-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK0-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP12]], i64* [[TMP26]], align 8
// CHECK0-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK0-64-NEXT: store i8* null, i8** [[TMP27]], align 8
// CHECK0-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-64-NEXT: store i32 2, i32* [[TMP30]], align 4
// CHECK0-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-64-NEXT: store i32 3, i32* [[TMP31]], align 4
// CHECK0-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-64-NEXT: store i8** [[TMP28]], i8*** [[TMP32]], align 8
// CHECK0-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-64-NEXT: store i8** [[TMP29]], i8*** [[TMP33]], align 8
// CHECK0-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8
// CHECK0-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8
// CHECK0-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-64-NEXT: store i8** null, i8*** [[TMP36]], align 8
// CHECK0-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-64-NEXT: store i8** null, i8*** [[TMP37]], align 8
// CHECK0-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-64-NEXT: store i64 0, i64* [[TMP38]], align 8
// CHECK0-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-64-NEXT: store i64 0, i64* [[TMP39]], align 8
// CHECK0-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4
// CHECK0-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4
// CHECK0-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-64-NEXT: store i32 0, i32* [[TMP42]], align 4
// CHECK0-64-NEXT: [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK0-64-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
// CHECK0-64-NEXT: br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0-64: omp_offload.failed:
// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK0-64: omp_offload.cont:
// CHECK0-64-NEXT: [[TMP45:%.*]] = load i16, i16* [[AA]], align 2
// CHECK0-64-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
// CHECK0-64-NEXT: store i16 [[TMP45]], i16* [[CONV3]], align 2
// CHECK0-64-NEXT: [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8
// CHECK0-64-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK0-64-NEXT: [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK0-64-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8
// CHECK0-64-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false)
// CHECK0-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP46]], i64* [[TMP52]], align 8
// CHECK0-64-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP46]], i64* [[TMP54]], align 8
// CHECK0-64-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0
// CHECK0-64-NEXT: store i8* null, i8** [[TMP55]], align 8
// CHECK0-64-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
// CHECK0-64-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK0-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8
// CHECK0-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
// CHECK0-64-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK0-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8
// CHECK0-64-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1
// CHECK0-64-NEXT: store i8* null, i8** [[TMP60]], align 8
// CHECK0-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
// CHECK0-64-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP62]], align 8
// CHECK0-64-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
// CHECK0-64-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP64]], align 8
// CHECK0-64-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2
// CHECK0-64-NEXT: store i8* null, i8** [[TMP65]], align 8
// CHECK0-64-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
// CHECK0-64-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK0-64-NEXT: store float* [[VLA]], float** [[TMP67]], align 8
// CHECK0-64-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
// CHECK0-64-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK0-64-NEXT: store float* [[VLA]], float** [[TMP69]], align 8
// CHECK0-64-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK0-64-NEXT: store i64 [[TMP47]], i64* [[TMP70]], align 8
// CHECK0-64-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3
// CHECK0-64-NEXT: store i8* null, i8** [[TMP71]], align 8
// CHECK0-64-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4
// CHECK0-64-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK0-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8
// CHECK0-64-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4
// CHECK0-64-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK0-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8
// CHECK0-64-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4
// CHECK0-64-NEXT: store i8* null, i8** [[TMP76]], align 8
// CHECK0-64-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5
// CHECK0-64-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
// CHECK0-64-NEXT: store i64 5, i64* [[TMP78]], align 8
// CHECK0-64-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5
// CHECK0-64-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
// CHECK0-64-NEXT: store i64 5, i64* [[TMP80]], align 8
// CHECK0-64-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5
// CHECK0-64-NEXT: store i8* null, i8** [[TMP81]], align 8
// CHECK0-64-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6
// CHECK0-64-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP4]], i64* [[TMP83]], align 8
// CHECK0-64-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6
// CHECK0-64-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP4]], i64* [[TMP85]], align 8
// CHECK0-64-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6
// CHECK0-64-NEXT: store i8* null, i8** [[TMP86]], align 8
// CHECK0-64-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7
// CHECK0-64-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK0-64-NEXT: store double* [[VLA1]], double** [[TMP88]], align 8
// CHECK0-64-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7
// CHECK0-64-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK0-64-NEXT: store double* [[VLA1]], double** [[TMP90]], align 8
// CHECK0-64-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK0-64-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 8
// CHECK0-64-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7
// CHECK0-64-NEXT: store i8* null, i8** [[TMP92]], align 8
// CHECK0-64-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8
// CHECK0-64-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK0-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8
// CHECK0-64-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8
// CHECK0-64-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK0-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8
// CHECK0-64-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8
// CHECK0-64-NEXT: store i8* null, i8** [[TMP97]], align 8
// CHECK0-64-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK0-64-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK0-64-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0
// CHECK0-64-NEXT: store i32 2, i32* [[TMP101]], align 4
// CHECK0-64-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1
// CHECK0-64-NEXT: store i32 9, i32* [[TMP102]], align 4
// CHECK0-64-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2
// CHECK0-64-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 8
// CHECK0-64-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3
// CHECK0-64-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 8
// CHECK0-64-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4
// CHECK0-64-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 8
// CHECK0-64-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5
// CHECK0-64-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8
// CHECK0-64-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6
// CHECK0-64-NEXT: store i8** null, i8*** [[TMP107]], align 8
// CHECK0-64-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7
// CHECK0-64-NEXT: store i8** null, i8*** [[TMP108]], align 8
// CHECK0-64-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8
// CHECK0-64-NEXT: store i64 0, i64* [[TMP109]], align 8
// CHECK0-64-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9
// CHECK0-64-NEXT: store i64 0, i64* [[TMP110]], align 8
// CHECK0-64-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10
// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK0-64-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11
// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK0-64-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12
// CHECK0-64-NEXT: store i32 0, i32* [[TMP113]], align 4
// CHECK0-64-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]])
// CHECK0-64-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK0-64-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
// CHECK0-64: omp_offload.failed8:
// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT9]]
// CHECK0-64: omp_offload.cont9:
// CHECK0-64-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK0-64-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK0-64-NEXT: store double* [[TMP116]], double** [[TMP118]], align 8
// CHECK0-64-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK0-64-NEXT: store double* [[TMP116]], double** [[TMP120]], align 8
// CHECK0-64-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0
// CHECK0-64-NEXT: store i8* null, i8** [[TMP121]], align 8
// CHECK0-64-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
// CHECK0-64-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK0-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8
// CHECK0-64-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
// CHECK0-64-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK0-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8
// CHECK0-64-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1
// CHECK0-64-NEXT: store i8* null, i8** [[TMP126]], align 8
// CHECK0-64-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK0-64-NEXT: [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK0-64-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0
// CHECK0-64-NEXT: store i32 2, i32* [[TMP129]], align 4
// CHECK0-64-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1
// CHECK0-64-NEXT: store i32 2, i32* [[TMP130]], align 4
// CHECK0-64-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2
// CHECK0-64-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 8
// CHECK0-64-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3
// CHECK0-64-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 8
// CHECK0-64-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4
// CHECK0-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8
// CHECK0-64-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5
// CHECK0-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8
// CHECK0-64-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6
// CHECK0-64-NEXT: store i8** null, i8*** [[TMP135]], align 8
// CHECK0-64-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7
// CHECK0-64-NEXT: store i8** null, i8*** [[TMP136]], align 8
// CHECK0-64-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8
// CHECK0-64-NEXT: store i64 0, i64* [[TMP137]], align 8
// CHECK0-64-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9
// CHECK0-64-NEXT: store i64 0, i64* [[TMP138]], align 8
// CHECK0-64-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10
// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK0-64-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11
// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK0-64-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12
// CHECK0-64-NEXT: store i32 0, i32* [[TMP141]], align 4
// CHECK0-64-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]])
// CHECK0-64-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK0-64-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
// CHECK0-64: omp_offload.failed14:
// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT15]]
// CHECK0-64: omp_offload.cont15:
// CHECK0-64-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT: call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK0-64-NEXT: ret i32 [[TMP144]]
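// Device-side entry for the target region at source line 63 (the _l63
// suffix): the scalar firstprivates a and ga arrive by value in i64 slots
// and are reinterpreted as i32 through bitcasts of their allocas.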
// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK0-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK0-64-NEXT: entry:
// CHECK0-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8
// CHECK0-64-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK0-64-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8
// CHECK0-64-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8
// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK0-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
// CHECK0-64-NEXT: ret void
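// Device-side entry for the target region at source line 70: the array and
// VLA firstprivates are copied into local storage with llvm.memcpy, with
// stacksave/stackrestore bracketing the VLA allocas.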
// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK0-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
// CHECK0-64-NEXT: entry:
// CHECK0-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
// CHECK0-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8
// CHECK0-64-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
// CHECK0-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8
// CHECK0-64-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
// CHECK0-64-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// CHECK0-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK0-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK0-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK0-64-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8
// CHECK0-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
// CHECK0-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK0-64-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8
// CHECK0-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK0-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK0-64-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
// CHECK0-64-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8
// CHECK0-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// CHECK0-64-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
// CHECK0-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK0-64-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
// CHECK0-64-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK0-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK0-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
// CHECK0-64-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
// CHECK0-64-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
// CHECK0-64-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK0-64-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
// CHECK0-64-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK0-64-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK0-64-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK0-64-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK0-64-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
// CHECK0-64-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK0-64-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
// CHECK0-64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK0-64-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
// CHECK0-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK0-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
// CHECK0-64-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK0-64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
// CHECK0-64-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK0-64-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
// CHECK0-64-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK0-64-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
// CHECK0-64-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK0-64-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK0-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK0-64-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK0-64-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
// CHECK0-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK0-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
// CHECK0-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK0-64-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
// CHECK0-64-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
// CHECK0-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK0-64-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
// CHECK0-64-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
// CHECK0-64-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
// CHECK0-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK0-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK0-64-NEXT: store i64 1, i64* [[X]], align 8
// CHECK0-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK0-64-NEXT: store i8 1, i8* [[Y]], align 8
// CHECK0-64-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK0-64-NEXT: ret void
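// Device-side entry for the target region at source line 111: the struct
// firstprivate e is copied locally, while ptr is captured by value and
// written through directly.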
// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK0-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK0-64-NEXT: entry:
// CHECK0-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK0-64-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
// CHECK0-64-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK0-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK0-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
// CHECK0-64-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
// CHECK0-64-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK0-64-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false)
// CHECK0-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK0-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK0-64-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0
// CHECK0-64-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8
// CHECK0-64-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK0-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0
// CHECK0-64-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8
// CHECK0-64-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK0-64-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 8
// CHECK0-64-NEXT: ret void
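// Host codegen for bar(): ordinary calls into foo, S1::r1, fstatic and
// ftemplate<int>, accumulating each result into a.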
// CHECK0-64-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK0-64-NEXT: entry:
// CHECK0-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK0-64-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// CHECK0-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK0-64-NEXT: store i32 0, i32* [[A]], align 4
// CHECK0-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK0-64-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
// CHECK0-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK0-64-NEXT: store i32 [[ADD]], i32* [[A]], align 4
// CHECK0-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// CHECK0-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK0-64-NEXT: store i32 [[ADD2]], i32* [[A]], align 4
// CHECK0-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// CHECK0-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK0-64-NEXT: store i32 [[ADD4]], i32* [[A]], align 4
// CHECK0-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// CHECK0-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK0-64-NEXT: store i32 [[ADD6]], i32* [[A]], align 4
// CHECK0-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT: ret i32 [[TMP9]]
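// Host codegen for S1::r1: five offload arguments (this, b, the two VLA
// bounds and the VLA data), with the VLA entry's size computed at run time
// and patched into the locally copied .offload_sizes array.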
// CHECK0-64-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK0-64-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK0-64-NEXT: entry:
// CHECK0-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK0-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT: [[B:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK0-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
// CHECK0-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK0-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK0-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK0-64-NEXT: store i32 [[ADD]], i32* [[B]], align 4
// CHECK0-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK0-64-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave()
// CHECK0-64-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK0-64-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// CHECK0-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
// CHECK0-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4
// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK0-64-NEXT: store i32 [[TMP5]], i32* [[CONV]], align 4
// CHECK0-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK0-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK0-64-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
// CHECK0-64-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false)
// CHECK0-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK0-64-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
// CHECK0-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK0-64-NEXT: store double* [[A]], double** [[TMP13]], align 8
// CHECK0-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-64-NEXT: store i8* null, i8** [[TMP14]], align 8
// CHECK0-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP6]], i64* [[TMP16]], align 8
// CHECK0-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-64-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP6]], i64* [[TMP18]], align 8
// CHECK0-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-64-NEXT: store i8* null, i8** [[TMP19]], align 8
// CHECK0-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK0-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
// CHECK0-64-NEXT: store i64 2, i64* [[TMP21]], align 8
// CHECK0-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK0-64-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
// CHECK0-64-NEXT: store i64 2, i64* [[TMP23]], align 8
// CHECK0-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK0-64-NEXT: store i8* null, i8** [[TMP24]], align 8
// CHECK0-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK0-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP2]], i64* [[TMP26]], align 8
// CHECK0-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK0-64-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP2]], i64* [[TMP28]], align 8
// CHECK0-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
// CHECK0-64-NEXT: store i8* null, i8** [[TMP29]], align 8
// CHECK0-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK0-64-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK0-64-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 8
// CHECK0-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK0-64-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK0-64-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 8
// CHECK0-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK0-64-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 8
// CHECK0-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK0-64-NEXT: store i8* null, i8** [[TMP35]], align 8
// CHECK0-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK0-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-64-NEXT: store i32 2, i32* [[TMP39]], align 4
// CHECK0-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-64-NEXT: store i32 5, i32* [[TMP40]], align 4
// CHECK0-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-64-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 8
// CHECK0-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-64-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 8
// CHECK0-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-64-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 8
// CHECK0-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-64-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8
// CHECK0-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-64-NEXT: store i8** null, i8*** [[TMP45]], align 8
// CHECK0-64-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-64-NEXT: store i8** null, i8*** [[TMP46]], align 8
// CHECK0-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-64-NEXT: store i64 0, i64* [[TMP47]], align 8
// CHECK0-64-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-64-NEXT: store i64 0, i64* [[TMP48]], align 8
// CHECK0-64-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK0-64-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK0-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-64-NEXT: store i32 0, i32* [[TMP51]], align 4
// CHECK0-64-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK0-64-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK0-64-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0-64: omp_offload.failed:
// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK0-64: omp_offload.cont:
// CHECK0-64-NEXT: [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]]
// CHECK0-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK0-64-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK0-64-NEXT: [[CONV3:%.*]] = sext i16 [[TMP55]] to i32
// CHECK0-64-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK0-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]]
// CHECK0-64-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT: call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK0-64-NEXT: ret i32 [[ADD4]]
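// Host codegen for fstatic: a and aaa are passed by value through i64
// casts, b by reference; the sizes come from the constant
// @.offload_sizes.7 array.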
// CHECK0-64-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// CHECK0-64-NEXT: entry:
// CHECK0-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT: [[AAA:%.*]] = alloca i8, align 1
// CHECK0-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK0-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT: store i32 0, i32* [[A]], align 4
// CHECK0-64-NEXT: store i8 0, i8* [[AAA]], align 1
// CHECK0-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK0-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK0-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK0-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK0-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
// CHECK0-64-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1
// CHECK0-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
// CHECK0-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK0-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8
// CHECK0-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-64-NEXT: store i8* null, i8** [[TMP8]], align 8
// CHECK0-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP3]], i64* [[TMP10]], align 8
// CHECK0-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-64-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP3]], i64* [[TMP12]], align 8
// CHECK0-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-64-NEXT: store i8* null, i8** [[TMP13]], align 8
// CHECK0-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK0-64-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8
// CHECK0-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK0-64-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8
// CHECK0-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK0-64-NEXT: store i8* null, i8** [[TMP18]], align 8
// CHECK0-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-64-NEXT: store i32 2, i32* [[TMP21]], align 4
// CHECK0-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-64-NEXT: store i32 3, i32* [[TMP22]], align 4
// CHECK0-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-64-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 8
// CHECK0-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-64-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 8
// CHECK0-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8
// CHECK0-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8
// CHECK0-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-64-NEXT: store i8** null, i8*** [[TMP27]], align 8
// CHECK0-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-64-NEXT: store i8** null, i8*** [[TMP28]], align 8
// CHECK0-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-64-NEXT: store i64 0, i64* [[TMP29]], align 8
// CHECK0-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-64-NEXT: store i64 0, i64* [[TMP30]], align 8
// CHECK0-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK0-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK0-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-64-NEXT: store i32 0, i32* [[TMP33]], align 4
// CHECK0-64-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK0-64-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK0-64-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0-64: omp_offload.failed:
// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK0-64: omp_offload.cont:
// CHECK0-64-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT: ret i32 [[TMP36]]
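// Host codegen for ftemplate<int>: a is passed by value, b by reference,
// with constant sizes from @.offload_sizes.9.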
// CHECK0-64-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK0-64-NEXT: entry:
// CHECK0-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK0-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8
// CHECK0-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT: store i32 0, i32* [[A]], align 4
// CHECK0-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK0-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK0-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK0-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8
// CHECK0-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK0-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-64-NEXT: store i8* null, i8** [[TMP6]], align 8
// CHECK0-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-64-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8
// CHECK0-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8
// CHECK0-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-64-NEXT: store i8* null, i8** [[TMP11]], align 8
// CHECK0-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-64-NEXT: store i32 2, i32* [[TMP14]], align 4
// CHECK0-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-64-NEXT: store i32 2, i32* [[TMP15]], align 4
// CHECK0-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-64-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 8
// CHECK0-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-64-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 8
// CHECK0-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8
// CHECK0-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8
// CHECK0-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-64-NEXT: store i8** null, i8*** [[TMP20]], align 8
// CHECK0-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-64-NEXT: store i8** null, i8*** [[TMP21]], align 8
// CHECK0-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-64-NEXT: store i64 0, i64* [[TMP22]], align 8
// CHECK0-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-64-NEXT: store i64 0, i64* [[TMP23]], align 8
// CHECK0-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK0-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK0-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-64-NEXT: store i32 0, i32* [[TMP26]], align 4
// CHECK0-64-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK0-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK0-64-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0-64: omp_offload.failed:
// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK0-64: omp_offload.cont:
// CHECK0-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT: ret i32 [[TMP29]]
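// Device-side entry for S1::r1's target region at source line 167: the VLA
// firstprivate c is copied with llvm.memcpy, this->a is recomputed from b
// and incremented, and its truncated value is stored into the local VLA copy.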
// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK0-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK0-64-NEXT: entry:
// CHECK0-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK0-64-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8
// CHECK0-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK0-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK0-64-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK0-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK0-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK0-64-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8
// CHECK0-64-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK0-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK0-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK0-64-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// CHECK0-64-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK0-64-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK0-64-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK0-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
// CHECK0-64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK0-64-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// CHECK0-64-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK0-64-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
// CHECK0-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK0-64-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK0-64-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
// CHECK0-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK0-64-NEXT: store double [[ADD]], double* [[A]], align 8
// CHECK0-64-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK0-64-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8
// CHECK0-64-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK0-64-NEXT: store double [[INC]], double* [[A5]], align 8
// CHECK0-64-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16
// CHECK0-64-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
// CHECK0-64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK0-64-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
// CHECK0-64-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK0-64-NEXT: ret void
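// Device-side entry for fstatic's target region at source line 142: a and
// aaa are updated in place through their by-value slots, and the array
// firstprivate b is copied locally before its element is incremented.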
// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK0-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK0-64-NEXT: entry:
// CHECK0-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK0-64-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4
// CHECK0-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK0-64-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK0-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
// CHECK0-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK0-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
// CHECK0-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK0-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK0-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK0-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK0-64-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
// CHECK0-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// CHECK0-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// CHECK0-64-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
// CHECK0-64-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1
// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
// CHECK0-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK0-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK0-64-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
// CHECK0-64-NEXT: ret void
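// Device-side entry for ftemplate's target region at source line 128: same
// shape as the region above, minus the i8 firstprivate.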
// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK0-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK0-64-NEXT: entry:
// CHECK0-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK0-64-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK0-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK0-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK0-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK0-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK0-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK0-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK0-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
// CHECK0-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK0-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK0-64-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK0-64-NEXT: ret void
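// Module constructor that forwards the target 'requires' flags to the
// runtime via __tgt_register_requires.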
// CHECK0-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK0-64-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK0-64-NEXT: entry:
// CHECK0-64-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK0-64-NEXT: ret void
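// The CHECK1 prefix covers the second RUN line, which compiles through a
// PCH; the generated host IR should match the direct compilation above.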
// CHECK1-64-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-64-NEXT: entry:
// CHECK1-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK1-64-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT: [[AA:%.*]] = alloca i16, align 2
// CHECK1-64-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// CHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK1-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK1-64-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK1-64-NEXT: [[P:%.*]] = alloca i32*, align 64
// CHECK1-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[GA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8
// CHECK1-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK1-64-NEXT: store i32 0, i32* [[A]], align 4
// CHECK1-64-NEXT: store i16 0, i16* [[AA]], align 2
// CHECK1-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// CHECK1-64-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK1-64-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK1-64-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK1-64-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// CHECK1-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK1-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT: store i32 [[TMP6]], i32* [[X]], align 4
// CHECK1-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK1-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4
// CHECK1-64-NEXT: store i32* [[A]], i32** [[P]], align 64
// CHECK1-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK1-64-NEXT: store i32 [[TMP8]], i32* [[CONV]], align 4
// CHECK1-64-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK1-64-NEXT: [[TMP10:%.*]] = load i32*, i32** [[P]], align 64
// CHECK1-64-NEXT: [[TMP11:%.*]] = load i32, i32* @ga, align 4
// CHECK1-64-NEXT: [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32*
// CHECK1-64-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4
// CHECK1-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8
// CHECK1-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP9]], i64* [[TMP14]], align 8
// CHECK1-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP9]], i64* [[TMP16]], align 8
// CHECK1-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-64-NEXT: store i8* null, i8** [[TMP17]], align 8
// CHECK1-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-64-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK1-64-NEXT: store i32* [[TMP10]], i32** [[TMP19]], align 8
// CHECK1-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
// CHECK1-64-NEXT: store i32* [[TMP10]], i32** [[TMP21]], align 8
// CHECK1-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-64-NEXT: store i8* null, i8** [[TMP22]], align 8
// CHECK1-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK1-64-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP12]], i64* [[TMP24]], align 8
// CHECK1-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK1-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP12]], i64* [[TMP26]], align 8
// CHECK1-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK1-64-NEXT: store i8* null, i8** [[TMP27]], align 8
// CHECK1-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-64-NEXT: store i32 2, i32* [[TMP30]], align 4
// CHECK1-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-64-NEXT: store i32 3, i32* [[TMP31]], align 4
// CHECK1-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-64-NEXT: store i8** [[TMP28]], i8*** [[TMP32]], align 8
// CHECK1-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-64-NEXT: store i8** [[TMP29]], i8*** [[TMP33]], align 8
// CHECK1-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8
// CHECK1-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8
// CHECK1-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-64-NEXT: store i8** null, i8*** [[TMP36]], align 8
// CHECK1-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-64-NEXT: store i8** null, i8*** [[TMP37]], align 8
// CHECK1-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-64-NEXT: store i64 0, i64* [[TMP38]], align 8
// CHECK1-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-64-NEXT: store i64 0, i64* [[TMP39]], align 8
// CHECK1-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4
// CHECK1-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4
// CHECK1-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-64-NEXT: store i32 0, i32* [[TMP42]], align 4
// CHECK1-64-NEXT: [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK1-64-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
// CHECK1-64-NEXT: br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1-64: omp_offload.failed:
// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-64: omp_offload.cont:
// CHECK1-64-NEXT: [[TMP45:%.*]] = load i16, i16* [[AA]], align 2
// CHECK1-64-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
// CHECK1-64-NEXT: store i16 [[TMP45]], i16* [[CONV3]], align 2
// CHECK1-64-NEXT: [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8
// CHECK1-64-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK1-64-NEXT: [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK1-64-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8
// CHECK1-64-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false)
// CHECK1-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP46]], i64* [[TMP52]], align 8
// CHECK1-64-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP46]], i64* [[TMP54]], align 8
// CHECK1-64-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0
// CHECK1-64-NEXT: store i8* null, i8** [[TMP55]], align 8
// CHECK1-64-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
// CHECK1-64-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK1-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8
// CHECK1-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
// CHECK1-64-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK1-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8
// CHECK1-64-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1
// CHECK1-64-NEXT: store i8* null, i8** [[TMP60]], align 8
// CHECK1-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
// CHECK1-64-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP62]], align 8
// CHECK1-64-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
// CHECK1-64-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP64]], align 8
// CHECK1-64-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2
// CHECK1-64-NEXT: store i8* null, i8** [[TMP65]], align 8
// CHECK1-64-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
// CHECK1-64-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK1-64-NEXT: store float* [[VLA]], float** [[TMP67]], align 8
// CHECK1-64-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
// CHECK1-64-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK1-64-NEXT: store float* [[VLA]], float** [[TMP69]], align 8
// CHECK1-64-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK1-64-NEXT: store i64 [[TMP47]], i64* [[TMP70]], align 8
// CHECK1-64-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3
// CHECK1-64-NEXT: store i8* null, i8** [[TMP71]], align 8
// CHECK1-64-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4
// CHECK1-64-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK1-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8
// CHECK1-64-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4
// CHECK1-64-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK1-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8
// CHECK1-64-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4
// CHECK1-64-NEXT: store i8* null, i8** [[TMP76]], align 8
// CHECK1-64-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5
// CHECK1-64-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
// CHECK1-64-NEXT: store i64 5, i64* [[TMP78]], align 8
// CHECK1-64-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5
// CHECK1-64-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
// CHECK1-64-NEXT: store i64 5, i64* [[TMP80]], align 8
// CHECK1-64-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5
// CHECK1-64-NEXT: store i8* null, i8** [[TMP81]], align 8
// CHECK1-64-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6
// CHECK1-64-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP4]], i64* [[TMP83]], align 8
// CHECK1-64-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6
// CHECK1-64-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP4]], i64* [[TMP85]], align 8
// CHECK1-64-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6
// CHECK1-64-NEXT: store i8* null, i8** [[TMP86]], align 8
// CHECK1-64-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7
// CHECK1-64-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK1-64-NEXT: store double* [[VLA1]], double** [[TMP88]], align 8
// CHECK1-64-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7
// CHECK1-64-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK1-64-NEXT: store double* [[VLA1]], double** [[TMP90]], align 8
// CHECK1-64-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK1-64-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 8
// CHECK1-64-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7
// CHECK1-64-NEXT: store i8* null, i8** [[TMP92]], align 8
// CHECK1-64-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8
// CHECK1-64-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK1-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8
// CHECK1-64-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8
// CHECK1-64-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK1-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8
// CHECK1-64-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8
// CHECK1-64-NEXT: store i8* null, i8** [[TMP97]], align 8
// CHECK1-64-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK1-64-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-64-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0
// CHECK1-64-NEXT: store i32 2, i32* [[TMP101]], align 4
// CHECK1-64-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1
// CHECK1-64-NEXT: store i32 9, i32* [[TMP102]], align 4
// CHECK1-64-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2
// CHECK1-64-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 8
// CHECK1-64-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3
// CHECK1-64-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 8
// CHECK1-64-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4
// CHECK1-64-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 8
// CHECK1-64-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5
// CHECK1-64-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8
// CHECK1-64-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6
// CHECK1-64-NEXT: store i8** null, i8*** [[TMP107]], align 8
// CHECK1-64-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7
// CHECK1-64-NEXT: store i8** null, i8*** [[TMP108]], align 8
// CHECK1-64-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8
// CHECK1-64-NEXT: store i64 0, i64* [[TMP109]], align 8
// CHECK1-64-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9
// CHECK1-64-NEXT: store i64 0, i64* [[TMP110]], align 8
// CHECK1-64-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10
// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK1-64-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11
// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK1-64-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12
// CHECK1-64-NEXT: store i32 0, i32* [[TMP113]], align 4
// CHECK1-64-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]])
// CHECK1-64-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK1-64-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
// CHECK1-64: omp_offload.failed8:
// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT9]]
// CHECK1-64: omp_offload.cont9:
// CHECK1-64-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK1-64-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK1-64-NEXT: store double* [[TMP116]], double** [[TMP118]], align 8
// CHECK1-64-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK1-64-NEXT: store double* [[TMP116]], double** [[TMP120]], align 8
// CHECK1-64-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0
// CHECK1-64-NEXT: store i8* null, i8** [[TMP121]], align 8
// CHECK1-64-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
// CHECK1-64-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK1-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8
// CHECK1-64-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
// CHECK1-64-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK1-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8
// CHECK1-64-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1
// CHECK1-64-NEXT: store i8* null, i8** [[TMP126]], align 8
// CHECK1-64-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK1-64-NEXT: [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-64-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0
// CHECK1-64-NEXT: store i32 2, i32* [[TMP129]], align 4
// CHECK1-64-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1
// CHECK1-64-NEXT: store i32 2, i32* [[TMP130]], align 4
// CHECK1-64-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2
// CHECK1-64-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 8
// CHECK1-64-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3
// CHECK1-64-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 8
// CHECK1-64-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4
// CHECK1-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8
// CHECK1-64-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5
// CHECK1-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8
// CHECK1-64-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6
// CHECK1-64-NEXT: store i8** null, i8*** [[TMP135]], align 8
// CHECK1-64-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7
// CHECK1-64-NEXT: store i8** null, i8*** [[TMP136]], align 8
// CHECK1-64-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8
// CHECK1-64-NEXT: store i64 0, i64* [[TMP137]], align 8
// CHECK1-64-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9
// CHECK1-64-NEXT: store i64 0, i64* [[TMP138]], align 8
// CHECK1-64-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10
// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK1-64-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11
// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK1-64-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12
// CHECK1-64-NEXT: store i32 0, i32* [[TMP141]], align 4
// CHECK1-64-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]])
// CHECK1-64-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK1-64-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
// CHECK1-64: omp_offload.failed14:
// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT15]]
// CHECK1-64: omp_offload.cont15:
// CHECK1-64-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK1-64-NEXT: ret i32 [[TMP144]]
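//
// The three launches checked above share one host-side pattern: the
// .offload_baseptrs/.offload_ptrs/.offload_mappers arrays are filled slot
// by slot, a kernel-arguments record is populated field by field, and the
// result is handed to __tgt_target_kernel (falling back to a direct call
// of the outlined region on failure). A hedged sketch of that record,
// with field names assumed from the libomptarget KernelArgsTy; only the
// stores verified above are certain:
//
//   struct __tgt_kernel_arguments {   // field : value stored above
//     uint32_t Version;               //   0   : 2
//     uint32_t NumArgs;               //   1   : 3 / 9 / 2 per region
//     void   **ArgBasePtrs;           //   2   : &.offload_baseptrs[0]
//     void   **ArgPtrs;               //   3   : &.offload_ptrs[0]
//     int64_t *ArgSizes;              //   4   : @.offload_sizes* (or a
//                                     //         stack copy when a size is
//                                     //         only known at runtime)
//     int64_t *ArgTypes;              //   5   : @.offload_maptypes*
//     void   **ArgNames;              //   6   : null
//     void   **ArgMappers;            //   7   : null
//     uint64_t Tripcount;             //   8   : 0
//     uint64_t Flags;                 //   9   : 0
//     uint32_t NumTeams[3];           //  10   : {-1, 0, 0}
//     uint32_t ThreadLimit[3];        //  11   : {0, 0, 0}
//     uint32_t DynCGroupMem;          //  12   : 0
//   };
//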
// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK1-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK1-64-NEXT: entry:
// CHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-64-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-64-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8
// CHECK1-64-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8
// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK1-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
// CHECK1-64-NEXT: ret void
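//
// In the l63 region above, the two scalar firstprivates travel by value
// in i64 argument slots; the entry block only re-derives i32 views of
// those slots ([[CONV]]/[[CONV1]]) before the empty body returns. A
// minimal sketch of the coercion the caller performs (illustrative names,
// not the generated code):
//
//   int64_t slot = 0;
//   memcpy(&slot, &a, sizeof(int32_t));  // i32 stored into an i64 slot
//   // slot is what the region receives as [[A]]
//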
// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK1-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
// CHECK1-64-NEXT: entry:
// CHECK1-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
// CHECK1-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8
// CHECK1-64-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
// CHECK1-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8
// CHECK1-64-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
// CHECK1-64-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// CHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK1-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK1-64-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8
// CHECK1-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
// CHECK1-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK1-64-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8
// CHECK1-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK1-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK1-64-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
// CHECK1-64-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8
// CHECK1-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// CHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
// CHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK1-64-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
// CHECK1-64-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK1-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK1-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
// CHECK1-64-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
// CHECK1-64-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
// CHECK1-64-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK1-64-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
// CHECK1-64-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK1-64-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK1-64-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK1-64-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK1-64-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
// CHECK1-64-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK1-64-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
// CHECK1-64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK1-64-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
// CHECK1-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK1-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
// CHECK1-64-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK1-64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
// CHECK1-64-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK1-64-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
// CHECK1-64-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK1-64-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
// CHECK1-64-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK1-64-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK1-64-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK1-64-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
// CHECK1-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK1-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
// CHECK1-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK1-64-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
// CHECK1-64-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
// CHECK1-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK1-64-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
// CHECK1-64-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
// CHECK1-64-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
// CHECK1-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK1-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK1-64-NEXT: store i64 1, i64* [[X]], align 8
// CHECK1-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK1-64-NEXT: store i8 1, i8* [[Y]], align 8
// CHECK1-64-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK1-64-NEXT: ret void
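//
// The l70 region re-creates every firstprivate in device-local storage:
// the fixed-size aggregates are llvm.memcpy'd into fresh allocas (b: 40
// bytes, c: 400 bytes, d: 16 bytes), while the two VLAs are rebuilt from
// the i64 bounds passed by value: stacksave, an alloca of the right
// element count, then a memcpy of count * sizeof(element) bytes. A
// hedged sketch of the VLA half of that pattern:
//
//   void *sp = llvm.stacksave();
//   float *bn_priv = alloca(n * sizeof(float));   // [[VLA6]]
//   memcpy(bn_priv, bn, n * sizeof(float));       // private copy
//   /* ... region body uses bn_priv ... */
//   llvm.stackrestore(sp);
//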
// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK1-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK1-64-NEXT: entry:
// CHECK1-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK1-64-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
// CHECK1-64-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK1-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK1-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
// CHECK1-64-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
// CHECK1-64-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK1-64-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false)
// CHECK1-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK1-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK1-64-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0
// CHECK1-64-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8
// CHECK1-64-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK1-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0
// CHECK1-64-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8
// CHECK1-64-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK1-64-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 8
// CHECK1-64-NEXT: ret void
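//
// The l111 region shows the two remaining flavors checked in this file:
// the struct [[E]] is firstprivate, so it is memcpy'd into the local
// [[E1]] before use, while [[PTR]] is firstprivate only as a pointer
// value, so the stores to ptr[0] above go through the original pointee.
//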
// CHECK1-64-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK1-64-NEXT: entry:
// CHECK1-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK1-64-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// CHECK1-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK1-64-NEXT: store i32 0, i32* [[A]], align 4
// CHECK1-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK1-64-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
// CHECK1-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK1-64-NEXT: store i32 [[ADD]], i32* [[A]], align 4
// CHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// CHECK1-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK1-64-NEXT: store i32 [[ADD2]], i32* [[A]], align 4
// CHECK1-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// CHECK1-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK1-64-NEXT: store i32 [[ADD4]], i32* [[A]], align 4
// CHECK1-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// CHECK1-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK1-64-NEXT: store i32 [[ADD6]], i32* [[A]], align 4
// CHECK1-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT: ret i32 [[TMP9]]
// CHECK1-64-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK1-64-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK1-64-NEXT: entry:
// CHECK1-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK1-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT: [[B:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
// CHECK1-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK1-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK1-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK1-64-NEXT: store i32 [[ADD]], i32* [[B]], align 4
// CHECK1-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK1-64-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave()
// CHECK1-64-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK1-64-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// CHECK1-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
// CHECK1-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4
// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK1-64-NEXT: store i32 [[TMP5]], i32* [[CONV]], align 4
// CHECK1-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK1-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK1-64-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
// CHECK1-64-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false)
// CHECK1-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK1-64-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
// CHECK1-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK1-64-NEXT: store double* [[A]], double** [[TMP13]], align 8
// CHECK1-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-64-NEXT: store i8* null, i8** [[TMP14]], align 8
// CHECK1-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP6]], i64* [[TMP16]], align 8
// CHECK1-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-64-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP6]], i64* [[TMP18]], align 8
// CHECK1-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-64-NEXT: store i8* null, i8** [[TMP19]], align 8
// CHECK1-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK1-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
// CHECK1-64-NEXT: store i64 2, i64* [[TMP21]], align 8
// CHECK1-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK1-64-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
// CHECK1-64-NEXT: store i64 2, i64* [[TMP23]], align 8
// CHECK1-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK1-64-NEXT: store i8* null, i8** [[TMP24]], align 8
// CHECK1-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK1-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP2]], i64* [[TMP26]], align 8
// CHECK1-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK1-64-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP2]], i64* [[TMP28]], align 8
// CHECK1-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
// CHECK1-64-NEXT: store i8* null, i8** [[TMP29]], align 8
// CHECK1-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK1-64-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK1-64-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 8
// CHECK1-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK1-64-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK1-64-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 8
// CHECK1-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK1-64-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 8
// CHECK1-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK1-64-NEXT: store i8* null, i8** [[TMP35]], align 8
// CHECK1-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK1-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-64-NEXT: store i32 2, i32* [[TMP39]], align 4
// CHECK1-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-64-NEXT: store i32 5, i32* [[TMP40]], align 4
// CHECK1-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-64-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 8
// CHECK1-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-64-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 8
// CHECK1-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-64-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 8
// CHECK1-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-64-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8
// CHECK1-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-64-NEXT: store i8** null, i8*** [[TMP45]], align 8
// CHECK1-64-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-64-NEXT: store i8** null, i8*** [[TMP46]], align 8
// CHECK1-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-64-NEXT: store i64 0, i64* [[TMP47]], align 8
// CHECK1-64-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-64-NEXT: store i64 0, i64* [[TMP48]], align 8
// CHECK1-64-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK1-64-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK1-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-64-NEXT: store i32 0, i32* [[TMP51]], align 4
// CHECK1-64-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK1-64-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK1-64-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1-64: omp_offload.failed:
// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-64: omp_offload.cont:
// CHECK1-64-NEXT: [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]]
// CHECK1-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK1-64-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK1-64-NEXT: [[CONV3:%.*]] = sext i16 [[TMP55]] to i32
// CHECK1-64-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK1-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]]
// CHECK1-64-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK1-64-NEXT: ret i32 [[ADD4]]
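//
// For the member function above, slot 0 maps [[THIS1]] as the base with
// its member [[A]] as the mapped pointer, the scalar b travels by value
// in an i64 slot, and the i16 VLA is described by its two by-value bounds
// (the constant 2 and [[TMP2]]) plus a runtime byte size written into the
// stack copy of .offload_sizes.
//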
// CHECK1-64-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// CHECK1-64-NEXT: entry:
// CHECK1-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT: [[AAA:%.*]] = alloca i8, align 1
// CHECK1-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK1-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT: store i32 0, i32* [[A]], align 4
// CHECK1-64-NEXT: store i8 0, i8* [[AAA]], align 1
// CHECK1-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK1-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK1-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK1-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
// CHECK1-64-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1
// CHECK1-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
// CHECK1-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK1-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8
// CHECK1-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-64-NEXT: store i8* null, i8** [[TMP8]], align 8
// CHECK1-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP3]], i64* [[TMP10]], align 8
// CHECK1-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-64-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP3]], i64* [[TMP12]], align 8
// CHECK1-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-64-NEXT: store i8* null, i8** [[TMP13]], align 8
// CHECK1-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK1-64-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8
// CHECK1-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK1-64-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8
// CHECK1-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK1-64-NEXT: store i8* null, i8** [[TMP18]], align 8
// CHECK1-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-64-NEXT: store i32 2, i32* [[TMP21]], align 4
// CHECK1-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-64-NEXT: store i32 3, i32* [[TMP22]], align 4
// CHECK1-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-64-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 8
// CHECK1-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-64-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 8
// CHECK1-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8
// CHECK1-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8
// CHECK1-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-64-NEXT: store i8** null, i8*** [[TMP27]], align 8
// CHECK1-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-64-NEXT: store i8** null, i8*** [[TMP28]], align 8
// CHECK1-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-64-NEXT: store i64 0, i64* [[TMP29]], align 8
// CHECK1-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-64-NEXT: store i64 0, i64* [[TMP30]], align 8
// CHECK1-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK1-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK1-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-64-NEXT: store i32 0, i32* [[TMP33]], align 4
// CHECK1-64-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK1-64-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK1-64-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1-64: omp_offload.failed:
// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-64: omp_offload.cont:
// CHECK1-64-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT: ret i32 [[TMP36]]
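//
// _ZL7fstatici demonstrates that the by-value coercion also covers
// sub-word scalars: the i8 [[AAA]] is stored through an i8 view of its
// own i64 slot ([[CONV1]]) exactly as the i32 [[A]] is, and both slots
// are then passed alongside the mapped [10 x i32] array.
//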
// CHECK1-64-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK1-64-NEXT: entry:
// CHECK1-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK1-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8
// CHECK1-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT: store i32 0, i32* [[A]], align 4
// CHECK1-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK1-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK1-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8
// CHECK1-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK1-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-64-NEXT: store i8* null, i8** [[TMP6]], align 8
// CHECK1-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-64-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8
// CHECK1-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8
// CHECK1-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-64-NEXT: store i8* null, i8** [[TMP11]], align 8
// CHECK1-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-64-NEXT: store i32 2, i32* [[TMP14]], align 4
// CHECK1-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-64-NEXT: store i32 2, i32* [[TMP15]], align 4
// CHECK1-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-64-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 8
// CHECK1-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-64-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 8
// CHECK1-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8
// CHECK1-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8
// CHECK1-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-64-NEXT: store i8** null, i8*** [[TMP20]], align 8
// CHECK1-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-64-NEXT: store i8** null, i8*** [[TMP21]], align 8
// CHECK1-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-64-NEXT: store i64 0, i64* [[TMP22]], align 8
// CHECK1-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-64-NEXT: store i64 0, i64* [[TMP23]], align 8
// CHECK1-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK1-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK1-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-64-NEXT: store i32 0, i32* [[TMP26]], align 4
// CHECK1-64-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK1-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK1-64-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1-64: omp_offload.failed:
// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-64: omp_offload.cont:
// CHECK1-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT: ret i32 [[TMP29]]
// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK1-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK1-64-NEXT: entry:
// CHECK1-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK1-64-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK1-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK1-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK1-64-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8
// CHECK1-64-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK1-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK1-64-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// CHECK1-64-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK1-64-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK1-64-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK1-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
// CHECK1-64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK1-64-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// CHECK1-64-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK1-64-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
// CHECK1-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK1-64-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK1-64-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
// CHECK1-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK1-64-NEXT: store double [[ADD]], double* [[A]], align 8
// CHECK1-64-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK1-64-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8
// CHECK1-64-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK1-64-NEXT: store double [[INC]], double* [[A5]], align 8
// CHECK1-64-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16
// CHECK1-64-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
// CHECK1-64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK1-64-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
// CHECK1-64-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK1-64-NEXT: ret void
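// The outlined l167 function above realizes firstprivate for S1::r1's VLA:
// the private copy is carved out with llvm.stacksave + alloca(i16, n1*n2),
// initialized by an llvm.memcpy of 2*n1*n2 bytes, and released with
// llvm.stackrestore on exit. Judging purely from the checks, the region body
// is essentially the following (a hedged reconstruction, not a verbatim
// quote of the source near line 167):
//
//   #pragma omp target firstprivate(b, c)
//   {
//     this->a = (double)b + 1.5; // sitofp + fadd 1.5, stored to field 0
//     ++this->a;                 // load, fadd 1.0, store back
//     c[1][1] = (short)this->a;  // fptosi, index 1*n2 + 1 into the copy
//   }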
// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK1-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK1-64-NEXT: entry:
// CHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK1-64-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4
// CHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-64-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK1-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
// CHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK1-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
// CHECK1-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK1-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK1-64-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
// CHECK1-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// CHECK1-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// CHECK1-64-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
// CHECK1-64-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1
// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
// CHECK1-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK1-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK1-64-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
// CHECK1-64-NEXT: ret void
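// Same firstprivate pattern in the fixed-size l142 case: the [10 x i32]
// array is copied with a constant 40-byte memcpy instead of a stack-saved
// VLA, while the scalars a (i32) and aaa (i8) arrive by value inside i64
// parameter slots and are accessed through bitcasts of their allocas. The
// body, reconstructed from the checks, is just
//
//   a += 1; aaa += 1; b[2] += 1;
//
// with all three updates applied to the private copies.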
// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK1-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK1-64-NEXT: entry:
// CHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK1-64-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK1-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK1-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK1-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
// CHECK1-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK1-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK1-64-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK1-64-NEXT: ret void
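// The ftemplate<int> region at l128 is the same shape minus the i8 scalar:
// one i64 slot for a, a 40-byte memcpy for the firstprivate b, then a += 1
// and b[2] += 1 on the private copies.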
// CHECK1-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK1-64-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK1-64-NEXT: entry:
// CHECK1-64-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK1-64-NEXT: ret void
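// .omp_offloading.requires_reg reports this translation unit's 'requires'
// state to the runtime via __tgt_register_requires; the test has no requires
// directive, so the i64 1 flags word presumably encodes only the "none"
// requirement bit. The hook is typically invoked as part of offload image
// registration at program start.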
// CHECK2-32-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK2-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-32-NEXT: entry:
// CHECK2-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK2-32-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[AA:%.*]] = alloca i16, align 2
// CHECK2-32-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// CHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK2-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK2-32-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK2-32-NEXT: [[P:%.*]] = alloca i32*, align 64
// CHECK2-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[GA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4
// CHECK2-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK2-32-NEXT: store i32 0, i32* [[A]], align 4
// CHECK2-32-NEXT: store i16 0, i16* [[AA]], align 2
// CHECK2-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave()
// CHECK2-32-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// CHECK2-32-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
// CHECK2-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK2-32-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// CHECK2-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK2-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT: store i32 [[TMP4]], i32* [[X]], align 4
// CHECK2-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4
// CHECK2-32-NEXT: store i32* [[A]], i32** [[P]], align 64
// CHECK2-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT: store i32 [[TMP6]], i32* [[A_CASTED]], align 4
// CHECK2-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK2-32-NEXT: [[TMP8:%.*]] = load i32*, i32** [[P]], align 64
// CHECK2-32-NEXT: [[TMP9:%.*]] = load i32, i32* @ga, align 4
// CHECK2-32-NEXT: store i32 [[TMP9]], i32* [[GA_CASTED]], align 4
// CHECK2-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4
// CHECK2-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 4
// CHECK2-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP7]], i32* [[TMP14]], align 4
// CHECK2-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-32-NEXT: store i8* null, i8** [[TMP15]], align 4
// CHECK2-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
// CHECK2-32-NEXT: store i32* [[TMP8]], i32** [[TMP17]], align 4
// CHECK2-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-32-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK2-32-NEXT: store i32* [[TMP8]], i32** [[TMP19]], align 4
// CHECK2-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-32-NEXT: store i8* null, i8** [[TMP20]], align 4
// CHECK2-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK2-32-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP10]], i32* [[TMP22]], align 4
// CHECK2-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK2-32-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP10]], i32* [[TMP24]], align 4
// CHECK2-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK2-32-NEXT: store i8* null, i8** [[TMP25]], align 4
// CHECK2-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-32-NEXT: store i32 2, i32* [[TMP28]], align 4
// CHECK2-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-32-NEXT: store i32 3, i32* [[TMP29]], align 4
// CHECK2-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-32-NEXT: store i8** [[TMP26]], i8*** [[TMP30]], align 4
// CHECK2-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-32-NEXT: store i8** [[TMP27]], i8*** [[TMP31]], align 4
// CHECK2-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4
// CHECK2-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4
// CHECK2-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-32-NEXT: store i8** null, i8*** [[TMP34]], align 4
// CHECK2-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-32-NEXT: store i8** null, i8*** [[TMP35]], align 4
// CHECK2-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-32-NEXT: store i64 0, i64* [[TMP36]], align 8
// CHECK2-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-32-NEXT: store i64 0, i64* [[TMP37]], align 8
// CHECK2-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4
// CHECK2-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4
// CHECK2-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-32-NEXT: store i32 0, i32* [[TMP40]], align 4
// CHECK2-32-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK2-32-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
// CHECK2-32-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2-32: omp_offload.failed:
// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK2-32: omp_offload.cont:
// CHECK2-32-NEXT: [[TMP43:%.*]] = load i16, i16* [[AA]], align 2
// CHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK2-32-NEXT: store i16 [[TMP43]], i16* [[CONV]], align 2
// CHECK2-32-NEXT: [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4
// CHECK2-32-NEXT: [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4
// CHECK2-32-NEXT: [[TMP46:%.*]] = sext i32 [[TMP45]] to i64
// CHECK2-32-NEXT: [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK2-32-NEXT: [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8
// CHECK2-32-NEXT: [[TMP49:%.*]] = sext i32 [[TMP48]] to i64
// CHECK2-32-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false)
// CHECK2-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP44]], i32* [[TMP52]], align 4
// CHECK2-32-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP44]], i32* [[TMP54]], align 4
// CHECK2-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
// CHECK2-32-NEXT: store i8* null, i8** [[TMP55]], align 4
// CHECK2-32-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
// CHECK2-32-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK2-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4
// CHECK2-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
// CHECK2-32-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK2-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4
// CHECK2-32-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
// CHECK2-32-NEXT: store i8* null, i8** [[TMP60]], align 4
// CHECK2-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
// CHECK2-32-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP0]], i32* [[TMP62]], align 4
// CHECK2-32-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
// CHECK2-32-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP0]], i32* [[TMP64]], align 4
// CHECK2-32-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
// CHECK2-32-NEXT: store i8* null, i8** [[TMP65]], align 4
// CHECK2-32-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
// CHECK2-32-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK2-32-NEXT: store float* [[VLA]], float** [[TMP67]], align 4
// CHECK2-32-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
// CHECK2-32-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK2-32-NEXT: store float* [[VLA]], float** [[TMP69]], align 4
// CHECK2-32-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK2-32-NEXT: store i64 [[TMP46]], i64* [[TMP70]], align 4
// CHECK2-32-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
// CHECK2-32-NEXT: store i8* null, i8** [[TMP71]], align 4
// CHECK2-32-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
// CHECK2-32-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK2-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4
// CHECK2-32-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
// CHECK2-32-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK2-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4
// CHECK2-32-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
// CHECK2-32-NEXT: store i8* null, i8** [[TMP76]], align 4
// CHECK2-32-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
// CHECK2-32-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
// CHECK2-32-NEXT: store i32 5, i32* [[TMP78]], align 4
// CHECK2-32-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
// CHECK2-32-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
// CHECK2-32-NEXT: store i32 5, i32* [[TMP80]], align 4
// CHECK2-32-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
// CHECK2-32-NEXT: store i8* null, i8** [[TMP81]], align 4
// CHECK2-32-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
// CHECK2-32-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP2]], i32* [[TMP83]], align 4
// CHECK2-32-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
// CHECK2-32-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP2]], i32* [[TMP85]], align 4
// CHECK2-32-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
// CHECK2-32-NEXT: store i8* null, i8** [[TMP86]], align 4
// CHECK2-32-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
// CHECK2-32-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK2-32-NEXT: store double* [[VLA1]], double** [[TMP88]], align 4
// CHECK2-32-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
// CHECK2-32-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK2-32-NEXT: store double* [[VLA1]], double** [[TMP90]], align 4
// CHECK2-32-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK2-32-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 4
// CHECK2-32-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
// CHECK2-32-NEXT: store i8* null, i8** [[TMP92]], align 4
// CHECK2-32-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
// CHECK2-32-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK2-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4
// CHECK2-32-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
// CHECK2-32-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK2-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4
// CHECK2-32-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
// CHECK2-32-NEXT: store i8* null, i8** [[TMP97]], align 4
// CHECK2-32-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK2-32-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK2-32-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0
// CHECK2-32-NEXT: store i32 2, i32* [[TMP101]], align 4
// CHECK2-32-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1
// CHECK2-32-NEXT: store i32 9, i32* [[TMP102]], align 4
// CHECK2-32-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2
// CHECK2-32-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 4
// CHECK2-32-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3
// CHECK2-32-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 4
// CHECK2-32-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4
// CHECK2-32-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 4
// CHECK2-32-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5
// CHECK2-32-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4
// CHECK2-32-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6
// CHECK2-32-NEXT: store i8** null, i8*** [[TMP107]], align 4
// CHECK2-32-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7
// CHECK2-32-NEXT: store i8** null, i8*** [[TMP108]], align 4
// CHECK2-32-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8
// CHECK2-32-NEXT: store i64 0, i64* [[TMP109]], align 8
// CHECK2-32-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9
// CHECK2-32-NEXT: store i64 0, i64* [[TMP110]], align 8
// CHECK2-32-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10
// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK2-32-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11
// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK2-32-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12
// CHECK2-32-NEXT: store i32 0, i32* [[TMP113]], align 4
// CHECK2-32-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]])
// CHECK2-32-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK2-32-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
// CHECK2-32: omp_offload.failed6:
// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT7]]
// CHECK2-32: omp_offload.cont7:
// CHECK2-32-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK2-32-NEXT: store double* [[TMP116]], double** [[TMP118]], align 4
// CHECK2-32-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK2-32-NEXT: store double* [[TMP116]], double** [[TMP120]], align 4
// CHECK2-32-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
// CHECK2-32-NEXT: store i8* null, i8** [[TMP121]], align 4
// CHECK2-32-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
// CHECK2-32-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK2-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4
// CHECK2-32-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
// CHECK2-32-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK2-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4
// CHECK2-32-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
// CHECK2-32-NEXT: store i8* null, i8** [[TMP126]], align 4
// CHECK2-32-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK2-32-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK2-32-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0
// CHECK2-32-NEXT: store i32 2, i32* [[TMP129]], align 4
// CHECK2-32-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1
// CHECK2-32-NEXT: store i32 2, i32* [[TMP130]], align 4
// CHECK2-32-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2
// CHECK2-32-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 4
// CHECK2-32-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3
// CHECK2-32-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 4
// CHECK2-32-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4
// CHECK2-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4
// CHECK2-32-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5
// CHECK2-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4
// CHECK2-32-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6
// CHECK2-32-NEXT: store i8** null, i8*** [[TMP135]], align 4
// CHECK2-32-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7
// CHECK2-32-NEXT: store i8** null, i8*** [[TMP136]], align 4
// CHECK2-32-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8
// CHECK2-32-NEXT: store i64 0, i64* [[TMP137]], align 8
// CHECK2-32-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9
// CHECK2-32-NEXT: store i64 0, i64* [[TMP138]], align 8
// CHECK2-32-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10
// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK2-32-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11
// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK2-32-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12
// CHECK2-32-NEXT: store i32 0, i32* [[TMP141]], align 4
// CHECK2-32-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]])
// CHECK2-32-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK2-32-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
// CHECK2-32: omp_offload.failed12:
// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT13]]
// CHECK2-32: omp_offload.cont13:
// CHECK2-32-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK2-32-NEXT: ret i32 [[TMP144]]
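// Note the i386-specific twist running through the host wrapper above: VLA
// byte counts are computed in i32 (mul nuw) and then sign-extended to i64
// before landing in the runtime's 64-bit .offload_sizes slots, every
// pointer-sized slot uses align 4, and the memcpys go through the
// llvm.memcpy.p0i8.p0i8.i32 variant.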
// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK2-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK2-32-NEXT: entry:
// CHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4
// CHECK2-32-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK2-32-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4
// CHECK2-32-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4
// CHECK2-32-NEXT: ret void
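// The l63 region body is empty as far as codegen goes: only the parameter
// spills for a, p and ga appear before the ret. That makes it a minimal
// check that by-value scalars and pointer captures are forwarded to the
// outlined function without any copy machinery.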
// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK2-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
// CHECK2-32-NEXT: entry:
// CHECK2-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
// CHECK2-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4
// CHECK2-32-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
// CHECK2-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4
// CHECK2-32-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
// CHECK2-32-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// CHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK2-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK2-32-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK2-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
// CHECK2-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK2-32-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4
// CHECK2-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK2-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK2-32-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
// CHECK2-32-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4
// CHECK2-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
// CHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
// CHECK2-32-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK2-32-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
// CHECK2-32-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK2-32-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK2-32-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
// CHECK2-32-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK2-32-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
// CHECK2-32-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK2-32-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
// CHECK2-32-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK2-32-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
// CHECK2-32-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
// CHECK2-32-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
// CHECK2-32-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK2-32-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
// CHECK2-32-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK2-32-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
// CHECK2-32-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK2-32-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
// CHECK2-32-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK2-32-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK2-32-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK2-32-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
// CHECK2-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK2-32-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
// CHECK2-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK2-32-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
// CHECK2-32-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
// CHECK2-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK2-32-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
// CHECK2-32-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
// CHECK2-32-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
// CHECK2-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK2-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK2-32-NEXT: store i64 1, i64* [[X]], align 4
// CHECK2-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK2-32-NEXT: store i8 1, i8* [[Y]], align 4
// CHECK2-32-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK2-32-NEXT: ret void
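// The l70 outlined function strings together five firstprivate
// initializations: a constant 40-byte memcpy for b, a stack-saved float VLA
// copy of n*4 bytes for bn, a 400-byte memcpy for c, a double VLA copy of
// n1*n2*8 bytes for cn, and a 12-byte memcpy for the struct TT d; a single
// stacksave/stackrestore pair brackets both VLA copies.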
// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK2-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK2-32-NEXT: entry:
// CHECK2-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK2-32-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
// CHECK2-32-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK2-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK2-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK2-32-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false)
// CHECK2-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK2-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK2-32-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0
// CHECK2-32-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4
// CHECK2-32-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK2-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0
// CHECK2-32-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4
// CHECK2-32-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK2-32-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 4
// CHECK2-32-NEXT: ret void
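// In the l111 region only the struct e is firstprivate (the 8-byte memcpy
// into E1); the double pointer travels by value, so the stores through
// ptr[0] hit the original allocation: first ptr[0] = (double)e.x, then
// ptr[0] += 1.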
// CHECK2-32-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK2-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK2-32-NEXT: entry:
// CHECK2-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK2-32-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// CHECK2-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK2-32-NEXT: store i32 0, i32* [[A]], align 4
// CHECK2-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK2-32-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
// CHECK2-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK2-32-NEXT: store i32 [[ADD]], i32* [[A]], align 4
// CHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// CHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK2-32-NEXT: store i32 [[ADD2]], i32* [[A]], align 4
// CHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// CHECK2-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK2-32-NEXT: store i32 [[ADD4]], i32* [[A]], align 4
// CHECK2-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// CHECK2-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK2-32-NEXT: store i32 [[ADD6]], i32* [[A]], align 4
// CHECK2-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT: ret i32 [[TMP9]]
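// _Z3bariPd itself contains no offloading; it merely chains the four tested
// entry points (foo, S1::r1, fstatic, ftemplate<int>) and accumulates their
// results, so its checks only pin down the call sequence.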
// CHECK2-32-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK2-32-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK2-32-NEXT: entry:
// CHECK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK2-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[B:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
// CHECK2-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK2-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK2-32-NEXT: store i32 [[ADD]], i32* [[B]], align 4
// CHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK2-32-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK2-32-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4
// CHECK2-32-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4
// CHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
// CHECK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK2-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK2-32-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
// CHECK2-32-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false)
// CHECK2-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK2-32-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
// CHECK2-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK2-32-NEXT: store double* [[A]], double** [[TMP13]], align 4
// CHECK2-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-32-NEXT: store i8* null, i8** [[TMP14]], align 4
// CHECK2-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-32-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP5]], i32* [[TMP16]], align 4
// CHECK2-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-32-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP5]], i32* [[TMP18]], align 4
// CHECK2-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-32-NEXT: store i8* null, i8** [[TMP19]], align 4
// CHECK2-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK2-32-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32*
// CHECK2-32-NEXT: store i32 2, i32* [[TMP21]], align 4
// CHECK2-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK2-32-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
// CHECK2-32-NEXT: store i32 2, i32* [[TMP23]], align 4
// CHECK2-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK2-32-NEXT: store i8* null, i8** [[TMP24]], align 4
// CHECK2-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK2-32-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP26]], align 4
// CHECK2-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK2-32-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP28]], align 4
// CHECK2-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
// CHECK2-32-NEXT: store i8* null, i8** [[TMP29]], align 4
// CHECK2-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK2-32-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK2-32-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 4
// CHECK2-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK2-32-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK2-32-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 4
// CHECK2-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK2-32-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 4
// CHECK2-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK2-32-NEXT: store i8* null, i8** [[TMP35]], align 4
// CHECK2-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK2-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-32-NEXT: store i32 2, i32* [[TMP39]], align 4
// CHECK2-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-32-NEXT: store i32 5, i32* [[TMP40]], align 4
// CHECK2-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-32-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 4
// CHECK2-32-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-32-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 4
// CHECK2-32-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-32-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 4
// CHECK2-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-32-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4
// CHECK2-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-32-NEXT: store i8** null, i8*** [[TMP45]], align 4
// CHECK2-32-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-32-NEXT: store i8** null, i8*** [[TMP46]], align 4
// CHECK2-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-32-NEXT: store i64 0, i64* [[TMP47]], align 8
// CHECK2-32-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-32-NEXT: store i64 0, i64* [[TMP48]], align 8
// CHECK2-32-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK2-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK2-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-32-NEXT: store i32 0, i32* [[TMP51]], align 4
// CHECK2-32-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK2-32-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK2-32-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2-32: omp_offload.failed:
// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK2-32: omp_offload.cont:
// CHECK2-32-NEXT: [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]]
// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]]
// CHECK2-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK2-32-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK2-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP55]] to i32
// CHECK2-32-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK2-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]]
// CHECK2-32-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK2-32-NEXT: ret i32 [[ADD3]]
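// In the 32-bit r1 wrapper the first map entry is the implicit 'this'
// capture: the base-pointer slot holds the S1 object while the pointer slot
// holds &this->a, the first mapped member, with its size taken from slot 0
// of @.offload_sizes.5. The remaining entries pass b by value, the VLA
// bounds 2 and n, and the i16 VLA by pointer with a runtime-computed
// 2*2*n-byte size stored into slot 4.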
// CHECK2-32-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK2-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// CHECK2-32-NEXT: entry:
// CHECK2-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[AAA:%.*]] = alloca i8, align 1
// CHECK2-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK2-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT: store i32 0, i32* [[A]], align 4
// CHECK2-32-NEXT: store i8 0, i8* [[AAA]], align 1
// CHECK2-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK2-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
// CHECK2-32-NEXT: store i8 [[TMP2]], i8* [[CONV]], align 1
// CHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
// CHECK2-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK2-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4
// CHECK2-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-32-NEXT: store i8* null, i8** [[TMP8]], align 4
// CHECK2-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP3]], i32* [[TMP10]], align 4
// CHECK2-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP3]], i32* [[TMP12]], align 4
// CHECK2-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-32-NEXT: store i8* null, i8** [[TMP13]], align 4
// CHECK2-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK2-32-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4
// CHECK2-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK2-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4
// CHECK2-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK2-32-NEXT: store i8* null, i8** [[TMP18]], align 4
// CHECK2-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-32-NEXT: store i32 2, i32* [[TMP21]], align 4
// CHECK2-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-32-NEXT: store i32 3, i32* [[TMP22]], align 4
// CHECK2-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-32-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 4
// CHECK2-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-32-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 4
// CHECK2-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4
// CHECK2-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4
// CHECK2-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-32-NEXT: store i8** null, i8*** [[TMP27]], align 4
// CHECK2-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-32-NEXT: store i8** null, i8*** [[TMP28]], align 4
// CHECK2-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-32-NEXT: store i64 0, i64* [[TMP29]], align 8
// CHECK2-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-32-NEXT: store i64 0, i64* [[TMP30]], align 8
// CHECK2-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK2-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK2-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-32-NEXT: store i32 0, i32* [[TMP33]], align 4
// CHECK2-32-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK2-32-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK2-32-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2-32: omp_offload.failed:
// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK2-32: omp_offload.cont:
// CHECK2-32-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT: ret i32 [[TMP36]]
// CHECK2-32-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK2-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK2-32-NEXT: entry:
// CHECK2-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK2-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4
// CHECK2-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT: store i32 0, i32* [[A]], align 4
// CHECK2-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK2-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP3]], align 4
// CHECK2-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK2-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-32-NEXT: store i8* null, i8** [[TMP6]], align 4
// CHECK2-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-32-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4
// CHECK2-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4
// CHECK2-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-32-NEXT: store i8* null, i8** [[TMP11]], align 4
// CHECK2-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-32-NEXT: store i32 2, i32* [[TMP14]], align 4
// CHECK2-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-32-NEXT: store i32 2, i32* [[TMP15]], align 4
// CHECK2-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-32-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 4
// CHECK2-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-32-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 4
// CHECK2-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4
// CHECK2-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4
// CHECK2-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-32-NEXT: store i8** null, i8*** [[TMP20]], align 4
// CHECK2-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-32-NEXT: store i8** null, i8*** [[TMP21]], align 4
// CHECK2-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-32-NEXT: store i64 0, i64* [[TMP22]], align 8
// CHECK2-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-32-NEXT: store i64 0, i64* [[TMP23]], align 8
// CHECK2-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK2-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK2-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-32-NEXT: store i32 0, i32* [[TMP26]], align 4
// CHECK2-32-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK2-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK2-32-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2-32: omp_offload.failed:
// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK2-32: omp_offload.cont:
// CHECK2-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT: ret i32 [[TMP29]]
// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK2-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK2-32-NEXT: entry:
// CHECK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4
// CHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK2-32-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK2-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK2-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK2-32-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK2-32-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK2-32-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK2-32-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK2-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK2-32-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK2-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK2-32-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK2-32-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
// CHECK2-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
// CHECK2-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK2-32-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// CHECK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK2-32-NEXT: store double [[ADD]], double* [[A]], align 4
// CHECK2-32-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK2-32-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4
// CHECK2-32-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK2-32-NEXT: store double [[INC]], double* [[A4]], align 4
// CHECK2-32-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
// CHECK2-32-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
// CHECK2-32-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK2-32-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
// CHECK2-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK2-32-NEXT: ret void
// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK2-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK2-32-NEXT: entry:
// CHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK2-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK2-32-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
// CHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK2-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK2-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
// CHECK2-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
// CHECK2-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
// CHECK2-32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// CHECK2-32-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1
// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK2-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK2-32-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
// CHECK2-32-NEXT: ret void
// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK2-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK2-32-NEXT: entry:
// CHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK2-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK2-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK2-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK2-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK2-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK2-32-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK2-32-NEXT: ret void
// CHECK2-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK2-32-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK2-32-NEXT: entry:
// CHECK2-32-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK2-32-NEXT: ret void
// CHECK3-32-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK3-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK3-32-NEXT: entry:
// CHECK3-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK3-32-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[AA:%.*]] = alloca i16, align 2
// CHECK3-32-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// CHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK3-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK3-32-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK3-32-NEXT: [[P:%.*]] = alloca i32*, align 64
// CHECK3-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[GA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4
// CHECK3-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK3-32-NEXT: store i32 0, i32* [[A]], align 4
// CHECK3-32-NEXT: store i16 0, i16* [[AA]], align 2
// CHECK3-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave()
// CHECK3-32-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// CHECK3-32-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
// CHECK3-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK3-32-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// CHECK3-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK3-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT: store i32 [[TMP4]], i32* [[X]], align 4
// CHECK3-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4
// CHECK3-32-NEXT: store i32* [[A]], i32** [[P]], align 64
// CHECK3-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT: store i32 [[TMP6]], i32* [[A_CASTED]], align 4
// CHECK3-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK3-32-NEXT: [[TMP8:%.*]] = load i32*, i32** [[P]], align 64
// CHECK3-32-NEXT: [[TMP9:%.*]] = load i32, i32* @ga, align 4
// CHECK3-32-NEXT: store i32 [[TMP9]], i32* [[GA_CASTED]], align 4
// CHECK3-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4
// CHECK3-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 4
// CHECK3-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP7]], i32* [[TMP14]], align 4
// CHECK3-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-32-NEXT: store i8* null, i8** [[TMP15]], align 4
// CHECK3-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
// CHECK3-32-NEXT: store i32* [[TMP8]], i32** [[TMP17]], align 4
// CHECK3-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-32-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK3-32-NEXT: store i32* [[TMP8]], i32** [[TMP19]], align 4
// CHECK3-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-32-NEXT: store i8* null, i8** [[TMP20]], align 4
// CHECK3-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK3-32-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP10]], i32* [[TMP22]], align 4
// CHECK3-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK3-32-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP10]], i32* [[TMP24]], align 4
// CHECK3-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK3-32-NEXT: store i8* null, i8** [[TMP25]], align 4
// CHECK3-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-32-NEXT: store i32 2, i32* [[TMP28]], align 4
// CHECK3-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-32-NEXT: store i32 3, i32* [[TMP29]], align 4
// CHECK3-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-32-NEXT: store i8** [[TMP26]], i8*** [[TMP30]], align 4
// CHECK3-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-32-NEXT: store i8** [[TMP27]], i8*** [[TMP31]], align 4
// CHECK3-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4
// CHECK3-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4
// CHECK3-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-32-NEXT: store i8** null, i8*** [[TMP34]], align 4
// CHECK3-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-32-NEXT: store i8** null, i8*** [[TMP35]], align 4
// CHECK3-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-32-NEXT: store i64 0, i64* [[TMP36]], align 8
// CHECK3-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-32-NEXT: store i64 0, i64* [[TMP37]], align 8
// CHECK3-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4
// CHECK3-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4
// CHECK3-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-32-NEXT: store i32 0, i32* [[TMP40]], align 4
// CHECK3-32-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK3-32-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
// CHECK3-32-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3-32: omp_offload.failed:
// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-32: omp_offload.cont:
// CHECK3-32-NEXT: [[TMP43:%.*]] = load i16, i16* [[AA]], align 2
// CHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK3-32-NEXT: store i16 [[TMP43]], i16* [[CONV]], align 2
// CHECK3-32-NEXT: [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4
// CHECK3-32-NEXT: [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4
// CHECK3-32-NEXT: [[TMP46:%.*]] = sext i32 [[TMP45]] to i64
// CHECK3-32-NEXT: [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK3-32-NEXT: [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8
// CHECK3-32-NEXT: [[TMP49:%.*]] = sext i32 [[TMP48]] to i64
// CHECK3-32-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false)
// CHECK3-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP44]], i32* [[TMP52]], align 4
// CHECK3-32-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP44]], i32* [[TMP54]], align 4
// CHECK3-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
// CHECK3-32-NEXT: store i8* null, i8** [[TMP55]], align 4
// CHECK3-32-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
// CHECK3-32-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK3-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4
// CHECK3-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
// CHECK3-32-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK3-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4
// CHECK3-32-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
// CHECK3-32-NEXT: store i8* null, i8** [[TMP60]], align 4
// CHECK3-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
// CHECK3-32-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP0]], i32* [[TMP62]], align 4
// CHECK3-32-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
// CHECK3-32-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP0]], i32* [[TMP64]], align 4
// CHECK3-32-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
// CHECK3-32-NEXT: store i8* null, i8** [[TMP65]], align 4
// CHECK3-32-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
// CHECK3-32-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK3-32-NEXT: store float* [[VLA]], float** [[TMP67]], align 4
// CHECK3-32-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
// CHECK3-32-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK3-32-NEXT: store float* [[VLA]], float** [[TMP69]], align 4
// CHECK3-32-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK3-32-NEXT: store i64 [[TMP46]], i64* [[TMP70]], align 4
// CHECK3-32-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
// CHECK3-32-NEXT: store i8* null, i8** [[TMP71]], align 4
// CHECK3-32-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
// CHECK3-32-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK3-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4
// CHECK3-32-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
// CHECK3-32-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK3-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4
// CHECK3-32-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
// CHECK3-32-NEXT: store i8* null, i8** [[TMP76]], align 4
// CHECK3-32-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
// CHECK3-32-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
// CHECK3-32-NEXT: store i32 5, i32* [[TMP78]], align 4
// CHECK3-32-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
// CHECK3-32-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
// CHECK3-32-NEXT: store i32 5, i32* [[TMP80]], align 4
// CHECK3-32-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
// CHECK3-32-NEXT: store i8* null, i8** [[TMP81]], align 4
// CHECK3-32-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
// CHECK3-32-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP2]], i32* [[TMP83]], align 4
// CHECK3-32-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
// CHECK3-32-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP2]], i32* [[TMP85]], align 4
// CHECK3-32-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
// CHECK3-32-NEXT: store i8* null, i8** [[TMP86]], align 4
// CHECK3-32-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
// CHECK3-32-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK3-32-NEXT: store double* [[VLA1]], double** [[TMP88]], align 4
// CHECK3-32-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
// CHECK3-32-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK3-32-NEXT: store double* [[VLA1]], double** [[TMP90]], align 4
// CHECK3-32-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK3-32-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 4
// CHECK3-32-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
// CHECK3-32-NEXT: store i8* null, i8** [[TMP92]], align 4
// CHECK3-32-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
// CHECK3-32-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK3-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4
// CHECK3-32-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
// CHECK3-32-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK3-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4
// CHECK3-32-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
// CHECK3-32-NEXT: store i8* null, i8** [[TMP97]], align 4
// CHECK3-32-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK3-32-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK3-32-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0
// CHECK3-32-NEXT: store i32 2, i32* [[TMP101]], align 4
// CHECK3-32-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1
// CHECK3-32-NEXT: store i32 9, i32* [[TMP102]], align 4
// CHECK3-32-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2
// CHECK3-32-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 4
// CHECK3-32-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3
// CHECK3-32-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 4
// CHECK3-32-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4
// CHECK3-32-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 4
// CHECK3-32-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5
// CHECK3-32-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4
// CHECK3-32-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6
// CHECK3-32-NEXT: store i8** null, i8*** [[TMP107]], align 4
// CHECK3-32-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7
// CHECK3-32-NEXT: store i8** null, i8*** [[TMP108]], align 4
// CHECK3-32-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8
// CHECK3-32-NEXT: store i64 0, i64* [[TMP109]], align 8
// CHECK3-32-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9
// CHECK3-32-NEXT: store i64 0, i64* [[TMP110]], align 8
// CHECK3-32-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10
// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK3-32-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11
// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK3-32-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12
// CHECK3-32-NEXT: store i32 0, i32* [[TMP113]], align 4
// CHECK3-32-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]])
// CHECK3-32-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK3-32-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
// CHECK3-32: omp_offload.failed6:
// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT7]]
// CHECK3-32: omp_offload.cont7:
// CHECK3-32-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK3-32-NEXT: store double* [[TMP116]], double** [[TMP118]], align 4
// CHECK3-32-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK3-32-NEXT: store double* [[TMP116]], double** [[TMP120]], align 4
// CHECK3-32-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
// CHECK3-32-NEXT: store i8* null, i8** [[TMP121]], align 4
// CHECK3-32-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
// CHECK3-32-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK3-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4
// CHECK3-32-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
// CHECK3-32-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK3-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4
// CHECK3-32-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
// CHECK3-32-NEXT: store i8* null, i8** [[TMP126]], align 4
// CHECK3-32-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK3-32-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK3-32-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0
// CHECK3-32-NEXT: store i32 2, i32* [[TMP129]], align 4
// CHECK3-32-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1
// CHECK3-32-NEXT: store i32 2, i32* [[TMP130]], align 4
// CHECK3-32-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2
// CHECK3-32-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 4
// CHECK3-32-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3
// CHECK3-32-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 4
// CHECK3-32-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4
// CHECK3-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4
// CHECK3-32-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5
// CHECK3-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4
// CHECK3-32-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6
// CHECK3-32-NEXT: store i8** null, i8*** [[TMP135]], align 4
// CHECK3-32-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7
// CHECK3-32-NEXT: store i8** null, i8*** [[TMP136]], align 4
// CHECK3-32-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8
// CHECK3-32-NEXT: store i64 0, i64* [[TMP137]], align 8
// CHECK3-32-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9
// CHECK3-32-NEXT: store i64 0, i64* [[TMP138]], align 8
// CHECK3-32-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10
// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK3-32-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11
// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK3-32-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12
// CHECK3-32-NEXT: store i32 0, i32* [[TMP141]], align 4
// CHECK3-32-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]])
// CHECK3-32-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK3-32-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
// CHECK3-32: omp_offload.failed12:
// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT13]]
// CHECK3-32: omp_offload.cont13:
// CHECK3-32-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK3-32-NEXT: ret i32 [[TMP144]]
// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK3-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK3-32-NEXT: entry:
// CHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4
// CHECK3-32-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-32-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4
// CHECK3-32-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4
// CHECK3-32-NEXT: ret void
// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK3-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
// CHECK3-32-NEXT: entry:
// CHECK3-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
// CHECK3-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4
// CHECK3-32-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
// CHECK3-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4
// CHECK3-32-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
// CHECK3-32-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// CHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK3-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK3-32-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK3-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
// CHECK3-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK3-32-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4
// CHECK3-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK3-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK3-32-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
// CHECK3-32-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4
// CHECK3-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
// CHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
// CHECK3-32-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK3-32-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
// CHECK3-32-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK3-32-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK3-32-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
// CHECK3-32-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK3-32-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
// CHECK3-32-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK3-32-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
// CHECK3-32-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK3-32-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
// CHECK3-32-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
// CHECK3-32-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
// CHECK3-32-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK3-32-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
// CHECK3-32-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK3-32-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
// CHECK3-32-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK3-32-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
// CHECK3-32-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK3-32-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK3-32-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK3-32-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
// CHECK3-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK3-32-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
// CHECK3-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK3-32-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
// CHECK3-32-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
// CHECK3-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK3-32-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
// CHECK3-32-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
// CHECK3-32-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
// CHECK3-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK3-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK3-32-NEXT: store i64 1, i64* [[X]], align 4
// CHECK3-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK3-32-NEXT: store i8 1, i8* [[Y]], align 4
// CHECK3-32-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK3-32-NEXT: ret void
// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK3-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK3-32-NEXT: entry:
// CHECK3-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK3-32-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
// CHECK3-32-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK3-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK3-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK3-32-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false)
// CHECK3-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK3-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK3-32-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0
// CHECK3-32-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4
// CHECK3-32-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK3-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0
// CHECK3-32-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4
// CHECK3-32-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK3-32-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 4
// CHECK3-32-NEXT: ret void
// CHECK3-32-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK3-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK3-32-NEXT: entry:
// CHECK3-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK3-32-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// CHECK3-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK3-32-NEXT: store i32 0, i32* [[A]], align 4
// CHECK3-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK3-32-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
// CHECK3-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK3-32-NEXT: store i32 [[ADD]], i32* [[A]], align 4
// CHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// CHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK3-32-NEXT: store i32 [[ADD2]], i32* [[A]], align 4
// CHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// CHECK3-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK3-32-NEXT: store i32 [[ADD4]], i32* [[A]], align 4
// CHECK3-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// CHECK3-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK3-32-NEXT: store i32 [[ADD6]], i32* [[A]], align 4
// CHECK3-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT: ret i32 [[TMP9]]
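// Host side of the target region in S1::r1 (source line 167): five captures
// (this, b by value, the two VLA bounds, and the VLA data) are mapped. The
// size array is seeded from the constant @.offload_sizes.5 template, and the
// runtime-dependent VLA byte count is patched into slot 4 before the
// kernel-args descriptor (version 2, 5 arguments) is filled in.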
// CHECK3-32-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK3-32-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK3-32-NEXT: entry:
// CHECK3-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK3-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[B:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
// CHECK3-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK3-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK3-32-NEXT: store i32 [[ADD]], i32* [[B]], align 4
// CHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK3-32-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK3-32-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4
// CHECK3-32-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4
// CHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
// CHECK3-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK3-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK3-32-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
// CHECK3-32-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false)
// CHECK3-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK3-32-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
// CHECK3-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK3-32-NEXT: store double* [[A]], double** [[TMP13]], align 4
// CHECK3-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-32-NEXT: store i8* null, i8** [[TMP14]], align 4
// CHECK3-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-32-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP5]], i32* [[TMP16]], align 4
// CHECK3-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-32-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP5]], i32* [[TMP18]], align 4
// CHECK3-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-32-NEXT: store i8* null, i8** [[TMP19]], align 4
// CHECK3-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK3-32-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32*
// CHECK3-32-NEXT: store i32 2, i32* [[TMP21]], align 4
// CHECK3-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK3-32-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
// CHECK3-32-NEXT: store i32 2, i32* [[TMP23]], align 4
// CHECK3-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK3-32-NEXT: store i8* null, i8** [[TMP24]], align 4
// CHECK3-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK3-32-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP26]], align 4
// CHECK3-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK3-32-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP28]], align 4
// CHECK3-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
// CHECK3-32-NEXT: store i8* null, i8** [[TMP29]], align 4
// CHECK3-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK3-32-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK3-32-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 4
// CHECK3-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK3-32-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK3-32-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 4
// CHECK3-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK3-32-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 4
// CHECK3-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK3-32-NEXT: store i8* null, i8** [[TMP35]], align 4
// CHECK3-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK3-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-32-NEXT: store i32 2, i32* [[TMP39]], align 4
// CHECK3-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-32-NEXT: store i32 5, i32* [[TMP40]], align 4
// CHECK3-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-32-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 4
// CHECK3-32-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-32-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 4
// CHECK3-32-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-32-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 4
// CHECK3-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-32-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4
// CHECK3-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-32-NEXT: store i8** null, i8*** [[TMP45]], align 4
// CHECK3-32-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-32-NEXT: store i8** null, i8*** [[TMP46]], align 4
// CHECK3-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-32-NEXT: store i64 0, i64* [[TMP47]], align 8
// CHECK3-32-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-32-NEXT: store i64 0, i64* [[TMP48]], align 8
// CHECK3-32-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK3-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK3-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-32-NEXT: store i32 0, i32* [[TMP51]], align 4
// CHECK3-32-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK3-32-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK3-32-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3-32: omp_offload.failed:
// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-32: omp_offload.cont:
// CHECK3-32-NEXT: [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]]
// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]]
// CHECK3-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK3-32-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK3-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP55]] to i32
// CHECK3-32-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK3-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]]
// CHECK3-32-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK3-32-NEXT: ret i32 [[ADD3]]
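// Host side of the target region in fstatic (source line 142): three
// captures. The scalar 'a' and the char 'aaa' are passed by value, with
// 'aaa' packed into an i32 slot through a bitcast of the casted temporary.
// All map sizes are compile-time constants, so the @.offload_sizes.7 global
// is used directly and no per-call size array is allocated.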
// CHECK3-32-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK3-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// CHECK3-32-NEXT: entry:
// CHECK3-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[AAA:%.*]] = alloca i8, align 1
// CHECK3-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK3-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT: store i32 0, i32* [[A]], align 4
// CHECK3-32-NEXT: store i8 0, i8* [[AAA]], align 1
// CHECK3-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK3-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
// CHECK3-32-NEXT: store i8 [[TMP2]], i8* [[CONV]], align 1
// CHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
// CHECK3-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK3-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4
// CHECK3-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-32-NEXT: store i8* null, i8** [[TMP8]], align 4
// CHECK3-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP3]], i32* [[TMP10]], align 4
// CHECK3-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP3]], i32* [[TMP12]], align 4
// CHECK3-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-32-NEXT: store i8* null, i8** [[TMP13]], align 4
// CHECK3-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK3-32-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4
// CHECK3-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK3-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4
// CHECK3-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK3-32-NEXT: store i8* null, i8** [[TMP18]], align 4
// CHECK3-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-32-NEXT: store i32 2, i32* [[TMP21]], align 4
// CHECK3-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-32-NEXT: store i32 3, i32* [[TMP22]], align 4
// CHECK3-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-32-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 4
// CHECK3-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-32-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 4
// CHECK3-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4
// CHECK3-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4
// CHECK3-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-32-NEXT: store i8** null, i8*** [[TMP27]], align 4
// CHECK3-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-32-NEXT: store i8** null, i8*** [[TMP28]], align 4
// CHECK3-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-32-NEXT: store i64 0, i64* [[TMP29]], align 8
// CHECK3-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-32-NEXT: store i64 0, i64* [[TMP30]], align 8
// CHECK3-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK3-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK3-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-32-NEXT: store i32 0, i32* [[TMP33]], align 4
// CHECK3-32-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK3-32-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK3-32-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3-32: omp_offload.failed:
// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-32: omp_offload.cont:
// CHECK3-32-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT: ret i32 [[TMP36]]
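// Host side of the target region in ftemplate<int> (source line 128): two
// captures ('a' by value and the array 'b'), again with purely constant map
// sizes taken from @.offload_sizes.9.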
// CHECK3-32-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK3-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK3-32-NEXT: entry:
// CHECK3-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK3-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4
// CHECK3-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT: store i32 0, i32* [[A]], align 4
// CHECK3-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK3-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP3]], align 4
// CHECK3-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK3-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-32-NEXT: store i8* null, i8** [[TMP6]], align 4
// CHECK3-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-32-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4
// CHECK3-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4
// CHECK3-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-32-NEXT: store i8* null, i8** [[TMP11]], align 4
// CHECK3-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-32-NEXT: store i32 2, i32* [[TMP14]], align 4
// CHECK3-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-32-NEXT: store i32 2, i32* [[TMP15]], align 4
// CHECK3-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-32-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 4
// CHECK3-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-32-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 4
// CHECK3-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4
// CHECK3-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4
// CHECK3-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-32-NEXT: store i8** null, i8*** [[TMP20]], align 4
// CHECK3-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-32-NEXT: store i8** null, i8*** [[TMP21]], align 4
// CHECK3-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-32-NEXT: store i64 0, i64* [[TMP22]], align 8
// CHECK3-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-32-NEXT: store i64 0, i64* [[TMP23]], align 8
// CHECK3-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK3-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK3-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-32-NEXT: store i32 0, i32* [[TMP26]], align 4
// CHECK3-32-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK3-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK3-32-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3-32: omp_offload.failed:
// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-32: omp_offload.cont:
// CHECK3-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT: ret i32 [[TMP29]]
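// Host fallback bodies of the outlined regions follow. For S1::r1 the
// firstprivate VLA is re-created with stacksave/alloca and its contents are
// copied in with memcpy before the element update.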
// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK3-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK3-32-NEXT: entry:
// CHECK3-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK3-32-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK3-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK3-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK3-32-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK3-32-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK3-32-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK3-32-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK3-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK3-32-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK3-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK3-32-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK3-32-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
// CHECK3-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
// CHECK3-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK3-32-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// CHECK3-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK3-32-NEXT: store double [[ADD]], double* [[A]], align 4
// CHECK3-32-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK3-32-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4
// CHECK3-32-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK3-32-NEXT: store double [[INC]], double* [[A4]], align 4
// CHECK3-32-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
// CHECK3-32-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
// CHECK3-32-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK3-32-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
// CHECK3-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK3-32-NEXT: ret void
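// Fallback body for fstatic (source line 142): 'b' is firstprivate, so it is
// copied into a local with a 40-byte memcpy; 'a' and 'aaa' are updated
// through their by-value parameter slots.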
// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK3-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK3-32-NEXT: entry:
// CHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK3-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-32-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
// CHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK3-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK3-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
// CHECK3-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
// CHECK3-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
// CHECK3-32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// CHECK3-32-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1
// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK3-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK3-32-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
// CHECK3-32-NEXT: ret void
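// Fallback body for ftemplate<int> (source line 128): the same pattern with
// only 'a' and the 40-byte copy of 'b'.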
// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK3-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK3-32-NEXT: entry:
// CHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK3-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK3-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK3-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK3-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK3-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK3-32-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK3-32-NEXT: ret void
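// Registration function run at program start; the argument value 1 encodes
// that no 'requires' clauses are active in this translation unit.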
// CHECK3-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK3-32-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK3-32-NEXT: entry:
// CHECK3-32-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK3-32-NEXT: ret void
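// Device-side checks (TCHECK prefixes) follow for the 64-bit powerpc64le
// target. Captured scalars arrive as i64 parameters and are reinterpreted in
// place by bitcasting the i64 stack slot to the original type (i32, i16, i8).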
// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK-64-NEXT: entry:
// TCHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8
// TCHECK-64-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK-64-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8
// TCHECK-64-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8
// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
// TCHECK-64-NEXT: ret void
// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] {
// TCHECK-64-NEXT: entry:
// TCHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
// TCHECK-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8
// TCHECK-64-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
// TCHECK-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8
// TCHECK-64-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
// TCHECK-64-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// TCHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// TCHECK-64-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8
// TCHECK-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
// TCHECK-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// TCHECK-64-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8
// TCHECK-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
// TCHECK-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// TCHECK-64-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
// TCHECK-64-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8
// TCHECK-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// TCHECK-64-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// TCHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
// TCHECK-64-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// TCHECK-64-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
// TCHECK-64-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
// TCHECK-64-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
// TCHECK-64-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// TCHECK-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// TCHECK-64-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
// TCHECK-64-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// TCHECK-64-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
// TCHECK-64-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// TCHECK-64-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
// TCHECK-64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK-64-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
// TCHECK-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// TCHECK-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
// TCHECK-64-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK-64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
// TCHECK-64-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// TCHECK-64-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
// TCHECK-64-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// TCHECK-64-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
// TCHECK-64-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// TCHECK-64-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// TCHECK-64-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK-64-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
// TCHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
// TCHECK-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// TCHECK-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
// TCHECK-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// TCHECK-64-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
// TCHECK-64-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
// TCHECK-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// TCHECK-64-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
// TCHECK-64-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
// TCHECK-64-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
// TCHECK-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// TCHECK-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// TCHECK-64-NEXT: store i64 1, i64* [[X]], align 8
// TCHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// TCHECK-64-NEXT: store i8 1, i8* [[Y]], align 8
// TCHECK-64-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// TCHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
// TCHECK-64-NEXT: ret void
// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK-64-NEXT: entry:
// TCHECK-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
// TCHECK-64-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
// TCHECK-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
// TCHECK-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
// TCHECK-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
// TCHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4
// TCHECK-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK-64-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// TCHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0
// TCHECK-64-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8
// TCHECK-64-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// TCHECK-64-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0
// TCHECK-64-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8
// TCHECK-64-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK-64-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 8
// TCHECK-64-NEXT: ret void
// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK-64-NEXT: entry:
// TCHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// TCHECK-64-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4
// TCHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK-64-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
// TCHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
// TCHECK-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
// TCHECK-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// TCHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
// TCHECK-64-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
// TCHECK-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// TCHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// TCHECK-64-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
// TCHECK-64-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1
// TCHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
// TCHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// TCHECK-64-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
// TCHECK-64-NEXT: ret void
// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK-64-NEXT: entry:
// TCHECK-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// TCHECK-64-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8
// TCHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// TCHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// TCHECK-64-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// TCHECK-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// TCHECK-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// TCHECK-64-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// TCHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// TCHECK-64-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
// TCHECK-64-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
// TCHECK-64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK-64-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// TCHECK-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// TCHECK-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
// TCHECK-64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK-64-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// TCHECK-64-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// TCHECK-64-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
// TCHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK-64-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
// TCHECK-64-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
// TCHECK-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK-64-NEXT: store double [[ADD]], double* [[A]], align 8
// TCHECK-64-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK-64-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8
// TCHECK-64-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// TCHECK-64-NEXT: store double [[INC]], double* [[A5]], align 8
// TCHECK-64-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16
// TCHECK-64-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
// TCHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
// TCHECK-64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// TCHECK-64-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
// TCHECK-64-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// TCHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
// TCHECK-64-NEXT: ret void
// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK-64-NEXT: entry:
// TCHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// TCHECK-64-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// TCHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
// TCHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
// TCHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// TCHECK-64-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// TCHECK-64-NEXT: ret void
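// The TCHECK1-64 block repeats the same device-side checks for a second run
// line (presumably the PCH-based device compile); the generated IR is
// identical to the TCHECK-64 block above.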
// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK1-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK1-64-NEXT: entry:
// TCHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8
// TCHECK1-64-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK1-64-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8
// TCHECK1-64-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8
// TCHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK1-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
// TCHECK1-64-NEXT: ret void
// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK1-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] {
// TCHECK1-64-NEXT: entry:
// TCHECK1-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
// TCHECK1-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8
// TCHECK1-64-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
// TCHECK1-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8
// TCHECK1-64-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
// TCHECK1-64-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// TCHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK1-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// TCHECK1-64-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8
// TCHECK1-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
// TCHECK1-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// TCHECK1-64-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8
// TCHECK1-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
// TCHECK1-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// TCHECK1-64-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
// TCHECK1-64-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8
// TCHECK1-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
// TCHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// TCHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
// TCHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// TCHECK1-64-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
// TCHECK1-64-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
// TCHECK1-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// TCHECK1-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
// TCHECK1-64-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
// TCHECK1-64-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
// TCHECK1-64-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// TCHECK1-64-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
// TCHECK1-64-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
// TCHECK1-64-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
// TCHECK1-64-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// TCHECK1-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// TCHECK1-64-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
// TCHECK1-64-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// TCHECK1-64-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
// TCHECK1-64-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// TCHECK1-64-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
// TCHECK1-64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK1-64-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
// TCHECK1-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// TCHECK1-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
// TCHECK1-64-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK1-64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
// TCHECK1-64-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// TCHECK1-64-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
// TCHECK1-64-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// TCHECK1-64-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
// TCHECK1-64-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// TCHECK1-64-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// TCHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// TCHECK1-64-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK1-64-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
// TCHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
// TCHECK1-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// TCHECK1-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
// TCHECK1-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// TCHECK1-64-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
// TCHECK1-64-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
// TCHECK1-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// TCHECK1-64-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
// TCHECK1-64-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
// TCHECK1-64-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
// TCHECK1-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// TCHECK1-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// TCHECK1-64-NEXT: store i64 1, i64* [[X]], align 8
// TCHECK1-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// TCHECK1-64-NEXT: store i8 1, i8* [[Y]], align 8
// TCHECK1-64-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// TCHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
// TCHECK1-64-NEXT: ret void
// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK1-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK1-64-NEXT: entry:
// TCHECK1-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
// TCHECK1-64-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
// TCHECK1-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
// TCHECK1-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
// TCHECK1-64-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
// TCHECK1-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
// TCHECK1-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4
// TCHECK1-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK1-64-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// TCHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0
// TCHECK1-64-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8
// TCHECK1-64-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// TCHECK1-64-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0
// TCHECK1-64-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8
// TCHECK1-64-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK1-64-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 8
// TCHECK1-64-NEXT: ret void
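//
// fstatic target region (source line 142): the scalars 'a' (i32) and 'aaa'
// (i8) are passed by value widened to i64 and accessed through bitcasts of
// their parameter slots, while the array 'b' gets a firstprivate copy via
// llvm.memcpy; 'a', 'aaa', and b[2] are each incremented.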
// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK1-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK1-64-NEXT: entry:
// TCHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// TCHECK1-64-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4
// TCHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK1-64-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
// TCHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// TCHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK1-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
// TCHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// TCHECK1-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
// TCHECK1-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// TCHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK1-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
// TCHECK1-64-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
// TCHECK1-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// TCHECK1-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// TCHECK1-64-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
// TCHECK1-64-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1
// TCHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
// TCHECK1-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK1-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// TCHECK1-64-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
// TCHECK1-64-NEXT: ret void
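//
// S1::r1 target region (source line 167): 'this', 'b', and the two VLA bounds
// come in as parameters; the i16 VLA 'c' is copied firstprivate with
// stacksave + alloca + memcpy. 'b' is converted to double and added into
// this->a, this->a is incremented, and the result is truncated to i16 and
// stored into c[1][1].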
// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK1-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK1-64-NEXT: entry:
// TCHECK1-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// TCHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8
// TCHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// TCHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// TCHECK1-64-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// TCHECK1-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// TCHECK1-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// TCHECK1-64-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8
// TCHECK1-64-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// TCHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// TCHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// TCHECK1-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// TCHECK1-64-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// TCHECK1-64-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
// TCHECK1-64-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
// TCHECK1-64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK1-64-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// TCHECK1-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// TCHECK1-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
// TCHECK1-64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK1-64-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// TCHECK1-64-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// TCHECK1-64-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
// TCHECK1-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK1-64-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
// TCHECK1-64-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
// TCHECK1-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK1-64-NEXT: store double [[ADD]], double* [[A]], align 8
// TCHECK1-64-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK1-64-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8
// TCHECK1-64-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// TCHECK1-64-NEXT: store double [[INC]], double* [[A5]], align 8
// TCHECK1-64-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16
// TCHECK1-64-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
// TCHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
// TCHECK1-64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// TCHECK1-64-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
// TCHECK1-64-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// TCHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
// TCHECK1-64-NEXT: ret void
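//
// ftemplate<int> target region (source line 128): 'a' is passed by value
// (widened to i64) and 'b' is copied firstprivate with memcpy; 'a' and b[2]
// are incremented.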
// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK1-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK1-64-NEXT: entry:
// TCHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// TCHECK1-64-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// TCHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// TCHECK1-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK1-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// TCHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK1-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
// TCHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
// TCHECK1-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK1-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// TCHECK1-64-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// TCHECK1-64-NEXT: ret void
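//
// The TCHECK2-32 prefixes repeat the device-side checks for the i386 target:
// parameters and VLA bounds are i32 rather than i64, pointer slots are 4
// bytes, struct.TT shrinks to 12 bytes, and by-value scalars such as 'a' are
// read straight from their i32 parameter slots instead of through bitcasts.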
// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK2-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK2-32-NEXT: entry:
// TCHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4
// TCHECK2-32-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK2-32-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4
// TCHECK2-32-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4
// TCHECK2-32-NEXT: ret void
// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK2-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] {
// TCHECK2-32-NEXT: entry:
// TCHECK2-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
// TCHECK2-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4
// TCHECK2-32-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
// TCHECK2-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4
// TCHECK2-32-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
// TCHECK2-32-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// TCHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK2-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// TCHECK2-32-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// TCHECK2-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
// TCHECK2-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// TCHECK2-32-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4
// TCHECK2-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
// TCHECK2-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// TCHECK2-32-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
// TCHECK2-32-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4
// TCHECK2-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
// TCHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// TCHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
// TCHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// TCHECK2-32-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
// TCHECK2-32-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
// TCHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// TCHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
// TCHECK2-32-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
// TCHECK2-32-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
// TCHECK2-32-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// TCHECK2-32-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
// TCHECK2-32-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
// TCHECK2-32-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
// TCHECK2-32-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// TCHECK2-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// TCHECK2-32-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
// TCHECK2-32-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// TCHECK2-32-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
// TCHECK2-32-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// TCHECK2-32-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
// TCHECK2-32-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK2-32-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
// TCHECK2-32-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
// TCHECK2-32-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
// TCHECK2-32-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK2-32-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
// TCHECK2-32-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// TCHECK2-32-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
// TCHECK2-32-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// TCHECK2-32-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
// TCHECK2-32-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// TCHECK2-32-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// TCHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// TCHECK2-32-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK2-32-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
// TCHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
// TCHECK2-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// TCHECK2-32-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
// TCHECK2-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// TCHECK2-32-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
// TCHECK2-32-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
// TCHECK2-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// TCHECK2-32-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
// TCHECK2-32-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
// TCHECK2-32-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
// TCHECK2-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// TCHECK2-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// TCHECK2-32-NEXT: store i64 1, i64* [[X]], align 4
// TCHECK2-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// TCHECK2-32-NEXT: store i8 1, i8* [[Y]], align 4
// TCHECK2-32-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// TCHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
// TCHECK2-32-NEXT: ret void
// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK2-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK2-32-NEXT: entry:
// TCHECK2-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
// TCHECK2-32-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
// TCHECK2-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
// TCHECK2-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
// TCHECK2-32-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
// TCHECK2-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
// TCHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4
// TCHECK2-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK2-32-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// TCHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0
// TCHECK2-32-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4
// TCHECK2-32-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// TCHECK2-32-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0
// TCHECK2-32-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4
// TCHECK2-32-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK2-32-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 4
// TCHECK2-32-NEXT: ret void
// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK2-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK2-32-NEXT: entry:
// TCHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// TCHECK2-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK2-32-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
// TCHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// TCHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
// TCHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// TCHECK2-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK2-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// TCHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// TCHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK2-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
// TCHECK2-32-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
// TCHECK2-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
// TCHECK2-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
// TCHECK2-32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// TCHECK2-32-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1
// TCHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// TCHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK2-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
// TCHECK2-32-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
// TCHECK2-32-NEXT: ret void
// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK2-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK2-32-NEXT: entry:
// TCHECK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// TCHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4
// TCHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// TCHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// TCHECK2-32-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// TCHECK2-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// TCHECK2-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// TCHECK2-32-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4
// TCHECK2-32-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// TCHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// TCHECK2-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// TCHECK2-32-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// TCHECK2-32-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
// TCHECK2-32-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
// TCHECK2-32-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK2-32-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// TCHECK2-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// TCHECK2-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// TCHECK2-32-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK2-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// TCHECK2-32-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// TCHECK2-32-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
// TCHECK2-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
// TCHECK2-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
// TCHECK2-32-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// TCHECK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK2-32-NEXT: store double [[ADD]], double* [[A]], align 4
// TCHECK2-32-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK2-32-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4
// TCHECK2-32-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// TCHECK2-32-NEXT: store double [[INC]], double* [[A4]], align 4
// TCHECK2-32-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
// TCHECK2-32-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
// TCHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
// TCHECK2-32-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// TCHECK2-32-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
// TCHECK2-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// TCHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
// TCHECK2-32-NEXT: ret void
// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK2-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK2-32-NEXT: entry:
// TCHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// TCHECK2-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// TCHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// TCHECK2-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK2-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// TCHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// TCHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK2-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
// TCHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// TCHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK2-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// TCHECK2-32-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// TCHECK2-32-NEXT: ret void
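//
// TCHECK3-32 covers the second i386 run line; its assertions match the
// TCHECK2-32 block line for line apart from the prefix.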
// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK3-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK3-32-NEXT: entry:
// TCHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4
// TCHECK3-32-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK3-32-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4
// TCHECK3-32-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4
// TCHECK3-32-NEXT: ret void
// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK3-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] {
// TCHECK3-32-NEXT: entry:
// TCHECK3-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
// TCHECK3-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4
// TCHECK3-32-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
// TCHECK3-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4
// TCHECK3-32-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
// TCHECK3-32-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// TCHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK3-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// TCHECK3-32-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// TCHECK3-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
// TCHECK3-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// TCHECK3-32-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4
// TCHECK3-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
// TCHECK3-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// TCHECK3-32-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
// TCHECK3-32-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4
// TCHECK3-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
// TCHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// TCHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
// TCHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// TCHECK3-32-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
// TCHECK3-32-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
// TCHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// TCHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
// TCHECK3-32-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
// TCHECK3-32-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
// TCHECK3-32-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// TCHECK3-32-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
// TCHECK3-32-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
// TCHECK3-32-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
// TCHECK3-32-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// TCHECK3-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// TCHECK3-32-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
// TCHECK3-32-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// TCHECK3-32-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
// TCHECK3-32-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// TCHECK3-32-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
// TCHECK3-32-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK3-32-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
// TCHECK3-32-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
// TCHECK3-32-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
// TCHECK3-32-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK3-32-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
// TCHECK3-32-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// TCHECK3-32-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
// TCHECK3-32-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// TCHECK3-32-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
// TCHECK3-32-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// TCHECK3-32-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// TCHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// TCHECK3-32-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK3-32-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
// TCHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
// TCHECK3-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// TCHECK3-32-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
// TCHECK3-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// TCHECK3-32-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
// TCHECK3-32-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
// TCHECK3-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// TCHECK3-32-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
// TCHECK3-32-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
// TCHECK3-32-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
// TCHECK3-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// TCHECK3-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// TCHECK3-32-NEXT: store i64 1, i64* [[X]], align 4
// TCHECK3-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// TCHECK3-32-NEXT: store i8 1, i8* [[Y]], align 4
// TCHECK3-32-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// TCHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
// TCHECK3-32-NEXT: ret void
// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK3-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK3-32-NEXT: entry:
// TCHECK3-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
// TCHECK3-32-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
// TCHECK3-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
// TCHECK3-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
// TCHECK3-32-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
// TCHECK3-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
// TCHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4
// TCHECK3-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK3-32-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// TCHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0
// TCHECK3-32-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4
// TCHECK3-32-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// TCHECK3-32-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0
// TCHECK3-32-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4
// TCHECK3-32-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK3-32-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 4
// TCHECK3-32-NEXT: ret void
// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK3-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK3-32-NEXT: entry:
// TCHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// TCHECK3-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK3-32-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
// TCHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// TCHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
// TCHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// TCHECK3-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK3-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// TCHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// TCHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK3-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
// TCHECK3-32-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
// TCHECK3-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
// TCHECK3-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
// TCHECK3-32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// TCHECK3-32-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1
// TCHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// TCHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK3-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
// TCHECK3-32-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
// TCHECK3-32-NEXT: ret void
// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK3-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK3-32-NEXT: entry:
// TCHECK3-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// TCHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4
// TCHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// TCHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// TCHECK3-32-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// TCHECK3-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// TCHECK3-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// TCHECK3-32-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4
// TCHECK3-32-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// TCHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// TCHECK3-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// TCHECK3-32-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// TCHECK3-32-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
// TCHECK3-32-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
// TCHECK3-32-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK3-32-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// TCHECK3-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// TCHECK3-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// TCHECK3-32-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK3-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// TCHECK3-32-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// TCHECK3-32-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
// TCHECK3-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
// TCHECK3-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
// TCHECK3-32-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// TCHECK3-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK3-32-NEXT: store double [[ADD]], double* [[A]], align 4
// TCHECK3-32-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK3-32-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4
// TCHECK3-32-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// TCHECK3-32-NEXT: store double [[INC]], double* [[A4]], align 4
// TCHECK3-32-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
// TCHECK3-32-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
// TCHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
// TCHECK3-32-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// TCHECK3-32-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
// TCHECK3-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// TCHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
// TCHECK3-32-NEXT: ret void
// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK3-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK3-32-NEXT: entry:
// TCHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// TCHECK3-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// TCHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// TCHECK3-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK3-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// TCHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// TCHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK3-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
// TCHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// TCHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK3-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// TCHECK3-32-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// TCHECK3-32-NEXT: ret void
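//
// Host-side checks (CHECK0, ppc64le, opaque pointers) for _Z3fooiPd start
// here: the function allocates the VLAs, fills the .offload_baseptrs /
// .offload_ptrs / .offload_mappers arrays for each target region, populates
// a __tgt_kernel_arguments struct (field 0 holds the kernel-args version, 3;
// field 1 the argument count), and calls __tgt_target_kernel, falling back to
// a direct host call of the outlined region when offloading fails.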
// CHECK0-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK0-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK0-NEXT: entry:
// CHECK0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK0-NEXT: [[AA:%.*]] = alloca i16, align 2
// CHECK0-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// CHECK0-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK0-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK0-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK0-NEXT: [[P:%.*]] = alloca ptr, align 64
// CHECK0-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[GA_CASTED:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
// CHECK0-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
// CHECK0-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x ptr], align 8
// CHECK0-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x ptr], align 8
// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x ptr], align 8
// CHECK0-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
// CHECK0-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
// CHECK0-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
// CHECK0-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK0-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK0-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// CHECK0-NEXT: store i32 0, ptr [[A]], align 4
// CHECK0-NEXT: store i16 0, ptr [[AA]], align 2
// CHECK0-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK0-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// CHECK0-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK0-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
// CHECK0-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK0-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
// CHECK0-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK0-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK0-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK0-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// CHECK0-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
// CHECK0-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// CHECK0-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK0-NEXT: store i32 [[TMP6]], ptr [[X]], align 4
// CHECK0-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
// CHECK0-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK0-NEXT: store i32 [[TMP7]], ptr [[Y]], align 4
// CHECK0-NEXT: store ptr [[A]], ptr [[P]], align 64
// CHECK0-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4
// CHECK0-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4
// CHECK0-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8
// CHECK0-NEXT: [[TMP10:%.*]] = load ptr, ptr [[P]], align 64
// CHECK0-NEXT: [[TMP11:%.*]] = load i32, ptr @ga, align 4
// CHECK0-NEXT: store i32 [[TMP11]], ptr [[GA_CASTED]], align 4
// CHECK0-NEXT: [[TMP12:%.*]] = load i64, ptr [[GA_CASTED]], align 8
// CHECK0-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT: store i64 [[TMP9]], ptr [[TMP13]], align 8
// CHECK0-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT: store i64 [[TMP9]], ptr [[TMP14]], align 8
// CHECK0-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-NEXT: store ptr null, ptr [[TMP15]], align 8
// CHECK0-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-NEXT: store ptr [[TMP10]], ptr [[TMP16]], align 8
// CHECK0-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-NEXT: store ptr [[TMP10]], ptr [[TMP17]], align 8
// CHECK0-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-NEXT: store ptr null, ptr [[TMP18]], align 8
// CHECK0-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK0-NEXT: store i64 [[TMP12]], ptr [[TMP19]], align 8
// CHECK0-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK0-NEXT: store i64 [[TMP12]], ptr [[TMP20]], align 8
// CHECK0-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK0-NEXT: store ptr null, ptr [[TMP21]], align 8
// CHECK0-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-NEXT: store i32 3, ptr [[TMP24]], align 4
// CHECK0-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-NEXT: store i32 3, ptr [[TMP25]], align 4
// CHECK0-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8
// CHECK0-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8
// CHECK0-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-NEXT: store ptr @.offload_sizes, ptr [[TMP28]], align 8
// CHECK0-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-NEXT: store ptr @.offload_maptypes, ptr [[TMP29]], align 8
// CHECK0-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-NEXT: store ptr null, ptr [[TMP30]], align 8
// CHECK0-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-NEXT: store ptr null, ptr [[TMP31]], align 8
// CHECK0-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-NEXT: store i64 0, ptr [[TMP32]], align 8
// CHECK0-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-NEXT: store i64 0, ptr [[TMP33]], align 8
// CHECK0-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP34]], align 4
// CHECK0-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP35]], align 4
// CHECK0-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-NEXT: store i32 0, ptr [[TMP36]], align 4
// CHECK0-NEXT: [[TMP37:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, ptr [[KERNEL_ARGS]])
// CHECK0-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
// CHECK0-NEXT: br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0: omp_offload.failed:
// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], ptr [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK0: omp_offload.cont:
// CHECK0-NEXT: [[TMP39:%.*]] = load i16, ptr [[AA]], align 2
// CHECK0-NEXT: store i16 [[TMP39]], ptr [[AA_CASTED]], align 2
// CHECK0-NEXT: [[TMP40:%.*]] = load i64, ptr [[AA_CASTED]], align 8
// CHECK0-NEXT: [[TMP41:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK0-NEXT: [[TMP42:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK0-NEXT: [[TMP43:%.*]] = mul nuw i64 [[TMP42]], 8
// CHECK0-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.1, i64 72, i1 false)
// CHECK0-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK0-NEXT: store i64 [[TMP40]], ptr [[TMP44]], align 8
// CHECK0-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK0-NEXT: store i64 [[TMP40]], ptr [[TMP45]], align 8
// CHECK0-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 0
// CHECK0-NEXT: store ptr null, ptr [[TMP46]], align 8
// CHECK0-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
// CHECK0-NEXT: store ptr [[B]], ptr [[TMP47]], align 8
// CHECK0-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
// CHECK0-NEXT: store ptr [[B]], ptr [[TMP48]], align 8
// CHECK0-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 1
// CHECK0-NEXT: store ptr null, ptr [[TMP49]], align 8
// CHECK0-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
// CHECK0-NEXT: store i64 [[TMP1]], ptr [[TMP50]], align 8
// CHECK0-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
// CHECK0-NEXT: store i64 [[TMP1]], ptr [[TMP51]], align 8
// CHECK0-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 2
// CHECK0-NEXT: store ptr null, ptr [[TMP52]], align 8
// CHECK0-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
// CHECK0-NEXT: store ptr [[VLA]], ptr [[TMP53]], align 8
// CHECK0-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
// CHECK0-NEXT: store ptr [[VLA]], ptr [[TMP54]], align 8
// CHECK0-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK0-NEXT: store i64 [[TMP41]], ptr [[TMP55]], align 8
// CHECK0-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 3
// CHECK0-NEXT: store ptr null, ptr [[TMP56]], align 8
// CHECK0-NEXT: [[TMP57:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
// CHECK0-NEXT: store ptr [[C]], ptr [[TMP57]], align 8
// CHECK0-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
// CHECK0-NEXT: store ptr [[C]], ptr [[TMP58]], align 8
// CHECK0-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 4
// CHECK0-NEXT: store ptr null, ptr [[TMP59]], align 8
// CHECK0-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
// CHECK0-NEXT: store i64 5, ptr [[TMP60]], align 8
// CHECK0-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
// CHECK0-NEXT: store i64 5, ptr [[TMP61]], align 8
// CHECK0-NEXT: [[TMP62:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 5
// CHECK0-NEXT: store ptr null, ptr [[TMP62]], align 8
// CHECK0-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
// CHECK0-NEXT: store i64 [[TMP4]], ptr [[TMP63]], align 8
// CHECK0-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
// CHECK0-NEXT: store i64 [[TMP4]], ptr [[TMP64]], align 8
// CHECK0-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 6
// CHECK0-NEXT: store ptr null, ptr [[TMP65]], align 8
// CHECK0-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
// CHECK0-NEXT: store ptr [[VLA1]], ptr [[TMP66]], align 8
// CHECK0-NEXT: [[TMP67:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
// CHECK0-NEXT: store ptr [[VLA1]], ptr [[TMP67]], align 8
// CHECK0-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK0-NEXT: store i64 [[TMP43]], ptr [[TMP68]], align 8
// CHECK0-NEXT: [[TMP69:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 7
// CHECK0-NEXT: store ptr null, ptr [[TMP69]], align 8
// CHECK0-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
// CHECK0-NEXT: store ptr [[D]], ptr [[TMP70]], align 8
// CHECK0-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
// CHECK0-NEXT: store ptr [[D]], ptr [[TMP71]], align 8
// CHECK0-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 8
// CHECK0-NEXT: store ptr null, ptr [[TMP72]], align 8
// CHECK0-NEXT: [[TMP73:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK0-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK0-NEXT: [[TMP75:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK0-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
// CHECK0-NEXT: store i32 3, ptr [[TMP76]], align 4
// CHECK0-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
// CHECK0-NEXT: store i32 9, ptr [[TMP77]], align 4
// CHECK0-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
// CHECK0-NEXT: store ptr [[TMP73]], ptr [[TMP78]], align 8
// CHECK0-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
// CHECK0-NEXT: store ptr [[TMP74]], ptr [[TMP79]], align 8
// CHECK0-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
// CHECK0-NEXT: store ptr [[TMP75]], ptr [[TMP80]], align 8
// CHECK0-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
// CHECK0-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP81]], align 8
// CHECK0-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
// CHECK0-NEXT: store ptr null, ptr [[TMP82]], align 8
// CHECK0-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
// CHECK0-NEXT: store ptr null, ptr [[TMP83]], align 8
// CHECK0-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
// CHECK0-NEXT: store i64 0, ptr [[TMP84]], align 8
// CHECK0-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
// CHECK0-NEXT: store i64 0, ptr [[TMP85]], align 8
// CHECK0-NEXT: [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP86]], align 4
// CHECK0-NEXT: [[TMP87:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
// CHECK0-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP87]], align 4
// CHECK0-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
// CHECK0-NEXT: store i32 0, ptr [[TMP88]], align 4
// CHECK0-NEXT: [[TMP89:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, ptr [[KERNEL_ARGS5]])
// CHECK0-NEXT: [[TMP90:%.*]] = icmp ne i32 [[TMP89]], 0
// CHECK0-NEXT: br i1 [[TMP90]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
// CHECK0: omp_offload.failed6:
// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP40]], ptr [[B]], i64 [[TMP1]], ptr [[VLA]], ptr [[C]], i64 5, i64 [[TMP4]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]]
// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT7]]
// CHECK0: omp_offload.cont7:
// CHECK0-NEXT: [[TMP91:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK0-NEXT: [[TMP92:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK0-NEXT: store ptr [[TMP91]], ptr [[TMP92]], align 8
// CHECK0-NEXT: [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK0-NEXT: store ptr [[TMP91]], ptr [[TMP93]], align 8
// CHECK0-NEXT: [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i64 0, i64 0
// CHECK0-NEXT: store ptr null, ptr [[TMP94]], align 8
// CHECK0-NEXT: [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
// CHECK0-NEXT: store ptr [[E]], ptr [[TMP95]], align 8
// CHECK0-NEXT: [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
// CHECK0-NEXT: store ptr [[E]], ptr [[TMP96]], align 8
// CHECK0-NEXT: [[TMP97:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i64 0, i64 1
// CHECK0-NEXT: store ptr null, ptr [[TMP97]], align 8
// CHECK0-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK0-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK0-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0
// CHECK0-NEXT: store i32 3, ptr [[TMP100]], align 4
// CHECK0-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1
// CHECK0-NEXT: store i32 2, ptr [[TMP101]], align 4
// CHECK0-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2
// CHECK0-NEXT: store ptr [[TMP98]], ptr [[TMP102]], align 8
// CHECK0-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 3
// CHECK0-NEXT: store ptr [[TMP99]], ptr [[TMP103]], align 8
// CHECK0-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 4
// CHECK0-NEXT: store ptr @.offload_sizes.3, ptr [[TMP104]], align 8
// CHECK0-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 5
// CHECK0-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP105]], align 8
// CHECK0-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 6
// CHECK0-NEXT: store ptr null, ptr [[TMP106]], align 8
// CHECK0-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 7
// CHECK0-NEXT: store ptr null, ptr [[TMP107]], align 8
// CHECK0-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 8
// CHECK0-NEXT: store i64 0, ptr [[TMP108]], align 8
// CHECK0-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 9
// CHECK0-NEXT: store i64 0, ptr [[TMP109]], align 8
// CHECK0-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 10
// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP110]], align 4
// CHECK0-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 11
// CHECK0-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP111]], align 4
// CHECK0-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 12
// CHECK0-NEXT: store i32 0, ptr [[TMP112]], align 4
// CHECK0-NEXT: [[TMP113:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, ptr [[KERNEL_ARGS11]])
// CHECK0-NEXT: [[TMP114:%.*]] = icmp ne i32 [[TMP113]], 0
// CHECK0-NEXT: br i1 [[TMP114]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
// CHECK0: omp_offload.failed12:
// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(ptr [[TMP91]], ptr [[E]]) #[[ATTR3]]
// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT13]]
// CHECK0: omp_offload.cont13:
// CHECK0-NEXT: [[TMP115:%.*]] = load i32, ptr [[A]], align 4
// CHECK0-NEXT: [[TMP116:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// CHECK0-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP116]])
// CHECK0-NEXT: ret i32 [[TMP115]]
//
//
// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK0-SAME: (i64 noundef [[A:%.*]], ptr noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK0-NEXT: entry:
// CHECK0-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK0-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8
// CHECK0-NEXT: store i64 [[GA]], ptr [[GA_ADDR]], align 8
// CHECK0-NEXT: ret void
//
//
// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK0-SAME: (i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
// CHECK0-NEXT: entry:
// CHECK0-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// CHECK0-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK0-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK0-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK0-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK0-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
// CHECK0-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8
// CHECK0-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK0-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
// CHECK0-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8
// CHECK0-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8
// CHECK0-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
// CHECK0-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK0-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
// CHECK0-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8
// CHECK0-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK0-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
// CHECK0-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8
// CHECK0-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8
// CHECK0-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8
// CHECK0-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i64 40, i1 false)
// CHECK0-NEXT: [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK0-NEXT: store ptr [[TMP8]], ptr [[SAVED_STACK]], align 8
// CHECK0-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK0-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
// CHECK0-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK0-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i64 [[TMP9]], i1 false)
// CHECK0-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i64 400, i1 false)
// CHECK0-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK0-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP10]], align 8
// CHECK0-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
// CHECK0-NEXT: store i64 [[TMP5]], ptr [[__VLA_EXPR2]], align 8
// CHECK0-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK0-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
// CHECK0-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i64 [[TMP12]], i1 false)
// CHECK0-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[D9]], ptr align 8 [[TMP7]], i64 16, i1 false)
// CHECK0-NEXT: [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK0-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
// CHECK0-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK0-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
// CHECK0-NEXT: store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i64 0, i64 2
// CHECK0-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// CHECK0-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i64 3
// CHECK0-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
// CHECK0-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i64 0, i64 1
// CHECK0-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i64 0, i64 2
// CHECK0-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
// CHECK0-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP5]]
// CHECK0-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i64 [[TMP14]]
// CHECK0-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3
// CHECK0-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
// CHECK0-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
// CHECK0-NEXT: store i64 1, ptr [[X]], align 8
// CHECK0-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
// CHECK0-NEXT: store i8 1, ptr [[Y]], align 8
// CHECK0-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// CHECK0-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP15]])
// CHECK0-NEXT: ret void
//
//
// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK0-SAME: (ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK0-NEXT: entry:
// CHECK0-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK0-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// CHECK0-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
// CHECK0-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
// CHECK0-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[E1]], ptr align 4 [[TMP0]], i64 8, i1 false)
// CHECK0-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E1]], i32 0, i32 0
// CHECK0-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
// CHECK0-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// CHECK0-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 0
// CHECK0-NEXT: store double [[CONV]], ptr [[ARRAYIDX]], align 8
// CHECK0-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK0-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 0
// CHECK0-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
// CHECK0-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// CHECK0-NEXT: store double [[INC]], ptr [[ARRAYIDX2]], align 8
// CHECK0-NEXT: ret void
//
//
// CHECK0-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK0-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK0-NEXT: entry:
// CHECK0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK0-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// CHECK0-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK0-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// CHECK0-NEXT: store i32 0, ptr [[A]], align 4
// CHECK0-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK0-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK0-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], ptr noundef [[TMP1]])
// CHECK0-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// CHECK0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK0-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// CHECK0-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK0-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// CHECK0-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4
// CHECK0-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK0-NEXT: store i32 [[ADD2]], ptr [[A]], align 4
// CHECK0-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK0-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// CHECK0-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
// CHECK0-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK0-NEXT: store i32 [[ADD4]], ptr [[A]], align 4
// CHECK0-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK0-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// CHECK0-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4
// CHECK0-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK0-NEXT: store i32 [[ADD6]], ptr [[A]], align 4
// CHECK0-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4
// CHECK0-NEXT: ret i32 [[TMP9]]
//
//
// CHECK0-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK0-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK0-NEXT: entry:
// CHECK0-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-NEXT: [[B:%.*]] = alloca i32, align 4
// CHECK0-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x ptr], align 8
// CHECK0-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
// CHECK0-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
// CHECK0-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK0-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK0-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK0-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK0-NEXT: store i32 [[ADD]], ptr [[B]], align 4
// CHECK0-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK0-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK0-NEXT: [[TMP3:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK0-NEXT: store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8
// CHECK0-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK0-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// CHECK0-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
// CHECK0-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4
// CHECK0-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4
// CHECK0-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8
// CHECK0-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK0-NEXT: [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK0-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
// CHECK0-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.5, i64 40, i1 false)
// CHECK0-NEXT: [[TMP9:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT: store ptr [[THIS1]], ptr [[TMP9]], align 8
// CHECK0-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT: store ptr [[A]], ptr [[TMP10]], align 8
// CHECK0-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-NEXT: store ptr null, ptr [[TMP11]], align 8
// CHECK0-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-NEXT: store i64 [[TMP6]], ptr [[TMP12]], align 8
// CHECK0-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-NEXT: store i64 [[TMP6]], ptr [[TMP13]], align 8
// CHECK0-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-NEXT: store ptr null, ptr [[TMP14]], align 8
// CHECK0-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK0-NEXT: store i64 2, ptr [[TMP15]], align 8
// CHECK0-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK0-NEXT: store i64 2, ptr [[TMP16]], align 8
// CHECK0-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK0-NEXT: store ptr null, ptr [[TMP17]], align 8
// CHECK0-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK0-NEXT: store i64 [[TMP2]], ptr [[TMP18]], align 8
// CHECK0-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK0-NEXT: store i64 [[TMP2]], ptr [[TMP19]], align 8
// CHECK0-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
// CHECK0-NEXT: store ptr null, ptr [[TMP20]], align 8
// CHECK0-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK0-NEXT: store ptr [[VLA]], ptr [[TMP21]], align 8
// CHECK0-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK0-NEXT: store ptr [[VLA]], ptr [[TMP22]], align 8
// CHECK0-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK0-NEXT: store i64 [[TMP8]], ptr [[TMP23]], align 8
// CHECK0-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK0-NEXT: store ptr null, ptr [[TMP24]], align 8
// CHECK0-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK0-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-NEXT: store i32 3, ptr [[TMP28]], align 4
// CHECK0-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-NEXT: store i32 5, ptr [[TMP29]], align 4
// CHECK0-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-NEXT: store ptr [[TMP25]], ptr [[TMP30]], align 8
// CHECK0-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-NEXT: store ptr [[TMP26]], ptr [[TMP31]], align 8
// CHECK0-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-NEXT: store ptr [[TMP27]], ptr [[TMP32]], align 8
// CHECK0-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP33]], align 8
// CHECK0-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-NEXT: store ptr null, ptr [[TMP34]], align 8
// CHECK0-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-NEXT: store ptr null, ptr [[TMP35]], align 8
// CHECK0-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-NEXT: store i64 0, ptr [[TMP36]], align 8
// CHECK0-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-NEXT: store i64 0, ptr [[TMP37]], align 8
// CHECK0-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP38]], align 4
// CHECK0-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP39]], align 4
// CHECK0-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-NEXT: store i32 0, ptr [[TMP40]], align 4
// CHECK0-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, ptr [[KERNEL_ARGS]])
// CHECK0-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
// CHECK0-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0: omp_offload.failed:
// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(ptr [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], ptr [[VLA]]) #[[ATTR3]]
// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK0: omp_offload.cont:
// CHECK0-NEXT: [[TMP43:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP43]]
// CHECK0-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
// CHECK0-NEXT: [[TMP44:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
// CHECK0-NEXT: [[CONV:%.*]] = sext i16 [[TMP44]] to i32
// CHECK0-NEXT: [[TMP45:%.*]] = load i32, ptr [[B]], align 4
// CHECK0-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP45]]
// CHECK0-NEXT: [[TMP46:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// CHECK0-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP46]])
// CHECK0-NEXT: ret i32 [[ADD3]]
//
//
// CHECK0-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// CHECK0-NEXT: entry:
// CHECK0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK0-NEXT: [[AAA:%.*]] = alloca i8, align 1
// CHECK0-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK0-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
// CHECK0-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
// CHECK0-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK0-NEXT: store i32 0, ptr [[A]], align 4
// CHECK0-NEXT: store i8 0, ptr [[AAA]], align 1
// CHECK0-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// CHECK0-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK0-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
// CHECK0-NEXT: [[TMP2:%.*]] = load i8, ptr [[AAA]], align 1
// CHECK0-NEXT: store i8 [[TMP2]], ptr [[AAA_CASTED]], align 1
// CHECK0-NEXT: [[TMP3:%.*]] = load i64, ptr [[AAA_CASTED]], align 8
// CHECK0-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8
// CHECK0-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT: store i64 [[TMP1]], ptr [[TMP5]], align 8
// CHECK0-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-NEXT: store ptr null, ptr [[TMP6]], align 8
// CHECK0-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-NEXT: store i64 [[TMP3]], ptr [[TMP7]], align 8
// CHECK0-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-NEXT: store i64 [[TMP3]], ptr [[TMP8]], align 8
// CHECK0-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-NEXT: store ptr null, ptr [[TMP9]], align 8
// CHECK0-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK0-NEXT: store ptr [[B]], ptr [[TMP10]], align 8
// CHECK0-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK0-NEXT: store ptr [[B]], ptr [[TMP11]], align 8
// CHECK0-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK0-NEXT: store ptr null, ptr [[TMP12]], align 8
// CHECK0-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-NEXT: store i32 3, ptr [[TMP15]], align 4
// CHECK0-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-NEXT: store i32 3, ptr [[TMP16]], align 4
// CHECK0-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-NEXT: store ptr [[TMP13]], ptr [[TMP17]], align 8
// CHECK0-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 8
// CHECK0-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-NEXT: store ptr @.offload_sizes.7, ptr [[TMP19]], align 8
// CHECK0-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP20]], align 8
// CHECK0-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-NEXT: store ptr null, ptr [[TMP21]], align 8
// CHECK0-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-NEXT: store ptr null, ptr [[TMP22]], align 8
// CHECK0-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-NEXT: store i64 0, ptr [[TMP23]], align 8
// CHECK0-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-NEXT: store i64 0, ptr [[TMP24]], align 8
// CHECK0-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP25]], align 4
// CHECK0-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP26]], align 4
// CHECK0-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-NEXT: store i32 0, ptr [[TMP27]], align 4
// CHECK0-NEXT: [[TMP28:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, ptr [[KERNEL_ARGS]])
// CHECK0-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
// CHECK0-NEXT: br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0: omp_offload.failed:
// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], ptr [[B]]) #[[ATTR3]]
// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK0: omp_offload.cont:
// CHECK0-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4
// CHECK0-NEXT: ret i32 [[TMP30]]
//
//
// CHECK0-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK0-NEXT: entry:
// CHECK0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK0-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK0-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
// CHECK0-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
// CHECK0-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK0-NEXT: store i32 0, ptr [[A]], align 4
// CHECK0-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// CHECK0-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK0-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
// CHECK0-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8
// CHECK0-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8
// CHECK0-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-NEXT: store ptr null, ptr [[TMP4]], align 8
// CHECK0-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-NEXT: store ptr [[B]], ptr [[TMP5]], align 8
// CHECK0-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-NEXT: store ptr [[B]], ptr [[TMP6]], align 8
// CHECK0-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-NEXT: store ptr null, ptr [[TMP7]], align 8
// CHECK0-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-NEXT: store i32 3, ptr [[TMP10]], align 4
// CHECK0-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-NEXT: store i32 2, ptr [[TMP11]], align 4
// CHECK0-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8
// CHECK0-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8
// CHECK0-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-NEXT: store ptr @.offload_sizes.9, ptr [[TMP14]], align 8
// CHECK0-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP15]], align 8
// CHECK0-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-NEXT: store ptr null, ptr [[TMP16]], align 8
// CHECK0-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-NEXT: store ptr null, ptr [[TMP17]], align 8
// CHECK0-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-NEXT: store i64 0, ptr [[TMP18]], align 8
// CHECK0-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-NEXT: store i64 0, ptr [[TMP19]], align 8
// CHECK0-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP20]], align 4
// CHECK0-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4
// CHECK0-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-NEXT: store i32 0, ptr [[TMP22]], align 4
// CHECK0-NEXT: [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, ptr [[KERNEL_ARGS]])
// CHECK0-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK0-NEXT: br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0: omp_offload.failed:
// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], ptr [[B]]) #[[ATTR3]]
// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK0: omp_offload.cont:
// CHECK0-NEXT: [[TMP25:%.*]] = load i32, ptr [[A]], align 4
// CHECK0-NEXT: ret i32 [[TMP25]]
//
//
// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK0-SAME: (ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK0-NEXT: entry:
// CHECK0-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK0-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK0-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
// CHECK0-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
// CHECK0-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
// CHECK0-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK0-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK0-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
// CHECK0-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
// CHECK0-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK0-NEXT: [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK0-NEXT: store ptr [[TMP4]], ptr [[SAVED_STACK]], align 8
// CHECK0-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK0-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// CHECK0-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
// CHECK0-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR1]], align 8
// CHECK0-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK0-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// CHECK0-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i64 [[TMP7]], i1 false)
// CHECK0-NEXT: [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
// CHECK0-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
// CHECK0-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// CHECK0-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
// CHECK0-NEXT: store double [[ADD]], ptr [[A]], align 8
// CHECK0-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
// CHECK0-NEXT: [[TMP9:%.*]] = load double, ptr [[A4]], align 8
// CHECK0-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
// CHECK0-NEXT: store double [[INC]], ptr [[A4]], align 8
// CHECK0-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
// CHECK0-NEXT: [[TMP10:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i64 [[TMP10]]
// CHECK0-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
// CHECK0-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
// CHECK0-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// CHECK0-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP11]])
// CHECK0-NEXT: ret void
//
//
// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK0-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK0-NEXT: entry:
// CHECK0-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK0-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK0-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8
// CHECK0-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK0-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK0-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
// CHECK0-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK0-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK0-NEXT: [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
// CHECK0-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32
// CHECK0-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
// CHECK0-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
// CHECK0-NEXT: store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
// CHECK0-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK0-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK0-NEXT: store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
// CHECK0-NEXT: ret void
//
//
// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK0-SAME: (i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK0-NEXT: entry:
// CHECK0-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK0-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK0-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK0-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK0-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK0-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
// CHECK0-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK0-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
// CHECK0-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK0-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK0-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
// CHECK0-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK1-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK1-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK1-NEXT: [[P:%.*]] = alloca ptr, align 64
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[GA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
// CHECK1-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
// CHECK1-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// CHECK1-NEXT: store i32 0, ptr [[A]], align 4
// CHECK1-NEXT: store i16 0, ptr [[AA]], align 2
// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
// CHECK1-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK1-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK1-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// CHECK1-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP6]], ptr [[X]], align 4
// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP7]], ptr [[Y]], align 4
// CHECK1-NEXT: store ptr [[A]], ptr [[P]], align 64
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4
// CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4
// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[P]], align 64
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr @ga, align 4
// CHECK1-NEXT: store i32 [[TMP11]], ptr [[GA_CASTED]], align 4
// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[GA_CASTED]], align 8
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP9]], ptr [[TMP13]], align 8
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP9]], ptr [[TMP14]], align 8
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP16]], align 8
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP17]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK1-NEXT: store i64 [[TMP12]], ptr [[TMP19]], align 8
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK1-NEXT: store i64 [[TMP12]], ptr [[TMP20]], align 8
// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK1-NEXT: store ptr null, ptr [[TMP21]], align 8
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT: store i32 3, ptr [[TMP24]], align 4
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-NEXT: store i32 3, ptr [[TMP25]], align 4
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8
// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP28]], align 8
// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP29]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8
// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-NEXT: store ptr null, ptr [[TMP31]], align 8
// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-NEXT: store i64 0, ptr [[TMP32]], align 8
// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-NEXT: store i64 0, ptr [[TMP33]], align 8
// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP34]], align 4
// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP35]], align 4
// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-NEXT: store i32 0, ptr [[TMP36]], align 4
// CHECK1-NEXT: [[TMP37:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, ptr [[KERNEL_ARGS]])
// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
// CHECK1-NEXT: br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], ptr [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
// CHECK1-NEXT: [[TMP39:%.*]] = load i16, ptr [[AA]], align 2
// CHECK1-NEXT: store i16 [[TMP39]], ptr [[AA_CASTED]], align 2
// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[AA_CASTED]], align 8
// CHECK1-NEXT: [[TMP41:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK1-NEXT: [[TMP42:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK1-NEXT: [[TMP43:%.*]] = mul nuw i64 [[TMP42]], 8
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.1, i64 72, i1 false)
// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP40]], ptr [[TMP44]], align 8
// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP40]], ptr [[TMP45]], align 8
// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 0
// CHECK1-NEXT: store ptr null, ptr [[TMP46]], align 8
// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
// CHECK1-NEXT: store ptr [[B]], ptr [[TMP47]], align 8
// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
// CHECK1-NEXT: store ptr [[B]], ptr [[TMP48]], align 8
// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 1
// CHECK1-NEXT: store ptr null, ptr [[TMP49]], align 8
// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP50]], align 8
// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP51]], align 8
// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 2
// CHECK1-NEXT: store ptr null, ptr [[TMP52]], align 8
// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP53]], align 8
// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP54]], align 8
// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK1-NEXT: store i64 [[TMP41]], ptr [[TMP55]], align 8
// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 3
// CHECK1-NEXT: store ptr null, ptr [[TMP56]], align 8
// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
// CHECK1-NEXT: store ptr [[C]], ptr [[TMP57]], align 8
// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
// CHECK1-NEXT: store ptr [[C]], ptr [[TMP58]], align 8
// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 4
// CHECK1-NEXT: store ptr null, ptr [[TMP59]], align 8
// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
// CHECK1-NEXT: store i64 5, ptr [[TMP60]], align 8
// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
// CHECK1-NEXT: store i64 5, ptr [[TMP61]], align 8
// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 5
// CHECK1-NEXT: store ptr null, ptr [[TMP62]], align 8
// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP63]], align 8
// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP64]], align 8
// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 6
// CHECK1-NEXT: store ptr null, ptr [[TMP65]], align 8
// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
// CHECK1-NEXT: store ptr [[VLA1]], ptr [[TMP66]], align 8
// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
// CHECK1-NEXT: store ptr [[VLA1]], ptr [[TMP67]], align 8
// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK1-NEXT: store i64 [[TMP43]], ptr [[TMP68]], align 8
// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 7
// CHECK1-NEXT: store ptr null, ptr [[TMP69]], align 8
// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
// CHECK1-NEXT: store ptr [[D]], ptr [[TMP70]], align 8
// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
// CHECK1-NEXT: store ptr [[D]], ptr [[TMP71]], align 8
// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 8
// CHECK1-NEXT: store ptr null, ptr [[TMP72]], align 8
// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
// CHECK1-NEXT: store i32 3, ptr [[TMP76]], align 4
// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
// CHECK1-NEXT: store i32 9, ptr [[TMP77]], align 4
// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
// CHECK1-NEXT: store ptr [[TMP73]], ptr [[TMP78]], align 8
// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
// CHECK1-NEXT: store ptr [[TMP74]], ptr [[TMP79]], align 8
// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
// CHECK1-NEXT: store ptr [[TMP75]], ptr [[TMP80]], align 8
// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP81]], align 8
// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
// CHECK1-NEXT: store ptr null, ptr [[TMP82]], align 8
// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
// CHECK1-NEXT: store ptr null, ptr [[TMP83]], align 8
// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
// CHECK1-NEXT: store i64 0, ptr [[TMP84]], align 8
// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
// CHECK1-NEXT: store i64 0, ptr [[TMP85]], align 8
// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP86]], align 4
// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP87]], align 4
// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
// CHECK1-NEXT: store i32 0, ptr [[TMP88]], align 4
// CHECK1-NEXT: [[TMP89:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, ptr [[KERNEL_ARGS5]])
// CHECK1-NEXT: [[TMP90:%.*]] = icmp ne i32 [[TMP89]], 0
// CHECK1-NEXT: br i1 [[TMP90]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
// CHECK1: omp_offload.failed6:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP40]], ptr [[B]], i64 [[TMP1]], ptr [[VLA]], ptr [[C]], i64 5, i64 [[TMP4]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT7]]
// CHECK1: omp_offload.cont7:
// CHECK1-NEXT: [[TMP91:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[TMP91]], ptr [[TMP92]], align 8
// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[TMP91]], ptr [[TMP93]], align 8
// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i64 0, i64 0
// CHECK1-NEXT: store ptr null, ptr [[TMP94]], align 8
// CHECK1-NEXT: [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
// CHECK1-NEXT: store ptr [[E]], ptr [[TMP95]], align 8
// CHECK1-NEXT: [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
// CHECK1-NEXT: store ptr [[E]], ptr [[TMP96]], align 8
// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i64 0, i64 1
// CHECK1-NEXT: store ptr null, ptr [[TMP97]], align 8
// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK1-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0
// CHECK1-NEXT: store i32 3, ptr [[TMP100]], align 4
// CHECK1-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1
// CHECK1-NEXT: store i32 2, ptr [[TMP101]], align 4
// CHECK1-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2
// CHECK1-NEXT: store ptr [[TMP98]], ptr [[TMP102]], align 8
// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 3
// CHECK1-NEXT: store ptr [[TMP99]], ptr [[TMP103]], align 8
// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 4
// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP104]], align 8
// CHECK1-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 5
// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP105]], align 8
// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 6
// CHECK1-NEXT: store ptr null, ptr [[TMP106]], align 8
// CHECK1-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 7
// CHECK1-NEXT: store ptr null, ptr [[TMP107]], align 8
// CHECK1-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 8
// CHECK1-NEXT: store i64 0, ptr [[TMP108]], align 8
// CHECK1-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 9
// CHECK1-NEXT: store i64 0, ptr [[TMP109]], align 8
// CHECK1-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 10
// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP110]], align 4
// CHECK1-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 11
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP111]], align 4
// CHECK1-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 12
// CHECK1-NEXT: store i32 0, ptr [[TMP112]], align 4
// CHECK1-NEXT: [[TMP113:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, ptr [[KERNEL_ARGS11]])
// CHECK1-NEXT: [[TMP114:%.*]] = icmp ne i32 [[TMP113]], 0
// CHECK1-NEXT: br i1 [[TMP114]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
// CHECK1: omp_offload.failed12:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(ptr [[TMP91]], ptr [[E]]) #[[ATTR3]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT13]]
// CHECK1: omp_offload.cont13:
// CHECK1-NEXT: [[TMP115:%.*]] = load i32, ptr [[A]], align 4
// CHECK1-NEXT: [[TMP116:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// CHECK1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP116]])
// CHECK1-NEXT: ret i32 [[TMP115]]
//
//
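// The outlined region for line 63 receives the scalars 'a' and 'ga' coerced
// into i64 slots and the pointer 'p' directly; its body only spills the three
// arguments to local allocas before returning.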
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK1-SAME: (i64 noundef [[A:%.*]], ptr noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8
// CHECK1-NEXT: store i64 [[GA]], ptr [[GA_ADDR]], align 8
// CHECK1-NEXT: ret void
//
//
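// The outlined region for line 70 materializes a private copy of every
// firstprivate aggregate. Fixed-size arrays and the struct are duplicated with
// llvm.memcpy; each VLA gets a fresh alloca sized from the forwarded bounds
// (bracketed by stacksave/stackrestore) followed by an llvm.memcpy of the
// original data. The scalar 'aa' arrives coerced into an i64 and is updated in
// place in its address slot.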
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK1-SAME: (i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
// CHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
// CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8
// CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8
// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i64 40, i1 false)
// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[SAVED_STACK]], align 8
// CHECK1-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK1-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
// CHECK1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i64 [[TMP9]], i1 false)
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i64 400, i1 false)
// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK1-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP10]], align 8
// CHECK1-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
// CHECK1-NEXT: store i64 [[TMP5]], ptr [[__VLA_EXPR2]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i64 [[TMP12]], i1 false)
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[D9]], ptr align 8 [[TMP7]], i64 16, i1 false)
// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
// CHECK1-NEXT: store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i64 0, i64 2
// CHECK1-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i64 3
// CHECK1-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i64 0, i64 1
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i64 0, i64 2
// CHECK1-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP5]]
// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i64 [[TMP14]]
// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3
// CHECK1-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
// CHECK1-NEXT: store i64 1, ptr [[X]], align 8
// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
// CHECK1-NEXT: store i8 1, ptr [[Y]], align 8
// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// CHECK1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP15]])
// CHECK1-NEXT: ret void
//
//
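// For the region at line 111, the captured pointer is used through its address
// slot without a local copy, while the 8-byte struct 'e' is privatized with an
// llvm.memcpy before its 'x' field is read and converted to double.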
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK1-SAME: (ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK1-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// CHECK1-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[E1]], ptr align 4 [[TMP0]], i64 8, i1 false)
// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E1]], i32 0, i32 0
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 0
// CHECK1-NEXT: store double [[CONV]], ptr [[ARRAYIDX]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 0
// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// CHECK1-NEXT: store double [[INC]], ptr [[ARRAYIDX2]], align 8
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK1-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// CHECK1-NEXT: store i32 0, ptr [[A]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], ptr noundef [[TMP1]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4
// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK1-NEXT: store i32 [[ADD4]], ptr [[A]], align 4
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK1-NEXT: store i32 [[ADD6]], ptr [[A]], align 4
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4
// CHECK1-NEXT: ret i32 [[TMP9]]
//
//
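// Host side of S1::r1 with five map entries ('this' as base with &this->a as
// the mapped pointer, 'b' by value, the two VLA bounds, and the VLA data).
// Only the VLA-data slot of .offload_sizes is computed at run time
// (2 * n * sizeof(i16)); the other slots come from the constant
// @.offload_sizes.5 template copied in first.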
// CHECK1-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK1-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[B]], align 4
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK1-NEXT: store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// CHECK1-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4
// CHECK1-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK1-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.5, i64 40, i1 false)
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP9]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[A]], ptr [[TMP10]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-NEXT: store i64 [[TMP6]], ptr [[TMP12]], align 8
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-NEXT: store i64 [[TMP6]], ptr [[TMP13]], align 8
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK1-NEXT: store i64 2, ptr [[TMP16]], align 8
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP18]], align 8
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP19]], align 8
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
// CHECK1-NEXT: store ptr null, ptr [[TMP20]], align 8
// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP21]], align 8
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP22]], align 8
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK1-NEXT: store i64 [[TMP8]], ptr [[TMP23]], align 8
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT: store i32 3, ptr [[TMP28]], align 4
// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-NEXT: store i32 5, ptr [[TMP29]], align 4
// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP30]], align 8
// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP31]], align 8
// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP32]], align 8
// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP33]], align 8
// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-NEXT: store ptr null, ptr [[TMP34]], align 8
// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8
// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-NEXT: store i64 0, ptr [[TMP36]], align 8
// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-NEXT: store i64 0, ptr [[TMP37]], align 8
// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP38]], align 4
// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP39]], align 4
// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-NEXT: store i32 0, ptr [[TMP40]], align 4
// CHECK1-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, ptr [[KERNEL_ARGS]])
// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
// CHECK1-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(ptr [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], ptr [[VLA]]) #[[ATTR3]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
// CHECK1-NEXT: [[TMP43:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP43]]
// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
// CHECK1-NEXT: [[TMP44:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP44]] to i32
// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[B]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP45]]
// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// CHECK1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP46]])
// CHECK1-NEXT: ret i32 [[ADD3]]
//
//
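// _ZL7fstatici maps three captures. The scalars 'a' and 'aaa' travel by value
// through i64 casting slots (A_CASTED, AAA_CASTED) and the array 'b' by
// pointer; all sizes are compile-time constants, so only the static
// @.offload_sizes.7 table is referenced.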
// CHECK1-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1
// CHECK1-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 0, ptr [[A]], align 4
// CHECK1-NEXT: store i8 0, ptr [[AAA]], align 1
// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[AAA]], align 1
// CHECK1-NEXT: store i8 [[TMP2]], ptr [[AAA_CASTED]], align 1
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AAA_CASTED]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP5]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP7]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP8]], align 8
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK1-NEXT: store ptr [[B]], ptr [[TMP10]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK1-NEXT: store ptr [[B]], ptr [[TMP11]], align 8
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT: store i32 3, ptr [[TMP15]], align 4
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-NEXT: store i32 3, ptr [[TMP16]], align 4
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP17]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 8
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP19]], align 8
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP20]], align 8
// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-NEXT: store ptr null, ptr [[TMP21]], align 8
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-NEXT: store i64 0, ptr [[TMP23]], align 8
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-NEXT: store i64 0, ptr [[TMP24]], align 8
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP25]], align 4
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP26]], align 4
// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-NEXT: store i32 0, ptr [[TMP27]], align 4
// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, ptr [[KERNEL_ARGS]])
// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
// CHECK1-NEXT: br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], ptr [[B]]) #[[ATTR3]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4
// CHECK1-NEXT: ret i32 [[TMP30]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 0, ptr [[A]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-NEXT: store ptr [[B]], ptr [[TMP5]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-NEXT: store ptr [[B]], ptr [[TMP6]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT: store i32 3, ptr [[TMP10]], align 4
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-NEXT: store ptr @.offload_sizes.9, ptr [[TMP14]], align 8
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP15]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-NEXT: store i64 0, ptr [[TMP18]], align 8
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-NEXT: store i64 0, ptr [[TMP19]], align 8
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP20]], align 4
// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-NEXT: store i32 0, ptr [[TMP22]], align 4
// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, ptr [[KERNEL_ARGS]])
// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK1-NEXT: br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], ptr [[B]]) #[[ATTR3]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[A]], align 4
// CHECK1-NEXT: ret i32 [[TMP25]]
//
//
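// Device/fallback body for S1::r1 at line 167. The VLA bounds arrive as i64
// values, the i16 VLA is privatized via stacksave/alloca/memcpy, and 'b' is
// reloaded as an i32 from its i64 slot; the incremented this->a is then
// converted to i16 and stored into the private VLA copy.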
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK1-SAME: (ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK1-NEXT: store ptr [[TMP4]], ptr [[SAVED_STACK]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK1-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// CHECK1-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
// CHECK1-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR1]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK1-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i64 [[TMP7]], i1 false)
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
// CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8
// CHECK1-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[A4]], align 8
// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
// CHECK1-NEXT: store double [[INC]], ptr [[A4]], align 8
// CHECK1-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
// CHECK1-NEXT: [[TMP10:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i64 [[TMP10]]
// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
// CHECK1-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// CHECK1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP11]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK1-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32
// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
// CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
// CHECK1-NEXT: store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK1-NEXT: store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK1-SAME: (i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK1-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: ret void
//
//
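// The blocks that follow check the i386 host. The offload bookkeeping matches
// the 64-bit host above, but pointers and VLA bounds are 32 bits (align 4
// slots, i32 casts) and byte counts are computed in i32, then sign-extended to
// the i64 elements of .offload_sizes.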
// CHECK2-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK2-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[AA:%.*]] = alloca i16, align 2
// CHECK2-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK2-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK2-NEXT: [[P:%.*]] = alloca ptr, align 64
// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[GA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
// CHECK2-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
// CHECK2-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// CHECK2-NEXT: store i32 0, ptr [[A]], align 4
// CHECK2-NEXT: store i16 0, ptr [[AA]], align 2
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[SAVED_STACK]], align 4
// CHECK2-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// CHECK2-NEXT: store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK2-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
// CHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP4]], ptr [[X]], align 4
// CHECK2-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP5]], ptr [[Y]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[P]], align 64
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
// CHECK2-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[P]], align 64
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr @ga, align 4
// CHECK2-NEXT: store i32 [[TMP9]], ptr [[GA_CASTED]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[GA_CASTED]], align 4
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP7]], ptr [[TMP11]], align 4
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP7]], ptr [[TMP12]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-NEXT: store ptr null, ptr [[TMP13]], align 4
// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP14]], align 4
// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 4
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-NEXT: store ptr null, ptr [[TMP16]], align 4
// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK2-NEXT: store i32 [[TMP10]], ptr [[TMP17]], align 4
// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK2-NEXT: store i32 [[TMP10]], ptr [[TMP18]], align 4
// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK2-NEXT: store ptr null, ptr [[TMP19]], align 4
// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-NEXT: store i32 3, ptr [[TMP22]], align 4
// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-NEXT: store i32 3, ptr [[TMP23]], align 4
// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP20]], ptr [[TMP24]], align 4
// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP25]], align 4
// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-NEXT: store ptr @.offload_sizes, ptr [[TMP26]], align 4
// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-NEXT: store ptr @.offload_maptypes, ptr [[TMP27]], align 4
// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-NEXT: store ptr null, ptr [[TMP28]], align 4
// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-NEXT: store ptr null, ptr [[TMP29]], align 4
// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-NEXT: store i64 0, ptr [[TMP30]], align 8
// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-NEXT: store i64 0, ptr [[TMP31]], align 8
// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP32]], align 4
// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP33]], align 4
// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-NEXT: store i32 0, ptr [[TMP34]], align 4
// CHECK2-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, ptr [[KERNEL_ARGS]])
// CHECK2-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
// CHECK2-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2: omp_offload.failed:
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], ptr [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK2: omp_offload.cont:
// CHECK2-NEXT: [[TMP37:%.*]] = load i16, ptr [[AA]], align 2
// CHECK2-NEXT: store i16 [[TMP37]], ptr [[AA_CASTED]], align 2
// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[AA_CASTED]], align 4
// CHECK2-NEXT: [[TMP39:%.*]] = mul nuw i32 [[TMP0]], 4
// CHECK2-NEXT: [[TMP40:%.*]] = sext i32 [[TMP39]] to i64
// CHECK2-NEXT: [[TMP41:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK2-NEXT: [[TMP42:%.*]] = mul nuw i32 [[TMP41]], 8
// CHECK2-NEXT: [[TMP43:%.*]] = sext i32 [[TMP42]] to i64
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.1, i32 72, i1 false)
// CHECK2-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP38]], ptr [[TMP44]], align 4
// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP38]], ptr [[TMP45]], align 4
// CHECK2-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
// CHECK2-NEXT: store ptr null, ptr [[TMP46]], align 4
// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[B]], ptr [[TMP47]], align 4
// CHECK2-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[B]], ptr [[TMP48]], align 4
// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
// CHECK2-NEXT: store ptr null, ptr [[TMP49]], align 4
// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
// CHECK2-NEXT: store i32 [[TMP0]], ptr [[TMP50]], align 4
// CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
// CHECK2-NEXT: store i32 [[TMP0]], ptr [[TMP51]], align 4
// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
// CHECK2-NEXT: store ptr null, ptr [[TMP52]], align 4
// CHECK2-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
// CHECK2-NEXT: store ptr [[VLA]], ptr [[TMP53]], align 4
// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
// CHECK2-NEXT: store ptr [[VLA]], ptr [[TMP54]], align 4
// CHECK2-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK2-NEXT: store i64 [[TMP40]], ptr [[TMP55]], align 4
// CHECK2-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
// CHECK2-NEXT: store ptr null, ptr [[TMP56]], align 4
// CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
// CHECK2-NEXT: store ptr [[C]], ptr [[TMP57]], align 4
// CHECK2-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
// CHECK2-NEXT: store ptr [[C]], ptr [[TMP58]], align 4
// CHECK2-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
// CHECK2-NEXT: store ptr null, ptr [[TMP59]], align 4
// CHECK2-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
// CHECK2-NEXT: store i32 5, ptr [[TMP60]], align 4
// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
// CHECK2-NEXT: store i32 5, ptr [[TMP61]], align 4
// CHECK2-NEXT: [[TMP62:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
// CHECK2-NEXT: store ptr null, ptr [[TMP62]], align 4
// CHECK2-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[TMP63]], align 4
// CHECK2-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[TMP64]], align 4
// CHECK2-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
// CHECK2-NEXT: store ptr null, ptr [[TMP65]], align 4
// CHECK2-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
// CHECK2-NEXT: store ptr [[VLA1]], ptr [[TMP66]], align 4
// CHECK2-NEXT: [[TMP67:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
// CHECK2-NEXT: store ptr [[VLA1]], ptr [[TMP67]], align 4
// CHECK2-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK2-NEXT: store i64 [[TMP43]], ptr [[TMP68]], align 4
// CHECK2-NEXT: [[TMP69:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
// CHECK2-NEXT: store ptr null, ptr [[TMP69]], align 4
// CHECK2-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
// CHECK2-NEXT: store ptr [[D]], ptr [[TMP70]], align 4
// CHECK2-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
// CHECK2-NEXT: store ptr [[D]], ptr [[TMP71]], align 4
// CHECK2-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
// CHECK2-NEXT: store ptr null, ptr [[TMP72]], align 4
// CHECK2-NEXT: [[TMP73:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK2-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP75:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK2-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
// CHECK2-NEXT: store i32 3, ptr [[TMP76]], align 4
// CHECK2-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
// CHECK2-NEXT: store i32 9, ptr [[TMP77]], align 4
// CHECK2-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP73]], ptr [[TMP78]], align 4
// CHECK2-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
// CHECK2-NEXT: store ptr [[TMP74]], ptr [[TMP79]], align 4
// CHECK2-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
// CHECK2-NEXT: store ptr [[TMP75]], ptr [[TMP80]], align 4
// CHECK2-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
// CHECK2-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP81]], align 4
// CHECK2-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
// CHECK2-NEXT: store ptr null, ptr [[TMP82]], align 4
// CHECK2-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
// CHECK2-NEXT: store ptr null, ptr [[TMP83]], align 4
// CHECK2-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
// CHECK2-NEXT: store i64 0, ptr [[TMP84]], align 8
// CHECK2-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
// CHECK2-NEXT: store i64 0, ptr [[TMP85]], align 8
// CHECK2-NEXT: [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP86]], align 4
// CHECK2-NEXT: [[TMP87:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP87]], align 4
// CHECK2-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
// CHECK2-NEXT: store i32 0, ptr [[TMP88]], align 4
// CHECK2-NEXT: [[TMP89:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, ptr [[KERNEL_ARGS5]])
// CHECK2-NEXT: [[TMP90:%.*]] = icmp ne i32 [[TMP89]], 0
// CHECK2-NEXT: br i1 [[TMP90]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
// CHECK2: omp_offload.failed6:
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP38]], ptr [[B]], i32 [[TMP0]], ptr [[VLA]], ptr [[C]], i32 5, i32 [[TMP2]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]]
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT7]]
// CHECK2: omp_offload.cont7:
// CHECK2-NEXT: [[TMP91:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// CHECK2-NEXT: [[TMP92:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP91]], ptr [[TMP92]], align 4
// CHECK2-NEXT: [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP91]], ptr [[TMP93]], align 4
// CHECK2-NEXT: [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
// CHECK2-NEXT: store ptr null, ptr [[TMP94]], align 4
// CHECK2-NEXT: [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[E]], ptr [[TMP95]], align 4
// CHECK2-NEXT: [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[E]], ptr [[TMP96]], align 4
// CHECK2-NEXT: [[TMP97:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
// CHECK2-NEXT: store ptr null, ptr [[TMP97]], align 4
// CHECK2-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK2-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK2-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0
// CHECK2-NEXT: store i32 3, ptr [[TMP100]], align 4
// CHECK2-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1
// CHECK2-NEXT: store i32 2, ptr [[TMP101]], align 4
// CHECK2-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP98]], ptr [[TMP102]], align 4
// CHECK2-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 3
// CHECK2-NEXT: store ptr [[TMP99]], ptr [[TMP103]], align 4
// CHECK2-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 4
// CHECK2-NEXT: store ptr @.offload_sizes.3, ptr [[TMP104]], align 4
// CHECK2-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 5
// CHECK2-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP105]], align 4
// CHECK2-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 6
// CHECK2-NEXT: store ptr null, ptr [[TMP106]], align 4
// CHECK2-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 7
// CHECK2-NEXT: store ptr null, ptr [[TMP107]], align 4
// CHECK2-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 8
// CHECK2-NEXT: store i64 0, ptr [[TMP108]], align 8
// CHECK2-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 9
// CHECK2-NEXT: store i64 0, ptr [[TMP109]], align 8
// CHECK2-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 10
// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP110]], align 4
// CHECK2-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 11
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP111]], align 4
// CHECK2-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 12
// CHECK2-NEXT: store i32 0, ptr [[TMP112]], align 4
// CHECK2-NEXT: [[TMP113:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, ptr [[KERNEL_ARGS11]])
// CHECK2-NEXT: [[TMP114:%.*]] = icmp ne i32 [[TMP113]], 0
// CHECK2-NEXT: br i1 [[TMP114]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
// CHECK2: omp_offload.failed12:
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(ptr [[TMP91]], ptr [[E]]) #[[ATTR3]]
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT13]]
// CHECK2: omp_offload.cont13:
// CHECK2-NEXT: [[TMP115:%.*]] = load i32, ptr [[A]], align 4
// CHECK2-NEXT: [[TMP116:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// CHECK2-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP116]])
// CHECK2-NEXT: ret i32 [[TMP115]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK2-SAME: (i32 noundef [[A:%.*]], ptr noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 4
// CHECK2-NEXT: store i32 [[GA]], ptr [[GA_ADDR]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK2-SAME: (i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
// CHECK2-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
// CHECK2-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4
// CHECK2-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4
// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i32 40, i1 false)
// CHECK2-NEXT: [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK2-NEXT: store ptr [[TMP8]], ptr [[SAVED_STACK]], align 4
// CHECK2-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
// CHECK2-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP1]], 4
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i32 [[TMP9]], i1 false)
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i32 400, i1 false)
// CHECK2-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK2-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP10]], align 8
// CHECK2-NEXT: store i32 [[TMP4]], ptr [[__VLA_EXPR1]], align 4
// CHECK2-NEXT: store i32 [[TMP5]], ptr [[__VLA_EXPR2]], align 4
// CHECK2-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK2-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 8
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i32 [[TMP12]], i1 false)
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[D9]], ptr align 4 [[TMP7]], i32 12, i1 false)
// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK2-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
// CHECK2-NEXT: store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i32 0, i32 2
// CHECK2-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i32 3
// CHECK2-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
// CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i32 0, i32 1
// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i32 0, i32 2
// CHECK2-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
// CHECK2-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP5]]
// CHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i32 [[TMP14]]
// CHECK2-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3
// CHECK2-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
// CHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
// CHECK2-NEXT: store i64 1, ptr [[X]], align 4
// CHECK2-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
// CHECK2-NEXT: store i8 1, ptr [[Y]], align 4
// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// CHECK2-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP15]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK2-SAME: (ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK2-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E1]], ptr align 4 [[TMP0]], i32 8, i1 false)
// CHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E1]], i32 0, i32 0
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 0
// CHECK2-NEXT: store double [[CONV]], ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX2]], align 4
// CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// CHECK2-NEXT: store double [[INC]], ptr [[ARRAYIDX2]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK2-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// CHECK2-NEXT: store i32 0, ptr [[A]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], ptr noundef [[TMP1]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK2-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4
// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK2-NEXT: store i32 [[ADD2]], ptr [[A]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK2-NEXT: store i32 [[ADD4]], ptr [[A]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK2-NEXT: store i32 [[ADD6]], ptr [[A]], align 4
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4
// CHECK2-NEXT: ret i32 [[TMP9]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[B]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK2-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[B]], align 4
// CHECK2-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4
// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK2-NEXT: [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK2-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK2-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.5, i32 40, i1 false)
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[THIS1]], ptr [[TMP9]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[A]], ptr [[TMP10]], align 4
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-NEXT: store ptr null, ptr [[TMP11]], align 4
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-NEXT: store i32 [[TMP5]], ptr [[TMP13]], align 4
// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-NEXT: store ptr null, ptr [[TMP14]], align 4
// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK2-NEXT: store i32 2, ptr [[TMP15]], align 4
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK2-NEXT: store i32 2, ptr [[TMP16]], align 4
// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK2-NEXT: store ptr null, ptr [[TMP17]], align 4
// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP18]], align 4
// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP19]], align 4
// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
// CHECK2-NEXT: store ptr null, ptr [[TMP20]], align 4
// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK2-NEXT: store ptr [[VLA]], ptr [[TMP21]], align 4
// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK2-NEXT: store ptr [[VLA]], ptr [[TMP22]], align 4
// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK2-NEXT: store i64 [[TMP8]], ptr [[TMP23]], align 4
// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK2-NEXT: store ptr null, ptr [[TMP24]], align 4
// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-NEXT: store i32 3, ptr [[TMP28]], align 4
// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-NEXT: store i32 5, ptr [[TMP29]], align 4
// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP30]], align 4
// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-NEXT: store ptr [[TMP26]], ptr [[TMP31]], align 4
// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-NEXT: store ptr [[TMP27]], ptr [[TMP32]], align 4
// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP33]], align 4
// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-NEXT: store ptr null, ptr [[TMP34]], align 4
// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-NEXT: store ptr null, ptr [[TMP35]], align 4
// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-NEXT: store i64 0, ptr [[TMP36]], align 8
// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-NEXT: store i64 0, ptr [[TMP37]], align 8
// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP38]], align 4
// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP39]], align 4
// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-NEXT: store i32 0, ptr [[TMP40]], align 4
// CHECK2-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, ptr [[KERNEL_ARGS]])
// CHECK2-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
// CHECK2-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2: omp_offload.failed:
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(ptr [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3]]
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK2: omp_offload.cont:
// CHECK2-NEXT: [[TMP43:%.*]] = mul nsw i32 1, [[TMP1]]
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP43]]
// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
// CHECK2-NEXT: [[TMP44:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP44]] to i32
// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[B]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP45]]
// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// CHECK2-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP46]])
// CHECK2-NEXT: ret i32 [[ADD3]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK2-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[AAA:%.*]] = alloca i8, align 1
// CHECK2-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 0, ptr [[A]], align 4
// CHECK2-NEXT: store i8 0, ptr [[AAA]], align 1
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// CHECK2-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[AAA]], align 1
// CHECK2-NEXT: store i8 [[TMP2]], ptr [[AAA_CASTED]], align 1
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[AAA_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP5]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-NEXT: store ptr null, ptr [[TMP6]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[TMP7]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[TMP8]], align 4
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-NEXT: store ptr null, ptr [[TMP9]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[B]], ptr [[TMP10]], align 4
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[B]], ptr [[TMP11]], align 4
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK2-NEXT: store ptr null, ptr [[TMP12]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-NEXT: store i32 3, ptr [[TMP15]], align 4
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-NEXT: store i32 3, ptr [[TMP16]], align 4
// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP13]], ptr [[TMP17]], align 4
// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 4
// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-NEXT: store ptr @.offload_sizes.7, ptr [[TMP19]], align 4
// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP20]], align 4
// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-NEXT: store ptr null, ptr [[TMP21]], align 4
// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-NEXT: store ptr null, ptr [[TMP22]], align 4
// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-NEXT: store i64 0, ptr [[TMP23]], align 8
// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-NEXT: store i64 0, ptr [[TMP24]], align 8
// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP25]], align 4
// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP26]], align 4
// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-NEXT: store i32 0, ptr [[TMP27]], align 4
// CHECK2-NEXT: [[TMP28:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, ptr [[KERNEL_ARGS]])
// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
// CHECK2-NEXT: br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2: omp_offload.failed:
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], ptr [[B]]) #[[ATTR3]]
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK2: omp_offload.cont:
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4
// CHECK2-NEXT: ret i32 [[TMP30]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK2-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 0, ptr [[A]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// CHECK2-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-NEXT: store ptr null, ptr [[TMP4]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[B]], ptr [[TMP5]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[B]], ptr [[TMP6]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-NEXT: store ptr null, ptr [[TMP7]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-NEXT: store i32 3, ptr [[TMP10]], align 4
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-NEXT: store i32 2, ptr [[TMP11]], align 4
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 4
// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-NEXT: store ptr @.offload_sizes.9, ptr [[TMP14]], align 4
// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP15]], align 4
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-NEXT: store ptr null, ptr [[TMP16]], align 4
// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-NEXT: store ptr null, ptr [[TMP17]], align 4
// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-NEXT: store i64 0, ptr [[TMP18]], align 8
// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-NEXT: store i64 0, ptr [[TMP19]], align 8
// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP20]], align 4
// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4
// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-NEXT: store i32 0, ptr [[TMP22]], align 4
// CHECK2-NEXT: [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, ptr [[KERNEL_ARGS]])
// CHECK2-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK2-NEXT: br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2: omp_offload.failed:
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], ptr [[B]]) #[[ATTR3]]
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK2: omp_offload.cont:
// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[A]], align 4
// CHECK2-NEXT: ret i32 [[TMP25]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK2-SAME: (ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
// CHECK2-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
// CHECK2-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK2-NEXT: store ptr [[TMP4]], ptr [[SAVED_STACK]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK2-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK2-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i32 [[TMP7]], i1 false)
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
// CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
// CHECK2-NEXT: store double [[ADD]], ptr [[A]], align 4
// CHECK2-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[A4]], align 4
// CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
// CHECK2-NEXT: store double [[INC]], ptr [[A4]], align 4
// CHECK2-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
// CHECK2-NEXT: [[TMP10:%.*]] = mul nsw i32 1, [[TMP2]]
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i32 [[TMP10]]
// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
// CHECK2-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// CHECK2-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP11]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK2-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32
// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
// CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
// CHECK2-NEXT: store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK2-NEXT: store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK2-SAME: (i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK2-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK3-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// CHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK3-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK3-NEXT: [[P:%.*]] = alloca ptr, align 64
// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
// CHECK3-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
// CHECK3-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// CHECK3-NEXT: store i32 0, ptr [[A]], align 4
// CHECK3-NEXT: store i16 0, ptr [[AA]], align 2
// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[SAVED_STACK]], align 4
// CHECK3-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// CHECK3-NEXT: store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK3-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP4]], ptr [[X]], align 4
// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP5]], ptr [[Y]], align 4
// CHECK3-NEXT: store ptr [[A]], ptr [[P]], align 64
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
// CHECK3-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[P]], align 64
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr @ga, align 4
// CHECK3-NEXT: store i32 [[TMP9]], ptr [[GA_CASTED]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[GA_CASTED]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP11]], align 4
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP12]], align 4
// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4
// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP14]], align 4
// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 4
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4
// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP17]], align 4
// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP18]], align 4
// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK3-NEXT: store ptr null, ptr [[TMP19]], align 4
// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-NEXT: store i32 3, ptr [[TMP22]], align 4
// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-NEXT: store i32 3, ptr [[TMP23]], align 4
// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP24]], align 4
// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP25]], align 4
// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP26]], align 4
// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP27]], align 4
// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-NEXT: store ptr null, ptr [[TMP28]], align 4
// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-NEXT: store ptr null, ptr [[TMP29]], align 4
// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-NEXT: store i64 0, ptr [[TMP30]], align 8
// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-NEXT: store i64 0, ptr [[TMP31]], align 8
// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP32]], align 4
// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP33]], align 4
// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-NEXT: store i32 0, ptr [[TMP34]], align 4
// CHECK3-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, ptr [[KERNEL_ARGS]])
// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
// CHECK3-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], ptr [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3: omp_offload.cont:
// CHECK3-NEXT: [[TMP37:%.*]] = load i16, ptr [[AA]], align 2
// CHECK3-NEXT: store i16 [[TMP37]], ptr [[AA_CASTED]], align 2
// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[AA_CASTED]], align 4
// CHECK3-NEXT: [[TMP39:%.*]] = mul nuw i32 [[TMP0]], 4
// CHECK3-NEXT: [[TMP40:%.*]] = sext i32 [[TMP39]] to i64
// CHECK3-NEXT: [[TMP41:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK3-NEXT: [[TMP42:%.*]] = mul nuw i32 [[TMP41]], 8
// CHECK3-NEXT: [[TMP43:%.*]] = sext i32 [[TMP42]] to i64
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.1, i32 72, i1 false)
// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK3-NEXT: store i32 [[TMP38]], ptr [[TMP44]], align 4
// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK3-NEXT: store i32 [[TMP38]], ptr [[TMP45]], align 4
// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
// CHECK3-NEXT: store ptr null, ptr [[TMP46]], align 4
// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
// CHECK3-NEXT: store ptr [[B]], ptr [[TMP47]], align 4
// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
// CHECK3-NEXT: store ptr [[B]], ptr [[TMP48]], align 4
// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
// CHECK3-NEXT: store ptr null, ptr [[TMP49]], align 4
// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
// CHECK3-NEXT: store i32 [[TMP0]], ptr [[TMP50]], align 4
// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
// CHECK3-NEXT: store i32 [[TMP0]], ptr [[TMP51]], align 4
// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
// CHECK3-NEXT: store ptr null, ptr [[TMP52]], align 4
// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
// CHECK3-NEXT: store ptr [[VLA]], ptr [[TMP53]], align 4
// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
// CHECK3-NEXT: store ptr [[VLA]], ptr [[TMP54]], align 4
// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK3-NEXT: store i64 [[TMP40]], ptr [[TMP55]], align 4
// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
// CHECK3-NEXT: store ptr null, ptr [[TMP56]], align 4
// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
// CHECK3-NEXT: store ptr [[C]], ptr [[TMP57]], align 4
// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
// CHECK3-NEXT: store ptr [[C]], ptr [[TMP58]], align 4
// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
// CHECK3-NEXT: store ptr null, ptr [[TMP59]], align 4
// CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
// CHECK3-NEXT: store i32 5, ptr [[TMP60]], align 4
// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
// CHECK3-NEXT: store i32 5, ptr [[TMP61]], align 4
// CHECK3-NEXT: [[TMP62:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
// CHECK3-NEXT: store ptr null, ptr [[TMP62]], align 4
// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP63]], align 4
// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP64]], align 4
// CHECK3-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
// CHECK3-NEXT: store ptr null, ptr [[TMP65]], align 4
// CHECK3-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
// CHECK3-NEXT: store ptr [[VLA1]], ptr [[TMP66]], align 4
// CHECK3-NEXT: [[TMP67:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
// CHECK3-NEXT: store ptr [[VLA1]], ptr [[TMP67]], align 4
// CHECK3-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK3-NEXT: store i64 [[TMP43]], ptr [[TMP68]], align 4
// CHECK3-NEXT: [[TMP69:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
// CHECK3-NEXT: store ptr null, ptr [[TMP69]], align 4
// CHECK3-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
// CHECK3-NEXT: store ptr [[D]], ptr [[TMP70]], align 4
// CHECK3-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
// CHECK3-NEXT: store ptr [[D]], ptr [[TMP71]], align 4
// CHECK3-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
// CHECK3-NEXT: store ptr null, ptr [[TMP72]], align 4
// CHECK3-NEXT: [[TMP73:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK3-NEXT: [[TMP75:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK3-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
// CHECK3-NEXT: store i32 3, ptr [[TMP76]], align 4
// CHECK3-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
// CHECK3-NEXT: store i32 9, ptr [[TMP77]], align 4
// CHECK3-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
// CHECK3-NEXT: store ptr [[TMP73]], ptr [[TMP78]], align 4
// CHECK3-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
// CHECK3-NEXT: store ptr [[TMP74]], ptr [[TMP79]], align 4
// CHECK3-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
// CHECK3-NEXT: store ptr [[TMP75]], ptr [[TMP80]], align 4
// CHECK3-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
// CHECK3-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP81]], align 4
// CHECK3-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
// CHECK3-NEXT: store ptr null, ptr [[TMP82]], align 4
// CHECK3-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
// CHECK3-NEXT: store ptr null, ptr [[TMP83]], align 4
// CHECK3-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
// CHECK3-NEXT: store i64 0, ptr [[TMP84]], align 8
// CHECK3-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
// CHECK3-NEXT: store i64 0, ptr [[TMP85]], align 8
// CHECK3-NEXT: [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP86]], align 4
// CHECK3-NEXT: [[TMP87:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP87]], align 4
// CHECK3-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
// CHECK3-NEXT: store i32 0, ptr [[TMP88]], align 4
// CHECK3-NEXT: [[TMP89:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, ptr [[KERNEL_ARGS5]])
// CHECK3-NEXT: [[TMP90:%.*]] = icmp ne i32 [[TMP89]], 0
// CHECK3-NEXT: br i1 [[TMP90]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
// CHECK3: omp_offload.failed6:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP38]], ptr [[B]], i32 [[TMP0]], ptr [[VLA]], ptr [[C]], i32 5, i32 [[TMP2]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT7]]
// CHECK3: omp_offload.cont7:
// CHECK3-NEXT: [[TMP91:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// CHECK3-NEXT: [[TMP92:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[TMP91]], ptr [[TMP92]], align 4
// CHECK3-NEXT: [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[TMP91]], ptr [[TMP93]], align 4
// CHECK3-NEXT: [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
// CHECK3-NEXT: store ptr null, ptr [[TMP94]], align 4
// CHECK3-NEXT: [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
// CHECK3-NEXT: store ptr [[E]], ptr [[TMP95]], align 4
// CHECK3-NEXT: [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
// CHECK3-NEXT: store ptr [[E]], ptr [[TMP96]], align 4
// CHECK3-NEXT: [[TMP97:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
// CHECK3-NEXT: store ptr null, ptr [[TMP97]], align 4
// CHECK3-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK3-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK3-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0
// CHECK3-NEXT: store i32 3, ptr [[TMP100]], align 4
// CHECK3-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1
// CHECK3-NEXT: store i32 2, ptr [[TMP101]], align 4
// CHECK3-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2
// CHECK3-NEXT: store ptr [[TMP98]], ptr [[TMP102]], align 4
// CHECK3-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 3
// CHECK3-NEXT: store ptr [[TMP99]], ptr [[TMP103]], align 4
// CHECK3-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 4
// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP104]], align 4
// CHECK3-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 5
// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP105]], align 4
// CHECK3-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 6
// CHECK3-NEXT: store ptr null, ptr [[TMP106]], align 4
// CHECK3-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 7
// CHECK3-NEXT: store ptr null, ptr [[TMP107]], align 4
// CHECK3-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 8
// CHECK3-NEXT: store i64 0, ptr [[TMP108]], align 8
// CHECK3-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 9
// CHECK3-NEXT: store i64 0, ptr [[TMP109]], align 8
// CHECK3-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 10
// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP110]], align 4
// CHECK3-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 11
// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP111]], align 4
// CHECK3-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 12
// CHECK3-NEXT: store i32 0, ptr [[TMP112]], align 4
// CHECK3-NEXT: [[TMP113:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, ptr [[KERNEL_ARGS11]])
// CHECK3-NEXT: [[TMP114:%.*]] = icmp ne i32 [[TMP113]], 0
// CHECK3-NEXT: br i1 [[TMP114]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
// CHECK3: omp_offload.failed12:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(ptr [[TMP91]], ptr [[E]]) #[[ATTR3]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT13]]
// CHECK3: omp_offload.cont13:
// CHECK3-NEXT: [[TMP115:%.*]] = load i32, ptr [[A]], align 4
// CHECK3-NEXT: [[TMP116:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// CHECK3-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP116]])
// CHECK3-NEXT: ret i32 [[TMP115]]
//
//
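// The CHECK3 (i386 host) bodies of the outlined target regions follow; these
// are the host-fallback definitions invoked on the omp_offload.failed paths
// above. The l63 region receives only by-value scalars and a pointer, so its
// body just spills the incoming arguments to local allocas.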
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK3-SAME: (i32 noundef [[A:%.*]], ptr noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 4
// CHECK3-NEXT: store i32 [[GA]], ptr [[GA_ADDR]], align 4
// CHECK3-NEXT: ret void
//
//
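// The l70 region firstprivatizes its aggregates: b, c, and d are copied into
// fresh local storage with llvm.memcpy, the two VLAs are re-created with
// allocas sized from the forwarded bounds, and the allocations are bracketed
// by llvm.stacksave/llvm.stackrestore.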
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK3-SAME: (i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// CHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
// CHECK3-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4
// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
// CHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4
// CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4
// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i32 40, i1 false)
// CHECK3-NEXT: [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK3-NEXT: store ptr [[TMP8]], ptr [[SAVED_STACK]], align 4
// CHECK3-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
// CHECK3-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP1]], 4
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i32 [[TMP9]], i1 false)
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i32 400, i1 false)
// CHECK3-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK3-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP10]], align 8
// CHECK3-NEXT: store i32 [[TMP4]], ptr [[__VLA_EXPR1]], align 4
// CHECK3-NEXT: store i32 [[TMP5]], ptr [[__VLA_EXPR2]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK3-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 8
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i32 [[TMP12]], i1 false)
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[D9]], ptr align 4 [[TMP7]], i32 12, i1 false)
// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
// CHECK3-NEXT: store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i32 0, i32 2
// CHECK3-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i32 3
// CHECK3-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
// CHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i32 0, i32 1
// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i32 0, i32 2
// CHECK3-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
// CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP5]]
// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i32 [[TMP14]]
// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3
// CHECK3-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
// CHECK3-NEXT: store i64 1, ptr [[X]], align 4
// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
// CHECK3-NEXT: store i8 1, ptr [[Y]], align 4
// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// CHECK3-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP15]])
// CHECK3-NEXT: ret void
//
//
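// The l111 region takes ptr by value and firstprivatizes the 8-byte struct e
// via llvm.memcpy; e.x is converted to double, stored through ptr, and the
// pointee is then incremented in place.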
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK3-SAME: (ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK3-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E1]], ptr align 4 [[TMP0]], i32 8, i1 false)
// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E1]], i32 0, i32 0
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 0
// CHECK3-NEXT: store double [[CONV]], ptr [[ARRAYIDX]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 0
// CHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX2]], align 4
// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// CHECK3-NEXT: store double [[INC]], ptr [[ARRAYIDX2]], align 4
// CHECK3-NEXT: ret void
//
//
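// Plain host code for _Z3bariPd: the results of _Z3fooiPd, S1::r1, the
// internal fstatic, and ftemplate<int> are accumulated into a.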
// CHECK3-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK3-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// CHECK3-NEXT: store i32 0, ptr [[A]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], ptr noundef [[TMP1]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4
// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK3-NEXT: store i32 [[ADD4]], ptr [[A]], align 4
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4
// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK3-NEXT: store i32 [[ADD6]], ptr [[A]], align 4
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4
// CHECK3-NEXT: ret i32 [[TMP9]]
//
//
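// S1::r1 offloads five arguments: this as base with member a as the mapped
// pointer, b by value through B_CASTED, the two VLA bounds (the constant 2
// and n), and the VLA data itself, whose byte size (2 * n * 2, sign-extended
// to i64) is written into the runtime-sized slot of .offload_sizes.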
// CHECK3-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK3-NEXT: store i32 [[ADD]], ptr [[B]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK3-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B]], align 4
// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK3-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK3-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.5, i32 40, i1 false)
// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP9]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[A]], ptr [[TMP10]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4
// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP13]], align 4
// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK3-NEXT: store i32 2, ptr [[TMP15]], align 4
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK3-NEXT: store i32 2, ptr [[TMP16]], align 4
// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4
// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP18]], align 4
// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP19]], align 4
// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
// CHECK3-NEXT: store ptr null, ptr [[TMP20]], align 4
// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK3-NEXT: store ptr [[VLA]], ptr [[TMP21]], align 4
// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK3-NEXT: store ptr [[VLA]], ptr [[TMP22]], align 4
// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK3-NEXT: store i64 [[TMP8]], ptr [[TMP23]], align 4
// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4
// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-NEXT: store i32 3, ptr [[TMP28]], align 4
// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-NEXT: store i32 5, ptr [[TMP29]], align 4
// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP30]], align 4
// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-NEXT: store ptr [[TMP26]], ptr [[TMP31]], align 4
// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP32]], align 4
// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP33]], align 4
// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-NEXT: store ptr null, ptr [[TMP34]], align 4
// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-NEXT: store ptr null, ptr [[TMP35]], align 4
// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-NEXT: store i64 0, ptr [[TMP36]], align 8
// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-NEXT: store i64 0, ptr [[TMP37]], align 8
// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP38]], align 4
// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP39]], align 4
// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-NEXT: store i32 0, ptr [[TMP40]], align 4
// CHECK3-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, ptr [[KERNEL_ARGS]])
// CHECK3-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
// CHECK3-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(ptr [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3: omp_offload.cont:
// CHECK3-NEXT: [[TMP43:%.*]] = mul nsw i32 1, [[TMP1]]
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP43]]
// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
// CHECK3-NEXT: [[TMP44:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP44]] to i32
// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[B]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP45]]
// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// CHECK3-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP46]])
// CHECK3-NEXT: ret i32 [[ADD3]]
//
//
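// The internal fstatic offloads three arguments: a and the char aaa travel
// firstprivate by value through their *_CASTED slots, while the array b is
// mapped by pointer with constant sizes from @.offload_sizes.7.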
// CHECK3-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1
// CHECK3-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 0, ptr [[A]], align 4
// CHECK3-NEXT: store i8 0, ptr [[AAA]], align 1
// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[AAA]], align 1
// CHECK3-NEXT: store i8 [[TMP2]], ptr [[AAA_CASTED]], align 1
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[AAA_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP5]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4
// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP7]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP8]], align 4
// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK3-NEXT: store ptr [[B]], ptr [[TMP10]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK3-NEXT: store ptr [[B]], ptr [[TMP11]], align 4
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-NEXT: store i32 3, ptr [[TMP15]], align 4
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-NEXT: store i32 3, ptr [[TMP16]], align 4
// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP17]], align 4
// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 4
// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP19]], align 4
// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP20]], align 4
// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-NEXT: store ptr null, ptr [[TMP21]], align 4
// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-NEXT: store ptr null, ptr [[TMP22]], align 4
// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-NEXT: store i64 0, ptr [[TMP23]], align 8
// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-NEXT: store i64 0, ptr [[TMP24]], align 8
// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP25]], align 4
// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP26]], align 4
// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-NEXT: store i32 0, ptr [[TMP27]], align 4
// CHECK3-NEXT: [[TMP28:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, ptr [[KERNEL_ARGS]])
// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
// CHECK3-NEXT: br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], ptr [[B]]) #[[ATTR3]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3: omp_offload.cont:
// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4
// CHECK3-NEXT: ret i32 [[TMP30]]
//
//
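// ftemplate<int> offloads two arguments (a by value, b by pointer) and
// otherwise mirrors the fstatic setup, using @.offload_sizes.9 and
// @.offload_maptypes.10.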
// CHECK3-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 0, ptr [[A]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-NEXT: store ptr [[B]], ptr [[TMP5]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-NEXT: store ptr [[B]], ptr [[TMP6]], align 4
// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-NEXT: store i32 3, ptr [[TMP10]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4
// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 4
// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-NEXT: store ptr @.offload_sizes.9, ptr [[TMP14]], align 4
// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP15]], align 4
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4
// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4
// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-NEXT: store i64 0, ptr [[TMP18]], align 8
// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-NEXT: store i64 0, ptr [[TMP19]], align 8
// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP20]], align 4
// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4
// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-NEXT: store i32 0, ptr [[TMP22]], align 4
// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, ptr [[KERNEL_ARGS]])
// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK3-NEXT: br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], ptr [[B]]) #[[ATTR3]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3: omp_offload.cont:
// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[A]], align 4
// CHECK3-NEXT: ret i32 [[TMP25]]
//
//
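// Host-fallback body of the l167 region: the i16 VLA is firstprivatized with
// a fresh alloca plus llvm.memcpy, this->a is set to b + 1.5 and then
// incremented, and the truncated result lands in element [1][1] of the VLA.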
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK3-SAME: (ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
// CHECK3-NEXT: store ptr [[TMP4]], ptr [[SAVED_STACK]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK3-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK3-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i32 [[TMP7]], i1 false)
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
// CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4
// CHECK3-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[A4]], align 4
// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
// CHECK3-NEXT: store double [[INC]], ptr [[A4]], align 4
// CHECK3-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
// CHECK3-NEXT: [[TMP10:%.*]] = mul nsw i32 1, [[TMP2]]
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i32 [[TMP10]]
// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
// CHECK3-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// CHECK3-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP11]])
// CHECK3-NEXT: ret void
//
//
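// Host-fallback body of the l142 region: b is firstprivatized via memcpy,
// after which a, aaa, and b[2] are each incremented by one.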
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4
// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32
// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
// CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
// CHECK3-NEXT: store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK3-NEXT: store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
// CHECK3-NEXT: ret void
//
//
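// Host-fallback body of the l128 region: the same pattern as l142 minus the
// aaa update.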
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK3-SAME: (i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK3-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
// CHECK3-NEXT: ret void
//
//
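// With -fopenmp-simd (the SIMD-ONLY0 prefix) no offloading machinery is
// emitted: the target regions execute inline in the host functions, so the
// checks below only see the plain sequential lowering with 8-byte pointers.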
// SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY0-NEXT: entry:
// SIMD-ONLY0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// SIMD-ONLY0-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT: [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY0-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY0-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// SIMD-ONLY0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY0-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY0-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// SIMD-ONLY0-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// SIMD-ONLY0-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY0-NEXT: [[P:%.*]] = alloca ptr, align 64
// SIMD-ONLY0-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// SIMD-ONLY0-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY0-NEXT: store i16 0, ptr [[AA]], align 2
// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY0-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
// SIMD-ONLY0-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// SIMD-ONLY0-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// SIMD-ONLY0-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// SIMD-ONLY0-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
// SIMD-ONLY0-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT: store i32 [[TMP6]], ptr [[X]], align 4
// SIMD-ONLY0-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT: store i32 [[TMP7]], ptr [[Y]], align 4
// SIMD-ONLY0-NEXT: store ptr [[A]], ptr [[P]], align 64
// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2
// SIMD-ONLY0-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY0-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY0-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2
// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i64 0, i64 2
// SIMD-ONLY0-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY0-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 3
// SIMD-ONLY0-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
// SIMD-ONLY0-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i64 0, i64 1
// SIMD-ONLY0-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i64 0, i64 2
// SIMD-ONLY0-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]]
// SIMD-ONLY0-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i64 [[TMP9]]
// SIMD-ONLY0-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i64 3
// SIMD-ONLY0-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
// SIMD-ONLY0-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
// SIMD-ONLY0-NEXT: store i64 1, ptr [[X8]], align 8
// SIMD-ONLY0-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
// SIMD-ONLY0-NEXT: store i8 1, ptr [[Y9]], align 8
// SIMD-ONLY0-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i32, ptr [[X10]], align 4
// SIMD-ONLY0-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double
// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// SIMD-ONLY0-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP11]], i64 0
// SIMD-ONLY0-NEXT: store double [[CONV11]], ptr [[ARRAYIDX12]], align 8
// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// SIMD-ONLY0-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 0
// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX13]], align 8
// SIMD-ONLY0-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00
// SIMD-ONLY0-NEXT: store double [[INC]], ptr [[ARRAYIDX13]], align 8
// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// SIMD-ONLY0-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP15]])
// SIMD-ONLY0-NEXT: ret i32 [[TMP14]]
//
//
// SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY0-NEXT: entry:
// SIMD-ONLY0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// SIMD-ONLY0-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// SIMD-ONLY0-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// SIMD-ONLY0-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// SIMD-ONLY0-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], ptr noundef [[TMP1]])
// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY0-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY0-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY0-NEXT: store i32 [[ADD2]], ptr [[A]], align 4
// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY0-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY0-NEXT: store i32 [[ADD4]], ptr [[A]], align 4
// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY0-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY0-NEXT: store i32 [[ADD6]], ptr [[A]], align 4
// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY0-NEXT: ret i32 [[TMP9]]
//
//
// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY0-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY0-NEXT: entry:
// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// SIMD-ONLY0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT: [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// SIMD-ONLY0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY0-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// SIMD-ONLY0-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY0-NEXT: store i32 [[ADD]], ptr [[B]], align 4
// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY0-NEXT: store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8
// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// SIMD-ONLY0-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// SIMD-ONLY0-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY0-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
// SIMD-ONLY0-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY0-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY0-NEXT: store double [[ADD2]], ptr [[A]], align 8
// SIMD-ONLY0-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load double, ptr [[A3]], align 8
// SIMD-ONLY0-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// SIMD-ONLY0-NEXT: store double [[INC]], ptr [[A3]], align 8
// SIMD-ONLY0-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP7]]
// SIMD-ONLY0-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
// SIMD-ONLY0-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY0-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP8]]
// SIMD-ONLY0-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i64 1
// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
// SIMD-ONLY0-NEXT: [[CONV8:%.*]] = sext i16 [[TMP9]] to i32
// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY0-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// SIMD-ONLY0-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP11]])
// SIMD-ONLY0-NEXT: ret i32 [[ADD9]]
//
//
// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY0-NEXT: entry:
// SIMD-ONLY0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT: [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY0-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY0-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY0-NEXT: store i8 0, ptr [[AAA]], align 1
// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY0-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
// SIMD-ONLY0-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY0-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY0-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY0-NEXT: store i8 [[CONV2]], ptr [[AAA]], align 1
// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY0-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY0-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY0-NEXT: ret i32 [[TMP3]]
//
//
// SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY0-NEXT: entry:
// SIMD-ONLY0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY0-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY0-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY0-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY0-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY0-NEXT: ret i32 [[TMP2]]
//
//
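// The SIMD-ONLY01 checks that follow are textually identical to the
// SIMD-ONLY0 ones apart from the prefix; they come from a second RUN
// configuration of the same simd-only pipeline.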
// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY01-NEXT: entry:
// SIMD-ONLY01-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// SIMD-ONLY01-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT: [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY01-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY01-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// SIMD-ONLY01-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY01-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY01-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// SIMD-ONLY01-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// SIMD-ONLY01-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY01-NEXT: [[P:%.*]] = alloca ptr, align 64
// SIMD-ONLY01-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// SIMD-ONLY01-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY01-NEXT: store i16 0, ptr [[AA]], align 2
// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY01-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
// SIMD-ONLY01-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// SIMD-ONLY01-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
// SIMD-ONLY01-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// SIMD-ONLY01-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// SIMD-ONLY01-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// SIMD-ONLY01-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
// SIMD-ONLY01-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY01-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT: store i32 [[TMP6]], ptr [[X]], align 4
// SIMD-ONLY01-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
// SIMD-ONLY01-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT: store i32 [[TMP7]], ptr [[Y]], align 4
// SIMD-ONLY01-NEXT: store ptr [[A]], ptr [[P]], align 64
// SIMD-ONLY01-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2
// SIMD-ONLY01-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY01-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY01-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY01-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2
// SIMD-ONLY01-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i64 0, i64 2
// SIMD-ONLY01-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY01-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 3
// SIMD-ONLY01-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
// SIMD-ONLY01-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i64 0, i64 1
// SIMD-ONLY01-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i64 0, i64 2
// SIMD-ONLY01-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
// SIMD-ONLY01-NEXT: [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]]
// SIMD-ONLY01-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i64 [[TMP9]]
// SIMD-ONLY01-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i64 3
// SIMD-ONLY01-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
// SIMD-ONLY01-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
// SIMD-ONLY01-NEXT: store i64 1, ptr [[X8]], align 8
// SIMD-ONLY01-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
// SIMD-ONLY01-NEXT: store i8 1, ptr [[Y9]], align 8
// SIMD-ONLY01-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY01-NEXT: [[TMP10:%.*]] = load i32, ptr [[X10]], align 4
// SIMD-ONLY01-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double
// SIMD-ONLY01-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// SIMD-ONLY01-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP11]], i64 0
// SIMD-ONLY01-NEXT: store double [[CONV11]], ptr [[ARRAYIDX12]], align 8
// SIMD-ONLY01-NEXT: [[TMP12:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// SIMD-ONLY01-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 0
// SIMD-ONLY01-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX13]], align 8
// SIMD-ONLY01-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00
// SIMD-ONLY01-NEXT: store double [[INC]], ptr [[ARRAYIDX13]], align 8
// SIMD-ONLY01-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY01-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// SIMD-ONLY01-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP15]])
// SIMD-ONLY01-NEXT: ret i32 [[TMP14]]
//
//
// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY01-NEXT: entry:
// SIMD-ONLY01-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// SIMD-ONLY01-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// SIMD-ONLY01-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// SIMD-ONLY01-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// SIMD-ONLY01-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], ptr noundef [[TMP1]])
// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY01-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY01-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY01-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// SIMD-ONLY01-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY01-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY01-NEXT: store i32 [[ADD2]], ptr [[A]], align 4
// SIMD-ONLY01-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// SIMD-ONLY01-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY01-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY01-NEXT: store i32 [[ADD4]], ptr [[A]], align 4
// SIMD-ONLY01-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// SIMD-ONLY01-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY01-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY01-NEXT: store i32 [[ADD6]], ptr [[A]], align 4
// SIMD-ONLY01-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY01-NEXT: ret i32 [[TMP9]]
//
//
// SIMD-ONLY01-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY01-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY01-NEXT: entry:
// SIMD-ONLY01-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// SIMD-ONLY01-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT: [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// SIMD-ONLY01-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY01-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// SIMD-ONLY01-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY01-NEXT: store i32 [[ADD]], ptr [[B]], align 4
// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// SIMD-ONLY01-NEXT: [[TMP3:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY01-NEXT: store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8
// SIMD-ONLY01-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// SIMD-ONLY01-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// SIMD-ONLY01-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
// SIMD-ONLY01-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY01-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
// SIMD-ONLY01-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY01-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY01-NEXT: store double [[ADD2]], ptr [[A]], align 8
// SIMD-ONLY01-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY01-NEXT: [[TMP6:%.*]] = load double, ptr [[A3]], align 8
// SIMD-ONLY01-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// SIMD-ONLY01-NEXT: store double [[INC]], ptr [[A3]], align 8
// SIMD-ONLY01-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY01-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY01-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP7]]
// SIMD-ONLY01-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
// SIMD-ONLY01-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
// SIMD-ONLY01-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY01-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP8]]
// SIMD-ONLY01-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i64 1
// SIMD-ONLY01-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
// SIMD-ONLY01-NEXT: [[CONV8:%.*]] = sext i16 [[TMP9]] to i32
// SIMD-ONLY01-NEXT: [[TMP10:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY01-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
// SIMD-ONLY01-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// SIMD-ONLY01-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP11]])
// SIMD-ONLY01-NEXT: ret i32 [[ADD9]]
//
//
// SIMD-ONLY01-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY01-NEXT: entry:
// SIMD-ONLY01-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT: [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY01-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY01-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY01-NEXT: store i8 0, ptr [[AAA]], align 1
// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY01-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY01-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
// SIMD-ONLY01-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY01-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY01-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY01-NEXT: store i8 [[CONV2]], ptr [[AAA]], align 1
// SIMD-ONLY01-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY01-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY01-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY01-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY01-NEXT: ret i32 [[TMP3]]
//
//
// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY01-NEXT: entry:
// SIMD-ONLY01-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY01-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY01-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY01-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY01-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY01-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY01-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY01-NEXT: ret i32 [[TMP2]]
//
//
// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY02-NEXT: entry:
// SIMD-ONLY02-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// SIMD-ONLY02-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT: [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY02-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY02-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// SIMD-ONLY02-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY02-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// SIMD-ONLY02-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY02-NEXT: [[P:%.*]] = alloca ptr, align 64
// SIMD-ONLY02-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// SIMD-ONLY02-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY02-NEXT: store i16 0, ptr [[AA]], align 2
// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY02-NEXT: store ptr [[TMP1]], ptr [[SAVED_STACK]], align 4
// SIMD-ONLY02-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// SIMD-ONLY02-NEXT: store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4
// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// SIMD-ONLY02-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// SIMD-ONLY02-NEXT: store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
// SIMD-ONLY02-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY02-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT: store i32 [[TMP4]], ptr [[X]], align 4
// SIMD-ONLY02-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
// SIMD-ONLY02-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT: store i32 [[TMP5]], ptr [[Y]], align 4
// SIMD-ONLY02-NEXT: store ptr [[A]], ptr [[P]], align 64
// SIMD-ONLY02-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2
// SIMD-ONLY02-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32
// SIMD-ONLY02-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY02-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY02-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2
// SIMD-ONLY02-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i32 0, i32 2
// SIMD-ONLY02-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY02-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i32 3
// SIMD-ONLY02-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
// SIMD-ONLY02-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i32 0, i32 1
// SIMD-ONLY02-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i32 0, i32 2
// SIMD-ONLY02-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
// SIMD-ONLY02-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]]
// SIMD-ONLY02-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i32 [[TMP7]]
// SIMD-ONLY02-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i32 3
// SIMD-ONLY02-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
// SIMD-ONLY02-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
// SIMD-ONLY02-NEXT: store i64 1, ptr [[X8]], align 4
// SIMD-ONLY02-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
// SIMD-ONLY02-NEXT: store i8 1, ptr [[Y9]], align 4
// SIMD-ONLY02-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY02-NEXT: [[TMP8:%.*]] = load i32, ptr [[X10]], align 4
// SIMD-ONLY02-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double
// SIMD-ONLY02-NEXT: [[TMP9:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// SIMD-ONLY02-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i32 0
// SIMD-ONLY02-NEXT: store double [[CONV11]], ptr [[ARRAYIDX12]], align 4
// SIMD-ONLY02-NEXT: [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// SIMD-ONLY02-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP10]], i32 0
// SIMD-ONLY02-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX13]], align 4
// SIMD-ONLY02-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// SIMD-ONLY02-NEXT: store double [[INC]], ptr [[ARRAYIDX13]], align 4
// SIMD-ONLY02-NEXT: [[TMP12:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// SIMD-ONLY02-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP13]])
// SIMD-ONLY02-NEXT: ret i32 [[TMP12]]
//
//
// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY02-NEXT: entry:
// SIMD-ONLY02-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// SIMD-ONLY02-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// SIMD-ONLY02-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// SIMD-ONLY02-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// SIMD-ONLY02-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], ptr noundef [[TMP1]])
// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY02-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// SIMD-ONLY02-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY02-NEXT: store i32 [[ADD2]], ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// SIMD-ONLY02-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY02-NEXT: store i32 [[ADD4]], ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// SIMD-ONLY02-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY02-NEXT: store i32 [[ADD6]], ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY02-NEXT: ret i32 [[TMP9]]
//
//
// SIMD-ONLY02-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY02-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// SIMD-ONLY02-NEXT: entry:
// SIMD-ONLY02-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
// SIMD-ONLY02-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT: [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// SIMD-ONLY02-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
// SIMD-ONLY02-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY02-NEXT: store i32 [[ADD]], ptr [[B]], align 4
// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY02-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 4
// SIMD-ONLY02-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// SIMD-ONLY02-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// SIMD-ONLY02-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
// SIMD-ONLY02-NEXT: [[TMP4:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY02-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double
// SIMD-ONLY02-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY02-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY02-NEXT: store double [[ADD2]], ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY02-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 4
// SIMD-ONLY02-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00
// SIMD-ONLY02-NEXT: store double [[INC]], ptr [[A3]], align 4
// SIMD-ONLY02-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY02-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY02-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP6]]
// SIMD-ONLY02-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
// SIMD-ONLY02-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
// SIMD-ONLY02-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY02-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP7]]
// SIMD-ONLY02-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i32 1
// SIMD-ONLY02-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
// SIMD-ONLY02-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY02-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY02-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
// SIMD-ONLY02-NEXT: [[TMP10:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// SIMD-ONLY02-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP10]])
// SIMD-ONLY02-NEXT: ret i32 [[ADD9]]
//
//
// SIMD-ONLY02-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY02-NEXT: entry:
// SIMD-ONLY02-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT: [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY02-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY02-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY02-NEXT: store i8 0, ptr [[AAA]], align 1
// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY02-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
// SIMD-ONLY02-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY02-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY02-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY02-NEXT: store i8 [[CONV2]], ptr [[AAA]], align 1
// SIMD-ONLY02-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY02-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY02-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY02-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY02-NEXT: ret i32 [[TMP3]]
//
//
// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY02-NEXT: entry:
// SIMD-ONLY02-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY02-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY02-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY02-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY02-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY02-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY02-NEXT: ret i32 [[TMP2]]
//
//
// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY03-NEXT: entry:
// SIMD-ONLY03-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// SIMD-ONLY03-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT: [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY03-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY03-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// SIMD-ONLY03-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY03-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// SIMD-ONLY03-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY03-NEXT: [[P:%.*]] = alloca ptr, align 64
// SIMD-ONLY03-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// SIMD-ONLY03-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY03-NEXT: store i16 0, ptr [[AA]], align 2
// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY03-NEXT: store ptr [[TMP1]], ptr [[SAVED_STACK]], align 4
// SIMD-ONLY03-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// SIMD-ONLY03-NEXT: store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4
// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// SIMD-ONLY03-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// SIMD-ONLY03-NEXT: store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
// SIMD-ONLY03-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY03-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT: store i32 [[TMP4]], ptr [[X]], align 4
// SIMD-ONLY03-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
// SIMD-ONLY03-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT: store i32 [[TMP5]], ptr [[Y]], align 4
// SIMD-ONLY03-NEXT: store ptr [[A]], ptr [[P]], align 64
// SIMD-ONLY03-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2
// SIMD-ONLY03-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32
// SIMD-ONLY03-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY03-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY03-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2
// SIMD-ONLY03-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i32 0, i32 2
// SIMD-ONLY03-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY03-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i32 3
// SIMD-ONLY03-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
// SIMD-ONLY03-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i32 0, i32 1
// SIMD-ONLY03-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i32 0, i32 2
// SIMD-ONLY03-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
// SIMD-ONLY03-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]]
// SIMD-ONLY03-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i32 [[TMP7]]
// SIMD-ONLY03-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i32 3
// SIMD-ONLY03-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
// SIMD-ONLY03-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
// SIMD-ONLY03-NEXT: store i64 1, ptr [[X8]], align 4
// SIMD-ONLY03-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
// SIMD-ONLY03-NEXT: store i8 1, ptr [[Y9]], align 4
// SIMD-ONLY03-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY03-NEXT: [[TMP8:%.*]] = load i32, ptr [[X10]], align 4
// SIMD-ONLY03-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double
// SIMD-ONLY03-NEXT: [[TMP9:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// SIMD-ONLY03-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i32 0
// SIMD-ONLY03-NEXT: store double [[CONV11]], ptr [[ARRAYIDX12]], align 4
// SIMD-ONLY03-NEXT: [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// SIMD-ONLY03-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP10]], i32 0
// SIMD-ONLY03-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX13]], align 4
// SIMD-ONLY03-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// SIMD-ONLY03-NEXT: store double [[INC]], ptr [[ARRAYIDX13]], align 4
// SIMD-ONLY03-NEXT: [[TMP12:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// SIMD-ONLY03-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP13]])
// SIMD-ONLY03-NEXT: ret i32 [[TMP12]]
//
//
// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY03-NEXT: entry:
// SIMD-ONLY03-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// SIMD-ONLY03-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// SIMD-ONLY03-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// SIMD-ONLY03-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// SIMD-ONLY03-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], ptr noundef [[TMP1]])
// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY03-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// SIMD-ONLY03-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY03-NEXT: store i32 [[ADD2]], ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// SIMD-ONLY03-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY03-NEXT: store i32 [[ADD4]], ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// SIMD-ONLY03-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY03-NEXT: store i32 [[ADD6]], ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY03-NEXT: ret i32 [[TMP9]]
//
//
// SIMD-ONLY03-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY03-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// SIMD-ONLY03-NEXT: entry:
// SIMD-ONLY03-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
// SIMD-ONLY03-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT: [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// SIMD-ONLY03-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
// SIMD-ONLY03-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY03-NEXT: store i32 [[ADD]], ptr [[B]], align 4
// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY03-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 4
// SIMD-ONLY03-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// SIMD-ONLY03-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// SIMD-ONLY03-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
// SIMD-ONLY03-NEXT: [[TMP4:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY03-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double
// SIMD-ONLY03-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY03-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY03-NEXT: store double [[ADD2]], ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY03-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 4
// SIMD-ONLY03-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00
// SIMD-ONLY03-NEXT: store double [[INC]], ptr [[A3]], align 4
// SIMD-ONLY03-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY03-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY03-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP6]]
// SIMD-ONLY03-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
// SIMD-ONLY03-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
// SIMD-ONLY03-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY03-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP7]]
// SIMD-ONLY03-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i32 1
// SIMD-ONLY03-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
// SIMD-ONLY03-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY03-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY03-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
// SIMD-ONLY03-NEXT: [[TMP10:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// SIMD-ONLY03-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP10]])
// SIMD-ONLY03-NEXT: ret i32 [[ADD9]]
//
//
// SIMD-ONLY03-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY03-NEXT: entry:
// SIMD-ONLY03-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT: [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY03-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY03-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY03-NEXT: store i8 0, ptr [[AAA]], align 1
// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY03-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
// SIMD-ONLY03-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY03-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY03-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY03-NEXT: store i8 [[CONV2]], ptr [[AAA]], align 1
// SIMD-ONLY03-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY03-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY03-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY03-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY03-NEXT: ret i32 [[TMP3]]
//
//
// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY03-NEXT: entry:
// SIMD-ONLY03-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY03-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY03-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY03-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY03-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY03-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY03-NEXT: ret i32 [[TMP2]]
//
//
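// A note on the device-side (TCHECK) checks that follow: each offload entry
// point takes a leading implicit `dyn_ptr` argument ahead of the captured
// firstprivate values. The l63 entry below only spills its scalar and pointer
// arguments to local allocas, since the region body does not otherwise use them.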
// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], ptr noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK-NEXT: entry:
// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// TCHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// TCHECK-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8
// TCHECK-NEXT: store i64 [[GA]], ptr [[GA_ADDR]], align 8
// TCHECK-NEXT: ret void
//
//
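// The l70 entry below materializes private copies of its aggregate and VLA
// firstprivates on the device: llvm.memcpy for the fixed-size float array,
// the 5x10 double array, and the struct, and stacksave/alloca/memcpy (with a
// matching stackrestore on exit) for the two variable-length arrays.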
// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] {
// TCHECK-NEXT: entry:
// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8
// TCHECK-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// TCHECK-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// TCHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// TCHECK-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// TCHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// TCHECK-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
// TCHECK-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8
// TCHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// TCHECK-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
// TCHECK-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8
// TCHECK-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8
// TCHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
// TCHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// TCHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
// TCHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8
// TCHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// TCHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
// TCHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8
// TCHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8
// TCHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8
// TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i64 40, i1 false)
// TCHECK-NEXT: [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
// TCHECK-NEXT: store ptr [[TMP8]], ptr [[SAVED_STACK]], align 8
// TCHECK-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// TCHECK-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
// TCHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP1]], 4
// TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i64 [[TMP9]], i1 false)
// TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i64 400, i1 false)
// TCHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP10]], align 8
// TCHECK-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
// TCHECK-NEXT: store i64 [[TMP5]], ptr [[__VLA_EXPR2]], align 8
// TCHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
// TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i64 [[TMP12]], i1 false)
// TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[D9]], ptr align 8 [[TMP7]], i64 16, i1 false)
// TCHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// TCHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
// TCHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// TCHECK-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK-NEXT: store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i64 0, i64 2
// TCHECK-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// TCHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i64 3
// TCHECK-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
// TCHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i64 0, i64 1
// TCHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i64 0, i64 2
// TCHECK-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
// TCHECK-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP5]]
// TCHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i64 [[TMP14]]
// TCHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3
// TCHECK-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
// TCHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
// TCHECK-NEXT: store i64 1, ptr [[X]], align 8
// TCHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
// TCHECK-NEXT: store i8 1, ptr [[Y]], align 8
// TCHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// TCHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP15]])
// TCHECK-NEXT: ret void
//
//
// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK-NEXT: entry:
// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// TCHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// TCHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
// TCHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
// TCHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], ptr [[TMP0]], i32 0, i32 0
// TCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
// TCHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 0
// TCHECK-NEXT: store double [[CONV]], ptr [[ARRAYIDX]], align 8
// TCHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// TCHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 0
// TCHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX1]], align 8
// TCHECK-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK-NEXT: store double [[INC]], ptr [[ARRAYIDX1]], align 8
// TCHECK-NEXT: ret void
//
//
// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK-NEXT: entry:
// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// TCHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// TCHECK-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8
// TCHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// TCHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
// TCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// TCHECK-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// TCHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
// TCHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32
// TCHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
// TCHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
// TCHECK-NEXT: store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
// TCHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// TCHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK-NEXT: store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
// TCHECK-NEXT: ret void
//
//
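// The member-function entry below (S1::r1) privatizes a two-dimensional i16
// VLA the same way: the element count is the product of the two VLA bounds,
// and the byte count passed to llvm.memcpy is that product times sizeof(i16).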
// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK-NEXT: entry:
// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// TCHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// TCHECK-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
// TCHECK-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
// TCHECK-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
// TCHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// TCHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// TCHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
// TCHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
// TCHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// TCHECK-NEXT: [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
// TCHECK-NEXT: store ptr [[TMP4]], ptr [[SAVED_STACK]], align 8
// TCHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// TCHECK-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
// TCHECK-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR1]], align 8
// TCHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i64 [[TMP7]], i1 false)
// TCHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
// TCHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
// TCHECK-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// TCHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
// TCHECK-NEXT: store double [[ADD]], ptr [[A]], align 8
// TCHECK-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
// TCHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[A4]], align 8
// TCHECK-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
// TCHECK-NEXT: store double [[INC]], ptr [[A4]], align 8
// TCHECK-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
// TCHECK-NEXT: [[TMP10:%.*]] = mul nsw i64 1, [[TMP2]]
// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i64 [[TMP10]]
// TCHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
// TCHECK-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
// TCHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// TCHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP11]])
// TCHECK-NEXT: ret void
//
//
// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK-NEXT: entry:
// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// TCHECK-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// TCHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// TCHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// TCHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
// TCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// TCHECK-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
// TCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// TCHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
// TCHECK-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
// TCHECK-NEXT: ret void
//
//
// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], ptr noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK1-NEXT: entry:
// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// TCHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// TCHECK1-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8
// TCHECK1-NEXT: store i64 [[GA]], ptr [[GA_ADDR]], align 8
// TCHECK1-NEXT: ret void
//
//
// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] {
// TCHECK1-NEXT: entry:
// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// TCHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// TCHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// TCHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// TCHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
// TCHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8
// TCHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// TCHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
// TCHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8
// TCHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8
// TCHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
// TCHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// TCHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
// TCHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8
// TCHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// TCHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
// TCHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8
// TCHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8
// TCHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8
// TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i64 40, i1 false)
// TCHECK1-NEXT: [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
// TCHECK1-NEXT: store ptr [[TMP8]], ptr [[SAVED_STACK]], align 8
// TCHECK1-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// TCHECK1-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
// TCHECK1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP1]], 4
// TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i64 [[TMP9]], i1 false)
// TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i64 400, i1 false)
// TCHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK1-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP10]], align 8
// TCHECK1-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
// TCHECK1-NEXT: store i64 [[TMP5]], ptr [[__VLA_EXPR2]], align 8
// TCHECK1-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
// TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i64 [[TMP12]], i1 false)
// TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[D9]], ptr align 8 [[TMP7]], i64 16, i1 false)
// TCHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// TCHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
// TCHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// TCHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK1-NEXT: store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i64 0, i64 2
// TCHECK1-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// TCHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i64 3
// TCHECK1-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
// TCHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i64 0, i64 1
// TCHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i64 0, i64 2
// TCHECK1-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
// TCHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP5]]
// TCHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i64 [[TMP14]]
// TCHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3
// TCHECK1-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
// TCHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
// TCHECK1-NEXT: store i64 1, ptr [[X]], align 8
// TCHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
// TCHECK1-NEXT: store i8 1, ptr [[Y]], align 8
// TCHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// TCHECK1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP15]])
// TCHECK1-NEXT: ret void
//
//
// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK1-NEXT: entry:
// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// TCHECK1-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// TCHECK1-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
// TCHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
// TCHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], ptr [[TMP0]], i32 0, i32 0
// TCHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
// TCHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 0
// TCHECK1-NEXT: store double [[CONV]], ptr [[ARRAYIDX]], align 8
// TCHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// TCHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 0
// TCHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX1]], align 8
// TCHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK1-NEXT: store double [[INC]], ptr [[ARRAYIDX1]], align 8
// TCHECK1-NEXT: ret void
//
//
// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK1-NEXT: entry:
// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// TCHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// TCHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8
// TCHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// TCHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
// TCHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// TCHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// TCHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
// TCHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32
// TCHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
// TCHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
// TCHECK1-NEXT: store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
// TCHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// TCHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK1-NEXT: store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
// TCHECK1-NEXT: ret void
//
//
// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK1-NEXT: entry:
// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// TCHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// TCHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
// TCHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
// TCHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
// TCHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// TCHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// TCHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
// TCHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
// TCHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// TCHECK1-NEXT: [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
// TCHECK1-NEXT: store ptr [[TMP4]], ptr [[SAVED_STACK]], align 8
// TCHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK1-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// TCHECK1-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
// TCHECK1-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR1]], align 8
// TCHECK1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK1-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i64 [[TMP7]], i1 false)
// TCHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
// TCHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
// TCHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// TCHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
// TCHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8
// TCHECK1-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
// TCHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[A4]], align 8
// TCHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
// TCHECK1-NEXT: store double [[INC]], ptr [[A4]], align 8
// TCHECK1-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
// TCHECK1-NEXT: [[TMP10:%.*]] = mul nsw i64 1, [[TMP2]]
// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i64 [[TMP10]]
// TCHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
// TCHECK1-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
// TCHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// TCHECK1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP11]])
// TCHECK1-NEXT: ret void
//
//
// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK1-NEXT: entry:
// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// TCHECK1-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// TCHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// TCHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// TCHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
// TCHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// TCHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
// TCHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// TCHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
// TCHECK1-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
// TCHECK1-NEXT: ret void
//
//
// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], ptr noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK2-NEXT: entry:
// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// TCHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// TCHECK2-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 4
// TCHECK2-NEXT: store i32 [[GA]], ptr [[GA_ADDR]], align 4
// TCHECK2-NEXT: ret void
//
//
// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] {
// TCHECK2-NEXT: entry:
// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// TCHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// TCHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// TCHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// TCHECK2-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
// TCHECK2-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4
// TCHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// TCHECK2-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
// TCHECK2-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4
// TCHECK2-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4
// TCHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
// TCHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// TCHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
// TCHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4
// TCHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// TCHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
// TCHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4
// TCHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4
// TCHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4
// TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i32 40, i1 false)
// TCHECK2-NEXT: [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
// TCHECK2-NEXT: store ptr [[TMP8]], ptr [[SAVED_STACK]], align 4
// TCHECK2-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// TCHECK2-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
// TCHECK2-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP1]], 4
// TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i32 [[TMP9]], i1 false)
// TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i32 400, i1 false)
// TCHECK2-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK2-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP10]], align 8
// TCHECK2-NEXT: store i32 [[TMP4]], ptr [[__VLA_EXPR1]], align 4
// TCHECK2-NEXT: store i32 [[TMP5]], ptr [[__VLA_EXPR2]], align 4
// TCHECK2-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK2-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 8
// TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i32 [[TMP12]], i1 false)
// TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[D9]], ptr align 4 [[TMP7]], i32 12, i1 false)
// TCHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// TCHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
// TCHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// TCHECK2-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK2-NEXT: store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i32 0, i32 2
// TCHECK2-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// TCHECK2-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i32 3
// TCHECK2-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
// TCHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i32 0, i32 1
// TCHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i32 0, i32 2
// TCHECK2-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
// TCHECK2-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP5]]
// TCHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i32 [[TMP14]]
// TCHECK2-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3
// TCHECK2-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
// TCHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
// TCHECK2-NEXT: store i64 1, ptr [[X]], align 4
// TCHECK2-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
// TCHECK2-NEXT: store i8 1, ptr [[Y]], align 4
// TCHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// TCHECK2-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP15]])
// TCHECK2-NEXT: ret void
//
//
// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK2-NEXT: entry:
// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// TCHECK2-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// TCHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
// TCHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
// TCHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], ptr [[TMP0]], i32 0, i32 0
// TCHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
// TCHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 0
// TCHECK2-NEXT: store double [[CONV]], ptr [[ARRAYIDX]], align 4
// TCHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// TCHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 0
// TCHECK2-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX1]], align 4
// TCHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK2-NEXT: store double [[INC]], ptr [[ARRAYIDX1]], align 4
// TCHECK2-NEXT: ret void
//
//
// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK2-NEXT: entry:
// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// TCHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// TCHECK2-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4
// TCHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// TCHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
// TCHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// TCHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// TCHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
// TCHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32
// TCHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
// TCHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
// TCHECK2-NEXT: store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
// TCHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// TCHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK2-NEXT: store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
// TCHECK2-NEXT: ret void
//
//
// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK2-NEXT: entry:
// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// TCHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
// TCHECK2-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// TCHECK2-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
// TCHECK2-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
// TCHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// TCHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
// TCHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
// TCHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
// TCHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// TCHECK2-NEXT: [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
// TCHECK2-NEXT: store ptr [[TMP4]], ptr [[SAVED_STACK]], align 4
// TCHECK2-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK2-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// TCHECK2-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
// TCHECK2-NEXT: store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
// TCHECK2-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK2-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i32 [[TMP7]], i1 false)
// TCHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
// TCHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
// TCHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// TCHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
// TCHECK2-NEXT: store double [[ADD]], ptr [[A]], align 4
// TCHECK2-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
// TCHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[A4]], align 4
// TCHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
// TCHECK2-NEXT: store double [[INC]], ptr [[A4]], align 4
// TCHECK2-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
// TCHECK2-NEXT: [[TMP10:%.*]] = mul nsw i32 1, [[TMP2]]
// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i32 [[TMP10]]
// TCHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
// TCHECK2-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
// TCHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// TCHECK2-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP11]])
// TCHECK2-NEXT: ret void
//
//
// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK2-NEXT: entry:
// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// TCHECK2-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// TCHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// TCHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// TCHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
// TCHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// TCHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
// TCHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// TCHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
// TCHECK2-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
// TCHECK2-NEXT: ret void
//
//
// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], ptr noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK3-NEXT: entry:
// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// TCHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// TCHECK3-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 4
// TCHECK3-NEXT: store i32 [[GA]], ptr [[GA_ADDR]], align 4
// TCHECK3-NEXT: ret void
//
//
// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] {
// TCHECK3-NEXT: entry:
// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// TCHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// TCHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// TCHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// TCHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
// TCHECK3-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4
// TCHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// TCHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
// TCHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4
// TCHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4
// TCHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
// TCHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// TCHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
// TCHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4
// TCHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// TCHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
// TCHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4
// TCHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4
// TCHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4
// TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i32 40, i1 false)
// TCHECK3-NEXT: [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
// TCHECK3-NEXT: store ptr [[TMP8]], ptr [[SAVED_STACK]], align 4
// TCHECK3-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// TCHECK3-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
// TCHECK3-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP1]], 4
// TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i32 [[TMP9]], i1 false)
// TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i32 400, i1 false)
// TCHECK3-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK3-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP10]], align 8
// TCHECK3-NEXT: store i32 [[TMP4]], ptr [[__VLA_EXPR1]], align 4
// TCHECK3-NEXT: store i32 [[TMP5]], ptr [[__VLA_EXPR2]], align 4
// TCHECK3-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK3-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 8
// TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i32 [[TMP12]], i1 false)
// TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[D9]], ptr align 4 [[TMP7]], i32 12, i1 false)
// TCHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// TCHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
// TCHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// TCHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK3-NEXT: store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i32 0, i32 2
// TCHECK3-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// TCHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i32 3
// TCHECK3-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
// TCHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i32 0, i32 1
// TCHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i32 0, i32 2
// TCHECK3-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
// TCHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP5]]
// TCHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i32 [[TMP14]]
// TCHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3
// TCHECK3-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
// TCHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
// TCHECK3-NEXT: store i64 1, ptr [[X]], align 4
// TCHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
// TCHECK3-NEXT: store i8 1, ptr [[Y]], align 4
// TCHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// TCHECK3-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP15]])
// TCHECK3-NEXT: ret void
//
//
// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK3-NEXT: entry:
// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// TCHECK3-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// TCHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
// TCHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
// TCHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], ptr [[TMP0]], i32 0, i32 0
// TCHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
// TCHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 0
// TCHECK3-NEXT: store double [[CONV]], ptr [[ARRAYIDX]], align 4
// TCHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// TCHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 0
// TCHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX1]], align 4
// TCHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK3-NEXT: store double [[INC]], ptr [[ARRAYIDX1]], align 4
// TCHECK3-NEXT: ret void
//
//
// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK3-NEXT: entry:
// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// TCHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// TCHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4
// TCHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// TCHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
// TCHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// TCHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// TCHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
// TCHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32
// TCHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
// TCHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
// TCHECK3-NEXT: store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
// TCHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// TCHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK3-NEXT: store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
// TCHECK3-NEXT: ret void
//
//
// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK3-NEXT: entry:
// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// TCHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
// TCHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// TCHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
// TCHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
// TCHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// TCHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
// TCHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
// TCHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
// TCHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// TCHECK3-NEXT: [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
// TCHECK3-NEXT: store ptr [[TMP4]], ptr [[SAVED_STACK]], align 4
// TCHECK3-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK3-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// TCHECK3-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
// TCHECK3-NEXT: store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
// TCHECK3-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK3-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i32 [[TMP7]], i1 false)
// TCHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
// TCHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
// TCHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// TCHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
// TCHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4
// TCHECK3-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
// TCHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[A4]], align 4
// TCHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
// TCHECK3-NEXT: store double [[INC]], ptr [[A4]], align 4
// TCHECK3-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
// TCHECK3-NEXT: [[TMP10:%.*]] = mul nsw i32 1, [[TMP2]]
// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i32 [[TMP10]]
// TCHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
// TCHECK3-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
// TCHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// TCHECK3-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP11]])
// TCHECK3-NEXT: ret void
//
//
// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK3-NEXT: entry:
// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// TCHECK3-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// TCHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// TCHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// TCHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
// TCHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// TCHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
// TCHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// TCHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
// TCHECK3-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
// TCHECK3-NEXT: ret void
//
//
// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY1-NEXT: entry:
// SIMD-ONLY1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// SIMD-ONLY1-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT: [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY1-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// SIMD-ONLY1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY1-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// SIMD-ONLY1-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// SIMD-ONLY1-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY1-NEXT: [[P:%.*]] = alloca ptr, align 64
// SIMD-ONLY1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// SIMD-ONLY1-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY1-NEXT: store i16 0, ptr [[AA]], align 2
// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY1-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
// SIMD-ONLY1-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// SIMD-ONLY1-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// SIMD-ONLY1-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// SIMD-ONLY1-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
// SIMD-ONLY1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT: store i32 [[TMP6]], ptr [[X]], align 4
// SIMD-ONLY1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT: store i32 [[TMP7]], ptr [[Y]], align 4
// SIMD-ONLY1-NEXT: store ptr [[A]], ptr [[P]], align 64
// SIMD-ONLY1-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2
// SIMD-ONLY1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY1-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2
// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i64 0, i64 2
// SIMD-ONLY1-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 3
// SIMD-ONLY1-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
// SIMD-ONLY1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i64 0, i64 1
// SIMD-ONLY1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i64 0, i64 2
// SIMD-ONLY1-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
// SIMD-ONLY1-NEXT: [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]]
// SIMD-ONLY1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i64 [[TMP9]]
// SIMD-ONLY1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i64 3
// SIMD-ONLY1-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
// SIMD-ONLY1-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
// SIMD-ONLY1-NEXT: store i64 1, ptr [[X8]], align 8
// SIMD-ONLY1-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
// SIMD-ONLY1-NEXT: store i8 1, ptr [[Y9]], align 8
// SIMD-ONLY1-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY1-NEXT: [[TMP10:%.*]] = load i32, ptr [[X10]], align 4
// SIMD-ONLY1-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double
// SIMD-ONLY1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// SIMD-ONLY1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP11]], i64 0
// SIMD-ONLY1-NEXT: store double [[CONV11]], ptr [[ARRAYIDX12]], align 8
// SIMD-ONLY1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// SIMD-ONLY1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 0
// SIMD-ONLY1-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX13]], align 8
// SIMD-ONLY1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00
// SIMD-ONLY1-NEXT: store double [[INC]], ptr [[ARRAYIDX13]], align 8
// SIMD-ONLY1-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// SIMD-ONLY1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP15]])
// SIMD-ONLY1-NEXT: ret i32 [[TMP14]]
//
//
// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY1-NEXT: entry:
// SIMD-ONLY1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// SIMD-ONLY1-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// SIMD-ONLY1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// SIMD-ONLY1-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// SIMD-ONLY1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], ptr noundef [[TMP1]])
// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY1-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4
// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY1-NEXT: store i32 [[ADD4]], ptr [[A]], align 4
// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// SIMD-ONLY1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY1-NEXT: store i32 [[ADD6]], ptr [[A]], align 4
// SIMD-ONLY1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY1-NEXT: ret i32 [[TMP9]]
//
//
// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY1-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY1-NEXT: entry:
// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// SIMD-ONLY1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT: [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// SIMD-ONLY1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// SIMD-ONLY1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY1-NEXT: store i32 [[ADD]], ptr [[B]], align 4
// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY1-NEXT: store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8
// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// SIMD-ONLY1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// SIMD-ONLY1-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
// SIMD-ONLY1-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY1-NEXT: store double [[ADD2]], ptr [[A]], align 8
// SIMD-ONLY1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = load double, ptr [[A3]], align 8
// SIMD-ONLY1-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// SIMD-ONLY1-NEXT: store double [[INC]], ptr [[A3]], align 8
// SIMD-ONLY1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP7]]
// SIMD-ONLY1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
// SIMD-ONLY1-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
// SIMD-ONLY1-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP8]]
// SIMD-ONLY1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i64 1
// SIMD-ONLY1-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
// SIMD-ONLY1-NEXT: [[CONV8:%.*]] = sext i16 [[TMP9]] to i32
// SIMD-ONLY1-NEXT: [[TMP10:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
// SIMD-ONLY1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// SIMD-ONLY1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP11]])
// SIMD-ONLY1-NEXT: ret i32 [[ADD9]]
//
//
// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY1-NEXT: entry:
// SIMD-ONLY1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT: [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY1-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY1-NEXT: store i8 0, ptr [[AAA]], align 1
// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY1-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
// SIMD-ONLY1-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY1-NEXT: store i8 [[CONV2]], ptr [[AAA]], align 1
// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY1-NEXT: ret i32 [[TMP3]]
//
//
// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY1-NEXT: entry:
// SIMD-ONLY1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY1-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY1-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY1-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY1-NEXT: ret i32 [[TMP2]]
//
//
// SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY11-NEXT: entry:
// SIMD-ONLY11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// SIMD-ONLY11-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT: [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY11-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY11-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// SIMD-ONLY11-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY11-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY11-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// SIMD-ONLY11-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// SIMD-ONLY11-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY11-NEXT: [[P:%.*]] = alloca ptr, align 64
// SIMD-ONLY11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// SIMD-ONLY11-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY11-NEXT: store i16 0, ptr [[AA]], align 2
// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY11-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
// SIMD-ONLY11-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// SIMD-ONLY11-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
// SIMD-ONLY11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// SIMD-ONLY11-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// SIMD-ONLY11-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// SIMD-ONLY11-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
// SIMD-ONLY11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY11-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT: store i32 [[TMP6]], ptr [[X]], align 4
// SIMD-ONLY11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
// SIMD-ONLY11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT: store i32 [[TMP7]], ptr [[Y]], align 4
// SIMD-ONLY11-NEXT: store ptr [[A]], ptr [[P]], align 64
// SIMD-ONLY11-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2
// SIMD-ONLY11-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY11-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY11-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2
// SIMD-ONLY11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i64 0, i64 2
// SIMD-ONLY11-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 3
// SIMD-ONLY11-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
// SIMD-ONLY11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i64 0, i64 1
// SIMD-ONLY11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i64 0, i64 2
// SIMD-ONLY11-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
// SIMD-ONLY11-NEXT: [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]]
// SIMD-ONLY11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i64 [[TMP9]]
// SIMD-ONLY11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i64 3
// SIMD-ONLY11-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
// SIMD-ONLY11-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
// SIMD-ONLY11-NEXT: store i64 1, ptr [[X8]], align 8
// SIMD-ONLY11-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
// SIMD-ONLY11-NEXT: store i8 1, ptr [[Y9]], align 8
// SIMD-ONLY11-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY11-NEXT: [[TMP10:%.*]] = load i32, ptr [[X10]], align 4
// SIMD-ONLY11-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double
// SIMD-ONLY11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// SIMD-ONLY11-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP11]], i64 0
// SIMD-ONLY11-NEXT: store double [[CONV11]], ptr [[ARRAYIDX12]], align 8
// SIMD-ONLY11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// SIMD-ONLY11-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 0
// SIMD-ONLY11-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX13]], align 8
// SIMD-ONLY11-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00
// SIMD-ONLY11-NEXT: store double [[INC]], ptr [[ARRAYIDX13]], align 8
// SIMD-ONLY11-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// SIMD-ONLY11-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP15]])
// SIMD-ONLY11-NEXT: ret i32 [[TMP14]]
//
//
// SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY11-NEXT: entry:
// SIMD-ONLY11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
// SIMD-ONLY11-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// SIMD-ONLY11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
// SIMD-ONLY11-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// SIMD-ONLY11-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], ptr noundef [[TMP1]])
// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY11-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// SIMD-ONLY11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY11-NEXT: store i32 [[ADD2]], ptr [[A]], align 4
// SIMD-ONLY11-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// SIMD-ONLY11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY11-NEXT: store i32 [[ADD4]], ptr [[A]], align 4
// SIMD-ONLY11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// SIMD-ONLY11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY11-NEXT: store i32 [[ADD6]], ptr [[A]], align 4
// SIMD-ONLY11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY11-NEXT: ret i32 [[TMP9]]
//
//
// SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY11-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY11-NEXT: entry:
// SIMD-ONLY11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// SIMD-ONLY11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT: [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// SIMD-ONLY11-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// SIMD-ONLY11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY11-NEXT: store i32 [[ADD]], ptr [[B]], align 4
// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// SIMD-ONLY11-NEXT: [[TMP3:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY11-NEXT: store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8
// SIMD-ONLY11-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// SIMD-ONLY11-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// SIMD-ONLY11-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
// SIMD-ONLY11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
// SIMD-ONLY11-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY11-NEXT: store double [[ADD2]], ptr [[A]], align 8
// SIMD-ONLY11-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY11-NEXT: [[TMP6:%.*]] = load double, ptr [[A3]], align 8
// SIMD-ONLY11-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// SIMD-ONLY11-NEXT: store double [[INC]], ptr [[A3]], align 8
// SIMD-ONLY11-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY11-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP7]]
// SIMD-ONLY11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
// SIMD-ONLY11-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
// SIMD-ONLY11-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP8]]
// SIMD-ONLY11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i64 1
// SIMD-ONLY11-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
// SIMD-ONLY11-NEXT: [[CONV8:%.*]] = sext i16 [[TMP9]] to i32
// SIMD-ONLY11-NEXT: [[TMP10:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY11-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
// SIMD-ONLY11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
// SIMD-ONLY11-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP11]])
// SIMD-ONLY11-NEXT: ret i32 [[ADD9]]
//
//
// SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY11-NEXT: entry:
// SIMD-ONLY11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT: [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY11-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY11-NEXT: store i8 0, ptr [[AAA]], align 1
// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY11-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
// SIMD-ONLY11-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY11-NEXT: store i8 [[CONV2]], ptr [[AAA]], align 1
// SIMD-ONLY11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY11-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY11-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY11-NEXT: ret i32 [[TMP3]]
//
//
// SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY11-NEXT: entry:
// SIMD-ONLY11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY11-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY11-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY11-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY11-NEXT: ret i32 [[TMP2]]
//
//
// SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY12-NEXT: entry:
// SIMD-ONLY12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// SIMD-ONLY12-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT: [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY12-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY12-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// SIMD-ONLY12-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY12-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// SIMD-ONLY12-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY12-NEXT: [[P:%.*]] = alloca ptr, align 64
// SIMD-ONLY12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// SIMD-ONLY12-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY12-NEXT: store i16 0, ptr [[AA]], align 2
// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY12-NEXT: store ptr [[TMP1]], ptr [[SAVED_STACK]], align 4
// SIMD-ONLY12-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// SIMD-ONLY12-NEXT: store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4
// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// SIMD-ONLY12-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// SIMD-ONLY12-NEXT: store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
// SIMD-ONLY12-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY12-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT: store i32 [[TMP4]], ptr [[X]], align 4
// SIMD-ONLY12-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
// SIMD-ONLY12-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT: store i32 [[TMP5]], ptr [[Y]], align 4
// SIMD-ONLY12-NEXT: store ptr [[A]], ptr [[P]], align 64
// SIMD-ONLY12-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2
// SIMD-ONLY12-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32
// SIMD-ONLY12-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY12-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY12-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2
// SIMD-ONLY12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i32 0, i32 2
// SIMD-ONLY12-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY12-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i32 3
// SIMD-ONLY12-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
// SIMD-ONLY12-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i32 0, i32 1
// SIMD-ONLY12-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i32 0, i32 2
// SIMD-ONLY12-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
// SIMD-ONLY12-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]]
// SIMD-ONLY12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i32 [[TMP7]]
// SIMD-ONLY12-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i32 3
// SIMD-ONLY12-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
// SIMD-ONLY12-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
// SIMD-ONLY12-NEXT: store i64 1, ptr [[X8]], align 4
// SIMD-ONLY12-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
// SIMD-ONLY12-NEXT: store i8 1, ptr [[Y9]], align 4
// SIMD-ONLY12-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY12-NEXT: [[TMP8:%.*]] = load i32, ptr [[X10]], align 4
// SIMD-ONLY12-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double
// SIMD-ONLY12-NEXT: [[TMP9:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// SIMD-ONLY12-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i32 0
// SIMD-ONLY12-NEXT: store double [[CONV11]], ptr [[ARRAYIDX12]], align 4
// SIMD-ONLY12-NEXT: [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// SIMD-ONLY12-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP10]], i32 0
// SIMD-ONLY12-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX13]], align 4
// SIMD-ONLY12-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// SIMD-ONLY12-NEXT: store double [[INC]], ptr [[ARRAYIDX13]], align 4
// SIMD-ONLY12-NEXT: [[TMP12:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// SIMD-ONLY12-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP13]])
// SIMD-ONLY12-NEXT: ret i32 [[TMP12]]
//
//
// SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY12-NEXT: entry:
// SIMD-ONLY12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// SIMD-ONLY12-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// SIMD-ONLY12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// SIMD-ONLY12-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// SIMD-ONLY12-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], ptr noundef [[TMP1]])
// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY12-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// SIMD-ONLY12-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY12-NEXT: store i32 [[ADD2]], ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// SIMD-ONLY12-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY12-NEXT: store i32 [[ADD4]], ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// SIMD-ONLY12-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY12-NEXT: store i32 [[ADD6]], ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY12-NEXT: ret i32 [[TMP9]]
//
//
// SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY12-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// SIMD-ONLY12-NEXT: entry:
// SIMD-ONLY12-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
// SIMD-ONLY12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT: [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// SIMD-ONLY12-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
// SIMD-ONLY12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY12-NEXT: store i32 [[ADD]], ptr [[B]], align 4
// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY12-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 4
// SIMD-ONLY12-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// SIMD-ONLY12-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// SIMD-ONLY12-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
// SIMD-ONLY12-NEXT: [[TMP4:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY12-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double
// SIMD-ONLY12-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY12-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY12-NEXT: store double [[ADD2]], ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY12-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 4
// SIMD-ONLY12-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00
// SIMD-ONLY12-NEXT: store double [[INC]], ptr [[A3]], align 4
// SIMD-ONLY12-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY12-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP6]]
// SIMD-ONLY12-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
// SIMD-ONLY12-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
// SIMD-ONLY12-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP7]]
// SIMD-ONLY12-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i32 1
// SIMD-ONLY12-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
// SIMD-ONLY12-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY12-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY12-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
// SIMD-ONLY12-NEXT: [[TMP10:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// SIMD-ONLY12-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP10]])
// SIMD-ONLY12-NEXT: ret i32 [[ADD9]]
//
//
// SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY12-NEXT: entry:
// SIMD-ONLY12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT: [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY12-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY12-NEXT: store i8 0, ptr [[AAA]], align 1
// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY12-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
// SIMD-ONLY12-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY12-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY12-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY12-NEXT: store i8 [[CONV2]], ptr [[AAA]], align 1
// SIMD-ONLY12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY12-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY12-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY12-NEXT: ret i32 [[TMP3]]
//
//
// SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY12-NEXT: entry:
// SIMD-ONLY12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY12-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY12-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY12-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY12-NEXT: ret i32 [[TMP2]]
//
//
// SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY13-NEXT: entry:
// SIMD-ONLY13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// SIMD-ONLY13-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT: [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY13-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY13-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// SIMD-ONLY13-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY13-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// SIMD-ONLY13-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY13-NEXT: [[P:%.*]] = alloca ptr, align 64
// SIMD-ONLY13-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// SIMD-ONLY13-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY13-NEXT: store i16 0, ptr [[AA]], align 2
// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY13-NEXT: store ptr [[TMP1]], ptr [[SAVED_STACK]], align 4
// SIMD-ONLY13-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// SIMD-ONLY13-NEXT: store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4
// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// SIMD-ONLY13-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// SIMD-ONLY13-NEXT: store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
// SIMD-ONLY13-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY13-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT: store i32 [[TMP4]], ptr [[X]], align 4
// SIMD-ONLY13-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
// SIMD-ONLY13-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT: store i32 [[TMP5]], ptr [[Y]], align 4
// SIMD-ONLY13-NEXT: store ptr [[A]], ptr [[P]], align 64
// SIMD-ONLY13-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2
// SIMD-ONLY13-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32
// SIMD-ONLY13-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY13-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY13-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2
// SIMD-ONLY13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i32 0, i32 2
// SIMD-ONLY13-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY13-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i32 3
// SIMD-ONLY13-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
// SIMD-ONLY13-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i32 0, i32 1
// SIMD-ONLY13-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i32 0, i32 2
// SIMD-ONLY13-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
// SIMD-ONLY13-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]]
// SIMD-ONLY13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i32 [[TMP7]]
// SIMD-ONLY13-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i32 3
// SIMD-ONLY13-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
// SIMD-ONLY13-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
// SIMD-ONLY13-NEXT: store i64 1, ptr [[X8]], align 4
// SIMD-ONLY13-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
// SIMD-ONLY13-NEXT: store i8 1, ptr [[Y9]], align 4
// SIMD-ONLY13-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
// SIMD-ONLY13-NEXT: [[TMP8:%.*]] = load i32, ptr [[X10]], align 4
// SIMD-ONLY13-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double
// SIMD-ONLY13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// SIMD-ONLY13-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i32 0
// SIMD-ONLY13-NEXT: store double [[CONV11]], ptr [[ARRAYIDX12]], align 4
// SIMD-ONLY13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// SIMD-ONLY13-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP10]], i32 0
// SIMD-ONLY13-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX13]], align 4
// SIMD-ONLY13-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// SIMD-ONLY13-NEXT: store double [[INC]], ptr [[ARRAYIDX13]], align 4
// SIMD-ONLY13-NEXT: [[TMP12:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// SIMD-ONLY13-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP13]])
// SIMD-ONLY13-NEXT: ret i32 [[TMP12]]
//
//
// SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY13-NEXT: entry:
// SIMD-ONLY13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
// SIMD-ONLY13-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// SIMD-ONLY13-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
// SIMD-ONLY13-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
// SIMD-ONLY13-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], ptr noundef [[TMP1]])
// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY13-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// SIMD-ONLY13-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY13-NEXT: store i32 [[ADD2]], ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// SIMD-ONLY13-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY13-NEXT: store i32 [[ADD4]], ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// SIMD-ONLY13-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY13-NEXT: store i32 [[ADD6]], ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY13-NEXT: ret i32 [[TMP9]]
//
//
// SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY13-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// SIMD-ONLY13-NEXT: entry:
// SIMD-ONLY13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
// SIMD-ONLY13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT: [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4
// SIMD-ONLY13-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
// SIMD-ONLY13-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY13-NEXT: store i32 [[ADD]], ptr [[B]], align 4
// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
// SIMD-ONLY13-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 4
// SIMD-ONLY13-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// SIMD-ONLY13-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// SIMD-ONLY13-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
// SIMD-ONLY13-NEXT: [[TMP4:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY13-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double
// SIMD-ONLY13-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY13-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY13-NEXT: store double [[ADD2]], ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY13-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 4
// SIMD-ONLY13-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00
// SIMD-ONLY13-NEXT: store double [[INC]], ptr [[A3]], align 4
// SIMD-ONLY13-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY13-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP6]]
// SIMD-ONLY13-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
// SIMD-ONLY13-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
// SIMD-ONLY13-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP7]]
// SIMD-ONLY13-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i32 1
// SIMD-ONLY13-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
// SIMD-ONLY13-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY13-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4
// SIMD-ONLY13-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
// SIMD-ONLY13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
// SIMD-ONLY13-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP10]])
// SIMD-ONLY13-NEXT: ret i32 [[ADD9]]
//
//
// SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY13-NEXT: entry:
// SIMD-ONLY13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT: [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY13-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY13-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY13-NEXT: store i8 0, ptr [[AAA]], align 1
// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY13-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
// SIMD-ONLY13-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY13-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY13-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY13-NEXT: store i8 [[CONV2]], ptr [[AAA]], align 1
// SIMD-ONLY13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY13-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY13-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY13-NEXT: ret i32 [[TMP3]]
//
//
// SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY13-NEXT: entry:
// SIMD-ONLY13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT: [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY13-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT: store i32 0, ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY13-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// SIMD-ONLY13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// SIMD-ONLY13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY13-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
// SIMD-ONLY13-NEXT: ret i32 [[TMP2]]
//