While the standards have no special rules for void pointers and strict
aliasing, emitting distinct TBAA tags for void pointers breaks some
common idioms, and there is no good way to rewrite the affected code
without strict-aliasing violations. An example is counting the entries
in an array of pointers:
int count_elements(void * values) {
  void **seq = values;
  int count;
  for (count = 0; seq && seq[count]; count++);
  return count;
}
https://clang.godbolt.org/z/8dTv51v8W
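For illustration, here is a hypothetical caller of this idiom (a sketch, not
taken from the report): the elements are stored as char * but read back
through void ** lvalues inside count_elements, which is exactly the access
pattern that distinct void-pointer tags would let the optimizer break.

// Hypothetical usage sketch: a null-terminated array of char * handed
// to the generic void * interface.
#include <stdio.h>

int count_elements(void *values); // as defined above

int main(void) {
  char *names[] = {"alpha", "beta", "gamma", 0};
  // names decays to char **; count_elements walks it as void **.
  printf("%d\n", count_elements((void *)names)); // prints 3
  return 0;
}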
An example in the wild is
https://github.com/llvm/llvm-project/issues/119099.

For now, this patch does not emit distinct tags for void pointers, so
that these idioms are not miscompiled.
Fixes https://github.com/llvm/llvm-project/issues/119099.
Fixes https://github.com/llvm/llvm-project/issues/122537.
PR: https://github.com/llvm/llvm-project/pull/122116
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs --prefix-filecheck-ir-name VAR
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa %s -fdenormal-fp-math-f32=preserve-sign | FileCheck %s --check-prefixes=CHECK,NOCPU
// // Check no-optnone and target-cpu behavior
// RUN: %clang_cc1 -cl-std=CL2.0 -O1 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa -target-cpu gfx900 -target-feature -sram-ecc -fdenormal-fp-math-f32=preserve-sign %s | FileCheck %s --check-prefixes=CHECK,GFX900
typedef struct {int a;} ndrange_t;

void callee(long id, global long *out) {
  out[id] = id;
}

kernel void test(global char *a, char b, global long *c, long d) {
  queue_t default_queue;
  unsigned flags = 0;
  ndrange_t ndrange;

  enqueue_kernel(default_queue, flags, ndrange,
                 ^(void) {
                 a[0] = b;
                 });

  enqueue_kernel(default_queue, flags, ndrange,
                 ^(void) {
                 a[0] = b;
                 c[0] = d;
                 });
  enqueue_kernel(default_queue, flags, ndrange,
                 ^(local void *lp) {
                 a[0] = b;
                 c[0] = d;
                 ((local int*)lp)[0] = 1;
                 }, 100);

  void (^block)(void) = ^{
    callee(d, c);
  };

  enqueue_kernel(default_queue, flags, ndrange, block);
}

// The target attribute from the caller is not propagated to the block.

// FIXME: Something is broken and inconsistent. Either the builtin use in the
// block should be rejected, or the target attribute needs to apply to the block
// as well.
// https://github.com/llvm/llvm-project/issues/60005
__attribute__((target("s-memtime-inst")))
kernel void test_target_features_kernel(global int *i) {
  queue_t default_queue;
  unsigned flags = 0;
  ndrange_t ndrange;

  __builtin_amdgcn_s_memtime();

  enqueue_kernel(default_queue, flags, ndrange,
                 ^(void) {
                 __builtin_amdgcn_s_memtime();
                 });
}

//.
|
|
// CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0
|
|
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
|
|
//.
|
|
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
|
|
// NOCPU-LABEL: define {{[^@]+}}@callee
|
|
// NOCPU-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
// NOCPU-NEXT: entry:
|
|
// NOCPU-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr
|
|
// NOCPU-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
|
|
// NOCPU-NEXT: store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 [[TMP2]]
|
|
// NOCPU-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8
|
|
// NOCPU-NEXT: ret void
|
|
//
|
|
//
|
|
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
|
|
// NOCPU-LABEL: define {{[^@]+}}@test
|
|
// NOCPU-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
|
|
// NOCPU-NEXT: entry:
|
|
// NOCPU-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
|
|
// NOCPU-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// NOCPU-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
|
|
// NOCPU-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
|
|
// NOCPU-NEXT: [[BLOCK:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[VARTMP2:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
|
|
// NOCPU-NEXT: [[BLOCK3:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[VARTMP11:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
|
|
// NOCPU-NEXT: [[BLOCK12:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[BLOCK_SIZES:%.*]] = alloca [1 x i64], align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[BLOCK20:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[BLOCK21:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[VARTMP27:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
|
|
// NOCPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
|
// NOCPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
|
|
// NOCPU-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
|
|
// NOCPU-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
|
|
// NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
|
|
// NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
|
|
// NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
|
|
// NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
|
|
// NOCPU-NEXT: [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr
|
|
// NOCPU-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP2]] to ptr
|
|
// NOCPU-NEXT: [[BLOCK3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK3]] to ptr
|
|
// NOCPU-NEXT: [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP11]] to ptr
|
|
// NOCPU-NEXT: [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr
|
|
// NOCPU-NEXT: [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr
|
|
// NOCPU-NEXT: [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr
|
|
// NOCPU-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr
|
|
// NOCPU-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr
|
|
// NOCPU-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1
|
|
// NOCPU-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4
|
|
// NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
|
|
// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
|
|
// NOCPU-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0
|
|
// NOCPU-NEXT: store i32 25, ptr [[BLOCK_SIZE]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1
|
|
// NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN]], align 4
|
|
// NOCPU-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 2
|
|
// NOCPU-NEXT: store ptr @__test_block_invoke, ptr [[BLOCK_INVOKE]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 3
|
|
// NOCPU-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 4
|
|
// NOCPU-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1
|
|
// NOCPU-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8
|
|
// NOCPU-NEXT: [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr [[TMP_ASCAST]], ptr @__test_block_invoke_kernel, ptr [[BLOCK_ASCAST]])
|
|
// NOCPU-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
|
|
// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
|
|
// NOCPU-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0
|
|
// NOCPU-NEXT: store i32 41, ptr [[BLOCK_SIZE4]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 1
|
|
// NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN5]], align 4
|
|
// NOCPU-NEXT: [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 2
|
|
// NOCPU-NEXT: store ptr @__test_block_invoke_2, ptr [[BLOCK_INVOKE6]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 3
|
|
// NOCPU-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 6
|
|
// NOCPU-NEXT: [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1
|
|
// NOCPU-NEXT: store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 4
|
|
// NOCPU-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 5
|
|
// NOCPU-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8
|
|
// NOCPU-NEXT: [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr [[TMP2_ASCAST]], ptr @__test_block_invoke_2_kernel, ptr [[BLOCK3_ASCAST]])
|
|
// NOCPU-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
|
|
// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
|
|
// NOCPU-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0
|
|
// NOCPU-NEXT: store i32 41, ptr [[BLOCK_SIZE13]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 1
|
|
// NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN14]], align 4
|
|
// NOCPU-NEXT: [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 2
|
|
// NOCPU-NEXT: store ptr @__test_block_invoke_3, ptr [[BLOCK_INVOKE15]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 3
|
|
// NOCPU-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 6
|
|
// NOCPU-NEXT: [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1
|
|
// NOCPU-NEXT: store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 4
|
|
// NOCPU-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5
|
|
// NOCPU-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8
|
|
// NOCPU-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0
|
|
// NOCPU-NEXT: store i64 100, ptr [[TMP18]], align 8
|
|
// NOCPU-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr [[TMP11_ASCAST]], ptr @__test_block_invoke_3_kernel, ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]])
|
|
// NOCPU-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0
|
|
// NOCPU-NEXT: store i32 32, ptr [[BLOCK_SIZE22]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1
|
|
// NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN23]], align 4
|
|
// NOCPU-NEXT: [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 2
|
|
// NOCPU-NEXT: store ptr @__test_block_invoke_4, ptr [[BLOCK_INVOKE24]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 3
|
|
// NOCPU-NEXT: [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4
|
|
// NOCPU-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8
|
|
// NOCPU-NEXT: store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
|
|
// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
|
|
// NOCPU-NEXT: [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr [[TMP27_ASCAST]], ptr @__test_block_invoke_4_kernel, ptr [[BLOCK21_ASCAST]])
|
|
// NOCPU-NEXT: ret void
|
|
//
|
|
//
|
|
// NOCPU: Function Attrs: convergent noinline nounwind optnone
|
|
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke
|
|
// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4:[0-9]+]] {
|
|
// NOCPU-NEXT: entry:
|
|
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
|
|
// NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr
|
|
// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
|
|
// NOCPU-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
|
|
// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
|
|
// NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
|
|
// NOCPU-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1
|
|
// NOCPU-NEXT: ret void
|
|
//
|
|
//
|
|
// NOCPU: Function Attrs: convergent nounwind
|
|
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
|
|
// NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10:![0-9]+]] {
|
|
// NOCPU-NEXT: entry:
|
|
// NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
|
|
// NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
|
|
// NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
|
|
// NOCPU-NEXT: call void @__test_block_invoke(ptr [[TMP2]])
|
|
// NOCPU-NEXT: ret void
|
|
//
|
|
//
|
|
// NOCPU: Function Attrs: convergent noinline nounwind optnone
|
|
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_2
|
|
// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
|
|
// NOCPU-NEXT: entry:
|
|
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
|
|
// NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr
|
|
// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
|
|
// NOCPU-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
|
|
// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
|
|
// NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
|
|
// NOCPU-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
|
|
// NOCPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
|
|
// NOCPU-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8
|
|
// NOCPU-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
|
|
// NOCPU-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8
|
|
// NOCPU-NEXT: ret void
|
|
//
|
|
//
|
|
// NOCPU: Function Attrs: convergent nounwind
|
|
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
|
|
// NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] {
|
|
// NOCPU-NEXT: entry:
|
|
// NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
|
|
// NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
|
|
// NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
|
|
// NOCPU-NEXT: call void @__test_block_invoke_2(ptr [[TMP2]])
|
|
// NOCPU-NEXT: ret void
|
|
//
|
|
//
|
|
// NOCPU: Function Attrs: convergent noinline nounwind optnone
|
|
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_3
|
|
// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR4]] {
|
|
// NOCPU-NEXT: entry:
|
|
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
|
|
// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
|
|
// NOCPU-NEXT: [[LP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP_ADDR]] to ptr
|
|
// NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr
|
|
// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4
|
|
// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
|
|
// NOCPU-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
|
|
// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
|
|
// NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
|
|
// NOCPU-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
|
|
// NOCPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
|
|
// NOCPU-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8
|
|
// NOCPU-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
|
|
// NOCPU-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8
|
|
// NOCPU-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4
|
|
// NOCPU-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0
|
|
// NOCPU-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4
|
|
// NOCPU-NEXT: ret void
|
|
//
|
|
//
|
|
// NOCPU: Function Attrs: convergent nounwind
|
|
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
|
|
// NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META11:![0-9]+]] !kernel_arg_access_qual [[META12:![0-9]+]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META14:![0-9]+]] {
|
|
// NOCPU-NEXT: entry:
|
|
// NOCPU-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
|
|
// NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8
|
|
// NOCPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
|
|
// NOCPU-NEXT: call void @__test_block_invoke_3(ptr [[TMP3]], ptr addrspace(3) [[TMP1]])
|
|
// NOCPU-NEXT: ret void
|
|
//
|
|
//
|
|
// NOCPU: Function Attrs: convergent noinline nounwind optnone
|
|
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_4
|
|
// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
|
|
// NOCPU-NEXT: entry:
|
|
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
|
|
// NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr
|
|
// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
|
|
// NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8
|
|
// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
|
|
// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
|
|
// NOCPU-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8:[0-9]+]]
|
|
// NOCPU-NEXT: ret void
|
|
//
|
|
//
|
|
// NOCPU: Function Attrs: convergent nounwind
|
|
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
|
|
// NOCPU-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] {
|
|
// NOCPU-NEXT: entry:
|
|
// NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
|
|
// NOCPU-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
|
|
// NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
|
|
// NOCPU-NEXT: call void @__test_block_invoke_4(ptr [[TMP2]])
|
|
// NOCPU-NEXT: ret void
|
|
//
|
|
//
|
|
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
|
|
// NOCPU-LABEL: define {{[^@]+}}@test_target_features_kernel
|
|
// NOCPU-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR6:[0-9]+]] !kernel_arg_addr_space [[META15:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META10]] {
|
|
// NOCPU-NEXT: entry:
|
|
// NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// NOCPU-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
|
|
// NOCPU-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
|
|
// NOCPU-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
|
|
// NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
|
|
// NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
|
|
// NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
|
|
// NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
|
|
// NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4
|
|
// NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
|
|
// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
|
|
// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
|
|
// NOCPU-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr [[TMP_ASCAST]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
|
|
// NOCPU-NEXT: ret void
|
|
//
|
|
//
|
|
// NOCPU: Function Attrs: convergent noinline nounwind optnone
|
|
// NOCPU-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke
|
|
// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
|
|
// NOCPU-NEXT: entry:
|
|
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
|
|
// NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr
|
|
// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8
|
|
// NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
|
|
// NOCPU-NEXT: ret void
|
|
//
|
|
//
|
|
// NOCPU: Function Attrs: convergent nounwind
|
|
// NOCPU-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel
|
|
// NOCPU-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] {
|
|
// NOCPU-NEXT: entry:
|
|
// NOCPU-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5)
|
|
// NOCPU-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
|
|
// NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
|
|
// NOCPU-NEXT: call void @__test_target_features_kernel_block_invoke(ptr [[TMP2]])
|
|
// NOCPU-NEXT: ret void
|
|
//
|
|
//
|
|
//
|
|
//
|
|
//
|
|
//
|
|
//
|
|
//
|
|
//
|
|
//
|
|
//
|
|
//
|
|
//
|
|
//
|
|
//
|
|
// GFX900: Function Attrs: convergent norecurse nounwind
|
|
// GFX900-LABEL: define {{[^@]+}}@callee
|
|
// GFX900-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
// GFX900-NEXT: entry:
|
|
// GFX900-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
|
|
// GFX900-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
|
// GFX900-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr
|
|
// GFX900-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
|
|
// GFX900-NEXT: store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3:![0-9]+]]
|
|
// GFX900-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[TBAA7:![0-9]+]]
|
|
// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
|
|
// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
|
|
// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
|
|
// GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 [[TMP2]]
|
|
// GFX900-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
|
|
// GFX900-NEXT: ret void
|
|
//
|
|
//
|
|
// GFX900: Function Attrs: convergent norecurse nounwind
|
|
// GFX900-LABEL: define {{[^@]+}}@test
|
|
// GFX900-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
|
|
// GFX900-NEXT: entry:
|
|
// GFX900-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
|
// GFX900-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
|
|
// GFX900-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
|
// GFX900-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
|
|
// GFX900-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
|
// GFX900-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// GFX900-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
|
|
// GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
|
|
// GFX900-NEXT: [[BLOCK:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
|
|
// GFX900-NEXT: [[VARTMP2:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
|
|
// GFX900-NEXT: [[BLOCK3:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
|
|
// GFX900-NEXT: [[VARTMP11:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
|
|
// GFX900-NEXT: [[BLOCK12:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
|
|
// GFX900-NEXT: [[BLOCK_SIZES:%.*]] = alloca [1 x i64], align 8, addrspace(5)
|
|
// GFX900-NEXT: [[BLOCK20:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// GFX900-NEXT: [[BLOCK21:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
|
|
// GFX900-NEXT: [[VARTMP27:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
|
|
// GFX900-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
|
// GFX900-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
|
|
// GFX900-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
|
|
// GFX900-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
|
|
// GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
|
|
// GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
|
|
// GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
|
|
// GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
|
|
// GFX900-NEXT: [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr
|
|
// GFX900-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP2]] to ptr
|
|
// GFX900-NEXT: [[BLOCK3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK3]] to ptr
|
|
// GFX900-NEXT: [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP11]] to ptr
|
|
// GFX900-NEXT: [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr
|
|
// GFX900-NEXT: [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr
|
|
// GFX900-NEXT: [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr
|
|
// GFX900-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr
|
|
// GFX900-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr
|
|
// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14:![0-9]+]]
|
|
// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16:![0-9]+]]
|
|
// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
|
|
// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
|
|
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8:[0-9]+]]
|
|
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
|
|
// GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17:![0-9]+]]
|
|
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
|
|
// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19:![0-9]+]]
|
|
// GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
|
|
// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21:![0-9]+]]
|
|
// GFX900-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0
|
|
// GFX900-NEXT: store i32 25, ptr [[BLOCK_SIZE]], align 8
|
|
// GFX900-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1
|
|
// GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN]], align 4
|
|
// GFX900-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 2
|
|
// GFX900-NEXT: store ptr @__test_block_invoke, ptr [[BLOCK_INVOKE]], align 8
|
|
// GFX900-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 3
|
|
// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]]
|
|
// GFX900-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8, !tbaa [[TBAA14]]
|
|
// GFX900-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 4
|
|
// GFX900-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]]
|
|
// GFX900-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8, !tbaa [[TBAA16]]
|
|
// GFX900-NEXT: [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr [[TMP_ASCAST]], ptr @__test_block_invoke_kernel, ptr [[BLOCK_ASCAST]])
|
|
// GFX900-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
|
|
// GFX900-NEXT: [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
|
|
// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
|
|
// GFX900-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0
|
|
// GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE4]], align 8
|
|
// GFX900-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 1
|
|
// GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN5]], align 4
|
|
// GFX900-NEXT: [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 2
|
|
// GFX900-NEXT: store ptr @__test_block_invoke_2, ptr [[BLOCK_INVOKE6]], align 8
|
|
// GFX900-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 3
|
|
// GFX900-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]]
|
|
// GFX900-NEXT: store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8, !tbaa [[TBAA14]]
|
|
// GFX900-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 6
|
|
// GFX900-NEXT: [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]]
|
|
// GFX900-NEXT: store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8, !tbaa [[TBAA16]]
|
|
// GFX900-NEXT: [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 4
|
|
// GFX900-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
|
|
// GFX900-NEXT: store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8, !tbaa [[TBAA7]]
|
|
// GFX900-NEXT: [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 5
|
|
// GFX900-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
|
|
// GFX900-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8, !tbaa [[TBAA3]]
|
|
// GFX900-NEXT: [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr [[TMP2_ASCAST]], ptr @__test_block_invoke_2_kernel, ptr [[BLOCK3_ASCAST]])
|
|
// GFX900-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
|
|
// GFX900-NEXT: [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
|
|
// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
|
|
// GFX900-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0
|
|
// GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE13]], align 8
|
|
// GFX900-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 1
|
|
// GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN14]], align 4
|
|
// GFX900-NEXT: [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 2
|
|
// GFX900-NEXT: store ptr @__test_block_invoke_3, ptr [[BLOCK_INVOKE15]], align 8
|
|
// GFX900-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 3
|
|
// GFX900-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]]
|
|
// GFX900-NEXT: store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8, !tbaa [[TBAA14]]
|
|
// GFX900-NEXT: [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 6
|
|
// GFX900-NEXT: [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]]
|
|
// GFX900-NEXT: store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8, !tbaa [[TBAA16]]
|
|
// GFX900-NEXT: [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 4
|
|
// GFX900-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
|
|
// GFX900-NEXT: store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8, !tbaa [[TBAA7]]
|
|
// GFX900-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5
|
|
// GFX900-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
|
|
// GFX900-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[TBAA3]]
|
|
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR8]]
|
|
// GFX900-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0
|
|
// GFX900-NEXT: store i64 100, ptr [[TMP18]], align 8
|
|
// GFX900-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr [[TMP11_ASCAST]], ptr @__test_block_invoke_3_kernel, ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]])
|
|
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR8]]
|
|
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR8]]
|
|
// GFX900-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0
|
|
// GFX900-NEXT: store i32 32, ptr [[BLOCK_SIZE22]], align 8
|
|
// GFX900-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1
|
|
// GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN23]], align 4
|
|
// GFX900-NEXT: [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 2
|
|
// GFX900-NEXT: store ptr @__test_block_invoke_4, ptr [[BLOCK_INVOKE24]], align 8
|
|
// GFX900-NEXT: [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 3
|
|
// GFX900-NEXT: [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
|
|
// GFX900-NEXT: store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8, !tbaa [[TBAA3]]
|
|
// GFX900-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4
|
|
// GFX900-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
|
|
// GFX900-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8, !tbaa [[TBAA7]]
|
|
// GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA16]]
|
|
// GFX900-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
|
|
// GFX900-NEXT: [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
|
|
// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
|
|
// GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA16]]
|
|
// GFX900-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr [[TMP27_ASCAST]], ptr @__test_block_invoke_4_kernel, ptr [[BLOCK21_ASCAST]])
|
|
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR8]]
|
|
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
|
|
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
|
|
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]]
|
|
// GFX900-NEXT: ret void
|
|
//
|
|
//
|
|
// GFX900: Function Attrs: convergent nounwind
|
|
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke
|
|
// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5:[0-9]+]] {
|
|
// GFX900-NEXT: entry:
|
|
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
|
|
// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
|
|
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
|
|
// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]]
|
|
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
|
|
// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA14]]
|
|
// GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
|
|
// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA16]]
|
|
// GFX900-NEXT: ret void
|
|
//
|
|
//
|
|
// GFX900: Function Attrs: convergent nounwind
|
|
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
|
|
// GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] !kernel_arg_addr_space [[META22:![0-9]+]] !kernel_arg_access_qual [[META23:![0-9]+]] !kernel_arg_type [[META24:![0-9]+]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25:![0-9]+]] {
|
|
// GFX900-NEXT: entry:
|
|
// GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
|
|
// GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
|
|
// GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
|
|
// GFX900-NEXT: call void @__test_block_invoke(ptr [[TMP2]])
|
|
// GFX900-NEXT: ret void
|
|
//
|
|
//
|
|
// GFX900: Function Attrs: convergent nounwind
|
|
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_2
|
|
// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
|
|
// GFX900-NEXT: entry:
|
|
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
|
|
// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
|
|
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
|
|
// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]]
|
|
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
|
|
// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA14]]
|
|
// GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
|
|
// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA16]]
|
|
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
|
|
// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[TBAA3]]
|
|
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
|
|
// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]]
// GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
// GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] {
// GFX900-NEXT: entry:
// GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
// GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
// GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
// GFX900-NEXT: call void @__test_block_invoke_2(ptr [[TMP2]])
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_3
// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR5]] {
// GFX900-NEXT: entry:
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// GFX900-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
// GFX900-NEXT: [[LP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP_ADDR]] to ptr
// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
// GFX900-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA26:![0-9]+]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA14]]
// GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA16]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]]
// GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA26]]
// GFX900-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0
// GFX900-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4, !tbaa [[TBAA17]]
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
// GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META27:![0-9]+]] !kernel_arg_access_qual [[META28:![0-9]+]] !kernel_arg_type [[META29:![0-9]+]] !kernel_arg_base_type [[META29]] !kernel_arg_type_qual [[META30:![0-9]+]] {
// GFX900-NEXT: entry:
// GFX900-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
// GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8
// GFX900-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
// GFX900-NEXT: call void @__test_block_invoke_3(ptr [[TMP3]], ptr addrspace(3) [[TMP1]])
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_4
// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
// GFX900-NEXT: entry:
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA7]]
// GFX900-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR9:[0-9]+]]
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
// GFX900-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] {
// GFX900-NEXT: entry:
// GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
// GFX900-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
// GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
// GFX900-NEXT: call void @__test_block_invoke_4(ptr [[TMP2]])
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent norecurse nounwind
// GFX900-LABEL: define {{[^@]+}}@test_target_features_kernel
// GFX900-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META31:![0-9]+]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META32:![0-9]+]] !kernel_arg_base_type [[META32]] !kernel_arg_type_qual [[META25]] {
// GFX900-NEXT: entry:
// GFX900-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// GFX900-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// GFX900-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
// GFX900-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
// GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
// GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
// GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
// GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
// GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA33:![0-9]+]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
// GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
// GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
// GFX900-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr [[TMP_ASCAST]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]]
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke
// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
// GFX900-NEXT: entry:
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
// GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel
// GFX900-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] {
// GFX900-NEXT: entry:
// GFX900-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5)
// GFX900-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
// GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
// GFX900-NEXT: call void @__test_target_features_kernel_block_invoke(ptr [[TMP2]])
// GFX900-NEXT: ret void
//
//.
// NOCPU: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
// NOCPU: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// NOCPU: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
// NOCPU: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
// NOCPU: attributes #[[ATTR4]] = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// NOCPU: attributes #[[ATTR5]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "enqueued-block" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// NOCPU: attributes #[[ATTR6]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" }
// NOCPU: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn }
// NOCPU: attributes #[[ATTR8]] = { convergent nounwind }
//.
// GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" }
// GFX900: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
// GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
// GFX900: attributes #[[ATTR5]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "enqueued-block" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
// GFX900: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn }
// GFX900: attributes #[[ATTR8]] = { nounwind }
// GFX900: attributes #[[ATTR9]] = { convergent nounwind }
//.
// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// NOCPU: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// NOCPU: [[META2:![0-9]+]] = !{i32 2, i32 0}
// NOCPU: [[META3]] = !{i32 1, i32 0, i32 1, i32 0}
// NOCPU: [[META4]] = !{!"none", !"none", !"none", !"none"}
// NOCPU: [[META5]] = !{!"char*", !"char", !"long*", !"long"}
// NOCPU: [[META6]] = !{!"", !"", !"", !""}
// NOCPU: [[META7]] = !{i32 0}
// NOCPU: [[META8]] = !{!"none"}
// NOCPU: [[META9]] = !{!"__block_literal"}
// NOCPU: [[META10]] = !{!""}
// NOCPU: [[META11]] = !{i32 0, i32 3}
// NOCPU: [[META12]] = !{!"none", !"none"}
// NOCPU: [[META13]] = !{!"__block_literal", !"void*"}
// NOCPU: [[META14]] = !{!"", !""}
// NOCPU: [[META15]] = !{i32 1}
// NOCPU: [[META16]] = !{!"int*"}
//.
// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// GFX900: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// GFX900: [[META2:![0-9]+]] = !{i32 2, i32 0}
// GFX900: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
// GFX900: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0}
// GFX900: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
// GFX900: [[META6]] = !{!"Simple C/C++ TBAA"}
// GFX900: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
// GFX900: [[META8]] = !{!"p1 long", [[META9:![0-9]+]], i64 0}
// GFX900: [[META9]] = !{!"any pointer", [[META5]], i64 0}
// GFX900: [[META10]] = !{i32 1, i32 0, i32 1, i32 0}
// GFX900: [[META11]] = !{!"none", !"none", !"none", !"none"}
// GFX900: [[META12]] = !{!"char*", !"char", !"long*", !"long"}
// GFX900: [[META13]] = !{!"", !"", !"", !""}
// GFX900: [[TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0}
// GFX900: [[META15]] = !{!"p1 omnipotent char", [[META9]], i64 0}
// GFX900: [[TBAA16]] = !{[[META5]], [[META5]], i64 0}
// GFX900: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
// GFX900: [[META18]] = !{!"int", [[META5]], i64 0}
// GFX900: [[TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0}
// GFX900: [[META20]] = !{!"queue_t", [[META5]], i64 0}
// GFX900: [[TBAA_STRUCT21]] = !{i64 0, i64 4, [[TBAA17]]}
// GFX900: [[META22]] = !{i32 0}
// GFX900: [[META23]] = !{!"none"}
// GFX900: [[META24]] = !{!"__block_literal"}
// GFX900: [[META25]] = !{!""}
// GFX900: [[TBAA26]] = !{[[META9]], [[META9]], i64 0}
// GFX900: [[META27]] = !{i32 0, i32 3}
// GFX900: [[META28]] = !{!"none", !"none"}
// GFX900: [[META29]] = !{!"__block_literal", !"void*"}
// GFX900: [[META30]] = !{!"", !""}
// GFX900: [[META31]] = !{i32 1}
// GFX900: [[META32]] = !{!"int*"}
// GFX900: [[TBAA33]] = !{[[META34:![0-9]+]], [[META34]], i64 0}
// GFX900: [[META34]] = !{!"p1 int", [[META9]], i64 0}
//.
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
// CHECK: {{.*}}