The `ptx_kernel` calling convention is a more idiomatic and standard way of specifying an NVPTX kernel than using metadata, which is not supposed to change the meaning of the program. Further, checking the calling convention is significantly faster than traversing the metadata, which improves compile time. This change updates the clang and mlir frontends, as well as the NVPTXCtorDtorLowering pass, to emit kernels using the calling convention. In addition, it updates all NVPTX unit tests to use the calling convention as well.
35 lines
1.6 KiB
Common Lisp
35 lines
1.6 KiB
Common Lisp
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
|
|
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O0 -o - | FileCheck %s
|
|
|
|
// CHECK-LABEL: define dso_local zeroext i1 @device_function(
|
|
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.reflect(ptr addrspacecast (ptr addrspace(4) @.str to ptr))
|
|
// CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[TMP0]], 700
|
|
// CHECK-NEXT: ret i1 [[CMP]]
|
|
//
|
|
// Returns true when __nvvm_reflect reports a value of at least 700 for the
// "__CUDA_ARCH" query string. The autogenerated assertions above expect the
// llvm.nvvm.reflect call to still be present in the -O0 IR and the comparison
// to be unsigned (icmp uge).
bool device_function() {
|
|
return __nvvm_reflect("__CUDA_ARCH") >= 700;
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local ptx_kernel void @kernel_function(
|
|
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
|
|
// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
|
|
// CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @device_function() #[[ATTR3:[0-9]+]]
|
|
// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR]], align 4
|
|
// CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[TMP0]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// OpenCL kernel entry point: stores the result of device_function(), widened
// to int, through the __global pointer i. The autogenerated assertions above
// expect this function to be defined with the ptx_kernel calling convention
// and to carry the kernel_arg_* metadata nodes.
__kernel void kernel_function(__global int *i) {
|
|
*i = device_function();
|
|
}
|
|
//.
|
|
// CHECK: [[META3]] = !{i32 1}
|
|
// CHECK: [[META4]] = !{!"none"}
|
|
// CHECK: [[META5]] = !{!"int*"}
|
|
// CHECK: [[META6]] = !{!""}
|
|
//.
|