Files
clang-p2996/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-tr-w64.cl
Changpeng Fang 280d90d0fd AMDGPU: Add back half and bfloat support for global_load_tr16 pats (#99540)
half and bfloat are common types for 16-bit elements. Support for them
was originally there but was dropped for some reason. This change adds
support for these floating-point types back.
2024-07-18 11:23:35 -07:00

48 lines
2.0 KiB
Common Lisp

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1200
// Four-element vector types (Clang ext_vector_type) used as the pointee and
// return types of the b128 test functions in this file.
// v4s: four 'short' elements; shows up as <4 x i16> in the expected IR.
typedef short v4s __attribute__((ext_vector_type(4)));
// v4h: four 'half' elements; shows up as <4 x half> in the expected IR.
typedef half v4h __attribute__((ext_vector_type(4)));
// v4y: four '__bf16' elements; shows up as <4 x bfloat> in the expected IR.
typedef __bf16 v4y __attribute__((ext_vector_type(4)));
// CHECK-GFX1200-LABEL: @test_amdgcn_global_load_tr_b64_i32(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.global.load.tr.b64.i32(ptr addrspace(1) [[INPTR:%.*]])
// CHECK-GFX1200-NEXT: ret i32 [[TMP0]]
//
// Forwards the global-address-space int pointer `inptr` to
// __builtin_amdgcn_global_load_tr_b64_i32 and returns the builtin's int
// result unchanged. The autogenerated assertions preceding this function
// expect the body to lower to one call to the
// llvm.amdgcn.global.load.tr.b64.i32 intrinsic followed by the return.
int test_amdgcn_global_load_tr_b64_i32(global int* inptr)
{
return __builtin_amdgcn_global_load_tr_b64_i32(inptr);
}
// CHECK-GFX1200-LABEL: @test_amdgcn_global_load_tr_b128_v4i16(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.global.load.tr.b128.v4i16(ptr addrspace(1) [[INPTR:%.*]])
// CHECK-GFX1200-NEXT: ret <4 x i16> [[TMP0]]
//
// Forwards the global-address-space v4s pointer `inptr` to
// __builtin_amdgcn_global_load_tr_b128_v4i16 and returns the builtin's v4s
// result unchanged. The autogenerated assertions preceding this function
// expect the body to lower to one call to the
// llvm.amdgcn.global.load.tr.b128.v4i16 intrinsic returning <4 x i16>.
v4s test_amdgcn_global_load_tr_b128_v4i16(global v4s* inptr)
{
return __builtin_amdgcn_global_load_tr_b128_v4i16(inptr);
}
// CHECK-GFX1200-LABEL: @test_amdgcn_global_load_tr_b128_v4f16(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.global.load.tr.b128.v4f16(ptr addrspace(1) [[INPTR:%.*]])
// CHECK-GFX1200-NEXT: ret <4 x half> [[TMP0]]
//
// Forwards the global-address-space v4h pointer `inptr` to
// __builtin_amdgcn_global_load_tr_b128_v4f16 and returns the builtin's v4h
// result unchanged. The autogenerated assertions preceding this function
// expect the body to lower to one call to the
// llvm.amdgcn.global.load.tr.b128.v4f16 intrinsic returning <4 x half>.
v4h test_amdgcn_global_load_tr_b128_v4f16(global v4h* inptr)
{
return __builtin_amdgcn_global_load_tr_b128_v4f16(inptr);
}
// CHECK-GFX1200-LABEL: @test_amdgcn_global_load_tr_b128_v4bf16(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x bfloat> @llvm.amdgcn.global.load.tr.b128.v4bf16(ptr addrspace(1) [[INPTR:%.*]])
// CHECK-GFX1200-NEXT: ret <4 x bfloat> [[TMP0]]
//
// Forwards the global-address-space v4y pointer `inptr` to
// __builtin_amdgcn_global_load_tr_b128_v4bf16 and returns the builtin's v4y
// result unchanged. The autogenerated assertions preceding this function
// expect the body to lower to one call to the
// llvm.amdgcn.global.load.tr.b128.v4bf16 intrinsic returning <4 x bfloat>.
v4y test_amdgcn_global_load_tr_b128_v4bf16(global v4y* inptr)
{
return __builtin_amdgcn_global_load_tr_b128_v4bf16(inptr);
}