Files
clang-p2996/clang/test/CodeGen/X86/bfloat16.cpp
M. Zeeshan Siddiqui e621757365 [Clang][BFloat16] Upgrade __bf16 to arithmetic type, change mangling, and extend excess precision support
Pursuant to discussions at
https://discourse.llvm.org/t/rfc-c-23-p1467r9-extended-floating-point-types-and-standard-names/70033/22,
this commit enhances the handling of the __bf16 type in Clang.
- Firstly, it upgrades __bf16 from a storage-only type to an arithmetic
  type.
- Secondly, it changes the mangling of __bf16 to DF16b on all
  architectures except ARM. This change has been made in
  accordance with the finalization of the mangling for the
  std::bfloat16_t type, as discussed at
  https://github.com/itanium-cxx-abi/cxx-abi/pull/147.
- Finally, this commit extends the existing excess precision support to
  the __bf16 type. This applies to hardware architectures that do not
  natively support bfloat16 arithmetic.
Appropriate tests have been added to verify the effects of these
changes and ensure no regressions in other areas of the compiler.

Reviewed By: rjmccall, pengfei, zahiraam

Differential Revision: https://reviews.llvm.org/D150913
2023-05-27 13:33:50 +08:00

146 lines
9.1 KiB
C++

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16 -S -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-NBF16 %s
// CHECK-LABEL: define dso_local void @_Z11test_scalarDF16bDF16b
// CHECK-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK: [[A_ADDR:%.*]] = alloca bfloat, align 2
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
// CHECK-NEXT: [[C:%.*]] = alloca bfloat, align 2
// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
// CHECK-NEXT: [[ADD:%.*]] = fadd bfloat [[TMP0]], [[TMP1]]
// CHECK-NEXT: store bfloat [[ADD]], ptr [[C]], align 2
// CHECK-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
// CHECK-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
// CHECK-NEXT: [[SUB:%.*]] = fsub bfloat [[TMP2]], [[TMP3]]
// CHECK-NEXT: store bfloat [[SUB]], ptr [[C]], align 2
// CHECK-NEXT: [[TMP4:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
// CHECK-NEXT: [[TMP5:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
// CHECK-NEXT: [[MUL:%.*]] = fmul bfloat [[TMP4]], [[TMP5]]
// CHECK-NEXT: store bfloat [[MUL]], ptr [[C]], align 2
// CHECK-NEXT: [[TMP6:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
// CHECK-NEXT: [[TMP7:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
// CHECK-NEXT: [[DIV:%.*]] = fdiv bfloat [[TMP6]], [[TMP7]]
// CHECK-NEXT: store bfloat [[DIV]], ptr [[C]], align 2
// CHECK-NEXT: ret void
//
// CHECK-NBF16-LABEL: define dso_local void @_Z11test_scalarDF16bDF16b
// CHECK-NBF16-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NBF16: [[A_ADDR:%.*]] = alloca bfloat, align 2
// CHECK-NBF16-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
// CHECK-NBF16-NEXT: [[C:%.*]] = alloca bfloat, align 2
// CHECK-NBF16-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
// CHECK-NBF16-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
// CHECK-NBF16-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
// CHECK-NBF16-NEXT: [[EXT:%.*]] = fpext bfloat [[TMP0]] to float
// CHECK-NBF16-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
// CHECK-NBF16-NEXT: [[EXT1:%.*]] = fpext bfloat [[TMP1]] to float
// CHECK-NBF16-NEXT: [[ADD:%.*]] = fadd float [[EXT]], [[EXT1]]
// CHECK-NBF16-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to bfloat
// CHECK-NBF16-NEXT: store bfloat [[UNPROMOTION]], ptr [[C]], align 2
// CHECK-NBF16-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
// CHECK-NBF16-NEXT: [[EXT2:%.*]] = fpext bfloat [[TMP2]] to float
// CHECK-NBF16-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
// CHECK-NBF16-NEXT: [[EXT3:%.*]] = fpext bfloat [[TMP3]] to float
// CHECK-NBF16-NEXT: [[SUB:%.*]] = fsub float [[EXT2]], [[EXT3]]
// CHECK-NBF16-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[SUB]] to bfloat
// CHECK-NBF16-NEXT: store bfloat [[UNPROMOTION4]], ptr [[C]], align 2
// CHECK-NBF16-NEXT: [[TMP4:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
// CHECK-NBF16-NEXT: [[EXT5:%.*]] = fpext bfloat [[TMP4]] to float
// CHECK-NBF16-NEXT: [[TMP5:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
// CHECK-NBF16-NEXT: [[EXT6:%.*]] = fpext bfloat [[TMP5]] to float
// CHECK-NBF16-NEXT: [[MUL:%.*]] = fmul float [[EXT5]], [[EXT6]]
// CHECK-NBF16-NEXT: [[UNPROMOTION7:%.*]] = fptrunc float [[MUL]] to bfloat
// CHECK-NBF16-NEXT: store bfloat [[UNPROMOTION7]], ptr [[C]], align 2
// CHECK-NBF16-NEXT: [[TMP6:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
// CHECK-NBF16-NEXT: [[EXT8:%.*]] = fpext bfloat [[TMP6]] to float
// CHECK-NBF16-NEXT: [[TMP7:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
// CHECK-NBF16-NEXT: [[EXT9:%.*]] = fpext bfloat [[TMP7]] to float
// CHECK-NBF16-NEXT: [[DIV:%.*]] = fdiv float [[EXT8]], [[EXT9]]
// CHECK-NBF16-NEXT: [[UNPROMOTION10:%.*]] = fptrunc float [[DIV]] to bfloat
// CHECK-NBF16-NEXT: store bfloat [[UNPROMOTION10]], ptr [[C]], align 2
// CHECK-NBF16-NEXT: ret void
//
void test_scalar(__bf16 a, __bf16 b) {
__bf16 c;
c = a + b;
c = a - b;
c = a * b;
c = a / b;
}
typedef __bf16 v8bfloat16 __attribute__((__vector_size__(16)));
// CHECK-LABEL: define dso_local void @_Z11test_vectorDv8_DF16bS_
// CHECK-SAME: (<8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16
// CHECK-NEXT: [[C:%.*]] = alloca <8 x bfloat>, align 16
// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16
// CHECK-NEXT: [[ADD:%.*]] = fadd <8 x bfloat> [[TMP0]], [[TMP1]]
// CHECK-NEXT: store <8 x bfloat> [[ADD]], ptr [[C]], align 16
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16
// CHECK-NEXT: [[SUB:%.*]] = fsub <8 x bfloat> [[TMP2]], [[TMP3]]
// CHECK-NEXT: store <8 x bfloat> [[SUB]], ptr [[C]], align 16
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[TMP5:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16
// CHECK-NEXT: [[MUL:%.*]] = fmul <8 x bfloat> [[TMP4]], [[TMP5]]
// CHECK-NEXT: store <8 x bfloat> [[MUL]], ptr [[C]], align 16
// CHECK-NEXT: [[TMP6:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[TMP7:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16
// CHECK-NEXT: [[DIV:%.*]] = fdiv <8 x bfloat> [[TMP6]], [[TMP7]]
// CHECK-NEXT: store <8 x bfloat> [[DIV]], ptr [[C]], align 16
// CHECK-NEXT: ret void
//
// CHECK-NBF16-LABEL: define dso_local void @_Z11test_vectorDv8_DF16bS_
// CHECK-NBF16-SAME: (<8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NBF16: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16
// CHECK-NBF16-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16
// CHECK-NBF16-NEXT: [[C:%.*]] = alloca <8 x bfloat>, align 16
// CHECK-NBF16-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NBF16-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16
// CHECK-NBF16-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16
// CHECK-NBF16-NEXT: [[EXT:%.*]] = fpext <8 x bfloat> [[TMP0]] to <8 x float>
// CHECK-NBF16-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16
// CHECK-NBF16-NEXT: [[EXT1:%.*]] = fpext <8 x bfloat> [[TMP1]] to <8 x float>
// CHECK-NBF16-NEXT: [[ADD:%.*]] = fadd <8 x float> [[EXT]], [[EXT1]]
// CHECK-NBF16-NEXT: [[UNPROMOTION:%.*]] = fptrunc <8 x float> [[ADD]] to <8 x bfloat>
// CHECK-NBF16-NEXT: store <8 x bfloat> [[UNPROMOTION]], ptr [[C]], align 16
// CHECK-NBF16-NEXT: [[TMP2:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16
// CHECK-NBF16-NEXT: [[EXT2:%.*]] = fpext <8 x bfloat> [[TMP2]] to <8 x float>
// CHECK-NBF16-NEXT: [[TMP3:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16
// CHECK-NBF16-NEXT: [[EXT3:%.*]] = fpext <8 x bfloat> [[TMP3]] to <8 x float>
// CHECK-NBF16-NEXT: [[SUB:%.*]] = fsub <8 x float> [[EXT2]], [[EXT3]]
// CHECK-NBF16-NEXT: [[UNPROMOTION4:%.*]] = fptrunc <8 x float> [[SUB]] to <8 x bfloat>
// CHECK-NBF16-NEXT: store <8 x bfloat> [[UNPROMOTION4]], ptr [[C]], align 16
// CHECK-NBF16-NEXT: [[TMP4:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16
// CHECK-NBF16-NEXT: [[EXT5:%.*]] = fpext <8 x bfloat> [[TMP4]] to <8 x float>
// CHECK-NBF16-NEXT: [[TMP5:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16
// CHECK-NBF16-NEXT: [[EXT6:%.*]] = fpext <8 x bfloat> [[TMP5]] to <8 x float>
// CHECK-NBF16-NEXT: [[MUL:%.*]] = fmul <8 x float> [[EXT5]], [[EXT6]]
// CHECK-NBF16-NEXT: [[UNPROMOTION7:%.*]] = fptrunc <8 x float> [[MUL]] to <8 x bfloat>
// CHECK-NBF16-NEXT: store <8 x bfloat> [[UNPROMOTION7]], ptr [[C]], align 16
// CHECK-NBF16-NEXT: [[TMP6:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16
// CHECK-NBF16-NEXT: [[EXT8:%.*]] = fpext <8 x bfloat> [[TMP6]] to <8 x float>
// CHECK-NBF16-NEXT: [[TMP7:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16
// CHECK-NBF16-NEXT: [[EXT9:%.*]] = fpext <8 x bfloat> [[TMP7]] to <8 x float>
// CHECK-NBF16-NEXT: [[DIV:%.*]] = fdiv <8 x float> [[EXT8]], [[EXT9]]
// CHECK-NBF16-NEXT: [[UNPROMOTION10:%.*]] = fptrunc <8 x float> [[DIV]] to <8 x bfloat>
// CHECK-NBF16-NEXT: store <8 x bfloat> [[UNPROMOTION10]], ptr [[C]], align 16
// CHECK-NBF16-NEXT: ret void
//
void test_vector(v8bfloat16 a, v8bfloat16 b) {
v8bfloat16 c;
c = a + b;
c = a - b;
c = a * b;
c = a / b;
}