Files
clang-p2996/llvm/test/CodeGen/RISCV/bfloat.ll
Alex Bradbury 929124993a Recommit "[RISCV] Implement support for bf16 truncate/extend on hard FP targets"
Without the changes from D153598.

Original commit message:

For the same reasons as D151284, this requires custom lowering of the
truncate libcall on hard float ABIs (the normal libcall code path is
used on soft ABIs).

The extend operation is implemented by a shift just as in the standard
legalisation, but needs to be custom lowered because i32 isn't a legal
type on RV64.

This patch aims to make the minimal changes that result in correct
codegen for the bfloat.ll tests.

Differential Revision: https://reviews.llvm.org/D151663
2023-06-23 17:23:12 -07:00

678 lines
24 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32I-ILP32
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64I-LP64
; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32ID-ILP32
; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64ID-LP64
; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32ID-ILP32D
; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64ID-LP64D
define bfloat @float_to_bfloat(float %a) nounwind {
; Verifies fptrunc f32 -> bf16 lowers to the __truncsfbf2 libcall on every
; configuration. On hard-float ABIs the returned i16 is OR'ed with
; 0xffff0000 (lui 1048560) -- the RISC-V NaN-boxing pattern for narrow FP
; values in a wider register -- and on ilp32d/lp64d it is moved back into
; fa0 since bf16 is passed/returned in an FPR there.
; RV32I-ILP32-LABEL: float_to_bfloat:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: call __truncsfbf2@plt
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: float_to_bfloat:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: call __truncsfbf2@plt
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: float_to_bfloat:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32-NEXT: lui a1, 1048560
; RV32ID-ILP32-NEXT: or a0, a0, a1
; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: float_to_bfloat:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: call __truncsfbf2@plt
; RV64ID-LP64-NEXT: lui a1, 1048560
; RV64ID-LP64-NEXT: or a0, a0, a1
; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: addi sp, sp, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: float_to_bfloat:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: addi sp, sp, -16
; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: lui a1, 1048560
; RV32ID-ILP32D-NEXT: or a0, a0, a1
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: addi sp, sp, 16
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: float_to_bfloat:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: addi sp, sp, -16
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: call __truncsfbf2@plt
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 1048560
; RV64ID-LP64D-NEXT: or a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: addi sp, sp, 16
; RV64ID-LP64D-NEXT: ret
%1 = fptrunc float %a to bfloat
ret bfloat %1
}
define bfloat @double_to_bfloat(double %a) nounwind {
; Verifies fptrunc f64 -> bf16 lowers to the __truncdfbf2 libcall on every
; configuration, with the same hard-float post-processing as
; float_to_bfloat: OR with 0xffff0000 and (on D ABIs) fmv back into fa0.
; RV32I-ILP32-LABEL: double_to_bfloat:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: call __truncdfbf2@plt
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: double_to_bfloat:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: call __truncdfbf2@plt
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: double_to_bfloat:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: call __truncdfbf2@plt
; RV32ID-ILP32-NEXT: lui a1, 1048560
; RV32ID-ILP32-NEXT: or a0, a0, a1
; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: double_to_bfloat:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: call __truncdfbf2@plt
; RV64ID-LP64-NEXT: lui a1, 1048560
; RV64ID-LP64-NEXT: or a0, a0, a1
; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: addi sp, sp, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: double_to_bfloat:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: addi sp, sp, -16
; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: call __truncdfbf2@plt
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: lui a1, 1048560
; RV32ID-ILP32D-NEXT: or a0, a0, a1
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: addi sp, sp, 16
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: double_to_bfloat:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: addi sp, sp, -16
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: call __truncdfbf2@plt
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 1048560
; RV64ID-LP64D-NEXT: or a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: addi sp, sp, 16
; RV64ID-LP64D-NEXT: ret
%1 = fptrunc double %a to bfloat
ret bfloat %1
}
define float @bfloat_to_float(bfloat %a) nounwind {
; Verifies fpext bf16 -> f32 needs no libcall: it is a 16-bit left shift of
; the bf16 bit pattern (bf16 is the top half of an f32). On RV64 hard-float
; the value is first zero-extended from 16 bits (slli 48 / srli 48) because
; i32 is not a legal type there; D ABIs bracket the shift with fmv to/from
; the FPR.
; RV32I-ILP32-LABEL: bfloat_to_float:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: slli a0, a0, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_to_float:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: slliw a0, a0, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_to_float:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: slli a0, a0, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_to_float:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: slli a0, a0, 48
; RV64ID-LP64-NEXT: srli a0, a0, 48
; RV64ID-LP64-NEXT: slli a0, a0, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_to_float:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: slli a0, a0, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_to_float:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: slli a0, a0, 48
; RV64ID-LP64D-NEXT: srli a0, a0, 48
; RV64ID-LP64D-NEXT: slli a0, a0, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ret
%1 = fpext bfloat %a to float
ret float %1
}
define double @bfloat_to_double(bfloat %a) nounwind {
; Verifies fpext bf16 -> f64: shift-left-16 to form an f32, then either the
; __extendsfdf2 libcall (soft-float configs) or an fcvt.d.s (configs with
; +d). On ilp32 with +d the f64 result is shuffled to a0/a1 through a stack
; slot (fsd + two lw); on lp64 it goes through fmv.x.d.
; RV32I-ILP32-LABEL: bfloat_to_double:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: slli a0, a0, 16
; RV32I-ILP32-NEXT: call __extendsfdf2@plt
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_to_double:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: slliw a0, a0, 16
; RV64I-LP64-NEXT: call __extendsfdf2@plt
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_to_double:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: slli a0, a0, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0
; RV32ID-ILP32-NEXT: fcvt.d.s fa5, fa5
; RV32ID-ILP32-NEXT: fsd fa5, 8(sp)
; RV32ID-ILP32-NEXT: lw a0, 8(sp)
; RV32ID-ILP32-NEXT: lw a1, 12(sp)
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_to_double:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: slli a0, a0, 48
; RV64ID-LP64-NEXT: srli a0, a0, 48
; RV64ID-LP64-NEXT: slli a0, a0, 16
; RV64ID-LP64-NEXT: fmv.w.x fa5, a0
; RV64ID-LP64-NEXT: fcvt.d.s fa5, fa5
; RV64ID-LP64-NEXT: fmv.x.d a0, fa5
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_to_double:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: slli a0, a0, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a0
; RV32ID-ILP32D-NEXT: fcvt.d.s fa0, fa5
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_to_double:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: slli a0, a0, 48
; RV64ID-LP64D-NEXT: srli a0, a0, 48
; RV64ID-LP64D-NEXT: slli a0, a0, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa5, a0
; RV64ID-LP64D-NEXT: fcvt.d.s fa0, fa5
; RV64ID-LP64D-NEXT: ret
%1 = fpext bfloat %a to double
ret double %1
}
define bfloat @i16_to_bfloat(i16 %a) nounwind {
; Verifies bitcast i16 -> bf16 is a no-op on soft-float ABIs (both value
; classes travel in a0). Hard-float configs OR in 0xffff0000 to produce the
; boxed in-register form, and D ABIs move the result into fa0.
; RV32I-ILP32-LABEL: i16_to_bfloat:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: i16_to_bfloat:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: i16_to_bfloat:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: lui a1, 1048560
; RV32ID-ILP32-NEXT: or a0, a0, a1
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: i16_to_bfloat:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: lui a1, 1048560
; RV64ID-LP64-NEXT: or a0, a0, a1
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: i16_to_bfloat:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: lui a1, 1048560
; RV32ID-ILP32D-NEXT: or a0, a0, a1
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: i16_to_bfloat:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: lui a1, 1048560
; RV64ID-LP64D-NEXT: or a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ret
%1 = bitcast i16 %a to bfloat
ret bfloat %1
}
define i16 @bfloat_to_i16(bfloat %a) nounwind {
; Verifies bitcast bf16 -> i16 is a no-op except on D ABIs, where the bits
; are moved out of fa0 with fmv.x.w (no un-boxing mask is needed: callers
; of an i16 return only consume the low 16 bits).
; RV32I-ILP32-LABEL: bfloat_to_i16:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_to_i16:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_to_i16:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_to_i16:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_to_i16:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_to_i16:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: ret
%1 = bitcast bfloat %a to i16
ret i16 %1
}
define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind {
; Verifies bf16 arithmetic is promoted to f32: both operands are extended
; with shift-left-16 (after masking to 16 bits on RV64 with +d, since i32
; isn't legal there), added via __addsf3 (soft-float) or fadd.s (+d), then
; truncated back through the __truncsfbf2 libcall. Hard-float configs again
; OR the result with 0xffff0000 and D ABIs return it in fa0.
; RV32I-ILP32-LABEL: bfloat_add:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: slli a0, a0, 16
; RV32I-ILP32-NEXT: slli a1, a1, 16
; RV32I-ILP32-NEXT: call __addsf3@plt
; RV32I-ILP32-NEXT: call __truncsfbf2@plt
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_add:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: slliw a0, a0, 16
; RV64I-LP64-NEXT: slliw a1, a1, 16
; RV64I-LP64-NEXT: call __addsf3@plt
; RV64I-LP64-NEXT: call __truncsfbf2@plt
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_add:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: slli a1, a1, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa5, a1
; RV32ID-ILP32-NEXT: slli a0, a0, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0
; RV32ID-ILP32-NEXT: fadd.s fa5, fa4, fa5
; RV32ID-ILP32-NEXT: fmv.x.w a0, fa5
; RV32ID-ILP32-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32-NEXT: lui a1, 1048560
; RV32ID-ILP32-NEXT: or a0, a0, a1
; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_add:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: lui a2, 16
; RV64ID-LP64-NEXT: addiw a2, a2, -1
; RV64ID-LP64-NEXT: and a0, a0, a2
; RV64ID-LP64-NEXT: and a1, a1, a2
; RV64ID-LP64-NEXT: slli a1, a1, 16
; RV64ID-LP64-NEXT: fmv.w.x fa5, a1
; RV64ID-LP64-NEXT: slli a0, a0, 16
; RV64ID-LP64-NEXT: fmv.w.x fa4, a0
; RV64ID-LP64-NEXT: fadd.s fa5, fa4, fa5
; RV64ID-LP64-NEXT: fmv.x.w a0, fa5
; RV64ID-LP64-NEXT: call __truncsfbf2@plt
; RV64ID-LP64-NEXT: lui a1, 1048560
; RV64ID-LP64-NEXT: or a0, a0, a1
; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: addi sp, sp, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_add:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: addi sp, sp, -16
; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: fmv.x.w a1, fa1
; RV32ID-ILP32D-NEXT: slli a1, a1, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a1
; RV32ID-ILP32D-NEXT: slli a0, a0, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a0
; RV32ID-ILP32D-NEXT: fadd.s fa0, fa4, fa5
; RV32ID-ILP32D-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: lui a1, 1048560
; RV32ID-ILP32D-NEXT: or a0, a0, a1
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: addi sp, sp, 16
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_add:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: addi sp, sp, -16
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 16
; RV64ID-LP64D-NEXT: addiw a1, a1, -1
; RV64ID-LP64D-NEXT: and a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.x.w a2, fa1
; RV64ID-LP64D-NEXT: and a1, a2, a1
; RV64ID-LP64D-NEXT: slli a1, a1, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa5, a1
; RV64ID-LP64D-NEXT: slli a0, a0, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa4, a0
; RV64ID-LP64D-NEXT: fadd.s fa0, fa4, fa5
; RV64ID-LP64D-NEXT: call __truncsfbf2@plt
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 1048560
; RV64ID-LP64D-NEXT: or a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: addi sp, sp, 16
; RV64ID-LP64D-NEXT: ret
%1 = fadd bfloat %a, %b
ret bfloat %1
}
define bfloat @bfloat_load(ptr %a) nounwind {
; Verifies bf16 loads: two halfword loads (a[0] at offset 0, a[3] at offset
; 6 since bf16 is 2 bytes), each extended to f32 via shift-left-16, added,
; then truncated back through __truncsfbf2. Hard-float configs use lhu (the
; zero-extend makes the later mask unnecessary); soft-float uses lh.
; RV32I-ILP32-LABEL: bfloat_load:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: lh a1, 0(a0)
; RV32I-ILP32-NEXT: lh a2, 6(a0)
; RV32I-ILP32-NEXT: slli a0, a1, 16
; RV32I-ILP32-NEXT: slli a1, a2, 16
; RV32I-ILP32-NEXT: call __addsf3@plt
; RV32I-ILP32-NEXT: call __truncsfbf2@plt
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_load:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: lh a1, 0(a0)
; RV64I-LP64-NEXT: lh a2, 6(a0)
; RV64I-LP64-NEXT: slliw a0, a1, 16
; RV64I-LP64-NEXT: slliw a1, a2, 16
; RV64I-LP64-NEXT: call __addsf3@plt
; RV64I-LP64-NEXT: call __truncsfbf2@plt
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_load:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: lhu a1, 6(a0)
; RV32ID-ILP32-NEXT: lhu a0, 0(a0)
; RV32ID-ILP32-NEXT: slli a1, a1, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa5, a1
; RV32ID-ILP32-NEXT: slli a0, a0, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0
; RV32ID-ILP32-NEXT: fadd.s fa5, fa4, fa5
; RV32ID-ILP32-NEXT: fmv.x.w a0, fa5
; RV32ID-ILP32-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32-NEXT: lui a1, 1048560
; RV32ID-ILP32-NEXT: or a0, a0, a1
; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_load:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: lhu a1, 6(a0)
; RV64ID-LP64-NEXT: lhu a0, 0(a0)
; RV64ID-LP64-NEXT: slli a1, a1, 16
; RV64ID-LP64-NEXT: fmv.w.x fa5, a1
; RV64ID-LP64-NEXT: slli a0, a0, 16
; RV64ID-LP64-NEXT: fmv.w.x fa4, a0
; RV64ID-LP64-NEXT: fadd.s fa5, fa4, fa5
; RV64ID-LP64-NEXT: fmv.x.w a0, fa5
; RV64ID-LP64-NEXT: call __truncsfbf2@plt
; RV64ID-LP64-NEXT: lui a1, 1048560
; RV64ID-LP64-NEXT: or a0, a0, a1
; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: addi sp, sp, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_load:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: addi sp, sp, -16
; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: lhu a1, 6(a0)
; RV32ID-ILP32D-NEXT: lhu a0, 0(a0)
; RV32ID-ILP32D-NEXT: slli a1, a1, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a1
; RV32ID-ILP32D-NEXT: slli a0, a0, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a0
; RV32ID-ILP32D-NEXT: fadd.s fa0, fa4, fa5
; RV32ID-ILP32D-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: lui a1, 1048560
; RV32ID-ILP32D-NEXT: or a0, a0, a1
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: addi sp, sp, 16
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_load:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: addi sp, sp, -16
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: lhu a1, 6(a0)
; RV64ID-LP64D-NEXT: lhu a0, 0(a0)
; RV64ID-LP64D-NEXT: slli a1, a1, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa5, a1
; RV64ID-LP64D-NEXT: slli a0, a0, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa4, a0
; RV64ID-LP64D-NEXT: fadd.s fa0, fa4, fa5
; RV64ID-LP64D-NEXT: call __truncsfbf2@plt
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 1048560
; RV64ID-LP64D-NEXT: or a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: addi sp, sp, 16
; RV64ID-LP64D-NEXT: ret
%1 = load bfloat, ptr %a
%2 = getelementptr bfloat, ptr %a, i32 3
%3 = load bfloat, ptr %2
%4 = fadd bfloat %1, %3
ret bfloat %4
}
define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind {
; Verifies bf16 stores: the fadd is promoted/truncated as in bfloat_add,
; and the resulting i16 is stored twice with sh (offsets 0 and 16 = element
; index 8 * 2 bytes). The pointer is preserved across the libcalls in the
; callee-saved s0; no boxing OR is needed since sh only writes 16 bits.
; RV32I-ILP32-LABEL: bfloat_store:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: mv s0, a0
; RV32I-ILP32-NEXT: slli a0, a1, 16
; RV32I-ILP32-NEXT: slli a1, a2, 16
; RV32I-ILP32-NEXT: call __addsf3@plt
; RV32I-ILP32-NEXT: call __truncsfbf2@plt
; RV32I-ILP32-NEXT: sh a0, 0(s0)
; RV32I-ILP32-NEXT: sh a0, 16(s0)
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_store:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: mv s0, a0
; RV64I-LP64-NEXT: slliw a0, a1, 16
; RV64I-LP64-NEXT: slliw a1, a2, 16
; RV64I-LP64-NEXT: call __addsf3@plt
; RV64I-LP64-NEXT: call __truncsfbf2@plt
; RV64I-LP64-NEXT: sh a0, 0(s0)
; RV64I-LP64-NEXT: sh a0, 16(s0)
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_store:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: mv s0, a0
; RV32ID-ILP32-NEXT: slli a2, a2, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa5, a2
; RV32ID-ILP32-NEXT: slli a1, a1, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa4, a1
; RV32ID-ILP32-NEXT: fadd.s fa5, fa4, fa5
; RV32ID-ILP32-NEXT: fmv.x.w a0, fa5
; RV32ID-ILP32-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32-NEXT: sh a0, 0(s0)
; RV32ID-ILP32-NEXT: sh a0, 16(s0)
; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_store:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: mv s0, a0
; RV64ID-LP64-NEXT: lui a0, 16
; RV64ID-LP64-NEXT: addiw a0, a0, -1
; RV64ID-LP64-NEXT: and a1, a1, a0
; RV64ID-LP64-NEXT: and a0, a2, a0
; RV64ID-LP64-NEXT: slli a0, a0, 16
; RV64ID-LP64-NEXT: fmv.w.x fa5, a0
; RV64ID-LP64-NEXT: slli a1, a1, 16
; RV64ID-LP64-NEXT: fmv.w.x fa4, a1
; RV64ID-LP64-NEXT: fadd.s fa5, fa4, fa5
; RV64ID-LP64-NEXT: fmv.x.w a0, fa5
; RV64ID-LP64-NEXT: call __truncsfbf2@plt
; RV64ID-LP64-NEXT: sh a0, 0(s0)
; RV64ID-LP64-NEXT: sh a0, 16(s0)
; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: addi sp, sp, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_store:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: addi sp, sp, -16
; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: mv s0, a0
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: fmv.x.w a1, fa1
; RV32ID-ILP32D-NEXT: slli a1, a1, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a1
; RV32ID-ILP32D-NEXT: slli a0, a0, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a0
; RV32ID-ILP32D-NEXT: fadd.s fa0, fa4, fa5
; RV32ID-ILP32D-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: sh a0, 0(s0)
; RV32ID-ILP32D-NEXT: sh a0, 16(s0)
; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: addi sp, sp, 16
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_store:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: addi sp, sp, -16
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: mv s0, a0
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 16
; RV64ID-LP64D-NEXT: addiw a1, a1, -1
; RV64ID-LP64D-NEXT: and a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.x.w a2, fa1
; RV64ID-LP64D-NEXT: and a1, a2, a1
; RV64ID-LP64D-NEXT: slli a1, a1, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa5, a1
; RV64ID-LP64D-NEXT: slli a0, a0, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa4, a0
; RV64ID-LP64D-NEXT: fadd.s fa0, fa4, fa5
; RV64ID-LP64D-NEXT: call __truncsfbf2@plt
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: sh a0, 0(s0)
; RV64ID-LP64D-NEXT: sh a0, 16(s0)
; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: addi sp, sp, 16
; RV64ID-LP64D-NEXT: ret
%1 = fadd bfloat %b, %c
store bfloat %1, ptr %a
%2 = getelementptr bfloat, ptr %a, i32 8
store bfloat %1, ptr %2
ret void
}