Files
clang-p2996/llvm/test/CodeGen/RISCV/bfloat.ll
Philip Reames 8624075105 [RISCV] Strip W suffix from ADDIW (#68425)
The motivation for this change is simply to reduce test duplication. As
can be seen in the (massive) test delta, we have many tests whose outputs
differ only in the use of addi on rv32 vs. addiw on rv64 when the
high bits are don't-care.

As an aside, we don't need to worry about the non-zero immediate
restriction on the compressed variants because we're not directly
forming the compressed variants. If we happen to get a zero immediate
for the ADDI, then either a later optimization will strip the useless
instruction or the encoder is responsible for not compressing the
instruction.
2023-10-06 10:28:01 -07:00

678 lines
24 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32I-ILP32
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64I-LP64
; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32ID-ILP32
; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64ID-LP64
; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32ID-ILP32D
; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64ID-LP64D
; float_to_bfloat - fptrunc of a float argument to bfloat.
; In every configuration the expected code calls the __truncsfbf2 libcall.
; The +d configurations then OR the result with the constant built by
; "lui a1, 1048560" (0xFFFF0000), i.e. they set every bit above the low 16.
; With the ilp32d/lp64d ABIs the value is moved fa0 -> a0 -> fa0 around that OR.
; NOTE(review): the OR looks like NaN-boxing of the 16-bit value - confirm.
define bfloat @float_to_bfloat(float %a) nounwind {
; RV32I-ILP32-LABEL: float_to_bfloat:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: call __truncsfbf2@plt
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: float_to_bfloat:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: call __truncsfbf2@plt
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: float_to_bfloat:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32-NEXT: lui a1, 1048560
; RV32ID-ILP32-NEXT: or a0, a0, a1
; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: float_to_bfloat:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: call __truncsfbf2@plt
; RV64ID-LP64-NEXT: lui a1, 1048560
; RV64ID-LP64-NEXT: or a0, a0, a1
; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: addi sp, sp, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: float_to_bfloat:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: addi sp, sp, -16
; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: lui a1, 1048560
; RV32ID-ILP32D-NEXT: or a0, a0, a1
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: addi sp, sp, 16
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: float_to_bfloat:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: addi sp, sp, -16
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: call __truncsfbf2@plt
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 1048560
; RV64ID-LP64D-NEXT: or a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: addi sp, sp, 16
; RV64ID-LP64D-NEXT: ret
%1 = fptrunc float %a to bfloat
ret bfloat %1
}
; double_to_bfloat - fptrunc of a double argument to bfloat.
; In every configuration the expected code calls the __truncdfbf2 libcall.
; As in the float case, the +d configurations OR the result with the
; "lui a1, 1048560" constant (0xFFFF0000), and the ilp32d/lp64d ABIs move the
; value fa0 -> a0 -> fa0 around that OR.
define bfloat @double_to_bfloat(double %a) nounwind {
; RV32I-ILP32-LABEL: double_to_bfloat:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: call __truncdfbf2@plt
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: double_to_bfloat:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: call __truncdfbf2@plt
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: double_to_bfloat:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: call __truncdfbf2@plt
; RV32ID-ILP32-NEXT: lui a1, 1048560
; RV32ID-ILP32-NEXT: or a0, a0, a1
; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: double_to_bfloat:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: call __truncdfbf2@plt
; RV64ID-LP64-NEXT: lui a1, 1048560
; RV64ID-LP64-NEXT: or a0, a0, a1
; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: addi sp, sp, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: double_to_bfloat:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: addi sp, sp, -16
; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: call __truncdfbf2@plt
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: lui a1, 1048560
; RV32ID-ILP32D-NEXT: or a0, a0, a1
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: addi sp, sp, 16
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: double_to_bfloat:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: addi sp, sp, -16
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: call __truncdfbf2@plt
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 1048560
; RV64ID-LP64D-NEXT: or a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: addi sp, sp, 16
; RV64ID-LP64D-NEXT: ret
%1 = fptrunc double %a to bfloat
ret bfloat %1
}
; bfloat_to_float - fpext of a bfloat argument to float.
; No libcall is expected: the conversion is an integer left shift by 16.
; Without +d, rv64 uses slliw; the rv64 +d configurations first clear the
; bits above the low 16 (slli 48 / srli 48) and then shift with slli.
; With the ilp32d/lp64d ABIs the value is moved fa0 -> a0 before the shift
; and a0 -> fa0 after it.
define float @bfloat_to_float(bfloat %a) nounwind {
; RV32I-ILP32-LABEL: bfloat_to_float:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: slli a0, a0, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_to_float:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: slliw a0, a0, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_to_float:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: slli a0, a0, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_to_float:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: slli a0, a0, 48
; RV64ID-LP64-NEXT: srli a0, a0, 48
; RV64ID-LP64-NEXT: slli a0, a0, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_to_float:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: slli a0, a0, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_to_float:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: slli a0, a0, 48
; RV64ID-LP64D-NEXT: srli a0, a0, 48
; RV64ID-LP64D-NEXT: slli a0, a0, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ret
%1 = fpext bfloat %a to float
ret float %1
}
; bfloat_to_double - fpext of a bfloat argument to double.
; The expected code first widens to float with a left shift by 16. Without +d
; the float is then extended by the __extendsfdf2 libcall; with +d it is
; moved into an FP register and converted with fcvt.d.s.
; For rv32 +d with the ilp32 ABI the double is stored to the stack (fsd) and
; reloaded as two words into a0/a1; for rv64 +d with lp64 it is moved to a0
; with fmv.x.d; the ilp32d/lp64d ABIs leave the result in fa0.
define double @bfloat_to_double(bfloat %a) nounwind {
; RV32I-ILP32-LABEL: bfloat_to_double:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: slli a0, a0, 16
; RV32I-ILP32-NEXT: call __extendsfdf2@plt
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_to_double:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: slliw a0, a0, 16
; RV64I-LP64-NEXT: call __extendsfdf2@plt
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_to_double:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: slli a0, a0, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0
; RV32ID-ILP32-NEXT: fcvt.d.s fa5, fa5
; RV32ID-ILP32-NEXT: fsd fa5, 8(sp)
; RV32ID-ILP32-NEXT: lw a0, 8(sp)
; RV32ID-ILP32-NEXT: lw a1, 12(sp)
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_to_double:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: slli a0, a0, 48
; RV64ID-LP64-NEXT: srli a0, a0, 48
; RV64ID-LP64-NEXT: slli a0, a0, 16
; RV64ID-LP64-NEXT: fmv.w.x fa5, a0
; RV64ID-LP64-NEXT: fcvt.d.s fa5, fa5
; RV64ID-LP64-NEXT: fmv.x.d a0, fa5
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_to_double:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: slli a0, a0, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a0
; RV32ID-ILP32D-NEXT: fcvt.d.s fa0, fa5
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_to_double:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: slli a0, a0, 48
; RV64ID-LP64D-NEXT: srli a0, a0, 48
; RV64ID-LP64D-NEXT: slli a0, a0, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa5, a0
; RV64ID-LP64D-NEXT: fcvt.d.s fa0, fa5
; RV64ID-LP64D-NEXT: ret
%1 = fpext bfloat %a to double
ret double %1
}
; i16_to_bfloat - bitcast of an i16 argument to bfloat.
; Without +d the expected code is a bare ret. With +d the value is ORed with
; the "lui a1, 1048560" constant (0xFFFF0000), and the ilp32d/lp64d ABIs
; additionally move the result into fa0.
; NOTE(review): the OR looks like NaN-boxing of the 16-bit value - confirm.
define bfloat @i16_to_bfloat(i16 %a) nounwind {
; RV32I-ILP32-LABEL: i16_to_bfloat:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: i16_to_bfloat:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: i16_to_bfloat:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: lui a1, 1048560
; RV32ID-ILP32-NEXT: or a0, a0, a1
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: i16_to_bfloat:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: lui a1, 1048560
; RV64ID-LP64-NEXT: or a0, a0, a1
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: i16_to_bfloat:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: lui a1, 1048560
; RV32ID-ILP32D-NEXT: or a0, a0, a1
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: i16_to_bfloat:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: lui a1, 1048560
; RV64ID-LP64D-NEXT: or a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ret
%1 = bitcast i16 %a to bfloat
ret bfloat %1
}
; bfloat_to_i16 - bitcast of a bfloat argument to i16.
; With the ilp32/lp64 ABIs the expected code is a bare ret; with the
; ilp32d/lp64d ABIs a single fmv.x.w moves the argument from fa0 to a0.
define i16 @bfloat_to_i16(bfloat %a) nounwind {
; RV32I-ILP32-LABEL: bfloat_to_i16:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_to_i16:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_to_i16:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_to_i16:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_to_i16:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_to_i16:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: ret
%1 = bitcast bfloat %a to i16
ret i16 %1
}
; bfloat_add - fadd of two bfloat arguments.
; The expected code performs the addition in single precision: both operands
; are shifted left by 16, added (the __addsf3 libcall without +d, fadd.s with
; +d), and the sum is narrowed again with the __truncsfbf2 libcall.
; The rv64 +d configurations mask each operand with 0xFFFF (lui 16 / addi -1)
; before shifting. The +d configurations OR the final result with the
; "lui a1, 1048560" constant (0xFFFF0000), as in float_to_bfloat.
define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind {
; RV32I-ILP32-LABEL: bfloat_add:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: slli a0, a0, 16
; RV32I-ILP32-NEXT: slli a1, a1, 16
; RV32I-ILP32-NEXT: call __addsf3@plt
; RV32I-ILP32-NEXT: call __truncsfbf2@plt
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_add:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: slliw a0, a0, 16
; RV64I-LP64-NEXT: slliw a1, a1, 16
; RV64I-LP64-NEXT: call __addsf3@plt
; RV64I-LP64-NEXT: call __truncsfbf2@plt
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_add:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: slli a1, a1, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa5, a1
; RV32ID-ILP32-NEXT: slli a0, a0, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0
; RV32ID-ILP32-NEXT: fadd.s fa5, fa4, fa5
; RV32ID-ILP32-NEXT: fmv.x.w a0, fa5
; RV32ID-ILP32-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32-NEXT: lui a1, 1048560
; RV32ID-ILP32-NEXT: or a0, a0, a1
; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_add:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: lui a2, 16
; RV64ID-LP64-NEXT: addi a2, a2, -1
; RV64ID-LP64-NEXT: and a0, a0, a2
; RV64ID-LP64-NEXT: and a1, a1, a2
; RV64ID-LP64-NEXT: slli a1, a1, 16
; RV64ID-LP64-NEXT: fmv.w.x fa5, a1
; RV64ID-LP64-NEXT: slli a0, a0, 16
; RV64ID-LP64-NEXT: fmv.w.x fa4, a0
; RV64ID-LP64-NEXT: fadd.s fa5, fa4, fa5
; RV64ID-LP64-NEXT: fmv.x.w a0, fa5
; RV64ID-LP64-NEXT: call __truncsfbf2@plt
; RV64ID-LP64-NEXT: lui a1, 1048560
; RV64ID-LP64-NEXT: or a0, a0, a1
; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: addi sp, sp, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_add:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: addi sp, sp, -16
; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: fmv.x.w a1, fa1
; RV32ID-ILP32D-NEXT: slli a1, a1, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a1
; RV32ID-ILP32D-NEXT: slli a0, a0, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a0
; RV32ID-ILP32D-NEXT: fadd.s fa0, fa4, fa5
; RV32ID-ILP32D-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: lui a1, 1048560
; RV32ID-ILP32D-NEXT: or a0, a0, a1
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: addi sp, sp, 16
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_add:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: addi sp, sp, -16
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 16
; RV64ID-LP64D-NEXT: addi a1, a1, -1
; RV64ID-LP64D-NEXT: and a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.x.w a2, fa1
; RV64ID-LP64D-NEXT: and a1, a2, a1
; RV64ID-LP64D-NEXT: slli a1, a1, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa5, a1
; RV64ID-LP64D-NEXT: slli a0, a0, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa4, a0
; RV64ID-LP64D-NEXT: fadd.s fa0, fa4, fa5
; RV64ID-LP64D-NEXT: call __truncsfbf2@plt
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 1048560
; RV64ID-LP64D-NEXT: or a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: addi sp, sp, 16
; RV64ID-LP64D-NEXT: ret
%1 = fadd bfloat %a, %b
ret bfloat %1
}
; bfloat_load - loads the bfloat at %a and the bfloat at element index 3
; past %a, adds them, and returns the sum.
; The expected code uses 16-bit loads at byte offsets 0 and 6 (lh without +d,
; lhu with +d), then follows the same widen / add / __truncsfbf2 sequence as
; bfloat_add. The +d configurations OR the final result with the
; "lui a1, 1048560" constant (0xFFFF0000).
define bfloat @bfloat_load(ptr %a) nounwind {
; RV32I-ILP32-LABEL: bfloat_load:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: lh a1, 0(a0)
; RV32I-ILP32-NEXT: lh a2, 6(a0)
; RV32I-ILP32-NEXT: slli a0, a1, 16
; RV32I-ILP32-NEXT: slli a1, a2, 16
; RV32I-ILP32-NEXT: call __addsf3@plt
; RV32I-ILP32-NEXT: call __truncsfbf2@plt
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_load:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: lh a1, 0(a0)
; RV64I-LP64-NEXT: lh a2, 6(a0)
; RV64I-LP64-NEXT: slliw a0, a1, 16
; RV64I-LP64-NEXT: slliw a1, a2, 16
; RV64I-LP64-NEXT: call __addsf3@plt
; RV64I-LP64-NEXT: call __truncsfbf2@plt
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_load:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: lhu a1, 6(a0)
; RV32ID-ILP32-NEXT: lhu a0, 0(a0)
; RV32ID-ILP32-NEXT: slli a1, a1, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa5, a1
; RV32ID-ILP32-NEXT: slli a0, a0, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0
; RV32ID-ILP32-NEXT: fadd.s fa5, fa4, fa5
; RV32ID-ILP32-NEXT: fmv.x.w a0, fa5
; RV32ID-ILP32-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32-NEXT: lui a1, 1048560
; RV32ID-ILP32-NEXT: or a0, a0, a1
; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_load:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: lhu a1, 6(a0)
; RV64ID-LP64-NEXT: lhu a0, 0(a0)
; RV64ID-LP64-NEXT: slli a1, a1, 16
; RV64ID-LP64-NEXT: fmv.w.x fa5, a1
; RV64ID-LP64-NEXT: slli a0, a0, 16
; RV64ID-LP64-NEXT: fmv.w.x fa4, a0
; RV64ID-LP64-NEXT: fadd.s fa5, fa4, fa5
; RV64ID-LP64-NEXT: fmv.x.w a0, fa5
; RV64ID-LP64-NEXT: call __truncsfbf2@plt
; RV64ID-LP64-NEXT: lui a1, 1048560
; RV64ID-LP64-NEXT: or a0, a0, a1
; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: addi sp, sp, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_load:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: addi sp, sp, -16
; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: lhu a1, 6(a0)
; RV32ID-ILP32D-NEXT: lhu a0, 0(a0)
; RV32ID-ILP32D-NEXT: slli a1, a1, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a1
; RV32ID-ILP32D-NEXT: slli a0, a0, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a0
; RV32ID-ILP32D-NEXT: fadd.s fa0, fa4, fa5
; RV32ID-ILP32D-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: lui a1, 1048560
; RV32ID-ILP32D-NEXT: or a0, a0, a1
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: addi sp, sp, 16
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_load:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: addi sp, sp, -16
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: lhu a1, 6(a0)
; RV64ID-LP64D-NEXT: lhu a0, 0(a0)
; RV64ID-LP64D-NEXT: slli a1, a1, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa5, a1
; RV64ID-LP64D-NEXT: slli a0, a0, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa4, a0
; RV64ID-LP64D-NEXT: fadd.s fa0, fa4, fa5
; RV64ID-LP64D-NEXT: call __truncsfbf2@plt
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 1048560
; RV64ID-LP64D-NEXT: or a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: addi sp, sp, 16
; RV64ID-LP64D-NEXT: ret
%1 = load bfloat, ptr %a
%2 = getelementptr bfloat, ptr %a, i32 3
%3 = load bfloat, ptr %2
%4 = fadd bfloat %1, %3
ret bfloat %4
}
; bfloat_store - adds %b and %c and stores the sum to %a and to element
; index 8 past %a.
; The expected code keeps the pointer in s0 across the calls, computes the
; sum with the same widen / add / __truncsfbf2 sequence as bfloat_add, and
; writes it with two sh stores at byte offsets 0 and 16. Unlike the functions
; that return a bfloat, no OR with 0xFFFF0000 is expected before the stores.
define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind {
; RV32I-ILP32-LABEL: bfloat_store:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: mv s0, a0
; RV32I-ILP32-NEXT: slli a0, a1, 16
; RV32I-ILP32-NEXT: slli a1, a2, 16
; RV32I-ILP32-NEXT: call __addsf3@plt
; RV32I-ILP32-NEXT: call __truncsfbf2@plt
; RV32I-ILP32-NEXT: sh a0, 0(s0)
; RV32I-ILP32-NEXT: sh a0, 16(s0)
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_store:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: mv s0, a0
; RV64I-LP64-NEXT: slliw a0, a1, 16
; RV64I-LP64-NEXT: slliw a1, a2, 16
; RV64I-LP64-NEXT: call __addsf3@plt
; RV64I-LP64-NEXT: call __truncsfbf2@plt
; RV64I-LP64-NEXT: sh a0, 0(s0)
; RV64I-LP64-NEXT: sh a0, 16(s0)
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_store:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: mv s0, a0
; RV32ID-ILP32-NEXT: slli a2, a2, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa5, a2
; RV32ID-ILP32-NEXT: slli a1, a1, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa4, a1
; RV32ID-ILP32-NEXT: fadd.s fa5, fa4, fa5
; RV32ID-ILP32-NEXT: fmv.x.w a0, fa5
; RV32ID-ILP32-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32-NEXT: sh a0, 0(s0)
; RV32ID-ILP32-NEXT: sh a0, 16(s0)
; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_store:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: mv s0, a0
; RV64ID-LP64-NEXT: lui a0, 16
; RV64ID-LP64-NEXT: addi a0, a0, -1
; RV64ID-LP64-NEXT: and a1, a1, a0
; RV64ID-LP64-NEXT: and a0, a2, a0
; RV64ID-LP64-NEXT: slli a0, a0, 16
; RV64ID-LP64-NEXT: fmv.w.x fa5, a0
; RV64ID-LP64-NEXT: slli a1, a1, 16
; RV64ID-LP64-NEXT: fmv.w.x fa4, a1
; RV64ID-LP64-NEXT: fadd.s fa5, fa4, fa5
; RV64ID-LP64-NEXT: fmv.x.w a0, fa5
; RV64ID-LP64-NEXT: call __truncsfbf2@plt
; RV64ID-LP64-NEXT: sh a0, 0(s0)
; RV64ID-LP64-NEXT: sh a0, 16(s0)
; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: addi sp, sp, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_store:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: addi sp, sp, -16
; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: mv s0, a0
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: fmv.x.w a1, fa1
; RV32ID-ILP32D-NEXT: slli a1, a1, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a1
; RV32ID-ILP32D-NEXT: slli a0, a0, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a0
; RV32ID-ILP32D-NEXT: fadd.s fa0, fa4, fa5
; RV32ID-ILP32D-NEXT: call __truncsfbf2@plt
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: sh a0, 0(s0)
; RV32ID-ILP32D-NEXT: sh a0, 16(s0)
; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: addi sp, sp, 16
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_store:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: addi sp, sp, -16
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: mv s0, a0
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 16
; RV64ID-LP64D-NEXT: addi a1, a1, -1
; RV64ID-LP64D-NEXT: and a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.x.w a2, fa1
; RV64ID-LP64D-NEXT: and a1, a2, a1
; RV64ID-LP64D-NEXT: slli a1, a1, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa5, a1
; RV64ID-LP64D-NEXT: slli a0, a0, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa4, a0
; RV64ID-LP64D-NEXT: fadd.s fa0, fa4, fa5
; RV64ID-LP64D-NEXT: call __truncsfbf2@plt
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: sh a0, 0(s0)
; RV64ID-LP64D-NEXT: sh a0, 16(s0)
; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: addi sp, sp, 16
; RV64ID-LP64D-NEXT: ret
%1 = fadd bfloat %b, %c
store bfloat %1, ptr %a
%2 = getelementptr bfloat, ptr %a, i32 8
store bfloat %1, ptr %2
ret void
}