Compressed instructions usually require one of the source registers to also be the destination register. The register allocator doesn't have that bias on its own.

This patch adds register allocation hints to introduce this bias. I've started with ADDI, ADDIW, and SLLI. These all have a 5-bit field for the register, so any register can be used. If the source and destination register are the same, the instruction is guaranteed to compress as long as the immediate also fits in 6 bits.

This code was inspired by similar code from the SystemZ target.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D138242
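As a quick illustration of the constraint the hints target (not part of the patch or of the test below; registers and immediates are made up), the 16-bit C.ADDI, C.ADDIW, and C.SLLI encodings have only one register field, so the 32-bit form can shrink only when the destination reuses a source:

    addi a0, a0, 12    # dest == src, imm fits in 6 bits: can become c.addi a0, 12 (2 bytes)
    addi a0, a1, 12    # dest != src: must stay a 32-bit addi
    slli a2, a2, 3     # dest == src: can become c.slli a2, 3

Hinting the allocator toward assigning the same physical register to the source and destination virtual registers steers more instructions into the first, compressible form.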
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s

; The complex float value is returned in a single register for the LP64 ABI.
; The test checks that the real part returned by __addsf3 has its upper bits
; cleared by shifts to avoid corrupting the imaginary part.

define i64 @complex_float_add(i64 %a.coerce, i64 %b.coerce) nounwind {
; CHECK-LABEL: complex_float_add:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -32
; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; CHECK-NEXT:    srli s0, a0, 32
; CHECK-NEXT:    srli s1, a1, 32
; CHECK-NEXT:    call __addsf3@plt
; CHECK-NEXT:    mv s2, a0
; CHECK-NEXT:    mv a0, s0
; CHECK-NEXT:    mv a1, s1
; CHECK-NEXT:    call __addsf3@plt
; CHECK-NEXT:    slli a0, a0, 32
; CHECK-NEXT:    slli s2, s2, 32
; CHECK-NEXT:    srli a1, s2, 32
; CHECK-NEXT:    or a0, a0, a1
; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 32
; CHECK-NEXT:    ret
entry:
  %a.sroa.0.0.extract.trunc = trunc i64 %a.coerce to i32
  %0 = bitcast i32 %a.sroa.0.0.extract.trunc to float
  %a.sroa.2.0.extract.shift = lshr i64 %a.coerce, 32
  %a.sroa.2.0.extract.trunc = trunc i64 %a.sroa.2.0.extract.shift to i32
  %1 = bitcast i32 %a.sroa.2.0.extract.trunc to float
  %b.sroa.0.0.extract.trunc = trunc i64 %b.coerce to i32
  %2 = bitcast i32 %b.sroa.0.0.extract.trunc to float
  %b.sroa.2.0.extract.shift = lshr i64 %b.coerce, 32
  %b.sroa.2.0.extract.trunc = trunc i64 %b.sroa.2.0.extract.shift to i32
  %3 = bitcast i32 %b.sroa.2.0.extract.trunc to float
  %add.r = fadd float %0, %2
  %add.i = fadd float %1, %3
  %4 = bitcast float %add.r to i32
  %5 = bitcast float %add.i to i32
  %retval.sroa.2.0.insert.ext = zext i32 %5 to i64
  %retval.sroa.2.0.insert.shift = shl nuw i64 %retval.sroa.2.0.insert.ext, 32
  %retval.sroa.0.0.insert.ext = zext i32 %4 to i64
  %retval.sroa.0.0.insert.insert = or i64 %retval.sroa.2.0.insert.shift, %retval.sroa.0.0.insert.ext
  ret i64 %retval.sroa.0.0.insert.insert
}