This is a tiny change that can save up to 16 bytes of stack allocation, which is more beneficial on RV32 than RV64. cm.push allocates multiples of 16 bytes, but only uses a subset of those bytes for pushing callee-saved registers. Up to 12 (rv32) or 8 (rv64) bytes are left unused, depending on how many registers are pushed. Before this change, we told LLVM that the entire allocation was used, by creating a fixed stack object which covered the whole allocation. This change instead gives an accurate extent to the fixed stack object, to only cover the registers that have been pushed. This allows the PrologEpilogInserter to use any unused bytes for spills. Potentially this saves an extra move of the stack pointer after the push, because the push can allocate up to 48 more bytes than it needs for registers. We cannot do the same change for save/restore, because the restore routines restore in batches of `stackalign/(xlen/8)` registers, and we don't want to clobber the saved values of registers that we didn't tell the compiler we were saving/restoring - for instance `__riscv_restore_0` is used by the compiler when it only wants to save `ra`, but will end up restoring `ra` and `s0`.
91 lines
2.9 KiB
LLVM
91 lines
2.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=riscv32 -mattr=+f,+zcmp -target-abi ilp32f -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
; RUN: llc -mtriple=riscv64 -mattr=+f,+zcmp -target-abi lp64f -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
|
|
|
|
; External function called by @foo; declared here without a body.
declare void @callee()
|
|
|
|
; Test that the file compiles successfully.
; %arg is returned after the call to @callee, so it has to survive the call:
; the checks expect it to be parked in fs0, and fs0 itself to be spilled inside
; the 16-byte frame created by cm.push, with no further stack adjustment.
define float @foo(float %arg) {
; RV32-LABEL: foo:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    cm.push {ra}, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset fs0, -8
; RV32-NEXT:    fmv.s fs0, fa0
; RV32-NEXT:    call callee
; RV32-NEXT:    fmv.s fa0, fs0
; RV32-NEXT:    flw fs0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore fs0
; RV32-NEXT:    cm.popret {ra}, 16
;
; RV64-LABEL: foo:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    cm.push {ra}, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
; RV64-NEXT:    .cfi_offset fs0, -12
; RV64-NEXT:    fmv.s fs0, fa0
; RV64-NEXT:    call callee
; RV64-NEXT:    fmv.s fa0, fs0
; RV64-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
; RV64-NEXT:    .cfi_restore fs0
; RV64-NEXT:    cm.popret {ra}, 16
entry:
  call void @callee()
  ret float %arg
}
|
|
|
|
; %x and %y are forwarded to @func in a tail call that follows the call to
; @bar, so both must survive that call: the checks expect %x to be held in s0
; (saved by cm.push) and %y in fs0 (spilled into the cm.push frame). Because the
; function ends in a tail call, the epilogue is expected to use cm.pop followed
; by `tail func` rather than cm.popret.
define void @foo2(i32 %x, float %y) {
; RV32-LABEL: foo2:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    cm.push {ra, s0}, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    .cfi_offset ra, -8
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset fs0, -12
; RV32-NEXT:    fmv.s fs0, fa0
; RV32-NEXT:    mv s0, a0
; RV32-NEXT:    call bar
; RV32-NEXT:    mv a0, s0
; RV32-NEXT:    fmv.s fa0, fs0
; RV32-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore fs0
; RV32-NEXT:    cm.pop {ra, s0}, 16
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    .cfi_restore s0
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    tail func
;
; RV64-LABEL: foo2:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    cm.push {ra, s0}, -32
; RV64-NEXT:    .cfi_def_cfa_offset 32
; RV64-NEXT:    .cfi_offset ra, -16
; RV64-NEXT:    .cfi_offset s0, -8
; RV64-NEXT:    fsw fs0, 12(sp) # 4-byte Folded Spill
; RV64-NEXT:    .cfi_offset fs0, -20
; RV64-NEXT:    fmv.s fs0, fa0
; RV64-NEXT:    mv s0, a0
; RV64-NEXT:    call bar
; RV64-NEXT:    mv a0, s0
; RV64-NEXT:    fmv.s fa0, fs0
; RV64-NEXT:    flw fs0, 12(sp) # 4-byte Folded Reload
; RV64-NEXT:    .cfi_restore fs0
; RV64-NEXT:    cm.pop {ra, s0}, 32
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    tail func
entry:
  tail call void @bar()
  tail call void @func(i32 %x, float %y)
  ret void
}
|
|
|
|
; External functions called by @foo2; declared here without bodies.
declare void @bar()
declare void @func(i32, float)
|