Files
clang-p2996/llvm/test/CodeGen/RISCV/zcmp-additional-stack.ll
Sam Elliott 50cdf6cbc5 [RISCV] Allow spilling to unused Zcmp Stack (#125959)
This is a tiny change that can save up to 16 bytes of stack allocation,
which is more beneficial on RV32 than RV64.

cm.push allocates multiples of 16 bytes, but only uses a subset of those
bytes for pushing callee-saved registers. Up to 12 (rv32) or 8 (rv64)
bytes are left unused, depending on how many registers are pushed.
Before this change, we told LLVM that the entire allocation was used, by
creating a fixed stack object which covered the whole allocation.

This change instead gives an accurate extent to the fixed stack object,
to only cover the registers that have been pushed. This allows the
PrologEpilogInserter to use any unused bytes for spills. Potentially
this saves an extra move of the stack pointer after the push, because
the push can allocate up to 48 more bytes than it needs for registers.

We cannot do the same change for save/restore, because the restore
routines restore in batches of `stackalign/(xlen/8)` registers, and we
don't want to clobber the saved values of registers that we didn't tell
the compiler we were saving/restoring - for instance `__riscv_restore_0`
is used by the compiler when it only wants to save `ra`, but will end up
restoring `ra` and `s0`.
2025-02-06 19:45:47 -08:00

54 lines
2.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+zcmp,+e -target-abi ilp32e -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
; @func: codegen test input for the llc invocation at the top of this file.
;
; What the IR does:
;   - spins in %while.body for as long as the i1 argument %0 is true;
;   - stores %_c * 16843009 to the null pointer;
;   - stores the byte %conv14 at %incdec.ptr + (%n.addr.042 & 1);
;   - returns %s unchanged.
;
; What the expected assembly below shows about the stack layout:
;   - cm.push {ra, s0-s1} adjusts sp by -16, and the .cfi_offset lines place
;     ra/s0/s1 at CFA-12, CFA-8 and CFA-4, i.e. only the top 12 of those 16
;     bytes hold pushed registers.
;   - "addi sp, sp, -4" then grows the frame to a CFA offset of 20.
;   - Two 4-byte spills follow: a4 at 4(sp) == CFA-16 and a2 at 0(sp) == CFA-20.
;     The a4 slot therefore sits in the 4 bytes of the push allocation that are
;     not covered by a saved register; the a2 slot sits in the extra 4 bytes.
;   - Both spilled values are reloaded after the call to __mulsi3, and the
;     epilogue undoes the extra 4 bytes before cm.popret releases the 16.
; NOTE(review): the assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define ptr @func(ptr %s, i32 %_c, ptr %incdec.ptr, i1 %0, i8 %conv14) #0 {
; RV32-LABEL: func:
; RV32: # %bb.0: # %entry
; RV32-NEXT: cm.push {ra, s0-s1}, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: .cfi_offset ra, -12
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: .cfi_offset s1, -4
; RV32-NEXT: addi sp, sp, -4
; RV32-NEXT: .cfi_def_cfa_offset 20
; RV32-NEXT: sw a4, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: sw a2, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: mv a2, a1
; RV32-NEXT: mv s1, a0
; RV32-NEXT: li a0, 1
; RV32-NEXT: andi a3, a3, 1
; RV32-NEXT: .LBB0_1: # %while.body
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: mv s0, a0
; RV32-NEXT: li a0, 0
; RV32-NEXT: bnez a3, .LBB0_1
; RV32-NEXT: # %bb.2: # %while.end
; RV32-NEXT: lui a0, 4112
; RV32-NEXT: addi a1, a0, 257
; RV32-NEXT: mv a0, a2
; RV32-NEXT: call __mulsi3
; RV32-NEXT: sw a0, 0(zero)
; RV32-NEXT: andi s0, s0, 1
; RV32-NEXT: lw a0, 0(sp) # 4-byte Folded Reload
; RV32-NEXT: add s0, s0, a0
; RV32-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
; RV32-NEXT: sb a0, 0(s0)
; RV32-NEXT: mv a0, s1
; RV32-NEXT: addi sp, sp, 4
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: cm.popret {ra, s0-s1}, 16
entry:
br label %while.body
while.body: ; preds = %while.body, %entry
; The phi is 1 when arriving from %entry and 0 on every trip round the back-edge.
%n.addr.042 = phi i32 [ 1, %entry ], [ 0, %while.body ]
; Loop again while %0 is true; %0 is a function argument and never changes here.
br i1 %0, label %while.body, label %while.end
while.end: ; preds = %while.body
; 16843009 == 0x01010101; the expected output builds it with lui 4112 + addi 257
; and performs the multiply through a call to __mulsi3.
%or5 = mul i32 %_c, 16843009
store i32 %or5, ptr null, align 4
; Only the low bit of the phi value is used as the byte offset into %incdec.ptr.
%1 = and i32 %n.addr.042, 1
%scevgep = getelementptr i8, ptr %incdec.ptr, i32 %1
store i8 %conv14, ptr %scevgep, align 1
ret ptr %s
}