Files
clang-p2996/llvm/test/CodeGen/RISCV/pr58286.ll
Philip Reames b775333068 [RISCV] Fold low 12 bits into instruction during frame index elimination
Fold the low 12 bits of an immediate offset into the offset field of the using instruction. That using instruction will be a load, store, or addi which performs an add of a signed 12-bit immediate as part of its operation. Splitting out the low bits allows the high bits to be generated via a single LUI instead of needing an LUI/ADDI pair.

The codegen effect of this is mostly converting cases where "split addi" kicks in to using LUI + a folded offset. There are a couple of straight dynamic instruction count wins, and using a canonical LUI is probably better than a chain of SP adds if the dynamic instruction count is equal.

Differential Revision: https://reviews.llvm.org/D139037
2022-12-02 11:54:06 -08:00

278 lines
9.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 < %s | FileCheck --check-prefix=RV64I %s
; RUN: llc -mtriple=riscv32 < %s | FileCheck --check-prefix=RV32I %s
;; External i32 global that the volatile loads and stores in the functions below read and write.
@var = external global i32
;; Test: a stack frame larger than 4 KiB combined with many simultaneously
;; live values. The expected output for both targets addresses the store to
;; %space as 12(s0) after "lui s0, 1" / "add s0, sp, s0", with s0 saved to and
;; reloaded from 0(sp) around that store.
;; NOTE(review): presumably s0 is borrowed because no scratch register is free
;; at that point -- confirm against the RISC-V frame index elimination code.
define void @func() {
; RV64I-LABEL: func:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, 16
; RV64I-NEXT: sub sp, sp, a0
; RV64I-NEXT: .cfi_def_cfa_offset 4112
; RV64I-NEXT: lui a0, %hi(var)
; RV64I-NEXT: lw a1, %lo(var)(a0)
; RV64I-NEXT: lw a2, %lo(var)(a0)
; RV64I-NEXT: lw a3, %lo(var)(a0)
; RV64I-NEXT: lw a4, %lo(var)(a0)
; RV64I-NEXT: lw a5, %lo(var)(a0)
; RV64I-NEXT: lw a6, %lo(var)(a0)
; RV64I-NEXT: lw a7, %lo(var)(a0)
; RV64I-NEXT: lw t0, %lo(var)(a0)
; RV64I-NEXT: lw t1, %lo(var)(a0)
; RV64I-NEXT: lw t2, %lo(var)(a0)
; RV64I-NEXT: lw t3, %lo(var)(a0)
; RV64I-NEXT: lw t4, %lo(var)(a0)
; RV64I-NEXT: lw t5, %lo(var)(a0)
; RV64I-NEXT: lw t6, %lo(var)(a0)
; RV64I-NEXT: sd s0, 0(sp)
; RV64I-NEXT: lui s0, 1
; RV64I-NEXT: add s0, sp, s0
; RV64I-NEXT: sw a1, 12(s0)
; RV64I-NEXT: ld s0, 0(sp)
; RV64I-NEXT: sw a1, %lo(var)(a0)
; RV64I-NEXT: sw a2, %lo(var)(a0)
; RV64I-NEXT: sw a3, %lo(var)(a0)
; RV64I-NEXT: sw a4, %lo(var)(a0)
; RV64I-NEXT: sw a5, %lo(var)(a0)
; RV64I-NEXT: sw a6, %lo(var)(a0)
; RV64I-NEXT: sw a7, %lo(var)(a0)
; RV64I-NEXT: sw t0, %lo(var)(a0)
; RV64I-NEXT: sw t1, %lo(var)(a0)
; RV64I-NEXT: sw t2, %lo(var)(a0)
; RV64I-NEXT: sw t3, %lo(var)(a0)
; RV64I-NEXT: sw t4, %lo(var)(a0)
; RV64I-NEXT: sw t5, %lo(var)(a0)
; RV64I-NEXT: sw t6, %lo(var)(a0)
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, 16
; RV64I-NEXT: add sp, sp, a0
; RV64I-NEXT: ret
;
; RV32I-LABEL: func:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, 16
; RV32I-NEXT: sub sp, sp, a0
; RV32I-NEXT: .cfi_def_cfa_offset 4112
; RV32I-NEXT: lui a0, %hi(var)
; RV32I-NEXT: lw a1, %lo(var)(a0)
; RV32I-NEXT: lw a2, %lo(var)(a0)
; RV32I-NEXT: lw a3, %lo(var)(a0)
; RV32I-NEXT: lw a4, %lo(var)(a0)
; RV32I-NEXT: lw a5, %lo(var)(a0)
; RV32I-NEXT: lw a6, %lo(var)(a0)
; RV32I-NEXT: lw a7, %lo(var)(a0)
; RV32I-NEXT: lw t0, %lo(var)(a0)
; RV32I-NEXT: lw t1, %lo(var)(a0)
; RV32I-NEXT: lw t2, %lo(var)(a0)
; RV32I-NEXT: lw t3, %lo(var)(a0)
; RV32I-NEXT: lw t4, %lo(var)(a0)
; RV32I-NEXT: lw t5, %lo(var)(a0)
; RV32I-NEXT: lw t6, %lo(var)(a0)
; RV32I-NEXT: sw s0, 0(sp)
; RV32I-NEXT: lui s0, 1
; RV32I-NEXT: add s0, sp, s0
; RV32I-NEXT: sw a1, 12(s0)
; RV32I-NEXT: lw s0, 0(sp)
; RV32I-NEXT: sw a1, %lo(var)(a0)
; RV32I-NEXT: sw a2, %lo(var)(a0)
; RV32I-NEXT: sw a3, %lo(var)(a0)
; RV32I-NEXT: sw a4, %lo(var)(a0)
; RV32I-NEXT: sw a5, %lo(var)(a0)
; RV32I-NEXT: sw a6, %lo(var)(a0)
; RV32I-NEXT: sw a7, %lo(var)(a0)
; RV32I-NEXT: sw t0, %lo(var)(a0)
; RV32I-NEXT: sw t1, %lo(var)(a0)
; RV32I-NEXT: sw t2, %lo(var)(a0)
; RV32I-NEXT: sw t3, %lo(var)(a0)
; RV32I-NEXT: sw t4, %lo(var)(a0)
; RV32I-NEXT: sw t5, %lo(var)(a0)
; RV32I-NEXT: sw t6, %lo(var)(a0)
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, 16
; RV32I-NEXT: add sp, sp, a0
; RV32I-NEXT: ret
;; %space is the one stack slot that is written. %stackspace is never accessed;
;; its 1024 x i32 (4096 bytes) only enlarges the frame, and the checks above
;; expect a CFA offset of 4112.
%space = alloca i32, align 4
%stackspace = alloca[1024 x i32], align 4
;; Load values to increase register pressure.
;; Fourteen volatile loads; the expected output keeps them in a1-a7 and t0-t6.
%v0 = load volatile i32, i32* @var
%v1 = load volatile i32, i32* @var
%v2 = load volatile i32, i32* @var
%v3 = load volatile i32, i32* @var
%v4 = load volatile i32, i32* @var
%v5 = load volatile i32, i32* @var
%v6 = load volatile i32, i32* @var
%v7 = load volatile i32, i32* @var
%v8 = load volatile i32, i32* @var
%v9 = load volatile i32, i32* @var
%v10 = load volatile i32, i32* @var
%v11 = load volatile i32, i32* @var
%v12 = load volatile i32, i32* @var
%v13 = load volatile i32, i32* @var
;; The only access to the local frame: it is issued while all fourteen loaded
;; values are still needed by the stores that follow.
store volatile i32 %v0, i32* %space
;; Store values so they are used.
store volatile i32 %v0, i32* @var
store volatile i32 %v1, i32* @var
store volatile i32 %v2, i32* @var
store volatile i32 %v3, i32* @var
store volatile i32 %v4, i32* @var
store volatile i32 %v5, i32* @var
store volatile i32 %v6, i32* @var
store volatile i32 %v7, i32* @var
store volatile i32 %v8, i32* @var
store volatile i32 %v9, i32* @var
store volatile i32 %v10, i32* @var
store volatile i32 %v11, i32* @var
store volatile i32 %v12, i32* @var
store volatile i32 %v13, i32* @var
ret void
}
;; Test: the large-frame, high-register-pressure body sits in %bar, behind a
;; conditional branch on %c; %foo only returns. The expected output for both
;; targets tests the low bit of a0 and branches first, and performs the stack
;; adjustment (lui / addi[w] / sub sp) only inside the %bar block, so the path
;; that jumps straight to %foo executes just "ret".
;; NOTE(review): judging by the function name this exercises shrink wrapping
;; of the prologue/epilogue -- confirm against the pass that places them.
define void @shrink_wrap(i1 %c) {
; RV64I-LABEL: shrink_wrap:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a0, a0, 1
; RV64I-NEXT: bnez a0, .LBB1_2
; RV64I-NEXT: # %bb.1: # %bar
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, 16
; RV64I-NEXT: sub sp, sp, a0
; RV64I-NEXT: .cfi_def_cfa_offset 4112
; RV64I-NEXT: lui a0, %hi(var)
; RV64I-NEXT: lw a1, %lo(var)(a0)
; RV64I-NEXT: lw a2, %lo(var)(a0)
; RV64I-NEXT: lw a3, %lo(var)(a0)
; RV64I-NEXT: lw a4, %lo(var)(a0)
; RV64I-NEXT: lw a5, %lo(var)(a0)
; RV64I-NEXT: lw a6, %lo(var)(a0)
; RV64I-NEXT: lw a7, %lo(var)(a0)
; RV64I-NEXT: lw t0, %lo(var)(a0)
; RV64I-NEXT: lw t1, %lo(var)(a0)
; RV64I-NEXT: lw t2, %lo(var)(a0)
; RV64I-NEXT: lw t3, %lo(var)(a0)
; RV64I-NEXT: lw t4, %lo(var)(a0)
; RV64I-NEXT: lw t5, %lo(var)(a0)
; RV64I-NEXT: lw t6, %lo(var)(a0)
; RV64I-NEXT: sd s0, 0(sp)
; RV64I-NEXT: lui s0, 1
; RV64I-NEXT: add s0, sp, s0
; RV64I-NEXT: sw a1, 12(s0)
; RV64I-NEXT: ld s0, 0(sp)
; RV64I-NEXT: sw a1, %lo(var)(a0)
; RV64I-NEXT: sw a2, %lo(var)(a0)
; RV64I-NEXT: sw a3, %lo(var)(a0)
; RV64I-NEXT: sw a4, %lo(var)(a0)
; RV64I-NEXT: sw a5, %lo(var)(a0)
; RV64I-NEXT: sw a6, %lo(var)(a0)
; RV64I-NEXT: sw a7, %lo(var)(a0)
; RV64I-NEXT: sw t0, %lo(var)(a0)
; RV64I-NEXT: sw t1, %lo(var)(a0)
; RV64I-NEXT: sw t2, %lo(var)(a0)
; RV64I-NEXT: sw t3, %lo(var)(a0)
; RV64I-NEXT: sw t4, %lo(var)(a0)
; RV64I-NEXT: sw t5, %lo(var)(a0)
; RV64I-NEXT: sw t6, %lo(var)(a0)
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, 16
; RV64I-NEXT: add sp, sp, a0
; RV64I-NEXT: .LBB1_2: # %foo
; RV64I-NEXT: ret
;
; RV32I-LABEL: shrink_wrap:
; RV32I: # %bb.0:
; RV32I-NEXT: andi a0, a0, 1
; RV32I-NEXT: bnez a0, .LBB1_2
; RV32I-NEXT: # %bb.1: # %bar
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, 16
; RV32I-NEXT: sub sp, sp, a0
; RV32I-NEXT: .cfi_def_cfa_offset 4112
; RV32I-NEXT: lui a0, %hi(var)
; RV32I-NEXT: lw a1, %lo(var)(a0)
; RV32I-NEXT: lw a2, %lo(var)(a0)
; RV32I-NEXT: lw a3, %lo(var)(a0)
; RV32I-NEXT: lw a4, %lo(var)(a0)
; RV32I-NEXT: lw a5, %lo(var)(a0)
; RV32I-NEXT: lw a6, %lo(var)(a0)
; RV32I-NEXT: lw a7, %lo(var)(a0)
; RV32I-NEXT: lw t0, %lo(var)(a0)
; RV32I-NEXT: lw t1, %lo(var)(a0)
; RV32I-NEXT: lw t2, %lo(var)(a0)
; RV32I-NEXT: lw t3, %lo(var)(a0)
; RV32I-NEXT: lw t4, %lo(var)(a0)
; RV32I-NEXT: lw t5, %lo(var)(a0)
; RV32I-NEXT: lw t6, %lo(var)(a0)
; RV32I-NEXT: sw s0, 0(sp)
; RV32I-NEXT: lui s0, 1
; RV32I-NEXT: add s0, sp, s0
; RV32I-NEXT: sw a1, 12(s0)
; RV32I-NEXT: lw s0, 0(sp)
; RV32I-NEXT: sw a1, %lo(var)(a0)
; RV32I-NEXT: sw a2, %lo(var)(a0)
; RV32I-NEXT: sw a3, %lo(var)(a0)
; RV32I-NEXT: sw a4, %lo(var)(a0)
; RV32I-NEXT: sw a5, %lo(var)(a0)
; RV32I-NEXT: sw a6, %lo(var)(a0)
; RV32I-NEXT: sw a7, %lo(var)(a0)
; RV32I-NEXT: sw t0, %lo(var)(a0)
; RV32I-NEXT: sw t1, %lo(var)(a0)
; RV32I-NEXT: sw t2, %lo(var)(a0)
; RV32I-NEXT: sw t3, %lo(var)(a0)
; RV32I-NEXT: sw t4, %lo(var)(a0)
; RV32I-NEXT: sw t5, %lo(var)(a0)
; RV32I-NEXT: sw t6, %lo(var)(a0)
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, 16
; RV32I-NEXT: add sp, sp, a0
; RV32I-NEXT: .LBB1_2: # %foo
; RV32I-NEXT: ret
;; Both allocas live in the entry block. %space is written only in %bar and
;; %stackspace is never accessed; its 1024 x i32 (4096 bytes) only enlarges
;; the frame, and the checks above expect a CFA offset of 4112.
%space = alloca i32, align 4
%stackspace = alloca[1024 x i32], align 4
;; %c true goes straight to the return block; %c false runs the heavy block.
br i1 %c, label %foo, label %bar
bar:
;; Load values to increase register pressure.
;; Fourteen volatile loads; the expected output keeps them in a1-a7 and t0-t6.
%v0 = load volatile i32, i32* @var
%v1 = load volatile i32, i32* @var
%v2 = load volatile i32, i32* @var
%v3 = load volatile i32, i32* @var
%v4 = load volatile i32, i32* @var
%v5 = load volatile i32, i32* @var
%v6 = load volatile i32, i32* @var
%v7 = load volatile i32, i32* @var
%v8 = load volatile i32, i32* @var
%v9 = load volatile i32, i32* @var
%v10 = load volatile i32, i32* @var
%v11 = load volatile i32, i32* @var
%v12 = load volatile i32, i32* @var
%v13 = load volatile i32, i32* @var
;; The only access to the local frame: it is issued while all fourteen loaded
;; values are still needed by the stores that follow.
store volatile i32 %v0, i32* %space
;; Store values so they are used.
store volatile i32 %v0, i32* @var
store volatile i32 %v1, i32* @var
store volatile i32 %v2, i32* @var
store volatile i32 %v3, i32* @var
store volatile i32 %v4, i32* @var
store volatile i32 %v5, i32* @var
store volatile i32 %v6, i32* @var
store volatile i32 %v7, i32* @var
store volatile i32 %v8, i32* @var
store volatile i32 %v9, i32* @var
store volatile i32 %v10, i32* @var
store volatile i32 %v11, i32* @var
store volatile i32 %v12, i32* @var
store volatile i32 %v13, i32* @var
br label %foo
foo:
ret void
}