If we're falling back to generic constant formation into a register plus an add/sub of that register, we can check whether the constant is a 12-bit value left shifted by 2 or 3. If so, we can use an sh2add or sh3add to perform the shift and add in a single instruction. This is profitable when the unshifted constant would require two instructions (LUI/ADDI) to form, and it is never harmful since we need at least two instructions regardless of the constant's value. Since stacks are 16-byte aligned by default, sh3add lets us address (aligned) stack data out to 2^14 bytes (i.e. 16 KiB) in at most two instructions with Zba.
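For example, in the @test function below, forming the sp + 4108 address passed in a2 takes three instructions without Zba but only two with it, since 4108 is 1027 shifted left by 2 (the snippets are taken from the RV32I and RV32ZBA check lines):

    # Base RV32I: materialize 4108, then add it to sp.
    lui  a2, 1          # a2 = 4096
    addi a2, a2, 12     # a2 = 4108
    add  a2, sp, a2     # a2 = sp + 4108

    # With Zba: fold the shift and the add into one sh2add.
    li     a2, 1027     # 1027 << 2 == 4108
    sh2add a2, a2, sp   # a2 = sp + (1027 << 2) = sp + 4108

The sh3add cases work the same way with a shift of 3, which is where the roughly 16 KiB reach mentioned above comes from.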
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=RV32,RV32I
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+zba < %s \
; RUN: | FileCheck %s -check-prefixes=RV32,RV32ZBA
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=RV64,RV64I
; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+zba < %s \
; RUN: | FileCheck %s -check-prefixes=RV64,RV64ZBA

declare void @inspect(...)

define void @test() {
; RV32I-LABEL: test:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -2032
; RV32I-NEXT: .cfi_def_cfa_offset 2032
; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: addi sp, sp, -2048
; RV32I-NEXT: addi sp, sp, -1120
; RV32I-NEXT: .cfi_def_cfa_offset 5200
; RV32I-NEXT: addi a0, sp, 12
; RV32I-NEXT: addi a1, sp, 2047
; RV32I-NEXT: addi a1, a1, 13
; RV32I-NEXT: lui a2, 1
; RV32I-NEXT: addi a2, a2, 12
; RV32I-NEXT: add a2, sp, a2
; RV32I-NEXT: lui a3, 1
; RV32I-NEXT: addi a3, a3, 1036
; RV32I-NEXT: add a3, sp, a3
; RV32I-NEXT: call inspect
; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: addi sp, sp, 1136
; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: ret
;
; RV32ZBA-LABEL: test:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
; RV32ZBA-NEXT: .cfi_def_cfa_offset 2032
; RV32ZBA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32ZBA-NEXT: .cfi_offset ra, -4
; RV32ZBA-NEXT: addi sp, sp, -2048
; RV32ZBA-NEXT: addi sp, sp, -1120
; RV32ZBA-NEXT: .cfi_def_cfa_offset 5200
; RV32ZBA-NEXT: addi a0, sp, 12
; RV32ZBA-NEXT: addi a1, sp, 2047
; RV32ZBA-NEXT: addi a1, a1, 13
; RV32ZBA-NEXT: li a2, 1027
; RV32ZBA-NEXT: sh2add a2, a2, sp
; RV32ZBA-NEXT: li a3, 1283
; RV32ZBA-NEXT: sh2add a3, a3, sp
; RV32ZBA-NEXT: call inspect
; RV32ZBA-NEXT: addi sp, sp, 2032
; RV32ZBA-NEXT: addi sp, sp, 1136
; RV32ZBA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32ZBA-NEXT: addi sp, sp, 2032
; RV32ZBA-NEXT: ret
;
; RV64I-LABEL: test:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -2032
; RV64I-NEXT: .cfi_def_cfa_offset 2032
; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: addi sp, sp, -2048
; RV64I-NEXT: addi sp, sp, -1120
; RV64I-NEXT: .cfi_def_cfa_offset 5200
; RV64I-NEXT: addi a0, sp, 8
; RV64I-NEXT: addi a1, sp, 2047
; RV64I-NEXT: addi a1, a1, 9
; RV64I-NEXT: lui a2, 1
; RV64I-NEXT: addiw a2, a2, 8
; RV64I-NEXT: add a2, sp, a2
; RV64I-NEXT: lui a3, 1
; RV64I-NEXT: addiw a3, a3, 1032
; RV64I-NEXT: add a3, sp, a3
; RV64I-NEXT: call inspect
; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: addi sp, sp, 1136
; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: test:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: addi sp, sp, -2032
; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032
; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64ZBA-NEXT: .cfi_offset ra, -8
; RV64ZBA-NEXT: addi sp, sp, -2048
; RV64ZBA-NEXT: addi sp, sp, -1120
; RV64ZBA-NEXT: .cfi_def_cfa_offset 5200
; RV64ZBA-NEXT: addi a0, sp, 8
; RV64ZBA-NEXT: addi a1, sp, 2047
; RV64ZBA-NEXT: addi a1, a1, 9
; RV64ZBA-NEXT: li a2, 513
; RV64ZBA-NEXT: sh3add a2, a2, sp
; RV64ZBA-NEXT: li a3, 641
; RV64ZBA-NEXT: sh3add a3, a3, sp
; RV64ZBA-NEXT: call inspect
; RV64ZBA-NEXT: addi sp, sp, 2032
; RV64ZBA-NEXT: addi sp, sp, 1136
; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64ZBA-NEXT: addi sp, sp, 2032
; RV64ZBA-NEXT: ret
  %p4 = alloca [64 x i8], align 1
  %p3 = alloca [1024 x i8], align 1
  %p2 = alloca [2048 x i8], align 1
  %p1 = alloca [2048 x i8], align 1
  call void (...) @inspect(ptr %p1, ptr %p2, ptr %p3, ptr %p4)
  ret void
}

define void @align_8() {
; RV32I-LABEL: align_8:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -2032
; RV32I-NEXT: .cfi_def_cfa_offset 2032
; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: addi sp, sp, -2048
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: .cfi_def_cfa_offset 4112
; RV32I-NEXT: addi a0, sp, 7
; RV32I-NEXT: lui a1, 1
; RV32I-NEXT: addi a1, a1, 8
; RV32I-NEXT: add a1, sp, a1
; RV32I-NEXT: call inspect
; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: ret
;
; RV32ZBA-LABEL: align_8:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
; RV32ZBA-NEXT: .cfi_def_cfa_offset 2032
; RV32ZBA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32ZBA-NEXT: .cfi_offset ra, -4
; RV32ZBA-NEXT: addi sp, sp, -2048
; RV32ZBA-NEXT: addi sp, sp, -32
; RV32ZBA-NEXT: .cfi_def_cfa_offset 4112
; RV32ZBA-NEXT: addi a0, sp, 7
; RV32ZBA-NEXT: li a1, 513
; RV32ZBA-NEXT: sh3add a1, a1, sp
; RV32ZBA-NEXT: call inspect
; RV32ZBA-NEXT: addi sp, sp, 2032
; RV32ZBA-NEXT: addi sp, sp, 48
; RV32ZBA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32ZBA-NEXT: addi sp, sp, 2032
; RV32ZBA-NEXT: ret
;
; RV64I-LABEL: align_8:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -2032
; RV64I-NEXT: .cfi_def_cfa_offset 2032
; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: addi sp, sp, -2048
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: .cfi_def_cfa_offset 4128
; RV64I-NEXT: addi a0, sp, 15
; RV64I-NEXT: lui a1, 1
; RV64I-NEXT: addiw a1, a1, 16
; RV64I-NEXT: add a1, sp, a1
; RV64I-NEXT: call inspect
; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: align_8:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: addi sp, sp, -2032
; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032
; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64ZBA-NEXT: .cfi_offset ra, -8
; RV64ZBA-NEXT: addi sp, sp, -2048
; RV64ZBA-NEXT: addi sp, sp, -48
; RV64ZBA-NEXT: .cfi_def_cfa_offset 4128
; RV64ZBA-NEXT: addi a0, sp, 15
; RV64ZBA-NEXT: li a1, 514
; RV64ZBA-NEXT: sh3add a1, a1, sp
; RV64ZBA-NEXT: call inspect
; RV64ZBA-NEXT: addi sp, sp, 2032
; RV64ZBA-NEXT: addi sp, sp, 64
; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64ZBA-NEXT: addi sp, sp, 2032
; RV64ZBA-NEXT: ret
  %p2 = alloca i8, align 8
  %p1 = alloca [4097 x i8], align 1
  call void (...) @inspect(ptr %p1, ptr %p2)
  ret void
}

define void @align_4() {
; RV32I-LABEL: align_4:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -2032
; RV32I-NEXT: .cfi_def_cfa_offset 2032
; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: addi sp, sp, -2048
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: .cfi_def_cfa_offset 4112
; RV32I-NEXT: addi a0, sp, 7
; RV32I-NEXT: lui a1, 1
; RV32I-NEXT: addi a1, a1, 8
; RV32I-NEXT: add a1, sp, a1
; RV32I-NEXT: call inspect
; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: ret
;
; RV32ZBA-LABEL: align_4:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
; RV32ZBA-NEXT: .cfi_def_cfa_offset 2032
; RV32ZBA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32ZBA-NEXT: .cfi_offset ra, -4
; RV32ZBA-NEXT: addi sp, sp, -2048
; RV32ZBA-NEXT: addi sp, sp, -32
; RV32ZBA-NEXT: .cfi_def_cfa_offset 4112
; RV32ZBA-NEXT: addi a0, sp, 7
; RV32ZBA-NEXT: li a1, 513
; RV32ZBA-NEXT: sh3add a1, a1, sp
; RV32ZBA-NEXT: call inspect
; RV32ZBA-NEXT: addi sp, sp, 2032
; RV32ZBA-NEXT: addi sp, sp, 48
; RV32ZBA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32ZBA-NEXT: addi sp, sp, 2032
; RV32ZBA-NEXT: ret
;
; RV64I-LABEL: align_4:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -2032
; RV64I-NEXT: .cfi_def_cfa_offset 2032
; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: addi sp, sp, -2048
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: .cfi_def_cfa_offset 4128
; RV64I-NEXT: addi a0, sp, 19
; RV64I-NEXT: lui a1, 1
; RV64I-NEXT: addiw a1, a1, 20
; RV64I-NEXT: add a1, sp, a1
; RV64I-NEXT: call inspect
; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: align_4:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: addi sp, sp, -2032
; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032
; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64ZBA-NEXT: .cfi_offset ra, -8
; RV64ZBA-NEXT: addi sp, sp, -2048
; RV64ZBA-NEXT: addi sp, sp, -48
; RV64ZBA-NEXT: .cfi_def_cfa_offset 4128
; RV64ZBA-NEXT: addi a0, sp, 19
; RV64ZBA-NEXT: li a1, 1029
; RV64ZBA-NEXT: sh2add a1, a1, sp
; RV64ZBA-NEXT: call inspect
; RV64ZBA-NEXT: addi sp, sp, 2032
; RV64ZBA-NEXT: addi sp, sp, 64
; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64ZBA-NEXT: addi sp, sp, 2032
; RV64ZBA-NEXT: ret
  %p2 = alloca i8, align 4
  %p1 = alloca [4097 x i8], align 1
  call void (...) @inspect(ptr %p1, ptr %p2)
  ret void
}

define void @align_2() {
; RV32-LABEL: align_2:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi sp, sp, -2048
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: .cfi_def_cfa_offset 4112
; RV32-NEXT: addi a0, sp, 9
; RV32-NEXT: lui a1, 1
; RV32-NEXT: addi a1, a1, 10
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: call inspect
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
;
; RV64-LABEL: align_2:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: addi sp, sp, -2048
; RV64-NEXT: addi sp, sp, -48
; RV64-NEXT: .cfi_def_cfa_offset 4128
; RV64-NEXT: addi a0, sp, 21
; RV64-NEXT: lui a1, 1
; RV64-NEXT: addiw a1, a1, 22
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: call inspect
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: addi sp, sp, 64
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
  %p2 = alloca i8, align 2
  %p1 = alloca [4097 x i8], align 1
  call void (...) @inspect(ptr %p1, ptr %p2)
  ret void
}

define void @align_1() {
; RV32-LABEL: align_1:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi sp, sp, -2048
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: .cfi_def_cfa_offset 4112
; RV32-NEXT: addi a0, sp, 10
; RV32-NEXT: lui a1, 1
; RV32-NEXT: addi a1, a1, 11
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: call inspect
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
;
; RV64-LABEL: align_1:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: addi sp, sp, -2048
; RV64-NEXT: addi sp, sp, -48
; RV64-NEXT: .cfi_def_cfa_offset 4128
; RV64-NEXT: addi a0, sp, 22
; RV64-NEXT: lui a1, 1
; RV64-NEXT: addiw a1, a1, 23
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: call inspect
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: addi sp, sp, 64
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
  %p2 = alloca i8, align 1
  %p1 = alloca [4097 x i8], align 1
  call void (...) @inspect(ptr %p1, ptr %p2)
  ret void
}