If we know vlen is a multiple of 16, we don't need any alignment padding. The code is written so that it generates the minimum amount of padding when the stack alignment is 32 or larger, or when RVVBitsPerBlock is smaller than half the stack alignment.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs --riscv-no-aliases < %s | FileCheck %s

target triple = "riscv64-unknown-unknown-elf"

; An aggregate of three scalable vectors (3 * vlenb bytes). Spilling it
; requires a stack object whose size is a runtime multiple of vlenb, which
; exercises the RVV stack-frame layout (sp adjustment by 3 * vlenb and the
; corresponding CFI escape).
%my_type = type [3 x <vscale x 1 x double>]
define void @test(ptr %addr) {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrrs a1, vlenb, zero
; CHECK-NEXT: slli a2, a1, 1
; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 3 * vlenb
; CHECK-NEXT: csrrs a1, vlenb, zero
; CHECK-NEXT: add a2, a0, a1
; CHECK-NEXT: vl1re64.v v8, (a2)
; CHECK-NEXT: slli a2, a1, 1
; CHECK-NEXT: vl1re64.v v9, (a0)
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: vl1re64.v v10, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v9, (a0)
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vs1r.v v10, (a2)
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs1r.v v8, (a0)
; CHECK-NEXT: csrrs a0, vlenb, zero
; CHECK-NEXT: slli a1, a0, 1
; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
; Loads the 3 x <vscale x 1 x double> aggregate from %addr and stores it to a
; local alloca, forcing a scalable-sized (3 * vlenb) stack object.
entry:
  %ret = alloca %my_type, align 8
  %val = load %my_type, ptr %addr
  store %my_type %val, ptr %ret, align 8
  ret void
}