If we know vlenb is a multiple of 16, we don't need any alignment padding. I wrote the code so that it still generates the minimum amount of padding if the stack alignment is 32 or larger, or if RVVBitsPerBlock is smaller than half the stack alignment.
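Below is a minimal standalone sketch of the rule this test exercises, not the in-tree RISCVFrameLowering code; the function name and parameters (scaledRVVUnits, rvvSizeInVlenb, minVlenbBytes, stackAlignBytes) are illustrative assumptions, and it only models the 16-byte stack-alignment cases checked here, not the minimum-padding handling mentioned above for larger alignments.

#include <cstdint>

// Illustrative sketch (not LLVM code): returns how many multiples of vlenb to
// subtract from sp so the adjustment stays stack-aligned for every legal vlenb.
uint64_t scaledRVVUnits(uint64_t rvvSizeInVlenb, uint64_t minVlenbBytes,
                        uint64_t stackAlignBytes) {
  // vlenb is a power of two >= minVlenbBytes, so it is always a multiple of
  // minVlenbBytes. If that granularity already covers the stack alignment
  // (the +v case: vlenb is a multiple of 16), no padding is needed.
  if (minVlenbBytes >= stackAlignBytes)
    return rvvSizeInVlenb;
  // Otherwise round the unit count up so that even the smallest vlenb keeps
  // sp aligned. With zve64x and 16-byte alignment the count is doubled, which
  // is the extra "slli a0, a0, 1" visible in the ZVE64 checks below.
  uint64_t scale = stackAlignBytes / minVlenbBytes;
  return ((rvvSizeInVlenb + scale - 1) / scale) * scale;
}

With zve64x (guaranteed vlenb of only 8 bytes) the test's single-vlenb allocation is rounded up to two units, while with +v (vlenb is at least 16 bytes) no rounding is needed, which matches the updated CHECK lines in the test below.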
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV32,RV32-ZVE64
; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV64,RV64-ZVE64
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV32,RV32-V
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV64,RV64-V

define ptr @scalar_stack_align16() nounwind {
; RV32-ZVE64-LABEL: scalar_stack_align16:
; RV32-ZVE64: # %bb.0:
; RV32-ZVE64-NEXT: addi sp, sp, -48
; RV32-ZVE64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32-ZVE64-NEXT: csrr a0, vlenb
; RV32-ZVE64-NEXT: slli a0, a0, 1
; RV32-ZVE64-NEXT: sub sp, sp, a0
; RV32-ZVE64-NEXT: addi a0, sp, 32
; RV32-ZVE64-NEXT: call extern
; RV32-ZVE64-NEXT: addi a0, sp, 16
; RV32-ZVE64-NEXT: csrr a1, vlenb
; RV32-ZVE64-NEXT: slli a1, a1, 1
; RV32-ZVE64-NEXT: add sp, sp, a1
; RV32-ZVE64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32-ZVE64-NEXT: addi sp, sp, 48
; RV32-ZVE64-NEXT: ret
;
; RV64-ZVE64-LABEL: scalar_stack_align16:
; RV64-ZVE64: # %bb.0:
; RV64-ZVE64-NEXT: addi sp, sp, -48
; RV64-ZVE64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64-ZVE64-NEXT: csrr a0, vlenb
; RV64-ZVE64-NEXT: slli a0, a0, 1
; RV64-ZVE64-NEXT: sub sp, sp, a0
; RV64-ZVE64-NEXT: addi a0, sp, 32
; RV64-ZVE64-NEXT: call extern
; RV64-ZVE64-NEXT: addi a0, sp, 16
; RV64-ZVE64-NEXT: csrr a1, vlenb
; RV64-ZVE64-NEXT: slli a1, a1, 1
; RV64-ZVE64-NEXT: add sp, sp, a1
; RV64-ZVE64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64-ZVE64-NEXT: addi sp, sp, 48
; RV64-ZVE64-NEXT: ret
;
; RV32-V-LABEL: scalar_stack_align16:
; RV32-V: # %bb.0:
; RV32-V-NEXT: addi sp, sp, -48
; RV32-V-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32-V-NEXT: csrr a0, vlenb
; RV32-V-NEXT: sub sp, sp, a0
; RV32-V-NEXT: addi a0, sp, 32
; RV32-V-NEXT: call extern
; RV32-V-NEXT: addi a0, sp, 16
; RV32-V-NEXT: csrr a1, vlenb
; RV32-V-NEXT: add sp, sp, a1
; RV32-V-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32-V-NEXT: addi sp, sp, 48
; RV32-V-NEXT: ret
;
; RV64-V-LABEL: scalar_stack_align16:
; RV64-V: # %bb.0:
; RV64-V-NEXT: addi sp, sp, -48
; RV64-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64-V-NEXT: csrr a0, vlenb
; RV64-V-NEXT: sub sp, sp, a0
; RV64-V-NEXT: addi a0, sp, 32
; RV64-V-NEXT: call extern
; RV64-V-NEXT: addi a0, sp, 16
; RV64-V-NEXT: csrr a1, vlenb
; RV64-V-NEXT: add sp, sp, a1
; RV64-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64-V-NEXT: addi sp, sp, 48
; RV64-V-NEXT: ret
  %a = alloca <vscale x 2 x i32>
  %c = alloca i64, align 16
  call void @extern(ptr %a)
  ret ptr %c
}

declare void @extern(ptr)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV64: {{.*}}