Since https://github.com/ARM-software/acle/pull/276 the ACLE defines attributes to better describe the use of a given SME state. Previously the attributes merely described the possibility of the state being 'shared' or 'preserved', whereas the new attributes carry more semantics and also describe how the data flows through the program.

For ZT0 we already had to add new LLVM IR attributes:
* aarch64_new_zt0
* aarch64_in_zt0
* aarch64_out_zt0
* aarch64_inout_zt0
* aarch64_preserves_zt0

We have now done the same for ZA, adding:
* aarch64_new_za (previously `aarch64_pstate_za_new`)
* aarch64_in_za (more specific variation of `aarch64_pstate_za_shared`)
* aarch64_out_za (more specific variation of `aarch64_pstate_za_shared`)
* aarch64_inout_za (more specific variation of `aarch64_pstate_za_shared`)
* aarch64_preserves_za (previously `aarch64_pstate_za_shared, aarch64_pstate_za_preserved`)

This explicitly removes 'pstate' from the name, because with SME2 and the new ACLE attributes there is a difference between "sharing ZA" (sharing the ZA matrix register with the caller) and "sharing PSTATE.ZA" (sharing either the ZA or ZT0 register, both part of PSTATE.ZA, with the caller).
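To make the mapping concrete, here is a minimal LLVM IR sketch (the function names are hypothetical and not part of this patch) showing the new ZA attributes alongside the old spellings they replace:

; Previously "aarch64_pstate_za_new": the function sets up fresh ZA state
; and shares no ZA contents with its caller.
define void @za_new_example() "aarch64_new_za" {
  ret void
}

; Previously "aarch64_pstate_za_shared": now split by data flow into
; in/out/inout variants; 'inout' means ZA is both an input and an output.
define void @za_inout_example() "aarch64_inout_za" {
  ret void
}

; Previously "aarch64_pstate_za_shared" together with
; "aarch64_pstate_za_preserved": ZA is shared with the caller and
; guaranteed to be unchanged on return.
define void @za_preserves_example() "aarch64_preserves_za" {
  ret void
}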
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s

declare void @private_za_callee()

; Ensure that we don't use tail call optimization when a lazy-save is required.
define void @disable_tailcallopt() "aarch64_inout_za" nounwind {
; CHECK-LABEL: disable_tailcallopt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    sub x10, x29, #16
; CHECK-NEXT:    stur wzr, [x29, #-4]
; CHECK-NEXT:    sturh wzr, [x29, #-6]
; CHECK-NEXT:    stur x9, [x29, #-16]
; CHECK-NEXT:    sturh w8, [x29, #-8]
; CHECK-NEXT:    msr TPIDR2_EL0, x10
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB0_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB0_2:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  tail call void @private_za_callee()
  ret void
}

; Ensure we set up and restore the lazy save correctly for instructions which are lowered to lib calls
define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
; CHECK-LABEL: f128_call_za:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    sub x10, x29, #16
; CHECK-NEXT:    stur wzr, [x29, #-4]
; CHECK-NEXT:    sturh wzr, [x29, #-6]
; CHECK-NEXT:    stur x9, [x29, #-16]
; CHECK-NEXT:    sturh w8, [x29, #-8]
; CHECK-NEXT:    msr TPIDR2_EL0, x10
; CHECK-NEXT:    bl __addtf3
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB1_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB1_2:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %res = fadd fp128 %a, %b
  ret fp128 %res
}