clang-p2996/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
Sander de Smalen 3f9d64a2ad [AArch64][SME] Always allocate a lazy-save buffer if a function has ZA state.
We already do this in most cases; the exception is instructions that get
expanded to function calls (e.g. when lowering operations on fp128 values),
for which we currently allocate a lazy-save buffer only temporarily.
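
For example, in IR terms (a minimal sketch; the function name @sketch is
illustrative, and the f128_call_za test below exercises exactly this path):

  define fp128 @sketch(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwind {
    ; fp128 addition has no native AArch64 instruction, so this fadd is
    ; expanded to a call to the __addtf3 libcall, which must be covered by
    ; a lazy save like any other private-ZA call.
    %res = fadd fp128 %a, %b
    ret fp128 %res
  }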

The code generated in this case is incorrect, however: it seems to pass
the wrong address for the TPIDR2 object to the ZA restore function. By
always allocating the lazy-save buffer once, up front, we avoid the issue
entirely.
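
For reference, the value programmed into TPIDR2_EL0 must be the address of
the 16-byte TPIDR2 block describing the save area, not of the buffer
itself. A sketch of that block as an IR type, assuming the layout used by
the AAPCS64 SME support routines (the type and field names here are
illustrative):

  %tpidr2_block = type {
    ptr,      ; base of the SVL.B x SVL.B-byte lazy-save buffer
    i16,      ; num_za_save_slices: number of ZA slices to save (SVL.B)
    [6 x i8]  ; reserved, must be zero
  }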

The cost is that we now also allocate such a buffer when it is not
needed. A follow-up patch could address that by removing the lazy-save
buffer when it goes unused.

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D138208
2022-11-21 16:33:23 +00:00


; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s

declare void @private_za_callee()

; Ensure that we don't use tail call optimization when a lazy-save is required.
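; A tail call here would be a plain 'b', which frees the caller's frame and
; with it the TPIDR2 block and lazy-save buffer, so the call must remain a
; 'bl'. Expected sequence (see the CHECK lines below): rdsvl yields SVL.B,
; an SVL.B*SVL.B-byte buffer is carved off the stack, the TPIDR2 block at
; x29-16 is filled with the buffer address and the slice count, and
; TPIDR2_EL0 is armed before the call. Afterwards ZA is re-enabled and, if
; TPIDR2_EL0 reads back as zero (a callee committed the lazy save),
; __arm_tpidr2_restore reloads ZA.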
define void @disable_tailcallopt() "aarch64_pstate_za_shared" nounwind {
; CHECK-LABEL: disable_tailcallopt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    mul x8, x8, x8
; CHECK-NEXT:    sub x9, x9, x8
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    sub x10, x29, #16
; CHECK-NEXT:    stur x9, [x29, #-16]
; CHECK-NEXT:    sturh w8, [x29, #-8]
; CHECK-NEXT:    msr TPIDR2_EL0, x10
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    cbnz x8, .LBB0_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB0_2:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  tail call void @private_za_callee()
  ret void
}

; Ensure we set up and restore the lazy save correctly for instructions
; that are lowered to libcalls.
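; The fadd below has no native fp128 lowering on AArch64, so it is expanded
; to a call to the __addtf3 libcall; that callee has no ZA interface, so the
; same lazy-save sequence as above must bracket the call.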
define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwind {
; CHECK-LABEL: f128_call_za:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    mul x8, x8, x8
; CHECK-NEXT:    sub x9, x9, x8
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    sub x10, x29, #16
; CHECK-NEXT:    stur x9, [x29, #-16]
; CHECK-NEXT:    sturh w8, [x29, #-8]
; CHECK-NEXT:    msr TPIDR2_EL0, x10
; CHECK-NEXT:    bl __addtf3
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    cbnz x8, .LBB1_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB1_2:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %res = fadd fp128 %a, %b
  ret fp128 %res
}