; Since https://github.com/ARM-software/acle/pull/276 the ACLE defines
; attributes to better describe the use of a given SME state. Previously the
; attributes merely described the possibility of the state being 'shared' or
; 'preserved', whereas the new attributes carry more semantics and also
; describe how the data flows through the program.
;
; For ZT0 we already had to add new LLVM IR attributes:
;   * aarch64_new_zt0
;   * aarch64_in_zt0
;   * aarch64_out_zt0
;   * aarch64_inout_zt0
;   * aarch64_preserves_zt0
;
; We have now done the same for ZA, such that we add:
;   * aarch64_new_za (previously `aarch64_pstate_za_new`)
;   * aarch64_in_za (more specific variation of `aarch64_pstate_za_shared`)
;   * aarch64_out_za (more specific variation of `aarch64_pstate_za_shared`)
;   * aarch64_inout_za (more specific variation of `aarch64_pstate_za_shared`)
;   * aarch64_preserves_za (previously `aarch64_pstate_za_shared,
;     aarch64_pstate_za_preserved`)
;
; This explicitly removes 'pstate' from the name, because with SME2 and the
; new ACLE attributes there is a difference between "sharing ZA" (sharing the
; ZA matrix register with the caller) and "sharing PSTATE.ZA" (sharing either
; the ZA or ZT0 register, both of which are part of PSTATE.ZA, with the
; caller).
; File viewer metadata (duplicated in the capture): 66 lines, 2.5 KiB, LLVM
; Two invocations of opt follow: the first applies -aarch64-sme-abi once, the
; second applies it twice in the same pipeline. Both are verified against the
; same expectations in this file, so applying the transformation a second time
; must not change the output.
; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi %s | FileCheck %s
; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi -aarch64-sme-abi %s | FileCheck %s

; External callee carrying the "aarch64_inout_za" attribute, i.e. a function
; that shares ZA with its caller.
declare void @shared_za_callee() "aarch64_inout_za"

; A function marked "aarch64_new_za" that calls a callee sharing ZA.
;
; Expected output:
;   * a new `prelude` block reads TPIDR2 and, when it is non-zero, branches to
;     `save.za`, which calls __arm_tpidr2_save and resets TPIDR2 to 0;
;   * ZA is enabled and zeroed before the original body runs;
;   * ZA is disabled immediately before the return.
define void @private_za() "aarch64_new_za" {
; CHECK-LABEL: @private_za(
; CHECK-NEXT:  prelude:
; CHECK-NEXT:    [[TPIDR2:%.*]] = call i64 @llvm.aarch64.sme.get.tpidr2()
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[TPIDR2]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[SAVE_ZA:%.*]], label [[TMP0:%.*]]
; CHECK:       save.za:
; CHECK-NEXT:    call aarch64_sme_preservemost_from_x0 void @__arm_tpidr2_save()
; CHECK-NEXT:    call void @llvm.aarch64.sme.set.tpidr2(i64 0)
; CHECK-NEXT:    br label [[TMP0]]
; CHECK:       0:
; CHECK-NEXT:    call void @llvm.aarch64.sme.za.enable()
; CHECK-NEXT:    call void @llvm.aarch64.sme.zero(i32 255)
; CHECK-NEXT:    call void @shared_za_callee()
; CHECK-NEXT:    call void @llvm.aarch64.sme.za.disable()
; CHECK-NEXT:    ret void
;
  call void @shared_za_callee()
  ret void
}

; A function marked "aarch64_new_za" with two return paths (`if.else` and
; `if.end`).
;
; Expected output:
;   * a new `prelude` block reads TPIDR2 and, when it is non-zero, branches to
;     `save.za`, which calls __arm_tpidr2_save and resets TPIDR2 to 0;
;   * ZA is enabled and zeroed at the start of the original `entry` block;
;   * ZA is disabled before each of the two `ret` instructions.
define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_new_za" {
; CHECK-LABEL: @private_za_multiple_exit(
; CHECK-NEXT:  prelude:
; CHECK-NEXT:    [[TPIDR2:%.*]] = call i64 @llvm.aarch64.sme.get.tpidr2()
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[TPIDR2]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[SAVE_ZA:%.*]], label [[ENTRY:%.*]]
; CHECK:       save.za:
; CHECK-NEXT:    call aarch64_sme_preservemost_from_x0 void @__arm_tpidr2_save()
; CHECK-NEXT:    call void @llvm.aarch64.sme.set.tpidr2(i64 0)
; CHECK-NEXT:    br label [[ENTRY]]
; CHECK:       entry:
; CHECK-NEXT:    call void @llvm.aarch64.sme.za.enable()
; CHECK-NEXT:    call void @llvm.aarch64.sme.zero(i32 255)
; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[COND:%.*]], 1
; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_ELSE:%.*]], label [[IF_END:%.*]]
; CHECK:       if.else:
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    call void @llvm.aarch64.sme.za.disable()
; CHECK-NEXT:    ret i32 [[ADD]]
; CHECK:       if.end:
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[A]], [[B]]
; CHECK-NEXT:    call void @llvm.aarch64.sme.za.disable()
; CHECK-NEXT:    ret i32 [[SUB]]
;
entry:
  %tobool = icmp eq i64 %cond, 1
  br i1 %tobool, label %if.else, label %if.end

if.else:
  %add = add i32 %a, %b
  ret i32 %add

if.end:
  %sub = sub i32 %a, %b
  ret i32 %sub
}

; The output must contain a declaration of __arm_tpidr2_save, and the
; attribute group attached to it must be { "aarch64_pstate_sm_compatible" }.
; CHECK: declare void @__arm_tpidr2_save() #[[ATTR:[0-9]+]]
; CHECK: attributes #[[ATTR]] = { "aarch64_pstate_sm_compatible" }