[builtins] Refactor cpu_model support to reduce #if nesting. NFCI

Reviewers: petrhosek, DavidSpickett

Pull Request: https://github.com/llvm/llvm-project/pull/75635
This commit is contained in:
Jon Roelofs
2023-12-19 11:09:50 -07:00
parent b72e160914
commit 9237cfa65b
14 changed files with 746 additions and 619 deletions

View File

@@ -271,7 +271,7 @@ endif()
# These files are used on 32-bit and 64-bit x86.
set(x86_ARCH_SOURCES
cpu_model.c
cpu_model/x86.c
)
if (NOT MSVC)
@@ -556,7 +556,7 @@ endif()
set(aarch64_SOURCES
${GENERIC_TF_SOURCES}
${GENERIC_SOURCES}
cpu_model.c
cpu_model/aarch64.c
aarch64/fp_mode.c
)

View File

@@ -0,0 +1,143 @@
//===-- cpu_model/aarch64.c - Support for __cpu_model builtin ----*- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is based on LLVM's lib/Support/Host.cpp.
// It implements __aarch64_have_lse_atomics, __aarch64_cpu_features for
// AArch64.
//
//===----------------------------------------------------------------------===//
#include "cpu_model.h"
#if !defined(__aarch64__)
#error This file is intended only for aarch64-based targets
#endif
#if __has_include(<sys/ifunc.h>)
#include <sys/ifunc.h>
#else
typedef struct __ifunc_arg_t {
unsigned long _size;
unsigned long _hwcap;
unsigned long _hwcap2;
} __ifunc_arg_t;
#endif // __has_include(<sys/ifunc.h>)
// LSE support detection for out-of-line atomics
// using HWCAP and Auxiliary vector
_Bool __aarch64_have_lse_atomics
__attribute__((visibility("hidden"), nocommon)) = false;
#if defined(__FreeBSD__)
#include "aarch64/lse_atomics/freebsd.inc"
#elif defined(__Fuchsia__)
#include "aarch64/lse_atomics/fuchsia.inc"
#elif defined(__ANDROID__)
#include "aarch64/lse_atomics/android.inc"
#elif __has_include(<sys/auxv.h>)
#include "aarch64/lse_atomics/sysauxv.inc"
#else
// When unimplemented, we leave __aarch64_have_lse_atomics initialized to false.
#endif
#if !defined(DISABLE_AARCH64_FMV)
// CPUFeatures must correspond to the same AArch64 features in
// AArch64TargetParser.h
enum CPUFeatures {
FEAT_RNG,
FEAT_FLAGM,
FEAT_FLAGM2,
FEAT_FP16FML,
FEAT_DOTPROD,
FEAT_SM4,
FEAT_RDM,
FEAT_LSE,
FEAT_FP,
FEAT_SIMD,
FEAT_CRC,
FEAT_SHA1,
FEAT_SHA2,
FEAT_SHA3,
FEAT_AES,
FEAT_PMULL,
FEAT_FP16,
FEAT_DIT,
FEAT_DPB,
FEAT_DPB2,
FEAT_JSCVT,
FEAT_FCMA,
FEAT_RCPC,
FEAT_RCPC2,
FEAT_FRINTTS,
FEAT_DGH,
FEAT_I8MM,
FEAT_BF16,
FEAT_EBF16,
FEAT_RPRES,
FEAT_SVE,
FEAT_SVE_BF16,
FEAT_SVE_EBF16,
FEAT_SVE_I8MM,
FEAT_SVE_F32MM,
FEAT_SVE_F64MM,
FEAT_SVE2,
FEAT_SVE_AES,
FEAT_SVE_PMULL128,
FEAT_SVE_BITPERM,
FEAT_SVE_SHA3,
FEAT_SVE_SM4,
FEAT_SME,
FEAT_MEMTAG,
FEAT_MEMTAG2,
FEAT_MEMTAG3,
FEAT_SB,
FEAT_PREDRES,
FEAT_SSBS,
FEAT_SSBS2,
FEAT_BTI,
FEAT_LS64,
FEAT_LS64_V,
FEAT_LS64_ACCDATA,
FEAT_WFXT,
FEAT_SME_F64,
FEAT_SME_I64,
FEAT_SME2,
FEAT_RCPC3,
FEAT_MAX,
FEAT_EXT = 62, // Reserved to indicate presence of additional features field
// in __aarch64_cpu_features
FEAT_INIT // Used as flag of features initialization completion
};
// Architecture features used
// in Function Multi Versioning
struct {
unsigned long long features;
// As features grows new fields could be added
} __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon));
// The formatter wants to re-order these includes, but doing so is incorrect:
// clang-format off
#if defined(__FreeBSD__)
#include "aarch64/fmv/mrs.inc"
#include "aarch64/fmv/freebsd.inc"
#elif defined(__Fuchsia__)
#include "aarch64/fmv/mrs.inc"
#include "aarch64/fmv/fuchsia.inc"
#elif defined(__ANDROID__)
#include "aarch64/fmv/mrs.inc"
#include "aarch64/fmv/android.inc"
#elif __has_include(<sys/auxv.h>)
#include "aarch64/fmv/mrs.inc"
#include "aarch64/fmv/sysauxv.inc"
#else
#include "aarch64/fmv/unimplemented.inc"
#endif
// clang-format on
#endif // !defined(DISABLE_AARCH64_FMV)

View File

@@ -0,0 +1,33 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
if (__aarch64_cpu_features.features)
return;
// ifunc resolvers don't have hwcaps in arguments on Android API lower
// than 30. If so, set feature detection done and keep all CPU features
// unsupported (zeros). To detect this case in runtime we check existence
// of memfd_create function from Standard C library which was introduced in
// Android API 30.
int memfd_create(const char *, unsigned int) __attribute__((weak));
if (!memfd_create)
return;
__init_cpu_features_constructor(hwcap, arg);
}
void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
// CPU features already initialized.
if (__aarch64_cpu_features.features)
return;
// Don't set any CPU features,
// detection could be wrong on Exynos 9810.
if (__isExynos9810())
return;
__ifunc_arg_t arg;
arg._size = sizeof(__ifunc_arg_t);
arg._hwcap = getauxval(AT_HWCAP);
arg._hwcap2 = getauxval(AT_HWCAP2);
__init_cpu_features_constructor(hwcap | _IFUNC_ARG_HWCAP, &arg);
}

View File

@@ -0,0 +1,27 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
if (__aarch64_cpu_features.features)
return;
__init_cpu_features_constructor(hwcap, arg);
}
void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
unsigned long hwcap = 0;
unsigned long hwcap2 = 0;
// CPU features already initialized.
if (__aarch64_cpu_features.features)
return;
int res = 0;
res = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
res |= elf_aux_info(AT_HWCAP2, &hwcap2, sizeof hwcap2);
if (res)
return;
__ifunc_arg_t arg;
arg._size = sizeof(__ifunc_arg_t);
arg._hwcap = hwcap;
arg._hwcap2 = hwcap2;
__init_cpu_features_constructor(hwcap | _IFUNC_ARG_HWCAP, &arg);
}

View File

@@ -0,0 +1,19 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
if (__aarch64_cpu_features.features)
return;
__init_cpu_features_constructor(hwcap, arg);
}
void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
// CPU features already initialized.
if (__aarch64_cpu_features.features)
return;
__ifunc_arg_t arg;
arg._size = sizeof(__ifunc_arg_t);
arg._hwcap = getauxval(AT_HWCAP);
arg._hwcap2 = getauxval(AT_HWCAP2);
__init_cpu_features_constructor(hwcap | _IFUNC_ARG_HWCAP, &arg);
}

View File

@@ -0,0 +1,375 @@
#if __has_include(<sys/auxv.h>)
#include <sys/auxv.h>
#define HAVE_SYS_AUXV_H
#endif
#if __has_include(<sys/hwcap.h>)
#include <sys/hwcap.h>
#define HAVE_SYS_HWCAP_H
#endif
#ifndef _IFUNC_ARG_HWCAP
#define _IFUNC_ARG_HWCAP (1ULL << 62)
#endif
#ifndef AT_HWCAP
#define AT_HWCAP 16
#endif
#ifndef HWCAP_CPUID
#define HWCAP_CPUID (1 << 11)
#endif
#ifndef HWCAP_FP
#define HWCAP_FP (1 << 0)
#endif
#ifndef HWCAP_ASIMD
#define HWCAP_ASIMD (1 << 1)
#endif
#ifndef HWCAP_AES
#define HWCAP_AES (1 << 3)
#endif
#ifndef HWCAP_PMULL
#define HWCAP_PMULL (1 << 4)
#endif
#ifndef HWCAP_SHA1
#define HWCAP_SHA1 (1 << 5)
#endif
#ifndef HWCAP_SHA2
#define HWCAP_SHA2 (1 << 6)
#endif
#ifndef HWCAP_CRC32
#define HWCAP_CRC32 (1 << 7)
#endif
#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1 << 8)
#endif
#ifndef HWCAP_FPHP
#define HWCAP_FPHP (1 << 9)
#endif
#ifndef HWCAP_ASIMDHP
#define HWCAP_ASIMDHP (1 << 10)
#endif
#ifndef HWCAP_ASIMDRDM
#define HWCAP_ASIMDRDM (1 << 12)
#endif
#ifndef HWCAP_JSCVT
#define HWCAP_JSCVT (1 << 13)
#endif
#ifndef HWCAP_FCMA
#define HWCAP_FCMA (1 << 14)
#endif
#ifndef HWCAP_LRCPC
#define HWCAP_LRCPC (1 << 15)
#endif
#ifndef HWCAP_DCPOP
#define HWCAP_DCPOP (1 << 16)
#endif
#ifndef HWCAP_SHA3
#define HWCAP_SHA3 (1 << 17)
#endif
#ifndef HWCAP_SM3
#define HWCAP_SM3 (1 << 18)
#endif
#ifndef HWCAP_SM4
#define HWCAP_SM4 (1 << 19)
#endif
#ifndef HWCAP_ASIMDDP
#define HWCAP_ASIMDDP (1 << 20)
#endif
#ifndef HWCAP_SHA512
#define HWCAP_SHA512 (1 << 21)
#endif
#ifndef HWCAP_SVE
#define HWCAP_SVE (1 << 22)
#endif
#ifndef HWCAP_ASIMDFHM
#define HWCAP_ASIMDFHM (1 << 23)
#endif
#ifndef HWCAP_DIT
#define HWCAP_DIT (1 << 24)
#endif
#ifndef HWCAP_ILRCPC
#define HWCAP_ILRCPC (1 << 26)
#endif
#ifndef HWCAP_FLAGM
#define HWCAP_FLAGM (1 << 27)
#endif
#ifndef HWCAP_SSBS
#define HWCAP_SSBS (1 << 28)
#endif
#ifndef HWCAP_SB
#define HWCAP_SB (1 << 29)
#endif
#ifndef AT_HWCAP2
#define AT_HWCAP2 26
#endif
#ifndef HWCAP2_DCPODP
#define HWCAP2_DCPODP (1 << 0)
#endif
#ifndef HWCAP2_SVE2
#define HWCAP2_SVE2 (1 << 1)
#endif
#ifndef HWCAP2_SVEAES
#define HWCAP2_SVEAES (1 << 2)
#endif
#ifndef HWCAP2_SVEPMULL
#define HWCAP2_SVEPMULL (1 << 3)
#endif
#ifndef HWCAP2_SVEBITPERM
#define HWCAP2_SVEBITPERM (1 << 4)
#endif
#ifndef HWCAP2_SVESHA3
#define HWCAP2_SVESHA3 (1 << 5)
#endif
#ifndef HWCAP2_SVESM4
#define HWCAP2_SVESM4 (1 << 6)
#endif
#ifndef HWCAP2_FLAGM2
#define HWCAP2_FLAGM2 (1 << 7)
#endif
#ifndef HWCAP2_FRINT
#define HWCAP2_FRINT (1 << 8)
#endif
#ifndef HWCAP2_SVEI8MM
#define HWCAP2_SVEI8MM (1 << 9)
#endif
#ifndef HWCAP2_SVEF32MM
#define HWCAP2_SVEF32MM (1 << 10)
#endif
#ifndef HWCAP2_SVEF64MM
#define HWCAP2_SVEF64MM (1 << 11)
#endif
#ifndef HWCAP2_SVEBF16
#define HWCAP2_SVEBF16 (1 << 12)
#endif
#ifndef HWCAP2_I8MM
#define HWCAP2_I8MM (1 << 13)
#endif
#ifndef HWCAP2_BF16
#define HWCAP2_BF16 (1 << 14)
#endif
#ifndef HWCAP2_DGH
#define HWCAP2_DGH (1 << 15)
#endif
#ifndef HWCAP2_RNG
#define HWCAP2_RNG (1 << 16)
#endif
#ifndef HWCAP2_BTI
#define HWCAP2_BTI (1 << 17)
#endif
#ifndef HWCAP2_MTE
#define HWCAP2_MTE (1 << 18)
#endif
#ifndef HWCAP2_RPRES
#define HWCAP2_RPRES (1 << 21)
#endif
#ifndef HWCAP2_MTE3
#define HWCAP2_MTE3 (1 << 22)
#endif
#ifndef HWCAP2_SME
#define HWCAP2_SME (1 << 23)
#endif
#ifndef HWCAP2_SME_I16I64
#define HWCAP2_SME_I16I64 (1 << 24)
#endif
#ifndef HWCAP2_SME_F64F64
#define HWCAP2_SME_F64F64 (1 << 25)
#endif
#ifndef HWCAP2_WFXT
#define HWCAP2_WFXT (1UL << 31)
#endif
#ifndef HWCAP2_EBF16
#define HWCAP2_EBF16 (1ULL << 32)
#endif
#ifndef HWCAP2_SVE_EBF16
#define HWCAP2_SVE_EBF16 (1ULL << 33)
#endif
static void __init_cpu_features_constructor(unsigned long hwcap,
const __ifunc_arg_t *arg) {
#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
#define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
#define extractBits(val, start, number) \
(val & ((1ULL << number) - 1ULL) << start) >> start
unsigned long hwcap2 = 0;
if (hwcap & _IFUNC_ARG_HWCAP)
hwcap2 = arg->_hwcap2;
if (hwcap & HWCAP_CRC32)
setCPUFeature(FEAT_CRC);
if (hwcap & HWCAP_PMULL)
setCPUFeature(FEAT_PMULL);
if (hwcap & HWCAP_FLAGM)
setCPUFeature(FEAT_FLAGM);
if (hwcap2 & HWCAP2_FLAGM2) {
setCPUFeature(FEAT_FLAGM);
setCPUFeature(FEAT_FLAGM2);
}
if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
setCPUFeature(FEAT_SM4);
if (hwcap & HWCAP_ASIMDDP)
setCPUFeature(FEAT_DOTPROD);
if (hwcap & HWCAP_ASIMDFHM)
setCPUFeature(FEAT_FP16FML);
if (hwcap & HWCAP_FPHP) {
setCPUFeature(FEAT_FP16);
setCPUFeature(FEAT_FP);
}
if (hwcap & HWCAP_DIT)
setCPUFeature(FEAT_DIT);
if (hwcap & HWCAP_ASIMDRDM)
setCPUFeature(FEAT_RDM);
if (hwcap & HWCAP_ILRCPC)
setCPUFeature(FEAT_RCPC2);
if (hwcap & HWCAP_AES)
setCPUFeature(FEAT_AES);
if (hwcap & HWCAP_SHA1)
setCPUFeature(FEAT_SHA1);
if (hwcap & HWCAP_SHA2)
setCPUFeature(FEAT_SHA2);
if (hwcap & HWCAP_JSCVT)
setCPUFeature(FEAT_JSCVT);
if (hwcap & HWCAP_FCMA)
setCPUFeature(FEAT_FCMA);
if (hwcap & HWCAP_SB)
setCPUFeature(FEAT_SB);
if (hwcap & HWCAP_SSBS)
setCPUFeature(FEAT_SSBS2);
if (hwcap2 & HWCAP2_MTE) {
setCPUFeature(FEAT_MEMTAG);
setCPUFeature(FEAT_MEMTAG2);
}
if (hwcap2 & HWCAP2_MTE3) {
setCPUFeature(FEAT_MEMTAG);
setCPUFeature(FEAT_MEMTAG2);
setCPUFeature(FEAT_MEMTAG3);
}
if (hwcap2 & HWCAP2_SVEAES)
setCPUFeature(FEAT_SVE_AES);
if (hwcap2 & HWCAP2_SVEPMULL) {
setCPUFeature(FEAT_SVE_AES);
setCPUFeature(FEAT_SVE_PMULL128);
}
if (hwcap2 & HWCAP2_SVEBITPERM)
setCPUFeature(FEAT_SVE_BITPERM);
if (hwcap2 & HWCAP2_SVESHA3)
setCPUFeature(FEAT_SVE_SHA3);
if (hwcap2 & HWCAP2_SVESM4)
setCPUFeature(FEAT_SVE_SM4);
if (hwcap2 & HWCAP2_DCPODP)
setCPUFeature(FEAT_DPB2);
if (hwcap & HWCAP_ATOMICS)
setCPUFeature(FEAT_LSE);
if (hwcap2 & HWCAP2_RNG)
setCPUFeature(FEAT_RNG);
if (hwcap2 & HWCAP2_I8MM)
setCPUFeature(FEAT_I8MM);
if (hwcap2 & HWCAP2_EBF16)
setCPUFeature(FEAT_EBF16);
if (hwcap2 & HWCAP2_SVE_EBF16)
setCPUFeature(FEAT_SVE_EBF16);
if (hwcap2 & HWCAP2_DGH)
setCPUFeature(FEAT_DGH);
if (hwcap2 & HWCAP2_FRINT)
setCPUFeature(FEAT_FRINTTS);
if (hwcap2 & HWCAP2_SVEI8MM)
setCPUFeature(FEAT_SVE_I8MM);
if (hwcap2 & HWCAP2_SVEF32MM)
setCPUFeature(FEAT_SVE_F32MM);
if (hwcap2 & HWCAP2_SVEF64MM)
setCPUFeature(FEAT_SVE_F64MM);
if (hwcap2 & HWCAP2_BTI)
setCPUFeature(FEAT_BTI);
if (hwcap2 & HWCAP2_RPRES)
setCPUFeature(FEAT_RPRES);
if (hwcap2 & HWCAP2_WFXT)
setCPUFeature(FEAT_WFXT);
if (hwcap2 & HWCAP2_SME)
setCPUFeature(FEAT_SME);
if (hwcap2 & HWCAP2_SME_I16I64)
setCPUFeature(FEAT_SME_I64);
if (hwcap2 & HWCAP2_SME_F64F64)
setCPUFeature(FEAT_SME_F64);
if (hwcap & HWCAP_CPUID) {
unsigned long ftr;
getCPUFeature(ID_AA64PFR1_EL1, ftr);
// ID_AA64PFR1_EL1.MTE >= 0b0001
if (extractBits(ftr, 8, 4) >= 0x1)
setCPUFeature(FEAT_MEMTAG);
// ID_AA64PFR1_EL1.SSBS == 0b0001
if (extractBits(ftr, 4, 4) == 0x1)
setCPUFeature(FEAT_SSBS);
// ID_AA64PFR1_EL1.SME == 0b0010
if (extractBits(ftr, 24, 4) == 0x2)
setCPUFeature(FEAT_SME2);
getCPUFeature(ID_AA64PFR0_EL1, ftr);
// ID_AA64PFR0_EL1.FP != 0b1111
if (extractBits(ftr, 16, 4) != 0xF) {
setCPUFeature(FEAT_FP);
// ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP
setCPUFeature(FEAT_SIMD);
}
// ID_AA64PFR0_EL1.SVE != 0b0000
if (extractBits(ftr, 32, 4) != 0x0) {
// get ID_AA64ZFR0_EL1, that name supported
// if sve enabled only
getCPUFeature(S3_0_C0_C4_4, ftr);
// ID_AA64ZFR0_EL1.SVEver == 0b0000
if (extractBits(ftr, 0, 4) == 0x0)
setCPUFeature(FEAT_SVE);
// ID_AA64ZFR0_EL1.SVEver == 0b0001
if (extractBits(ftr, 0, 4) == 0x1)
setCPUFeature(FEAT_SVE2);
// ID_AA64ZFR0_EL1.BF16 != 0b0000
if (extractBits(ftr, 20, 4) != 0x0)
setCPUFeature(FEAT_SVE_BF16);
}
getCPUFeature(ID_AA64ISAR0_EL1, ftr);
// ID_AA64ISAR0_EL1.SHA3 != 0b0000
if (extractBits(ftr, 32, 4) != 0x0)
setCPUFeature(FEAT_SHA3);
getCPUFeature(ID_AA64ISAR1_EL1, ftr);
// ID_AA64ISAR1_EL1.DPB >= 0b0001
if (extractBits(ftr, 0, 4) >= 0x1)
setCPUFeature(FEAT_DPB);
// ID_AA64ISAR1_EL1.LRCPC != 0b0000
if (extractBits(ftr, 20, 4) != 0x0)
setCPUFeature(FEAT_RCPC);
// ID_AA64ISAR1_EL1.LRCPC == 0b0011
if (extractBits(ftr, 20, 4) == 0x3)
setCPUFeature(FEAT_RCPC3);
// ID_AA64ISAR1_EL1.SPECRES == 0b0001
if (extractBits(ftr, 40, 4) == 0x2)
setCPUFeature(FEAT_PREDRES);
// ID_AA64ISAR1_EL1.BF16 != 0b0000
if (extractBits(ftr, 44, 4) != 0x0)
setCPUFeature(FEAT_BF16);
// ID_AA64ISAR1_EL1.LS64 >= 0b0001
if (extractBits(ftr, 60, 4) >= 0x1)
setCPUFeature(FEAT_LS64);
// ID_AA64ISAR1_EL1.LS64 >= 0b0010
if (extractBits(ftr, 60, 4) >= 0x2)
setCPUFeature(FEAT_LS64_V);
// ID_AA64ISAR1_EL1.LS64 >= 0b0011
if (extractBits(ftr, 60, 4) >= 0x3)
setCPUFeature(FEAT_LS64_ACCDATA);
} else {
// Set some features in case of no CPUID support
if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
setCPUFeature(FEAT_FP);
// FP and AdvSIMD fields have the same value
setCPUFeature(FEAT_SIMD);
}
if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
setCPUFeature(FEAT_DPB);
if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
setCPUFeature(FEAT_RCPC);
if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
setCPUFeature(FEAT_BF16);
if (hwcap2 & HWCAP2_SVEBF16)
setCPUFeature(FEAT_SVE_BF16);
if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE)
setCPUFeature(FEAT_SVE2);
if (hwcap & HWCAP_SHA3)
setCPUFeature(FEAT_SHA3);
}
setCPUFeature(FEAT_INIT);
}

View File

@@ -0,0 +1,17 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
if (__aarch64_cpu_features.features)
return;
__init_cpu_features_constructor(hwcap, arg);
}
void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
// CPU features already initialized.
if (__aarch64_cpu_features.features)
return;
__ifunc_arg_t arg;
arg._size = sizeof(__ifunc_arg_t);
arg._hwcap = getauxval(AT_HWCAP);
arg._hwcap2 = getauxval(AT_HWCAP2);
__init_cpu_features_constructor(hwcap | _IFUNC_ARG_HWCAP, &arg);
}

View File

@@ -0,0 +1,8 @@
// On platforms that have not implemented this yet, we provide an implementation
// that does not claim support for any features by leaving
// __aarch64_cpu_features.features initialized to 0.
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {}
void __init_cpu_features(void) {}

View File

@@ -0,0 +1,27 @@
#include <string.h>
#include <sys/system_properties.h>
static void __isExynos9810(void) {
char arch[PROP_VALUE_MAX];
return (__system_property_get("ro.arch", arch) > 0 &&
strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0;
}
static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
unsigned long hwcap = getauxval(AT_HWCAP);
_Bool result = (hwcap & HWCAP_ATOMICS) != 0;
if (result) {
// Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0;
// only the former support LSE atomics. However, the kernel in the
// initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly
// reported the feature as being supported.
//
// The kernel appears to have been corrected to mark it unsupported as of
// the Android 9.0 release on those devices, and this issue has not been
// observed anywhere else. Thus, this workaround may be removed if
// compiler-rt ever drops support for Android 8.0.
if (__isExynos9810())
result = false;
}
__aarch64_have_lse_atomics = result;
}

View File

@@ -0,0 +1,5 @@
static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
unsigned long hwcap;
int result = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
__aarch64_have_lse_atomics = result == 0 && (hwcap & HWCAP_ATOMICS) != 0;
}

View File

@@ -0,0 +1,12 @@
#include <zircon/features.h>
#include <zircon/syscalls.h>
static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
// This ensures the vDSO is a direct link-time dependency of anything that
// needs this initializer code.
#pragma comment(lib, "zircon")
uint32_t features;
zx_status_t status = _zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
__aarch64_have_lse_atomics =
status == ZX_OK && (features & ZX_ARM64_FEATURE_ISA_ATOMICS) != 0;
}

View File

@@ -0,0 +1,6 @@
#include <sys/auxv.h>
static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
unsigned long hwcap = getauxval(AT_HWCAP);
__aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
}

View File

@@ -0,0 +1,41 @@
//===-- cpu_model_common.c - Utilities for cpu model detection ----*- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements common utilities for runtime cpu model detection.
//
//===----------------------------------------------------------------------===//
#ifndef COMPILER_RT_LIB_BUILTINS_CPU_MODEL_COMMON_H
#define COMPILER_RT_LIB_BUILTINS_CPU_MODEL_COMMON_H
#define bool int
#define true 1
#define false 0
#ifndef __has_attribute
#define __has_attribute(attr) 0
#endif
#if __has_attribute(constructor)
#if __GNUC__ >= 9
// Ordinarily init priorities below 101 are disallowed as they are reserved for
// the implementation. However, we are the implementation, so silence the
// diagnostic, since it doesn't apply to us.
#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
#endif
// We're choosing init priority 90 to force our constructors to run before any
// constructors in the end user application (starting at priority 101). This
// value matches the libgcc choice for the same functions.
#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90)))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif
#endif

View File

@@ -1,4 +1,4 @@
//===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===//
//===-- cpu_model/x86.c - Support for __cpu_model builtin --------*- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -8,42 +8,21 @@
//
// This file is based on LLVM's lib/Support/Host.cpp.
// It implements the operating system Host concept and builtin
// __cpu_model for the compiler_rt library for x86 and
// __aarch64_have_lse_atomics, __aarch64_cpu_features for AArch64.
// __cpu_model for the compiler_rt library for x86.
//
//===----------------------------------------------------------------------===//
#ifndef __has_attribute
#define __has_attribute(attr) 0
#include "cpu_model.h"
#if !(defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
defined(_M_X64))
#error This file is intended only for x86-based targets
#endif
#if __has_attribute(constructor)
#if __GNUC__ >= 9
// Ordinarily init priorities below 101 are disallowed as they are reserved for the
// implementation. However, we are the implementation, so silence the diagnostic,
// since it doesn't apply to us.
#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
#endif
// We're choosing init priority 90 to force our constructors to run before any
// constructors in the end user application (starting at priority 101). This value
// matches the libgcc choice for the same functions.
#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90)))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif
#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
defined(_M_X64)) && \
(defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
#include <assert.h>
#define bool int
#define true 1
#define false 0
#ifdef _MSC_VER
#include <intrin.h>
#endif
@@ -319,12 +298,12 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
}
}
static const char *
getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
const unsigned *Features,
unsigned *Type, unsigned *Subtype) {
#define testFeature(F) \
(Features[F / 32] & (1 << (F % 32))) != 0
static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
unsigned Model,
const unsigned *Features,
unsigned *Type,
unsigned *Subtype) {
#define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0
// We select CPU strings to match the code in Host.cpp, but we don't use them
// in compiler-rt.
@@ -357,7 +336,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
// As found in a Summer 2010 model iMac.
case 0x1f:
case 0x2e: // Nehalem EX
case 0x2e: // Nehalem EX
CPU = "nehalem";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_NEHALEM;
@@ -378,7 +357,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = INTEL_COREI7_SANDYBRIDGE;
break;
case 0x3a:
case 0x3e: // Ivy Bridge EP
case 0x3e: // Ivy Bridge EP
CPU = "ivybridge";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_IVYBRIDGE;
@@ -405,12 +384,12 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;
// Skylake:
case 0x4e: // Skylake mobile
case 0x5e: // Skylake desktop
case 0x8e: // Kaby Lake mobile
case 0x9e: // Kaby Lake desktop
case 0xa5: // Comet Lake-H/S
case 0xa6: // Comet Lake-U
case 0x4e: // Skylake mobile
case 0x5e: // Skylake desktop
case 0x8e: // Kaby Lake mobile
case 0x9e: // Kaby Lake desktop
case 0xa5: // Comet Lake-H/S
case 0xa6: // Comet Lake-U
CPU = "skylake";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_SKYLAKE;
@@ -609,10 +588,11 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
return CPU;
}
static const char *
getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
const unsigned *Features,
unsigned *Type, unsigned *Subtype) {
static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
unsigned Model,
const unsigned *Features,
unsigned *Type,
unsigned *Subtype) {
// We select CPU strings to match the code in Host.cpp, but we don't use them
// in compiler-rt.
const char *CPU = 0;
@@ -689,10 +669,8 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = AMDFAM19H_ZNVER3;
break;
}
if ((Model >= 0x10 && Model <= 0x1f) ||
(Model >= 0x60 && Model <= 0x74) ||
(Model >= 0x78 && Model <= 0x7b) ||
(Model >= 0xA0 && Model <= 0xAf)) {
if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x74) ||
(Model >= 0x78 && Model <= 0x7b) || (Model >= 0xA0 && Model <= 0xAf)) {
CPU = "znver4";
*Subtype = AMDFAM19H_ZNVER4;
break; // "znver4"
@@ -710,8 +688,7 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
unsigned EAX = 0, EBX = 0;
#define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1)
#define setFeature(F) \
Features[F / 32] |= 1U << (F % 32)
#define setFeature(F) Features[F / 32] |= 1U << (F % 32)
if ((EDX >> 15) & 1)
setFeature(FEATURE_CMOV);
@@ -938,567 +915,4 @@ int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
return 0;
}
#elif defined(__aarch64__)
// LSE support detection for out-of-line atomics
// using HWCAP and Auxiliary vector
_Bool __aarch64_have_lse_atomics
__attribute__((visibility("hidden"), nocommon));
#if defined(__has_include)
#if __has_include(<sys/auxv.h>)
#include <sys/auxv.h>
#if __has_include(<sys/ifunc.h>)
#include <sys/ifunc.h>
#else
typedef struct __ifunc_arg_t {
unsigned long _size;
unsigned long _hwcap;
unsigned long _hwcap2;
} __ifunc_arg_t;
#endif // __has_include(<sys/ifunc.h>)
#if __has_include(<asm/hwcap.h>)
#include <asm/hwcap.h>
#if defined(__ANDROID__)
#include <string.h>
#include <sys/system_properties.h>
#elif defined(__Fuchsia__)
#include <zircon/features.h>
#include <zircon/syscalls.h>
#endif
#ifndef _IFUNC_ARG_HWCAP
#define _IFUNC_ARG_HWCAP (1ULL << 62)
#endif
#ifndef AT_HWCAP
#define AT_HWCAP 16
#endif
#ifndef HWCAP_CPUID
#define HWCAP_CPUID (1 << 11)
#endif
#ifndef HWCAP_FP
#define HWCAP_FP (1 << 0)
#endif
#ifndef HWCAP_ASIMD
#define HWCAP_ASIMD (1 << 1)
#endif
#ifndef HWCAP_AES
#define HWCAP_AES (1 << 3)
#endif
#ifndef HWCAP_PMULL
#define HWCAP_PMULL (1 << 4)
#endif
#ifndef HWCAP_SHA1
#define HWCAP_SHA1 (1 << 5)
#endif
#ifndef HWCAP_SHA2
#define HWCAP_SHA2 (1 << 6)
#endif
#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1 << 8)
#endif
#ifndef HWCAP_FPHP
#define HWCAP_FPHP (1 << 9)
#endif
#ifndef HWCAP_ASIMDHP
#define HWCAP_ASIMDHP (1 << 10)
#endif
#ifndef HWCAP_ASIMDRDM
#define HWCAP_ASIMDRDM (1 << 12)
#endif
#ifndef HWCAP_JSCVT
#define HWCAP_JSCVT (1 << 13)
#endif
#ifndef HWCAP_FCMA
#define HWCAP_FCMA (1 << 14)
#endif
#ifndef HWCAP_LRCPC
#define HWCAP_LRCPC (1 << 15)
#endif
#ifndef HWCAP_DCPOP
#define HWCAP_DCPOP (1 << 16)
#endif
#ifndef HWCAP_SHA3
#define HWCAP_SHA3 (1 << 17)
#endif
#ifndef HWCAP_SM3
#define HWCAP_SM3 (1 << 18)
#endif
#ifndef HWCAP_SM4
#define HWCAP_SM4 (1 << 19)
#endif
#ifndef HWCAP_ASIMDDP
#define HWCAP_ASIMDDP (1 << 20)
#endif
#ifndef HWCAP_SHA512
#define HWCAP_SHA512 (1 << 21)
#endif
#ifndef HWCAP_SVE
#define HWCAP_SVE (1 << 22)
#endif
#ifndef HWCAP_ASIMDFHM
#define HWCAP_ASIMDFHM (1 << 23)
#endif
#ifndef HWCAP_DIT
#define HWCAP_DIT (1 << 24)
#endif
#ifndef HWCAP_ILRCPC
#define HWCAP_ILRCPC (1 << 26)
#endif
#ifndef HWCAP_FLAGM
#define HWCAP_FLAGM (1 << 27)
#endif
#ifndef HWCAP_SSBS
#define HWCAP_SSBS (1 << 28)
#endif
#ifndef HWCAP_SB
#define HWCAP_SB (1 << 29)
#endif
#ifndef AT_HWCAP2
#define AT_HWCAP2 26
#endif
#ifndef HWCAP2_DCPODP
#define HWCAP2_DCPODP (1 << 0)
#endif
#ifndef HWCAP2_SVE2
#define HWCAP2_SVE2 (1 << 1)
#endif
#ifndef HWCAP2_SVEAES
#define HWCAP2_SVEAES (1 << 2)
#endif
#ifndef HWCAP2_SVEPMULL
#define HWCAP2_SVEPMULL (1 << 3)
#endif
#ifndef HWCAP2_SVEBITPERM
#define HWCAP2_SVEBITPERM (1 << 4)
#endif
#ifndef HWCAP2_SVESHA3
#define HWCAP2_SVESHA3 (1 << 5)
#endif
#ifndef HWCAP2_SVESM4
#define HWCAP2_SVESM4 (1 << 6)
#endif
#ifndef HWCAP2_FLAGM2
#define HWCAP2_FLAGM2 (1 << 7)
#endif
#ifndef HWCAP2_FRINT
#define HWCAP2_FRINT (1 << 8)
#endif
#ifndef HWCAP2_SVEI8MM
#define HWCAP2_SVEI8MM (1 << 9)
#endif
#ifndef HWCAP2_SVEF32MM
#define HWCAP2_SVEF32MM (1 << 10)
#endif
#ifndef HWCAP2_SVEF64MM
#define HWCAP2_SVEF64MM (1 << 11)
#endif
#ifndef HWCAP2_SVEBF16
#define HWCAP2_SVEBF16 (1 << 12)
#endif
#ifndef HWCAP2_I8MM
#define HWCAP2_I8MM (1 << 13)
#endif
#ifndef HWCAP2_BF16
#define HWCAP2_BF16 (1 << 14)
#endif
#ifndef HWCAP2_DGH
#define HWCAP2_DGH (1 << 15)
#endif
#ifndef HWCAP2_RNG
#define HWCAP2_RNG (1 << 16)
#endif
#ifndef HWCAP2_BTI
#define HWCAP2_BTI (1 << 17)
#endif
#ifndef HWCAP2_MTE
#define HWCAP2_MTE (1 << 18)
#endif
#ifndef HWCAP2_RPRES
#define HWCAP2_RPRES (1 << 21)
#endif
#ifndef HWCAP2_MTE3
#define HWCAP2_MTE3 (1 << 22)
#endif
#ifndef HWCAP2_SME
#define HWCAP2_SME (1 << 23)
#endif
#ifndef HWCAP2_SME_I16I64
#define HWCAP2_SME_I16I64 (1 << 24)
#endif
#ifndef HWCAP2_SME_F64F64
#define HWCAP2_SME_F64F64 (1 << 25)
#endif
#ifndef HWCAP2_WFXT
#define HWCAP2_WFXT (1UL << 31)
#endif
#ifndef HWCAP2_EBF16
#define HWCAP2_EBF16 (1ULL << 32)
#endif
#ifndef HWCAP2_SVE_EBF16
#define HWCAP2_SVE_EBF16 (1ULL << 33)
#endif
// Detect Exynos 9810 CPU
#define IF_EXYNOS9810 \
char arch[PROP_VALUE_MAX]; \
if (__system_property_get("ro.arch", arch) > 0 && \
strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0)
static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
#if defined(__FreeBSD__)
unsigned long hwcap;
int result = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
__aarch64_have_lse_atomics = result == 0 && (hwcap & HWCAP_ATOMICS) != 0;
#elif defined(__Fuchsia__)
// This ensures the vDSO is a direct link-time dependency of anything that
// needs this initializer code.
#pragma comment(lib, "zircon")
uint32_t features;
zx_status_t status = _zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
__aarch64_have_lse_atomics =
status == ZX_OK && (features & ZX_ARM64_FEATURE_ISA_ATOMICS) != 0;
#else
unsigned long hwcap = getauxval(AT_HWCAP);
_Bool result = (hwcap & HWCAP_ATOMICS) != 0;
#if defined(__ANDROID__)
if (result) {
// Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0;
// only the former support LSE atomics. However, the kernel in the
// initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly
// reported the feature as being supported.
//
// The kernel appears to have been corrected to mark it unsupported as of
// the Android 9.0 release on those devices, and this issue has not been
// observed anywhere else. Thus, this workaround may be removed if
// compiler-rt ever drops support for Android 8.0.
IF_EXYNOS9810 result = false;
}
#endif // defined(__ANDROID__)
__aarch64_have_lse_atomics = result;
#endif // defined(__FreeBSD__)
}
#if !defined(DISABLE_AARCH64_FMV)
// CPUFeatures must correspond to the same AArch64 features in
// AArch64TargetParser.h
enum CPUFeatures {
FEAT_RNG,
FEAT_FLAGM,
FEAT_FLAGM2,
FEAT_FP16FML,
FEAT_DOTPROD,
FEAT_SM4,
FEAT_RDM,
FEAT_LSE,
FEAT_FP,
FEAT_SIMD,
FEAT_CRC,
FEAT_SHA1,
FEAT_SHA2,
FEAT_SHA3,
FEAT_AES,
FEAT_PMULL,
FEAT_FP16,
FEAT_DIT,
FEAT_DPB,
FEAT_DPB2,
FEAT_JSCVT,
FEAT_FCMA,
FEAT_RCPC,
FEAT_RCPC2,
FEAT_FRINTTS,
FEAT_DGH,
FEAT_I8MM,
FEAT_BF16,
FEAT_EBF16,
FEAT_RPRES,
FEAT_SVE,
FEAT_SVE_BF16,
FEAT_SVE_EBF16,
FEAT_SVE_I8MM,
FEAT_SVE_F32MM,
FEAT_SVE_F64MM,
FEAT_SVE2,
FEAT_SVE_AES,
FEAT_SVE_PMULL128,
FEAT_SVE_BITPERM,
FEAT_SVE_SHA3,
FEAT_SVE_SM4,
FEAT_SME,
FEAT_MEMTAG,
FEAT_MEMTAG2,
FEAT_MEMTAG3,
FEAT_SB,
FEAT_PREDRES,
FEAT_SSBS,
FEAT_SSBS2,
FEAT_BTI,
FEAT_LS64,
FEAT_LS64_V,
FEAT_LS64_ACCDATA,
FEAT_WFXT,
FEAT_SME_F64,
FEAT_SME_I64,
FEAT_SME2,
FEAT_RCPC3,
FEAT_MAX,
FEAT_EXT = 62, // Reserved to indicate presence of additional features field
// in __aarch64_cpu_features
FEAT_INIT // Used as flag of features initialization completion
};
// Architecture features used
// in Function Multi Versioning
struct {
unsigned long long features;
// As features grows new fields could be added
} __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon));
static void __init_cpu_features_constructor(unsigned long hwcap,
const __ifunc_arg_t *arg) {
#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
#define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
#define extractBits(val, start, number) \
(val & ((1ULL << number) - 1ULL) << start) >> start
unsigned long hwcap2 = 0;
if (hwcap & _IFUNC_ARG_HWCAP)
hwcap2 = arg->_hwcap2;
if (hwcap & HWCAP_CRC32)
setCPUFeature(FEAT_CRC);
if (hwcap & HWCAP_PMULL)
setCPUFeature(FEAT_PMULL);
if (hwcap & HWCAP_FLAGM)
setCPUFeature(FEAT_FLAGM);
if (hwcap2 & HWCAP2_FLAGM2) {
setCPUFeature(FEAT_FLAGM);
setCPUFeature(FEAT_FLAGM2);
}
if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
setCPUFeature(FEAT_SM4);
if (hwcap & HWCAP_ASIMDDP)
setCPUFeature(FEAT_DOTPROD);
if (hwcap & HWCAP_ASIMDFHM)
setCPUFeature(FEAT_FP16FML);
if (hwcap & HWCAP_FPHP) {
setCPUFeature(FEAT_FP16);
setCPUFeature(FEAT_FP);
}
if (hwcap & HWCAP_DIT)
setCPUFeature(FEAT_DIT);
if (hwcap & HWCAP_ASIMDRDM)
setCPUFeature(FEAT_RDM);
if (hwcap & HWCAP_ILRCPC)
setCPUFeature(FEAT_RCPC2);
if (hwcap & HWCAP_AES)
setCPUFeature(FEAT_AES);
if (hwcap & HWCAP_SHA1)
setCPUFeature(FEAT_SHA1);
if (hwcap & HWCAP_SHA2)
setCPUFeature(FEAT_SHA2);
if (hwcap & HWCAP_JSCVT)
setCPUFeature(FEAT_JSCVT);
if (hwcap & HWCAP_FCMA)
setCPUFeature(FEAT_FCMA);
if (hwcap & HWCAP_SB)
setCPUFeature(FEAT_SB);
if (hwcap & HWCAP_SSBS)
setCPUFeature(FEAT_SSBS2);
if (hwcap2 & HWCAP2_MTE) {
setCPUFeature(FEAT_MEMTAG);
setCPUFeature(FEAT_MEMTAG2);
}
if (hwcap2 & HWCAP2_MTE3) {
setCPUFeature(FEAT_MEMTAG);
setCPUFeature(FEAT_MEMTAG2);
setCPUFeature(FEAT_MEMTAG3);
}
if (hwcap2 & HWCAP2_SVEAES)
setCPUFeature(FEAT_SVE_AES);
if (hwcap2 & HWCAP2_SVEPMULL) {
setCPUFeature(FEAT_SVE_AES);
setCPUFeature(FEAT_SVE_PMULL128);
}
if (hwcap2 & HWCAP2_SVEBITPERM)
setCPUFeature(FEAT_SVE_BITPERM);
if (hwcap2 & HWCAP2_SVESHA3)
setCPUFeature(FEAT_SVE_SHA3);
if (hwcap2 & HWCAP2_SVESM4)
setCPUFeature(FEAT_SVE_SM4);
if (hwcap2 & HWCAP2_DCPODP)
setCPUFeature(FEAT_DPB2);
if (hwcap & HWCAP_ATOMICS)
setCPUFeature(FEAT_LSE);
if (hwcap2 & HWCAP2_RNG)
setCPUFeature(FEAT_RNG);
if (hwcap2 & HWCAP2_I8MM)
setCPUFeature(FEAT_I8MM);
if (hwcap2 & HWCAP2_EBF16)
setCPUFeature(FEAT_EBF16);
if (hwcap2 & HWCAP2_SVE_EBF16)
setCPUFeature(FEAT_SVE_EBF16);
if (hwcap2 & HWCAP2_DGH)
setCPUFeature(FEAT_DGH);
if (hwcap2 & HWCAP2_FRINT)
setCPUFeature(FEAT_FRINTTS);
if (hwcap2 & HWCAP2_SVEI8MM)
setCPUFeature(FEAT_SVE_I8MM);
if (hwcap2 & HWCAP2_SVEF32MM)
setCPUFeature(FEAT_SVE_F32MM);
if (hwcap2 & HWCAP2_SVEF64MM)
setCPUFeature(FEAT_SVE_F64MM);
if (hwcap2 & HWCAP2_BTI)
setCPUFeature(FEAT_BTI);
if (hwcap2 & HWCAP2_RPRES)
setCPUFeature(FEAT_RPRES);
if (hwcap2 & HWCAP2_WFXT)
setCPUFeature(FEAT_WFXT);
if (hwcap2 & HWCAP2_SME)
setCPUFeature(FEAT_SME);
if (hwcap2 & HWCAP2_SME_I16I64)
setCPUFeature(FEAT_SME_I64);
if (hwcap2 & HWCAP2_SME_F64F64)
setCPUFeature(FEAT_SME_F64);
if (hwcap & HWCAP_CPUID) {
unsigned long ftr;
getCPUFeature(ID_AA64PFR1_EL1, ftr);
// ID_AA64PFR1_EL1.MTE >= 0b0001
if (extractBits(ftr, 8, 4) >= 0x1)
setCPUFeature(FEAT_MEMTAG);
// ID_AA64PFR1_EL1.SSBS == 0b0001
if (extractBits(ftr, 4, 4) == 0x1)
setCPUFeature(FEAT_SSBS);
// ID_AA64PFR1_EL1.SME == 0b0010
if (extractBits(ftr, 24, 4) == 0x2)
setCPUFeature(FEAT_SME2);
getCPUFeature(ID_AA64PFR0_EL1, ftr);
// ID_AA64PFR0_EL1.FP != 0b1111
if (extractBits(ftr, 16, 4) != 0xF) {
setCPUFeature(FEAT_FP);
// ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP
setCPUFeature(FEAT_SIMD);
}
// ID_AA64PFR0_EL1.SVE != 0b0000
if (extractBits(ftr, 32, 4) != 0x0) {
// get ID_AA64ZFR0_EL1, that name supported
// if sve enabled only
getCPUFeature(S3_0_C0_C4_4, ftr);
// ID_AA64ZFR0_EL1.SVEver == 0b0000
if (extractBits(ftr, 0, 4) == 0x0)
setCPUFeature(FEAT_SVE);
// ID_AA64ZFR0_EL1.SVEver == 0b0001
if (extractBits(ftr, 0, 4) == 0x1)
setCPUFeature(FEAT_SVE2);
// ID_AA64ZFR0_EL1.BF16 != 0b0000
if (extractBits(ftr, 20, 4) != 0x0)
setCPUFeature(FEAT_SVE_BF16);
}
getCPUFeature(ID_AA64ISAR0_EL1, ftr);
// ID_AA64ISAR0_EL1.SHA3 != 0b0000
if (extractBits(ftr, 32, 4) != 0x0)
setCPUFeature(FEAT_SHA3);
getCPUFeature(ID_AA64ISAR1_EL1, ftr);
// ID_AA64ISAR1_EL1.DPB >= 0b0001
if (extractBits(ftr, 0, 4) >= 0x1)
setCPUFeature(FEAT_DPB);
// ID_AA64ISAR1_EL1.LRCPC != 0b0000
if (extractBits(ftr, 20, 4) != 0x0)
setCPUFeature(FEAT_RCPC);
// ID_AA64ISAR1_EL1.LRCPC == 0b0011
if (extractBits(ftr, 20, 4) == 0x3)
setCPUFeature(FEAT_RCPC3);
// ID_AA64ISAR1_EL1.SPECRES == 0b0001
if (extractBits(ftr, 40, 4) == 0x2)
setCPUFeature(FEAT_PREDRES);
// ID_AA64ISAR1_EL1.BF16 != 0b0000
if (extractBits(ftr, 44, 4) != 0x0)
setCPUFeature(FEAT_BF16);
// ID_AA64ISAR1_EL1.LS64 >= 0b0001
if (extractBits(ftr, 60, 4) >= 0x1)
setCPUFeature(FEAT_LS64);
// ID_AA64ISAR1_EL1.LS64 >= 0b0010
if (extractBits(ftr, 60, 4) >= 0x2)
setCPUFeature(FEAT_LS64_V);
// ID_AA64ISAR1_EL1.LS64 >= 0b0011
if (extractBits(ftr, 60, 4) >= 0x3)
setCPUFeature(FEAT_LS64_ACCDATA);
} else {
// Set some features in case of no CPUID support
if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
setCPUFeature(FEAT_FP);
// FP and AdvSIMD fields have the same value
setCPUFeature(FEAT_SIMD);
}
if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
setCPUFeature(FEAT_DPB);
if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
setCPUFeature(FEAT_RCPC);
if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
setCPUFeature(FEAT_BF16);
if (hwcap2 & HWCAP2_SVEBF16)
setCPUFeature(FEAT_SVE_BF16);
if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE)
setCPUFeature(FEAT_SVE2);
if (hwcap & HWCAP_SHA3)
setCPUFeature(FEAT_SHA3);
}
setCPUFeature(FEAT_INIT);
}
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
if (__aarch64_cpu_features.features)
return;
#if defined(__ANDROID__)
// ifunc resolvers don't have hwcaps in arguments on Android API lower
// than 30. If so, set feature detection done and keep all CPU features
// unsupported (zeros). To detect this case in runtime we check existence
// of memfd_create function from Standard C library which was introduced in
// Android API 30.
int memfd_create(const char *, unsigned int) __attribute__((weak));
if (!memfd_create)
return;
#endif // defined(__ANDROID__)
__init_cpu_features_constructor(hwcap, arg);
}
void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
unsigned long hwcap;
unsigned long hwcap2;
// CPU features already initialized.
if (__aarch64_cpu_features.features)
return;
#if defined(__FreeBSD__)
int res = 0;
res = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
res |= elf_aux_info(AT_HWCAP2, &hwcap2, sizeof hwcap2);
if (res)
return;
#else
#if defined(__ANDROID__)
// Don't set any CPU features,
// detection could be wrong on Exynos 9810.
IF_EXYNOS9810 return;
#endif // defined(__ANDROID__)
hwcap = getauxval(AT_HWCAP);
hwcap2 = getauxval(AT_HWCAP2);
#endif // defined(__FreeBSD__)
__ifunc_arg_t arg;
arg._size = sizeof(__ifunc_arg_t);
arg._hwcap = hwcap;
arg._hwcap2 = hwcap2;
__init_cpu_features_constructor(hwcap | _IFUNC_ARG_HWCAP, &arg);
#undef extractBits
#undef getCPUFeature
#undef setCPUFeature
#undef IF_EXYNOS9810
}
#endif // !defined(DISABLE_AARCH64_FMV)
#endif // defined(__has_include)
#endif // __has_include(<sys/auxv.h>)
#endif // __has_include(<asm/hwcap.h>)
#endif // defined(__aarch64__)
#endif // defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)