[libc][NFC] Remove FloatProperties (#76508)
Access is now done through `FPBits` exclusively. This patch also renames a few internal structs and uses `T` instead of `FP` as a template parameter.
This commit is contained in:
committed by
GitHub
parent
b7d5b0d0ee
commit
c09e690556
@@ -22,18 +22,17 @@
|
||||
|
||||
#include "utils/MPFRWrapper/mpfr_inc.h"
|
||||
|
||||
using LIBC_NAMESPACE::fputil::FloatProperties;
|
||||
using LIBC_NAMESPACE::fputil::FPBits;
|
||||
|
||||
// This function calculates the effective precision for a given float type and
|
||||
// exponent. Subnormals have a lower effective precision since they don't
|
||||
// necessarily use all of the bits of the mantissa.
|
||||
template <typename F> inline constexpr int effective_precision(int exponent) {
|
||||
const int full_precision = FloatProperties<F>::MANTISSA_PRECISION;
|
||||
const int full_precision = FPBits<F>::MANTISSA_PRECISION;
|
||||
|
||||
// This is intended to be 0 when the exponent is the lowest normal and
|
||||
// increase as the exponent's magnitude increases.
|
||||
const int bits_below_normal =
|
||||
(-exponent) - (FloatProperties<F>::EXP_BIAS - 1);
|
||||
const int bits_below_normal = (-exponent) - (FPBits<F>::EXP_BIAS - 1);
|
||||
|
||||
// The precision should be the normal, full precision, minus the bits lost
|
||||
// by this being a subnormal, minus one for the implicit leading one.
|
||||
|
||||
@@ -39,64 +39,66 @@ enum class FPEncoding {
|
||||
X86_ExtendedPrecision,
|
||||
};
|
||||
|
||||
template <FPType> struct FPBaseProperties {};
|
||||
// Defines the layout (sign, exponent, significand) of a floating point type in
|
||||
// memory. It also defines its associated StorageType, i.e., the unsigned
|
||||
// integer type used to manipulate its representation.
|
||||
template <FPType> struct FPLayout {};
|
||||
|
||||
template <> struct FPBaseProperties<FPType::IEEE754_Binary16> {
|
||||
template <> struct FPLayout<FPType::IEEE754_Binary16> {
|
||||
using StorageType = uint16_t;
|
||||
LIBC_INLINE_VAR static constexpr int TOTAL_LEN = 16;
|
||||
LIBC_INLINE_VAR static constexpr int SIG_LEN = 10;
|
||||
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
|
||||
LIBC_INLINE_VAR static constexpr int EXP_LEN = 5;
|
||||
LIBC_INLINE_VAR static constexpr int SIG_LEN = 10;
|
||||
LIBC_INLINE_VAR static constexpr auto ENCODING = FPEncoding::IEEE754;
|
||||
};
|
||||
|
||||
template <> struct FPBaseProperties<FPType::IEEE754_Binary32> {
|
||||
template <> struct FPLayout<FPType::IEEE754_Binary32> {
|
||||
using StorageType = uint32_t;
|
||||
LIBC_INLINE_VAR static constexpr int TOTAL_LEN = 32;
|
||||
LIBC_INLINE_VAR static constexpr int SIG_LEN = 23;
|
||||
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
|
||||
LIBC_INLINE_VAR static constexpr int EXP_LEN = 8;
|
||||
LIBC_INLINE_VAR static constexpr int SIG_LEN = 23;
|
||||
LIBC_INLINE_VAR static constexpr auto ENCODING = FPEncoding::IEEE754;
|
||||
};
|
||||
|
||||
template <> struct FPBaseProperties<FPType::IEEE754_Binary64> {
|
||||
template <> struct FPLayout<FPType::IEEE754_Binary64> {
|
||||
using StorageType = uint64_t;
|
||||
LIBC_INLINE_VAR static constexpr int TOTAL_LEN = 64;
|
||||
LIBC_INLINE_VAR static constexpr int SIG_LEN = 52;
|
||||
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
|
||||
LIBC_INLINE_VAR static constexpr int EXP_LEN = 11;
|
||||
LIBC_INLINE_VAR static constexpr int SIG_LEN = 52;
|
||||
LIBC_INLINE_VAR static constexpr auto ENCODING = FPEncoding::IEEE754;
|
||||
};
|
||||
|
||||
template <> struct FPBaseProperties<FPType::IEEE754_Binary128> {
|
||||
template <> struct FPLayout<FPType::IEEE754_Binary128> {
|
||||
using StorageType = UInt128;
|
||||
LIBC_INLINE_VAR static constexpr int TOTAL_LEN = 128;
|
||||
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
|
||||
LIBC_INLINE_VAR static constexpr int EXP_LEN = 15;
|
||||
LIBC_INLINE_VAR static constexpr int SIG_LEN = 112;
|
||||
LIBC_INLINE_VAR static constexpr int EXP_LEN = 15;
|
||||
LIBC_INLINE_VAR static constexpr auto ENCODING = FPEncoding::IEEE754;
|
||||
};
|
||||
|
||||
template <> struct FPBaseProperties<FPType::X86_Binary80> {
|
||||
template <> struct FPLayout<FPType::X86_Binary80> {
|
||||
using StorageType = UInt128;
|
||||
LIBC_INLINE_VAR static constexpr int TOTAL_LEN = 80;
|
||||
LIBC_INLINE_VAR static constexpr int SIG_LEN = 64;
|
||||
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
|
||||
LIBC_INLINE_VAR static constexpr int EXP_LEN = 15;
|
||||
LIBC_INLINE_VAR static constexpr int SIG_LEN = 64;
|
||||
LIBC_INLINE_VAR static constexpr auto ENCODING =
|
||||
FPEncoding::X86_ExtendedPrecision;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
// FPBaseMasksAndShifts derives useful constants from the FPLayout.
|
||||
template <FPType fp_type>
|
||||
struct FPProperties : public internal::FPBaseProperties<fp_type> {
|
||||
struct FPBaseMasksAndShifts : public internal::FPLayout<fp_type> {
|
||||
private:
|
||||
using UP = internal::FPBaseProperties<fp_type>;
|
||||
using UP = internal::FPLayout<fp_type>;
|
||||
|
||||
public:
|
||||
// The number of bits to represent sign. For documentation purpose, always 1.
|
||||
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
|
||||
using UP::EXP_LEN; // The number of bits for the *exponent* part
|
||||
using UP::SIG_LEN; // The number of bits for the *significand* part
|
||||
using UP::TOTAL_LEN; // For convenience, the sum of `SIG_LEN`, `EXP_LEN`,
|
||||
// and `SIGN_LEN`.
|
||||
static_assert(SIGN_LEN + EXP_LEN + SIG_LEN == TOTAL_LEN);
|
||||
using UP::EXP_LEN; // The number of bits for the *exponent* part
|
||||
using UP::SIG_LEN; // The number of bits for the *significand* part
|
||||
using UP::SIGN_LEN; // The number of bits for the *sign* part
|
||||
// For convenience, the sum of `SIG_LEN`, `EXP_LEN`, and `SIGN_LEN`.
|
||||
LIBC_INLINE_VAR static constexpr int TOTAL_LEN = SIGN_LEN + EXP_LEN + SIG_LEN;
|
||||
|
||||
// An unsigned integer that is wide enough to contain all of the floating
|
||||
// point bits.
|
||||
@@ -173,45 +175,12 @@ protected:
|
||||
: bit_at(SIG_LEN - 2); // 0b0100...
|
||||
};
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
template <typename FP> LIBC_INLINE static constexpr FPType get_fp_type() {
|
||||
if constexpr (cpp::is_same_v<FP, float> && __FLT_MANT_DIG__ == 24)
|
||||
return FPType::IEEE754_Binary32;
|
||||
else if constexpr (cpp::is_same_v<FP, double> && __DBL_MANT_DIG__ == 53)
|
||||
return FPType::IEEE754_Binary64;
|
||||
else if constexpr (cpp::is_same_v<FP, long double>) {
|
||||
if constexpr (__LDBL_MANT_DIG__ == 53)
|
||||
return FPType::IEEE754_Binary64;
|
||||
else if constexpr (__LDBL_MANT_DIG__ == 64)
|
||||
return FPType::X86_Binary80;
|
||||
else if constexpr (__LDBL_MANT_DIG__ == 113)
|
||||
return FPType::IEEE754_Binary128;
|
||||
}
|
||||
#if defined(LIBC_COMPILER_HAS_C23_FLOAT16)
|
||||
else if constexpr (cpp::is_same_v<FP, _Float16>)
|
||||
return FPType::IEEE754_Binary16;
|
||||
#endif
|
||||
#if defined(LIBC_COMPILER_HAS_C23_FLOAT128)
|
||||
else if constexpr (cpp::is_same_v<FP, _Float128>)
|
||||
return FPType::IEEE754_Binary128;
|
||||
#endif
|
||||
#if defined(LIBC_COMPILER_HAS_FLOAT128_EXTENSION)
|
||||
else if constexpr (cpp::is_same_v<FP, __float128>)
|
||||
return FPType::IEEE754_Binary128;
|
||||
#endif
|
||||
else
|
||||
static_assert(cpp::always_false<FP>, "Unsupported type");
|
||||
}
|
||||
|
||||
template <typename FP>
|
||||
struct FloatProperties : public FPProperties<get_fp_type<FP>()> {};
|
||||
|
||||
namespace internal {
|
||||
|
||||
// This is a temporary class to unify common methods and properties between
|
||||
// FPBits and FPBits<long double>.
|
||||
template <FPType fp_type> struct FPBitsCommon : private FPProperties<fp_type> {
|
||||
using UP = FPProperties<fp_type>;
|
||||
template <FPType fp_type> struct FPRep : private FPBaseMasksAndShifts<fp_type> {
|
||||
using UP = FPBaseMasksAndShifts<fp_type>;
|
||||
using typename UP::StorageType;
|
||||
using UP::TOTAL_LEN;
|
||||
|
||||
@@ -227,15 +196,17 @@ public:
|
||||
using UP::FP_MASK;
|
||||
using UP::FRACTION_LEN;
|
||||
using UP::FRACTION_MASK;
|
||||
using UP::MANTISSA_PRECISION;
|
||||
using UP::SIGN_MASK;
|
||||
using UP::STORAGE_LEN;
|
||||
|
||||
// Reinterpreting bits as an integer value and interpreting the bits of an
|
||||
// integer value as a floating point value is used in tests. So, a convenient
|
||||
// type is provided for such reinterpretations.
|
||||
StorageType bits;
|
||||
|
||||
LIBC_INLINE constexpr FPBitsCommon() : bits(0) {}
|
||||
LIBC_INLINE explicit constexpr FPBitsCommon(StorageType bits) : bits(bits) {}
|
||||
LIBC_INLINE constexpr FPRep() : bits(0) {}
|
||||
LIBC_INLINE explicit constexpr FPRep(StorageType bits) : bits(bits) {}
|
||||
|
||||
LIBC_INLINE constexpr void set_mantissa(StorageType mantVal) {
|
||||
mantVal &= FRACTION_MASK;
|
||||
@@ -297,6 +268,37 @@ public:
|
||||
|
||||
} // namespace internal
|
||||
|
||||
// Returns the FPType corresponding to C++ type T on the host.
|
||||
template <typename T> LIBC_INLINE static constexpr FPType get_fp_type() {
|
||||
using UnqualT = cpp::remove_cv_t<T>;
|
||||
if constexpr (cpp::is_same_v<UnqualT, float> && __FLT_MANT_DIG__ == 24)
|
||||
return FPType::IEEE754_Binary32;
|
||||
else if constexpr (cpp::is_same_v<UnqualT, double> && __DBL_MANT_DIG__ == 53)
|
||||
return FPType::IEEE754_Binary64;
|
||||
else if constexpr (cpp::is_same_v<UnqualT, long double>) {
|
||||
if constexpr (__LDBL_MANT_DIG__ == 53)
|
||||
return FPType::IEEE754_Binary64;
|
||||
else if constexpr (__LDBL_MANT_DIG__ == 64)
|
||||
return FPType::X86_Binary80;
|
||||
else if constexpr (__LDBL_MANT_DIG__ == 113)
|
||||
return FPType::IEEE754_Binary128;
|
||||
}
|
||||
#if defined(LIBC_COMPILER_HAS_C23_FLOAT16)
|
||||
else if constexpr (cpp::is_same_v<UnqualT, _Float16>)
|
||||
return FPType::IEEE754_Binary16;
|
||||
#endif
|
||||
#if defined(LIBC_COMPILER_HAS_C23_FLOAT128)
|
||||
else if constexpr (cpp::is_same_v<UnqualT, _Float128>)
|
||||
return FPType::IEEE754_Binary128;
|
||||
#endif
|
||||
#if defined(LIBC_COMPILER_HAS_FLOAT128_EXTENSION)
|
||||
else if constexpr (cpp::is_same_v<UnqualT, __float128>)
|
||||
return FPType::IEEE754_Binary128;
|
||||
#endif
|
||||
else
|
||||
static_assert(cpp::always_false<UnqualT>, "Unsupported type");
|
||||
}
|
||||
|
||||
// A generic class to represent single precision, double precision, and quad
|
||||
// precision IEEE 754 floating point formats.
|
||||
// On most platforms, the 'float' type corresponds to single precision floating
|
||||
@@ -305,11 +307,10 @@ public:
|
||||
// floating numbers. On x86 platforms however, the 'long double' type maps to
|
||||
// an x87 floating point format. This format is an IEEE 754 extension format.
|
||||
// It is handled as an explicit specialization of this class.
|
||||
template <typename T>
|
||||
struct FPBits : public internal::FPBitsCommon<get_fp_type<T>()> {
|
||||
template <typename T> struct FPBits : public internal::FPRep<get_fp_type<T>()> {
|
||||
static_assert(cpp::is_floating_point_v<T>,
|
||||
"FPBits instantiated with invalid type.");
|
||||
using UP = internal::FPBitsCommon<get_fp_type<T>()>;
|
||||
using UP = internal::FPRep<get_fp_type<T>()>;
|
||||
using StorageType = typename UP::StorageType;
|
||||
using UP::bits;
|
||||
|
||||
|
||||
@@ -174,13 +174,13 @@ LIBC_INLINE T nextafter(T from, U to) {
|
||||
} else {
|
||||
int_val = FPBits<T>::MIN_SUBNORMAL;
|
||||
if (to_bits.get_sign())
|
||||
int_val |= FloatProperties<T>::SIGN_MASK;
|
||||
int_val |= FPBits<T>::SIGN_MASK;
|
||||
}
|
||||
|
||||
StorageType exponent_bits = int_val & FloatProperties<T>::EXP_MASK;
|
||||
StorageType exponent_bits = int_val & FPBits<T>::EXP_MASK;
|
||||
if (exponent_bits == StorageType(0))
|
||||
raise_except_if_required(FE_UNDERFLOW | FE_INEXACT);
|
||||
else if (exponent_bits == FloatProperties<T>::EXP_MASK)
|
||||
else if (exponent_bits == FPBits<T>::EXP_MASK)
|
||||
raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
|
||||
|
||||
return cpp::bit_cast<T>(int_val);
|
||||
|
||||
@@ -41,10 +41,10 @@ template <size_t Bits> struct DyadicFloat {
|
||||
|
||||
template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
|
||||
DyadicFloat(T x) {
|
||||
static_assert(FloatProperties<T>::FRACTION_LEN < Bits);
|
||||
static_assert(FPBits<T>::FRACTION_LEN < Bits);
|
||||
FPBits<T> x_bits(x);
|
||||
sign = x_bits.get_sign();
|
||||
exponent = x_bits.get_exponent() - FloatProperties<T>::FRACTION_LEN;
|
||||
exponent = x_bits.get_exponent() - FPBits<T>::FRACTION_LEN;
|
||||
mantissa = MantissaType(x_bits.get_explicit_mantissa());
|
||||
normalize();
|
||||
}
|
||||
@@ -83,21 +83,20 @@ template <size_t Bits> struct DyadicFloat {
|
||||
// Output is rounded correctly with respect to the current rounding mode.
|
||||
// TODO(lntue): Add support for underflow.
|
||||
// TODO(lntue): Test or add specialization for x86 long double.
|
||||
template <typename T, typename = cpp::enable_if_t<
|
||||
cpp::is_floating_point_v<T> &&
|
||||
(FloatProperties<T>::FRACTION_LEN < Bits),
|
||||
void>>
|
||||
template <typename T,
|
||||
typename = cpp::enable_if_t<cpp::is_floating_point_v<T> &&
|
||||
(FPBits<T>::FRACTION_LEN < Bits),
|
||||
void>>
|
||||
explicit operator T() const {
|
||||
// TODO(lntue): Do we need to treat signed zeros properly?
|
||||
if (mantissa.is_zero())
|
||||
return 0.0;
|
||||
|
||||
// Assume that it is normalized, and output is also normal.
|
||||
constexpr uint32_t PRECISION = FloatProperties<T>::MANTISSA_PRECISION;
|
||||
constexpr uint32_t PRECISION = FPBits<T>::MANTISSA_PRECISION;
|
||||
using output_bits_t = typename FPBits<T>::StorageType;
|
||||
|
||||
int exp_hi =
|
||||
exponent + static_cast<int>((Bits - 1) + FloatProperties<T>::EXP_BIAS);
|
||||
int exp_hi = exponent + static_cast<int>((Bits - 1) + FPBits<T>::EXP_BIAS);
|
||||
|
||||
bool denorm = false;
|
||||
uint32_t shift = Bits - PRECISION;
|
||||
@@ -106,7 +105,7 @@ template <size_t Bits> struct DyadicFloat {
|
||||
denorm = true;
|
||||
shift = (Bits - PRECISION) + static_cast<uint32_t>(1 - exp_hi);
|
||||
|
||||
exp_hi = FloatProperties<T>::EXP_BIAS;
|
||||
exp_hi = FPBits<T>::EXP_BIAS;
|
||||
}
|
||||
|
||||
int exp_lo = exp_hi - static_cast<int>(PRECISION) - 1;
|
||||
@@ -115,7 +114,7 @@ template <size_t Bits> struct DyadicFloat {
|
||||
|
||||
T d_hi = FPBits<T>::create_value(sign, exp_hi,
|
||||
static_cast<output_bits_t>(m_hi) &
|
||||
FloatProperties<T>::FRACTION_MASK)
|
||||
FPBits<T>::FRACTION_MASK)
|
||||
.get_val();
|
||||
|
||||
const MantissaType round_mask = MantissaType(1) << (shift - 1);
|
||||
@@ -129,15 +128,13 @@ template <size_t Bits> struct DyadicFloat {
|
||||
if (LIBC_UNLIKELY(exp_lo <= 0)) {
|
||||
// d_lo is denormal, but the output is normal.
|
||||
int scale_up_exponent = 2 * PRECISION;
|
||||
T scale_up_factor = FPBits<T>::create_value(sign,
|
||||
FloatProperties<T>::EXP_BIAS +
|
||||
scale_up_exponent,
|
||||
output_bits_t(0))
|
||||
.get_val();
|
||||
T scale_up_factor =
|
||||
FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS + scale_up_exponent,
|
||||
output_bits_t(0))
|
||||
.get_val();
|
||||
T scale_down_factor =
|
||||
FPBits<T>::create_value(
|
||||
sign, FloatProperties<T>::EXP_BIAS - scale_up_exponent,
|
||||
output_bits_t(0))
|
||||
FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS - scale_up_exponent,
|
||||
output_bits_t(0))
|
||||
.get_val();
|
||||
|
||||
d_lo = FPBits<T>::create_value(sign, exp_lo + scale_up_exponent,
|
||||
@@ -156,7 +153,7 @@ template <size_t Bits> struct DyadicFloat {
|
||||
if (LIBC_UNLIKELY(denorm)) {
|
||||
// Output is denormal, simply clear the exponent field.
|
||||
output_bits_t clear_exp = output_bits_t(exp_hi)
|
||||
<< FloatProperties<T>::FRACTION_LEN;
|
||||
<< FPBits<T>::FRACTION_LEN;
|
||||
output_bits_t r_bits = FPBits<T>(r).uintval() - clear_exp;
|
||||
return FPBits<T>(r_bits).get_val();
|
||||
}
|
||||
|
||||
@@ -94,7 +94,6 @@ LIBC_INLINE bool shift_mantissa(int shift_length, UInt128 &mant) {
|
||||
|
||||
template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
|
||||
using FPBits = fputil::FPBits<double>;
|
||||
using FloatProp = fputil::FloatProperties<double>;
|
||||
|
||||
if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0)) {
|
||||
return x * y + z;
|
||||
@@ -267,10 +266,10 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
|
||||
}
|
||||
|
||||
// Remove hidden bit and append the exponent field and sign bit.
|
||||
result = (result & FloatProp::FRACTION_MASK) |
|
||||
(static_cast<uint64_t>(r_exp) << FloatProp::FRACTION_LEN);
|
||||
result = (result & FPBits::FRACTION_MASK) |
|
||||
(static_cast<uint64_t>(r_exp) << FPBits::FRACTION_LEN);
|
||||
if (prod_sign) {
|
||||
result |= FloatProp::SIGN_MASK;
|
||||
result |= FPBits::SIGN_MASK;
|
||||
}
|
||||
|
||||
// Rounding.
|
||||
|
||||
@@ -27,9 +27,8 @@ namespace LIBC_NAMESPACE {
|
||||
namespace fputil {
|
||||
|
||||
template <>
|
||||
struct FPBits<long double>
|
||||
: public internal::FPBitsCommon<FPType::X86_Binary80> {
|
||||
using UP = internal::FPBitsCommon<FPType::X86_Binary80>;
|
||||
struct FPBits<long double> : public internal::FPRep<FPType::X86_Binary80> {
|
||||
using UP = internal::FPRep<FPType::X86_Binary80>;
|
||||
using StorageType = typename UP::StorageType;
|
||||
using UP::bits;
|
||||
|
||||
|
||||
@@ -105,7 +105,7 @@ namespace LIBC_NAMESPACE {
|
||||
using BlockInt = uint32_t;
|
||||
constexpr uint32_t BLOCK_SIZE = 9;
|
||||
|
||||
using FloatProp = fputil::FloatProperties<long double>;
|
||||
using FPBits = fputil::FPBits<long double>;
|
||||
|
||||
// Larger numbers prefer a slightly larger constant than is used for the smaller
|
||||
// numbers.
|
||||
@@ -382,10 +382,10 @@ LIBC_INLINE uint32_t fast_uint_mod_1e9(const cpp::UInt<MID_INT_SIZE> &val) {
|
||||
(1000000000 * shifted));
|
||||
}
|
||||
|
||||
LIBC_INLINE uint32_t mul_shift_mod_1e9(const FloatProp::StorageType mantissa,
|
||||
LIBC_INLINE uint32_t mul_shift_mod_1e9(const FPBits::StorageType mantissa,
|
||||
const cpp::UInt<MID_INT_SIZE> &large,
|
||||
const int32_t shift_amount) {
|
||||
cpp::UInt<MID_INT_SIZE + FloatProp::STORAGE_LEN> val(large);
|
||||
cpp::UInt<MID_INT_SIZE + FPBits::STORAGE_LEN> val(large);
|
||||
val = (val * mantissa) >> shift_amount;
|
||||
return static_cast<uint32_t>(
|
||||
val.div_uint32_times_pow_2(1000000000, 0).value());
|
||||
@@ -414,7 +414,7 @@ class FloatToString {
|
||||
fputil::FPBits<T> float_bits;
|
||||
bool is_negative;
|
||||
int exponent;
|
||||
FloatProp::StorageType mantissa;
|
||||
FPBits::StorageType mantissa;
|
||||
|
||||
static constexpr int FRACTION_LEN = fputil::FPBits<T>::FRACTION_LEN;
|
||||
static constexpr int EXP_BIAS = fputil::FPBits<T>::EXP_BIAS;
|
||||
|
||||
@@ -71,7 +71,6 @@ LIBC_INLINE cpp::optional<ExpandedFloat<T>>
|
||||
eisel_lemire(ExpandedFloat<T> init_num,
|
||||
RoundDirection round = RoundDirection::Nearest) {
|
||||
using FPBits = typename fputil::FPBits<T>;
|
||||
using FloatProp = typename fputil::FloatProperties<T>;
|
||||
using StorageType = typename FPBits::StorageType;
|
||||
|
||||
StorageType mantissa = init_num.mantissa;
|
||||
@@ -93,7 +92,7 @@ eisel_lemire(ExpandedFloat<T> init_num,
|
||||
mantissa <<= clz;
|
||||
|
||||
int32_t exp2 =
|
||||
exp10_to_exp2(exp10) + FloatProp::STORAGE_LEN + FloatProp::EXP_BIAS - clz;
|
||||
exp10_to_exp2(exp10) + FPBits::STORAGE_LEN + FPBits::EXP_BIAS - clz;
|
||||
|
||||
// Multiplication
|
||||
const uint64_t *power_of_ten =
|
||||
@@ -110,9 +109,7 @@ eisel_lemire(ExpandedFloat<T> init_num,
|
||||
// accuracy, and the most significant bit is ignored.) = 9 bits. Similarly,
|
||||
// it's 6 bits for floats in this case.
|
||||
const uint64_t halfway_constant =
|
||||
(uint64_t(1) << (FloatProp::STORAGE_LEN -
|
||||
(FloatProp::FRACTION_LEN + 3))) -
|
||||
1;
|
||||
(uint64_t(1) << (FPBits::STORAGE_LEN - (FPBits::FRACTION_LEN + 3))) - 1;
|
||||
if ((high64(first_approx) & halfway_constant) == halfway_constant &&
|
||||
low64(first_approx) + mantissa < mantissa) {
|
||||
UInt128 low_bits =
|
||||
@@ -132,10 +129,10 @@ eisel_lemire(ExpandedFloat<T> init_num,
|
||||
|
||||
// Shifting to 54 bits for doubles and 25 bits for floats
|
||||
StorageType msb = static_cast<StorageType>(high64(final_approx) >>
|
||||
(FloatProp::STORAGE_LEN - 1));
|
||||
(FPBits::STORAGE_LEN - 1));
|
||||
StorageType final_mantissa = static_cast<StorageType>(
|
||||
high64(final_approx) >>
|
||||
(msb + FloatProp::STORAGE_LEN - (FloatProp::FRACTION_LEN + 3)));
|
||||
(msb + FPBits::STORAGE_LEN - (FPBits::FRACTION_LEN + 3)));
|
||||
exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb
|
||||
|
||||
if (round == RoundDirection::Nearest) {
|
||||
@@ -161,14 +158,14 @@ eisel_lemire(ExpandedFloat<T> init_num,
|
||||
|
||||
// From 54 to 53 bits for doubles and 25 to 24 bits for floats
|
||||
final_mantissa >>= 1;
|
||||
if ((final_mantissa >> (FloatProp::FRACTION_LEN + 1)) > 0) {
|
||||
if ((final_mantissa >> (FPBits::FRACTION_LEN + 1)) > 0) {
|
||||
final_mantissa >>= 1;
|
||||
++exp2;
|
||||
}
|
||||
|
||||
// The if block is equivalent to (but has fewer branches than):
|
||||
// if exp2 <= 0 || exp2 >= 0x7FF { etc }
|
||||
if (static_cast<uint32_t>(exp2) - 1 >= (1 << FloatProp::EXP_LEN) - 2) {
|
||||
if (static_cast<uint32_t>(exp2) - 1 >= (1 << FPBits::EXP_LEN) - 2) {
|
||||
return cpp::nullopt;
|
||||
}
|
||||
|
||||
@@ -184,7 +181,6 @@ LIBC_INLINE cpp::optional<ExpandedFloat<long double>>
|
||||
eisel_lemire<long double>(ExpandedFloat<long double> init_num,
|
||||
RoundDirection round) {
|
||||
using FPBits = typename fputil::FPBits<long double>;
|
||||
using FloatProp = typename fputil::FloatProperties<long double>;
|
||||
using StorageType = typename FPBits::StorageType;
|
||||
|
||||
StorageType mantissa = init_num.mantissa;
|
||||
@@ -210,7 +206,7 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
|
||||
mantissa <<= clz;
|
||||
|
||||
int32_t exp2 =
|
||||
exp10_to_exp2(exp10) + FloatProp::STORAGE_LEN + FloatProp::EXP_BIAS - clz;
|
||||
exp10_to_exp2(exp10) + FPBits::STORAGE_LEN + FPBits::EXP_BIAS - clz;
|
||||
|
||||
// Multiplication
|
||||
const uint64_t *power_of_ten =
|
||||
@@ -247,8 +243,7 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
|
||||
// accuracy, and the most significant bit is ignored.) = 61 bits. Similarly,
|
||||
// it's 12 bits for 128 bit floats in this case.
|
||||
constexpr UInt128 HALFWAY_CONSTANT =
|
||||
(UInt128(1) << (FloatProp::STORAGE_LEN - (FloatProp::FRACTION_LEN + 3))) -
|
||||
1;
|
||||
(UInt128(1) << (FPBits::STORAGE_LEN - (FPBits::FRACTION_LEN + 3))) - 1;
|
||||
|
||||
if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT &&
|
||||
final_approx_lower + mantissa < mantissa) {
|
||||
@@ -257,10 +252,10 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
|
||||
|
||||
// Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats
|
||||
uint32_t msb =
|
||||
static_cast<uint32_t>(final_approx_upper >> (FloatProp::STORAGE_LEN - 1));
|
||||
static_cast<uint32_t>(final_approx_upper >> (FPBits::STORAGE_LEN - 1));
|
||||
StorageType final_mantissa =
|
||||
final_approx_upper >>
|
||||
(msb + FloatProp::STORAGE_LEN - (FloatProp::FRACTION_LEN + 3));
|
||||
(msb + FPBits::STORAGE_LEN - (FPBits::FRACTION_LEN + 3));
|
||||
exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb
|
||||
|
||||
if (round == RoundDirection::Nearest) {
|
||||
@@ -285,14 +280,14 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
|
||||
// From 65 to 64 bits for 80 bit floats and 113 to 112 bits for 128 bit
|
||||
// floats
|
||||
final_mantissa >>= 1;
|
||||
if ((final_mantissa >> (FloatProp::FRACTION_LEN + 1)) > 0) {
|
||||
if ((final_mantissa >> (FPBits::FRACTION_LEN + 1)) > 0) {
|
||||
final_mantissa >>= 1;
|
||||
++exp2;
|
||||
}
|
||||
|
||||
// The if block is equivalent to (but has fewer branches than):
|
||||
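// if exp2 <= 0 || exp2 >= (1 << EXP_LEN) - 1 { etc }
// NOTE(review): unlike the generic version, exp2 is not cast to unsigned
// here, so the `exp2 <= 0` half of this equivalence may not hold - confirm.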
|
||||
if (exp2 - 1 >= (1 << FloatProp::EXP_LEN) - 2) {
|
||||
if (exp2 - 1 >= (1 << FPBits::EXP_LEN) - 2) {
|
||||
return cpp::nullopt;
|
||||
}
|
||||
|
||||
@@ -321,7 +316,6 @@ LIBC_INLINE FloatConvertReturn<T>
|
||||
simple_decimal_conversion(const char *__restrict numStart,
|
||||
RoundDirection round = RoundDirection::Nearest) {
|
||||
using FPBits = typename fputil::FPBits<T>;
|
||||
using FloatProp = typename fputil::FloatProperties<T>;
|
||||
using StorageType = typename FPBits::StorageType;
|
||||
|
||||
int32_t exp2 = 0;
|
||||
@@ -337,7 +331,7 @@ simple_decimal_conversion(const char *__restrict numStart,
|
||||
// If the exponent is too large and can't be represented in this size of
|
||||
// float, return inf.
|
||||
if (hpd.get_decimal_point() > 0 &&
|
||||
exp10_to_exp2(hpd.get_decimal_point() - 1) > FloatProp::EXP_BIAS) {
|
||||
exp10_to_exp2(hpd.get_decimal_point() - 1) > FPBits::EXP_BIAS) {
|
||||
output.num = {0, fputil::FPBits<T>::MAX_BIASED_EXPONENT};
|
||||
output.error = ERANGE;
|
||||
return output;
|
||||
@@ -345,8 +339,7 @@ simple_decimal_conversion(const char *__restrict numStart,
|
||||
// If the exponent is too small even for a subnormal, return 0.
|
||||
if (hpd.get_decimal_point() < 0 &&
|
||||
exp10_to_exp2(-hpd.get_decimal_point()) >
|
||||
(FloatProp::EXP_BIAS +
|
||||
static_cast<int32_t>(FloatProp::FRACTION_LEN))) {
|
||||
(FPBits::EXP_BIAS + static_cast<int32_t>(FPBits::FRACTION_LEN))) {
|
||||
output.num = {0, 0};
|
||||
output.error = ERANGE;
|
||||
return output;
|
||||
@@ -385,7 +378,7 @@ simple_decimal_conversion(const char *__restrict numStart,
|
||||
hpd.shift(1);
|
||||
|
||||
// Get the biased exponent
|
||||
exp2 += FloatProp::EXP_BIAS;
|
||||
exp2 += FPBits::EXP_BIAS;
|
||||
|
||||
// Handle the exponent being too large (and return inf).
|
||||
if (exp2 >= FPBits::MAX_BIASED_EXPONENT) {
|
||||
@@ -395,7 +388,7 @@ simple_decimal_conversion(const char *__restrict numStart,
|
||||
}
|
||||
|
||||
// Shift left to fill the mantissa
|
||||
hpd.shift(FloatProp::FRACTION_LEN);
|
||||
hpd.shift(FPBits::FRACTION_LEN);
|
||||
StorageType final_mantissa = hpd.round_to_integer_type<StorageType>();
|
||||
|
||||
// Handle subnormals
|
||||
@@ -411,13 +404,13 @@ simple_decimal_conversion(const char *__restrict numStart,
|
||||
final_mantissa = hpd.round_to_integer_type<StorageType>(round);
|
||||
|
||||
// Check if by shifting right we've caused this to round to a normal number.
|
||||
if ((final_mantissa >> FloatProp::FRACTION_LEN) != 0) {
|
||||
if ((final_mantissa >> FPBits::FRACTION_LEN) != 0) {
|
||||
++exp2;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if rounding added a bit, and shift down if that's the case.
|
||||
if (final_mantissa == StorageType(2) << FloatProp::FRACTION_LEN) {
|
||||
if (final_mantissa == StorageType(2) << FPBits::FRACTION_LEN) {
|
||||
final_mantissa >>= 1;
|
||||
++exp2;
|
||||
|
||||
@@ -515,13 +508,12 @@ LIBC_INLINE cpp::optional<ExpandedFloat<T>>
|
||||
clinger_fast_path(ExpandedFloat<T> init_num,
|
||||
RoundDirection round = RoundDirection::Nearest) {
|
||||
using FPBits = typename fputil::FPBits<T>;
|
||||
using FloatProp = typename fputil::FloatProperties<T>;
|
||||
using StorageType = typename FPBits::StorageType;
|
||||
|
||||
StorageType mantissa = init_num.mantissa;
|
||||
int32_t exp10 = init_num.exponent;
|
||||
|
||||
if ((mantissa >> FloatProp::FRACTION_LEN) > 0) {
|
||||
if ((mantissa >> FPBits::FRACTION_LEN) > 0) {
|
||||
return cpp::nullopt;
|
||||
}
|
||||
|
||||
@@ -605,7 +597,7 @@ clinger_fast_path(ExpandedFloat<T> init_num,
|
||||
// log10(2^(exponent bias)).
|
||||
// The generic approximation uses the fact that log10(2^x) ~= x/3
|
||||
template <typename T> constexpr int32_t get_upper_bound() {
|
||||
return fputil::FloatProperties<T>::EXP_BIAS / 3;
|
||||
return fputil::FPBits<T>::EXP_BIAS / 3;
|
||||
}
|
||||
|
||||
template <> constexpr int32_t get_upper_bound<float>() { return 39; }
|
||||
@@ -621,11 +613,10 @@ template <> constexpr int32_t get_upper_bound<double>() { return 309; }
|
||||
// other out, and subnormal numbers allow for the result to be at the very low
|
||||
// end of the final mantissa.
|
||||
template <typename T> constexpr int32_t get_lower_bound() {
|
||||
using FloatProp = typename fputil::FloatProperties<T>;
|
||||
return -(
|
||||
(FloatProp::EXP_BIAS +
|
||||
static_cast<int32_t>(FloatProp::FRACTION_LEN + FloatProp::STORAGE_LEN)) /
|
||||
3);
|
||||
using FPBits = typename fputil::FPBits<T>;
|
||||
return -((FPBits::EXP_BIAS +
|
||||
static_cast<int32_t>(FPBits::FRACTION_LEN + FPBits::STORAGE_LEN)) /
|
||||
3);
|
||||
}
|
||||
|
||||
template <> constexpr int32_t get_lower_bound<float>() {
|
||||
@@ -723,7 +714,6 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
|
||||
bool truncated,
|
||||
RoundDirection round) {
|
||||
using FPBits = typename fputil::FPBits<T>;
|
||||
using FloatProp = typename fputil::FloatProperties<T>;
|
||||
using StorageType = typename FPBits::StorageType;
|
||||
|
||||
StorageType mantissa = init_num.mantissa;
|
||||
@@ -733,7 +723,7 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
|
||||
|
||||
// INF_EXP is the biased exponent with all EXP_LEN bits set; it is the value
|
||||
// used below when the result overflows to infinity.
|
||||
constexpr int32_t INF_EXP = (1 << FloatProp::EXP_LEN) - 1;
|
||||
constexpr int32_t INF_EXP = (1 << FPBits::EXP_LEN) - 1;
|
||||
|
||||
// Normalization step 1: Bring the leading bit to the highest bit of
|
||||
// StorageType.
|
||||
@@ -744,26 +734,25 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
|
||||
exp2 -= amount_to_shift_left;
|
||||
|
||||
// biased_exponent represents the biased exponent of the most significant bit.
|
||||
int32_t biased_exponent =
|
||||
exp2 + FloatProp::STORAGE_LEN + FPBits::EXP_BIAS - 1;
|
||||
int32_t biased_exponent = exp2 + FPBits::STORAGE_LEN + FPBits::EXP_BIAS - 1;
|
||||
|
||||
// Handle numbers that are too large and get squashed to inf
|
||||
if (biased_exponent >= INF_EXP) {
|
||||
// This indicates an overflow, so we make the result INF and set errno.
|
||||
output.num = {0, (1 << FloatProp::EXP_LEN) - 1};
|
||||
output.num = {0, (1 << FPBits::EXP_LEN) - 1};
|
||||
output.error = ERANGE;
|
||||
return output;
|
||||
}
|
||||
|
||||
uint32_t amount_to_shift_right =
|
||||
FloatProp::STORAGE_LEN - FloatProp::FRACTION_LEN - 1;
|
||||
FPBits::STORAGE_LEN - FPBits::FRACTION_LEN - 1;
|
||||
|
||||
// Handle subnormals.
|
||||
if (biased_exponent <= 0) {
|
||||
amount_to_shift_right += 1 - biased_exponent;
|
||||
biased_exponent = 0;
|
||||
|
||||
if (amount_to_shift_right > FloatProp::STORAGE_LEN) {
|
||||
if (amount_to_shift_right > FPBits::STORAGE_LEN) {
|
||||
// Return 0 if the exponent is too small.
|
||||
output.num = {0, 0};
|
||||
output.error = ERANGE;
|
||||
@@ -776,10 +765,10 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
|
||||
bool round_bit = static_cast<bool>(mantissa & round_bit_mask);
|
||||
bool sticky_bit = static_cast<bool>(mantissa & sticky_mask) || truncated;
|
||||
|
||||
if (amount_to_shift_right < FloatProp::STORAGE_LEN) {
|
||||
if (amount_to_shift_right < FPBits::STORAGE_LEN) {
|
||||
// Shift the mantissa and clear the implicit bit.
|
||||
mantissa >>= amount_to_shift_right;
|
||||
mantissa &= FloatProp::FRACTION_MASK;
|
||||
mantissa &= FPBits::FRACTION_MASK;
|
||||
} else {
|
||||
mantissa = 0;
|
||||
}
|
||||
@@ -802,7 +791,7 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
|
||||
}
|
||||
}
|
||||
|
||||
if (mantissa > FloatProp::FRACTION_MASK) {
|
||||
if (mantissa > FPBits::FRACTION_MASK) {
|
||||
// Rounding causes the exponent to increase.
|
||||
++biased_exponent;
|
||||
|
||||
@@ -815,7 +804,7 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
|
||||
output.error = ERANGE;
|
||||
}
|
||||
|
||||
output.num = {mantissa & FloatProp::FRACTION_MASK, biased_exponent};
|
||||
output.num = {mantissa & FPBits::FRACTION_MASK, biased_exponent};
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
@@ -224,7 +224,6 @@ double set_exceptional(double x) {
|
||||
|
||||
LLVM_LIBC_FUNCTION(double, exp, (double x)) {
|
||||
using FPBits = typename fputil::FPBits<double>;
|
||||
using FloatProp = typename fputil::FloatProperties<double>;
|
||||
FPBits xbits(x);
|
||||
|
||||
uint64_t x_u = xbits.uintval();
|
||||
@@ -385,7 +384,7 @@ LLVM_LIBC_FUNCTION(double, exp, (double x)) {
|
||||
if (LIBC_LIKELY(upper == lower)) {
|
||||
// to multiply by 2^hi, a fast way is to simply add hi to the exponent
|
||||
// field.
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
|
||||
double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper));
|
||||
return r;
|
||||
}
|
||||
@@ -403,7 +402,7 @@ LLVM_LIBC_FUNCTION(double, exp, (double x)) {
|
||||
double lower_dd = r_dd.hi + (r_dd.lo - ERR_DD);
|
||||
|
||||
if (LIBC_LIKELY(upper_dd == lower_dd)) {
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
|
||||
double r =
|
||||
cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper_dd));
|
||||
return r;
|
||||
|
||||
@@ -274,7 +274,6 @@ double set_exceptional(double x) {
|
||||
|
||||
LLVM_LIBC_FUNCTION(double, exp10, (double x)) {
|
||||
using FPBits = typename fputil::FPBits<double>;
|
||||
using FloatProp = typename fputil::FloatProperties<double>;
|
||||
FPBits xbits(x);
|
||||
|
||||
uint64_t x_u = xbits.uintval();
|
||||
@@ -398,7 +397,7 @@ LLVM_LIBC_FUNCTION(double, exp10, (double x)) {
|
||||
if (LIBC_LIKELY(upper == lower)) {
|
||||
// To multiply by 2^hi, a fast way is to simply add hi to the exponent
|
||||
// field.
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
|
||||
double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper));
|
||||
return r;
|
||||
}
|
||||
@@ -465,7 +464,7 @@ LLVM_LIBC_FUNCTION(double, exp10, (double x)) {
|
||||
if (LIBC_LIKELY(upper_dd == lower_dd)) {
|
||||
// To multiply by 2^hi, a fast way is to simply add hi to the exponent
|
||||
// field.
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
|
||||
double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper_dd));
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -249,7 +249,6 @@ double set_exceptional(double x) {
|
||||
|
||||
LLVM_LIBC_FUNCTION(double, exp2, (double x)) {
|
||||
using FPBits = typename fputil::FPBits<double>;
|
||||
using FloatProp = typename fputil::FloatProperties<double>;
|
||||
FPBits xbits(x);
|
||||
|
||||
uint64_t x_u = xbits.uintval();
|
||||
@@ -365,7 +364,7 @@ LLVM_LIBC_FUNCTION(double, exp2, (double x)) {
|
||||
if (LIBC_LIKELY(upper == lower)) {
|
||||
// To multiply by 2^hi, a fast way is to simply add hi to the exponent
|
||||
// field.
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
|
||||
double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper));
|
||||
return r;
|
||||
}
|
||||
@@ -379,7 +378,7 @@ LLVM_LIBC_FUNCTION(double, exp2, (double x)) {
|
||||
if (LIBC_LIKELY(upper_dd == lower_dd)) {
|
||||
// To multiply by 2^hi, a fast way is to simply add hi to the exponent
|
||||
// field.
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
|
||||
double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper_dd));
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -137,7 +137,7 @@ LIBC_INLINE float exp2f(float x) {
|
||||
// exp_hi = shift hi to the exponent field of double precision.
|
||||
int64_t exp_hi =
|
||||
static_cast<int64_t>(static_cast<uint64_t>(k >> ExpBase::MID_BITS)
|
||||
<< fputil::FloatProperties<double>::FRACTION_LEN);
|
||||
<< fputil::FPBits<double>::FRACTION_LEN);
|
||||
// mh = 2^hi * 2^mid
|
||||
// mh_bits = bit field of mh
|
||||
int64_t mh_bits = ExpBase::EXP_2_MID[k & ExpBase::MID_MASK] + exp_hi;
|
||||
|
||||
@@ -162,7 +162,7 @@ template <class Base> LIBC_INLINE exp_b_reduc_t exp_b_range_reduc(float x) {
|
||||
// hi = floor(kd * 2^(-MID_BITS))
|
||||
// exp_hi = shift hi to the exponent field of double precision.
|
||||
int64_t exp_hi = static_cast<int64_t>((k >> Base::MID_BITS))
|
||||
<< fputil::FloatProperties<double>::FRACTION_LEN;
|
||||
<< fputil::FPBits<double>::FRACTION_LEN;
|
||||
// mh = 2^hi * 2^mid
|
||||
// mh_bits = bit field of mh
|
||||
int64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi;
|
||||
@@ -235,9 +235,9 @@ template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) {
|
||||
// hi = floor(kf * 2^(-5))
|
||||
// exp_hi = shift hi to the exponent field of double precision.
|
||||
int64_t exp_hi_p = static_cast<int64_t>((k_p >> ExpBase::MID_BITS))
|
||||
<< fputil::FloatProperties<double>::FRACTION_LEN;
|
||||
<< fputil::FPBits<double>::FRACTION_LEN;
|
||||
int64_t exp_hi_m = static_cast<int64_t>((k_m >> ExpBase::MID_BITS))
|
||||
<< fputil::FloatProperties<double>::FRACTION_LEN;
|
||||
<< fputil::FPBits<double>::FRACTION_LEN;
|
||||
// mh_p = 2^(hi + mid)
|
||||
// mh_m = 2^(-(hi + mid))
|
||||
// mh_bits_* = bit field of mh_*
|
||||
@@ -342,10 +342,10 @@ LIBC_INLINE static double log_eval(double x) {
|
||||
// double(1.0 + 2^1022 * x) - 1.0 to test how x is rounded in denormal range.
|
||||
LIBC_INLINE cpp::optional<double> ziv_test_denorm(int hi, double mid, double lo,
|
||||
double err) {
|
||||
using FloatProp = typename fputil::FloatProperties<double>;
|
||||
using FPBits = typename fputil::FPBits<double>;
|
||||
|
||||
// Scaling factor = 1/(min normal number) = 2^1022
|
||||
int64_t exp_hi = static_cast<int64_t>(hi + 1022) << FloatProp::FRACTION_LEN;
|
||||
int64_t exp_hi = static_cast<int64_t>(hi + 1022) << FPBits::FRACTION_LEN;
|
||||
double mid_hi = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(mid));
|
||||
double lo_scaled =
|
||||
(lo != 0.0) ? cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(lo))
|
||||
|
||||
@@ -275,7 +275,6 @@ double set_exceptional(double x) {
|
||||
|
||||
LLVM_LIBC_FUNCTION(double, expm1, (double x)) {
|
||||
using FPBits = typename fputil::FPBits<double>;
|
||||
using FloatProp = typename fputil::FloatProperties<double>;
|
||||
FPBits xbits(x);
|
||||
|
||||
bool x_sign = xbits.get_sign();
|
||||
@@ -468,7 +467,7 @@ LLVM_LIBC_FUNCTION(double, expm1, (double x)) {
|
||||
if (LIBC_LIKELY(upper == lower)) {
|
||||
// to multiply by 2^hi, a fast way is to simply add hi to the exponent
|
||||
// field.
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
|
||||
double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper));
|
||||
return r;
|
||||
}
|
||||
@@ -482,7 +481,7 @@ LLVM_LIBC_FUNCTION(double, expm1, (double x)) {
|
||||
double lower_dd = r_dd.hi + (r_dd.lo - err_dd);
|
||||
|
||||
if (LIBC_LIKELY(upper_dd == lower_dd)) {
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
|
||||
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
|
||||
double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper_dd));
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -387,24 +387,24 @@ static constexpr DoubleDouble LOG2_R2_DD[] = {
|
||||
};
|
||||
|
||||
LIBC_INLINE bool is_odd_integer(float x) {
|
||||
using FloatProp = typename fputil::FloatProperties<float>;
|
||||
using FPBits = typename fputil::FPBits<float>;
|
||||
uint32_t x_u = cpp::bit_cast<uint32_t>(x);
|
||||
int32_t x_e = static_cast<int32_t>((x_u & FloatProp::EXP_MASK) >>
|
||||
FloatProp::FRACTION_LEN);
|
||||
int32_t lsb = cpp::countr_zero(x_u | FloatProp::EXP_MASK);
|
||||
int32_t x_e =
|
||||
static_cast<int32_t>((x_u & FPBits::EXP_MASK) >> FPBits::FRACTION_LEN);
|
||||
int32_t lsb = cpp::countr_zero(x_u | FPBits::EXP_MASK);
|
||||
constexpr int32_t UNIT_EXPONENT =
|
||||
FloatProp::EXP_BIAS + static_cast<int32_t>(FloatProp::FRACTION_LEN);
|
||||
FPBits::EXP_BIAS + static_cast<int32_t>(FPBits::FRACTION_LEN);
|
||||
return (x_e + lsb == UNIT_EXPONENT);
|
||||
}
|
||||
|
||||
LIBC_INLINE bool is_integer(float x) {
|
||||
using FloatProp = typename fputil::FloatProperties<float>;
|
||||
using FPBits = typename fputil::FPBits<float>;
|
||||
uint32_t x_u = cpp::bit_cast<uint32_t>(x);
|
||||
int32_t x_e = static_cast<int32_t>((x_u & FloatProp::EXP_MASK) >>
|
||||
FloatProp::FRACTION_LEN);
|
||||
int32_t lsb = cpp::countr_zero(x_u | FloatProp::EXP_MASK);
|
||||
int32_t x_e =
|
||||
static_cast<int32_t>((x_u & FPBits::EXP_MASK) >> FPBits::FRACTION_LEN);
|
||||
int32_t lsb = cpp::countr_zero(x_u | FPBits::EXP_MASK);
|
||||
constexpr int32_t UNIT_EXPONENT =
|
||||
FloatProp::EXP_BIAS + static_cast<int32_t>(FloatProp::FRACTION_LEN);
|
||||
FPBits::EXP_BIAS + static_cast<int32_t>(FPBits::FRACTION_LEN);
|
||||
return (x_e + lsb >= UNIT_EXPONENT);
|
||||
}
|
||||
|
||||
@@ -424,7 +424,6 @@ LIBC_INLINE bool larger_exponent(double a, double b) {
|
||||
double powf_double_double(int idx_x, double dx, double y6, double lo6_hi,
|
||||
const DoubleDouble &exp2_hi_mid) {
|
||||
using DoubleBits = typename fputil::FPBits<double>;
|
||||
using DoubleProp = typename fputil::FloatProperties<double>;
|
||||
// Perform a second range reduction step:
|
||||
// idx2 = round(2^14 * (dx + 2^-8)) = round ( dx * 2^14 + 2^6)
|
||||
// dx2 = (1 + dx) * r2 - 1
|
||||
@@ -500,7 +499,7 @@ double powf_double_double(int idx_x, double dx, double y6, double lo6_hi,
|
||||
bool lo_sign = DoubleBits(r.lo).get_sign();
|
||||
if (hi_sign == lo_sign) {
|
||||
++r_bits;
|
||||
} else if ((r_bits & DoubleProp::FRACTION_MASK) > 0) {
|
||||
} else if ((r_bits & DoubleBits::FRACTION_MASK) > 0) {
|
||||
--r_bits;
|
||||
}
|
||||
}
|
||||
@@ -512,8 +511,7 @@ double powf_double_double(int idx_x, double dx, double y6, double lo6_hi,
|
||||
|
||||
LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
|
||||
using FloatBits = typename fputil::FPBits<float>;
|
||||
using FloatProp = typename fputil::FloatProperties<float>;
|
||||
using DoubleProp = typename fputil::FloatProperties<double>;
|
||||
using DoubleBits = typename fputil::FPBits<double>;
|
||||
FloatBits xbits(x), ybits(y);
|
||||
|
||||
uint32_t x_u = xbits.uintval();
|
||||
@@ -584,7 +582,7 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
|
||||
// x^y will overflow / underflow in single precision. Set y to a
|
||||
// large enough exponent but not too large, so that the computations
|
||||
// won't overflow in double precision.
|
||||
y = cpp::bit_cast<float>((y_u & FloatProp::SIGN_MASK) + 0x4f800000U);
|
||||
y = cpp::bit_cast<float>((y_u & FloatBits::SIGN_MASK) + 0x4f800000U);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -607,11 +605,11 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
|
||||
return generic::exp10f(y);
|
||||
}
|
||||
|
||||
bool x_sign = x_u >= FloatProp::SIGN_MASK;
|
||||
bool x_sign = x_u >= FloatBits::SIGN_MASK;
|
||||
|
||||
switch (x_abs) {
|
||||
case 0x0000'0000: { // x = +-0.0f
|
||||
bool x_sign = (x_u >= FloatProp::SIGN_MASK);
|
||||
bool x_sign = (x_u >= FloatBits::SIGN_MASK);
|
||||
bool out_sign = x_sign && is_odd_integer(FloatBits(y_u).get_val());
|
||||
if (y_u > 0x8000'0000U) {
|
||||
// pow(0, negative number) = inf
|
||||
@@ -623,9 +621,9 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
|
||||
return out_sign ? -0.0f : 0.0f;
|
||||
}
|
||||
case 0x7f80'0000: { // x = +-Inf
|
||||
bool x_sign = (x_u >= FloatProp::SIGN_MASK);
|
||||
bool x_sign = (x_u >= FloatBits::SIGN_MASK);
|
||||
bool out_sign = x_sign && is_odd_integer(FloatBits(y_u).get_val());
|
||||
if (y_u >= FloatProp::SIGN_MASK) {
|
||||
if (y_u >= FloatBits::SIGN_MASK) {
|
||||
return out_sign ? -0.0f : 0.0f;
|
||||
}
|
||||
return FloatBits::inf(out_sign);
|
||||
@@ -669,11 +667,11 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
|
||||
x_u = FloatBits(x).uintval();
|
||||
|
||||
// Extract exponent field of x.
|
||||
ex += (x_u >> FloatProp::FRACTION_LEN);
|
||||
ex += (x_u >> FloatBits::FRACTION_LEN);
|
||||
double e_x = static_cast<double>(ex);
|
||||
// Use the highest 7 fractional bits of m_x as the index for look up tables.
|
||||
uint32_t x_mant = x_u & FloatProp::FRACTION_MASK;
|
||||
int idx_x = static_cast<int>(x_mant >> (FloatProp::FRACTION_LEN - 7));
|
||||
uint32_t x_mant = x_u & FloatBits::FRACTION_MASK;
|
||||
int idx_x = static_cast<int>(x_mant >> (FloatBits::FRACTION_LEN - 7));
|
||||
// Add the hidden bit to the mantissa.
|
||||
// 1 <= m_x < 2
|
||||
float m_x = cpp::bit_cast<float>(x_mant | 0x3f800000);
|
||||
@@ -774,7 +772,7 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
|
||||
int idx_y = hm_i & 0x3f;
|
||||
|
||||
// 2^hi
|
||||
int64_t exp_hi_i = (hm_i >> 6) << DoubleProp::FRACTION_LEN;
|
||||
int64_t exp_hi_i = (hm_i >> 6) << DoubleBits::FRACTION_LEN;
|
||||
// 2^mid
|
||||
int64_t exp_mid_i = cpp::bit_cast<uint64_t>(EXP2_MID1[idx_y].hi);
|
||||
// (-1)^sign * 2^hi * 2^mid
|
||||
|
||||
@@ -59,7 +59,7 @@ LIBC_INLINE int64_t small_range_reduction(double x, double &y) {
|
||||
LIBC_INLINE int64_t large_range_reduction(double x, int x_exp, double &y) {
|
||||
int idx = 0;
|
||||
y = 0;
|
||||
int x_lsb_exp_m4 = x_exp - fputil::FloatProperties<float>::FRACTION_LEN;
|
||||
int x_lsb_exp_m4 = x_exp - fputil::FPBits<float>::FRACTION_LEN;
|
||||
|
||||
// Skipping the first parts of 32/pi such that:
|
||||
// LSB of x * LSB of THIRTYTWO_OVER_PI_28[i] >= 32.
|
||||
|
||||
@@ -89,7 +89,7 @@ LLVM_LIBC_FUNCTION(float, tanhf, (float x)) {
|
||||
// -hi = floor(-k * 2^(-MID_BITS))
|
||||
// exp_mhi = shift -hi to the exponent field of double precision.
|
||||
int64_t exp_mhi = static_cast<int64_t>(mk >> ExpBase::MID_BITS)
|
||||
<< fputil::FloatProperties<double>::FRACTION_LEN;
|
||||
<< fputil::FPBits<double>::FRACTION_LEN;
|
||||
// mh = 2^(-hi - mid)
|
||||
int64_t mh_bits = ExpBase::EXP_2_MID[mk & ExpBase::MID_MASK] + exp_mhi;
|
||||
double mh = fputil::FPBits<double>(uint64_t(mh_bits)).get_val();
|
||||
|
||||
@@ -240,8 +240,7 @@ class FloatWriter {
|
||||
// -exponent will never overflow because all long double types we support
|
||||
// have at most 15 bits of exponent and the C standard defines an int as
|
||||
// being at least 16 bits.
|
||||
static_assert(fputil::FloatProperties<long double>::EXP_LEN <
|
||||
(sizeof(int) * 8));
|
||||
static_assert(fputil::FPBits<long double>::EXP_LEN < (sizeof(int) * 8));
|
||||
|
||||
public:
|
||||
LIBC_INLINE FloatWriter(Writer *init_writer, bool init_has_decimal_point,
|
||||
@@ -474,7 +473,7 @@ LIBC_INLINE int convert_float_decimal_typed(Writer *writer,
|
||||
const FormatSection &to_conv,
|
||||
fputil::FPBits<T> float_bits) {
|
||||
// signed because later we use -FRACTION_LEN
|
||||
constexpr int32_t FRACTION_LEN = fputil::FloatProperties<T>::FRACTION_LEN;
|
||||
constexpr int32_t FRACTION_LEN = fputil::FPBits<T>::FRACTION_LEN;
|
||||
bool is_negative = float_bits.get_sign();
|
||||
int exponent = float_bits.get_explicit_exponent();
|
||||
|
||||
@@ -587,7 +586,7 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer,
|
||||
const FormatSection &to_conv,
|
||||
fputil::FPBits<T> float_bits) {
|
||||
// signed because later we use -FRACTION_LEN
|
||||
constexpr int32_t FRACTION_LEN = fputil::FloatProperties<T>::FRACTION_LEN;
|
||||
constexpr int32_t FRACTION_LEN = fputil::FPBits<T>::FRACTION_LEN;
|
||||
bool is_negative = float_bits.get_sign();
|
||||
int exponent = float_bits.get_explicit_exponent();
|
||||
StorageType mantissa = float_bits.get_explicit_mantissa();
|
||||
@@ -750,7 +749,7 @@ LIBC_INLINE int convert_float_dec_auto_typed(Writer *writer,
|
||||
const FormatSection &to_conv,
|
||||
fputil::FPBits<T> float_bits) {
|
||||
// signed because later we use -FRACTION_LEN
|
||||
constexpr int32_t FRACTION_LEN = fputil::FloatProperties<T>::FRACTION_LEN;
|
||||
constexpr int32_t FRACTION_LEN = fputil::FPBits<T>::FRACTION_LEN;
|
||||
bool is_negative = float_bits.get_sign();
|
||||
int exponent = float_bits.get_explicit_exponent();
|
||||
StorageType mantissa = float_bits.get_explicit_mantissa();
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
namespace LIBC_NAMESPACE {
|
||||
|
||||
template <typename T> struct LlvmLibcStrToFloatTest : public testing::Test {
|
||||
using StorageType = typename fputil::FloatProperties<T>::StorageType;
|
||||
using StorageType = typename fputil::FPBits<T>::StorageType;
|
||||
|
||||
void clinger_fast_path_test(const StorageType inputMantissa,
|
||||
const int32_t inputExp10,
|
||||
|
||||
@@ -20,8 +20,7 @@ template <typename T> class FrexpTest : public LIBC_NAMESPACE::testing::Test {
|
||||
DECLARE_SPECIAL_CONSTANTS(T)
|
||||
|
||||
static constexpr StorageType HIDDEN_BIT =
|
||||
StorageType(1)
|
||||
<< LIBC_NAMESPACE::fputil::FloatProperties<T>::FRACTION_LEN;
|
||||
StorageType(1) << LIBC_NAMESPACE::fputil::FPBits<T>::FRACTION_LEN;
|
||||
|
||||
public:
|
||||
typedef T (*FrexpFunc)(T, int *);
|
||||
|
||||
@@ -20,8 +20,7 @@ template <typename T> class LogbTest : public LIBC_NAMESPACE::testing::Test {
|
||||
DECLARE_SPECIAL_CONSTANTS(T)
|
||||
|
||||
static constexpr StorageType HIDDEN_BIT =
|
||||
StorageType(1)
|
||||
<< LIBC_NAMESPACE::fputil::FloatProperties<T>::FRACTION_LEN;
|
||||
StorageType(1) << LIBC_NAMESPACE::fputil::FPBits<T>::FRACTION_LEN;
|
||||
|
||||
public:
|
||||
typedef T (*LogbFunc)(T);
|
||||
|
||||
@@ -20,8 +20,7 @@ template <typename T> class SqrtTest : public LIBC_NAMESPACE::testing::Test {
|
||||
DECLARE_SPECIAL_CONSTANTS(T)
|
||||
|
||||
static constexpr StorageType HIDDEN_BIT =
|
||||
StorageType(1)
|
||||
<< LIBC_NAMESPACE::fputil::FloatProperties<T>::FRACTION_LEN;
|
||||
StorageType(1) << LIBC_NAMESPACE::fputil::FPBits<T>::FRACTION_LEN;
|
||||
|
||||
public:
|
||||
typedef T (*SqrtFunc)(T);
|
||||
|
||||
@@ -17,8 +17,7 @@ template <typename T> class FrexpTest : public LIBC_NAMESPACE::testing::Test {
|
||||
DECLARE_SPECIAL_CONSTANTS(T)
|
||||
|
||||
static constexpr StorageType HIDDEN_BIT =
|
||||
StorageType(1)
|
||||
<< LIBC_NAMESPACE::fputil::FloatProperties<T>::FRACTION_LEN;
|
||||
StorageType(1) << LIBC_NAMESPACE::fputil::FPBits<T>::FRACTION_LEN;
|
||||
|
||||
public:
|
||||
typedef T (*FrexpFunc)(T, int *);
|
||||
|
||||
@@ -17,8 +17,7 @@ template <typename T> class LogbTest : public LIBC_NAMESPACE::testing::Test {
|
||||
DECLARE_SPECIAL_CONSTANTS(T)
|
||||
|
||||
static constexpr StorageType HIDDEN_BIT =
|
||||
StorageType(1)
|
||||
<< LIBC_NAMESPACE::fputil::FloatProperties<T>::FRACTION_LEN;
|
||||
StorageType(1) << LIBC_NAMESPACE::fputil::FPBits<T>::FRACTION_LEN;
|
||||
|
||||
public:
|
||||
typedef T (*LogbFunc)(T);
|
||||
|
||||
@@ -17,8 +17,7 @@ template <typename T> class SqrtTest : public LIBC_NAMESPACE::testing::Test {
|
||||
DECLARE_SPECIAL_CONSTANTS(T)
|
||||
|
||||
static constexpr StorageType HIDDEN_BIT =
|
||||
StorageType(1)
|
||||
<< LIBC_NAMESPACE::fputil::FloatProperties<T>::FRACTION_LEN;
|
||||
StorageType(1) << LIBC_NAMESPACE::fputil::FPBits<T>::FRACTION_LEN;
|
||||
|
||||
public:
|
||||
typedef T (*SqrtFunc)(T);
|
||||
|
||||
@@ -49,7 +49,7 @@ template <> struct ExtraPrecision<long double> {
|
||||
template <typename T>
|
||||
static inline unsigned int get_precision(double ulp_tolerance) {
|
||||
if (ulp_tolerance <= 0.5) {
|
||||
return LIBC_NAMESPACE::fputil::FloatProperties<T>::MANTISSA_PRECISION;
|
||||
return LIBC_NAMESPACE::fputil::FPBits<T>::MANTISSA_PRECISION;
|
||||
} else {
|
||||
return ExtraPrecision<T>::VALUE;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user