[libc][NFC] Remove FloatProperties (#76508)

Access is now done through `FPBits` exclusively.
This patch also renames a few internal structs and uses `T` instead of
`FP` as a template parameter.
This commit is contained in:
Guillaume Chatelet
2024-01-03 09:51:58 +01:00
committed by GitHub
parent b7d5b0d0ee
commit c09e690556
26 changed files with 180 additions and 209 deletions

View File

@@ -22,18 +22,17 @@
#include "utils/MPFRWrapper/mpfr_inc.h"
using LIBC_NAMESPACE::fputil::FloatProperties;
using LIBC_NAMESPACE::fputil::FPBits;
// This function calculates the effective precision for a given float type and
// exponent. Subnormals have a lower effective precision since they don't
// necessarily use all of the bits of the mantissa.
template <typename F> inline constexpr int effective_precision(int exponent) {
const int full_precision = FloatProperties<F>::MANTISSA_PRECISION;
const int full_precision = FPBits<F>::MANTISSA_PRECISION;
// This is intended to be 0 when the exponent is the lowest normal and
// increase as the exponent's magnitude increases.
const int bits_below_normal =
(-exponent) - (FloatProperties<F>::EXP_BIAS - 1);
const int bits_below_normal = (-exponent) - (FPBits<F>::EXP_BIAS - 1);
// The precision should be the normal, full precision, minus the bits lost
// by this being a subnormal, minus one for the implicit leading one.

View File

@@ -39,64 +39,66 @@ enum class FPEncoding {
X86_ExtendedPrecision,
};
template <FPType> struct FPBaseProperties {};
// Defines the layout (sign, exponent, significand) of a floating point type in
// memory. It also defines its associated StorageType, i.e., the unsigned
// integer type used to manipulate its representation.
template <FPType> struct FPLayout {};
template <> struct FPBaseProperties<FPType::IEEE754_Binary16> {
template <> struct FPLayout<FPType::IEEE754_Binary16> {
using StorageType = uint16_t;
LIBC_INLINE_VAR static constexpr int TOTAL_LEN = 16;
LIBC_INLINE_VAR static constexpr int SIG_LEN = 10;
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
LIBC_INLINE_VAR static constexpr int EXP_LEN = 5;
LIBC_INLINE_VAR static constexpr int SIG_LEN = 10;
LIBC_INLINE_VAR static constexpr auto ENCODING = FPEncoding::IEEE754;
};
template <> struct FPBaseProperties<FPType::IEEE754_Binary32> {
template <> struct FPLayout<FPType::IEEE754_Binary32> {
using StorageType = uint32_t;
LIBC_INLINE_VAR static constexpr int TOTAL_LEN = 32;
LIBC_INLINE_VAR static constexpr int SIG_LEN = 23;
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
LIBC_INLINE_VAR static constexpr int EXP_LEN = 8;
LIBC_INLINE_VAR static constexpr int SIG_LEN = 23;
LIBC_INLINE_VAR static constexpr auto ENCODING = FPEncoding::IEEE754;
};
template <> struct FPBaseProperties<FPType::IEEE754_Binary64> {
template <> struct FPLayout<FPType::IEEE754_Binary64> {
using StorageType = uint64_t;
LIBC_INLINE_VAR static constexpr int TOTAL_LEN = 64;
LIBC_INLINE_VAR static constexpr int SIG_LEN = 52;
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
LIBC_INLINE_VAR static constexpr int EXP_LEN = 11;
LIBC_INLINE_VAR static constexpr int SIG_LEN = 52;
LIBC_INLINE_VAR static constexpr auto ENCODING = FPEncoding::IEEE754;
};
template <> struct FPBaseProperties<FPType::IEEE754_Binary128> {
template <> struct FPLayout<FPType::IEEE754_Binary128> {
using StorageType = UInt128;
LIBC_INLINE_VAR static constexpr int TOTAL_LEN = 128;
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
LIBC_INLINE_VAR static constexpr int EXP_LEN = 15;
LIBC_INLINE_VAR static constexpr int SIG_LEN = 112;
LIBC_INLINE_VAR static constexpr int EXP_LEN = 15;
LIBC_INLINE_VAR static constexpr auto ENCODING = FPEncoding::IEEE754;
};
template <> struct FPBaseProperties<FPType::X86_Binary80> {
template <> struct FPLayout<FPType::X86_Binary80> {
using StorageType = UInt128;
LIBC_INLINE_VAR static constexpr int TOTAL_LEN = 80;
LIBC_INLINE_VAR static constexpr int SIG_LEN = 64;
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
LIBC_INLINE_VAR static constexpr int EXP_LEN = 15;
LIBC_INLINE_VAR static constexpr int SIG_LEN = 64;
LIBC_INLINE_VAR static constexpr auto ENCODING =
FPEncoding::X86_ExtendedPrecision;
};
} // namespace internal
// FPBaseMasksAndShifts derives useful constants from the FPLayout.
template <FPType fp_type>
struct FPProperties : public internal::FPBaseProperties<fp_type> {
struct FPBaseMasksAndShifts : public internal::FPLayout<fp_type> {
private:
using UP = internal::FPBaseProperties<fp_type>;
using UP = internal::FPLayout<fp_type>;
public:
// The number of bits to represent sign. For documentation purpose, always 1.
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
using UP::EXP_LEN; // The number of bits for the *exponent* part
using UP::SIG_LEN; // The number of bits for the *significand* part
using UP::TOTAL_LEN; // For convenience, the sum of `SIG_LEN`, `EXP_LEN`,
// and `SIGN_LEN`.
static_assert(SIGN_LEN + EXP_LEN + SIG_LEN == TOTAL_LEN);
using UP::EXP_LEN; // The number of bits for the *exponent* part
using UP::SIG_LEN; // The number of bits for the *significand* part
using UP::SIGN_LEN; // The number of bits for the *sign* part
// For convenience, the sum of `SIG_LEN`, `EXP_LEN`, and `SIGN_LEN`.
LIBC_INLINE_VAR static constexpr int TOTAL_LEN = SIGN_LEN + EXP_LEN + SIG_LEN;
// An unsigned integer that is wide enough to contain all of the floating
// point bits.
@@ -173,45 +175,12 @@ protected:
: bit_at(SIG_LEN - 2); // 0b0100...
};
//-----------------------------------------------------------------------------
template <typename FP> LIBC_INLINE static constexpr FPType get_fp_type() {
if constexpr (cpp::is_same_v<FP, float> && __FLT_MANT_DIG__ == 24)
return FPType::IEEE754_Binary32;
else if constexpr (cpp::is_same_v<FP, double> && __DBL_MANT_DIG__ == 53)
return FPType::IEEE754_Binary64;
else if constexpr (cpp::is_same_v<FP, long double>) {
if constexpr (__LDBL_MANT_DIG__ == 53)
return FPType::IEEE754_Binary64;
else if constexpr (__LDBL_MANT_DIG__ == 64)
return FPType::X86_Binary80;
else if constexpr (__LDBL_MANT_DIG__ == 113)
return FPType::IEEE754_Binary128;
}
#if defined(LIBC_COMPILER_HAS_C23_FLOAT16)
else if constexpr (cpp::is_same_v<FP, _Float16>)
return FPType::IEEE754_Binary16;
#endif
#if defined(LIBC_COMPILER_HAS_C23_FLOAT128)
else if constexpr (cpp::is_same_v<FP, _Float128>)
return FPType::IEEE754_Binary128;
#endif
#if defined(LIBC_COMPILER_HAS_FLOAT128_EXTENSION)
else if constexpr (cpp::is_same_v<FP, __float128>)
return FPType::IEEE754_Binary128;
#endif
else
static_assert(cpp::always_false<FP>, "Unsupported type");
}
template <typename FP>
struct FloatProperties : public FPProperties<get_fp_type<FP>()> {};
namespace internal {
// This is a temporary class to unify common methods and properties between
// FPBits and FPBits<long double>.
template <FPType fp_type> struct FPBitsCommon : private FPProperties<fp_type> {
using UP = FPProperties<fp_type>;
template <FPType fp_type> struct FPRep : private FPBaseMasksAndShifts<fp_type> {
using UP = FPBaseMasksAndShifts<fp_type>;
using typename UP::StorageType;
using UP::TOTAL_LEN;
@@ -227,15 +196,17 @@ public:
using UP::FP_MASK;
using UP::FRACTION_LEN;
using UP::FRACTION_MASK;
using UP::MANTISSA_PRECISION;
using UP::SIGN_MASK;
using UP::STORAGE_LEN;
// Reinterpreting bits as an integer value and interpreting the bits of an
// integer value as a floating point value is used in tests. So, a convenient
// type is provided for such reinterpretations.
StorageType bits;
LIBC_INLINE constexpr FPBitsCommon() : bits(0) {}
LIBC_INLINE explicit constexpr FPBitsCommon(StorageType bits) : bits(bits) {}
LIBC_INLINE constexpr FPRep() : bits(0) {}
LIBC_INLINE explicit constexpr FPRep(StorageType bits) : bits(bits) {}
LIBC_INLINE constexpr void set_mantissa(StorageType mantVal) {
mantVal &= FRACTION_MASK;
@@ -297,6 +268,37 @@ public:
} // namespace internal
// Returns the FPType corresponding to C++ type T on the host.
//
// cv-qualifiers on T are stripped before matching, so `const float` and
// `float` map to the same FPType. The mapping is driven by the compiler's
// predefined mantissa-width macros:
//   - float       with __FLT_MANT_DIG__  == 24  -> IEEE754_Binary32
//   - double      with __DBL_MANT_DIG__  == 53  -> IEEE754_Binary64
//   - long double with __LDBL_MANT_DIG__ == 53  -> IEEE754_Binary64
//   - long double with __LDBL_MANT_DIG__ == 64  -> X86_Binary80
//   - long double with __LDBL_MANT_DIG__ == 113 -> IEEE754_Binary128
//   - _Float16 / _Float128 / __float128, when the corresponding
//     LIBC_COMPILER_HAS_* macro is defined.
// Every other combination is rejected at compile time with a static_assert.
template <typename T> LIBC_INLINE static constexpr FPType get_fp_type() {
  using UnqualT = cpp::remove_cv_t<T>;
  if constexpr (cpp::is_same_v<UnqualT, float> && __FLT_MANT_DIG__ == 24)
    return FPType::IEEE754_Binary32;
  else if constexpr (cpp::is_same_v<UnqualT, double> && __DBL_MANT_DIG__ == 53)
    return FPType::IEEE754_Binary64;
  else if constexpr (cpp::is_same_v<UnqualT, long double>) {
    if constexpr (__LDBL_MANT_DIG__ == 53)
      return FPType::IEEE754_Binary64;
    else if constexpr (__LDBL_MANT_DIG__ == 64)
      return FPType::X86_Binary80;
    else if constexpr (__LDBL_MANT_DIG__ == 113)
      return FPType::IEEE754_Binary128;
    else
      // Without this branch an unrecognised long double layout would fall off
      // the end of the function and only surface as an obscure "not a constant
      // expression" error at the point of use. The condition is made dependent
      // on UnqualT so it is only evaluated when this branch is instantiated.
      static_assert(cpp::always_false<UnqualT>, "Unsupported long double type");
  }
#if defined(LIBC_COMPILER_HAS_C23_FLOAT16)
  else if constexpr (cpp::is_same_v<UnqualT, _Float16>)
    return FPType::IEEE754_Binary16;
#endif
#if defined(LIBC_COMPILER_HAS_C23_FLOAT128)
  else if constexpr (cpp::is_same_v<UnqualT, _Float128>)
    return FPType::IEEE754_Binary128;
#endif
#if defined(LIBC_COMPILER_HAS_FLOAT128_EXTENSION)
  else if constexpr (cpp::is_same_v<UnqualT, __float128>)
    return FPType::IEEE754_Binary128;
#endif
  else
    static_assert(cpp::always_false<UnqualT>, "Unsupported type");
}
// A generic class to represent single precision, double precision, and quad
// precision IEEE 754 floating point formats.
// On most platforms, the 'float' type corresponds to single precision floating
@@ -305,11 +307,10 @@ public:
// floating numbers. On x86 platforms however, the 'long double' type maps to
// an x87 floating point format. This format is an IEEE 754 extension format.
// It is handled as an explicit specialization of this class.
template <typename T>
struct FPBits : public internal::FPBitsCommon<get_fp_type<T>()> {
template <typename T> struct FPBits : public internal::FPRep<get_fp_type<T>()> {
static_assert(cpp::is_floating_point_v<T>,
"FPBits instantiated with invalid type.");
using UP = internal::FPBitsCommon<get_fp_type<T>()>;
using UP = internal::FPRep<get_fp_type<T>()>;
using StorageType = typename UP::StorageType;
using UP::bits;

View File

@@ -174,13 +174,13 @@ LIBC_INLINE T nextafter(T from, U to) {
} else {
int_val = FPBits<T>::MIN_SUBNORMAL;
if (to_bits.get_sign())
int_val |= FloatProperties<T>::SIGN_MASK;
int_val |= FPBits<T>::SIGN_MASK;
}
StorageType exponent_bits = int_val & FloatProperties<T>::EXP_MASK;
StorageType exponent_bits = int_val & FPBits<T>::EXP_MASK;
if (exponent_bits == StorageType(0))
raise_except_if_required(FE_UNDERFLOW | FE_INEXACT);
else if (exponent_bits == FloatProperties<T>::EXP_MASK)
else if (exponent_bits == FPBits<T>::EXP_MASK)
raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
return cpp::bit_cast<T>(int_val);

View File

@@ -41,10 +41,10 @@ template <size_t Bits> struct DyadicFloat {
template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
DyadicFloat(T x) {
static_assert(FloatProperties<T>::FRACTION_LEN < Bits);
static_assert(FPBits<T>::FRACTION_LEN < Bits);
FPBits<T> x_bits(x);
sign = x_bits.get_sign();
exponent = x_bits.get_exponent() - FloatProperties<T>::FRACTION_LEN;
exponent = x_bits.get_exponent() - FPBits<T>::FRACTION_LEN;
mantissa = MantissaType(x_bits.get_explicit_mantissa());
normalize();
}
@@ -83,21 +83,20 @@ template <size_t Bits> struct DyadicFloat {
// Output is rounded correctly with respect to the current rounding mode.
// TODO(lntue): Add support for underflow.
// TODO(lntue): Test or add specialization for x86 long double.
template <typename T, typename = cpp::enable_if_t<
cpp::is_floating_point_v<T> &&
(FloatProperties<T>::FRACTION_LEN < Bits),
void>>
template <typename T,
typename = cpp::enable_if_t<cpp::is_floating_point_v<T> &&
(FPBits<T>::FRACTION_LEN < Bits),
void>>
explicit operator T() const {
// TODO(lntue): Do we need to treat signed zeros properly?
if (mantissa.is_zero())
return 0.0;
// Assume that it is normalized, and output is also normal.
constexpr uint32_t PRECISION = FloatProperties<T>::MANTISSA_PRECISION;
constexpr uint32_t PRECISION = FPBits<T>::MANTISSA_PRECISION;
using output_bits_t = typename FPBits<T>::StorageType;
int exp_hi =
exponent + static_cast<int>((Bits - 1) + FloatProperties<T>::EXP_BIAS);
int exp_hi = exponent + static_cast<int>((Bits - 1) + FPBits<T>::EXP_BIAS);
bool denorm = false;
uint32_t shift = Bits - PRECISION;
@@ -106,7 +105,7 @@ template <size_t Bits> struct DyadicFloat {
denorm = true;
shift = (Bits - PRECISION) + static_cast<uint32_t>(1 - exp_hi);
exp_hi = FloatProperties<T>::EXP_BIAS;
exp_hi = FPBits<T>::EXP_BIAS;
}
int exp_lo = exp_hi - static_cast<int>(PRECISION) - 1;
@@ -115,7 +114,7 @@ template <size_t Bits> struct DyadicFloat {
T d_hi = FPBits<T>::create_value(sign, exp_hi,
static_cast<output_bits_t>(m_hi) &
FloatProperties<T>::FRACTION_MASK)
FPBits<T>::FRACTION_MASK)
.get_val();
const MantissaType round_mask = MantissaType(1) << (shift - 1);
@@ -129,15 +128,13 @@ template <size_t Bits> struct DyadicFloat {
if (LIBC_UNLIKELY(exp_lo <= 0)) {
// d_lo is denormal, but the output is normal.
int scale_up_exponent = 2 * PRECISION;
T scale_up_factor = FPBits<T>::create_value(sign,
FloatProperties<T>::EXP_BIAS +
scale_up_exponent,
output_bits_t(0))
.get_val();
T scale_up_factor =
FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS + scale_up_exponent,
output_bits_t(0))
.get_val();
T scale_down_factor =
FPBits<T>::create_value(
sign, FloatProperties<T>::EXP_BIAS - scale_up_exponent,
output_bits_t(0))
FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS - scale_up_exponent,
output_bits_t(0))
.get_val();
d_lo = FPBits<T>::create_value(sign, exp_lo + scale_up_exponent,
@@ -156,7 +153,7 @@ template <size_t Bits> struct DyadicFloat {
if (LIBC_UNLIKELY(denorm)) {
// Output is denormal, simply clear the exponent field.
output_bits_t clear_exp = output_bits_t(exp_hi)
<< FloatProperties<T>::FRACTION_LEN;
<< FPBits<T>::FRACTION_LEN;
output_bits_t r_bits = FPBits<T>(r).uintval() - clear_exp;
return FPBits<T>(r_bits).get_val();
}

View File

@@ -94,7 +94,6 @@ LIBC_INLINE bool shift_mantissa(int shift_length, UInt128 &mant) {
template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
using FPBits = fputil::FPBits<double>;
using FloatProp = fputil::FloatProperties<double>;
if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0)) {
return x * y + z;
@@ -267,10 +266,10 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
}
// Remove hidden bit and append the exponent field and sign bit.
result = (result & FloatProp::FRACTION_MASK) |
(static_cast<uint64_t>(r_exp) << FloatProp::FRACTION_LEN);
result = (result & FPBits::FRACTION_MASK) |
(static_cast<uint64_t>(r_exp) << FPBits::FRACTION_LEN);
if (prod_sign) {
result |= FloatProp::SIGN_MASK;
result |= FPBits::SIGN_MASK;
}
// Rounding.

View File

@@ -27,9 +27,8 @@ namespace LIBC_NAMESPACE {
namespace fputil {
template <>
struct FPBits<long double>
: public internal::FPBitsCommon<FPType::X86_Binary80> {
using UP = internal::FPBitsCommon<FPType::X86_Binary80>;
struct FPBits<long double> : public internal::FPRep<FPType::X86_Binary80> {
using UP = internal::FPRep<FPType::X86_Binary80>;
using StorageType = typename UP::StorageType;
using UP::bits;

View File

@@ -105,7 +105,7 @@ namespace LIBC_NAMESPACE {
using BlockInt = uint32_t;
constexpr uint32_t BLOCK_SIZE = 9;
using FloatProp = fputil::FloatProperties<long double>;
using FPBits = fputil::FPBits<long double>;
// Larger numbers prefer a slightly larger constant than is used for the smaller
// numbers.
@@ -382,10 +382,10 @@ LIBC_INLINE uint32_t fast_uint_mod_1e9(const cpp::UInt<MID_INT_SIZE> &val) {
(1000000000 * shifted));
}
LIBC_INLINE uint32_t mul_shift_mod_1e9(const FloatProp::StorageType mantissa,
LIBC_INLINE uint32_t mul_shift_mod_1e9(const FPBits::StorageType mantissa,
const cpp::UInt<MID_INT_SIZE> &large,
const int32_t shift_amount) {
cpp::UInt<MID_INT_SIZE + FloatProp::STORAGE_LEN> val(large);
cpp::UInt<MID_INT_SIZE + FPBits::STORAGE_LEN> val(large);
val = (val * mantissa) >> shift_amount;
return static_cast<uint32_t>(
val.div_uint32_times_pow_2(1000000000, 0).value());
@@ -414,7 +414,7 @@ class FloatToString {
fputil::FPBits<T> float_bits;
bool is_negative;
int exponent;
FloatProp::StorageType mantissa;
FPBits::StorageType mantissa;
static constexpr int FRACTION_LEN = fputil::FPBits<T>::FRACTION_LEN;
static constexpr int EXP_BIAS = fputil::FPBits<T>::EXP_BIAS;

View File

@@ -71,7 +71,6 @@ LIBC_INLINE cpp::optional<ExpandedFloat<T>>
eisel_lemire(ExpandedFloat<T> init_num,
RoundDirection round = RoundDirection::Nearest) {
using FPBits = typename fputil::FPBits<T>;
using FloatProp = typename fputil::FloatProperties<T>;
using StorageType = typename FPBits::StorageType;
StorageType mantissa = init_num.mantissa;
@@ -93,7 +92,7 @@ eisel_lemire(ExpandedFloat<T> init_num,
mantissa <<= clz;
int32_t exp2 =
exp10_to_exp2(exp10) + FloatProp::STORAGE_LEN + FloatProp::EXP_BIAS - clz;
exp10_to_exp2(exp10) + FPBits::STORAGE_LEN + FPBits::EXP_BIAS - clz;
// Multiplication
const uint64_t *power_of_ten =
@@ -110,9 +109,7 @@ eisel_lemire(ExpandedFloat<T> init_num,
// accuracy, and the most significant bit is ignored.) = 9 bits. Similarly,
// it's 6 bits for floats in this case.
const uint64_t halfway_constant =
(uint64_t(1) << (FloatProp::STORAGE_LEN -
(FloatProp::FRACTION_LEN + 3))) -
1;
(uint64_t(1) << (FPBits::STORAGE_LEN - (FPBits::FRACTION_LEN + 3))) - 1;
if ((high64(first_approx) & halfway_constant) == halfway_constant &&
low64(first_approx) + mantissa < mantissa) {
UInt128 low_bits =
@@ -132,10 +129,10 @@ eisel_lemire(ExpandedFloat<T> init_num,
// Shifting to 54 bits for doubles and 25 bits for floats
StorageType msb = static_cast<StorageType>(high64(final_approx) >>
(FloatProp::STORAGE_LEN - 1));
(FPBits::STORAGE_LEN - 1));
StorageType final_mantissa = static_cast<StorageType>(
high64(final_approx) >>
(msb + FloatProp::STORAGE_LEN - (FloatProp::FRACTION_LEN + 3)));
(msb + FPBits::STORAGE_LEN - (FPBits::FRACTION_LEN + 3)));
exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb
if (round == RoundDirection::Nearest) {
@@ -161,14 +158,14 @@ eisel_lemire(ExpandedFloat<T> init_num,
// From 54 to 53 bits for doubles and 25 to 24 bits for floats
final_mantissa >>= 1;
if ((final_mantissa >> (FloatProp::FRACTION_LEN + 1)) > 0) {
if ((final_mantissa >> (FPBits::FRACTION_LEN + 1)) > 0) {
final_mantissa >>= 1;
++exp2;
}
// The if block is equivalent to (but has fewer branches than):
// if exp2 <= 0 || exp2 >= 0x7FF { etc }
if (static_cast<uint32_t>(exp2) - 1 >= (1 << FloatProp::EXP_LEN) - 2) {
if (static_cast<uint32_t>(exp2) - 1 >= (1 << FPBits::EXP_LEN) - 2) {
return cpp::nullopt;
}
@@ -184,7 +181,6 @@ LIBC_INLINE cpp::optional<ExpandedFloat<long double>>
eisel_lemire<long double>(ExpandedFloat<long double> init_num,
RoundDirection round) {
using FPBits = typename fputil::FPBits<long double>;
using FloatProp = typename fputil::FloatProperties<long double>;
using StorageType = typename FPBits::StorageType;
StorageType mantissa = init_num.mantissa;
@@ -210,7 +206,7 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
mantissa <<= clz;
int32_t exp2 =
exp10_to_exp2(exp10) + FloatProp::STORAGE_LEN + FloatProp::EXP_BIAS - clz;
exp10_to_exp2(exp10) + FPBits::STORAGE_LEN + FPBits::EXP_BIAS - clz;
// Multiplication
const uint64_t *power_of_ten =
@@ -247,8 +243,7 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
// accuracy, and the most significant bit is ignored.) = 61 bits. Similarly,
// it's 12 bits for 128 bit floats in this case.
constexpr UInt128 HALFWAY_CONSTANT =
(UInt128(1) << (FloatProp::STORAGE_LEN - (FloatProp::FRACTION_LEN + 3))) -
1;
(UInt128(1) << (FPBits::STORAGE_LEN - (FPBits::FRACTION_LEN + 3))) - 1;
if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT &&
final_approx_lower + mantissa < mantissa) {
@@ -257,10 +252,10 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
// Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats
uint32_t msb =
static_cast<uint32_t>(final_approx_upper >> (FloatProp::STORAGE_LEN - 1));
static_cast<uint32_t>(final_approx_upper >> (FPBits::STORAGE_LEN - 1));
StorageType final_mantissa =
final_approx_upper >>
(msb + FloatProp::STORAGE_LEN - (FloatProp::FRACTION_LEN + 3));
(msb + FPBits::STORAGE_LEN - (FPBits::FRACTION_LEN + 3));
exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb
if (round == RoundDirection::Nearest) {
@@ -285,14 +280,14 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
// From 65 to 64 bits for 80 bit floats and 113 to 112 bits for 128 bit
// floats
final_mantissa >>= 1;
if ((final_mantissa >> (FloatProp::FRACTION_LEN + 1)) > 0) {
if ((final_mantissa >> (FPBits::FRACTION_LEN + 1)) > 0) {
final_mantissa >>= 1;
++exp2;
}
// The if block is equivalent to (but has fewer branches than):
// if exp2 <= 0 || exp2 >= MANTISSA_MAX { etc }
if (exp2 - 1 >= (1 << FloatProp::EXP_LEN) - 2) {
if (exp2 - 1 >= (1 << FPBits::EXP_LEN) - 2) {
return cpp::nullopt;
}
@@ -321,7 +316,6 @@ LIBC_INLINE FloatConvertReturn<T>
simple_decimal_conversion(const char *__restrict numStart,
RoundDirection round = RoundDirection::Nearest) {
using FPBits = typename fputil::FPBits<T>;
using FloatProp = typename fputil::FloatProperties<T>;
using StorageType = typename FPBits::StorageType;
int32_t exp2 = 0;
@@ -337,7 +331,7 @@ simple_decimal_conversion(const char *__restrict numStart,
// If the exponent is too large and can't be represented in this size of
// float, return inf.
if (hpd.get_decimal_point() > 0 &&
exp10_to_exp2(hpd.get_decimal_point() - 1) > FloatProp::EXP_BIAS) {
exp10_to_exp2(hpd.get_decimal_point() - 1) > FPBits::EXP_BIAS) {
output.num = {0, fputil::FPBits<T>::MAX_BIASED_EXPONENT};
output.error = ERANGE;
return output;
@@ -345,8 +339,7 @@ simple_decimal_conversion(const char *__restrict numStart,
// If the exponent is too small even for a subnormal, return 0.
if (hpd.get_decimal_point() < 0 &&
exp10_to_exp2(-hpd.get_decimal_point()) >
(FloatProp::EXP_BIAS +
static_cast<int32_t>(FloatProp::FRACTION_LEN))) {
(FPBits::EXP_BIAS + static_cast<int32_t>(FPBits::FRACTION_LEN))) {
output.num = {0, 0};
output.error = ERANGE;
return output;
@@ -385,7 +378,7 @@ simple_decimal_conversion(const char *__restrict numStart,
hpd.shift(1);
// Get the biased exponent
exp2 += FloatProp::EXP_BIAS;
exp2 += FPBits::EXP_BIAS;
// Handle the exponent being too large (and return inf).
if (exp2 >= FPBits::MAX_BIASED_EXPONENT) {
@@ -395,7 +388,7 @@ simple_decimal_conversion(const char *__restrict numStart,
}
// Shift left to fill the mantissa
hpd.shift(FloatProp::FRACTION_LEN);
hpd.shift(FPBits::FRACTION_LEN);
StorageType final_mantissa = hpd.round_to_integer_type<StorageType>();
// Handle subnormals
@@ -411,13 +404,13 @@ simple_decimal_conversion(const char *__restrict numStart,
final_mantissa = hpd.round_to_integer_type<StorageType>(round);
// Check if by shifting right we've caused this to round to a normal number.
if ((final_mantissa >> FloatProp::FRACTION_LEN) != 0) {
if ((final_mantissa >> FPBits::FRACTION_LEN) != 0) {
++exp2;
}
}
// Check if rounding added a bit, and shift down if that's the case.
if (final_mantissa == StorageType(2) << FloatProp::FRACTION_LEN) {
if (final_mantissa == StorageType(2) << FPBits::FRACTION_LEN) {
final_mantissa >>= 1;
++exp2;
@@ -515,13 +508,12 @@ LIBC_INLINE cpp::optional<ExpandedFloat<T>>
clinger_fast_path(ExpandedFloat<T> init_num,
RoundDirection round = RoundDirection::Nearest) {
using FPBits = typename fputil::FPBits<T>;
using FloatProp = typename fputil::FloatProperties<T>;
using StorageType = typename FPBits::StorageType;
StorageType mantissa = init_num.mantissa;
int32_t exp10 = init_num.exponent;
if ((mantissa >> FloatProp::FRACTION_LEN) > 0) {
if ((mantissa >> FPBits::FRACTION_LEN) > 0) {
return cpp::nullopt;
}
@@ -605,7 +597,7 @@ clinger_fast_path(ExpandedFloat<T> init_num,
// log10(2^(exponent bias)).
// The generic approximation uses the fact that log10(2^x) ~= x/3
template <typename T> constexpr int32_t get_upper_bound() {
return fputil::FloatProperties<T>::EXP_BIAS / 3;
return fputil::FPBits<T>::EXP_BIAS / 3;
}
template <> constexpr int32_t get_upper_bound<float>() { return 39; }
@@ -621,11 +613,10 @@ template <> constexpr int32_t get_upper_bound<double>() { return 309; }
// other out, and subnormal numbers allow for the result to be at the very low
// end of the final mantissa.
template <typename T> constexpr int32_t get_lower_bound() {
using FloatProp = typename fputil::FloatProperties<T>;
return -(
(FloatProp::EXP_BIAS +
static_cast<int32_t>(FloatProp::FRACTION_LEN + FloatProp::STORAGE_LEN)) /
3);
using FPBits = typename fputil::FPBits<T>;
return -((FPBits::EXP_BIAS +
static_cast<int32_t>(FPBits::FRACTION_LEN + FPBits::STORAGE_LEN)) /
3);
}
template <> constexpr int32_t get_lower_bound<float>() {
@@ -723,7 +714,6 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
bool truncated,
RoundDirection round) {
using FPBits = typename fputil::FPBits<T>;
using FloatProp = typename fputil::FloatProperties<T>;
using StorageType = typename FPBits::StorageType;
StorageType mantissa = init_num.mantissa;
@@ -733,7 +723,7 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
// This is the number of leading zeroes a properly normalized float of type T
// should have.
constexpr int32_t INF_EXP = (1 << FloatProp::EXP_LEN) - 1;
constexpr int32_t INF_EXP = (1 << FPBits::EXP_LEN) - 1;
// Normalization step 1: Bring the leading bit to the highest bit of
// StorageType.
@@ -744,26 +734,25 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
exp2 -= amount_to_shift_left;
// biased_exponent represents the biased exponent of the most significant bit.
int32_t biased_exponent =
exp2 + FloatProp::STORAGE_LEN + FPBits::EXP_BIAS - 1;
int32_t biased_exponent = exp2 + FPBits::STORAGE_LEN + FPBits::EXP_BIAS - 1;
// Handle numbers that're too large and get squashed to inf
if (biased_exponent >= INF_EXP) {
// This indicates an overflow, so we make the result INF and set errno.
output.num = {0, (1 << FloatProp::EXP_LEN) - 1};
output.num = {0, (1 << FPBits::EXP_LEN) - 1};
output.error = ERANGE;
return output;
}
uint32_t amount_to_shift_right =
FloatProp::STORAGE_LEN - FloatProp::FRACTION_LEN - 1;
FPBits::STORAGE_LEN - FPBits::FRACTION_LEN - 1;
// Handle subnormals.
if (biased_exponent <= 0) {
amount_to_shift_right += 1 - biased_exponent;
biased_exponent = 0;
if (amount_to_shift_right > FloatProp::STORAGE_LEN) {
if (amount_to_shift_right > FPBits::STORAGE_LEN) {
// Return 0 if the exponent is too small.
output.num = {0, 0};
output.error = ERANGE;
@@ -776,10 +765,10 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
bool round_bit = static_cast<bool>(mantissa & round_bit_mask);
bool sticky_bit = static_cast<bool>(mantissa & sticky_mask) || truncated;
if (amount_to_shift_right < FloatProp::STORAGE_LEN) {
if (amount_to_shift_right < FPBits::STORAGE_LEN) {
// Shift the mantissa and clear the implicit bit.
mantissa >>= amount_to_shift_right;
mantissa &= FloatProp::FRACTION_MASK;
mantissa &= FPBits::FRACTION_MASK;
} else {
mantissa = 0;
}
@@ -802,7 +791,7 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
}
}
if (mantissa > FloatProp::FRACTION_MASK) {
if (mantissa > FPBits::FRACTION_MASK) {
// Rounding causes the exponent to increase.
++biased_exponent;
@@ -815,7 +804,7 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
output.error = ERANGE;
}
output.num = {mantissa & FloatProp::FRACTION_MASK, biased_exponent};
output.num = {mantissa & FPBits::FRACTION_MASK, biased_exponent};
return output;
}

View File

@@ -224,7 +224,6 @@ double set_exceptional(double x) {
LLVM_LIBC_FUNCTION(double, exp, (double x)) {
using FPBits = typename fputil::FPBits<double>;
using FloatProp = typename fputil::FloatProperties<double>;
FPBits xbits(x);
uint64_t x_u = xbits.uintval();
@@ -385,7 +384,7 @@ LLVM_LIBC_FUNCTION(double, exp, (double x)) {
if (LIBC_LIKELY(upper == lower)) {
// to multiply by 2^hi, a fast way is to simply add hi to the exponent
// field.
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper));
return r;
}
@@ -403,7 +402,7 @@ LLVM_LIBC_FUNCTION(double, exp, (double x)) {
double lower_dd = r_dd.hi + (r_dd.lo - ERR_DD);
if (LIBC_LIKELY(upper_dd == lower_dd)) {
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
double r =
cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper_dd));
return r;

View File

@@ -274,7 +274,6 @@ double set_exceptional(double x) {
LLVM_LIBC_FUNCTION(double, exp10, (double x)) {
using FPBits = typename fputil::FPBits<double>;
using FloatProp = typename fputil::FloatProperties<double>;
FPBits xbits(x);
uint64_t x_u = xbits.uintval();
@@ -398,7 +397,7 @@ LLVM_LIBC_FUNCTION(double, exp10, (double x)) {
if (LIBC_LIKELY(upper == lower)) {
// To multiply by 2^hi, a fast way is to simply add hi to the exponent
// field.
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper));
return r;
}
@@ -465,7 +464,7 @@ LLVM_LIBC_FUNCTION(double, exp10, (double x)) {
if (LIBC_LIKELY(upper_dd == lower_dd)) {
// To multiply by 2^hi, a fast way is to simply add hi to the exponent
// field.
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper_dd));
return r;
}

View File

@@ -249,7 +249,6 @@ double set_exceptional(double x) {
LLVM_LIBC_FUNCTION(double, exp2, (double x)) {
using FPBits = typename fputil::FPBits<double>;
using FloatProp = typename fputil::FloatProperties<double>;
FPBits xbits(x);
uint64_t x_u = xbits.uintval();
@@ -365,7 +364,7 @@ LLVM_LIBC_FUNCTION(double, exp2, (double x)) {
if (LIBC_LIKELY(upper == lower)) {
// To multiply by 2^hi, a fast way is to simply add hi to the exponent
// field.
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper));
return r;
}
@@ -379,7 +378,7 @@ LLVM_LIBC_FUNCTION(double, exp2, (double x)) {
if (LIBC_LIKELY(upper_dd == lower_dd)) {
// To multiply by 2^hi, a fast way is to simply add hi to the exponent
// field.
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper_dd));
return r;
}

View File

@@ -137,7 +137,7 @@ LIBC_INLINE float exp2f(float x) {
// exp_hi = shift hi to the exponent field of double precision.
int64_t exp_hi =
static_cast<int64_t>(static_cast<uint64_t>(k >> ExpBase::MID_BITS)
<< fputil::FloatProperties<double>::FRACTION_LEN);
<< fputil::FPBits<double>::FRACTION_LEN);
// mh = 2^hi * 2^mid
// mh_bits = bit field of mh
int64_t mh_bits = ExpBase::EXP_2_MID[k & ExpBase::MID_MASK] + exp_hi;

View File

@@ -162,7 +162,7 @@ template <class Base> LIBC_INLINE exp_b_reduc_t exp_b_range_reduc(float x) {
// hi = floor(kd * 2^(-MID_BITS))
// exp_hi = shift hi to the exponent field of double precision.
int64_t exp_hi = static_cast<int64_t>((k >> Base::MID_BITS))
<< fputil::FloatProperties<double>::FRACTION_LEN;
<< fputil::FPBits<double>::FRACTION_LEN;
// mh = 2^hi * 2^mid
// mh_bits = bit field of mh
int64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi;
@@ -235,9 +235,9 @@ template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) {
// hi = floor(kf * 2^(-5))
// exp_hi = shift hi to the exponent field of double precision.
int64_t exp_hi_p = static_cast<int64_t>((k_p >> ExpBase::MID_BITS))
<< fputil::FloatProperties<double>::FRACTION_LEN;
<< fputil::FPBits<double>::FRACTION_LEN;
int64_t exp_hi_m = static_cast<int64_t>((k_m >> ExpBase::MID_BITS))
<< fputil::FloatProperties<double>::FRACTION_LEN;
<< fputil::FPBits<double>::FRACTION_LEN;
// mh_p = 2^(hi + mid)
// mh_m = 2^(-(hi + mid))
// mh_bits_* = bit field of mh_*
@@ -342,10 +342,10 @@ LIBC_INLINE static double log_eval(double x) {
// double(1.0 + 2^1022 * x) - 1.0 to test how x is rounded in denormal range.
LIBC_INLINE cpp::optional<double> ziv_test_denorm(int hi, double mid, double lo,
double err) {
using FloatProp = typename fputil::FloatProperties<double>;
using FPBits = typename fputil::FPBits<double>;
// Scaling factor = 1/(min normal number) = 2^1022
int64_t exp_hi = static_cast<int64_t>(hi + 1022) << FloatProp::FRACTION_LEN;
int64_t exp_hi = static_cast<int64_t>(hi + 1022) << FPBits::FRACTION_LEN;
double mid_hi = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(mid));
double lo_scaled =
(lo != 0.0) ? cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(lo))

View File

@@ -275,7 +275,6 @@ double set_exceptional(double x) {
LLVM_LIBC_FUNCTION(double, expm1, (double x)) {
using FPBits = typename fputil::FPBits<double>;
using FloatProp = typename fputil::FloatProperties<double>;
FPBits xbits(x);
bool x_sign = xbits.get_sign();
@@ -468,7 +467,7 @@ LLVM_LIBC_FUNCTION(double, expm1, (double x)) {
if (LIBC_LIKELY(upper == lower)) {
// to multiply by 2^hi, a fast way is to simply add hi to the exponent
// field.
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper));
return r;
}
@@ -482,7 +481,7 @@ LLVM_LIBC_FUNCTION(double, expm1, (double x)) {
double lower_dd = r_dd.hi + (r_dd.lo - err_dd);
if (LIBC_LIKELY(upper_dd == lower_dd)) {
int64_t exp_hi = static_cast<int64_t>(hi) << FloatProp::FRACTION_LEN;
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper_dd));
return r;
}

View File

@@ -387,24 +387,24 @@ static constexpr DoubleDouble LOG2_R2_DD[] = {
};
// Returns true when the float `x` is an odd integer, decided purely from its
// bit pattern.
//
// NOTE: this listing is a rendered diff. Each `FloatProp::...` line below is
// the removed (pre-change) form of the `FPBits::...` line that follows it;
// only the `FPBits` lines remain after the change.
LIBC_INLINE bool is_odd_integer(float x) {
using FloatProp = typename fputil::FloatProperties<float>;
using FPBits = typename fputil::FPBits<float>;
// Raw 32-bit representation of x.
uint32_t x_u = cpp::bit_cast<uint32_t>(x);
// x_e: the bits selected by EXP_MASK, shifted down by FRACTION_LEN
// (presumably the biased exponent field).
// lsb: number of trailing zero bits of x_u once every EXP_MASK bit is forced
// to 1, so the count can never run past the lowest EXP_MASK bit.
int32_t x_e = static_cast<int32_t>((x_u & FloatProp::EXP_MASK) >>
FloatProp::FRACTION_LEN);
int32_t lsb = cpp::countr_zero(x_u | FloatProp::EXP_MASK);
int32_t x_e =
static_cast<int32_t>((x_u & FPBits::EXP_MASK) >> FPBits::FRACTION_LEN);
int32_t lsb = cpp::countr_zero(x_u | FPBits::EXP_MASK);
// UNIT_EXPONENT = EXP_BIAS + FRACTION_LEN. Assuming the usual IEEE 754 layout,
// x_e + lsb reaches this value exactly when the lowest set bit has weight 2^0.
constexpr int32_t UNIT_EXPONENT =
FloatProp::EXP_BIAS + static_cast<int32_t>(FloatProp::FRACTION_LEN);
FPBits::EXP_BIAS + static_cast<int32_t>(FPBits::FRACTION_LEN);
// Strict equality: the lowest set bit must be exactly the units bit.
return (x_e + lsb == UNIT_EXPONENT);
}
// Returns true when the bit pattern of the float `x` has no set bit below the
// units position, i.e. x has no fractional part.
// NOTE(review): as written, x = +-0 yields false and an all-ones exponent
// field yields true -- confirm callers filter those inputs beforehand.
//
// NOTE: this listing is a rendered diff. Each `FloatProp::...` line below is
// the removed (pre-change) form of the `FPBits::...` line that follows it;
// only the `FPBits` lines remain after the change.
LIBC_INLINE bool is_integer(float x) {
using FloatProp = typename fputil::FloatProperties<float>;
using FPBits = typename fputil::FPBits<float>;
// Raw 32-bit representation of x.
uint32_t x_u = cpp::bit_cast<uint32_t>(x);
// x_e: the bits selected by EXP_MASK, shifted down by FRACTION_LEN
// (presumably the biased exponent field).
// lsb: number of trailing zero bits of x_u once every EXP_MASK bit is forced
// to 1, so the count can never run past the lowest EXP_MASK bit.
int32_t x_e = static_cast<int32_t>((x_u & FloatProp::EXP_MASK) >>
FloatProp::FRACTION_LEN);
int32_t lsb = cpp::countr_zero(x_u | FloatProp::EXP_MASK);
int32_t x_e =
static_cast<int32_t>((x_u & FPBits::EXP_MASK) >> FPBits::FRACTION_LEN);
int32_t lsb = cpp::countr_zero(x_u | FPBits::EXP_MASK);
// UNIT_EXPONENT = EXP_BIAS + FRACTION_LEN. Assuming the usual IEEE 754 layout,
// x_e + lsb >= UNIT_EXPONENT means the lowest set bit has weight >= 2^0.
constexpr int32_t UNIT_EXPONENT =
FloatProp::EXP_BIAS + static_cast<int32_t>(FloatProp::FRACTION_LEN);
FPBits::EXP_BIAS + static_cast<int32_t>(FPBits::FRACTION_LEN);
// `>=` rather than `==`: any lowest set bit at or above the units bit passes.
return (x_e + lsb >= UNIT_EXPONENT);
}
@@ -424,7 +424,6 @@ LIBC_INLINE bool larger_exponent(double a, double b) {
double powf_double_double(int idx_x, double dx, double y6, double lo6_hi,
const DoubleDouble &exp2_hi_mid) {
using DoubleBits = typename fputil::FPBits<double>;
using DoubleProp = typename fputil::FloatProperties<double>;
// Perform a second range reduction step:
// idx2 = round(2^14 * (dx + 2^-8)) = round ( dx * 2^14 + 2^6)
// dx2 = (1 + dx) * r2 - 1
@@ -500,7 +499,7 @@ double powf_double_double(int idx_x, double dx, double y6, double lo6_hi,
bool lo_sign = DoubleBits(r.lo).get_sign();
if (hi_sign == lo_sign) {
++r_bits;
} else if ((r_bits & DoubleProp::FRACTION_MASK) > 0) {
} else if ((r_bits & DoubleBits::FRACTION_MASK) > 0) {
--r_bits;
}
}
@@ -512,8 +511,7 @@ double powf_double_double(int idx_x, double dx, double y6, double lo6_hi,
LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
using FloatBits = typename fputil::FPBits<float>;
using FloatProp = typename fputil::FloatProperties<float>;
using DoubleProp = typename fputil::FloatProperties<double>;
using DoubleBits = typename fputil::FPBits<double>;
FloatBits xbits(x), ybits(y);
uint32_t x_u = xbits.uintval();
@@ -584,7 +582,7 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
// x^y will overflow / underflow in single precision. Set y to a
// large enough exponent but not too large, so that the computations
// won't overflow in double precision.
y = cpp::bit_cast<float>((y_u & FloatProp::SIGN_MASK) + 0x4f800000U);
y = cpp::bit_cast<float>((y_u & FloatBits::SIGN_MASK) + 0x4f800000U);
}
}
}
@@ -607,11 +605,11 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
return generic::exp10f(y);
}
bool x_sign = x_u >= FloatProp::SIGN_MASK;
bool x_sign = x_u >= FloatBits::SIGN_MASK;
switch (x_abs) {
case 0x0000'0000: { // x = +-0.0f
bool x_sign = (x_u >= FloatProp::SIGN_MASK);
bool x_sign = (x_u >= FloatBits::SIGN_MASK);
bool out_sign = x_sign && is_odd_integer(FloatBits(y_u).get_val());
if (y_u > 0x8000'0000U) {
// pow(0, negative number) = inf
@@ -623,9 +621,9 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
return out_sign ? -0.0f : 0.0f;
}
case 0x7f80'0000: { // x = +-Inf
bool x_sign = (x_u >= FloatProp::SIGN_MASK);
bool x_sign = (x_u >= FloatBits::SIGN_MASK);
bool out_sign = x_sign && is_odd_integer(FloatBits(y_u).get_val());
if (y_u >= FloatProp::SIGN_MASK) {
if (y_u >= FloatBits::SIGN_MASK) {
return out_sign ? -0.0f : 0.0f;
}
return FloatBits::inf(out_sign);
@@ -669,11 +667,11 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
x_u = FloatBits(x).uintval();
// Extract exponent field of x.
ex += (x_u >> FloatProp::FRACTION_LEN);
ex += (x_u >> FloatBits::FRACTION_LEN);
double e_x = static_cast<double>(ex);
// Use the highest 7 fractional bits of m_x as the index for look up tables.
uint32_t x_mant = x_u & FloatProp::FRACTION_MASK;
int idx_x = static_cast<int>(x_mant >> (FloatProp::FRACTION_LEN - 7));
uint32_t x_mant = x_u & FloatBits::FRACTION_MASK;
int idx_x = static_cast<int>(x_mant >> (FloatBits::FRACTION_LEN - 7));
// Add the hidden bit to the mantissa.
// 1 <= m_x < 2
float m_x = cpp::bit_cast<float>(x_mant | 0x3f800000);
@@ -774,7 +772,7 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
int idx_y = hm_i & 0x3f;
// 2^hi
int64_t exp_hi_i = (hm_i >> 6) << DoubleProp::FRACTION_LEN;
int64_t exp_hi_i = (hm_i >> 6) << DoubleBits::FRACTION_LEN;
// 2^mid
int64_t exp_mid_i = cpp::bit_cast<uint64_t>(EXP2_MID1[idx_y].hi);
// (-1)^sign * 2^hi * 2^mid

View File

@@ -59,7 +59,7 @@ LIBC_INLINE int64_t small_range_reduction(double x, double &y) {
LIBC_INLINE int64_t large_range_reduction(double x, int x_exp, double &y) {
int idx = 0;
y = 0;
int x_lsb_exp_m4 = x_exp - fputil::FloatProperties<float>::FRACTION_LEN;
int x_lsb_exp_m4 = x_exp - fputil::FPBits<float>::FRACTION_LEN;
// Skipping the first parts of 32/pi such that:
// LSB of x * LSB of THIRTYTWO_OVER_PI_28[i] >= 32.

View File

@@ -89,7 +89,7 @@ LLVM_LIBC_FUNCTION(float, tanhf, (float x)) {
// -hi = floor(-k * 2^(-MID_BITS))
// exp_mhi = shift -hi to the exponent field of double precision.
int64_t exp_mhi = static_cast<int64_t>(mk >> ExpBase::MID_BITS)
<< fputil::FloatProperties<double>::FRACTION_LEN;
<< fputil::FPBits<double>::FRACTION_LEN;
// mh = 2^(-hi - mid)
int64_t mh_bits = ExpBase::EXP_2_MID[mk & ExpBase::MID_MASK] + exp_mhi;
double mh = fputil::FPBits<double>(uint64_t(mh_bits)).get_val();

View File

@@ -240,8 +240,7 @@ class FloatWriter {
// -exponent will never overflow because all long double types we support
// have at most 15 bits of exponent and the C standard defines an int as
// being at least 16 bits.
static_assert(fputil::FloatProperties<long double>::EXP_LEN <
(sizeof(int) * 8));
static_assert(fputil::FPBits<long double>::EXP_LEN < (sizeof(int) * 8));
public:
LIBC_INLINE FloatWriter(Writer *init_writer, bool init_has_decimal_point,
@@ -474,7 +473,7 @@ LIBC_INLINE int convert_float_decimal_typed(Writer *writer,
const FormatSection &to_conv,
fputil::FPBits<T> float_bits) {
// signed because later we use -FRACTION_LEN
constexpr int32_t FRACTION_LEN = fputil::FloatProperties<T>::FRACTION_LEN;
constexpr int32_t FRACTION_LEN = fputil::FPBits<T>::FRACTION_LEN;
bool is_negative = float_bits.get_sign();
int exponent = float_bits.get_explicit_exponent();
@@ -587,7 +586,7 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer,
const FormatSection &to_conv,
fputil::FPBits<T> float_bits) {
// signed because later we use -FRACTION_LEN
constexpr int32_t FRACTION_LEN = fputil::FloatProperties<T>::FRACTION_LEN;
constexpr int32_t FRACTION_LEN = fputil::FPBits<T>::FRACTION_LEN;
bool is_negative = float_bits.get_sign();
int exponent = float_bits.get_explicit_exponent();
StorageType mantissa = float_bits.get_explicit_mantissa();
@@ -750,7 +749,7 @@ LIBC_INLINE int convert_float_dec_auto_typed(Writer *writer,
const FormatSection &to_conv,
fputil::FPBits<T> float_bits) {
// signed because later we use -FRACTION_LEN
constexpr int32_t FRACTION_LEN = fputil::FloatProperties<T>::FRACTION_LEN;
constexpr int32_t FRACTION_LEN = fputil::FPBits<T>::FRACTION_LEN;
bool is_negative = float_bits.get_sign();
int exponent = float_bits.get_explicit_exponent();
StorageType mantissa = float_bits.get_explicit_mantissa();

View File

@@ -16,7 +16,7 @@
namespace LIBC_NAMESPACE {
template <typename T> struct LlvmLibcStrToFloatTest : public testing::Test {
using StorageType = typename fputil::FloatProperties<T>::StorageType;
using StorageType = typename fputil::FPBits<T>::StorageType;
void clinger_fast_path_test(const StorageType inputMantissa,
const int32_t inputExp10,

View File

@@ -20,8 +20,7 @@ template <typename T> class FrexpTest : public LIBC_NAMESPACE::testing::Test {
DECLARE_SPECIAL_CONSTANTS(T)
static constexpr StorageType HIDDEN_BIT =
StorageType(1)
<< LIBC_NAMESPACE::fputil::FloatProperties<T>::FRACTION_LEN;
StorageType(1) << LIBC_NAMESPACE::fputil::FPBits<T>::FRACTION_LEN;
public:
typedef T (*FrexpFunc)(T, int *);

View File

@@ -20,8 +20,7 @@ template <typename T> class LogbTest : public LIBC_NAMESPACE::testing::Test {
DECLARE_SPECIAL_CONSTANTS(T)
static constexpr StorageType HIDDEN_BIT =
StorageType(1)
<< LIBC_NAMESPACE::fputil::FloatProperties<T>::FRACTION_LEN;
StorageType(1) << LIBC_NAMESPACE::fputil::FPBits<T>::FRACTION_LEN;
public:
typedef T (*LogbFunc)(T);

View File

@@ -20,8 +20,7 @@ template <typename T> class SqrtTest : public LIBC_NAMESPACE::testing::Test {
DECLARE_SPECIAL_CONSTANTS(T)
static constexpr StorageType HIDDEN_BIT =
StorageType(1)
<< LIBC_NAMESPACE::fputil::FloatProperties<T>::FRACTION_LEN;
StorageType(1) << LIBC_NAMESPACE::fputil::FPBits<T>::FRACTION_LEN;
public:
typedef T (*SqrtFunc)(T);

View File

@@ -17,8 +17,7 @@ template <typename T> class FrexpTest : public LIBC_NAMESPACE::testing::Test {
DECLARE_SPECIAL_CONSTANTS(T)
static constexpr StorageType HIDDEN_BIT =
StorageType(1)
<< LIBC_NAMESPACE::fputil::FloatProperties<T>::FRACTION_LEN;
StorageType(1) << LIBC_NAMESPACE::fputil::FPBits<T>::FRACTION_LEN;
public:
typedef T (*FrexpFunc)(T, int *);

View File

@@ -17,8 +17,7 @@ template <typename T> class LogbTest : public LIBC_NAMESPACE::testing::Test {
DECLARE_SPECIAL_CONSTANTS(T)
static constexpr StorageType HIDDEN_BIT =
StorageType(1)
<< LIBC_NAMESPACE::fputil::FloatProperties<T>::FRACTION_LEN;
StorageType(1) << LIBC_NAMESPACE::fputil::FPBits<T>::FRACTION_LEN;
public:
typedef T (*LogbFunc)(T);

View File

@@ -17,8 +17,7 @@ template <typename T> class SqrtTest : public LIBC_NAMESPACE::testing::Test {
DECLARE_SPECIAL_CONSTANTS(T)
static constexpr StorageType HIDDEN_BIT =
StorageType(1)
<< LIBC_NAMESPACE::fputil::FloatProperties<T>::FRACTION_LEN;
StorageType(1) << LIBC_NAMESPACE::fputil::FPBits<T>::FRACTION_LEN;
public:
typedef T (*SqrtFunc)(T);

View File

@@ -49,7 +49,7 @@ template <> struct ExtraPrecision<long double> {
template <typename T>
static inline unsigned int get_precision(double ulp_tolerance) {
if (ulp_tolerance <= 0.5) {
return LIBC_NAMESPACE::fputil::FloatProperties<T>::MANTISSA_PRECISION;
return LIBC_NAMESPACE::fputil::FPBits<T>::MANTISSA_PRECISION;
} else {
return ExtraPrecision<T>::VALUE;
}