This adds the Arm Optimized Routines (see https://github.com/ARM-software/optimized-routines) source code under the the LLVM license. The version of the code provided in this patch is v20.02 of the Arm Optimized Routines project. This entire contribution is being committed as is even though it does not currently fit the LLVM libc model and does not follow the LLVM coding style. In the near future, implementations from this patch will be moved over to their right place in the LLVM-libc tree. This will be done over many small patches, all of which will go through the normal LLVM code review process. See this libc-dev post for the plan: http://lists.llvm.org/pipermail/libc-dev/2020-March/000044.html Differential revision of the original upload: https://reviews.llvm.org/D75355
364 lines
9.6 KiB
C
364 lines
9.6 KiB
C
/*
|
|
* Generic functions for ULP error estimation.
|
|
*
|
|
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
* See https://llvm.org/LICENSE.txt for license information.
|
|
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
*/
|
|
|
|
/* For each different math function type,
|
|
T(x) should add a different suffix to x.
|
|
RT(x) should add a return type specific suffix to x. */
|
|
|
|
#ifdef NEW_RT
|
|
#undef NEW_RT
|
|
|
|
# if USE_MPFR
|
|
static int RT(ulpscale_mpfr) (mpfr_t x, int t)
|
|
{
|
|
/* TODO: pow of 2 cases. */
|
|
if (mpfr_regular_p (x))
|
|
{
|
|
mpfr_exp_t e = mpfr_get_exp (x) - RT(prec);
|
|
if (e < RT(emin))
|
|
e = RT(emin) - 1;
|
|
if (e > RT(emax) - RT(prec))
|
|
e = RT(emax) - RT(prec);
|
|
return e;
|
|
}
|
|
if (mpfr_zero_p (x))
|
|
return RT(emin) - 1;
|
|
if (mpfr_inf_p (x))
|
|
return RT(emax) - RT(prec);
|
|
/* NaN. */
|
|
return 0;
|
|
}
|
|
# endif
|
|
|
|
/* Difference between exact result and closest real number that
|
|
gets rounded to got, i.e. error before rounding, for a correctly
|
|
rounded result the difference is 0. */
|
|
static double RT(ulperr) (RT(float) got, const struct RT(ret) * p, int r)
|
|
{
|
|
RT(float) want = p->y;
|
|
RT(float) d;
|
|
double e;
|
|
|
|
if (RT(asuint) (got) == RT(asuint) (want))
|
|
return 0.0;
|
|
if (signbit (got) != signbit (want))
|
|
/* May have false positives with NaN. */
|
|
//return isnan(got) && isnan(want) ? 0 : INFINITY;
|
|
return INFINITY;
|
|
if (!isfinite (want) || !isfinite (got))
|
|
{
|
|
if (isnan (got) != isnan (want))
|
|
return INFINITY;
|
|
if (isnan (want))
|
|
return 0;
|
|
if (isinf (got))
|
|
{
|
|
got = RT(copysign) (RT(halfinf), got);
|
|
want *= 0.5f;
|
|
}
|
|
if (isinf (want))
|
|
{
|
|
want = RT(copysign) (RT(halfinf), want);
|
|
got *= 0.5f;
|
|
}
|
|
}
|
|
if (r == FE_TONEAREST)
|
|
{
|
|
// TODO: incorrect when got vs want cross a powof2 boundary
|
|
/* error = got > want
|
|
? got - want - tail ulp - 0.5 ulp
|
|
: got - want - tail ulp + 0.5 ulp; */
|
|
d = got - want;
|
|
e = d > 0 ? -p->tail - 0.5 : -p->tail + 0.5;
|
|
}
|
|
else
|
|
{
|
|
if ((r == FE_DOWNWARD && got < want) || (r == FE_UPWARD && got > want)
|
|
|| (r == FE_TOWARDZERO && fabs (got) < fabs (want)))
|
|
got = RT(nextafter) (got, want);
|
|
d = got - want;
|
|
e = -p->tail;
|
|
}
|
|
return RT(scalbn) (d, -p->ulpexp) + e;
|
|
}
|
|
|
|
static int RT(isok) (RT(float) ygot, int exgot, RT(float) ywant, int exwant,
|
|
int exmay)
|
|
{
|
|
return RT(asuint) (ygot) == RT(asuint) (ywant)
|
|
&& ((exgot ^ exwant) & ~exmay) == 0;
|
|
}
|
|
|
|
static int RT(isok_nofenv) (RT(float) ygot, RT(float) ywant)
|
|
{
|
|
return RT(asuint) (ygot) == RT(asuint) (ywant);
|
|
}
|
|
#endif
|
|
|
|
static inline void T(call_fenv) (const struct fun *f, struct T(args) a, int r,
|
|
RT(float) * y, int *ex)
|
|
{
|
|
if (r != FE_TONEAREST)
|
|
fesetround (r);
|
|
feclearexcept (FE_ALL_EXCEPT);
|
|
*y = T(call) (f, a);
|
|
*ex = fetestexcept (FE_ALL_EXCEPT);
|
|
if (r != FE_TONEAREST)
|
|
fesetround (FE_TONEAREST);
|
|
}
|
|
|
|
static inline void T(call_nofenv) (const struct fun *f, struct T(args) a,
|
|
int r, RT(float) * y, int *ex)
|
|
{
|
|
*y = T(call) (f, a);
|
|
*ex = 0;
|
|
}
|
|
|
|
static inline int T(call_long_fenv) (const struct fun *f, struct T(args) a,
|
|
int r, struct RT(ret) * p,
|
|
RT(float) ygot, int exgot)
|
|
{
|
|
if (r != FE_TONEAREST)
|
|
fesetround (r);
|
|
feclearexcept (FE_ALL_EXCEPT);
|
|
volatile struct T(args) va = a; // TODO: barrier
|
|
a = va;
|
|
RT(double) yl = T(call_long) (f, a);
|
|
p->y = (RT(float)) yl;
|
|
volatile RT(float) vy = p->y; // TODO: barrier
|
|
(void) vy;
|
|
p->ex = fetestexcept (FE_ALL_EXCEPT);
|
|
if (r != FE_TONEAREST)
|
|
fesetround (FE_TONEAREST);
|
|
p->ex_may = FE_INEXACT;
|
|
if (RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may))
|
|
return 1;
|
|
p->ulpexp = RT(ulpscale) (p->y);
|
|
if (isinf (p->y))
|
|
p->tail = RT(lscalbn) (yl - (RT(double)) 2 * RT(halfinf), -p->ulpexp);
|
|
else
|
|
p->tail = RT(lscalbn) (yl - p->y, -p->ulpexp);
|
|
if (RT(fabs) (p->y) < RT(min_normal))
|
|
{
|
|
/* TODO: subnormal result is treated as undeflow even if it's
|
|
exact since call_long may not raise inexact correctly. */
|
|
if (p->y != 0 || (p->ex & FE_INEXACT))
|
|
p->ex |= FE_UNDERFLOW | FE_INEXACT;
|
|
}
|
|
return 0;
|
|
}
|
|
static inline int T(call_long_nofenv) (const struct fun *f, struct T(args) a,
|
|
int r, struct RT(ret) * p,
|
|
RT(float) ygot, int exgot)
|
|
{
|
|
RT(double) yl = T(call_long) (f, a);
|
|
p->y = (RT(float)) yl;
|
|
if (RT(isok_nofenv) (ygot, p->y))
|
|
return 1;
|
|
p->ulpexp = RT(ulpscale) (p->y);
|
|
if (isinf (p->y))
|
|
p->tail = RT(lscalbn) (yl - (RT(double)) 2 * RT(halfinf), -p->ulpexp);
|
|
else
|
|
p->tail = RT(lscalbn) (yl - p->y, -p->ulpexp);
|
|
return 0;
|
|
}
|
|
|
|
/* There are nan input args and all quiet. */
|
|
static inline int T(qnanpropagation) (struct T(args) a)
|
|
{
|
|
return T(reduce) (a, isnan, ||) && !T(reduce) (a, RT(issignaling), ||);
|
|
}
|
|
static inline RT(float) T(sum) (struct T(args) a)
|
|
{
|
|
return T(reduce) (a, , +);
|
|
}
|
|
|
|
/* returns 1 if the got result is ok. */
|
|
static inline int T(call_mpfr_fix) (const struct fun *f, struct T(args) a,
|
|
int r_fenv, struct RT(ret) * p,
|
|
RT(float) ygot, int exgot)
|
|
{
|
|
#if USE_MPFR
|
|
int t, t2;
|
|
mpfr_rnd_t r = rmap (r_fenv);
|
|
MPFR_DECL_INIT(my, RT(prec_mpfr));
|
|
MPFR_DECL_INIT(mr, RT(prec));
|
|
MPFR_DECL_INIT(me, RT(prec_mpfr));
|
|
mpfr_clear_flags ();
|
|
t = T(call_mpfr) (my, f, a, r);
|
|
/* Double rounding. */
|
|
t2 = mpfr_set (mr, my, r);
|
|
if (t2)
|
|
t = t2;
|
|
mpfr_set_emin (RT(emin));
|
|
mpfr_set_emax (RT(emax));
|
|
t = mpfr_check_range (mr, t, r);
|
|
t = mpfr_subnormalize (mr, t, r);
|
|
mpfr_set_emax (MPFR_EMAX_DEFAULT);
|
|
mpfr_set_emin (MPFR_EMIN_DEFAULT);
|
|
p->y = mpfr_get_d (mr, r);
|
|
p->ex = t ? FE_INEXACT : 0;
|
|
p->ex_may = FE_INEXACT;
|
|
if (mpfr_underflow_p () && (p->ex & FE_INEXACT))
|
|
/* TODO: handle before and after rounding uflow cases. */
|
|
p->ex |= FE_UNDERFLOW;
|
|
if (mpfr_overflow_p ())
|
|
p->ex |= FE_OVERFLOW | FE_INEXACT;
|
|
if (mpfr_divby0_p ())
|
|
p->ex |= FE_DIVBYZERO;
|
|
//if (mpfr_erangeflag_p ())
|
|
// p->ex |= FE_INVALID;
|
|
if (!mpfr_nanflag_p () && RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may))
|
|
return 1;
|
|
if (mpfr_nanflag_p () && !T(qnanpropagation) (a))
|
|
p->ex |= FE_INVALID;
|
|
p->ulpexp = RT(ulpscale_mpfr) (my, t);
|
|
if (!isfinite (p->y))
|
|
{
|
|
p->tail = 0;
|
|
if (isnan (p->y))
|
|
{
|
|
/* If an input was nan keep its sign. */
|
|
p->y = T(sum) (a);
|
|
if (!isnan (p->y))
|
|
p->y = (p->y - p->y) / (p->y - p->y);
|
|
return RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may);
|
|
}
|
|
mpfr_set_si_2exp (mr, signbit (p->y) ? -1 : 1, 1024, MPFR_RNDN);
|
|
if (mpfr_cmpabs (my, mr) >= 0)
|
|
return RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may);
|
|
}
|
|
mpfr_sub (me, my, mr, MPFR_RNDN);
|
|
mpfr_mul_2si (me, me, -p->ulpexp, MPFR_RNDN);
|
|
p->tail = mpfr_get_d (me, MPFR_RNDN);
|
|
return 0;
|
|
#else
|
|
abort ();
|
|
#endif
|
|
}
|
|
|
|
static int T(cmp) (const struct fun *f, struct gen *gen,
|
|
const struct conf *conf)
|
|
{
|
|
double maxerr = 0;
|
|
uint64_t cnt = 0;
|
|
uint64_t cnt1 = 0;
|
|
uint64_t cnt2 = 0;
|
|
uint64_t cntfail = 0;
|
|
int r = conf->r;
|
|
int use_mpfr = conf->mpfr;
|
|
int fenv = conf->fenv;
|
|
for (;;)
|
|
{
|
|
struct RT(ret) want;
|
|
struct T(args) a = T(next) (gen);
|
|
int exgot;
|
|
int exgot2;
|
|
RT(float) ygot;
|
|
RT(float) ygot2;
|
|
int fail = 0;
|
|
if (fenv)
|
|
T(call_fenv) (f, a, r, &ygot, &exgot);
|
|
else
|
|
T(call_nofenv) (f, a, r, &ygot, &exgot);
|
|
if (f->twice) {
|
|
secondcall = 1;
|
|
if (fenv)
|
|
T(call_fenv) (f, a, r, &ygot2, &exgot2);
|
|
else
|
|
T(call_nofenv) (f, a, r, &ygot2, &exgot2);
|
|
secondcall = 0;
|
|
if (RT(asuint) (ygot) != RT(asuint) (ygot2))
|
|
{
|
|
fail = 1;
|
|
cntfail++;
|
|
T(printcall) (f, a);
|
|
printf (" got %a then %a for same input\n", ygot, ygot2);
|
|
}
|
|
}
|
|
cnt++;
|
|
int ok = use_mpfr
|
|
? T(call_mpfr_fix) (f, a, r, &want, ygot, exgot)
|
|
: (fenv ? T(call_long_fenv) (f, a, r, &want, ygot, exgot)
|
|
: T(call_long_nofenv) (f, a, r, &want, ygot, exgot));
|
|
if (!ok)
|
|
{
|
|
int print = 0;
|
|
double err = RT(ulperr) (ygot, &want, r);
|
|
double abserr = fabs (err);
|
|
// TODO: count errors below accuracy limit.
|
|
if (abserr > 0)
|
|
cnt1++;
|
|
if (abserr > 1)
|
|
cnt2++;
|
|
if (abserr > conf->errlim)
|
|
{
|
|
print = 1;
|
|
if (!fail)
|
|
{
|
|
fail = 1;
|
|
cntfail++;
|
|
}
|
|
}
|
|
if (abserr > maxerr)
|
|
{
|
|
maxerr = abserr;
|
|
if (!conf->quiet && abserr > conf->softlim)
|
|
print = 1;
|
|
}
|
|
if (print)
|
|
{
|
|
T(printcall) (f, a);
|
|
// TODO: inf ulp handling
|
|
printf (" got %a want %a %+g ulp err %g\n", ygot, want.y,
|
|
want.tail, err);
|
|
}
|
|
int diff = fenv ? exgot ^ want.ex : 0;
|
|
if (fenv && (diff & ~want.ex_may))
|
|
{
|
|
if (!fail)
|
|
{
|
|
fail = 1;
|
|
cntfail++;
|
|
}
|
|
T(printcall) (f, a);
|
|
printf (" is %a %+g ulp, got except 0x%0x", want.y, want.tail,
|
|
exgot);
|
|
if (diff & exgot)
|
|
printf (" wrongly set: 0x%x", diff & exgot);
|
|
if (diff & ~exgot)
|
|
printf (" wrongly clear: 0x%x", diff & ~exgot);
|
|
putchar ('\n');
|
|
}
|
|
}
|
|
if (cnt >= conf->n)
|
|
break;
|
|
if (!conf->quiet && cnt % 0x100000 == 0)
|
|
printf ("progress: %6.3f%% cnt %llu cnt1 %llu cnt2 %llu cntfail %llu "
|
|
"maxerr %g\n",
|
|
100.0 * cnt / conf->n, (unsigned long long) cnt,
|
|
(unsigned long long) cnt1, (unsigned long long) cnt2,
|
|
(unsigned long long) cntfail, maxerr);
|
|
}
|
|
double cc = cnt;
|
|
if (cntfail)
|
|
printf ("FAIL ");
|
|
else
|
|
printf ("PASS ");
|
|
T(printgen) (f, gen);
|
|
printf (" round %c errlim %g maxerr %g %s cnt %llu cnt1 %llu %g%% cnt2 %llu "
|
|
"%g%% cntfail %llu %g%%\n",
|
|
conf->rc, conf->errlim,
|
|
maxerr, conf->r == FE_TONEAREST ? "+0.5" : "+1.0",
|
|
(unsigned long long) cnt,
|
|
(unsigned long long) cnt1, 100.0 * cnt1 / cc,
|
|
(unsigned long long) cnt2, 100.0 * cnt2 / cc,
|
|
(unsigned long long) cntfail, 100.0 * cntfail / cc);
|
|
return !!cntfail;
|
|
}
|