/*
* exp.h
* The basic idea is to exploit Pade polynomials.
* A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
* moshier@na-net.ornl.gov) as well as actual code.
* The Cephes library can be found here: http://www.netlib.org/cephes/
*
* Created on: Jun 23, 2012
* Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
*/
/*
* VDT is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser Public License for more details.
*
* You should have received a copy of the GNU Lesser Public License
* along with this program. If not, see .
*/
#ifndef _VDT_EXP_
#define _VDT_EXP_
#include "vdtcore_common.h"
#include
namespace vdt{
namespace details{
const double EXP_LIMIT = 708;
const double PX1exp = 1.26177193074810590878E-4;
const double PX2exp = 3.02994407707441961300E-2;
const double PX3exp = 9.99999999999999999910E-1;
const double QX1exp = 3.00198505138664455042E-6;
const double QX2exp = 2.52448340349684104192E-3;
const double QX3exp = 2.27265548208155028766E-1;
const double QX4exp = 2.00000000000000000009E0;
const double LOG2E = 1.4426950408889634073599; // 1/log(2)
const float MAXLOGF = 88.72283905206835f;
const float MINLOGF = -88.f;
const float C1F = 0.693359375f;
const float C2F = -2.12194440e-4f;
const float PX1expf = 1.9875691500E-4f;
const float PX2expf =1.3981999507E-3f;
const float PX3expf =8.3334519073E-3f;
const float PX4expf =4.1665795894E-2f;
const float PX5expf =1.6666665459E-1f;
const float PX6expf =5.0000001201E-1f;
const float LOG2EF = 1.44269504088896341f;
}
// Exp double precision --------------------------------------------------------
/// Exponential Function double precision
inline double fast_exp(double initial_x){
double x = initial_x;
double px=details::fpfloor(details::LOG2E * x +0.5);
const int32_t n = int32_t(px);
x -= px * 6.93145751953125E-1;
x -= px * 1.42860682030941723212E-6;
const double xx = x * x;
// px = x * P(x**2).
px = details::PX1exp;
px *= xx;
px += details::PX2exp;
px *= xx;
px += details::PX3exp;
px *= x;
// Evaluate Q(x**2).
double qx = details::QX1exp;
qx *= xx;
qx += details::QX2exp;
qx *= xx;
qx += details::QX3exp;
qx *= xx;
qx += details::QX4exp;
// e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) )
x = px / (qx - px);
x = 1.0 + 2.0 * x;
// Build 2^n in double.
x *= details::uint642dp(( ((uint64_t)n) +1023)<<52);
if (initial_x > details::EXP_LIMIT)
x = std::numeric_limits::infinity();
if (initial_x < -details::EXP_LIMIT)
x = 0.;
return x;
}
// Exp single precision --------------------------------------------------------
/// Exponential Function single precision
inline float fast_expf(float initial_x) {
float x = initial_x;
float z = details::fpfloor( details::LOG2EF * x +0.5f ); /* floor() truncates toward -infinity. */
x -= z * details::C1F;
x -= z * details::C2F;
const int32_t n = int32_t ( z );
const float x2 = x * x;
z = x*details::PX1expf;
z += details::PX2expf;
z *= x;
z += details::PX3expf;
z *= x;
z += details::PX4expf;
z *= x;
z += details::PX5expf;
z *= x;
z += details::PX6expf;
z *= x2;
z += x + 1.0f;
/* multiply by power of 2 */
z *= details::uint322sp((n+0x7f)<<23);
if (initial_x > details::MAXLOGF) z=std::numeric_limits::infinity();
if (initial_x < details::MINLOGF) z=0.f;
return z;
}
//------------------------------------------------------------------------------
// void expv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
// void fast_expv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
// void expfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
// void fast_expfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
} // end namespace vdt
#endif