/* * atan.h * The basic idea is to exploit Pade polynomials. * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier * moshier@na-net.ornl.gov) as well as actual code. * The Cephes library can be found here: http://www.netlib.org/cephes/ * * Created on: Jun 23, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . */ #ifndef ATAN_H_ #define ATAN_H_ #include "vdtcore_common.h" namespace vdt{ namespace details{ const double T3PO8 = 2.41421356237309504880; const double MOREBITSO2 = MOREBITS * 0.5; inline double get_atan_px(const double x2){ const double PX1atan = -8.750608600031904122785E-1; const double PX2atan = -1.615753718733365076637E1; const double PX3atan = -7.500855792314704667340E1; const double PX4atan = -1.228866684490136173410E2; const double PX5atan = -6.485021904942025371773E1; double px = PX1atan; px *= x2; px += PX2atan; px *= x2; px += PX3atan; px *= x2; px += PX4atan; px *= x2; px += PX5atan; return px; } inline double get_atan_qx(const double x2){ const double QX1atan = 2.485846490142306297962E1; const double QX2atan = 1.650270098316988542046E2; const double QX3atan = 4.328810604912902668951E2; const double QX4atan = 4.853903996359136964868E2; const double QX5atan = 1.945506571482613964425E2; double qx=x2; qx += QX1atan; qx *=x2; qx += QX2atan; qx *=x2; qx += QX3atan; qx *=x2; qx += QX4atan; qx *=x2; qx += QX5atan; return qx; } } /// Fast Atan implementation double precision inline double fast_atan(double x){ /* make argument positive and save the sign */ const uint64_t sign_mask = details::getSignMask(x); x=std::fabs(x); /* range reduction */ const double originalx=x; double y = details::PIO4; double factor = details::MOREBITSO2; x = (x-1.0) / (x+1.0); if( originalx > details::T3PO8 ) { y = details::PIO2; factor = details::MOREBITS; x = -1.0 / originalx ; } if ( originalx <= 0.66 ) { y = 0.; factor = 0.; x = originalx; } const double x2 = x * x; const double px = details::get_atan_px(x2); const double qx = details::get_atan_qx(x2); //double res = y +x * x2 * px / qx + x +factor; const double poq=px / qx; double res = x * x2 * poq + x; res+=y; res+=factor; return details::dpORuint64(res,sign_mask); } //------------------------------------------------------------------------------ /// Fast Atan implementation single precision inline float fast_atanf( float xx ) { const uint32_t sign_mask = details::getSignMask(xx); float x= std::fabs(xx); const float x0=x; float y=0.0f; /* range reduction */ if( x0 > 0.4142135623730950f ){ // * tan pi/8 x = (x0-1.0f)/(x0+1.0f); y = details::PIO4F; } if( x0 > 2.414213562373095f ){ // tan 3pi/8 x = -( 1.0f/x0 ); y = details::PIO2F; } const float x2 = x * x; y += ((( 8.05374449538e-2f * x2 - 1.38776856032E-1f) * x2 + 1.99777106478E-1f) * x2 - 3.33329491539E-1f) * x2 * x + x; return details::spORuint32(y,sign_mask); } //------------------------------------------------------------------------------ // Vector signatures void atanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void fast_atanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void atanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); void fast_atanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); }// end of vdt #endif // end of atan