/* * tan.h * The basic idea is to exploit Pade polynomials. * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier * moshier@na-net.ornl.gov) as well as actual code. * The Cephes library can be found here: http://www.netlib.org/cephes/ * * Created on: Jun 23, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . */ #ifndef TAN_H_ #define TAN_H_ #include "vdtcore_common.h" #include "sincos.h" namespace vdt{ namespace details{ const double PX1tan=-1.30936939181383777646E4; const double PX2tan=1.15351664838587416140E6; const double PX3tan=-1.79565251976484877988E7; const double QX1tan = 1.36812963470692954678E4; const double QX2tan = -1.32089234440210967447E6; const double QX3tan = 2.50083801823357915839E7; const double QX4tan = -5.38695755929454629881E7; const double DP1tan = 7.853981554508209228515625E-1; const double DP2tan = 7.94662735614792836714E-9; const double DP3tan = 3.06161699786838294307E-17; const float DP1Ftan = 0.78515625; const float DP2Ftan = 2.4187564849853515625e-4; const float DP3Ftan = 3.77489497744594108e-8; //------------------------------------------------------------------------------ /// Reduce to -45 to 45 inline double reduce2quadranttan(double x, int32_t& quad) { x = fabs(x); quad = int( ONEOPIO4 * x ); // always positive, so (int) == std::floor quad = (quad+1) & (~1); const double y = quad; // Extended precision modular arithmetic return ((x - y * DP1tan) - y * DP2tan) - y * DP3tan; } //------------------------------------------------------------------------------ /// Reduce to -45 to 45 inline float reduce2quadranttan(float x, int32_t& quad) { x = fabs(x); quad = int( ONEOPIO4F * x ); // always positive, so (int) == std::floor quad = (quad+1) & (~1); const float y = quad; // Extended precision modular arithmetic return ((x - y * DP1Ftan) - y * DP2Ftan) - y * DP3Ftan; } } //------------------------------------------------------------------------------ /// Double precision tangent implementation inline double fast_tan(double x){ const uint64_t sign_mask = details::getSignMask(x); int32_t quad =0; const double z=details::reduce2quadranttan(x,quad); const double zz = z * z; double res=z; if( zz > 1.0e-14 ){ double px = details::PX1tan; px *= zz; px += details::PX2tan; px *= zz; px += details::PX3tan; double qx=zz; qx += details::QX1tan; qx *=zz; qx += details::QX2tan; qx *=zz; qx += details::QX3tan; qx *=zz; qx += details::QX4tan; res = z + z * zz * px / qx; } // A no branching way to say: if j&2 res = -1/res. You can!!! quad &=2; quad >>=1; const int32_t alt = quad^1; // Avoid fpe generated by 1/0 if res is 0 const double zeroIfXNonZero = (x==0.); res += zeroIfXNonZero; res = quad * (-1./res) + alt * res; // one coeff is one and one is 0! // Again, return 0 if res==0, the correct result otherwhise return details::dpXORuint64(res,sign_mask) * (1.-zeroIfXNonZero); } // Single precision ------------------------------------------------------------ inline float fast_tanf(float x){ const uint32_t sign_mask = details::getSignMask(x); int32_t quad =0; const float z=details::reduce2quadranttan(x,quad); const float zz = z * z; float res=z; if( zz > 1.0e-14f ){ res = ((((( 9.38540185543E-3f * zz + 3.11992232697E-3f) * zz + 2.44301354525E-2f) * zz + 5.34112807005E-2f) * zz + 1.33387994085E-1f) * zz + 3.33331568548E-1f) * zz * z + z; } // A no branching way to say: if j&2 res = -1/res. You can!!! quad &=2; quad >>=1; const int32_t alt = quad^1; // Avoid fpe generated by 1/0 if res is 0 const float zeroIfXNonZero = (x==0.f); res += zeroIfXNonZero; res = quad * (-1.f/res) + alt * res; // one coeff is one and one is 0! return details::spXORuint32(res,sign_mask) * (1.f-zeroIfXNonZero); } //------------------------------------------------------------------------------ void tanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void fast_tanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void tanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); void fast_tanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); //------------------------------------------------------------------------------ } //vdt namespace #endif /* TAN_H_ */