/* * aasin.h * The basic idea is to exploit Pade' polynomials. * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier * moshier@na-net.ornl.gov) as well as actual code. * The Cephes library can be found here: http://www.netlib.org/cephes/ * * Created on: Jun 23, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . */ #ifndef ASIN_H_ #define ASIN_H_ #include "vdtcore_common.h" namespace vdt{ namespace details{ const double RX1asin = 2.967721961301243206100E-3; const double RX2asin = -5.634242780008963776856E-1; const double RX3asin = 6.968710824104713396794E0; const double RX4asin = -2.556901049652824852289E1; const double RX5asin = 2.853665548261061424989E1; const double SX1asin = -2.194779531642920639778E1; const double SX2asin = 1.470656354026814941758E2; const double SX3asin = -3.838770957603691357202E2; const double SX4asin = 3.424398657913078477438E2; const double PX1asin = 4.253011369004428248960E-3; const double PX2asin = -6.019598008014123785661E-1; const double PX3asin = 5.444622390564711410273E0; const double PX4asin = -1.626247967210700244449E1; const double PX5asin = 1.956261983317594739197E1; const double PX6asin = -8.198089802484824371615E0; const double QX1asin = -1.474091372988853791896E1; const double QX2asin = 7.049610280856842141659E1; const double QX3asin = -1.471791292232726029859E2; const double QX4asin = 1.395105614657485689735E2; const double QX5asin = -4.918853881490881290097E1; inline double getRX(const double x){ double rx = RX1asin; rx*= x; rx+= RX2asin; rx*= x; rx+= RX3asin; rx*= x; rx+= RX4asin; rx*= x; rx+= RX5asin; return rx; } inline double getSX(const double x){ double sx = x; sx+= SX1asin; sx*= x; sx+= SX2asin; sx*= x; sx+= SX3asin; sx*= x; sx+= SX4asin; return sx; } inline double getPX(const double x){ double px = PX1asin; px*= x; px+= PX2asin; px*= x; px+= PX3asin; px*= x; px+= PX4asin; px*= x; px+= PX5asin; px*= x; px+= PX6asin; return px; } inline double getQX(const double x){ double qx = x; qx+= QX1asin; qx*= x; qx+= QX2asin; qx*= x; qx+= QX3asin; qx*= x; qx+= QX4asin; qx*= x; qx+= QX5asin; return qx; } } } namespace vdt{ // asin double precision -------------------------------------------------------- /// Double Precision asin inline double fast_asin(double x){ const uint64_t sign_mask = details::getSignMask(x); x = std::fabs(x); const double a = x; double zz = 1.0 - a; double px = details::getRX(zz); double qx = details::getSX(zz); const double p = zz * px/qx; zz = std::sqrt(zz+zz); double z = details::PIO4 - zz; zz = zz * p - details::MOREBITS; z -= zz; z += details::PIO4; if( a < 0.625 ){ zz = a * a; px = details::getPX(zz); qx = details::getQX(zz); z = zz*px/qx; z = a * z + a; } // Linear approx, not sooo needed but seable. Price is cheap though double res = a < 1e-8? a : z ; // Restore Sign return details::dpORuint64(res,sign_mask); } //------------------------------------------------------------------------------ /// Single Precision asin inline float fast_asinf(float x){ uint32_t flag=0; const uint32_t sign_mask = details::getSignMask(x); const float a = std::fabs(x); float z; if( a > 0.5f ) { z = 0.5f * (1.0f - a); x = sqrtf( z ); flag = 1; } else { x = a; z = x * x; } z = (((( 4.2163199048E-2f * z + 2.4181311049E-2f) * z + 4.5470025998E-2f) * z + 7.4953002686E-2f) * z + 1.6666752422E-1f) * z * x + x; // if( flag != 0 ) // { // z = z + z; // z = PIO2F - z; // } // No branch with the two coefficients float tmp = z + z; tmp = details::PIO2F - tmp; // Linear approx, not sooo needed but seable. Price is cheap though float res = a < 1e-4f? a : tmp * flag + (1-flag) * z ; // Restore Sign return details::spORuint32(res,sign_mask); return( z ); } //------------------------------------------------------------------------------ // The cos is in this file as well inline double fast_acos( double x ){return details::PIO2 - fast_asin(x);} //------------------------------------------------------------------------------ inline float fast_acosf( float x ){return details::PIO2F - fast_asinf(x);} //------------------------------------------------------------------------------ // // Vector signatures // // void asinv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); // void fast_asinv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); // void asinfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); // void fast_asinfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); // // void acosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); // void fast_acosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); // void acosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); // void fast_acosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); } //vdt namespace #endif /* ASIN_H_ */