/* * atan2.h * The basic idea is to exploit Pade polynomials. * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier * moshier@na-net.ornl.gov) as well as actual code. * The Cephes library can be found here: http://www.netlib.org/cephes/ * * Created on: Sept 20, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . */ #ifndef ATAN2_H_ #define ATAN2_H_ #include "vdtcore_common.h" #include "atan.h" namespace vdt{ inline double fast_atan2( double y, double x ) { // move in first octant double xx = std::fabs(x); double yy = std::fabs(y); double tmp (0.0); if (yy>xx) { tmp = yy; yy=xx; xx=tmp; tmp=1.; } // To avoid the fpe, we protect against /0. const double oneIfXXZero = (xx==0.); double t=yy/(xx+oneIfXXZero); double z=t; double s = details::PIO4; double factor = details::MOREBITSO2; t = (t-1.0) / (t+1.0); if( z > details::T3PO8 ) { s = details::PIO2; factor = details::MOREBITS; t = -1.0 / z ; } if ( z <= 0.66 ) { s = 0.; factor = 0.; t = z; } const double t2 = t * t; const double px = details::get_atan_px(t2); const double qx = details::get_atan_qx(t2); //double res = y +x * x2 * px / qx + x +factor; const double poq=px / qx; double ret = t * t2 * poq + t; ret+=s; ret+=factor; // Here we put the result to 0 if xx was 0, if not nothing happens! ret*= (1.-oneIfXXZero); // move back in place if (y==0) ret=0.0; if (tmp!=0) ret = details::PIO2 - ret; if (x<0) ret = details::PI - ret; if (y<0) ret = -ret; return ret; } inline float fast_atan2f( float y, float x ) { // move in first octant float xx = std::fabs(x); float yy = std::fabs(y); float tmp (0.0f); if (yy>xx) { tmp = yy; yy=xx; xx=tmp; tmp =1.f; } // To avoid the fpe, we protect against /0. const float oneIfXXZero = (xx==0.f); float t=yy/(xx/*+oneIfXXZero*/); float z=t; if( t > 0.4142135623730950f ) // * tan pi/8 z = (t-1.0f)/(t+1.0f); //printf("%e %e %e %e\n",yy,xx,t,z); float z2 = z * z; float ret =(((( 8.05374449538e-2f * z2 - 1.38776856032E-1f) * z2 + 1.99777106478E-1f) * z2 - 3.33329491539E-1f) * z2 * z + z ); // Here we put the result to 0 if xx was 0, if not nothing happens! ret*= (1.f - oneIfXXZero); // move back in place if (y==0.f) ret=0.f; if( t > 0.4142135623730950f ) ret += details::PIO4F; if (tmp!=0) ret = details::PIO2F - ret; if (x<0.f) ret = details::PIF - ret; if (y<0.f) ret = -ret; return ret; } //------------------------------------------------------------------------------ // Vector signatures void atan2v(const uint32_t size, double const * __restrict__ iarray, double const * __restrict__ iarray2, double* __restrict__ oarray); void fast_atan2v(const uint32_t size, double const * __restrict__ iarray, double const * __restrict__ iarray2, double* __restrict__ oarray); void atan2fv(const uint32_t size, float const * __restrict__ iarray, float const * __restrict__ iarray2, float* __restrict__ oarray); void fast_atan2fv(const uint32_t size, float const * __restrict__ iarray, float const * __restrict__ iarray2, float* __restrict__ oarray); } // end namespace vdt #endif