#ifndef COMMON_EXPONENTIAL_H #define COMMON_EXPONENTIAL_H /* This file is part of the Vc library. {{{ Copyright (C) 2012 Matthias Kretz Vc is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Vc is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with Vc. If not, see . ------------------------------------------------------------------- The exp implementation is derived from Cephes, which carries the following Copyright notice: Cephes Math Library Release 2.2: June, 1992 Copyright 1984, 1987, 1989 by Stephen L. Moshier Direct inquiries to 30 Frost Street, Cambridge, MA 02140 }}}*/ #ifndef VC_COMMON_EXPONENTIAL_H #define VC_COMMON_EXPONENTIAL_H #include "macros.h" namespace ROOT { namespace Vc { namespace Common { using Vc::VC__USE_NAMESPACE::c_log; using Vc::VC__USE_NAMESPACE::Vector; using Vc::VC__USE_NAMESPACE::floor; using Vc::VC__USE_NAMESPACE::ldexp; static const float log2_e = 1.44269504088896341f; static const float MAXLOGF = 88.72283905206835f; static const float MINLOGF = -103.278929903431851103f; /* log(2^-149) */ static const float MAXNUMF = 3.4028234663852885981170418348451692544e38f; template struct TypenameForLdexp { typedef Vector Type; }; template<> struct TypenameForLdexp { typedef Vector Type; }; template static inline Vector exp(VC_ALIGNED_PARAMETER(Vector) _x) { typedef Vector V; typedef typename V::Mask M; typedef typename TypenameForLdexp::Type I; typedef Const C; V x(_x); const M overflow = x > MAXLOGF; const M underflow = x < MINLOGF; // log₂(eˣ) = x * log₂(e) * log₂(2) // = log₂(2^(x * log₂(e))) // => eˣ = 2^(x * log₂(e)) // => n = ⌊x * log₂(e) + ½⌋ // => y = x - n * ln(2) | recall that: ln(2) * log₂(e) == 1 // <=> eˣ = 2ⁿ * eʸ V z = floor(C::log2_e() * x + 0.5f); I n = static_cast(z); x -= z * C::ln2_large(); x -= z * C::ln2_small(); /* Theoretical peak relative error in [-0.5, +0.5] is 4.2e-9. */ z = ((((( 1.9875691500E-4f * x + 1.3981999507E-3f) * x + 8.3334519073E-3f) * x + 4.1665795894E-2f) * x + 1.6666665459E-1f) * x + 5.0000001201E-1f) * (x * x) + x + 1.0f; x = ldexp(z, n); // == z * 2ⁿ x(overflow) = std::numeric_limits::infinity(); x.setZero(underflow); return x; } static inline Vector exp(Vector::AsArg _x) { Vector x = _x; typedef Vector V; typedef V::Mask M; typedef Const C; const M overflow = x > Vc_buildDouble( 1, 0x0006232bdd7abcd2ull, 9); // max log const M underflow = x < Vc_buildDouble(-1, 0x0006232bdd7abcd2ull, 9); // min log V px = floor(C::log2_e() * x + 0.5); #ifdef VC_IMPL_SSE Vector n(px); n.data() = Mem::permute(n.data()); #elif defined(VC_IMPL_AVX) __m128i tmp = _mm256_cvttpd_epi32(px.data()); Vector n = AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp)); #endif x -= px * C::ln2_large(); //Vc_buildDouble(1, 0x00062e4000000000ull, -1); // ln2 x -= px * C::ln2_small(); //Vc_buildDouble(1, 0x0007f7d1cf79abcaull, -20); // ln2 const double P[] = { Vc_buildDouble(1, 0x000089cdd5e44be8ull, -13), Vc_buildDouble(1, 0x000f06d10cca2c7eull, -6), Vc_buildDouble(1, 0x0000000000000000ull, 0) }; const double Q[] = { Vc_buildDouble(1, 0x00092eb6bc365fa0ull, -19), Vc_buildDouble(1, 0x0004ae39b508b6c0ull, -9), Vc_buildDouble(1, 0x000d17099887e074ull, -3), Vc_buildDouble(1, 0x0000000000000000ull, 1) }; const V x2 = x * x; px = x * ((P[0] * x2 + P[1]) * x2 + P[2]); x = px / ((((Q[0] * x2 + Q[1]) * x2 + Q[2]) * x2 + Q[3]) - px); x = V::One() + 2.0 * x; x = ldexp(x, n); // == x * 2ⁿ x(overflow) = std::numeric_limits::infinity(); x.setZero(underflow); return x; } } // namespace Common namespace VC__USE_NAMESPACE { using Vc::Common::exp; } // namespace VC__USE_NAMESPACE } // namespace Vc } // namespace ROOT #include "undomacros.h" #endif // VC_COMMON_EXPONENTIAL_H #endif // COMMON_EXPONENTIAL_H