/*
* inv.h
* An experiment: implement division with the square of the approximate
* inverse square root.
* In other words one transforms a shift, multiplications and sums into a
* sqrt.
*
* Created on: Jun 24, 2012
* Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
*
* VDT is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser Public License for more details.
*
* You should have received a copy of the GNU Lesser Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INV_H_
#define INV_H_
#include "vdtcore_common.h"
#include "sqrt.h"
#include <cmath>
#include <cstdint>
#include <limits>
namespace vdt{
//------------------------------------------------------------------------------
/// Double-precision reciprocal built from an approximate inverse square root.
///
/// fast_isqrt_general(|x|, isqrt_iterations) is multiplied by a copy of itself
/// into which the mask returned by details::getSignMask(x) has been OR-ed, so
/// the product is the square of that approximation.
/// NOTE(review): the mask presumably holds only the sign bit of x, which would
/// make the result carry the sign of x -- confirm against details::getSignMask.
///
/// @param x                 value to invert
/// @param isqrt_iterations  iteration count forwarded to fast_isqrt_general
/// @return approximation of 1/x
inline double fast_inv_general(double x, const uint32_t isqrt_iterations) {
  // Mask is taken from the original x, before the absolute value is applied.
  const uint64_t x_sign_bits = details::getSignMask(x);

  // Approximate 1/sqrt(|x|).
  const double inv_root = fast_isqrt_general(std::fabs(x), isqrt_iterations);

  // Second factor: the same approximation with the mask OR-ed into its bits.
  const double masked_inv_root = details::dpORuint64(inv_root, x_sign_bits);

  return inv_root * masked_inv_root;
}
//------------------------------------------------------------------------------
/// Double-precision inversion: forwards x to fast_inv_general and requests
/// four inverse-square-root iterations.
/// @param x  value to invert
/// @return the value produced by fast_inv_general(x, 4)
inline double fast_inv(double x) {
  const uint32_t isqrt_iterations = 4;
  return fast_inv_general(x, isqrt_iterations);
}
//------------------------------------------------------------------------------
/// Double-precision inversion with one iteration fewer than fast_inv:
/// forwards x to fast_inv_general and requests three iterations.
/// @param x  value to invert
/// @return the value produced by fast_inv_general(x, 3)
inline double fast_approx_inv(double x) {
  const uint32_t isqrt_iterations = 3;
  return fast_inv_general(x, isqrt_iterations);
}
//------------------------------------------------------------------------------
/// Reference double-precision inversion using the plain division operator;
/// provided for comparisons with the fast variants.
/// @param x  value to invert
/// @return 1.0 / x
inline double inv(double x) {
  const double numerator = 1.;
  return numerator / x;
}
//------------------------------------------------------------------------------
// Single precision
/// Single-precision reciprocal built from an approximate inverse square root.
///
/// fast_isqrtf_general(|x|, isqrt_iterations) is multiplied by a copy of itself
/// into which the mask returned by details::getSignMask(x) has been OR-ed, so
/// the product is the square of that approximation.
/// NOTE(review): the mask presumably holds only the sign bit of x, which would
/// make the result carry the sign of x -- confirm against details::getSignMask.
///
/// @param x                 value to invert
/// @param isqrt_iterations  iteration count forwarded to fast_isqrtf_general
/// @return approximation of 1/x
inline float fast_invf_general(float x, const uint32_t isqrt_iterations) {
  // Mask is taken from the original x, before the absolute value is applied.
  const uint32_t x_sign_bits = details::getSignMask(x);

  // Approximate 1/sqrt(|x|).
  const float inv_root = fast_isqrtf_general(std::fabs(x), isqrt_iterations);

  // Second factor: the same approximation with the mask OR-ed into its bits.
  const float masked_inv_root = details::spORuint32(inv_root, x_sign_bits);

  return inv_root * masked_inv_root;
}
//------------------------------------------------------------------------------
/// Single-precision inversion: forwards x to fast_invf_general and requests
/// two inverse-square-root iterations.
/// @param x  value to invert
/// @return the value produced by fast_invf_general(x, 2)
inline float fast_invf(float x) {
  const uint32_t isqrt_iterations = 2;
  return fast_invf_general(x, isqrt_iterations);
}
//------------------------------------------------------------------------------
/// Single-precision inversion with one iteration fewer than fast_invf:
/// forwards x to fast_invf_general and requests a single iteration.
/// @param x  value to invert
/// @return the value produced by fast_invf_general(x, 1)
inline float fast_approx_invf(float x) {
  const uint32_t isqrt_iterations = 1;
  return fast_invf_general(x, isqrt_iterations);
}
//------------------------------------------------------------------------------
/// Reference single-precision inversion using the plain division operator;
/// provided for comparisons with the fast variants.
/// @param x  value to invert
/// @return 1.f / x
inline float invf(float x) {
  const float numerator = 1.f;
  return numerator / x;
}
//------------------------------------------------------------------------------
// void invv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
// void fast_invv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
// void fast_approx_invv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
// void invfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
// void fast_invfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
// void fast_approx_invfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
} // end namespace vdt
#endif /* INV_H_ */