#include <MathFunctionsImpl.h>

Static Public Member Functions
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet	run (const Packet &a, const Packet &approx_rsqrt)

Detailed Description

template<typename Packet, int Steps = 1>
struct Eigen::internal::generic_sqrt_newton_step< Packet, Steps >

Fast sqrt using Newton-Raphson's method.

Preconditions:

The starting guess for the reciprocal sqrt provided in approx_rsqrt must have at least half of its leading mantissa bits correct, such that a single Newton-Raphson step is sufficient to get within 1-2 ulps of the correct result.
If a is zero, approx_rsqrt must be infinite.
If a is infinite, approx_rsqrt must be zero.

If the preconditions are satisfied, which they are for the _*_rsqrt_ps instructions on x86, the result has a maximum relative error of 2 ulps and correctly handles zero, infinity, and NaN. Positive denormal inputs are treated as zero.

Member Function Documentation

◆ run()

template<typename Packet , int Steps = 1>

static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet Eigen::internal::generic_sqrt_newton_step< Packet, Steps >::run	(	const Packet &	a,
		const Packet &	approx_rsqrt
	)

inline static

                                                                                                        {
     // Refines approx_rsqrt with Newton's iteration, then forms sqrt(a) as a * rsqrt(a).
     using Scalar = typename unpacket_traits<Packet>::type;
     // Constants of the Newton update, built in the packet's scalar type.
     const Packet one_point_five = pset1<Packet>(Scalar(1.5));
     const Packet minus_half = pset1<Packet>(Scalar(-0.5));
     // If a is inf or zero, return a directly. By the documented preconditions
     // approx_rsqrt is then zero or infinite, so the product a * rsqrt computed
     // below would not be a usable result for those inputs.
     const Packet inf_mask = pcmp_eq(a, pset1<Packet>(NumTraits<Scalar>::infinity()));
     const Packet return_a = por(pcmp_eq(a, pzero(a)), inf_mask);
     // Newton's iteration for the reciprocal square root:
     //   x_{n+1} = x_n * (1.5 + (-0.5 * x_n) * (a * x_n)).
     // The Newton's step is computed this way to avoid over/under-flows.
     // The first step starts from approx_rsqrt and is always performed.
     Packet rsqrt = pmul(approx_rsqrt, pmadd(pmul(minus_half, approx_rsqrt), pmul(a, approx_rsqrt), one_point_five));
     // Remaining Steps - 1 iterations, each refining the previous estimate.
     for (int step = 1; step < Steps; ++step) {
       rsqrt = pmul(rsqrt, pmadd(pmul(minus_half, rsqrt), pmul(a, rsqrt), one_point_five));
     }
  
     // Return sqrt(x) = x * rsqrt(x) for non-zero finite positive arguments.
     // Return a itself for 0 or +inf, NaN for negative arguments.
     return pselect(return_a, a, pmul(a, rsqrt));
   }

References a, Eigen::internal::pcmp_eq(), Eigen::internal::pmadd(), Eigen::internal::pmul(), Eigen::internal::por(), Eigen::internal::pselect(), Eigen::internal::pzero(), and Eigen::numext::rsqrt().

The documentation for this struct was generated from the following file:

MathFunctionsImpl.h

Static Public Member Functions

Detailed Description

template<typename Packet, int Steps = 1> struct Eigen::internal::generic_sqrt_newton_step< Packet, Steps >

Member Function Documentation

◆ run()

template<typename Packet, int Steps = 1>
struct Eigen::internal::generic_sqrt_newton_step< Packet, Steps >