#include <MathFunctionsImpl.h>

Public Types
using	Scalar = typename unpacket_traits< Packet >::type

Static Public Member Functions
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet	run (const Packet &a, const Packet &approx_rsqrt)

Detailed Description

template<typename Packet, int Steps>
struct Eigen::internal::generic_rsqrt_newton_step< Packet, Steps >

Fast reciprocal sqrt using Newton-Raphson's method.

Preconditions:

The starting guess provided in approx_rsqrt must be correct in at least half of the leading mantissa bits of the result, such that a single Newton-Raphson step is sufficient to get within 1-2 ulps of the correct result.
If a is zero, approx_rsqrt must be infinite with the same sign as a.
If a is infinite, approx_rsqrt must be zero with the same sign as a.

If the preconditions are satisfied, which they are for the _*_rsqrt_ps instructions on x86, the result has a maximum relative error of 2 ulps, and correctly handles zero, infinity, and NaN. Positive denormals are treated as zero.

Member Typedef Documentation

◆ Scalar

template<typename Packet , int Steps>

using Eigen::internal::generic_rsqrt_newton_step< Packet, Steps >::Scalar = typename unpacket_traits<Packet>::type

Member Function Documentation

◆ run()

template<typename Packet , int Steps>

static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet Eigen::internal::generic_rsqrt_newton_step< Packet, Steps >::run	(	const Packet &	a,
		const Packet &	approx_rsqrt
	)

inline static

                                                                                                        {
     constexpr Scalar kMinusHalf = Scalar(-1) / Scalar(2);
     const Packet cst_minus_half = pset1<Packet>(kMinusHalf);
     const Packet cst_minus_one = pset1<Packet>(Scalar(-1));
  
     Packet inv_sqrt = approx_rsqrt;
     for (int step = 0; step < Steps; ++step) {
       // Refine the approximation using one Newton-Raphson step:
       // h_n = (x * inv_sqrt) * inv_sqrt - 1 (so that h_n is nearly 0).
       // inv_sqrt = inv_sqrt - 0.5 * inv_sqrt * h_n
       Packet r2 = pmul(a, inv_sqrt);
       Packet half_r = pmul(inv_sqrt, cst_minus_half);
       Packet h_n = pmadd(r2, inv_sqrt, cst_minus_one);
       inv_sqrt = pmadd(half_r, h_n, inv_sqrt);
     }
  
     // If x is NaN, then either:
     // 1) the input is NaN
     // 2) zero and infinity were multiplied
     // In either of these cases, return approx_rsqrt
     return pselect(pisnan(inv_sqrt), approx_rsqrt, inv_sqrt);
   }

References a, Eigen::internal::pisnan(), Eigen::internal::pmadd(), Eigen::internal::pmul(), and Eigen::internal::pselect().

The documentation for this struct was generated from the following file:

MathFunctionsImpl.h

Public Types