Eigen::internal::generic_rsqrt_newton_step< Packet, Steps > Struct Template Reference

#include <MathFunctionsImpl.h>

Public Types

using Scalar = typename unpacket_traits< Packet >::type
 

Static Public Member Functions

static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run (const Packet &a, const Packet &approx_rsqrt)
 

Detailed Description

template<typename Packet, int Steps>
struct Eigen::internal::generic_rsqrt_newton_step< Packet, Steps >

Fast reciprocal sqrt using Newton-Raphson's method.

Preconditions:

  1. The starting guess provided in approx_rsqrt must have at least half the leading mantissa bits in the correct result, such that a single Newton-Raphson step is sufficient to get within 1-2 ulps of the correct result.
  2. If a is zero, approx_rsqrt must be infinite with the same sign as a.
  3. If a is infinite, approx_rsqrt must be zero with the same sign as a.

If the preconditions are satisfied, which they are for the _*_rsqrt_ps instructions on x86, the result has a maximum relative error of 2 ulps, and correctly handles zero, infinity, and NaN. Positive denormals are treated as zero.

Member Typedef Documentation

◆ Scalar

template<typename Packet , int Steps>
using Eigen::internal::generic_rsqrt_newton_step< Packet, Steps >::Scalar = typename unpacket_traits<Packet>::type

Member Function Documentation

◆ run()

template<typename Packet , int Steps>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet Eigen::internal::generic_rsqrt_newton_step< Packet, Steps >::run ( const Packet & a,
const Packet & approx_rsqrt 
)
inline static
78  {
79  constexpr Scalar kMinusHalf = Scalar(-1) / Scalar(2);
80  const Packet cst_minus_half = pset1<Packet>(kMinusHalf);
81  const Packet cst_minus_one = pset1<Packet>(Scalar(-1));
82 
83  Packet inv_sqrt = approx_rsqrt;
84  for (int step = 0; step < Steps; ++step) {
85  // Refine the approximation using one Newton-Raphson step:
86  // h_n = (x * inv_sqrt) * inv_sqrt - 1 (so that h_n is nearly 0).
87  // inv_sqrt = inv_sqrt - 0.5 * inv_sqrt * h_n
88  Packet r2 = pmul(a, inv_sqrt);
89  Packet half_r = pmul(inv_sqrt, cst_minus_half);
90  Packet h_n = pmadd(r2, inv_sqrt, cst_minus_one);
91  inv_sqrt = pmadd(half_r, h_n, inv_sqrt);
92  }
93 
94  // If the refined inv_sqrt is NaN, then either:
95  // 1) the input is NaN
96  // 2) zero and infinity were multiplied
97  // In either of these cases, return approx_rsqrt
98  return pselect(pisnan(inv_sqrt), approx_rsqrt, inv_sqrt);
99  }
SCALAR Scalar
Definition: bench_gemm.cpp:45
const Scalar * a
Definition: level2_cplx_impl.h:32
EIGEN_STRONG_INLINE Packet8f pisnan(const Packet8f &a)
Definition: AVX/PacketMath.h:1034
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: AltiVec/PacketMath.h:1218
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:88
EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1474
typename unpacket_traits< Packet >::type Scalar
Definition: MathFunctionsImpl.h:77

References a, Eigen::internal::pisnan(), Eigen::internal::pmadd(), Eigen::internal::pmul(), and Eigen::internal::pselect().


The documentation for this struct was generated from the following file: