Eigen::internal::generic_sqrt_newton_step< Packet, Steps > Struct Template Reference

#include <MathFunctionsImpl.h>

Static Public Member Functions

static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run (const Packet &a, const Packet &approx_rsqrt)
 

Detailed Description

template<typename Packet, int Steps = 1>
struct Eigen::internal::generic_sqrt_newton_step< Packet, Steps >

Fast sqrt using Newton-Raphson's method.

Preconditions:

  1. The starting guess for the reciprocal sqrt provided in approx_rsqrt must have at least half the leading mantissa bits in the correct result, such that a single Newton-Raphson step is sufficient to get within 1-2 ulps of the correct result.
  2. If a is zero, approx_rsqrt must be infinite.
  3. If a is infinite, approx_rsqrt must be zero.

If the preconditions are satisfied, which they are for for the _*_rsqrt_ps instructions on x86, the result has a maximum relative error of 2 ulps, and correctly handles zero and infinity, and NaN. Positive denormal inputs are treated as zero.

Member Function Documentation

◆ run()

template<typename Packet , int Steps = 1>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet Eigen::internal::generic_sqrt_newton_step< Packet, Steps >::run ( const Packet a,
const Packet approx_rsqrt 
)
inlinestatic
128  {
129  using Scalar = typename unpacket_traits<Packet>::type;
130  const Packet one_point_five = pset1<Packet>(Scalar(1.5));
131  const Packet minus_half = pset1<Packet>(Scalar(-0.5));
132  // If a is inf or zero, return a directly.
133  const Packet inf_mask = pcmp_eq(a, pset1<Packet>(NumTraits<Scalar>::infinity()));
134  const Packet return_a = por(pcmp_eq(a, pzero(a)), inf_mask);
135  // Do a single step of Newton's iteration for reciprocal square root:
136  // x_{n+1} = x_n * (1.5 + (-0.5 * x_n) * (a * x_n))).
137  // The Newton's step is computed this way to avoid over/under-flows.
138  Packet rsqrt = pmul(approx_rsqrt, pmadd(pmul(minus_half, approx_rsqrt), pmul(a, approx_rsqrt), one_point_five));
139  for (int step = 1; step < Steps; ++step) {
140  rsqrt = pmul(rsqrt, pmadd(pmul(minus_half, rsqrt), pmul(a, rsqrt), one_point_five));
141  }
142 
143  // Return sqrt(x) = x * rsqrt(x) for non-zero finite positive arguments.
144  // Return a itself for 0 or +inf, NaN for negative arguments.
145  return pselect(return_a, a, pmul(a, rsqrt));
146  }
SCALAR Scalar
Definition: bench_gemm.cpp:45
const Scalar * a
Definition: level2_cplx_impl.h:32
EIGEN_STRONG_INLINE Packet8f pzero(const Packet8f &)
Definition: AVX/PacketMath.h:774
EIGEN_STRONG_INLINE Packet8h por(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2309
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: AltiVec/PacketMath.h:1218
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:88
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:353
EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1474
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T rsqrt(const T &x)
Definition: MathFunctions.h:1327
T type
Definition: GenericPacketMath.h:135

References a, Eigen::internal::pcmp_eq(), Eigen::internal::pmadd(), Eigen::internal::pmul(), Eigen::internal::por(), Eigen::internal::pselect(), Eigen::internal::pzero(), and Eigen::numext::rsqrt().


The documentation for this struct was generated from the following file: