Eigen::internal::generic_fast_erfc< Scalar > Struct Template Reference

#include <SpecialFunctionsImpl.h>

Public Member Functions

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T run (const T &x_in)
 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T run (const T &x_in)
 

Static Public Member Functions

template<typename T >
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T run (const T &x_in)
 

Member Function Documentation

◆ run() [1/3]

template<typename Scalar >
template<typename T >
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T Eigen::internal::generic_fast_erfc< Scalar >::run ( const T x_in)
static

◆ run() [2/3]

283  {
284  constexpr float kClamp = 11.0f;
285  const T x = pmin(pmax(x_in, pset1<T>(-kClamp)), pset1<T>(kClamp));
286 
287  // erfc(x) = 1 + x * S(x^2), |x| <= 1.
288  //
289  // Coefficients for S and T generated with Rminimax command:
290  // ./ratapprox --function="erfc(x)-1" --dom='[-1,1]' --type=[11,0] --num="odd"
291  // --numF="[SG]" --denF="[SG]" --log --dispCoeff="dec"
292  constexpr float alpha[] = {5.61802298761904239654541015625e-04, -4.91381669417023658752441406250e-03,
293  2.67075151205062866210937500000e-02, -1.12800106406211853027343750000e-01,
294  3.76122951507568359375000000000e-01, -1.12837910652160644531250000000e+00};
295  const T x2 = pmul(x, x);
296  const T one = pset1<T>(1.0f);
297  const T erfc_small = pmadd(x, ppolevl<T, 5>::run(x2, alpha), one);
298 
299  // Return early if we don't need the more expensive approximation for any
300  // entry in a.
301  const T x_abs_gt_one_mask = pcmp_lt(one, x2);
302  if (!predux_any(x_abs_gt_one_mask)) return erfc_small;
303 
304  // erfc(x) = exp(-x^2) * 1/x * P(1/x^2) / Q(1/x^2), 1 < x < 9.
305  //
306  // Coefficients for P and Q generated with Rminimax command:
307  // ./ratapprox --function="erfc(1/sqrt(x))*exp(1/x)/sqrt(x)"
308  // --dom='[0.01,1]' --type=[3,4] --numF="[SG]" --denF="[SG]" --log
309  // --dispCoeff="dec"
310  constexpr float gamma[] = {1.0208116471767425537109375e-01f, 4.2920666933059692382812500e-01f,
311  3.2379078865051269531250000e-01f, 5.3971976041793823242187500e-02f};
312  constexpr float delta[] = {1.7251677811145782470703125e-02f, 3.9137163758277893066406250e-01f,
313  1.0000000000000000000000000e+00f, 6.2173241376876831054687500e-01f,
314  9.5662862062454223632812500e-02f};
315  const T x2_lo = twoprod_low(x, x, x2);
316  // Here we use that
317  // exp(-x^2) = exp(-(x2+x2_lo)^2) ~= exp(-x2)*exp(-x2_lo) ~= exp(-x2)*(1-x2_lo)
318  // since x2_lo < kClamp * eps << 1 in the region we care about. This trick reduces the max error
319  // from 34 ulps to below 5 ulps.
320  const T exp2_hi = pexp(pnegate(x2));
321  const T z = pnmadd(exp2_hi, x2_lo, exp2_hi);
322  const T q2 = preciprocal(x2);
323  const T num = ppolevl<T, 3>::run(q2, gamma);
324  const T denom = pmul(x, ppolevl<T, 4>::run(q2, delta));
325  const T r = pdiv(num, denom);
326  const T maybe_two = pand(pcmp_lt(x, pset1<T>(0.0)), pset1<T>(2.0));
327  const T erfc_large = pmadd(z, r, maybe_two);
328  return pselect(x_abs_gt_one_mask, erfc_large, erfc_small);
329 }
RealScalar alpha
Definition: level1_cplx_impl.h:151
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet twoprod_low(const Packet &x, const Packet &y, const Packet &xy)
Definition: GenericPacketMathFunctions.h:1716
EIGEN_STRONG_INLINE bool predux_any(const Packet4f &x)
Definition: AltiVec/PacketMath.h:2751
EIGEN_DEVICE_FUNC Packet pdiv(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:368
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:663
EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1341
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: AltiVec/PacketMath.h:1218
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:88
EIGEN_DEVICE_FUNC Packet preciprocal(const Packet &a)
Definition: GenericPacketMath.h:1433
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:649
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
Definition: AltiVec/Complex.h:264
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2319
EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:827
EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1474
EIGEN_STRONG_INLINE Packet4f pexp(const Packet4f &_x)
Definition: LSX/PacketMath.h:2663
EIGEN_DEVICE_FUNC const Scalar & x
Definition: SpecialFunctionsImpl.h:2024
Vector< double > x2(const Vector< double > &coord)
Cartesian coordinates centered at the point (1.5,1)
Definition: poisson/poisson_with_singularity/two_d_poisson.cc:102
int delta
Definition: MultiOpt.py:96
r
Definition: UniformPSDSelfTest.py:20
Mdouble gamma(Mdouble gamma_in)
This is the gamma function returns the true value for the half integer value.
Definition: ExtendedMath.cc:116
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet &x, const typename unpacket_traits< Packet >::type coeff[])
Definition: GenericPacketMathFunctions.h:86

References alpha, MultiOpt::delta, mathsFunc::gamma(), Eigen::internal::pand(), Eigen::internal::pcmp_lt(), Eigen::internal::pdiv(), Eigen::internal::pexp(), Eigen::internal::pmadd(), Eigen::internal::pmax(), Eigen::internal::pmin(), Eigen::internal::pmul(), Eigen::internal::pnegate(), Eigen::internal::pnmadd(), Eigen::internal::preciprocal(), Eigen::internal::predux_any(), Eigen::internal::pselect(), UniformPSDSelfTest::r, Eigen::internal::ppolevl< Packet, N >::run(), Eigen::internal::twoprod_low(), Eigen::numext::x, and Global_parameters::x2().

◆ run() [3/3]

406  {
407  // Clamp x to [-28:28] beyond which erfc(x) is either two or zero (below the underflow threshold).
408  // This avoids having to deal with twoprod(x,x) producing NaN for sufficiently large x.
409  constexpr double kClamp = 28.0;
410  const T x = pmin(pmax(x_in, pset1<T>(-kClamp)), pset1<T>(kClamp));
411 
412  // For |x| < 1, we use erfc(x) = 1 - erf(x).
413  const T x2 = pmul(x, x);
414  const T one = pset1<T>(1.0);
415  const T erfc_small = pnmadd(x, erf_over_x_double_small(x2), one);
416 
417  // Return early if we don't need the more expensive approximation for any
418  // entry in a.
419  const T x_abs_gt_one_mask = pcmp_lt(one, x2);
420  if (!predux_any(x_abs_gt_one_mask)) return erfc_small;
421 
422  const T erfc_large = erfc_double_large(x, x2);
423  return pselect(x_abs_gt_one_mask, erfc_large, erfc_small);
424 }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T erfc_double_large(const T &x, const T &x2)
Definition: SpecialFunctionsImpl.h:366
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T erf_over_x_double_small(const T &x2)
Definition: SpecialFunctionsImpl.h:336

References Eigen::internal::erf_over_x_double_small(), Eigen::internal::erfc_double_large(), Eigen::internal::pcmp_lt(), Eigen::internal::pmax(), Eigen::internal::pmin(), Eigen::internal::pmul(), Eigen::internal::pnmadd(), Eigen::internal::predux_any(), Eigen::internal::pselect(), Eigen::numext::x, and Global_parameters::x2().


The documentation for this struct was generated from the following file: