arch/AVX/MathFunctions.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_MATH_FUNCTIONS_AVX_H
11 #define EIGEN_MATH_FUNCTIONS_AVX_H
12 
13 /* The sin and cos functions of this file are loosely derived from
14  * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
15  */
16 
17 // IWYU pragma: private
18 #include "../../InternalHeaderCheck.h"
19 
20 namespace Eigen {
21 
22 namespace internal {
23 
25 
31 #ifdef EIGEN_VECTORIZE_AVX2
34 #endif
37 
38 // Notice that for newer processors, it is counterproductive to use Newton
39 // iteration for square root. In particular, Skylake and Zen2 processors
40 // have approximately doubled throughput of the _mm_sqrt_ps instruction
41 // compared to their predecessors.
// Packet8f specialization of psqrt: forwards the packet straight to the
// _mm256_sqrt_ps intrinsic, with no Newton refinement (see the note above).
// The signature on listing line 43 was dropped by the page extraction; it is
// restored here from this page's own cross-reference entry for
// psqrt< Packet8f > ("Definition: arch/AVX/MathFunctions.h:43").
42 template <>
43 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f psqrt<Packet8f>(const Packet8f& _x) {
44  return _mm256_sqrt_ps(_x);
45 }
// Packet4d specialization of psqrt: forwards the packet straight to the
// _mm256_sqrt_pd intrinsic.
// The signature on listing line 47 was dropped by the page extraction; it is
// restored here from this page's own cross-reference entry for
// psqrt< Packet4d > ("Definition: arch/AVX/MathFunctions.h:47").
46 template <>
47 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d psqrt<Packet4d>(const Packet4d& _x) {
48  return _mm256_sqrt_pd(_x);
49 }
50 
51 // Even on Skylake, using Newton iteration is a win for reciprocal square root.
52 #if EIGEN_FAST_MATH
// Reciprocal square root for a Packet8f argument `a`; only compiled when
// EIGEN_FAST_MATH is non-zero (enclosing #if).
// NOTE(review): the signature line (listing line 54) is missing from this
// extract. Presumably it declares prsqrt<Packet8f>(const Packet8f& a) -- confirm
// the exact qualifiers against the upstream header before relying on this text.
53 template <>
55  // _mm256_rsqrt_ps returns -inf for negative denormals.
56  // _mm512_rsqrt**_ps returns -NaN for negative denormals. We may want
57  // consistency here.
58  // const Packet8f rsqrt = pselect(pcmp_lt(a, pzero(a)),
59  // pset1<Packet8f>(-NumTraits<float>::quiet_NaN()),
60  // _mm256_rsqrt_ps(a));
// Seed with the _mm256_rsqrt_ps result and hand it to the generic Newton-step
// helper instantiated with Steps=1.
61  return generic_rsqrt_newton_step<Packet8f, /*Steps=*/1>::run(a, _mm256_rsqrt_ps(a));
62 }
63 
// Packet8f specialization of preciprocal; only compiled when EIGEN_FAST_MATH
// is non-zero (enclosing #if).
// Takes the approximate reciprocal produced by _mm256_rcp_ps(a) as the initial
// guess and passes it, together with `a`, to generic_reciprocal_newton_step
// instantiated with Steps=1; that helper's result is returned unchanged.
64 template <>
65 EIGEN_STRONG_INLINE Packet8f preciprocal<Packet8f>(const Packet8f& a) {
66  return generic_reciprocal_newton_step<Packet8f, /*Steps=*/1>::run(a, _mm256_rcp_ps(a));
67 }
68 
69 #endif
70 
// Packet8h overload of pfrexp, implemented by round-tripping through float:
// `a` is widened with half2float, pfrexp<Packet8f> does the work, and both the
// returned packet and the exponent packet are narrowed back with float2half.
// The signature on listing line 72 was dropped by the page extraction; it is
// restored here from this page's own cross-reference entry for
// pfrexp(const Packet8h &a, Packet8h &exponent)
// ("Definition: arch/AVX/MathFunctions.h:72").
71 template <>
72 EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h& a, Packet8h& exponent) {
73  Packet8f fexponent;
74  const Packet8h out = float2half(pfrexp<Packet8f>(half2float(a), fexponent));
75  exponent = float2half(fexponent);
76  return out;
77 }
78 
// Packet8h overload of pldexp, implemented by round-tripping through float:
// both `a` and `exponent` are widened with half2float, pldexp<Packet8f> does
// the work, and the result is narrowed back with float2half.
// The signature on listing line 80 was dropped by the page extraction; it is
// restored here from this page's own cross-reference entry for
// pldexp(const Packet8h &a, const Packet8h &exponent)
// ("Definition: arch/AVX/MathFunctions.h:80").
79 template <>
80 EIGEN_STRONG_INLINE Packet8h pldexp(const Packet8h& a, const Packet8h& exponent) {
81  return float2half(pldexp<Packet8f>(half2float(a), half2float(exponent)));
82 }
83 
// bfloat16 packet counterpart of pfrexp, computed in float: `a` is widened with
// Bf16ToF32, pfrexp<Packet8f> fills a float exponent packet, and both results
// are narrowed back with F32ToBf16.
// NOTE(review): the signature line (listing line 85) is missing from this
// extract. Presumably it is pfrexp(const Packet8bf& a, Packet8bf& exponent),
// mirroring the Packet8h overload -- confirm against the upstream header.
84 template <>
// Scratch packet that receives the exponent in float form.
86  Packet8f fexponent;
87  const Packet8bf out = F32ToBf16(pfrexp<Packet8f>(Bf16ToF32(a), fexponent));
// Hand the exponent back to the caller in the bfloat16 packet type.
88  exponent = F32ToBf16(fexponent);
89  return out;
90 }
91 
// bfloat16 packet counterpart of pldexp, computed in float: `a` and `exponent`
// are widened with Bf16ToF32, pldexp<Packet8f> does the work, and the result
// is narrowed back with F32ToBf16.
// NOTE(review): the signature line (listing line 93) is missing from this
// extract. Presumably it is pldexp(const Packet8bf& a, const Packet8bf& exponent),
// mirroring the Packet8h overload -- confirm against the upstream header.
92 template <>
94  return F32ToBf16(pldexp<Packet8f>(Bf16ToF32(a), Bf16ToF32(exponent)));
95 }
96 
121 
122 } // end namespace internal
123 
124 } // end namespace Eigen
125 
126 #endif // EIGEN_MATH_FUNCTIONS_AVX_H
AnnoyingScalar cos(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:136
AnnoyingScalar sin(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:137
#define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD)
Definition: BFloat16.h:34
#define EIGEN_DOUBLE_PACKET_FUNCTION(METHOD, PACKET)
Definition: GenericPacketMathFunctionsFwd.h:186
#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PACKET)
Definition: GenericPacketMathFunctionsFwd.h:188
#define EIGEN_GENERIC_PACKET_FUNCTION(METHOD, PACKET)
Definition: GenericPacketMathFunctionsFwd.h:179
#define F16_PACKET_FUNCTION(PACKET_F, PACKET_F16, METHOD)
Definition: Half.h:52
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Definition: Macros.h:900
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
const Scalar * a
Definition: level2_cplx_impl.h:32
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16 &a)
Definition: BFloat16.h:639
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16 &a)
Definition: BFloat16.h:642
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan(const bfloat16 &a)
Definition: BFloat16.h:636
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16 &a)
Definition: BFloat16.h:615
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16 &a)
Definition: BFloat16.h:618
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp2(const bfloat16 &a)
Definition: BFloat16.h:616
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet &a)
Definition: GenericPacketMath.h:1097
EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f &a)
Definition: AVX/PacketMath.h:2283
EIGEN_STRONG_INLINE Packet8f Bf16ToF32(const Packet8bf &a)
Definition: AVX/PacketMath.h:2558
EIGEN_STRONG_INLINE Packet8f pfrexp< Packet8f >(const Packet8f &a, Packet8f &exponent)
Definition: AVX/PacketMath.h:1874
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet &a)
Definition: GenericPacketMath.h:1123
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet &a)
Definition: GenericPacketMath.h:1103
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet &a)
Definition: GenericPacketMath.h:1022
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet &a)
Definition: GenericPacketMath.h:1015
EIGEN_STRONG_INLINE Packet8f half2float(const Packet8h &a)
Definition: AVX/PacketMath.h:2273
EIGEN_DEVICE_FUNC Packet preciprocal(const Packet &a)
Definition: GenericPacketMath.h:1433
eigen_packet_wrapper< __vector unsigned short int, 0 > Packet8bf
Definition: AltiVec/PacketMath.h:42
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f psqrt< Packet8f >(const Packet8f &_x)
Definition: arch/AVX/MathFunctions.h:43
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet &a)
Definition: GenericPacketMath.h:1071
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp2(const Packet &a)
Definition: GenericPacketMath.h:1091
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet &a)
Definition: GenericPacketMath.h:1110
EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f &a)
Definition: LSX/PacketMath.h:2176
EIGEN_STRONG_INLINE Packet8h pldexp(const Packet8h &a, const Packet8h &exponent)
Definition: arch/AVX/MathFunctions.h:80
EIGEN_STRONG_INLINE Packet8f pldexp< Packet8f >(const Packet8f &a, const Packet8f &exponent)
Definition: AVX/PacketMath.h:1906
EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h &a, Packet8h &exponent)
Definition: arch/AVX/MathFunctions.h:72
__m256 Packet8f
Definition: AVX/PacketMath.h:34
EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f &a)
Definition: LSX/PacketMath.h:2528
EIGEN_STRONG_INLINE Packet8bf F32ToBf16(Packet4f p4f)
Definition: AltiVec/PacketMath.h:2059
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d psqrt< Packet4d >(const Packet4d &_x)
Definition: arch/AVX/MathFunctions.h:47
__m256d Packet4d
Definition: AVX/PacketMath.h:36
eigen_packet_wrapper< __m128i, 2 > Packet8h
Definition: AVX/PacketMath.h:38
EIGEN_STRONG_INLINE Packet4f pexp(const Packet4f &_x)
Definition: LSX/PacketMath.h:2663
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
auto run(Kernel kernel, Args &&... args) -> decltype(kernel(args...))
Definition: gpu_test_helper.h:414
Definition: Eigen_Colamd.h:49
Scalar log2(Scalar x)
Definition: packetmath.cpp:754
Definition: GenericPacketMath.h:225
std::ofstream out("Result.txt")