d0/da7/PacketMathFP16_8h_source.html

 // This file is part of Eigen, a lightweight C++ template library

 // for linear algebra.

 //

 //

 //

 // This Source Code Form is subject to the terms of the Mozilla

 // Public License v. 2.0. If a copy of the MPL was not distributed

 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


 #ifndef EIGEN_PACKET_MATH_FP16_AVX512_H

 #define EIGEN_PACKET_MATH_FP16_AVX512_H


 // IWYU pragma: private

 #include "../../InternalHeaderCheck.h"


 namespace Eigen {


 namespace internal {


 // Half-precision packet types used in this file. Packet32h is the 512-bit
 // FP16 vector type itself; Packet16h and Packet8h wrap 256-bit and 128-bit
 // integer registers, each with its own wrapper id.
 typedef __m512h Packet32h;
 typedef eigen_packet_wrapper<__m256i, 1> Packet16h;
 typedef eigen_packet_wrapper<__m128i, 2> Packet8h;


 // Flags the Packet8h wrapper as an arithmetic type.
 template <>
 struct is_arithmetic<Packet8h> {
   enum { value = true };
 };


 // Packet traits for Eigen::half: the full packet is Packet32h (32 lanes), the
 // half-sized packet is Packet16h. The Has* flags state which packet
 // operations are advertised as available for this scalar type.
 template <>
 struct packet_traits<half> : default_packet_traits {
   typedef Packet32h type;
   typedef Packet16h half;
   enum {
     Vectorizable = 1,
     AlignedOnScalar = 1,
     size = 32,

     HasCmp = 1,
     HasAdd = 1,
     HasSub = 1,
     HasMul = 1,
     HasDiv = 1,
     HasNegate = 1,
     HasAbs = 1,
     HasAbs2 = 0,
     HasMin = 1,
     HasMax = 1,
     HasConj = 1,
     HasSetLinear = 0,
     HasLog = 1,
     HasLog1p = 1,
     HasExp = 1,
     HasExpm1 = 1,
     HasSqrt = 1,
     HasRsqrt = 1,
     // Not implemented yet; these should be added in the future.
     HasBessel = 0,
     HasNdtri = 0,
     // The trigonometric and tanh kernels are only enabled with EIGEN_FAST_MATH.
     HasSin = EIGEN_FAST_MATH,
     HasCos = EIGEN_FAST_MATH,
     HasTanh = EIGEN_FAST_MATH,
     HasErf = 0,  // EIGEN_FAST_MATH,
     HasBlend = 0
   };
 };


 // Reverse traits for Packet32h: 32 Eigen::half lanes, 64-byte alignment,
 // half-sized packet Packet16h, no masked load/store.
 template <>
 struct unpacket_traits<Packet32h> {
   typedef Eigen::half type;
   typedef Packet16h half;
   enum {
     size = 32,
     alignment = Aligned64,
     vectorizable = true,
     masked_load_available = false,
     masked_store_available = false
   };
 };


 // Reverse traits for Packet16h: 16 Eigen::half lanes, 32-byte alignment,
 // half-sized packet Packet8h, no masked load/store.
 template <>
 struct unpacket_traits<Packet16h> {
   typedef Eigen::half type;
   typedef Packet8h half;
   enum {
     size = 16,
     alignment = Aligned32,
     vectorizable = true,
     masked_load_available = false,
     masked_store_available = false
   };
 };


 // Reverse traits for Packet8h: 8 Eigen::half lanes, 16-byte alignment, no
 // masked load/store. This is the smallest packet here, so its half-sized
 // packet is Packet8h itself.
 template <>
 struct unpacket_traits<Packet8h> {
   typedef Eigen::half type;
   typedef Packet8h half;
   enum {
     size = 8,
     alignment = Aligned16,
     vectorizable = true,
     masked_load_available = false,
     masked_store_available = false
   };
 };


 // Memory functions


 // pset1


 // Broadcasts a single half-precision scalar to every lane of a Packet32h.
 template <>
 EIGEN_STRONG_INLINE Packet32h pset1<Packet32h>(const Eigen::half& from) {
   // Eigen::half and its raw representation are bit compatible with _Float16,
   // so the bits are reinterpreted rather than value-converted.
   const _Float16 scalar = numext::bit_cast<_Float16>(from);
   return _mm512_set1_ph(scalar);
 }


 // Returns a Packet32h with all lanes zero. The argument is not read.
 template <>
 EIGEN_STRONG_INLINE Packet32h pzero(const Packet32h& /*unused*/) {
   const Packet32h zeros = _mm512_setzero_ph();
   return zeros;
 }


 // pset1frombits

 // Fills every 16-bit lane with the raw bit pattern `from` and reinterprets the
 // result as a half-precision packet.
 template <>
 EIGEN_STRONG_INLINE Packet32h pset1frombits<Packet32h>(unsigned short from) {
   const __m512i pattern = _mm512_set1_epi16(from);
   return _mm512_castsi512_ph(pattern);
 }


 // pfirst


 // Returns lane 0 of the packet as a scalar Eigen::half.
 template <>
 EIGEN_STRONG_INLINE Eigen::half pfirst<Packet32h>(const Packet32h& from) {
 #ifdef EIGEN_VECTORIZE_AVX512DQ
   // Take the low 256 bits, then pull out their first 16-bit element.
   const __m256i lowerLanes = _mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0);
   const unsigned short rawBits = static_cast<unsigned short>(_mm256_extract_epi16(lowerLanes, 0));
   return half_impl::raw_uint16_to_half(rawBits);
 #else
   // Without AVX512DQ: spill the whole packet and read the first element.
   Eigen::half lanes[32];
   _mm512_storeu_ph(lanes, from);
   return lanes[0];
 #endif
 }


 // pload


 // Loads 32 half values from `from` using the aligned load intrinsic.
 template <>
 EIGEN_STRONG_INLINE Packet32h pload<Packet32h>(const Eigen::half* from) {
   EIGEN_DEBUG_ALIGNED_LOAD
   return _mm512_load_ph(from);
 }


 // ploadu


 // Loads 32 half values from `from` using the unaligned load intrinsic.
 template <>
 EIGEN_STRONG_INLINE Packet32h ploadu<Packet32h>(const Eigen::half* from) {
   EIGEN_DEBUG_UNALIGNED_LOAD
   return _mm512_loadu_ph(from);
 }


 // pstore


 // Stores the 32 lanes of `from` to `to` using the aligned store intrinsic.
 template <>
 EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet32h& from) {
   EIGEN_DEBUG_ALIGNED_STORE
   _mm512_store_ph(to, from);
 }


 // pstoreu


 // Stores the 32 lanes of `from` to `to` using the unaligned store intrinsic.
 template <>
 EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet32h& from) {
   EIGEN_DEBUG_UNALIGNED_STORE
   _mm512_storeu_ph(to, from);
 }


 // ploaddup

 // Loads 16 half values and duplicates each one into two adjacent lanes:
 // {from[0], from[0], from[1], from[1], ..., from[15], from[15]}.
 template <>
 EIGEN_STRONG_INLINE Packet32h ploaddup<Packet32h>(const Eigen::half* from) {
   // Only the low 256 bits are populated; the permutation never reads the rest.
   const __m512h source = _mm512_castph256_ph512(_mm256_loadu_ph(from));
   // Index vector, listed from lane 31 down to lane 0.
   const __m512i duplicateIdx = _mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6,
                                                 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0);
   return _mm512_permutexvar_ph(duplicateIdx, source);
 }


 // ploadquad

 // Loads 8 half values and replicates each one into four adjacent lanes:
 // {from[0] x4, from[1] x4, ..., from[7] x4}.
 template <>
 EIGEN_STRONG_INLINE Packet32h ploadquad<Packet32h>(const Eigen::half* from) {
   // Only the low 128 bits are populated; the permutation never reads the rest.
   const __m512h source = _mm512_castph128_ph512(_mm_loadu_ph(from));
   // Index vector, listed from lane 31 down to lane 0.
   const __m512i quadIdx = _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1,
                                            1, 1, 1, 0, 0, 0, 0);
   return _mm512_permutexvar_ph(quadIdx, source);
 }


 // pabs


 // Lane-wise absolute value.
 template <>
 EIGEN_STRONG_INLINE Packet32h pabs<Packet32h>(const Packet32h& a) {
   const Packet32h magnitude = _mm512_abs_ph(a);
   return magnitude;
 }


 // psignbit


 // Produces a per-lane mask: all 16 bits set where the sign bit of `a` is set,
 // all bits clear otherwise.
 template <>
 EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) {
   const __m512i rawBits = _mm512_castph_si512(a);
   // An arithmetic shift by 15 smears the top bit over the whole 16-bit lane.
   const __m512i smearedSign = _mm512_srai_epi16(rawBits, 15);
   return _mm512_castsi512_ph(smearedSign);
 }


 // pmin


 // Lane-wise minimum, as computed by _mm512_min_ph.
 template <>
 EIGEN_STRONG_INLINE Packet32h pmin<Packet32h>(const Packet32h& a, const Packet32h& b) {
   const Packet32h smaller = _mm512_min_ph(a, b);
   return smaller;
 }


 // pmax


 // Lane-wise maximum, as computed by _mm512_max_ph.
 template <>
 EIGEN_STRONG_INLINE Packet32h pmax<Packet32h>(const Packet32h& a, const Packet32h& b) {
   const Packet32h larger = _mm512_max_ph(a, b);
   return larger;
 }


 // plset

 // Returns {a, a + 1, a + 2, ..., a + 31}.
 template <>
 EIGEN_STRONG_INLINE Packet32h plset<Packet32h>(const half& a) {
   const Packet32h base = pset1<Packet32h>(a);
   // Per-lane offsets, listed from lane 31 down to lane 0.
   const Packet32h offsets = _mm512_set_ph(31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13,
                                           12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   return _mm512_add_ph(base, offsets);
 }


 // por


 // Bitwise OR of the raw bits of two packets.
 template <>
 EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) {
   const __m512i lhsBits = _mm512_castph_si512(a);
   const __m512i rhsBits = _mm512_castph_si512(b);
   return _mm512_castsi512_ph(_mm512_or_si512(lhsBits, rhsBits));
 }


 // pxor


 // Bitwise XOR of the raw bits of two packets.
 template <>
 EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) {
   const __m512i lhsBits = _mm512_castph_si512(a);
   const __m512i rhsBits = _mm512_castph_si512(b);
   return _mm512_castsi512_ph(_mm512_xor_si512(lhsBits, rhsBits));
 }


 // pand


 // Bitwise AND of the raw bits of two packets.
 template <>
 EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) {
   const __m512i lhsBits = _mm512_castph_si512(a);
   const __m512i rhsBits = _mm512_castph_si512(b);
   return _mm512_castsi512_ph(_mm512_and_si512(lhsBits, rhsBits));
 }


 // pandnot


 // Computes a & ~b on the raw bits.
 template <>
 EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) {
   const __m512i keepBits = _mm512_castph_si512(a);
   const __m512i clearBits = _mm512_castph_si512(b);
   // The intrinsic negates its FIRST operand, hence the swapped argument order.
   return _mm512_castsi512_ph(_mm512_andnot_si512(clearBits, keepBits));
 }


 // pselect


 // Lane-wise select: yields `a` where the corresponding lane of `mask` has any
 // bit set, and `b` where the lane is all zeros.
 template <>
 EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) {
   const __m512i maskBits = _mm512_castph_si512(mask);
   // Bit i is set when lane i of the mask is entirely zero, i.e. "false".
   const __mmask32 laneIsFalse = _mm512_cmp_epi16_mask(maskBits, _mm512_setzero_epi32(), _MM_CMPINT_EQ);
   // The blend picks its second operand where the k-bit is set, so false lanes get b.
   return _mm512_mask_blend_ph(laneIsFalse, a, b);
 }


 // pcmp_eq


 // Lane-wise a == b (ordered, quiet). True lanes are 0xffff, false lanes 0.
 template <>
 EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) {
   const __mmask32 isEqual = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ);
   const __m512i allClear = _mm512_set1_epi32(0);
   const __m512i laneMask = _mm512_mask_set1_epi16(allClear, isEqual, static_cast<short>(0xffffu));
   return _mm512_castsi512_ph(laneMask);
 }


 // pcmp_le


 // Lane-wise a <= b (ordered, quiet). True lanes are 0xffff, false lanes 0.
 template <>
 EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) {
   const __mmask32 isLessEqual = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ);
   const __m512i allClear = _mm512_set1_epi32(0);
   const __m512i laneMask = _mm512_mask_set1_epi16(allClear, isLessEqual, static_cast<short>(0xffffu));
   return _mm512_castsi512_ph(laneMask);
 }


 // pcmp_lt


 // Lane-wise a < b (ordered, quiet). True lanes are 0xffff, false lanes 0.
 template <>
 EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) {
   const __mmask32 isLess = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ);
   const __m512i allClear = _mm512_set1_epi32(0);
   const __m512i laneMask = _mm512_mask_set1_epi16(allClear, isLess, static_cast<short>(0xffffu));
   return _mm512_castsi512_ph(laneMask);
 }


 // pcmp_lt_or_nan


 // Lane-wise "not (a >= b)", unordered and quiet: true when a < b or when
 // either operand is NaN. True lanes are 0xffff, false lanes 0.
 template <>
 EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) {
   const __mmask32 isLessOrNan = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ);
   const __m512i allClear = _mm512_set1_epi16(0);
   const __m512i laneMask = _mm512_mask_set1_epi16(allClear, isLessOrNan, static_cast<short>(0xffffu));
   return _mm512_castsi512_ph(laneMask);
 }


 // padd


 // Lane-wise a + b on 32 half values.
 template <>
 EIGEN_STRONG_INLINE Packet32h padd<Packet32h>(const Packet32h& a, const Packet32h& b) {
   const Packet32h sum = _mm512_add_ph(a, b);
   return sum;
 }


 // Lane-wise a + b on 16 half values; the integer-typed wrapper is viewed as
 // FP16 for the arithmetic and converted back afterwards.
 template <>
 EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {
   const __m256h lhs = _mm256_castsi256_ph(a);
   const __m256h rhs = _mm256_castsi256_ph(b);
   return _mm256_castph_si256(_mm256_add_ph(lhs, rhs));
 }


 // Lane-wise a + b on 8 half values; the integer-typed wrapper is viewed as
 // FP16 for the arithmetic and converted back afterwards.
 template <>
 EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
   const __m128h lhs = _mm_castsi128_ph(a);
   const __m128h rhs = _mm_castsi128_ph(b);
   return _mm_castph_si128(_mm_add_ph(lhs, rhs));
 }


 // psub


 // Lane-wise a - b on 32 half values.
 template <>
 EIGEN_STRONG_INLINE Packet32h psub<Packet32h>(const Packet32h& a, const Packet32h& b) {
   const Packet32h difference = _mm512_sub_ph(a, b);
   return difference;
 }


 // Lane-wise a - b on 16 half values.
 template <>
 EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) {
   const __m256h lhs = _mm256_castsi256_ph(a);
   const __m256h rhs = _mm256_castsi256_ph(b);
   return _mm256_castph_si256(_mm256_sub_ph(lhs, rhs));
 }


 // Lane-wise a - b on 8 half values.
 template <>
 EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) {
   const __m128h lhs = _mm_castsi128_ph(a);
   const __m128h rhs = _mm_castsi128_ph(b);
   return _mm_castph_si128(_mm_sub_ph(lhs, rhs));
 }


 // pmul


 // Lane-wise a * b on 32 half values.
 template <>
 EIGEN_STRONG_INLINE Packet32h pmul<Packet32h>(const Packet32h& a, const Packet32h& b) {
   const Packet32h product = _mm512_mul_ph(a, b);
   return product;
 }


 // Lane-wise a * b on 16 half values.
 template <>
 EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {
   const __m256h lhs = _mm256_castsi256_ph(a);
   const __m256h rhs = _mm256_castsi256_ph(b);
   return _mm256_castph_si256(_mm256_mul_ph(lhs, rhs));
 }


 // Lane-wise a * b on 8 half values.
 template <>
 EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {
   const __m128h lhs = _mm_castsi128_ph(a);
   const __m128h rhs = _mm_castsi128_ph(b);
   return _mm_castph_si128(_mm_mul_ph(lhs, rhs));
 }


 // pdiv


 // Lane-wise a / b on 32 half values.
 template <>
 EIGEN_STRONG_INLINE Packet32h pdiv<Packet32h>(const Packet32h& a, const Packet32h& b) {
   const Packet32h quotient = _mm512_div_ph(a, b);
   return quotient;
 }


 // Lane-wise a / b on 16 half values.
 template <>
 EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) {
   const __m256h numerator = _mm256_castsi256_ph(a);
   const __m256h denominator = _mm256_castsi256_ph(b);
   return _mm256_castph_si256(_mm256_div_ph(numerator, denominator));
 }


 // Lane-wise a / b on 8 half values.
 template <>
 EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) {
   const __m128h numerator = _mm_castsi128_ph(a);
   const __m128h denominator = _mm_castsi128_ph(b);
   return _mm_castph_si128(_mm_div_ph(numerator, denominator));
 }


 // pround


 // Rounds each lane the way std::round does. It adds the largest half value
 // below 0.5, carrying the sign of the input, and then truncates toward zero.
 template <>
 EIGEN_STRONG_INLINE Packet32h pround<Packet32h>(const Packet32h& a) {
   // Isolates the sign bit of each lane.
   const Packet32h signBit = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u));
   // The largest half-precision float less than 0.5.
   const Packet32h justBelowHalf = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x37FFu));

   const Packet32h signedBias = por(pand(a, signBit), justBelowHalf);
   const Packet32h biased = padd(signedBias, a);
   return _mm512_roundscale_ph(biased, _MM_FROUND_TO_ZERO);
 }


 // print


 // Rounds each lane to an integral value using the current rounding direction.
 template <>
 EIGEN_STRONG_INLINE Packet32h print<Packet32h>(const Packet32h& a) {
   const Packet32h rounded = _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION);
   return rounded;
 }


 // pceil


 // Rounds each lane toward positive infinity.
 template <>
 EIGEN_STRONG_INLINE Packet32h pceil<Packet32h>(const Packet32h& a) {
   const Packet32h roundedUp = _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF);
   return roundedUp;
 }


 // pfloor


 // Rounds each lane toward negative infinity.
 template <>
 EIGEN_STRONG_INLINE Packet32h pfloor<Packet32h>(const Packet32h& a) {
   const Packet32h roundedDown = _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF);
   return roundedDown;
 }


 // ptrunc


 // Rounds each lane toward zero.
 template <>
 EIGEN_STRONG_INLINE Packet32h ptrunc<Packet32h>(const Packet32h& a) {
   const Packet32h truncated = _mm512_roundscale_ph(a, _MM_FROUND_TO_ZERO);
   return truncated;
 }


 // predux

 // Horizontal sum of all 32 lanes.
 template <>
 EIGEN_STRONG_INLINE half predux<Packet32h>(const Packet32h& a) {
   return static_cast<half>(_mm512_reduce_add_ph(a));
 }


 // Horizontal sum of all 16 lanes.
 template <>
 EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& a) {
   const __m256h lanes = _mm256_castsi256_ph(a);
   return static_cast<half>(_mm256_reduce_add_ph(lanes));
 }


 // Horizontal sum of all 8 lanes.
 template <>
 EIGEN_STRONG_INLINE half predux<Packet8h>(const Packet8h& a) {
   const __m128h lanes = _mm_castsi128_ph(a);
   return static_cast<half>(_mm_reduce_add_ph(lanes));
 }


 // predux_half_dowto4

 // Folds a Packet32h into a Packet16h by adding its upper 16 lanes to its
 // lower 16 lanes.
 //
 // The two 256-bit halves are split with _mm512_extractf64x4_pd, which needs
 // only AVX512F. This replaces the AVX512DQ-specific path and the fallback
 // that spilled the packet through a stack buffer; the result is unchanged.
 template <>
 EIGEN_STRONG_INLINE Packet16h predux_half_dowto4<Packet32h>(const Packet32h& a) {
   // Pure bit reinterpretation; no lane values are converted.
   const __m512d bits = _mm512_castph_pd(a);
   const __m256i lowHalf = _mm256_castpd_si256(_mm512_extractf64x4_pd(bits, 0));
   const __m256i highHalf = _mm256_castpd_si256(_mm512_extractf64x4_pd(bits, 1));

   return Packet16h(padd<Packet16h>(lowHalf, highHalf));
 }


 // predux_max


 // predux_min


 // predux_mul


 #ifdef EIGEN_VECTORIZE_FMA


 // pmadd


 // Fused multiply-add on 32 half values: a * b + c.
 template <>
 EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
   const Packet32h fused = _mm512_fmadd_ph(a, b, c);
   return fused;
 }


 // Fused multiply-add on 16 half values: a * b + c.
 template <>
 EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
   const __m256h factor0 = _mm256_castsi256_ph(a);
   const __m256h factor1 = _mm256_castsi256_ph(b);
   const __m256h addend = _mm256_castsi256_ph(c);
   return _mm256_castph_si256(_mm256_fmadd_ph(factor0, factor1, addend));
 }


 // Fused multiply-add on 8 half values: a * b + c.
 template <>
 EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
   const __m128h factor0 = _mm_castsi128_ph(a);
   const __m128h factor1 = _mm_castsi128_ph(b);
   const __m128h addend = _mm_castsi128_ph(c);
   return _mm_castph_si128(_mm_fmadd_ph(factor0, factor1, addend));
 }


 // pmsub


 // Fused multiply-subtract on 32 half values: a * b - c.
 template <>
 EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
   const Packet32h fused = _mm512_fmsub_ph(a, b, c);
   return fused;
 }


 // Fused multiply-subtract on 16 half values: a * b - c.
 template <>
 EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
   const __m256h factor0 = _mm256_castsi256_ph(a);
   const __m256h factor1 = _mm256_castsi256_ph(b);
   const __m256h subtrahend = _mm256_castsi256_ph(c);
   return _mm256_castph_si256(_mm256_fmsub_ph(factor0, factor1, subtrahend));
 }


 // Fused multiply-subtract on 8 half values: a * b - c.
 template <>
 EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
   const __m128h factor0 = _mm_castsi128_ph(a);
   const __m128h factor1 = _mm_castsi128_ph(b);
   const __m128h subtrahend = _mm_castsi128_ph(c);
   return _mm_castph_si128(_mm_fmsub_ph(factor0, factor1, subtrahend));
 }


 // pnmadd


 // Fused negated multiply-add on 32 half values: -(a * b) + c.
 template <>
 EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
   const Packet32h fused = _mm512_fnmadd_ph(a, b, c);
   return fused;
 }


 // Fused negated multiply-add on 16 half values: -(a * b) + c.
 template <>
 EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
   const __m256h factor0 = _mm256_castsi256_ph(a);
   const __m256h factor1 = _mm256_castsi256_ph(b);
   const __m256h addend = _mm256_castsi256_ph(c);
   return _mm256_castph_si256(_mm256_fnmadd_ph(factor0, factor1, addend));
 }


 // Fused negated multiply-add on 8 half values: -(a * b) + c.
 template <>
 EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
   const __m128h factor0 = _mm_castsi128_ph(a);
   const __m128h factor1 = _mm_castsi128_ph(b);
   const __m128h addend = _mm_castsi128_ph(c);
   return _mm_castph_si128(_mm_fnmadd_ph(factor0, factor1, addend));
 }


 // pnmsub


 // Fused negated multiply-subtract on 32 half values: -(a * b) - c.
 template <>
 EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
   const Packet32h fused = _mm512_fnmsub_ph(a, b, c);
   return fused;
 }


 // Fused negated multiply-subtract on 16 half values: -(a * b) - c.
 template <>
 EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
   const __m256h factor0 = _mm256_castsi256_ph(a);
   const __m256h factor1 = _mm256_castsi256_ph(b);
   const __m256h subtrahend = _mm256_castsi256_ph(c);
   return _mm256_castph_si256(_mm256_fnmsub_ph(factor0, factor1, subtrahend));
 }


 // Fused negated multiply-subtract on 8 half values: -(a * b) - c.
 template <>
 EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
   const __m128h factor0 = _mm_castsi128_ph(a);
   const __m128h factor1 = _mm_castsi128_ph(b);
   const __m128h subtrahend = _mm_castsi128_ph(c);
   return _mm_castph_si128(_mm_fnmsub_ph(factor0, factor1, subtrahend));
 }


 #endif


 // pnegate


 // Lane-wise negation.
 //
 // Implemented by flipping the sign bit rather than computing 0 - a: the
 // subtraction yields +0 for a == +0, so it does not negate zero, whereas
 // toggling the sign bit negates every input.
 template <>
 EIGEN_STRONG_INLINE Packet32h pnegate<Packet32h>(const Packet32h& a) {
   // 0x8000 is the sign bit of a 16-bit half-precision value.
   const Packet32h signMask = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u));
   return pxor(a, signMask);
 }


 // pconj


 // Complex conjugate of a packet of half values: returns the input unchanged.
 template <>
 EIGEN_STRONG_INLINE Packet32h pconj<Packet32h>(const Packet32h& a) {
   return a;
 }


 // psqrt


 // Lane-wise square root.
 template <>
 EIGEN_STRONG_INLINE Packet32h psqrt<Packet32h>(const Packet32h& a) {
   const Packet32h root = _mm512_sqrt_ph(a);
   return root;
 }


 // prsqrt


 // Lane-wise reciprocal square root via _mm512_rsqrt_ph.
 // NOTE(review): Intel documents this instruction as an approximation; confirm
 // that its accuracy is sufficient for callers.
 template <>
 EIGEN_STRONG_INLINE Packet32h prsqrt<Packet32h>(const Packet32h& a) {
   const Packet32h inverseRoot = _mm512_rsqrt_ph(a);
   return inverseRoot;
 }


 // preciprocal


 // Lane-wise reciprocal via _mm512_rcp_ph.
 // NOTE(review): Intel documents this instruction as an approximation; confirm
 // that its accuracy is sufficient for callers.
 template <>
 EIGEN_STRONG_INLINE Packet32h preciprocal<Packet32h>(const Packet32h& a) {
   const Packet32h inverse = _mm512_rcp_ph(a);
   return inverse;
 }


 // ptranspose


 // Transposes, in place, a 32x32 tile of half values stored as 32 Packet32h rows.
 //
 // Stages 1-3 interleave 16-, 32- and 64-bit groups. Afterwards, 128-bit lane
 // L of q[8 * X + k] holds column (8 * L + k) restricted to rows 8 * X to
 // 8 * X + 7. Stage 4 collects, for every output row, the four 128-bit pieces
 // that belong to the same column.
 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 32>& a) {
   // Stage 1: interleave the 16-bit elements of each pair of adjacent rows.
   __m512i t[32];

   EIGEN_UNROLL_LOOP
   for (int i = 0; i < 16; i++) {
     t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
     t[2 * i + 1] =
         _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
   }

   // Stage 2: interleave 32-bit groups, combining four rows.
   __m512i p[32];

   EIGEN_UNROLL_LOOP
   for (int i = 0; i < 8; i++) {
     p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]);
     p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]);
     p[4 * i + 2] = _mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]);
     p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]);
   }

   // Stage 3: interleave 64-bit groups, combining eight rows.
   __m512i q[32];

   EIGEN_UNROLL_LOOP
   for (int i = 0; i < 4; i++) {
     q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]);
     q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]);
     q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]);
     q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]);
     q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]);
     q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]);
     q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]);
     q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]);
   }

   // Stage 4: exchange 128-bit lanes between the four groups of eight rows.
   // Seeding f from q keeps every f[i] initialised before it is passed to
   // _mm512_inserti32x4. It also puts the diagonal pieces in place, because
   // lane Y of q[8 * Y + k] is already lane Y of the final f[8 * Y + k].
   __m512i f[32];

   EIGEN_UNROLL_LOOP
   for (int i = 0; i < 32; i++) {
     f[i] = q[i];
   }

 // Copies 128-bit lane Y of q[8 * X + k] into lane X of f[8 * Y + k] for
 // k = 0..7. The lane selectors are passed as literals, hence a macro.
 #define PACKET32H_TRANSPOSE_HELPER(X, Y)                                                            \
   do {                                                                                              \
     f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X);             \
     f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \
     f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \
     f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \
     f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \
     f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \
     f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \
     f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \
   } while (false)

   // Only the twelve off-diagonal (X != Y) exchanges remain.
   PACKET32H_TRANSPOSE_HELPER(1, 0);
   PACKET32H_TRANSPOSE_HELPER(2, 0);
   PACKET32H_TRANSPOSE_HELPER(3, 0);
   PACKET32H_TRANSPOSE_HELPER(2, 1);
   PACKET32H_TRANSPOSE_HELPER(3, 1);
   PACKET32H_TRANSPOSE_HELPER(3, 2);

   PACKET32H_TRANSPOSE_HELPER(0, 1);
   PACKET32H_TRANSPOSE_HELPER(0, 2);
   PACKET32H_TRANSPOSE_HELPER(0, 3);
   PACKET32H_TRANSPOSE_HELPER(1, 2);
   PACKET32H_TRANSPOSE_HELPER(1, 3);
   PACKET32H_TRANSPOSE_HELPER(2, 3);

 #undef PACKET32H_TRANSPOSE_HELPER

   EIGEN_UNROLL_LOOP
   for (int i = 0; i < 32; i++) {
     a.packet[i] = _mm512_castsi512_ph(f[i]);
   }
 }


 // Transposes, in place, a block of 4 Packet32h rows (4 x 32 half values).
 //
 // After the two interleave stages, 128-bit lane L of colPair[j] holds columns
 // 8 * L + 2 * j and 8 * L + 2 * j + 1, four rows each. Output packet m takes
 // lane m of colPair[n] as its lane n.
 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 4>& a) {
   const __m512i row0 = _mm512_castph_si512(a.packet[0]);
   const __m512i row1 = _mm512_castph_si512(a.packet[1]);
   const __m512i row2 = _mm512_castph_si512(a.packet[2]);
   const __m512i row3 = _mm512_castph_si512(a.packet[3]);

   // Interleave 16-bit elements of rows (0, 1) and rows (2, 3).
   const __m512i lo01 = _mm512_unpacklo_epi16(row0, row1);
   const __m512i hi01 = _mm512_unpackhi_epi16(row0, row1);
   const __m512i lo23 = _mm512_unpacklo_epi16(row2, row3);
   const __m512i hi23 = _mm512_unpackhi_epi16(row2, row3);

   // Interleave 32-bit groups so that each 64-bit group is one full column.
   const __m512i colPair0 = _mm512_unpacklo_epi32(lo01, lo23);
   const __m512i colPair1 = _mm512_unpackhi_epi32(lo01, lo23);
   const __m512i colPair2 = _mm512_unpacklo_epi32(hi01, hi23);
   const __m512i colPair3 = _mm512_unpackhi_epi32(hi01, hi23);

   // Each output starts from the packet that already holds its diagonal lane.
   __m512i out0 = colPair0;
   out0 = _mm512_inserti32x4(out0, _mm512_extracti32x4_epi32(colPair1, 0), 1);
   out0 = _mm512_inserti32x4(out0, _mm512_extracti32x4_epi32(colPair2, 0), 2);
   out0 = _mm512_inserti32x4(out0, _mm512_extracti32x4_epi32(colPair3, 0), 3);

   __m512i out1 = colPair1;
   out1 = _mm512_inserti32x4(out1, _mm512_extracti32x4_epi32(colPair0, 1), 0);
   out1 = _mm512_inserti32x4(out1, _mm512_extracti32x4_epi32(colPair2, 1), 2);
   out1 = _mm512_inserti32x4(out1, _mm512_extracti32x4_epi32(colPair3, 1), 3);

   __m512i out2 = colPair2;
   out2 = _mm512_inserti32x4(out2, _mm512_extracti32x4_epi32(colPair0, 2), 0);
   out2 = _mm512_inserti32x4(out2, _mm512_extracti32x4_epi32(colPair1, 2), 1);
   out2 = _mm512_inserti32x4(out2, _mm512_extracti32x4_epi32(colPair3, 2), 3);

   __m512i out3 = colPair3;
   out3 = _mm512_inserti32x4(out3, _mm512_extracti32x4_epi32(colPair0, 3), 0);
   out3 = _mm512_inserti32x4(out3, _mm512_extracti32x4_epi32(colPair1, 3), 1);
   out3 = _mm512_inserti32x4(out3, _mm512_extracti32x4_epi32(colPair2, 3), 2);

   a.packet[0] = _mm512_castsi512_ph(out0);
   a.packet[1] = _mm512_castsi512_ph(out1);
   a.packet[2] = _mm512_castsi512_ph(out2);
   a.packet[3] = _mm512_castsi512_ph(out3);
 }


 // preverse


 // Reverses the lane order: lane i of the result is lane 31 - i of `a`.
 template <>
 EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) {
   // Index vector, listed from lane 31 down to lane 0.
   const __m512i reversedIdx = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                                                20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
   return _mm512_permutexvar_ph(reversedIdx, a);
 }


 // pscatter


 // Writes lane i of `from` to to[stride * i] for i = 0..31.
 template <>
 EIGEN_STRONG_INLINE void pscatter<half, Packet32h>(half* to, const Packet32h& from, Index stride) {
   // Spill to a 64-byte aligned buffer so the lanes can be addressed one by one.
   EIGEN_ALIGN64 half lanes[32];
   pstore(lanes, from);

   EIGEN_UNROLL_LOOP
   for (int lane = 0; lane < 32; ++lane) {
     to[stride * lane] = lanes[lane];
   }
 }


 // pgather


 // Builds a packet whose lane i holds from[i * stride], for i = 0..31.
 template <>
 EIGEN_STRONG_INLINE Packet32h pgather<Eigen::half, Packet32h>(const Eigen::half* from, Index stride) {
   // Assemble the raw 16-bit patterns; arguments run from lane 31 down to lane 0.
   const __m512i gatheredBits = _mm512_set_epi16(
       from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x,
       from[27 * stride].x, from[26 * stride].x, from[25 * stride].x, from[24 * stride].x,
       from[23 * stride].x, from[22 * stride].x, from[21 * stride].x, from[20 * stride].x,
       from[19 * stride].x, from[18 * stride].x, from[17 * stride].x, from[16 * stride].x,
       from[15 * stride].x, from[14 * stride].x, from[13 * stride].x, from[12 * stride].x,
       from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x,
       from[7 * stride].x, from[6 * stride].x, from[5 * stride].x, from[4 * stride].x,
       from[3 * stride].x, from[2 * stride].x, from[1 * stride].x, from[0 * stride].x);
   return _mm512_castsi512_ph(gatheredBits);
 }


 // Declarations of the Packet16h math specializations that the Packet32h
 // versions below delegate to; their definitions are not in this file.
 template <>
 EIGEN_STRONG_INLINE Packet16h pcos<Packet16h>(const Packet16h&);
 template <>
 EIGEN_STRONG_INLINE Packet16h psin<Packet16h>(const Packet16h&);
 template <>
 EIGEN_STRONG_INLINE Packet16h plog<Packet16h>(const Packet16h&);
 template <>
 EIGEN_STRONG_INLINE Packet16h plog2<Packet16h>(const Packet16h&);
 template <>
 EIGEN_STRONG_INLINE Packet16h plog1p<Packet16h>(const Packet16h&);
 template <>
 EIGEN_STRONG_INLINE Packet16h pexp<Packet16h>(const Packet16h&);
 template <>
 EIGEN_STRONG_INLINE Packet16h pexpm1<Packet16h>(const Packet16h&);
 template <>
 EIGEN_STRONG_INLINE Packet16h ptanh<Packet16h>(const Packet16h&);
 template <>
 EIGEN_STRONG_INLINE Packet16h pfrexp<Packet16h>(const Packet16h&, Packet16h&);
 template <>
 EIGEN_STRONG_INLINE Packet16h pldexp<Packet16h>(const Packet16h&, const Packet16h&);


 // Concatenates two Packet16h into one Packet32h: `a` fills the lower 256 bits
 // and `b` the upper 256 bits.
 EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) {
   // The initial contents do not matter, since both halves are overwritten.
   const __m512d lowerFilled = _mm512_insertf64x4(_mm512_undefined_pd(), _mm256_castsi256_pd(a), 0);
   const __m512d bothFilled = _mm512_insertf64x4(lowerFilled, _mm256_castsi256_pd(b), 1);
   return _mm512_castpd_ph(bothFilled);
 }


 // Splits a Packet32h into two Packet16h: `a` receives the lower 256 bits of
 // `x` and `b` the upper 256 bits.
 EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) {
   const __m512d bits = _mm512_castph_pd(x);
   a = _mm256_castpd_si256(_mm512_extractf64x4_pd(bits, 0));
   b = _mm256_castpd_si256(_mm512_extractf64x4_pd(bits, 1));
 }


 // psin

 // Lane-wise sine, computed by applying the Packet16h kernel to each half.
 template <>
 EIGEN_STRONG_INLINE Packet32h psin<Packet32h>(const Packet32h& a) {
   Packet16h lower;
   Packet16h upper;
   extract2Packet16h(a, lower, upper);

   const Packet16h lowerResult = psin(lower);
   const Packet16h upperResult = psin(upper);
   return combine2Packet16h(lowerResult, upperResult);
 }


 // pcos

 // Lane-wise cosine, computed by applying the Packet16h kernel to each half.
 template <>
 EIGEN_STRONG_INLINE Packet32h pcos<Packet32h>(const Packet32h& a) {
   Packet16h lower;
   Packet16h upper;
   extract2Packet16h(a, lower, upper);

   const Packet16h lowerResult = pcos(lower);
   const Packet16h upperResult = pcos(upper);
   return combine2Packet16h(lowerResult, upperResult);
 }


 // plog

 // Lane-wise plog, computed by applying the Packet16h kernel to each half.
 template <>
 EIGEN_STRONG_INLINE Packet32h plog<Packet32h>(const Packet32h& a) {
   Packet16h lower;
   Packet16h upper;
   extract2Packet16h(a, lower, upper);

   const Packet16h lowerResult = plog(lower);
   const Packet16h upperResult = plog(upper);
   return combine2Packet16h(lowerResult, upperResult);
 }


 // plog2

 // Lane-wise plog2, computed by applying the Packet16h kernel to each half.
 template <>
 EIGEN_STRONG_INLINE Packet32h plog2<Packet32h>(const Packet32h& a) {
   Packet16h lower;
   Packet16h upper;
   extract2Packet16h(a, lower, upper);

   const Packet16h lowerResult = plog2(lower);
   const Packet16h upperResult = plog2(upper);
   return combine2Packet16h(lowerResult, upperResult);
 }


 // plog1p

 // Lane-wise plog1p, computed by applying the Packet16h kernel to each half.
 template <>
 EIGEN_STRONG_INLINE Packet32h plog1p<Packet32h>(const Packet32h& a) {
   Packet16h lower;
   Packet16h upper;
   extract2Packet16h(a, lower, upper);

   const Packet16h lowerResult = plog1p(lower);
   const Packet16h upperResult = plog1p(upper);
   return combine2Packet16h(lowerResult, upperResult);
 }


 // pexp

 // Lane-wise pexp, computed by applying the Packet16h kernel to each half.
 template <>
 EIGEN_STRONG_INLINE Packet32h pexp<Packet32h>(const Packet32h& a) {
   Packet16h lower;
   Packet16h upper;
   extract2Packet16h(a, lower, upper);

   const Packet16h lowerResult = pexp(lower);
   const Packet16h upperResult = pexp(upper);
   return combine2Packet16h(lowerResult, upperResult);
 }


 // pexpm1

 // Lane-wise pexpm1, computed by applying the Packet16h kernel to each half.
 template <>
 EIGEN_STRONG_INLINE Packet32h pexpm1<Packet32h>(const Packet32h& a) {
   Packet16h lower;
   Packet16h upper;
   extract2Packet16h(a, lower, upper);

   const Packet16h lowerResult = pexpm1(lower);
   const Packet16h upperResult = pexpm1(upper);
   return combine2Packet16h(lowerResult, upperResult);
 }


 // ptanh

 // Lane-wise ptanh, computed by applying the Packet16h kernel to each half.
 template <>
 EIGEN_STRONG_INLINE Packet32h ptanh<Packet32h>(const Packet32h& a) {
   Packet16h lower;
   Packet16h upper;
   extract2Packet16h(a, lower, upper);

   const Packet16h lowerResult = ptanh(lower);
   const Packet16h upperResult = ptanh(upper);
   return combine2Packet16h(lowerResult, upperResult);
 }


 // pfrexp

 // Lane-wise pfrexp, delegated to the Packet16h kernel on each half. The
 // function's return value from both halves is recombined and returned; the
 // value the kernel writes to its second argument is recombined into `exponent`.
 template <>
 EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) {
   Packet16h lower;
   Packet16h upper;
   extract2Packet16h(a, lower, upper);

   // Output slots for the two kernel calls; their initial contents are undefined.
   Packet16h lowerExponent = _mm256_undefined_si256();
   Packet16h upperExponent = _mm256_undefined_si256();

   const Packet16h lowerResult = pfrexp(lower, lowerExponent);
   const Packet16h upperResult = pfrexp(upper, upperExponent);

   exponent = combine2Packet16h(lowerExponent, upperExponent);
   return combine2Packet16h(lowerResult, upperResult);
 }


 // pldexp

 // Lane-wise pldexp, delegated to the Packet16h kernel on each half of `a`
 // together with the matching half of `exponent`.
 template <>
 EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) {
   Packet16h lowerValue;
   Packet16h upperValue;
   extract2Packet16h(a, lowerValue, upperValue);

   Packet16h lowerExponent;
   Packet16h upperExponent;
   extract2Packet16h(exponent, lowerExponent, upperExponent);

   const Packet16h lowerResult = pldexp(lowerValue, lowerExponent);
   const Packet16h upperResult = pldexp(upperValue, upperExponent);
   return combine2Packet16h(lowerResult, upperResult);
 }


 }  // end namespace internal

 }  // end namespace Eigen


 #endif  // EIGEN_PACKET_MATH_FP16_AVX512_H

i
int i
Definition: BiCGSTAB_step_by_step.cpp:9

EIGEN_ALIGN64
#define EIGEN_ALIGN64
Definition: ConfigureVectorization.h:144

EIGEN_DEBUG_ALIGNED_STORE
#define EIGEN_DEBUG_ALIGNED_STORE
Definition: GenericPacketMath.h:38

EIGEN_DEBUG_ALIGNED_LOAD
#define EIGEN_DEBUG_ALIGNED_LOAD
Definition: GenericPacketMath.h:30

EIGEN_DEBUG_UNALIGNED_STORE
#define EIGEN_DEBUG_UNALIGNED_STORE
Definition: GenericPacketMath.h:42

EIGEN_DEBUG_UNALIGNED_LOAD
#define EIGEN_DEBUG_UNALIGNED_LOAD
Definition: GenericPacketMath.h:34

EIGEN_UNROLL_LOOP
#define EIGEN_UNROLL_LOOP
Definition: Macros.h:1298

EIGEN_DEVICE_FUNC
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892

EIGEN_FAST_MATH
#define EIGEN_FAST_MATH
Definition: Macros.h:51

EIGEN_STRONG_INLINE
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834

data
int data[]
Definition: Map_placement_new.cpp:1

p0
Vector3f p0
Definition: MatrixBase_all.cpp:2

p1
Vector3f p1
Definition: MatrixBase_all.cpp:2

PACKET32H_TRANSPOSE_HELPER
#define PACKET32H_TRANSPOSE_HELPER(X, Y)

p
float * p
Definition: Tutorial_Map_using.cpp:9

b
Scalar * b
Definition: benchVecAdd.cpp:17

f
static int f(const TensorMap< Tensor< int, 3 > > &tensor)
Definition: cxx11_tensor_map.cpp:237

Eigen::Aligned64
@ Aligned64
Definition: Constants.h:239

Eigen::Aligned32
@ Aligned32
Definition: Constants.h:238

Eigen::Aligned16
@ Aligned16
Definition: Constants.h:237

a
const Scalar * a
Definition: level2_cplx_impl.h:32

Eigen::bfloat16_impl::exp2
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp2(const bfloat16 &a)
Definition: BFloat16.h:616

Eigen::half_impl::raw_uint16_to_half
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x)
Definition: Half.h:496

Eigen::internal::pexpm1
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet &a)
Definition: GenericPacketMath.h:1097

Eigen::internal::psqrt< Packet32h >
EIGEN_STRONG_INLINE Packet32h psqrt< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:530

Eigen::internal::pfirst< Packet32h >
EIGEN_STRONG_INLINE Eigen::half pfirst< Packet32h >(const Packet32h &from)
Definition: PacketMathFP16.h:130

Eigen::internal::plog2< Packet32h >
EIGEN_STRONG_INLINE Packet32h plog2< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:776

Eigen::internal::print< Packet32h >
EIGEN_STRONG_INLINE Packet32h print< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:375

Eigen::internal::padd
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:318

Eigen::internal::pdiv< Packet32h >
EIGEN_STRONG_INLINE Packet32h pdiv< Packet32h >(const Packet32h &a, const Packet32h &b)
Definition: PacketMathFP16.h:344

Eigen::internal::pmul< Packet8h >
EIGEN_STRONG_INLINE Packet8h pmul< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2406

Eigen::internal::pzero
EIGEN_STRONG_INLINE Packet8f pzero(const Packet8f &)
Definition: AVX/PacketMath.h:774

Eigen::internal::pscatter< half, Packet32h >
EIGEN_STRONG_INLINE void pscatter< half, Packet32h >(half *to, const Packet32h &from, Index stride)
Definition: PacketMathFP16.h:678

Eigen::internal::psignbit< Packet32h >
EIGEN_STRONG_INLINE Packet32h psignbit< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:197

Eigen::internal::padd< Packet32h >
EIGEN_STRONG_INLINE Packet32h padd< Packet32h >(const Packet32h &a, const Packet32h &b)
Definition: PacketMathFP16.h:293

Eigen::internal::pfrexp< Packet16h >
EIGEN_STRONG_INLINE Packet16h pfrexp< Packet16h >(const Packet16h &, Packet16h &)

Eigen::internal::psin< Packet32h >
EIGEN_STRONG_INLINE Packet32h psin< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:737

Eigen::internal::predux< Packet32h >
EIGEN_STRONG_INLINE half predux< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:402

Eigen::internal::pexp< Packet16h >
EIGEN_STRONG_INLINE Packet16h pexp< Packet16h >(const Packet16h &)

Eigen::internal::ploadquad< Packet32h >
EIGEN_STRONG_INLINE Packet32h ploadquad< Packet32h >(const Eigen::half *from)
Definition: PacketMathFP16.h:180

Eigen::internal::pnegate< Packet32h >
EIGEN_STRONG_INLINE Packet32h pnegate< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:516

Eigen::internal::psub< Packet8h >
EIGEN_STRONG_INLINE Packet8h psub< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2398

Eigen::internal::ploaddup< Packet32h >
EIGEN_STRONG_INLINE Packet32h ploaddup< Packet32h >(const Eigen::half *from)
Definition: PacketMathFP16.h:171

Eigen::internal::plog< Packet16h >
EIGEN_STRONG_INLINE Packet16h plog< Packet16h >(const Packet16h &)

Eigen::internal::ptranspose
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition: AltiVec/Complex.h:339

Eigen::internal::plog2
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet &a)
Definition: GenericPacketMath.h:1123

Eigen::internal::plog
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet &a)
Definition: GenericPacketMath.h:1103

Eigen::internal::psin< Packet16h >
EIGEN_STRONG_INLINE Packet16h psin< Packet16h >(const Packet16h &)

Eigen::internal::pabs< Packet32h >
EIGEN_STRONG_INLINE Packet32h pabs< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:190

Eigen::internal::ptanh< Packet16h >
EIGEN_STRONG_INLINE Packet16h ptanh< Packet16h >(const Packet16h &)

Eigen::internal::plog< Packet32h >
EIGEN_STRONG_INLINE Packet32h plog< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:763

Eigen::internal::pcos
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet &a)
Definition: GenericPacketMath.h:1022

Eigen::internal::combine2Packet16h
EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h &a, const Packet16h &b)
Definition: PacketMathFP16.h:723

Eigen::internal::pset1frombits< Packet32h >
EIGEN_STRONG_INLINE Packet32h pset1frombits< Packet32h >(unsigned short from)
Definition: PacketMathFP16.h:123

Eigen::internal::predux< Packet8h >
EIGEN_STRONG_INLINE Eigen::half predux< Packet8h >(const Packet8h &a)
Definition: AVX/PacketMath.h:2451

Eigen::internal::psub< Packet32h >
EIGEN_STRONG_INLINE Packet32h psub< Packet32h >(const Packet32h &a, const Packet32h &b)
Definition: PacketMathFP16.h:310

Eigen::internal::pcmp_le
EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1314

Eigen::internal::pcos< Packet32h >
EIGEN_STRONG_INLINE Packet32h pcos< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:750

Eigen::internal::psin
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet &a)
Definition: GenericPacketMath.h:1015

Eigen::internal::pload< Packet32h >
EIGEN_STRONG_INLINE Packet32h pload< Packet32h >(const Eigen::half *from)
Definition: PacketMathFP16.h:144

Eigen::internal::pdiv< Packet8h >
EIGEN_STRONG_INLINE Packet8h pdiv< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2414

Eigen::internal::pexpm1< Packet16h >
EIGEN_STRONG_INLINE Packet16h pexpm1< Packet16h >(const Packet16h &)

Eigen::internal::por
EIGEN_STRONG_INLINE Packet8h por(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2309

Eigen::internal::pcmp_lt
EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1341

Eigen::internal::preverse
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
Definition: AltiVec/Complex.h:303

Eigen::internal::pmadd
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: AltiVec/PacketMath.h:1218

Eigen::internal::pandnot
EIGEN_STRONG_INLINE Packet8h pandnot(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2323

Eigen::internal::ptanh< Packet32h >
EIGEN_STRONG_INLINE Packet32h ptanh< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:828

Eigen::internal::plset< Packet32h >
EIGEN_STRONG_INLINE Packet32h plset< Packet32h >(const half &a)
Definition: PacketMathFP16.h:217

Eigen::internal::pset1< Packet32h >
EIGEN_STRONG_INLINE Packet32h pset1< Packet32h >(const Eigen::half &from)
Definition: PacketMathFP16.h:111

Eigen::internal::ptanh
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet &a)
Definition: GenericPacketMath.h:1071

Eigen::internal::pfrexp< Packet32h >
EIGEN_STRONG_INLINE Packet32h pfrexp< Packet32h >(const Packet32h &a, Packet32h &exponent)
Definition: PacketMathFP16.h:841

Eigen::internal::pstoreu< half >
EIGEN_STRONG_INLINE void pstoreu< half >(Eigen::half *to, const Packet16h &from)
Definition: AVX512/PacketMath.h:2230

Eigen::internal::prsqrt< Packet32h >
EIGEN_STRONG_INLINE Packet32h prsqrt< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:537

Eigen::internal::plog2< Packet16h >
EIGEN_STRONG_INLINE Packet16h plog2< Packet16h >(const Packet16h &)

Eigen::internal::pldexp< Packet32h >
EIGEN_STRONG_INLINE Packet32h pldexp< Packet32h >(const Packet32h &a, const Packet32h &exponent)
Definition: PacketMathFP16.h:859

Eigen::internal::pexpm1< Packet32h >
EIGEN_STRONG_INLINE Packet32h pexpm1< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:815

Eigen::internal::plog1p
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet &a)
Definition: GenericPacketMath.h:1110

Eigen::internal::pfloor< Packet32h >
EIGEN_STRONG_INLINE Packet32h pfloor< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:389

Eigen::internal::padd< Packet16h >
EIGEN_STRONG_INLINE Packet16h padd< Packet16h >(const Packet16h &a, const Packet16h &b)
Definition: AVX512/PacketMath.h:2374

Eigen::internal::extract2Packet16h
EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h &x, Packet16h &a, Packet16h &b)
Definition: PacketMathFP16.h:730

Eigen::internal::pstore
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
Definition: GenericPacketMath.h:891

Eigen::internal::pnmsub
EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:835

Eigen::internal::preciprocal< Packet32h >
EIGEN_STRONG_INLINE Packet32h preciprocal< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:544

Eigen::internal::pcmp_eq
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:353

Eigen::internal::pldexp
EIGEN_STRONG_INLINE Packet8h pldexp(const Packet8h &a, const Packet8h &exponent)
Definition: arch/AVX/MathFunctions.h:80

Eigen::internal::pround< Packet32h >
EIGEN_STRONG_INLINE Packet32h pround< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:361

Eigen::internal::pmsub
EIGEN_STRONG_INLINE Packet4f pmsub(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:819

Eigen::internal::pand
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2319

Eigen::internal::pcos< Packet16h >
EIGEN_STRONG_INLINE Packet16h pcos< Packet16h >(const Packet16h &)

Eigen::internal::pxor
EIGEN_STRONG_INLINE Packet8h pxor(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2315

Eigen::internal::pnmadd
EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:827

Eigen::internal::predux< Packet16h >
EIGEN_STRONG_INLINE half predux< Packet16h >(const Packet16h &from)
Definition: AVX512/PacketMath.h:2406

Eigen::internal::pselect
EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1474

Eigen::internal::ploadu< Packet32h >
EIGEN_STRONG_INLINE Packet32h ploadu< Packet32h >(const Eigen::half *from)
Definition: PacketMathFP16.h:151

Eigen::internal::pceil< Packet32h >
EIGEN_STRONG_INLINE Packet32h pceil< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:382

Eigen::internal::psub
EIGEN_DEVICE_FUNC Packet psub(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:337

Eigen::internal::pstore< half >
EIGEN_STRONG_INLINE void pstore< half >(Eigen::half *to, const Packet16h &from)
Definition: AVX512/PacketMath.h:2223

Eigen::internal::plog1p< Packet16h >
EIGEN_STRONG_INLINE Packet16h plog1p< Packet16h >(const Packet16h &)

Eigen::internal::ptrunc< Packet32h >
EIGEN_STRONG_INLINE Packet32h ptrunc< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:396

Eigen::internal::predux_half_dowto4< Packet32h >
EIGEN_STRONG_INLINE Packet16h predux_half_dowto4< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:418

Eigen::internal::pmul< Packet32h >
EIGEN_STRONG_INLINE Packet32h pmul< Packet32h >(const Packet32h &a, const Packet32h &b)
Definition: PacketMathFP16.h:327

Eigen::internal::pmin< Packet32h >
EIGEN_STRONG_INLINE Packet32h pmin< Packet32h >(const Packet32h &a, const Packet32h &b)
Definition: PacketMathFP16.h:204

Eigen::internal::pfrexp
EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h &a, Packet8h &exponent)
Definition: arch/AVX/MathFunctions.h:72

Eigen::internal::pmul< Packet16h >
EIGEN_STRONG_INLINE Packet16h pmul< Packet16h >(const Packet16h &a, const Packet16h &b)
Definition: AVX512/PacketMath.h:2390

Eigen::internal::padd< Packet8h >
EIGEN_STRONG_INLINE Packet8h padd< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2390

Eigen::internal::pdiv< Packet16h >
EIGEN_STRONG_INLINE Packet16h pdiv< Packet16h >(const Packet16h &a, const Packet16h &b)
Definition: AVX512/PacketMath.h:2398

Eigen::internal::plog1p< Packet32h >
EIGEN_STRONG_INLINE Packet32h plog1p< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:789

Eigen::internal::Packet16h
eigen_packet_wrapper< __m256i, 1 > Packet16h
Definition: AVX512/PacketMath.h:39

Eigen::internal::pmax< Packet32h >
EIGEN_STRONG_INLINE Packet32h pmax< Packet32h >(const Packet32h &a, const Packet32h &b)
Definition: PacketMathFP16.h:211

Eigen::internal::pcmp_lt_or_nan
EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1329

Eigen::internal::pexp< Packet32h >
EIGEN_STRONG_INLINE Packet32h pexp< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:802

Eigen::internal::Packet32h
__m512h Packet32h
Definition: PacketMathFP16.h:20

Eigen::internal::Packet8h
eigen_packet_wrapper< __m128i, 2 > Packet8h
Definition: AVX/PacketMath.h:38

Eigen::internal::pconj< Packet32h >
EIGEN_STRONG_INLINE Packet32h pconj< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:523

Eigen::internal::pexp
EIGEN_STRONG_INLINE Packet4f pexp(const Packet4f &_x)
Definition: LSX/PacketMath.h:2663

Eigen::internal::psub< Packet16h >
EIGEN_STRONG_INLINE Packet16h psub< Packet16h >(const Packet16h &a, const Packet16h &b)
Definition: AVX512/PacketMath.h:2382

Eigen::internal::pldexp< Packet16h >
EIGEN_STRONG_INLINE Packet16h pldexp< Packet16h >(const Packet16h &, const Packet16h &)

Eigen::numext::q
EIGEN_DEVICE_FUNC const Scalar & q
Definition: SpecialFunctionsImpl.h:2019

Eigen::numext::uint16_t
std::uint16_t uint16_t
Definition: Meta.h:38

Eigen
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70

Eigen::Index
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83

calibrate.c
int c
Definition: calibrate.py:100

internal
Definition: Eigen_Colamd.h:49

plotDoE.x
list x
Definition: plotDoE.py:28

plotPSD.t
t
Definition: plotPSD.py:36

Eigen::half
Definition: Half.h:139

Eigen::internal::PacketBlock
Definition: GenericPacketMath.h:1407

Eigen::internal::default_packet_traits::HasRsqrt
@ HasRsqrt
Definition: GenericPacketMath.h:74

Eigen::internal::default_packet_traits::HasSin
@ HasSin
Definition: GenericPacketMath.h:81

Eigen::internal::default_packet_traits::HasBlend
@ HasBlend
Definition: GenericPacketMath.h:66

Eigen::internal::default_packet_traits::HasNdtri
@ HasNdtri
Definition: GenericPacketMath.h:97

Eigen::internal::default_packet_traits::HasCos
@ HasCos
Definition: GenericPacketMath.h:82

Eigen::internal::default_packet_traits::HasCmp
@ HasCmp
Definition: GenericPacketMath.h:69

Eigen::internal::default_packet_traits::HasLog1p
@ HasLog1p
Definition: GenericPacketMath.h:78

Eigen::internal::default_packet_traits::HasExp
@ HasExp
Definition: GenericPacketMath.h:75

Eigen::internal::default_packet_traits::HasSqrt
@ HasSqrt
Definition: GenericPacketMath.h:73

Eigen::internal::default_packet_traits::HasErf
@ HasErf
Definition: GenericPacketMath.h:95

Eigen::internal::default_packet_traits::HasBessel
@ HasBessel
Definition: GenericPacketMath.h:98

Eigen::internal::default_packet_traits::HasExpm1
@ HasExpm1
Definition: GenericPacketMath.h:76

Eigen::internal::default_packet_traits::HasLog
@ HasLog
Definition: GenericPacketMath.h:77

Eigen::internal::default_packet_traits::HasTanh
@ HasTanh
Definition: GenericPacketMath.h:90

Eigen::internal::default_packet_traits::HasDiv
@ HasDiv
Definition: GenericPacketMath.h:71

Eigen::internal::eigen_packet_wrapper
Definition: GenericPacketMath.h:225

Eigen::internal::is_arithmetic
Definition: Meta.h:145

Eigen::internal::is_arithmetic::value
@ value
Definition: Meta.h:146

Eigen::internal::packet_traits< half >::type
Packet32h type
Definition: PacketMathFP16.h:31

Eigen::internal::packet_traits< half >::half
Packet16h half
Definition: PacketMathFP16.h:32

Eigen::internal::packet_traits::size
@ size
Definition: GenericPacketMath.h:113

Eigen::internal::packet_traits::AlignedOnScalar
@ AlignedOnScalar
Definition: GenericPacketMath.h:114

Eigen::internal::packet_traits::Vectorizable
@ Vectorizable
Definition: GenericPacketMath.h:112

Eigen::internal::packet_traits::HasSub
@ HasSub
Definition: GenericPacketMath.h:118

Eigen::internal::packet_traits::HasMax
@ HasMax
Definition: GenericPacketMath.h:124

Eigen::internal::packet_traits::HasNegate
@ HasNegate
Definition: GenericPacketMath.h:120

Eigen::internal::packet_traits::HasMul
@ HasMul
Definition: GenericPacketMath.h:119

Eigen::internal::packet_traits::HasAdd
@ HasAdd
Definition: GenericPacketMath.h:117

Eigen::internal::packet_traits::HasSetLinear
@ HasSetLinear
Definition: GenericPacketMath.h:126

Eigen::internal::packet_traits::HasMin
@ HasMin
Definition: GenericPacketMath.h:123

Eigen::internal::packet_traits::HasConj
@ HasConj
Definition: GenericPacketMath.h:125

Eigen::internal::packet_traits::HasAbs2
@ HasAbs2
Definition: GenericPacketMath.h:122

Eigen::internal::packet_traits::HasAbs
@ HasAbs
Definition: GenericPacketMath.h:121

Eigen::internal::unpacket_traits< Packet16h >::half
Packet8h half
Definition: PacketMathFP16.h:83

Eigen::internal::unpacket_traits< Packet16h >::type
Eigen::half type
Definition: PacketMathFP16.h:82

Eigen::internal::unpacket_traits< Packet32h >::half
Packet16h half
Definition: PacketMathFP16.h:70

Eigen::internal::unpacket_traits< Packet32h >::type
Eigen::half type
Definition: PacketMathFP16.h:69

Eigen::internal::unpacket_traits< Packet8h >::half
Packet8h half
Definition: PacketMathFP16.h:96

Eigen::internal::unpacket_traits< Packet8h >::type
Eigen::half type
Definition: PacketMathFP16.h:95

Eigen::internal::unpacket_traits
Definition: GenericPacketMath.h:134

Eigen::internal::unpacket_traits::masked_load_available
@ masked_load_available
Definition: GenericPacketMath.h:142

Eigen::internal::unpacket_traits::size
@ size
Definition: GenericPacketMath.h:139

Eigen::internal::unpacket_traits::masked_store_available
@ masked_store_available
Definition: GenericPacketMath.h:143

Eigen::internal::unpacket_traits::vectorizable
@ vectorizable
Definition: GenericPacketMath.h:141

Eigen::internal::unpacket_traits::alignment
@ alignment
Definition: GenericPacketMath.h:140