10 #ifndef EIGEN_PACKET_MATH_FP16_AVX512_H
11 #define EIGEN_PACKET_MATH_FP16_AVX512_H
14 #include "../../InternalHeaderCheck.h"
26 enum {
value =
true };
30 struct packet_traits<
half> : default_packet_traits {
113 return _mm512_set1_ph(numext::bit_cast<_Float16>(from));
118 return _mm512_setzero_ph();
124 return _mm512_castsi512_ph(_mm512_set1_epi16(from));
131 #ifdef EIGEN_VECTORIZE_AVX512DQ
133 static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0)));
136 _mm512_storeu_ph(dest, from);
172 __m512h
a = _mm512_castph256_ph512(_mm256_loadu_ph(from));
173 return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6,
174 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0),
181 __m512h
a = _mm512_castph128_ph512(_mm_loadu_ph(from));
182 return _mm512_permutexvar_ph(
183 _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0),
191 return _mm512_abs_ph(
a);
198 return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(
a), 15));
205 return _mm512_min_ph(
a,
b);
212 return _mm512_max_ph(
a,
b);
218 return _mm512_add_ph(
pset1<Packet32h>(
a), _mm512_set_ph(31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
219 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
226 return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(
a), _mm512_castph_si512(
b)));
233 return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(
a), _mm512_castph_si512(
b)));
240 return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(
a), _mm512_castph_si512(
b)));
247 return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(
b), _mm512_castph_si512(
a)));
254 __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);
255 return _mm512_mask_blend_ph(mask32,
a,
b);
262 __mmask32 mask = _mm512_cmp_ph_mask(
a,
b, _CMP_EQ_OQ);
263 return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask,
static_cast<short>(0xffffu)));
270 __mmask32 mask = _mm512_cmp_ph_mask(
a,
b, _CMP_LE_OQ);
271 return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask,
static_cast<short>(0xffffu)));
278 __mmask32 mask = _mm512_cmp_ph_mask(
a,
b, _CMP_LT_OQ);
279 return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask,
static_cast<short>(0xffffu)));
286 __mmask32 mask = _mm512_cmp_ph_mask(
a,
b, _CMP_NGE_UQ);
287 return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask,
static_cast<short>(0xffffu)));
294 return _mm512_add_ph(
a,
b);
299 return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b)));
304 return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b)));
311 return _mm512_sub_ph(
a,
b);
316 return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b)));
321 return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b)));
328 return _mm512_mul_ph(
a,
b);
333 return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b)));
338 return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b)));
345 return _mm512_div_ph(
a,
b);
350 return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b)));
355 return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b)));
369 return _mm512_roundscale_ph(
padd(
por(
pand(
a, signMask), prev0dot5),
a), _MM_FROUND_TO_ZERO);
376 return _mm512_roundscale_ph(
a, _MM_FROUND_CUR_DIRECTION);
383 return _mm512_roundscale_ph(
a, _MM_FROUND_TO_POS_INF);
390 return _mm512_roundscale_ph(
a, _MM_FROUND_TO_NEG_INF);
397 return _mm512_roundscale_ph(
a, _MM_FROUND_TO_ZERO);
403 return (
half)_mm512_reduce_add_ph(
a);
408 return (
half)_mm256_reduce_add_ph(_mm256_castsi256_ph(
a));
413 return (half)_mm_reduce_add_ph(_mm_castsi128_ph(
a));
419 #ifdef EIGEN_VECTORIZE_AVX512DQ
420 __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(
a), 0));
421 __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(
a), 1));
426 _mm512_storeu_ph(
data,
a);
428 __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(
data));
429 __m256i highHalf = _mm256_castph_si256(_mm256_loadu_ph(
data + 16));
441 #ifdef EIGEN_VECTORIZE_FMA
447 return _mm512_fmadd_ph(
a,
b,
c);
452 return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b), _mm256_castsi256_ph(
c)));
457 return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b), _mm_castsi128_ph(
c)));
464 return _mm512_fmsub_ph(
a,
b,
c);
469 return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b), _mm256_castsi256_ph(
c)));
474 return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b), _mm_castsi128_ph(
c)));
481 return _mm512_fnmadd_ph(
a,
b,
c);
486 return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b), _mm256_castsi256_ph(
c)));
491 return _mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b), _mm_castsi128_ph(
c)));
498 return _mm512_fnmsub_ph(
a,
b,
c);
503 return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b), _mm256_castsi256_ph(
c)));
508 return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b), _mm_castsi128_ph(
c)));
531 return _mm512_sqrt_ph(
a);
538 return _mm512_rsqrt_ph(
a);
545 return _mm512_rcp_ph(
a);
554 for (
int i = 0;
i < 16;
i++) {
555 t[2 *
i] = _mm512_unpacklo_epi16(_mm512_castph_si512(
a.packet[2 *
i]), _mm512_castph_si512(
a.packet[2 *
i + 1]));
557 _mm512_unpackhi_epi16(_mm512_castph_si512(
a.packet[2 *
i]), _mm512_castph_si512(
a.packet[2 *
i + 1]));
563 for (
int i = 0;
i < 8;
i++) {
564 p[4 *
i] = _mm512_unpacklo_epi32(
t[4 *
i],
t[4 *
i + 2]);
565 p[4 *
i + 1] = _mm512_unpackhi_epi32(
t[4 *
i],
t[4 *
i + 2]);
566 p[4 *
i + 2] = _mm512_unpacklo_epi32(
t[4 *
i + 1],
t[4 *
i + 3]);
567 p[4 *
i + 3] = _mm512_unpackhi_epi32(
t[4 *
i + 1],
t[4 *
i + 3]);
573 for (
int i = 0;
i < 4;
i++) {
574 q[8 *
i] = _mm512_unpacklo_epi64(
p[8 *
i],
p[8 *
i + 4]);
575 q[8 *
i + 1] = _mm512_unpackhi_epi64(
p[8 *
i],
p[8 *
i + 4]);
576 q[8 *
i + 2] = _mm512_unpacklo_epi64(
p[8 *
i + 1],
p[8 *
i + 5]);
577 q[8 *
i + 3] = _mm512_unpackhi_epi64(
p[8 *
i + 1],
p[8 *
i + 5]);
578 q[8 *
i + 4] = _mm512_unpacklo_epi64(
p[8 *
i + 2],
p[8 *
i + 6]);
579 q[8 *
i + 5] = _mm512_unpackhi_epi64(
p[8 *
i + 2],
p[8 *
i + 6]);
580 q[8 *
i + 6] = _mm512_unpacklo_epi64(
p[8 *
i + 3],
p[8 *
i + 7]);
581 q[8 *
i + 7] = _mm512_unpackhi_epi64(
p[8 *
i + 3],
p[8 *
i + 7]);
586 #define PACKET32H_TRANSPOSE_HELPER(X, Y) \
588 f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X); \
589 f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \
590 f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \
591 f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \
592 f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \
593 f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \
594 f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \
595 f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \
617 #undef PACKET32H_TRANSPOSE_HELPER
620 for (
int i = 0;
i < 32;
i++) {
621 a.packet[
i] = _mm512_castsi512_ph(
f[
i]);
626 __m512i
p0,
p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3;
627 t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(
a.packet[0]), _mm512_castph_si512(
a.packet[1]));
628 t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(
a.packet[0]), _mm512_castph_si512(
a.packet[1]));
629 t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(
a.packet[2]), _mm512_castph_si512(
a.packet[3]));
630 t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(
a.packet[2]), _mm512_castph_si512(
a.packet[3]));
632 p0 = _mm512_unpacklo_epi32(t0, t2);
633 p1 = _mm512_unpackhi_epi32(t0, t2);
634 p2 = _mm512_unpacklo_epi32(t1, t3);
635 p3 = _mm512_unpackhi_epi32(t1, t3);
642 a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(
p1, 0), 1);
643 a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(
p0, 1), 0);
645 a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2);
646 a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(
p0, 2), 0);
648 a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3);
649 a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(
p0, 3), 0);
651 a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2);
652 a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(
p1, 2), 1);
654 a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3);
655 a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2);
657 a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3);
658 a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(
p1, 3), 1);
660 a.packet[0] = _mm512_castsi512_ph(a0);
661 a.packet[1] = _mm512_castsi512_ph(a1);
662 a.packet[2] = _mm512_castsi512_ph(a2);
663 a.packet[3] = _mm512_castsi512_ph(a3);
670 return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
671 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
683 for (
int i = 0;
i < 32;
i++) {
684 to[stride *
i] = aux[
i];
692 return _mm512_castsi512_ph(_mm512_set_epi16(
693 from[31 * stride].
x, from[30 * stride].
x, from[29 * stride].
x, from[28 * stride].
x, from[27 * stride].
x,
694 from[26 * stride].
x, from[25 * stride].
x, from[24 * stride].
x, from[23 * stride].
x, from[22 * stride].
x,
695 from[21 * stride].
x, from[20 * stride].
x, from[19 * stride].
x, from[18 * stride].
x, from[17 * stride].
x,
696 from[16 * stride].
x, from[15 * stride].
x, from[14 * stride].
x, from[13 * stride].
x, from[12 * stride].
x,
697 from[11 * stride].
x, from[10 * stride].
x, from[9 * stride].
x, from[8 * stride].
x, from[7 * stride].
x,
698 from[6 * stride].
x, from[5 * stride].
x, from[4 * stride].
x, from[3 * stride].
x, from[2 * stride].
x,
699 from[1 * stride].
x, from[0 * stride].
x));
724 __m512d result = _mm512_undefined_pd();
725 result = _mm512_insertf64x4(result, _mm256_castsi256_pd(
a), 0);
726 result = _mm512_insertf64x4(result, _mm256_castsi256_pd(
b), 1);
727 return _mm512_castpd_ph(result);
731 a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(
x), 0));
732 b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(
x), 1));
846 Packet16h exp1 = _mm256_undefined_si256();
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define EIGEN_DEBUG_ALIGNED_STORE
Definition: GenericPacketMath.h:38
#define EIGEN_DEBUG_ALIGNED_LOAD
Definition: GenericPacketMath.h:30
#define EIGEN_DEBUG_UNALIGNED_STORE
Definition: GenericPacketMath.h:42
#define EIGEN_DEBUG_UNALIGNED_LOAD
Definition: GenericPacketMath.h:34
#define EIGEN_UNROLL_LOOP
Definition: Macros.h:1298
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define EIGEN_FAST_MATH
Definition: Macros.h:51
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
int data[]
Definition: Map_placement_new.cpp:1
Vector3f p0
Definition: MatrixBase_all.cpp:2
Vector3f p1
Definition: MatrixBase_all.cpp:2
#define PACKET32H_TRANSPOSE_HELPER(X, Y)
float * p
Definition: Tutorial_Map_using.cpp:9
Scalar * b
Definition: benchVecAdd.cpp:17
static int f(const TensorMap< Tensor< int, 3 > > &tensor)
Definition: cxx11_tensor_map.cpp:237
@ Aligned64
Definition: Constants.h:239
@ Aligned32
Definition: Constants.h:238
@ Aligned16
Definition: Constants.h:237
const Scalar * a
Definition: level2_cplx_impl.h:32
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp2(const bfloat16 &a)
Definition: BFloat16.h:616
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x)
Definition: Half.h:496
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet &a)
Definition: GenericPacketMath.h:1097
EIGEN_STRONG_INLINE Packet32h psqrt< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:530
EIGEN_STRONG_INLINE Eigen::half pfirst< Packet32h >(const Packet32h &from)
Definition: PacketMathFP16.h:130
EIGEN_STRONG_INLINE Packet32h plog2< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:776
EIGEN_STRONG_INLINE Packet32h print< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:375
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:318
EIGEN_STRONG_INLINE Packet32h pdiv< Packet32h >(const Packet32h &a, const Packet32h &b)
Definition: PacketMathFP16.h:344
EIGEN_STRONG_INLINE Packet8h pmul< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2406
EIGEN_STRONG_INLINE Packet8f pzero(const Packet8f &)
Definition: AVX/PacketMath.h:774
EIGEN_STRONG_INLINE void pscatter< half, Packet32h >(half *to, const Packet32h &from, Index stride)
Definition: PacketMathFP16.h:678
EIGEN_STRONG_INLINE Packet32h psignbit< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:197
EIGEN_STRONG_INLINE Packet32h padd< Packet32h >(const Packet32h &a, const Packet32h &b)
Definition: PacketMathFP16.h:293
EIGEN_STRONG_INLINE Packet16h pfrexp< Packet16h >(const Packet16h &, Packet16h &)
EIGEN_STRONG_INLINE Packet32h psin< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:737
EIGEN_STRONG_INLINE half predux< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:402
EIGEN_STRONG_INLINE Packet16h pexp< Packet16h >(const Packet16h &)
EIGEN_STRONG_INLINE Packet32h ploadquad< Packet32h >(const Eigen::half *from)
Definition: PacketMathFP16.h:180
EIGEN_STRONG_INLINE Packet32h pnegate< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:516
EIGEN_STRONG_INLINE Packet8h psub< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2398
EIGEN_STRONG_INLINE Packet32h ploaddup< Packet32h >(const Eigen::half *from)
Definition: PacketMathFP16.h:171
EIGEN_STRONG_INLINE Packet16h plog< Packet16h >(const Packet16h &)
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition: AltiVec/Complex.h:339
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet &a)
Definition: GenericPacketMath.h:1123
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet &a)
Definition: GenericPacketMath.h:1103
EIGEN_STRONG_INLINE Packet16h psin< Packet16h >(const Packet16h &)
EIGEN_STRONG_INLINE Packet32h pabs< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:190
EIGEN_STRONG_INLINE Packet16h ptanh< Packet16h >(const Packet16h &)
EIGEN_STRONG_INLINE Packet32h plog< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:763
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet &a)
Definition: GenericPacketMath.h:1022
EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h &a, const Packet16h &b)
Definition: PacketMathFP16.h:723
EIGEN_STRONG_INLINE Packet32h pset1frombits< Packet32h >(unsigned short from)
Definition: PacketMathFP16.h:123
EIGEN_STRONG_INLINE Eigen::half predux< Packet8h >(const Packet8h &a)
Definition: AVX/PacketMath.h:2451
EIGEN_STRONG_INLINE Packet32h psub< Packet32h >(const Packet32h &a, const Packet32h &b)
Definition: PacketMathFP16.h:310
EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1314
EIGEN_STRONG_INLINE Packet32h pcos< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:750
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet &a)
Definition: GenericPacketMath.h:1015
EIGEN_STRONG_INLINE Packet32h pload< Packet32h >(const Eigen::half *from)
Definition: PacketMathFP16.h:144
EIGEN_STRONG_INLINE Packet8h pdiv< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2414
EIGEN_STRONG_INLINE Packet16h pexpm1< Packet16h >(const Packet16h &)
EIGEN_STRONG_INLINE Packet8h por(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2309
EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1341
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
Definition: AltiVec/Complex.h:303
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: AltiVec/PacketMath.h:1218
EIGEN_STRONG_INLINE Packet8h pandnot(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2323
EIGEN_STRONG_INLINE Packet32h ptanh< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:828
EIGEN_STRONG_INLINE Packet32h plset< Packet32h >(const half &a)
Definition: PacketMathFP16.h:217
EIGEN_STRONG_INLINE Packet32h pset1< Packet32h >(const Eigen::half &from)
Definition: PacketMathFP16.h:111
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet &a)
Definition: GenericPacketMath.h:1071
EIGEN_STRONG_INLINE Packet32h pfrexp< Packet32h >(const Packet32h &a, Packet32h &exponent)
Definition: PacketMathFP16.h:841
EIGEN_STRONG_INLINE void pstoreu< half >(Eigen::half *to, const Packet16h &from)
Definition: AVX512/PacketMath.h:2230
EIGEN_STRONG_INLINE Packet32h prsqrt< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:537
EIGEN_STRONG_INLINE Packet16h plog2< Packet16h >(const Packet16h &)
EIGEN_STRONG_INLINE Packet32h pldexp< Packet32h >(const Packet32h &a, const Packet32h &exponent)
Definition: PacketMathFP16.h:859
EIGEN_STRONG_INLINE Packet32h pexpm1< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:815
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet &a)
Definition: GenericPacketMath.h:1110
EIGEN_STRONG_INLINE Packet32h pfloor< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:389
EIGEN_STRONG_INLINE Packet16h padd< Packet16h >(const Packet16h &a, const Packet16h &b)
Definition: AVX512/PacketMath.h:2374
EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h &x, Packet16h &a, Packet16h &b)
Definition: PacketMathFP16.h:730
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
Definition: GenericPacketMath.h:891
EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:835
EIGEN_STRONG_INLINE Packet32h preciprocal< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:544
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:353
EIGEN_STRONG_INLINE Packet8h pldexp(const Packet8h &a, const Packet8h &exponent)
Definition: arch/AVX/MathFunctions.h:80
EIGEN_STRONG_INLINE Packet32h pround< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:361
EIGEN_STRONG_INLINE Packet4f pmsub(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:819
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2319
EIGEN_STRONG_INLINE Packet16h pcos< Packet16h >(const Packet16h &)
EIGEN_STRONG_INLINE Packet8h pxor(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2315
EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:827
EIGEN_STRONG_INLINE half predux< Packet16h >(const Packet16h &from)
Definition: AVX512/PacketMath.h:2406
EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1474
EIGEN_STRONG_INLINE Packet32h ploadu< Packet32h >(const Eigen::half *from)
Definition: PacketMathFP16.h:151
EIGEN_STRONG_INLINE Packet32h pceil< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:382
EIGEN_DEVICE_FUNC Packet psub(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:337
EIGEN_STRONG_INLINE void pstore< half >(Eigen::half *to, const Packet16h &from)
Definition: AVX512/PacketMath.h:2223
EIGEN_STRONG_INLINE Packet16h plog1p< Packet16h >(const Packet16h &)
EIGEN_STRONG_INLINE Packet32h ptrunc< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:396
EIGEN_STRONG_INLINE Packet16h predux_half_dowto4< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:418
EIGEN_STRONG_INLINE Packet32h pmul< Packet32h >(const Packet32h &a, const Packet32h &b)
Definition: PacketMathFP16.h:327
EIGEN_STRONG_INLINE Packet32h pmin< Packet32h >(const Packet32h &a, const Packet32h &b)
Definition: PacketMathFP16.h:204
EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h &a, Packet8h &exponent)
Definition: arch/AVX/MathFunctions.h:72
EIGEN_STRONG_INLINE Packet16h pmul< Packet16h >(const Packet16h &a, const Packet16h &b)
Definition: AVX512/PacketMath.h:2390
EIGEN_STRONG_INLINE Packet8h padd< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2390
EIGEN_STRONG_INLINE Packet16h pdiv< Packet16h >(const Packet16h &a, const Packet16h &b)
Definition: AVX512/PacketMath.h:2398
EIGEN_STRONG_INLINE Packet32h plog1p< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:789
eigen_packet_wrapper< __m256i, 1 > Packet16h
Definition: AVX512/PacketMath.h:39
EIGEN_STRONG_INLINE Packet32h pmax< Packet32h >(const Packet32h &a, const Packet32h &b)
Definition: PacketMathFP16.h:211
EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1329
EIGEN_STRONG_INLINE Packet32h pexp< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:802
__m512h Packet32h
Definition: PacketMathFP16.h:20
eigen_packet_wrapper< __m128i, 2 > Packet8h
Definition: AVX/PacketMath.h:38
EIGEN_STRONG_INLINE Packet32h pconj< Packet32h >(const Packet32h &a)
Definition: PacketMathFP16.h:523
EIGEN_STRONG_INLINE Packet4f pexp(const Packet4f &_x)
Definition: LSX/PacketMath.h:2663
EIGEN_STRONG_INLINE Packet16h psub< Packet16h >(const Packet16h &a, const Packet16h &b)
Definition: AVX512/PacketMath.h:2382
EIGEN_STRONG_INLINE Packet16h pldexp< Packet16h >(const Packet16h &, const Packet16h &)
EIGEN_DEVICE_FUNC const Scalar & q
Definition: SpecialFunctionsImpl.h:2019
std::uint16_t uint16_t
Definition: Meta.h:38
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
int c
Definition: calibrate.py:100
Definition: Eigen_Colamd.h:49
list x
Definition: plotDoE.py:28
t
Definition: plotPSD.py:36
Definition: GenericPacketMath.h:1407
@ HasRsqrt
Definition: GenericPacketMath.h:74
@ HasSin
Definition: GenericPacketMath.h:81
@ HasBlend
Definition: GenericPacketMath.h:66
@ HasNdtri
Definition: GenericPacketMath.h:97
@ HasCos
Definition: GenericPacketMath.h:82
@ HasCmp
Definition: GenericPacketMath.h:69
@ HasLog1p
Definition: GenericPacketMath.h:78
@ HasExp
Definition: GenericPacketMath.h:75
@ HasSqrt
Definition: GenericPacketMath.h:73
@ HasErf
Definition: GenericPacketMath.h:95
@ HasBessel
Definition: GenericPacketMath.h:98
@ HasExpm1
Definition: GenericPacketMath.h:76
@ HasLog
Definition: GenericPacketMath.h:77
@ HasTanh
Definition: GenericPacketMath.h:90
@ HasDiv
Definition: GenericPacketMath.h:71
Definition: GenericPacketMath.h:225
@ value
Definition: Meta.h:146
Packet32h type
Definition: PacketMathFP16.h:31
Packet16h half
Definition: PacketMathFP16.h:32
@ size
Definition: GenericPacketMath.h:113
@ AlignedOnScalar
Definition: GenericPacketMath.h:114
@ Vectorizable
Definition: GenericPacketMath.h:112
@ HasSub
Definition: GenericPacketMath.h:118
@ HasMax
Definition: GenericPacketMath.h:124
@ HasNegate
Definition: GenericPacketMath.h:120
@ HasMul
Definition: GenericPacketMath.h:119
@ HasAdd
Definition: GenericPacketMath.h:117
@ HasSetLinear
Definition: GenericPacketMath.h:126
@ HasMin
Definition: GenericPacketMath.h:123
@ HasConj
Definition: GenericPacketMath.h:125
@ HasAbs2
Definition: GenericPacketMath.h:122
@ HasAbs
Definition: GenericPacketMath.h:121
Packet8h half
Definition: PacketMathFP16.h:83
Eigen::half type
Definition: PacketMathFP16.h:82
Packet16h half
Definition: PacketMathFP16.h:70
Eigen::half type
Definition: PacketMathFP16.h:69
Packet8h half
Definition: PacketMathFP16.h:96
Eigen::half type
Definition: PacketMathFP16.h:95
Definition: GenericPacketMath.h:134
@ masked_load_available
Definition: GenericPacketMath.h:142
@ size
Definition: GenericPacketMath.h:139
@ masked_store_available
Definition: GenericPacketMath.h:143
@ vectorizable
Definition: GenericPacketMath.h:141
@ alignment
Definition: GenericPacketMath.h:140