10 #ifndef EIGEN_PACKET_MATH_SSE_H
11 #define EIGEN_PACKET_MATH_SSE_H
15 #include "../../InternalHeaderCheck.h"
21 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
22 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
25 #if !defined(EIGEN_VECTORIZE_AVX) && !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)
28 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2 * sizeof(void*))
31 #ifdef EIGEN_VECTORIZE_FMA
32 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
33 #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
37 #if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW || EIGEN_COMP_LCC) && \
38 (__GXX_ABI_VERSION < 1004)) || \
45 typedef eigen_packet_wrapper<__m128>
Packet4f;
46 typedef eigen_packet_wrapper<__m128d>
Packet2d;
52 typedef eigen_packet_wrapper<__m128i, 0>
Packet4i;
59 enum {
value =
true };
62 struct is_arithmetic<__m128i> {
63 enum {
value =
true };
66 struct is_arithmetic<__m128d> {
67 enum {
value =
true };
71 enum {
value =
true };
75 enum {
value =
true };
82 enum {
value =
false };
89 template <
int p,
int q,
int r,
int s>
91 enum {
mask = (
s) << 6 | (
r) << 4 | (
q) << 2 | (
p) };
// vec4f_swizzle1(v, p, q, r, s): permutes the four lanes of one float vector `v`.
// `v` is bit-cast to an integer vector, shuffled with _mm_shuffle_epi32 using the
// immediate shuffle_mask<p, q, r, s>::mask, cast back to float and wrapped in Packet4f.
// p, q, r, s are used as template arguments, so they must be compile-time constants.
95 #define vec4f_swizzle1(v, p, q, r, s) \
96 Packet4f(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v), (shuffle_mask<p, q, r, s>::mask))))
// vec4i_swizzle1(v, p, q, r, s): permutes the four 32-bit lanes of integer vector `v`
// with _mm_shuffle_epi32 and the immediate shuffle_mask<p, q, r, s>::mask; the result
// is wrapped in Packet4i. p, q, r, s must be compile-time constants (template arguments).
98 #define vec4i_swizzle1(v, p, q, r, s) Packet4i(_mm_shuffle_epi32(v, (shuffle_mask<p, q, r, s>::mask)))
// vec4ui_swizzle1(v, p, q, r, s): same lane permutation as vec4i_swizzle1 (it forwards
// all arguments to it), with the result re-wrapped as Packet4ui.
100 #define vec4ui_swizzle1(v, p, q, r, s) Packet4ui(vec4i_swizzle1(v, p, q, r, s))
// vec2d_swizzle1(v, p, q): permutes the two double lanes of `v`.
// The vector is bit-cast to integers and shuffled as four 32-bit lanes; each selected
// double is moved as the adjacent pair (2*p, 2*p+1) / (2*q, 2*q+1), then the result is
// cast back and wrapped in Packet2d.
// NOTE: p and q are substituted unparenthesised into `2 * p` / `2 * q`, so pass plain
// constants rather than compound expressions.
102 #define vec2d_swizzle1(v, p, q) \
103 Packet2d(_mm_castsi128_pd( \
104 _mm_shuffle_epi32(_mm_castpd_si128(v), (shuffle_mask<2 * p, 2 * p + 1, 2 * q, 2 * q + 1>::mask))))
// vec4f_swizzle2(a, b, p, q, r, s): two-source float shuffle. Forwards `a` and `b` to
// _mm_shuffle_ps with the immediate shuffle_mask<p, q, r, s>::mask and wraps the result
// in Packet4f. p, q, r, s must be compile-time constants (template arguments).
106 #define vec4f_swizzle2(a, b, p, q, r, s) Packet4f(_mm_shuffle_ps((a), (b), (shuffle_mask<p, q, r, s>::mask)))
108 #define vec4i_swizzle2(a, b, p, q, r, s) \
110 _mm_castps_si128((_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (shuffle_mask<p, q, r, s>::mask)))))
// vec4ui_swizzle2(a, b, p, q, r, s): forwards all arguments to vec4i_swizzle2 and wraps
// the result.
// NOTE(review): the wrapper type here is Packet4i, whereas vec4ui_swizzle1 wraps its
// result in Packet4ui — confirm whether Packet4i is intended before relying on the type.
112 #define vec4ui_swizzle2(a, b, p, q, r, s) Packet4i(vec4i_swizzle2(a, b, p, q, r, s))
// vec4f_duplane(a, p): expands to vec4f_swizzle2 with `a` as both sources and the same
// index `p` for all four selectors, i.e. every output lane is taken from lane p of `a`.
126 #define vec4f_duplane(a, p) vec4f_swizzle2(a, a, p, p, p, p)
// vec2d_swizzle2(a, b, mask): thin wrapper that passes `a`, `b` and the immediate `mask`
// straight to _mm_shuffle_pd and wraps the result in Packet2d.
128 #define vec2d_swizzle2(a, b, mask) Packet2d(_mm_shuffle_pd(a, b, mask))
// vec2d_duplane(a, p): expands to vec2d_swizzle2 with `a` as both sources and a mask
// whose two selector bits are both set to `p`.
// NOTE: `p` is substituted unparenthesised into `(p << 1) | p`, so pass a plain constant.
136 #define vec2d_duplane(a, p) vec2d_swizzle2(a, a, (p << 1) | p)
// Helper macros that declare a `const` packet variable broadcast from a single value.
// Each expands to a declaration WITHOUT a trailing semicolon; the variable name is a
// type prefix (p4f_, p2d_, p4i_, p4ui_) token-pasted with NAME.
// Declares `const Packet4f p4f_<NAME>` initialised with pset1<Packet4f>(X).
138 #define EIGEN_DECLARE_CONST_Packet4f(NAME, X) const Packet4f p4f_##NAME = pset1<Packet4f>(X)
// Declares `const Packet2d p2d_<NAME>` initialised with pset1<Packet2d>(X).
140 #define EIGEN_DECLARE_CONST_Packet2d(NAME, X) const Packet2d p2d_##NAME = pset1<Packet2d>(X)
// Declares `const Packet4f p4f_<NAME>` initialised with pset1frombits<Packet4f>(X),
// i.e. X is handed to pset1frombits rather than pset1.
142 #define EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME, X) const Packet4f p4f_##NAME = pset1frombits<Packet4f>(X)
// Declares `const Packet4i p4i_<NAME>` initialised with pset1<Packet4i>(X).
144 #define EIGEN_DECLARE_CONST_Packet4i(NAME, X) const Packet4i p4i_##NAME = pset1<Packet4i>(X)
// Declares `const Packet4ui p4ui_<NAME>` initialised with pset1<Packet4ui>(X).
146 #define EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = pset1<Packet4ui>(X)
149 #if EIGEN_ARCH_x86_64
151 #ifdef EIGEN_VECTORIZE_SSE4_1
155 return _mm_cvtsi128_si64(_mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(
a), _mm_castsi128_pd(
a), 0x1)));
162 return numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_castsi128_pd(
a)));
165 return numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_shuffle_pd(_mm_castsi128_pd(
a), _mm_castsi128_pd(
a), 0x1)));
171 #ifndef EIGEN_VECTORIZE_AVX
173 struct packet_traits<float> : default_packet_traits {
206 struct packet_traits<
double> : default_packet_traits {
231 struct packet_traits<
int> : default_packet_traits {
246 struct packet_traits<
uint32_t> : default_packet_traits {
262 struct packet_traits<
int64_t> : default_packet_traits {
374 #ifndef EIGEN_VECTORIZE_AVX
376 struct scalar_div_cost<float, true> {
380 struct scalar_div_cost<
double, true> {
387 return _mm_set_ps1(from);
391 return _mm_set1_pd(from);
395 return _mm_set1_epi64x(from);
399 return _mm_set1_epi32(from);
403 return _mm_set1_epi32(numext::bit_cast<int32_t>(from));
407 return _mm_set1_epi8(
static_cast<char>(from));
416 return _mm_castsi128_pd(_mm_set1_epi64x(from));
421 return _mm_castsi128_ps(_mm_set_epi32(0, -1, 0, -1));
425 return _mm_set_epi32(0, 0, -1, -1);
429 return _mm_set_epi32(0, -1, 0, -1);
433 return _mm_set_epi32(0, -1, 0, -1);
437 return _mm_castsi128_pd(_mm_set_epi32(0, 0, -1, -1));
442 return _mm_setzero_ps();
446 return _mm_setzero_pd();
450 return _mm_setzero_si128();
454 return _mm_setzero_si128();
458 return _mm_setzero_si128();
466 #if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
496 return _mm_add_ps(
a,
b);
500 return _mm_add_pd(
a,
b);
504 return _mm_add_epi64(
a,
b);
508 return _mm_add_epi32(
a,
b);
512 return _mm_add_epi32(
a,
b);
517 return _mm_or_si128(
a,
b);
520 template <
typename Packet>
524 return _mm_add_ss(
a,
b);
528 return _mm_add_sd(
a,
b);
533 return _mm_sub_ps(
a,
b);
537 return _mm_sub_pd(
a,
b);
541 return _mm_sub_epi64(
a,
b);
545 return _mm_sub_epi32(
a,
b);
549 return _mm_sub_epi32(
a,
b);
553 return _mm_xor_si128(
a,
b);
560 #ifdef EIGEN_VECTORIZE_SSE3
561 return _mm_addsub_ps(
a,
b);
563 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000, 0x0, 0x80000000, 0x0));
572 #ifdef EIGEN_VECTORIZE_SSE3
573 return _mm_addsub_pd(
a,
b);
575 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0, 0x80000000, 0x0, 0x0));
582 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000));
583 return _mm_xor_ps(
a, mask);
587 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0, 0x80000000, 0x0, 0x80000000));
588 return _mm_xor_pd(
a, mask);
619 return _mm_mul_ps(
a,
b);
623 return _mm_mul_pd(
a,
b);
628 __m128i upper32_a = _mm_srli_epi64(
a, 32);
629 __m128i upper32_b = _mm_srli_epi64(
b, 32);
632 __m128i mul1 = _mm_mul_epu32(upper32_a,
b);
633 __m128i mul2 = _mm_mul_epu32(upper32_b,
a);
635 __m128i mul3 = _mm_mul_epu32(
a,
b);
637 __m128i high = _mm_slli_epi64(_mm_add_epi64(mul1, mul2), 32);
638 return _mm_add_epi64(high, mul3);
642 #ifdef EIGEN_VECTORIZE_SSE4_1
643 return _mm_mullo_epi32(
a,
b);
647 vec4i_swizzle2(_mm_mul_epu32(
a,
b), _mm_mul_epu32(
vec4i_swizzle1(
a, 1, 0, 3, 2),
vec4i_swizzle1(
b, 1, 0, 3, 2)),
654 #ifdef EIGEN_VECTORIZE_SSE4_1
655 return _mm_mullo_epi32(
a,
b);
660 _mm_mul_epu32(
vec4ui_swizzle1(
a, 1, 0, 3, 2),
vec4ui_swizzle1(
b, 1, 0, 3, 2)), 0, 2, 0, 2),
667 return _mm_and_si128(
a,
b);
672 return _mm_div_ps(
a,
b);
676 return _mm_div_pd(
a,
b);
681 #ifdef EIGEN_VECTORIZE_AVX
682 return _mm256_cvttpd_epi32(_mm256_div_pd(_mm256_cvtepi32_pd(
a), _mm256_cvtepi32_pd(
b)));
684 __m128i q_lo = _mm_cvttpd_epi32(_mm_div_pd(_mm_cvtepi32_pd(
a), _mm_cvtepi32_pd(
b)));
685 __m128i q_hi = _mm_cvttpd_epi32(
687 return vec4i_swizzle1(_mm_unpacklo_epi32(q_lo, q_hi), 0, 2, 1, 3);
691 #ifdef EIGEN_VECTORIZE_FMA
694 return _mm_fmadd_ps(
a,
b,
c);
698 return _mm_fmadd_pd(
a,
b,
c);
702 return _mm_fmsub_ps(
a,
b,
c);
706 return _mm_fmsub_pd(
a,
b,
c);
710 return _mm_fnmadd_ps(
a,
b,
c);
714 return _mm_fnmadd_pd(
a,
b,
c);
718 return _mm_fnmsub_ps(
a,
b,
c);
722 return _mm_fnmsub_pd(
a,
b,
c);
725 template <
typename Packet>
729 return _mm_fmadd_ss(
a,
b,
c);
733 return _mm_fmadd_sd(
a,
b,
c);
737 #ifdef EIGEN_VECTORIZE_SSE4_1
740 return _mm_blendv_ps(
b,
a, mask);
745 return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(
b), _mm_castsi128_pd(
a), _mm_castsi128_pd(mask)));
750 return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(
b), _mm_castsi128_ps(
a), _mm_castsi128_ps(mask)));
755 return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(
b), _mm_castsi128_ps(
a), _mm_castsi128_ps(mask)));
760 return _mm_blendv_pd(
b,
a, mask);
766 return _mm_cmpeq_epi32(
a,
a);
770 return _mm_cmpeq_epi32(
a,
a);
779 return _mm_castsi128_ps(_mm_cmpeq_epi32(
b,
b));
784 return _mm_castsi128_pd(_mm_cmpeq_epi32(
b,
b));
789 return _mm_and_ps(
a,
b);
793 return _mm_and_pd(
a,
b);
797 return _mm_and_si128(
a,
b);
801 return _mm_and_si128(
a,
b);
805 return _mm_and_si128(
a,
b);
809 return _mm_and_si128(
a,
b);
814 return _mm_or_ps(
a,
b);
818 return _mm_or_pd(
a,
b);
822 return _mm_or_si128(
a,
b);
826 return _mm_or_si128(
a,
b);
830 return _mm_or_si128(
a,
b);
834 return _mm_or_si128(
a,
b);
839 return _mm_xor_ps(
a,
b);
843 return _mm_xor_pd(
a,
b);
847 return _mm_xor_si128(
a,
b);
851 return _mm_xor_si128(
a,
b);
855 return _mm_xor_si128(
a,
b);
859 return _mm_xor_si128(
a,
b);
864 return _mm_andnot_ps(
b,
a);
868 return _mm_andnot_pd(
b,
a);
872 return _mm_andnot_si128(
b,
a);
876 return _mm_andnot_si128(
b,
a);
880 return _mm_andnot_si128(
b,
a);
885 return _mm_cmple_ps(
a,
b);
889 return _mm_cmplt_ps(
a,
b);
893 return _mm_cmpnge_ps(
a,
b);
897 return _mm_cmpeq_ps(
a,
b);
902 return _mm_cmple_pd(
a,
b);
906 return _mm_cmplt_pd(
a,
b);
910 return _mm_cmpnge_pd(
a,
b);
914 return _mm_cmpeq_pd(
a,
b);
918 return _mm_cmplt_epi32(
a,
b);
922 return _mm_cmpeq_epi32(
a,
b);
930 #ifdef EIGEN_VECTORIZE_SSE4_2
931 return _mm_cmpgt_epi64(
b,
a);
939 return por(hi_lt,
pand(hi_eq, lo_lt));
944 #ifdef EIGEN_VECTORIZE_SSE4_1
945 return _mm_cmpeq_epi64(
a,
b);
959 return _mm_and_si128(_mm_cmpeq_epi8(
a,
b), kBoolMask);
963 return _mm_cmpeq_epi32(
a,
b);
968 #if EIGEN_GNUC_STRICT_LESS_THAN(6, 3, 0)
973 #ifdef EIGEN_VECTORIZE_AVX
975 asm(
"vminps %[a], %[b], %[res]" : [
res]
"=x"(
res) : [
a]
"x"(
a), [
b]
"x"(
b));
978 asm(
"minps %[a], %[res]" : [
res]
"+x"(
res) : [
a]
"x"(
a));
983 return _mm_min_ps(
b,
a);
988 #if EIGEN_GNUC_STRICT_LESS_THAN(6, 3, 0)
993 #ifdef EIGEN_VECTORIZE_AVX
995 asm(
"vminpd %[a], %[b], %[res]" : [
res]
"=x"(
res) : [
a]
"x"(
a), [
b]
"x"(
b));
998 asm(
"minpd %[a], %[res]" : [
res]
"+x"(
res) : [
a]
"x"(
a));
1003 return _mm_min_pd(
b,
a);
1013 #ifdef EIGEN_VECTORIZE_SSE4_1
1014 return _mm_min_epi32(
a,
b);
1018 return _mm_or_si128(_mm_and_si128(mask,
a), _mm_andnot_si128(mask,
b));
1023 #ifdef EIGEN_VECTORIZE_SSE4_1
1024 return _mm_min_epu32(
a,
b);
1034 #if EIGEN_GNUC_STRICT_LESS_THAN(6, 3, 0)
1039 #ifdef EIGEN_VECTORIZE_AVX
1041 asm(
"vmaxps %[a], %[b], %[res]" : [
res]
"=x"(
res) : [
a]
"x"(
a), [
b]
"x"(
b));
1044 asm(
"maxps %[a], %[res]" : [
res]
"+x"(
res) : [
a]
"x"(
a));
1049 return _mm_max_ps(
b,
a);
1054 #if EIGEN_GNUC_STRICT_LESS_THAN(6, 3, 0)
1059 #ifdef EIGEN_VECTORIZE_AVX
1061 asm(
"vmaxpd %[a], %[b], %[res]" : [
res]
"=x"(
res) : [
a]
"x"(
a), [
b]
"x"(
b));
1064 asm(
"maxpd %[a], %[res]" : [
res]
"+x"(
res) : [
a]
"x"(
a));
1069 return _mm_max_pd(
b,
a);
1079 #ifdef EIGEN_VECTORIZE_SSE4_1
1080 return _mm_max_epi32(
a,
b);
1084 return _mm_or_si128(_mm_and_si128(mask,
a), _mm_andnot_si128(mask,
b));
1089 #ifdef EIGEN_VECTORIZE_SSE4_1
1090 return _mm_max_epu32(
a,
b);
1100 #ifdef EIGEN_VECTORIZE_SSE4_1
1109 #ifdef EIGEN_VECTORIZE_SSE4_1
1117 template <
typename Packet,
typename Op>
1123 return pselect<Packet>(not_nan_mask_a,
m,
b);
1126 template <
typename Packet,
typename Op>
1132 return pselect<Packet>(not_nan_mask_a,
m,
a);
1171 return _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(
a), 31));
1176 #ifdef EIGEN_VECTORIZE_AVX
1184 return _mm_srai_epi32(
a, 31);
1199 return por(_mm_slli_epi64(
signbit, 64 -
N), _mm_srli_epi64(
a,
N));
1203 return _mm_srli_epi64(
a,
N);
1207 return _mm_slli_epi64(
a,
N);
1211 return _mm_srai_epi32(
a,
N);
1215 return _mm_srli_epi32(
a,
N);
1219 return _mm_slli_epi32(
a,
N);
1223 return _mm_srli_epi32(
a,
N);
1227 return _mm_srli_epi32(
a,
N);
1231 return _mm_slli_epi32(
a,
N);
1236 const __m128i mask = _mm_setr_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF);
1237 return _mm_castsi128_ps(_mm_and_si128(mask, _mm_castps_si128(
a)));
1241 const __m128i mask = _mm_setr_epi32(0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF);
1242 return _mm_castsi128_pd(_mm_and_si128(mask, _mm_castpd_si128(
a)));
1251 #ifdef EIGEN_VECTORIZE_SSSE3
1252 return _mm_abs_epi32(
a);
1263 #ifdef EIGEN_VECTORIZE_SSE4_1
1269 return _mm_round_ps(
padd(
por(
pand(
a, mask), prev0dot5),
a), _MM_FROUND_TO_ZERO);
1274 const Packet2d mask = _mm_castsi128_pd(_mm_set_epi64x(0x8000000000000000ull, 0x8000000000000000ull));
1275 const Packet2d prev0dot5 = _mm_castsi128_pd(_mm_set_epi64x(0x3FDFFFFFFFFFFFFFull, 0x3FDFFFFFFFFFFFFFull));
1276 return _mm_round_pd(
padd(
por(
pand(
a, mask), prev0dot5),
a), _MM_FROUND_TO_ZERO);
1281 return _mm_round_ps(
a, _MM_FROUND_CUR_DIRECTION);
1285 return _mm_round_pd(
a, _MM_FROUND_CUR_DIRECTION);
1290 return _mm_ceil_ps(
a);
1294 return _mm_ceil_pd(
a);
1299 return _mm_floor_ps(
a);
1303 return _mm_floor_pd(
a);
1308 return _mm_round_ps(
a, _MM_FROUND_TRUNC);
1312 return _mm_round_pd(
a, _MM_FROUND_TRUNC);
1345 return _mm_loadu_ps(from);
1353 return _mm_loadu_ps(from);
1360 return _mm_loadu_pd(from);
1365 return _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(from));
1370 return _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(from));
1375 return _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(from));
1380 return _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(from));
1384 template <
typename Packet>
1396 template <
typename Packet>
1409 return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(
reinterpret_cast<const double*
>(from))), 0, 0, 1, 1);
1422 tmp = _mm_loadl_epi64(
reinterpret_cast<const __m128i*
>(from));
1428 tmp = _mm_loadl_epi64(
reinterpret_cast<const __m128i*
>(from));
1436 __m128i
tmp = _mm_castpd_si128(pload1<Packet2d>(
reinterpret_cast<const double*
>(from)));
1437 return _mm_unpacklo_epi8(
tmp,
tmp);
1446 return _mm_unpacklo_epi16(
tmp,
tmp);
1499 template <
typename Scalar,
typename Packet>
1510 template <
typename Scalar,
typename Packet>
1523 return _mm_shuffle_ps(
a,
a, 0x1B);
1527 return _mm_shuffle_pd(
a,
a, 0x1);
1531 return _mm_castpd_si128(
preverse(_mm_castsi128_pd(
a)));
1535 return _mm_shuffle_epi32(
a, 0x1B);
1539 return _mm_shuffle_epi32(
a, 0x1B);
1543 #ifdef EIGEN_VECTORIZE_SSSE3
1544 __m128i mask = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1545 return _mm_shuffle_epi8(
a, mask);
1547 Packet16b tmp = _mm_shuffle_epi32(
a, _MM_SHUFFLE(0, 1, 2, 3));
1548 tmp = _mm_shufflehi_epi16(_mm_shufflelo_epi16(
tmp, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
1549 return _mm_or_si128(_mm_slli_epi16(
tmp, 8), _mm_srli_epi16(
tmp, 8));
1553 #if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
1558 return a.m128_f32[0];
1562 return a.m128d_f64[0];
1571 int x = _mm_cvtsi128_si32(
a);
1576 uint32_t x = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(
a));
1579 #elif EIGEN_COMP_MSVC_STRICT
1583 float x = _mm_cvtss_f32(
a);
1588 double x = _mm_cvtsd_f64(
a);
1598 int x = _mm_cvtsi128_si32(
a);
1603 uint32_t x = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(
a));
1609 return _mm_cvtss_f32(
a);
1613 return _mm_cvtsd_f64(
a);
1621 return _mm_cvtsi128_si32(
a);
1625 return numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(
a));
1630 int x = _mm_cvtsi128_si32(
a);
1631 return static_cast<bool>(
x & 1);
1636 return _mm_set_ps(from[3 * stride], from[2 * stride], from[1 * stride], from[0 * stride]);
1640 return _mm_set_pd(from[1 * stride], from[0 * stride]);
1644 return _mm_set_epi64x(from[1 * stride], from[0 * stride]);
1648 return _mm_set_epi32(from[3 * stride], from[2 * stride], from[1 * stride], from[0 * stride]);
1652 return _mm_set_epi32(numext::bit_cast<int32_t>(from[3 * stride]), numext::bit_cast<int32_t>(from[2 * stride]),
1653 numext::bit_cast<int32_t>(from[1 * stride]), numext::bit_cast<int32_t>(from[0 * stride]));
1658 return _mm_set_epi8(from[15 * stride], from[14 * stride], from[13 * stride], from[12 * stride], from[11 * stride],
1659 from[10 * stride], from[9 * stride], from[8 * stride], from[7 * stride], from[6 * stride],
1660 from[5 * stride], from[4 * stride], from[3 * stride], from[2 * stride], from[1 * stride],
1666 to[stride * 0] =
pfirst(from);
1667 to[stride * 1] =
pfirst(_mm_shuffle_ps(from, from, 1));
1668 to[stride * 2] =
pfirst(_mm_shuffle_ps(from, from, 2));
1669 to[stride * 3] =
pfirst(_mm_shuffle_ps(from, from, 3));
1673 to[stride * 0] =
pfirst(from);
1678 to[stride * 0] =
pfirst(from);
1683 to[stride * 0] = _mm_cvtsi128_si32(from);
1684 to[stride * 1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
1685 to[stride * 2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
1686 to[stride * 3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
1690 to[stride * 0] = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(from));
1691 to[stride * 1] = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1)));
1692 to[stride * 2] = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2)));
1693 to[stride * 3] = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3)));
1697 to[4 * stride * 0] = _mm_cvtsi128_si32(from);
1698 to[4 * stride * 1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
1699 to[4 * stride * 2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
1700 to[4 * stride * 3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
1716 #if EIGEN_COMP_PGI && EIGEN_COMP_PGI < 1900
1722 #ifndef EIGEN_VECTORIZE_AVX
1754 __m128i a_expo = _mm_srli_epi64(_mm_castpd_si128(
pand(
a, cst_exp_mask)), 52);
1780 const Packet4i bias = _mm_set_epi32(0, 1023, 0, 1023);
1781 Packet4i b = parithmetic_shift_right<2>(ei);
1782 Packet2d c = _mm_castsi128_pd(_mm_slli_epi64(
padd(
b, bias), 52));
1785 c = _mm_castsi128_pd(_mm_slli_epi64(
padd(
b, bias), 52));
1803 const Packet4i bias = _mm_set_epi32(0, 1023, 0, 1023);
1804 const Packet2d c = _mm_castsi128_pd(_mm_slli_epi64(
padd(ei, bias), 52));
1821 #ifdef EIGEN_VECTORIZE_SSE3
1822 a0 = _mm_loaddup_pd(
a + 0);
1823 a1 = _mm_loaddup_pd(
a + 1);
1824 a2 = _mm_loaddup_pd(
a + 2);
1825 a3 = _mm_loaddup_pd(
a + 3);
1838 vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
1839 vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
1840 vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
1841 vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
1873 #ifdef EIGEN_VECTORIZE_SSSE3
1919 return aux[0] * aux[1];
1928 return (aux[0] * aux[1]) * (aux[2] * aux[3]);
1937 return (aux[0] * aux[1]) * (aux[2] * aux[3]);
1958 #ifdef EIGEN_VECTORIZE_SSE4_1
1959 Packet4i tmp = _mm_min_epi32(
a, _mm_shuffle_epi32(
a, _MM_SHUFFLE(0, 0, 3, 2)));
1966 int aux0 = aux[0] < aux[1] ? aux[0] : aux[1];
1967 int aux2 = aux[2] < aux[3] ? aux[2] : aux[3];
1968 return aux0 < aux2 ? aux0 : aux2;
1973 #ifdef EIGEN_VECTORIZE_SSE4_1
1974 Packet4ui tmp = _mm_min_epu32(
a, _mm_shuffle_epi32(
a, _MM_SHUFFLE(0, 0, 3, 2)));
1981 uint32_t aux0 = aux[0] < aux[1] ? aux[0] : aux[1];
1982 uint32_t aux2 = aux[2] < aux[3] ? aux[2] : aux[3];
1983 return aux0 < aux2 ? aux0 : aux2;
1999 #ifdef EIGEN_VECTORIZE_SSE4_1
2000 Packet4i tmp = _mm_max_epi32(
a, _mm_shuffle_epi32(
a, _MM_SHUFFLE(0, 0, 3, 2)));
2007 int aux0 = aux[0] > aux[1] ? aux[0] : aux[1];
2008 int aux2 = aux[2] > aux[3] ? aux[2] : aux[3];
2009 return aux0 > aux2 ? aux0 : aux2;
2014 #ifdef EIGEN_VECTORIZE_SSE4_1
2015 Packet4ui tmp = _mm_max_epu32(
a, _mm_shuffle_epi32(
a, _MM_SHUFFLE(0, 0, 3, 2)));
2022 uint32_t aux0 = aux[0] > aux[1] ? aux[0] : aux[1];
2023 uint32_t aux2 = aux[2] > aux[3] ? aux[2] : aux[3];
2024 return aux0 > aux2 ? aux0 : aux2;
2036 return _mm_movemask_pd(
x) != 0x0;
2041 return _mm_movemask_ps(
x) != 0x0;
2046 return _mm_movemask_pd(_mm_castsi128_pd(
x)) != 0x0;
2051 return _mm_movemask_ps(_mm_castsi128_ps(
x)) != 0x0;
2055 return _mm_movemask_ps(_mm_castsi128_ps(
x)) != 0x0;
2059 _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
2063 __m128d
tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
2064 kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
2065 kernel.packet[1] =
tmp;
2069 __m128i
tmp = _mm_unpackhi_epi64(kernel.packet[0], kernel.packet[1]);
2070 kernel.packet[0] = _mm_unpacklo_epi64(kernel.packet[0], kernel.packet[1]);
2071 kernel.packet[1] =
tmp;
2075 __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
2076 __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
2077 __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
2078 __m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
2080 kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
2081 kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
2082 kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
2083 kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
2086 ptranspose((PacketBlock<Packet4i, 4>&)kernel);
2090 __m128i T0 = _mm_unpacklo_epi8(kernel.
packet[0], kernel.
packet[1]);
2091 __m128i T1 = _mm_unpackhi_epi8(kernel.
packet[0], kernel.
packet[1]);
2092 __m128i T2 = _mm_unpacklo_epi8(kernel.
packet[2], kernel.
packet[3]);
2093 __m128i T3 = _mm_unpackhi_epi8(kernel.
packet[2], kernel.
packet[3]);
2094 kernel.
packet[0] = _mm_unpacklo_epi16(T0, T2);
2095 kernel.
packet[1] = _mm_unpackhi_epi16(T0, T2);
2096 kernel.
packet[2] = _mm_unpacklo_epi16(T1, T3);
2097 kernel.
packet[3] = _mm_unpackhi_epi16(T1, T3);
2122 __m128i t5 = _mm_unpackhi_epi8(kernel.
packet[4], kernel.
packet[5]);
2123 __m128i t6 = _mm_unpacklo_epi8(kernel.
packet[6], kernel.
packet[7]);
2124 __m128i t7 = _mm_unpackhi_epi8(kernel.
packet[6], kernel.
packet[7]);
2125 __m128i t8 = _mm_unpacklo_epi8(kernel.
packet[8], kernel.
packet[9]);
2126 __m128i t9 = _mm_unpackhi_epi8(kernel.
packet[8], kernel.
packet[9]);
2127 __m128i ta = _mm_unpacklo_epi8(kernel.
packet[10], kernel.
packet[11]);
2128 __m128i tb = _mm_unpackhi_epi8(kernel.
packet[10], kernel.
packet[11]);
2129 __m128i tc = _mm_unpacklo_epi8(kernel.
packet[12], kernel.
packet[13]);
2130 __m128i td = _mm_unpackhi_epi8(kernel.
packet[12], kernel.
packet[13]);
2131 __m128i te = _mm_unpacklo_epi8(kernel.
packet[14], kernel.
packet[15]);
2132 __m128i tf = _mm_unpackhi_epi8(kernel.
packet[14], kernel.
packet[15]);
2134 __m128i s0 = _mm_unpacklo_epi16(t0, t2);
2135 __m128i s1 = _mm_unpackhi_epi16(t0, t2);
2136 __m128i s2 = _mm_unpacklo_epi16(t1, t3);
2137 __m128i s3 = _mm_unpackhi_epi16(t1, t3);
2138 __m128i s4 = _mm_unpacklo_epi16(t4, t6);
2139 __m128i s5 = _mm_unpackhi_epi16(t4, t6);
2140 __m128i s6 = _mm_unpacklo_epi16(t5, t7);
2141 __m128i s7 = _mm_unpackhi_epi16(t5, t7);
2142 __m128i s8 = _mm_unpacklo_epi16(t8, ta);
2143 __m128i s9 = _mm_unpackhi_epi16(t8, ta);
2144 __m128i sa = _mm_unpacklo_epi16(t9, tb);
2145 __m128i sb = _mm_unpackhi_epi16(t9, tb);
2146 __m128i sc = _mm_unpacklo_epi16(tc, te);
2147 __m128i sd = _mm_unpackhi_epi16(tc, te);
2148 __m128i se = _mm_unpacklo_epi16(td, tf);
2149 __m128i sf = _mm_unpackhi_epi16(td, tf);
2151 __m128i u0 = _mm_unpacklo_epi32(s0, s4);
2152 __m128i u1 = _mm_unpackhi_epi32(s0, s4);
2153 __m128i u2 = _mm_unpacklo_epi32(s1, s5);
2154 __m128i u3 = _mm_unpackhi_epi32(s1, s5);
2155 __m128i u4 = _mm_unpacklo_epi32(s2, s6);
2156 __m128i u5 = _mm_unpackhi_epi32(s2, s6);
2157 __m128i u6 = _mm_unpacklo_epi32(s3, s7);
2158 __m128i u7 = _mm_unpackhi_epi32(s3, s7);
2159 __m128i u8 = _mm_unpacklo_epi32(s8, sc);
2160 __m128i u9 = _mm_unpackhi_epi32(s8, sc);
2161 __m128i ua = _mm_unpacklo_epi32(s9, sd);
2162 __m128i ub = _mm_unpackhi_epi32(s9, sd);
2163 __m128i uc = _mm_unpacklo_epi32(sa, se);
2164 __m128i ud = _mm_unpackhi_epi32(sa, se);
2165 __m128i ue = _mm_unpacklo_epi32(sb, sf);
2166 __m128i uf = _mm_unpackhi_epi32(sb, sf);
2168 kernel.
packet[0] = _mm_unpacklo_epi64(u0, u8);
2169 kernel.
packet[1] = _mm_unpackhi_epi64(u0, u8);
2170 kernel.
packet[2] = _mm_unpacklo_epi64(u1, u9);
2171 kernel.
packet[3] = _mm_unpackhi_epi64(u1, u9);
2172 kernel.
packet[4] = _mm_unpacklo_epi64(u2, ua);
2173 kernel.
packet[5] = _mm_unpackhi_epi64(u2, ua);
2174 kernel.
packet[6] = _mm_unpacklo_epi64(u3, ub);
2175 kernel.
packet[7] = _mm_unpackhi_epi64(u3, ub);
2176 kernel.
packet[8] = _mm_unpacklo_epi64(u4, uc);
2177 kernel.
packet[9] = _mm_unpackhi_epi64(u4, uc);
2178 kernel.
packet[10] = _mm_unpacklo_epi64(u5, ud);
2179 kernel.
packet[11] = _mm_unpackhi_epi64(u5, ud);
2180 kernel.
packet[12] = _mm_unpacklo_epi64(u6, ue);
2181 kernel.
packet[13] = _mm_unpackhi_epi64(u6, ue);
2182 kernel.
packet[14] = _mm_unpacklo_epi64(u7, uf);
2183 kernel.
packet[15] = _mm_unpackhi_epi64(u7, uf);
2187 return _mm_set_epi64x(0 - ifPacket.
select[1], 0 - ifPacket.
select[0]);
2191 return _mm_set_epi32(0 - ifPacket.
select[3], 0 - ifPacket.
select[2], 0 - ifPacket.
select[1], 0 - ifPacket.
select[0]);
2198 return pselect<Packet2l>(true_mask, thenPacket, elsePacket);
2204 return pselect<Packet4i>(true_mask, thenPacket, elsePacket);
2215 return pselect<Packet4f>(_mm_castsi128_ps(true_mask), thenPacket, elsePacket);
2221 return pselect<Packet2d>(_mm_castsi128_pd(true_mask), thenPacket, elsePacket);
2225 #ifdef EIGEN_VECTORIZE_FMA
2228 return ::fmaf(
a,
b,
c);
2232 return ::fma(
a,
b,
c);
2236 return ::fmaf(
a,
b, -
c);
2240 return ::fma(
a,
b, -
c);
2244 return ::fmaf(-
a,
b,
c);
2248 return ::fma(-
a,
b,
c);
2252 return ::fmaf(-
a,
b, -
c);
2256 return ::fma(-
a,
b, -
c);
2260 #ifdef EIGEN_VECTORIZE_SSE4_1
2264 __m128i input = _mm_cvtepu16_epi32(h);
2267 __m128i shifted_exp = _mm_set1_epi32(0x7c00 << 13);
2269 __m128i ou = _mm_slli_epi32(_mm_and_si128(input, _mm_set1_epi32(0x7fff)), 13);
2271 __m128i
exp = _mm_and_si128(ou, shifted_exp);
2273 ou = _mm_add_epi32(ou, _mm_set1_epi32((127 - 15) << 23));
2276 __m128i naninf_mask = _mm_cmpeq_epi32(
exp, shifted_exp);
2278 __m128i naninf_adj = _mm_and_si128(_mm_set1_epi32((128 - 16) << 23), naninf_mask);
2280 ou = _mm_add_epi32(ou, naninf_adj);
2283 __m128i zeroden_mask = _mm_cmpeq_epi32(
exp, _mm_setzero_si128());
2284 __m128i zeroden_adj = _mm_and_si128(zeroden_mask, _mm_set1_epi32(1 << 23));
2286 ou = _mm_add_epi32(ou, zeroden_adj);
2288 __m128i magic = _mm_and_si128(zeroden_mask, _mm_set1_epi32(113 << 23));
2290 ou = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(ou), _mm_castsi128_ps(magic)));
2292 __m128i
sign = _mm_slli_epi32(_mm_and_si128(input, _mm_set1_epi32(0x8000)), 16);
2294 ou = _mm_or_si128(ou,
sign);
2303 __m128i
sign = _mm_set1_epi32(0x80000000u);
2305 sign = _mm_and_si128(
sign, _mm_castps_si128(
f));
2307 f = _mm_xor_ps(
f, _mm_castsi128_ps(
sign));
2309 __m128i fu = _mm_castps_si128(
f);
2311 __m128i f16max = _mm_set1_epi32((127 + 16) << 23);
2312 __m128i f32infty = _mm_set1_epi32(255 << 23);
2315 __m128i infnan_mask = _mm_cmplt_epi32(f16max, _mm_castps_si128(
f));
2316 __m128i inf_mask = _mm_cmpgt_epi32(_mm_castps_si128(
f), f32infty);
2317 __m128i nan_mask = _mm_andnot_si128(inf_mask, infnan_mask);
2318 __m128i inf_value = _mm_and_si128(inf_mask, _mm_set1_epi32(0x7e00));
2319 __m128i nan_value = _mm_and_si128(nan_mask, _mm_set1_epi32(0x7c00));
2321 __m128i naninf_value = _mm_or_si128(inf_value, nan_value);
2323 __m128i denorm_magic = _mm_set1_epi32(((127 - 15) + (23 - 10) + 1) << 23);
2324 __m128i subnorm_mask = _mm_cmplt_epi32(_mm_castps_si128(
f), _mm_set1_epi32(113 << 23));
2326 f = _mm_add_ps(
f, _mm_castsi128_ps(denorm_magic));
2328 __m128i o = _mm_sub_epi32(_mm_castps_si128(
f), denorm_magic);
2329 o = _mm_and_si128(o, subnorm_mask);
2331 o = _mm_or_si128(o, naninf_value);
2333 __m128i mask = _mm_or_si128(infnan_mask, subnorm_mask);
2334 o = _mm_and_si128(o, mask);
2337 __m128i mand_odd = _mm_and_si128(_mm_srli_epi32(fu, 13), _mm_set1_epi32(0x1));
2339 fu = _mm_add_epi32(fu, _mm_set1_epi32(0xc8000fffU));
2341 fu = _mm_add_epi32(fu, mand_odd);
2342 fu = _mm_andnot_si128(mask, fu);
2344 fu = _mm_srli_epi32(fu, 13);
2345 o = _mm_or_si128(fu, o);
2348 o = _mm_or_si128(o, _mm_srli_epi32(
sign, 16));
2351 return _mm_and_si128(o, _mm_set1_epi32(0xffff));
// Trait specialisation: reports Packet4h as an arithmetic type (value == true).
2365 template<>
struct is_arithmetic<Packet4h> {
enum {
value =
true }; };
2368 struct packet_traits<
Eigen::half> : default_packet_traits {
2369 typedef Packet4h
type;
2371 typedef Packet4h
half;
2395 result.x = _mm_set1_pi16(from.
x);
2406 __int64_t a64 = _mm_cvtm64_si64(
a.x);
2407 __int64_t b64 = _mm_cvtm64_si64(
b.x);
2424 result.
x = _mm_set_pi16(h[3].
x, h[2].
x, h[1].
x, h[0].
x);
2429 __int64_t a64 = _mm_cvtm64_si64(
a.x);
2430 __int64_t b64 = _mm_cvtm64_si64(
b.x);
2447 result.
x = _mm_set_pi16(h[3].
x, h[2].
x, h[1].
x, h[0].
x);
2452 __int64_t a64 = _mm_cvtm64_si64(
a.x);
2453 __int64_t b64 = _mm_cvtm64_si64(
b.x);
2470 result.
x = _mm_set_pi16(h[3].
x, h[2].
x, h[1].
x, h[0].
x);
2475 __int64_t a64 = _mm_cvtm64_si64(
a.x);
2476 __int64_t b64 = _mm_cvtm64_si64(
b.x);
2493 result.
x = _mm_set_pi16(h[3].
x, h[2].
x, h[1].
x, h[0].
x);
2499 result.x = _mm_cvtsi64_m64(*
reinterpret_cast<const __int64_t*
>(from));
2505 result.x = _mm_cvtsi64_m64(*
reinterpret_cast<const __int64_t*
>(from));
2510 __int64_t
r = _mm_cvtm64_si64(from.x);
2511 *(
reinterpret_cast<__int64_t*
>(to)) =
r;
2515 __int64_t
r = _mm_cvtm64_si64(from.x);
2516 *(
reinterpret_cast<__int64_t*
>(to)) =
r;
2521 return pset1<Packet4h>(*from);
2527 result.x = _mm_set_pi16(from[3*stride].
x, from[2*stride].
x, from[1*stride].
x, from[0*stride].
x);
2533 __int64_t
a = _mm_cvtm64_si64(from.x);
2534 to[stride*0].
x =
static_cast<unsigned short>(
a);
2535 to[stride*1].
x =
static_cast<unsigned short>(
a >> 16);
2536 to[stride*2].
x =
static_cast<unsigned short>(
a >> 32);
2537 to[stride*3].
x =
static_cast<unsigned short>(
a >> 48);
2541 ptranspose(PacketBlock<Packet4h,4>& kernel) {
2542 __m64 T0 = _mm_unpacklo_pi16(kernel.packet[0].x, kernel.packet[1].x);
2543 __m64 T1 = _mm_unpacklo_pi16(kernel.packet[2].x, kernel.packet[3].x);
2544 __m64 T2 = _mm_unpackhi_pi16(kernel.packet[0].x, kernel.packet[1].x);
2545 __m64 T3 = _mm_unpackhi_pi16(kernel.packet[2].x, kernel.packet[3].x);
2547 kernel.packet[0].x = _mm_unpacklo_pi32(T0, T1);
2548 kernel.packet[1].x = _mm_unpackhi_pi32(T0, T1);
2549 kernel.packet[2].x = _mm_unpacklo_pi32(T2, T3);
2550 kernel.packet[3].x = _mm_unpackhi_pi32(T2, T3);
2559 #if EIGEN_COMP_PGI && EIGEN_COMP_PGI < 1900
// Local definition of _mm_castpd_ps; it follows the
// `#if EIGEN_COMP_PGI && EIGEN_COMP_PGI < 1900` line, so it presumably only exists for
// those compiler versions. Takes the __m128d by value and returns its storage viewed
// as __m128 through a reference cast — no numeric conversion is performed.
2561 static inline __m128 _mm_castpd_ps(__m128d
x) {
return reinterpret_cast<__m128&
>(
x); }
// Local definition of _mm_castpd_si128 (presumably for the PGI < 19.00 branch opened by
// the preceding #if). Takes the __m128d by value and returns its storage viewed as
// __m128i through a reference cast — no numeric conversion is performed.
2562 static inline __m128i _mm_castpd_si128(__m128d
x) {
return reinterpret_cast<__m128i&
>(
x); }
// Local definition of _mm_castps_pd (presumably for the PGI < 19.00 branch opened by
// the preceding #if). Takes the __m128 by value and returns its storage viewed as
// __m128d through a reference cast — no numeric conversion is performed.
2563 static inline __m128d _mm_castps_pd(__m128
x) {
return reinterpret_cast<__m128d&
>(
x); }
// Local definition of _mm_castps_si128 (presumably for the PGI < 19.00 branch opened by
// the preceding #if). Takes the __m128 by value and returns its storage viewed as
// __m128i through a reference cast — no numeric conversion is performed.
2564 static inline __m128i _mm_castps_si128(__m128
x) {
return reinterpret_cast<__m128i&
>(
x); }
// Local definition of _mm_castsi128_ps (presumably for the PGI < 19.00 branch opened by
// the preceding #if). Takes the __m128i by value and returns its storage viewed as
// __m128 through a reference cast — no numeric conversion is performed.
2565 static inline __m128 _mm_castsi128_ps(__m128i
x) {
return reinterpret_cast<__m128&
>(
x); }
// Local definition of _mm_castsi128_pd (presumably for the PGI < 19.00 branch opened by
// the preceding #if). Takes the __m128i by value and returns its storage viewed as
// __m128d through a reference cast — no numeric conversion is performed.
2566 static inline __m128d _mm_castsi128_pd(__m128i
x) {
return reinterpret_cast<__m128d&
>(
x); }
Array< double, 1, 3 > e(1./3., 0.5, 2.)
#define EIGEN_DEBUG_ALIGNED_STORE
Definition: GenericPacketMath.h:38
#define EIGEN_DEBUG_ALIGNED_LOAD
Definition: GenericPacketMath.h:30
#define EIGEN_DEBUG_UNALIGNED_STORE
Definition: GenericPacketMath.h:42
#define EIGEN_DEBUG_UNALIGNED_LOAD
Definition: GenericPacketMath.h:34
#define EIGEN_ALWAYS_INLINE
Definition: Macros.h:845
#define EIGEN_FAST_MATH
Definition: Macros.h:51
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition: PartialRedux_count.cpp:3
#define vec4ui_swizzle2(a, b, p, q, r, s)
Definition: SSE/PacketMath.h:112
#define vec2d_swizzle1(v, p, q)
Definition: SSE/PacketMath.h:102
#define vec4ui_swizzle1(v, p, q, r, s)
Definition: SSE/PacketMath.h:100
#define vec4i_swizzle1(v, p, q, r, s)
Definition: SSE/PacketMath.h:98
#define vec4i_swizzle2(a, b, p, q, r, s)
Definition: SSE/PacketMath.h:108
float * p
Definition: Tutorial_Map_using.cpp:9
Scalar * b
Definition: benchVecAdd.cpp:17
SCALAR Scalar
Definition: bench_gemm.cpp:45
@ N
Definition: constructor.cpp:22
static int f(const TensorMap< Tensor< int, 3 > > &tensor)
Definition: cxx11_tensor_map.cpp:237
@ Aligned16
Definition: Constants.h:237
RealScalar s
Definition: level1_cplx_impl.h:130
const Scalar * a
Definition: level2_cplx_impl.h:32
const char const int const RealScalar const RealScalar * pa
Definition: level2_cplx_impl.h:20
int * m
Definition: level2_cplx_impl.h:294
char char * op
Definition: level2_impl.h:374
Eigen::Matrix< Scalar, Dynamic, Dynamic, ColMajor > tmp
Definition: level3_impl.h:365
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16 &a)
Definition: BFloat16.h:615
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x)
Definition: Half.h:496
EIGEN_STRONG_INLINE Packet4f pandnot< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1465
EIGEN_STRONG_INLINE Packet4ui psub< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:634
__m128d Packet2d
Definition: LSX/PacketMath.h:36
EIGEN_STRONG_INLINE Packet4ui pset1< Packet4ui >(const uint32_t &from)
Definition: LSX/PacketMath.h:490
EIGEN_STRONG_INLINE void pstoreu< double >(double *to, const Packet4d &from)
Definition: AVX/PacketMath.h:1628
EIGEN_STRONG_INLINE Packet pminmax_propagate_numbers(const Packet &a, const Packet &b, Op op)
Definition: SSE/PacketMath.h:1118
EIGEN_STRONG_INLINE double predux< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:1965
EIGEN_STRONG_INLINE Packet16b ptrue< Packet16b >(const Packet16b &)
Definition: SSE/PacketMath.h:773
EIGEN_STRONG_INLINE void pstoreu< uint32_t >(uint32_t *to, const Packet8ui &from)
Definition: AVX/PacketMath.h:1636
EIGEN_STRONG_INLINE Packet16b pgather< bool, Packet16b >(const bool *from, Index stride)
Definition: SSE/PacketMath.h:1657
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf &a)
Definition: AltiVec/Complex.h:268
eigen_packet_wrapper< __m128i, 3 > Packet2l
Definition: LSX/PacketMath.h:41
EIGEN_STRONG_INLINE void pstore< bool >(bool *to, const Packet16b &from)
Definition: SSE/PacketMath.h:1470
EIGEN_STRONG_INLINE bool predux< Packet16b >(const Packet16b &a)
Definition: SSE/PacketMath.h:1898
EIGEN_STRONG_INLINE Packet ploads(const typename unpacket_traits< Packet >::type *from)
EIGEN_STRONG_INLINE void prefetch< uint32_t >(const uint32_t *addr)
Definition: AVX/PacketMath.h:1758
EIGEN_STRONG_INLINE Packet2l pandnot< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:1019
EIGEN_STRONG_INLINE int64_t predux< Packet2l >(const Packet2l &a)
Definition: LSX/PacketMath.h:1987
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:318
EIGEN_STRONG_INLINE Packet4f pmin< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1250
EIGEN_STRONG_INLINE Packet16b ploaddup< Packet16b >(const bool *from)
Definition: SSE/PacketMath.h:1435
EIGEN_STRONG_INLINE Packet2d padd< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:605
EIGEN_STRONG_INLINE Packet2d pandnot< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1003
EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f &a)
Definition: AVX/PacketMath.h:2283
EIGEN_STRONG_INLINE Packet16b pmul< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: SSE/PacketMath.h:666
EIGEN_STRONG_INLINE Packet8f pzero(const Packet8f &)
Definition: AVX/PacketMath.h:774
EIGEN_STRONG_INLINE uint32_t predux_max< Packet4ui >(const Packet4ui &a)
Definition: LSX/PacketMath.h:2166
__vector int Packet4i
Definition: AltiVec/PacketMath.h:34
EIGEN_STRONG_INLINE Packet4f vec4f_movelh(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:132
EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i &a)
Definition: SSE/PacketMath.h:161
EIGEN_STRONG_INLINE Packet2l ploadu< Packet2l >(const int64_t *from)
Definition: LSX/PacketMath.h:1464
EIGEN_STRONG_INLINE Packet2d pmin< PropagateNaN, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:2729
EIGEN_STRONG_INLINE Packet4f padd< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1066
EIGEN_STRONG_INLINE Packet4i por< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1431
EIGEN_STRONG_INLINE Packet2d pmax< PropagateNumbers, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: SSE/PacketMath.h:1149
EIGEN_STRONG_INLINE Packet4ui pmul< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:769
EIGEN_STRONG_INLINE Packet4i pset1< Packet4i >(const int &from)
Definition: AltiVec/PacketMath.h:778
EIGEN_STRONG_INLINE Packet2d paddsub< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:661
EIGEN_STRONG_INLINE Packet16b ploadquad< Packet16b >(const bool *from)
Definition: SSE/PacketMath.h:1443
EIGEN_STRONG_INLINE Packet16b psub< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: SSE/PacketMath.h:552
EIGEN_STRONG_INLINE Packet2d vec2d_unpackhi(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:161
EIGEN_STRONG_INLINE Packet padds(const Packet &a, const Packet &b)
EIGEN_STRONG_INLINE float pfirst< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1863
EIGEN_STRONG_INLINE Packet2d pand< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:880
EIGEN_STRONG_INLINE Packet16b por< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: SSE/PacketMath.h:833
EIGEN_STRONG_INLINE void prefetch< int64_t >(const int64_t *addr)
Definition: LSX/PacketMath.h:1852
EIGEN_STRONG_INLINE Packet2d pldexp_fast< Packet2d >(const Packet2d &a, const Packet2d &exponent)
Definition: SSE/PacketMath.h:1793
EIGEN_STRONG_INLINE Packet4ui padd< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: AltiVec/PacketMath.h:1074
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition: AltiVec/Complex.h:339
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2l pgather< int64_t, Packet2l >(const int64_t *from, Index stride)
Definition: LSX/PacketMath.h:1669
EIGEN_STRONG_INLINE Packet4ui pand< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: AltiVec/PacketMath.h:1414
EIGEN_STRONG_INLINE Packet4f ploadl< Packet4f >(const float *from)
Definition: SSE/PacketMath.h:1387
EIGEN_STRONG_INLINE Packet4i ploaddup< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:1644
EIGEN_STRONG_INLINE bool predux_any(const Packet4f &x)
Definition: AltiVec/PacketMath.h:2751
EIGEN_STRONG_INLINE float predux_max< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2679
EIGEN_STRONG_INLINE Packet4f ploads< Packet4f >(const float *from)
Definition: SSE/PacketMath.h:1399
EIGEN_STRONG_INLINE Packet2d ploaddup< Packet2d >(const double *from)
Definition: LSX/PacketMath.h:1490
EIGEN_STRONG_INLINE Packet4f vec4f_movehl(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:135
EIGEN_STRONG_INLINE Packet2d pxor< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:962
EIGEN_STRONG_INLINE Packet16b pload< Packet16b >(const bool *from)
Definition: SSE/PacketMath.h:1337
EIGEN_STRONG_INLINE Packet2d por< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:921
EIGEN_STRONG_INLINE void pscatter< bool, Packet16b >(bool *to, const Packet16b &from, Index stride)
Definition: SSE/PacketMath.h:1696
EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i &a)
Definition: SSE/PacketMath.h:164
EIGEN_STRONG_INLINE Packet2d pldexp< Packet2d >(const Packet2d &a, const Packet2d &exponent)
Definition: LSX/PacketMath.h:2753
EIGEN_STRONG_INLINE Packet4i pdiv< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1205
EIGEN_STRONG_INLINE Packet2l padd< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:581
EIGEN_STRONG_INLINE Packet4f pmax< PropagateNumbers, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: SSE/PacketMath.h:1145
eigen_packet_wrapper< __m128i, 1 > Packet16b
Definition: SSE/PacketMath.h:53
EIGEN_STRONG_INLINE void pstore1< Packet2d >(double *to, const double &a)
Definition: SSE/PacketMath.h:1711
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter< int, Packet4i >(int *to, const Packet4i &from, Index stride)
Definition: AltiVec/PacketMath.h:959
EIGEN_STRONG_INLINE Packet4f ploaddup< Packet4f >(const float *from)
Definition: AltiVec/PacketMath.h:1640
EIGEN_STRONG_INLINE Packet4f por< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1427
EIGEN_STRONG_INLINE Packet2l por< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:937
EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1983
EIGEN_STRONG_INLINE Packet16b pset1< Packet16b >(const bool &from)
Definition: SSE/PacketMath.h:406
EIGEN_STRONG_INLINE int predux_min< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2604
EIGEN_STRONG_INLINE void pstorel(Scalar *to, const Packet &from)
EIGEN_STRONG_INLINE Packet4i pxor< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1452
EIGEN_STRONG_INLINE double predux_max< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2127
EIGEN_STRONG_INLINE Packet4f pmul< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1162
EIGEN_STRONG_INLINE void pstore1< Packet4f >(float *to, const float &a)
Definition: SSE/PacketMath.h:1705
EIGEN_STRONG_INLINE Packet4f pload1< Packet4f >(const float *from)
Definition: MSA/PacketMath.h:154
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:663
EIGEN_STRONG_INLINE Packet4i pblend(const Selector< 4 > &ifPacket, const Packet4i &thenPacket, const Packet4i &elsePacket)
Definition: AltiVec/PacketMath.h:3075
EIGEN_STRONG_INLINE uint32_t predux< Packet4ui >(const Packet4ui &a)
Definition: LSX/PacketMath.h:2004
EIGEN_STRONG_INLINE Packet4i pcmp_eq< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: LSX/PacketMath.h:1147
EIGEN_STRONG_INLINE Packet4ui por< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:949
EIGEN_STRONG_INLINE Packet4ui pmin< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:1196
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4f pgather< float, Packet4f >(const float *from, Index stride)
Definition: AltiVec/PacketMath.h:853
EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1314
EIGEN_STRONG_INLINE Packet4f paddsub< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:653
EIGEN_STRONG_INLINE Packet2d pset1< Packet2d >(const double &from)
Definition: LSX/PacketMath.h:503
EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1979
EIGEN_STRONG_INLINE Packet pminmax_propagate_nan(const Packet &a, const Packet &b, Op op)
Definition: SSE/PacketMath.h:1127
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< uint32_t, Packet4ui >(uint32_t *to, const Packet4ui &from, Index stride)
Definition: LSX/PacketMath.h:1817
EIGEN_STRONG_INLINE void punpackp(Packet4f *vecs)
Definition: SSE/PacketMath.h:1837
EIGEN_STRONG_INLINE Packet4f pload< Packet4f >(const float *from)
Definition: AltiVec/PacketMath.h:492
EIGEN_STRONG_INLINE int predux_mul< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2529
EIGEN_STRONG_INLINE void pstore< int >(int *to, const Packet4i &from)
Definition: AltiVec/PacketMath.h:647
EIGEN_STRONG_INLINE Packet8h por(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2309
EIGEN_STRONG_INLINE Packet2d ptrue< Packet2d >(const Packet2d &a)
Definition: SSE/PacketMath.h:782
EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1341
__vector unsigned int Packet4ui
Definition: AltiVec/PacketMath.h:35
EIGEN_STRONG_INLINE Packet2d pmax< PropagateNaN, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:2733
EIGEN_STRONG_INLINE Packet4f pmin< PropagateNaN, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:2695
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
Definition: AltiVec/Complex.h:303
EIGEN_STRONG_INLINE void pstore< double >(double *to, const Packet4d &from)
Definition: AVX/PacketMath.h:1611
EIGEN_STRONG_INLINE Packet4i padd< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1070
EIGEN_STRONG_INLINE Packet4f pfloor< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1497
EIGEN_STRONG_INLINE uint32_t pfirst< Packet4ui >(const Packet4ui &a)
Definition: LSX/PacketMath.h:1910
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: AltiVec/PacketMath.h:1218
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:88
EIGEN_STRONG_INLINE Packet4i pandnot< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1469
EIGEN_STRONG_INLINE Packet16b pand< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: SSE/PacketMath.h:808
EIGEN_STRONG_INLINE Packet4ui pandnot< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:1031
EIGEN_STRONG_INLINE Packet8h ptrue(const Packet8h &a)
Definition: AVX/PacketMath.h:2263
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:649
EIGEN_STRONG_INLINE Packet4f pdiv< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1187
EIGEN_STRONG_INLINE Packet8h pandnot(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2323
EIGEN_STRONG_INLINE Packet2d pload< Packet2d >(const double *from)
Definition: LSX/PacketMath.h:1407
EIGEN_STRONG_INLINE Packet16b ploadu< Packet16b >(const bool *from)
Definition: SSE/PacketMath.h:1378
EIGEN_STRONG_INLINE Packet2d pmul< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:741
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
Definition: AltiVec/Complex.h:264
EIGEN_STRONG_INLINE Packet4f pfrexp< Packet4f >(const Packet4f &a, Packet4f &exponent)
Definition: AltiVec/PacketMath.h:2328
EIGEN_STRONG_INLINE float predux_mul< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2522
EIGEN_STRONG_INLINE Packet2d pmin< PropagateNumbers, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: SSE/PacketMath.h:1141
EIGEN_STRONG_INLINE void prefetch< float >(const float *addr)
Definition: AltiVec/PacketMath.h:1854
EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1975
EIGEN_STRONG_INLINE Packet4d pfrexp_generic_get_biased_exponent(const Packet4d &a)
Definition: AVX/PacketMath.h:1880
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< int64_t, Packet2l >(int64_t *to, const Packet2l &from, Index stride)
Definition: LSX/PacketMath.h:1779
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< double, Packet2d >(double *to, const Packet2d &from, Index stride)
Definition: LSX/PacketMath.h:1734
EIGEN_STRONG_INLINE Packet4ui pload< Packet4ui >(const uint32_t *from)
Definition: LSX/PacketMath.h:1435
EIGEN_STRONG_INLINE Packet4f pmax< PropagateNaN, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:2699
EIGEN_STRONG_INLINE Packet4i ploadu< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:1537
EIGEN_STRONG_INLINE double predux_mul< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2019
EIGEN_STRONG_INLINE Packet2d pdiv< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:782
EIGEN_STRONG_INLINE Packet8bf psignbit(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:1966
EIGEN_STRONG_INLINE double predux_min< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2073
EIGEN_STRONG_INLINE Packet4f pset1< Packet4f >(const float &from)
Definition: AltiVec/PacketMath.h:773
EIGEN_STRONG_INLINE Packet4ui pmax< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:1229
EIGEN_STRONG_INLINE uint32_t predux_min< Packet4ui >(const Packet4ui &a)
Definition: LSX/PacketMath.h:2112
EIGEN_STRONG_INLINE Packet4i psub< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1099
EIGEN_STRONG_INLINE Packet4f pmin< PropagateNumbers, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: SSE/PacketMath.h:1137
EIGEN_STRONG_INLINE Packet4ui plset< Packet4ui >(const uint32_t &a)
Definition: LSX/PacketMath.h:548
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter< float, Packet4f >(float *to, const Packet4f &from, Index stride)
Definition: AltiVec/PacketMath.h:954
EIGEN_STRONG_INLINE Packet2d plset< Packet2d >(const double &a)
Definition: LSX/PacketMath.h:563
EIGEN_STRONG_INLINE bool pfirst< Packet16b >(const Packet16b &a)
Definition: SSE/PacketMath.h:1629
EIGEN_STRONG_INLINE void pstoreu< bool >(bool *to, const Packet16b &from)
Definition: SSE/PacketMath.h:1495
EIGEN_STRONG_INLINE int64_t predux_mul< Packet2l >(const Packet2l &a)
Definition: LSX/PacketMath.h:2041
EIGEN_STRONG_INLINE Packet4f pceil< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1493
const char * SsePrefetchPtrType
Definition: SSE/PacketMath.h:1719
EIGEN_STRONG_INLINE void pstore< float >(float *to, const Packet4f &from)
Definition: AltiVec/PacketMath.h:642
EIGEN_STRONG_INLINE Packet2l pset1< Packet2l >(const int64_t &from)
Definition: LSX/PacketMath.h:478
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1936
EIGEN_STRONG_INLINE void pstores(Scalar *to, const Packet &from)
EIGEN_STRONG_INLINE Packet2l ploaddup< Packet2l >(const int64_t *from)
Definition: LSX/PacketMath.h:1509
EIGEN_STRONG_INLINE Packet4f ptrunc< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1501
EIGEN_STRONG_INLINE Packet8f peven_mask(const Packet8f &)
Definition: AVX/PacketMath.h:791
EIGEN_STRONG_INLINE bfloat16 pfirst(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2418
EIGEN_STRONG_INLINE void pbroadcast4< Packet2d >(const double *a, Packet2d &a0, Packet2d &a1, Packet2d &a2, Packet2d &a3)
Definition: SSE/PacketMath.h:1819
EIGEN_STRONG_INLINE Packet4f pset1frombits< Packet4f >(unsigned int from)
Definition: AltiVec/PacketMath.h:803
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
Definition: GenericPacketMath.h:891
EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:835
EIGEN_STRONG_INLINE Packet4f pldexp< Packet4f >(const Packet4f &a, const Packet4f &exponent)
Definition: AltiVec/PacketMath.h:2319
EIGEN_STRONG_INLINE Packet2d ploadu< Packet2d >(const double *from)
Definition: LSX/PacketMath.h:1448
EIGEN_STRONG_INLINE Packet4f vec4f_unpackhi(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:141
EIGEN_STRONG_INLINE Packet4f pxor< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1448
EIGEN_STRONG_INLINE Packet4i pmin< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1261
EIGEN_STRONG_INLINE Packet2l pmax< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:1217
EIGEN_STRONG_INLINE Packet2l psub< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:622
EIGEN_STRONG_INLINE Packet2d pfrexp< Packet2d >(const Packet2d &a, Packet2d &exponent)
Definition: LSX/PacketMath.h:2677
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:353
EIGEN_STRONG_INLINE Packet4f padds< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: SSE/PacketMath.h:523
EIGEN_STRONG_INLINE Packet4f vec4f_swizzle1(const Packet4f &a, int p, int q, int r, int s)
Definition: LSX/PacketMath.h:126
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pldexp_generic(const Packet &a, const Packet &exponent)
Definition: GenericPacketMathFunctions.h:226
EIGEN_STRONG_INLINE Packet4f ptrue< Packet4f >(const Packet4f &a)
Definition: SSE/PacketMath.h:777
EIGEN_STRONG_INLINE Packet4f vec4f_unpacklo(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:138
EIGEN_STRONG_INLINE Packet4f pmsub(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:819
EIGEN_STRONG_INLINE Packet2l pand< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:896
EIGEN_STRONG_INLINE void pstoreu< int >(int *to, const Packet4i &from)
Definition: AltiVec/PacketMath.h:1760
EIGEN_STRONG_INLINE Packet4ui pxor< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:990
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2319
EIGEN_STRONG_INLINE Packet4i ptrue< Packet4i >(const Packet4i &a)
Definition: SSE/PacketMath.h:769
EIGEN_STRONG_INLINE Packet2d ptrunc< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2749
EIGEN_STRONG_INLINE Packet8h pxor(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2315
EIGEN_STRONG_INLINE int pfirst< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1869
EIGEN_STRONG_INLINE Packet4i plset< Packet4i >(const int &a)
Definition: AltiVec/PacketMath.h:1045
EIGEN_STRONG_INLINE Packet2d vec2d_unpacklo(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:160
EIGEN_STRONG_INLINE float predux< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2435
EIGEN_STRONG_INLINE Packet2d ploads< Packet2d >(const double *from)
Definition: SSE/PacketMath.h:1403
EIGEN_STRONG_INLINE Packet2d pceil< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1186
EIGEN_STRONG_INLINE Packet2l pload< Packet2l >(const int64_t *from)
Definition: LSX/PacketMath.h:1423
EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:827
EIGEN_STRONG_INLINE Packet4f ploadu< Packet4f >(const float *from)
Definition: AltiVec/PacketMath.h:1533
EIGEN_STRONG_INLINE Packet4i pmul< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1166
EIGEN_STRONG_INLINE Packet2d print< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2745
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4ui pgather< uint32_t, Packet4ui >(const uint32_t *from, Index stride)
Definition: LSX/PacketMath.h:1710
EIGEN_STRONG_INLINE Packet4i pand< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1410
EIGEN_STRONG_INLINE Packet2d pmin< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1244
EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1474
EIGEN_STRONG_INLINE int predux< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2445
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pfrexp_generic(const Packet &a, Packet &exponent)
Definition: GenericPacketMathFunctions.h:184
EIGEN_STRONG_INLINE Packet4f pand< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1406
EIGEN_STRONG_INLINE Packet16b pxor< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: SSE/PacketMath.h:858
EIGEN_DEVICE_FUNC Packet psub(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:337
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4i pgather< int, Packet4i >(const int *from, Index stride)
Definition: AltiVec/PacketMath.h:858
EIGEN_STRONG_INLINE int predux_max< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2684
EIGEN_STRONG_INLINE Packet2l pmin< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:1184
EIGEN_STRONG_INLINE Packet2d ploadl< Packet2d >(const double *from)
Definition: SSE/PacketMath.h:1391
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2d pgather< double, Packet2d >(const double *from, Index stride)
Definition: LSX/PacketMath.h:1621
EIGEN_STRONG_INLINE Packet4i pcmp_lt< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: LSX/PacketMath.h:1097
EIGEN_STRONG_INLINE __m128i sse_blend_mask(const Selector< 2 > &ifPacket)
Definition: SSE/PacketMath.h:2186
EIGEN_STRONG_INLINE Packet4i pmax< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1293
EIGEN_STRONG_INLINE void pbroadcast4< Packet4f >(const float *a, Packet4f &a0, Packet4f &a1, Packet4f &a2, Packet4f &a3)
Definition: AltiVec/PacketMath.h:823
EIGEN_STRONG_INLINE Packet2l pmul< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:757
EIGEN_STRONG_INLINE Packet2d pset1frombits< Packet2d >(uint64_t from)
Definition: LSX/PacketMath.h:513
EIGEN_STRONG_INLINE Packet4i pload< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:497
__vector float Packet4f
Definition: AltiVec/PacketMath.h:33
EIGEN_STRONG_INLINE Packet2d psub< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:646
EIGEN_STRONG_INLINE Packet4f psub< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1095
EIGEN_STRONG_INLINE Packet4f plset< Packet4f >(const float &a)
Definition: AltiVec/PacketMath.h:1041
EIGEN_STRONG_INLINE uint32_t predux_mul< Packet4ui >(const Packet4ui &a)
Definition: LSX/PacketMath.h:2058
EIGEN_STRONG_INLINE Packet16b padd< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: SSE/PacketMath.h:516
EIGEN_STRONG_INLINE void pstoreu< int64_t >(int64_t *to, const Packet8l &from)
Definition: AVX512/PacketMath.h:1123
EIGEN_STRONG_INLINE void pstoreu< float >(float *to, const Packet4f &from)
Definition: AltiVec/PacketMath.h:1756
EIGEN_STRONG_INLINE Packet2d pmax< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1256
EIGEN_STRONG_INLINE Packet4f pround< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1479
EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1329
EIGEN_STRONG_INLINE Packet2l plset< Packet2l >(const int64_t &a)
Definition: LSX/PacketMath.h:533
EIGEN_STRONG_INLINE Packet4ui ploadu< Packet4ui >(const uint32_t *from)
Definition: LSX/PacketMath.h:1476
EIGEN_STRONG_INLINE Packet2d pround< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1206
EIGEN_STRONG_INLINE int64_t pfirst< Packet2l >(const Packet2l &a)
Definition: LSX/PacketMath.h:1898
EIGEN_STRONG_INLINE void pstore< uint32_t >(uint32_t *to, const Packet8ui &from)
Definition: AVX/PacketMath.h:1619
EIGEN_STRONG_INLINE void prefetch< int >(const int *addr)
Definition: AltiVec/PacketMath.h:1858
EIGEN_STRONG_INLINE Packet2d padds< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: SSE/PacketMath.h:527
EIGEN_STRONG_INLINE double pfirst< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:1879
EIGEN_STRONG_INLINE Packet ploadl(const typename unpacket_traits< Packet >::type *from)
EIGEN_STRONG_INLINE bool predux_mul< Packet16b >(const Packet16b &a)
Definition: SSE/PacketMath.h:1941
EIGEN_STRONG_INLINE Packet2l ptrue< Packet2l >(const Packet2l &a)
Definition: SSE/PacketMath.h:765
EIGEN_STRONG_INLINE Packet2l pxor< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:978
EIGEN_STRONG_INLINE Packet4ui ploaddup< Packet4ui >(const uint32_t *from)
Definition: LSX/PacketMath.h:1523
EIGEN_STRONG_INLINE void pstore< int64_t >(int64_t *to, const Packet8l &from)
Definition: AVX512/PacketMath.h:1106
EIGEN_STRONG_INLINE Packet4f print< Packet4f >(const Packet4f &a)
Definition: LSX/PacketMath.h:2711
EIGEN_STRONG_INLINE Packet2d pfloor< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1167
EIGEN_STRONG_INLINE float predux_min< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2599
EIGEN_STRONG_INLINE void prefetch< double >(const double *addr)
Definition: AVX/PacketMath.h:1750
EIGEN_STRONG_INLINE Packet4f pmax< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1282
std::int64_t int64_t
Definition: Meta.h:43
EIGEN_DEVICE_FUNC const Scalar & q
Definition: SpecialFunctionsImpl.h:2019
std::uint32_t uint32_t
Definition: Meta.h:40
std::uint64_t uint64_t
Definition: Meta.h:42
EIGEN_DEVICE_FUNC static constexpr EIGEN_ALWAYS_INLINE Scalar signbit(const Scalar &x)
Definition: MathFunctions.h:1419
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
T sign(T x)
Definition: cxx11_tensor_builtins_sycl.cpp:172
int c
Definition: calibrate.py:100
Definition: Eigen_Colamd.h:49
list x
Definition: plotDoE.py:28
numext::uint16_t x
Definition: Half.h:101
Definition: GenericPacketMath.h:1407
Packet packet[N]
Definition: GenericPacketMath.h:1408
Definition: GenericPacketMath.h:1421
bool select[N]
Definition: GenericPacketMath.h:1422
Definition: GenericPacketMath.h:45
@ HasSign
Definition: GenericPacketMath.h:59
@ HasASin
Definition: GenericPacketMath.h:84
@ HasATanh
Definition: GenericPacketMath.h:87
@ HasRsqrt
Definition: GenericPacketMath.h:74
@ HasSin
Definition: GenericPacketMath.h:81
@ HasBlend
Definition: GenericPacketMath.h:66
@ HasErfc
Definition: GenericPacketMath.h:96
@ HasACos
Definition: GenericPacketMath.h:85
@ HasNdtri
Definition: GenericPacketMath.h:97
@ HasCos
Definition: GenericPacketMath.h:82
@ HasCmp
Definition: GenericPacketMath.h:69
@ HasReciprocal
Definition: GenericPacketMath.h:72
@ HasShift
Definition: GenericPacketMath.h:50
@ HasLog1p
Definition: GenericPacketMath.h:78
@ HasExp
Definition: GenericPacketMath.h:75
@ HasSqrt
Definition: GenericPacketMath.h:73
@ HasErf
Definition: GenericPacketMath.h:95
@ HasBessel
Definition: GenericPacketMath.h:98
@ HasExpm1
Definition: GenericPacketMath.h:76
@ HasLog
Definition: GenericPacketMath.h:77
@ HasTanh
Definition: GenericPacketMath.h:90
@ HasATan
Definition: GenericPacketMath.h:86
@ HasDiv
Definition: GenericPacketMath.h:71
Definition: GenericPacketMath.h:225
@ value
Definition: Meta.h:146
Packet16b half
Definition: SSE/PacketMath.h:280
Packet16b type
Definition: SSE/PacketMath.h:279
Packet2d half
Definition: SSE/PacketMath.h:208
Packet2d type
Definition: SSE/PacketMath.h:207
Packet4f type
Definition: SSE/PacketMath.h:174
Packet4f half
Definition: SSE/PacketMath.h:175
Packet2l half
Definition: SSE/PacketMath.h:264
Packet2l type
Definition: SSE/PacketMath.h:263
Packet4i type
Definition: SSE/PacketMath.h:232
Packet4i half
Definition: SSE/PacketMath.h:233
Packet4ui type
Definition: SSE/PacketMath.h:247
Packet4ui half
Definition: SSE/PacketMath.h:248
Definition: GenericPacketMath.h:108
T type
Definition: GenericPacketMath.h:109
@ size
Definition: GenericPacketMath.h:113
@ AlignedOnScalar
Definition: GenericPacketMath.h:114
@ Vectorizable
Definition: GenericPacketMath.h:112
T half
Definition: GenericPacketMath.h:110
@ HasSub
Definition: GenericPacketMath.h:118
@ HasMax
Definition: GenericPacketMath.h:124
@ HasNegate
Definition: GenericPacketMath.h:120
@ HasMul
Definition: GenericPacketMath.h:119
@ HasAdd
Definition: GenericPacketMath.h:117
@ HasSetLinear
Definition: GenericPacketMath.h:126
@ HasMin
Definition: GenericPacketMath.h:123
@ HasConj
Definition: GenericPacketMath.h:125
@ HasAbs2
Definition: GenericPacketMath.h:122
@ HasAbs
Definition: GenericPacketMath.h:121
@ value
Definition: XprHelper.h:884
Definition: SSE/PacketMath.h:90
@ mask
Definition: SSE/PacketMath.h:91
Packet16b half
Definition: SSE/PacketMath.h:364
bool type
Definition: SSE/PacketMath.h:363
double type
Definition: SSE/PacketMath.h:314
Packet2l integer_packet
Definition: SSE/PacketMath.h:316
Packet2d half
Definition: SSE/PacketMath.h:315
Packet2l half
Definition: SSE/PacketMath.h:328
int64_t type
Definition: SSE/PacketMath.h:327
Packet4i integer_packet
Definition: SSE/PacketMath.h:303
Packet4f half
Definition: SSE/PacketMath.h:302
float type
Definition: SSE/PacketMath.h:301
int type
Definition: SSE/PacketMath.h:339
Packet4i half
Definition: SSE/PacketMath.h:340
uint32_t type
Definition: SSE/PacketMath.h:351
Packet4ui half
Definition: SSE/PacketMath.h:352
Definition: GenericPacketMath.h:134
T type
Definition: GenericPacketMath.h:135
T half
Definition: GenericPacketMath.h:136
@ masked_load_available
Definition: GenericPacketMath.h:142
@ size
Definition: GenericPacketMath.h:139
@ masked_store_available
Definition: GenericPacketMath.h:143
@ vectorizable
Definition: GenericPacketMath.h:141
@ alignment
Definition: GenericPacketMath.h:140
std::ofstream out("Result.txt")
Definition: ZVector/PacketMath.h:50