10 #ifndef EIGEN_PACKET_MATH_AVX_H
11 #define EIGEN_PACKET_MATH_AVX_H
14 #include "../../InternalHeaderCheck.h"
20 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
21 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
24 #if !defined(EIGEN_VECTORIZE_AVX512) && !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)
25 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
28 #ifdef EIGEN_VECTORIZE_FMA
29 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
30 #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
37 #ifndef EIGEN_VECTORIZE_AVX512FP16
43 #ifdef EIGEN_VECTORIZE_AVX2
72 #ifndef EIGEN_VECTORIZE_AVX512FP16
82 #ifdef EIGEN_VECTORIZE_AVX2
84 struct is_arithmetic<Packet4l> {
85 enum {
value =
true };
91 struct is_arithmetic<Packet4ul> {
92 enum {
value =
false };
98 #ifndef EIGEN_VECTORIZE_AVX512
100 struct packet_traits<float> : default_packet_traits {
142 #ifdef EIGEN_VECTORIZE_AVX2
198 struct packet_traits<
bfloat16> : default_packet_traits {
237 struct packet_traits<
int> : default_packet_traits {
262 #ifdef EIGEN_VECTORIZE_AVX2
264 struct packet_traits<
int64_t> : default_packet_traits {
265 typedef Packet4l
type;
270 struct packet_traits<
uint64_t> : default_packet_traits {
271 typedef Packet4ul
type;
274 typedef Packet4ul
half;
316 #ifdef EIGEN_VECTORIZE_AVX512
318 masked_fpops_available =
true
326 #ifdef EIGEN_VECTORIZE_AVX2
361 #ifdef EIGEN_VECTORIZE_AVX2
363 struct unpacket_traits<Packet4l> {
375 struct unpacket_traits<Packet4ul> {
377 typedef Packet4ul
half;
403 return _mm_packs_epi32(_mm256_extractf128_si256(_mm256_castps_si256(rf), 0),
404 _mm256_extractf128_si256(_mm256_castps_si256(rf), 1));
407 #ifdef EIGEN_VECTORIZE_AVX2
410 return _mm256_set1_epi64x(from);
414 return _mm256_set1_epi64x(numext::bit_cast<uint64_t>(from));
418 return _mm256_setzero_si256();
422 return _mm256_setzero_si256();
426 return _mm256_set_epi64x(0ll, -1ll, 0ll, -1ll);
430 return _mm256_set_epi64x(0ll, -1ll, 0ll, -1ll);
434 return _mm256_set1_epi64x(*from);
438 return _mm256_set1_epi64x(*from);
442 return _mm256_add_epi64(
a,
b);
446 return _mm256_add_epi64(
a,
b);
450 return padd(pset1<Packet4l>(
a), Packet4l(_mm256_set_epi64x(3ll, 2ll, 1ll, 0ll)));
454 return padd(pset1<Packet4ul>(
a), Packet4ul(_mm256_set_epi64x(3ll, 2ll, 1ll, 0ll)));
458 return _mm256_sub_epi64(
a,
b);
462 return _mm256_sub_epi64(
a,
b);
474 return _mm256_xor_si256(_mm256_cmpgt_epi64(
a,
b), _mm256_set1_epi32(-1));
478 return (Packet4ul)
pcmp_le((Packet4l)
psub(
a, pset1<Packet4ul>(0x8000000000000000UL)),
479 (Packet4l)
psub(
b, pset1<Packet4ul>(0x8000000000000000UL)));
483 return _mm256_cmpgt_epi64(
b,
a);
487 return (Packet4ul)
pcmp_lt((Packet4l)
psub(
a, pset1<Packet4ul>(0x8000000000000000UL)),
488 (Packet4l)
psub(
b, pset1<Packet4ul>(0x8000000000000000UL)));
492 return _mm256_cmpeq_epi64(
a,
b);
496 return _mm256_cmpeq_epi64(
a,
b);
500 return _mm256_cmpeq_epi64(
a,
a);
504 return _mm256_cmpeq_epi64(
a,
a);
508 return _mm256_and_si256(
a,
b);
512 return _mm256_or_si256(
a,
b);
516 return _mm256_xor_si256(
a,
b);
520 return _mm256_xor_si256(
a,
b);
524 return _mm256_andnot_si256(
b,
a);
528 return _mm256_srli_epi64(
a,
N);
532 return _mm256_slli_epi64(
a,
N);
534 #ifdef EIGEN_VECTORIZE_AVX512FP16
537 return _mm256_srai_epi64(
a,
N);
546 __m256i hi_word = _mm256_srai_epi32(
a,
N);
547 __m256i lo_word = _mm256_srli_epi64(
a,
N);
548 return _mm256_blend_epi32(hi_word, lo_word, 0b01010101);
552 __m256i hi_word = _mm256_srai_epi32(
a, 31);
554 return _mm256_blend_epi32(hi_word, lo_word, 0b01010101);
558 return _mm256_cmpgt_epi64(_mm256_setzero_si256(),
a);
562 return parithmetic_shift_right<int(N & 63)>(
a);
584 const Packet4l
a = _mm256_castsi128_si256(_mm_loadu_si128(
reinterpret_cast<const __m128i*
>(from)));
585 return _mm256_permutevar8x32_epi32(
a, _mm256_setr_epi32(0, 1, 0, 1, 2, 3, 2, 3));
590 const Packet4ul
a = _mm256_castsi128_si256(_mm_loadu_si128(
reinterpret_cast<const __m128i*
>(from)));
591 return _mm256_permutevar8x32_epi32(
a, _mm256_setr_epi32(0, 1, 0, 1, 2, 3, 2, 3));
611 return _mm256_set_epi64x(from[3 * stride], from[2 * stride], from[1 * stride], from[0 * stride]);
615 return _mm256_set_epi64x(from[3 * stride], from[2 * stride], from[1 * stride], from[0 * stride]);
619 __m128i low = _mm256_extractf128_si256(from, 0);
623 __m128i high = _mm256_extractf128_si256(from, 1);
629 __m128i low = _mm256_extractf128_si256(from, 0);
633 __m128i high = _mm256_extractf128_si256(from, 1);
639 Packet4l
pa = pset1<Packet4l>(
a);
644 Packet4ul
pa = pset1<Packet4ul>(
a);
657 __m128i
r = _mm_add_epi64(_mm256_castsi256_si128(
a), _mm256_extractf128_si256(
a, 1));
662 __m128i
r = _mm_add_epi64(_mm256_castsi256_si128(
a), _mm256_extractf128_si256(
a, 1));
668 return _mm256_movemask_pd(_mm256_castsi256_pd(
a)) != 0;
672 return _mm256_movemask_pd(_mm256_castsi256_pd(
a)) != 0;
// Helper for shuffling 64-bit integer lanes with the double-precision shuffle:
// A and B (integer packets) are bit-cast to __m256d and handed to
// _mm256_shuffle_pd with the immediate M. The expansion yields a __m256d and
// is NOT cast back to an integer packet, so callers must do that themselves.
675 #define MM256_SHUFFLE_EPI64(A, B, M) _mm256_shuffle_pd(_mm256_castsi256_pd(A), _mm256_castsi256_pd(B), M)
677 __m256d T0 = MM256_SHUFFLE_EPI64(kernel.packet[0], kernel.packet[1], 15);
678 __m256d T1 = MM256_SHUFFLE_EPI64(kernel.packet[0], kernel.packet[1], 0);
679 __m256d T2 = MM256_SHUFFLE_EPI64(kernel.packet[2], kernel.packet[3], 15);
680 __m256d T3 = MM256_SHUFFLE_EPI64(kernel.packet[2], kernel.packet[3], 0);
682 kernel.packet[1] = _mm256_castpd_si256(_mm256_permute2f128_pd(T0, T2, 32));
683 kernel.packet[3] = _mm256_castpd_si256(_mm256_permute2f128_pd(T0, T2, 49));
684 kernel.packet[0] = _mm256_castpd_si256(_mm256_permute2f128_pd(T1, T3, 32));
685 kernel.packet[2] = _mm256_castpd_si256(_mm256_permute2f128_pd(T1, T3, 49));
688 ptranspose((PacketBlock<Packet4l, 4>&)kernel);
692 __m256i cmp = _mm256_cmpgt_epi64(
a,
b);
693 __m256i a_min = _mm256_andnot_si256(cmp,
a);
694 __m256i b_min = _mm256_and_si256(cmp,
b);
695 return Packet4l(_mm256_or_si256(a_min, b_min));
699 return padd((Packet4ul)
pmin((Packet4l)
psub(
a, pset1<Packet4ul>(0x8000000000000000UL)),
700 (Packet4l)
psub(
b, pset1<Packet4ul>(0x8000000000000000UL))),
701 pset1<Packet4ul>(0x8000000000000000UL));
705 __m256i cmp = _mm256_cmpgt_epi64(
a,
b);
706 __m256i a_min = _mm256_and_si256(cmp,
a);
707 __m256i b_min = _mm256_andnot_si256(cmp,
b);
708 return Packet4l(_mm256_or_si256(a_min, b_min));
712 return padd((Packet4ul)
pmax((Packet4l)
psub(
a, pset1<Packet4ul>(0x8000000000000000UL)),
713 (Packet4l)
psub(
b, pset1<Packet4ul>(0x8000000000000000UL))),
714 pset1<Packet4ul>(0x8000000000000000UL));
718 Packet4l pz = pzero<Packet4l>(
a);
719 Packet4l cmp = _mm256_cmpgt_epi64(
a, pz);
729 __m256i upper32_a = _mm256_srli_epi64(
a, 32);
730 __m256i upper32_b = _mm256_srli_epi64(
b, 32);
733 __m256i mul1 = _mm256_mul_epu32(upper32_a,
b);
734 __m256i mul2 = _mm256_mul_epu32(upper32_b,
a);
736 __m256i mul3 = _mm256_mul_epu32(
a,
b);
738 __m256i high = _mm256_slli_epi64(_mm256_add_epi64(mul1, mul2), 32);
739 return _mm256_add_epi64(high, mul3);
743 return (Packet4ul)pmul<Packet4l>((Packet4l)
a, (Packet4l)
b);
749 return _mm256_set1_ps(from);
753 return _mm256_set1_pd(from);
757 return _mm256_set1_epi32(from);
761 return _mm256_set1_epi32(from);
770 return _mm256_castsi256_pd(_mm256_set1_epi64x(from));
775 return _mm256_setzero_ps();
779 return _mm256_setzero_pd();
783 return _mm256_setzero_si256();
787 return _mm256_setzero_si256();
792 return _mm256_castsi256_ps(_mm256_set_epi32(0, -1, 0, -1, 0, -1, 0, -1));
796 return _mm256_set_epi32(0, -1, 0, -1, 0, -1, 0, -1);
800 return _mm256_set_epi32(0, -1, 0, -1, 0, -1, 0, -1);
804 return _mm256_castsi256_pd(_mm256_set_epi32(0, 0, -1, -1, 0, 0, -1, -1));
809 return _mm256_broadcast_ss(from);
813 return _mm256_broadcast_sd(from);
818 return _mm256_add_ps(
a,
b);
820 #ifdef EIGEN_VECTORIZE_AVX512
823 __mmask16 mask =
static_cast<__mmask16
>(umask & 0x00FF);
824 return _mm512_castps512_ps256(_mm512_maskz_add_ps(mask, _mm512_castps256_ps512(
a), _mm512_castps256_ps512(
b)));
829 return _mm256_add_pd(
a,
b);
833 #ifdef EIGEN_VECTORIZE_AVX2
834 return _mm256_add_epi32(
a,
b);
836 __m128i lo = _mm_add_epi32(_mm256_extractf128_si256(
a, 0), _mm256_extractf128_si256(
b, 0));
837 __m128i hi = _mm_add_epi32(_mm256_extractf128_si256(
a, 1), _mm256_extractf128_si256(
b, 1));
838 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
843 #ifdef EIGEN_VECTORIZE_AVX2
844 return _mm256_add_epi32(
a,
b);
846 __m128i lo = _mm_add_epi32(_mm256_extractf128_si256(
a, 0), _mm256_extractf128_si256(
b, 0));
847 __m128i hi = _mm_add_epi32(_mm256_extractf128_si256(
a, 1), _mm256_extractf128_si256(
b, 1));
848 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
871 return _mm256_sub_ps(
a,
b);
875 return _mm256_sub_pd(
a,
b);
879 #ifdef EIGEN_VECTORIZE_AVX2
880 return _mm256_sub_epi32(
a,
b);
882 __m128i lo = _mm_sub_epi32(_mm256_extractf128_si256(
a, 0), _mm256_extractf128_si256(
b, 0));
883 __m128i hi = _mm_sub_epi32(_mm256_extractf128_si256(
a, 1), _mm256_extractf128_si256(
b, 1));
884 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
889 #ifdef EIGEN_VECTORIZE_AVX2
890 return _mm256_sub_epi32(
a,
b);
892 __m128i lo = _mm_sub_epi32(_mm256_extractf128_si256(
a, 0), _mm256_extractf128_si256(
b, 0));
893 __m128i hi = _mm_sub_epi32(_mm256_extractf128_si256(
a, 1), _mm256_extractf128_si256(
b, 1));
894 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
900 const Packet8f mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000));
901 return _mm256_xor_ps(
a, mask);
905 const Packet4d mask = _mm256_castsi256_pd(_mm256_set1_epi64x(0x8000000000000000ULL));
906 return _mm256_xor_pd(
a, mask);
928 return _mm256_mul_ps(
a,
b);
932 return _mm256_mul_pd(
a,
b);
936 #ifdef EIGEN_VECTORIZE_AVX2
937 return _mm256_mullo_epi32(
a,
b);
939 const __m128i lo = _mm_mullo_epi32(_mm256_extractf128_si256(
a, 0), _mm256_extractf128_si256(
b, 0));
940 const __m128i hi = _mm_mullo_epi32(_mm256_extractf128_si256(
a, 1), _mm256_extractf128_si256(
b, 1));
941 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
946 #ifdef EIGEN_VECTORIZE_AVX2
947 return _mm256_mullo_epi32(
a,
b);
949 const __m128i lo = _mm_mullo_epi32(_mm256_extractf128_si256(
a, 0), _mm256_extractf128_si256(
b, 0));
950 const __m128i hi = _mm_mullo_epi32(_mm256_extractf128_si256(
a, 1), _mm256_extractf128_si256(
b, 1));
951 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
957 return _mm256_div_ps(
a,
b);
961 return _mm256_div_pd(
a,
b);
966 #ifdef EIGEN_VECTORIZE_AVX512
967 return _mm512_cvttpd_epi32(_mm512_div_pd(_mm512_cvtepi32_pd(
a), _mm512_cvtepi32_pd(
b)));
971 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
975 #ifdef EIGEN_VECTORIZE_FMA
978 return _mm256_fmadd_ps(
a,
b,
c);
982 return _mm256_fmadd_pd(
a,
b,
c);
987 return _mm256_fmsub_ps(
a,
b,
c);
992 return _mm256_fmsub_pd(
a,
b,
c);
997 return _mm256_fnmadd_ps(
a,
b,
c);
1002 return _mm256_fnmadd_pd(
a,
b,
c);
1007 return _mm256_fnmsub_ps(
a,
b,
c);
1012 return _mm256_fnmsub_pd(
a,
b,
c);
1019 return _mm256_cmp_ps(
a,
b, _CMP_LE_OQ);
1023 return _mm256_cmp_ps(
a,
b, _CMP_LT_OQ);
1027 return _mm256_cmp_ps(
a,
b, _CMP_NGE_UQ);
1031 return _mm256_cmp_ps(
a,
b, _CMP_EQ_OQ);
1035 return _mm256_cmp_ps(
a,
a, _CMP_UNORD_Q);
1040 return _mm256_cmp_pd(
a,
b, _CMP_LE_OQ);
1044 return _mm256_cmp_pd(
a,
b, _CMP_LT_OQ);
1048 return _mm256_cmp_pd(
a,
b, _CMP_NGE_UQ);
1052 return _mm256_cmp_pd(
a,
b, _CMP_EQ_OQ);
1057 #ifdef EIGEN_VECTORIZE_AVX2
1058 return _mm256_xor_si256(_mm256_cmpgt_epi32(
a,
b), _mm256_set1_epi32(-1));
1060 __m128i lo = _mm_cmpgt_epi32(_mm256_extractf128_si256(
a, 0), _mm256_extractf128_si256(
b, 0));
1061 lo = _mm_xor_si128(lo, _mm_set1_epi32(-1));
1062 __m128i hi = _mm_cmpgt_epi32(_mm256_extractf128_si256(
a, 1), _mm256_extractf128_si256(
b, 1));
1063 hi = _mm_xor_si128(hi, _mm_set1_epi32(-1));
1064 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
1069 #ifdef EIGEN_VECTORIZE_AVX2
1070 return _mm256_cmpgt_epi32(
b,
a);
1072 __m128i lo = _mm_cmpgt_epi32(_mm256_extractf128_si256(
b, 0), _mm256_extractf128_si256(
a, 0));
1073 __m128i hi = _mm_cmpgt_epi32(_mm256_extractf128_si256(
b, 1), _mm256_extractf128_si256(
a, 1));
1074 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
1079 #ifdef EIGEN_VECTORIZE_AVX2
1080 return _mm256_cmpeq_epi32(
a,
b);
1082 __m128i lo = _mm_cmpeq_epi32(_mm256_extractf128_si256(
a, 0), _mm256_extractf128_si256(
b, 0));
1083 __m128i hi = _mm_cmpeq_epi32(_mm256_extractf128_si256(
a, 1), _mm256_extractf128_si256(
b, 1));
1084 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
1089 #ifdef EIGEN_VECTORIZE_AVX2
1090 return _mm256_cmpeq_epi32(
a,
b);
1092 __m128i lo = _mm_cmpeq_epi32(_mm256_extractf128_si256(
a, 0), _mm256_extractf128_si256(
b, 0));
1093 __m128i hi = _mm_cmpeq_epi32(_mm256_extractf128_si256(
a, 1), _mm256_extractf128_si256(
b, 1));
1094 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
1100 #if EIGEN_GNUC_STRICT_LESS_THAN(6, 3, 0)
1106 asm(
"vminps %[a], %[b], %[res]" : [
res]
"=x"(
res) : [
a]
"x"(
a), [
b]
"x"(
b));
1110 return _mm256_min_ps(
b,
a);
1115 #if EIGEN_GNUC_STRICT_LESS_THAN(6, 3, 0)
1118 asm(
"vminpd %[a], %[b], %[res]" : [
res]
"=x"(
res) : [
a]
"x"(
a), [
b]
"x"(
b));
1122 return _mm256_min_pd(
b,
a);
1127 #ifdef EIGEN_VECTORIZE_AVX2
1128 return _mm256_min_epi32(
a,
b);
1130 __m128i lo = _mm_min_epi32(_mm256_extractf128_si256(
a, 0), _mm256_extractf128_si256(
b, 0));
1131 __m128i hi = _mm_min_epi32(_mm256_extractf128_si256(
a, 1), _mm256_extractf128_si256(
b, 1));
1132 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
1137 #ifdef EIGEN_VECTORIZE_AVX2
1138 return _mm256_min_epu32(
a,
b);
1140 __m128i lo = _mm_min_epu32(_mm256_extractf128_si256(
a, 0), _mm256_extractf128_si256(
b, 0));
1141 __m128i hi = _mm_min_epu32(_mm256_extractf128_si256(
a, 1), _mm256_extractf128_si256(
b, 1));
1142 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
1148 #if EIGEN_GNUC_STRICT_LESS_THAN(6, 3, 0)
1151 asm(
"vmaxps %[a], %[b], %[res]" : [
res]
"=x"(
res) : [
a]
"x"(
a), [
b]
"x"(
b));
1155 return _mm256_max_ps(
b,
a);
1160 #if EIGEN_GNUC_STRICT_LESS_THAN(6, 3, 0)
1163 asm(
"vmaxpd %[a], %[b], %[res]" : [
res]
"=x"(
res) : [
a]
"x"(
a), [
b]
"x"(
b));
1167 return _mm256_max_pd(
b,
a);
1172 #ifdef EIGEN_VECTORIZE_AVX2
1173 return _mm256_max_epi32(
a,
b);
1175 __m128i lo = _mm_max_epi32(_mm256_extractf128_si256(
a, 0), _mm256_extractf128_si256(
b, 0));
1176 __m128i hi = _mm_max_epi32(_mm256_extractf128_si256(
a, 1), _mm256_extractf128_si256(
b, 1));
1177 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
1182 #ifdef EIGEN_VECTORIZE_AVX2
1183 return _mm256_max_epu32(
a,
b);
1185 __m128i lo = _mm_max_epu32(_mm256_extractf128_si256(
a, 0), _mm256_extractf128_si256(
b, 0));
1186 __m128i hi = _mm_max_epu32(_mm256_extractf128_si256(
a, 1), _mm256_extractf128_si256(
b, 1));
1187 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
1191 #ifdef EIGEN_VECTORIZE_AVX2
1194 return _mm256_sign_epi32(_mm256_set1_epi32(1),
a);
1234 return _mm256_round_ps(
a, _MM_FROUND_CUR_DIRECTION);
1238 return _mm256_round_pd(
a, _MM_FROUND_CUR_DIRECTION);
1243 return _mm256_ceil_ps(
a);
1247 return _mm256_ceil_pd(
a);
1252 return _mm256_floor_ps(
a);
1256 return _mm256_floor_pd(
a);
1261 return _mm256_round_ps(
a, _MM_FROUND_TRUNC);
1265 return _mm256_round_pd(
a, _MM_FROUND_TRUNC);
1270 #ifdef EIGEN_VECTORIZE_AVX2
1272 return _mm256_cmpeq_epi32(
a,
a);
1274 const __m256
b = _mm256_castsi256_ps(
a);
1275 return _mm256_castps_si256(_mm256_cmp_ps(
b,
b, _CMP_TRUE_UQ));
1281 #ifdef EIGEN_VECTORIZE_AVX2
1283 const __m256i
b = _mm256_castps_si256(
a);
1284 return _mm256_castsi256_ps(_mm256_cmpeq_epi32(
b,
b));
1286 return _mm256_cmp_ps(
a,
a, _CMP_TRUE_UQ);
1292 #ifdef EIGEN_VECTORIZE_AVX2
1294 const __m256i
b = _mm256_castpd_si256(
a);
1295 return _mm256_castsi256_pd(_mm256_cmpeq_epi64(
b,
b));
1297 return _mm256_cmp_pd(
a,
a, _CMP_TRUE_UQ);
1303 return _mm256_and_ps(
a,
b);
1307 return _mm256_and_pd(
a,
b);
1311 #ifdef EIGEN_VECTORIZE_AVX2
1312 return _mm256_and_si256(
a,
b);
1314 return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(
a), _mm256_castsi256_ps(
b)));
1319 #ifdef EIGEN_VECTORIZE_AVX2
1320 return _mm256_and_si256(
a,
b);
1322 return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(
a), _mm256_castsi256_ps(
b)));
1328 return _mm256_or_ps(
a,
b);
1332 return _mm256_or_pd(
a,
b);
1336 #ifdef EIGEN_VECTORIZE_AVX2
1337 return _mm256_or_si256(
a,
b);
1339 return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(
a), _mm256_castsi256_ps(
b)));
1344 #ifdef EIGEN_VECTORIZE_AVX2
1345 return _mm256_or_si256(
a,
b);
1347 return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(
a), _mm256_castsi256_ps(
b)));
1353 return _mm256_xor_ps(
a,
b);
1357 return _mm256_xor_pd(
a,
b);
1361 #ifdef EIGEN_VECTORIZE_AVX2
1362 return _mm256_xor_si256(
a,
b);
1364 return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(
a), _mm256_castsi256_ps(
b)));
1369 #ifdef EIGEN_VECTORIZE_AVX2
1370 return _mm256_xor_si256(
a,
b);
1372 return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(
a), _mm256_castsi256_ps(
b)));
1378 return _mm256_andnot_ps(
b,
a);
1382 return _mm256_andnot_pd(
b,
a);
1386 #ifdef EIGEN_VECTORIZE_AVX2
1387 return _mm256_andnot_si256(
b,
a);
1389 return _mm256_castps_si256(_mm256_andnot_ps(_mm256_castsi256_ps(
b), _mm256_castsi256_ps(
a)));
1394 #ifdef EIGEN_VECTORIZE_AVX2
1395 return _mm256_andnot_si256(
b,
a);
1397 return _mm256_castps_si256(_mm256_andnot_ps(_mm256_castsi256_ps(
b), _mm256_castsi256_ps(
a)));
1414 return _mm256_round_ps(
padd(
por(
pand(
a, mask), prev0dot5),
a), _MM_FROUND_TO_ZERO);
1420 return _mm256_round_pd(
padd(
por(
pand(
a, mask), prev0dot5),
a), _MM_FROUND_TO_ZERO);
1425 return _mm256_blendv_ps(
b,
a, mask);
1429 return _mm256_castps_si256(
1430 _mm256_blendv_ps(_mm256_castsi256_ps(
b), _mm256_castsi256_ps(
a), _mm256_castsi256_ps(mask)));
1434 return _mm256_castps_si256(
1435 _mm256_blendv_ps(_mm256_castsi256_ps(
b), _mm256_castsi256_ps(
a), _mm256_castsi256_ps(mask)));
1440 return _mm256_blendv_pd(
b,
a, mask);
1445 #ifdef EIGEN_VECTORIZE_AVX2
1446 return _mm256_srai_epi32(
a,
N);
1448 __m128i lo = _mm_srai_epi32(_mm256_extractf128_si256(
a, 0),
N);
1449 __m128i hi = _mm_srai_epi32(_mm256_extractf128_si256(
a, 1),
N);
1450 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
1456 #ifdef EIGEN_VECTORIZE_AVX2
1457 return _mm256_srli_epi32(
a,
N);
1459 __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(
a, 0),
N);
1460 __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(
a, 1),
N);
1461 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
1467 #ifdef EIGEN_VECTORIZE_AVX2
1468 return _mm256_slli_epi32(
a,
N);
1470 __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(
a, 0),
N);
1471 __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(
a, 1),
N);
1472 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
1525 #ifdef EIGEN_VECTORIZE_AVX512
1526 __mmask16 mask =
static_cast<__mmask16
>(umask & 0x00FF);
1529 Packet8i mask = _mm256_set1_epi8(
static_cast<char>(umask));
1531 _mm256_set_epi32(0xffffff7f, 0xffffffbf, 0xffffffdf, 0xffffffef, 0xfffffff7, 0xfffffffb, 0xfffffffd, 0xfffffffe);
1533 mask = pcmp_eq<Packet8i>(mask, _mm256_set1_epi32(0xffffffff));
1547 Packet8f tmp = _mm256_broadcast_ps((
const __m128*)(
const void*)from);
1549 tmp = _mm256_blend_ps(
1550 tmp, _mm256_castps128_ps256(_mm_permute_ps(_mm256_castps256_ps128(
tmp), _MM_SHUFFLE(1, 0, 1, 0))), 15);
1552 return _mm256_permute_ps(
tmp, _MM_SHUFFLE(3, 3, 2, 2));
1557 Packet4d tmp = _mm256_broadcast_pd((
const __m128d*)(
const void*)from);
1558 return _mm256_permute_pd(
tmp, 3 << 2);
1563 #ifdef EIGEN_VECTORIZE_AVX2
1565 return _mm256_permutevar8x32_epi32(
a, _mm256_setr_epi32(0, 0, 1, 1, 2, 2, 3, 3));
1567 __m256
tmp = _mm256_broadcast_ps((
const __m128*)(
const void*)from);
1569 tmp = _mm256_blend_ps(
1570 tmp, _mm256_castps128_ps256(_mm_permute_ps(_mm256_castps256_ps128(
tmp), _MM_SHUFFLE(1, 0, 1, 0))), 15);
1572 return _mm256_castps_si256(_mm256_permute_ps(
tmp, _MM_SHUFFLE(3, 3, 2, 2)));
1577 #ifdef EIGEN_VECTORIZE_AVX2
1579 return _mm256_permutevar8x32_epi32(
a, _mm256_setr_epi32(0, 0, 1, 1, 2, 2, 3, 3));
1581 __m256
tmp = _mm256_broadcast_ps((
const __m128*)(
const void*)from);
1583 tmp = _mm256_blend_ps(
1584 tmp, _mm256_castps128_ps256(_mm_permute_ps(_mm256_castps256_ps128(
tmp), _MM_SHUFFLE(1, 0, 1, 0))), 15);
1587 return _mm256_castps_si256(_mm256_permute_ps(
tmp, _MM_SHUFFLE(3, 3, 2, 2)));
1594 Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(from));
1595 return _mm256_insertf128_ps(
tmp, _mm_broadcast_ss(from + 1), 1);
1599 return _mm256_insertf128_si256(_mm256_set1_epi32(*from), _mm_set1_epi32(*(from + 1)), 1);
1603 return _mm256_insertf128_si256(_mm256_set1_epi32(*from), _mm_set1_epi32(*(from + 1)), 1);
1642 #ifdef EIGEN_VECTORIZE_AVX512
1643 __mmask16 mask =
static_cast<__mmask16
>(umask & 0x00FF);
1646 Packet8i mask = _mm256_set1_epi8(
static_cast<char>(umask));
1648 _mm256_set_epi32(0x7f7f7f7f, 0xbfbfbfbf, 0xdfdfdfdf, 0xefefefef, 0xf7f7f7f7, 0xfbfbfbfb, 0xfdfdfdfd, 0xfefefefe);
1650 mask = pcmp_eq<Packet8i>(mask, _mm256_set1_epi32(0xffffffff));
1653 const __m256i ifrom = _mm256_castps_si256(from);
1655 reinterpret_cast<char*
>(to));
1657 reinterpret_cast<char*
>(to + 4));
1669 return _mm256_set_ps(from[7 * stride], from[6 * stride], from[5 * stride], from[4 * stride], from[3 * stride],
1670 from[2 * stride], from[1 * stride], from[0 * stride]);
1674 return _mm256_set_pd(from[3 * stride], from[2 * stride], from[1 * stride], from[0 * stride]);
1678 return _mm256_set_epi32(from[7 * stride], from[6 * stride], from[5 * stride], from[4 * stride], from[3 * stride],
1679 from[2 * stride], from[1 * stride], from[0 * stride]);
1688 __m128 low = _mm256_extractf128_ps(from, 0);
1689 to[stride * 0] = _mm_cvtss_f32(low);
1690 to[stride * 1] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1));
1691 to[stride * 2] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 2));
1692 to[stride * 3] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3));
1694 __m128 high = _mm256_extractf128_ps(from, 1);
1695 to[stride * 4] = _mm_cvtss_f32(high);
1696 to[stride * 5] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1));
1697 to[stride * 6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2));
1698 to[stride * 7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3));
1702 __m128d low = _mm256_extractf128_pd(from, 0);
1703 to[stride * 0] = _mm_cvtsd_f64(low);
1704 to[stride * 1] = _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1));
1705 __m128d high = _mm256_extractf128_pd(from, 1);
1706 to[stride * 2] = _mm_cvtsd_f64(high);
1707 to[stride * 3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));
1711 __m128i low = _mm256_extractf128_si256(from, 0);
1712 to[stride * 0] = _mm_extract_epi32(low, 0);
1713 to[stride * 1] = _mm_extract_epi32(low, 1);
1714 to[stride * 2] = _mm_extract_epi32(low, 2);
1715 to[stride * 3] = _mm_extract_epi32(low, 3);
1717 __m128i high = _mm256_extractf128_si256(from, 1);
1718 to[stride * 4] = _mm_extract_epi32(high, 0);
1719 to[stride * 5] = _mm_extract_epi32(high, 1);
1720 to[stride * 6] = _mm_extract_epi32(high, 2);
1721 to[stride * 7] = _mm_extract_epi32(high, 3);
1744 #ifndef EIGEN_VECTORIZE_AVX512
1765 return _mm_cvtss_f32(_mm256_castps256_ps128(
a));
1769 return _mm_cvtsd_f64(_mm256_castpd256_pd128(
a));
1773 return _mm_cvtsi128_si32(_mm256_castsi256_si128(
a));
1777 return numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(_mm256_castsi256_si128(
a)));
1782 __m256
tmp = _mm256_shuffle_ps(
a,
a, 0x1b);
1783 return _mm256_permute2f128_ps(
tmp,
tmp, 1);
1787 __m256d
tmp = _mm256_shuffle_pd(
a,
a, 5);
1788 return _mm256_permute2f128_pd(
tmp,
tmp, 1);
1792 __m256d swap_halves = _mm256_permute2f128_pd(
a,
a,1);
1793 return _mm256_permute_pd(swap_halves,5);
1798 return _mm256_castps_si256(
preverse(_mm256_castsi256_ps(
a)));
1802 return _mm256_castps_si256(
preverse(_mm256_castsi256_ps(
a)));
1805 #ifdef EIGEN_VECTORIZE_AVX2
1808 return _mm256_castpd_si256(
preverse(_mm256_castsi256_pd(
a)));
1812 return _mm256_castpd_si256(
preverse(_mm256_castsi256_pd(
a)));
1819 const Packet8f mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF));
1820 return _mm256_and_ps(
a, mask);
1824 const Packet4d mask = _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FFFFFFFFFFFFFFF));
1825 return _mm256_and_pd(
a, mask);
1829 #ifdef EIGEN_VECTORIZE_AVX2
1830 return _mm256_abs_epi32(
a);
1832 __m128i lo = _mm_abs_epi32(_mm256_extractf128_si256(
a, 0));
1833 __m128i hi = _mm_abs_epi32(_mm256_extractf128_si256(
a, 1));
1834 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
1844 return _mm_cmpgt_epi16(_mm_setzero_si128(),
a);
1848 return _mm_cmpgt_epi16(_mm_setzero_si128(),
a);
1852 #ifdef EIGEN_VECTORIZE_AVX2
1853 return _mm256_castsi256_ps(_mm256_cmpgt_epi32(_mm256_setzero_si256(), _mm256_castps_si256(
a)));
1855 return _mm256_castsi256_ps(parithmetic_shift_right<31>(
Packet8i(_mm256_castps_si256(
a))));
1860 return _mm256_setzero_si256();
1862 #ifdef EIGEN_VECTORIZE_AVX2
1865 return _mm256_castsi256_pd(_mm256_cmpgt_epi64(_mm256_setzero_si256(), _mm256_castpd_si256(
a)));
1869 return _mm256_setzero_si256();
1882 __m256i a_expo = _mm256_castpd_si256(
pand(
a, cst_exp_mask));
1883 #ifdef EIGEN_VECTORIZE_AVX2
1884 a_expo = _mm256_srli_epi64(a_expo, 52);
1885 __m128i lo = _mm256_extractf128_si256(a_expo, 0);
1886 __m128i hi = _mm256_extractf128_si256(a_expo, 1);
1888 __m128i lo = _mm256_extractf128_si256(a_expo, 0);
1889 __m128i hi = _mm256_extractf128_si256(a_expo, 1);
1890 lo = _mm_srli_epi64(lo, 52);
1891 hi = _mm_srli_epi64(hi, 52);
1895 Packet4d exponent = _mm256_insertf128_pd(_mm256_setzero_pd(), exponent_lo, 0);
1896 exponent = _mm256_insertf128_pd(exponent, exponent_hi, 1);
1922 Packet4i lo = _mm_slli_epi64(hi, 52);
1923 hi = _mm_slli_epi64(_mm_srli_epi64(hi, 32), 52);
1924 Packet4d c = _mm256_castsi256_pd(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1));
1930 lo = _mm_slli_epi64(hi, 52);
1931 hi = _mm_slli_epi64(_mm_srli_epi64(hi, 32), 52);
1932 c = _mm256_castsi256_pd(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1));
1942 const Packet4i e = _mm256_cvtpd_epi32(
pmin(
pmax(exponent, min_exponent), max_exponent));
1947 const Packet4i lo = _mm_slli_epi64(hi, 52);
1948 hi = _mm_slli_epi64(_mm_srli_epi64(hi, 32), 52);
1949 const Packet4d c = _mm256_castsi256_pd(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1));
1955 return predux(
Packet4f(_mm_add_ps(_mm256_castps256_ps128(
a), _mm256_extractf128_ps(
a, 1))));
1959 return predux(
Packet2d(_mm_add_pd(_mm256_castpd256_pd128(
a), _mm256_extractf128_pd(
a, 1))));
1963 return predux(
Packet4i(_mm_add_epi32(_mm256_castsi256_si128(
a), _mm256_extractf128_si256(
a, 1))));
1967 return predux(
Packet4ui(_mm_add_epi32(_mm256_castsi256_si128(
a), _mm256_extractf128_si256(
a, 1))));
1972 return _mm_add_ps(_mm256_castps256_ps128(
a), _mm256_extractf128_ps(
a, 1));
1976 return _mm_add_epi32(_mm256_castsi256_si128(
a), _mm256_extractf128_si256(
a, 1));
1980 return _mm_add_epi32(_mm256_castsi256_si128(
a), _mm256_extractf128_si256(
a, 1));
1986 tmp = _mm256_mul_ps(
a, _mm256_permute2f128_ps(
a,
a, 1));
1987 tmp = _mm256_mul_ps(
tmp, _mm256_shuffle_ps(
tmp,
tmp, _MM_SHUFFLE(1, 0, 3, 2)));
1993 tmp = _mm256_mul_pd(
a, _mm256_permute2f128_pd(
a,
a, 1));
1999 Packet8f tmp = _mm256_min_ps(
a, _mm256_permute2f128_ps(
a,
a, 1));
2000 tmp = _mm256_min_ps(
tmp, _mm256_shuffle_ps(
tmp,
tmp, _MM_SHUFFLE(1, 0, 3, 2)));
2005 Packet4d tmp = _mm256_min_pd(
a, _mm256_permute2f128_pd(
a,
a, 1));
2011 Packet8f tmp = _mm256_max_ps(
a, _mm256_permute2f128_ps(
a,
a, 1));
2012 tmp = _mm256_max_ps(
tmp, _mm256_shuffle_ps(
tmp,
tmp, _MM_SHUFFLE(1, 0, 3, 2)));
2018 Packet4d tmp = _mm256_max_pd(
a, _mm256_permute2f128_pd(
a,
a, 1));
2030 return _mm256_movemask_ps(
x) != 0;
2035 return _mm256_movemask_pd(
x) != 0;
2040 return _mm256_movemask_ps(_mm256_castsi256_ps(
x)) != 0;
2044 return _mm256_movemask_ps(_mm256_castsi256_ps(
x)) != 0;
2049 return _mm_movemask_epi8(
x) != 0;
2053 return _mm_movemask_epi8(
x) != 0;
2057 __m256 T0 = _mm256_unpacklo_ps(kernel.
packet[0], kernel.
packet[1]);
2058 __m256 T1 = _mm256_unpackhi_ps(kernel.
packet[0], kernel.
packet[1]);
2059 __m256 T2 = _mm256_unpacklo_ps(kernel.
packet[2], kernel.
packet[3]);
2060 __m256 T3 = _mm256_unpackhi_ps(kernel.
packet[2], kernel.
packet[3]);
2061 __m256 T4 = _mm256_unpacklo_ps(kernel.
packet[4], kernel.
packet[5]);
2062 __m256 T5 = _mm256_unpackhi_ps(kernel.
packet[4], kernel.
packet[5]);
2063 __m256 T6 = _mm256_unpacklo_ps(kernel.
packet[6], kernel.
packet[7]);
2064 __m256 T7 = _mm256_unpackhi_ps(kernel.
packet[6], kernel.
packet[7]);
2065 __m256
S0 = _mm256_shuffle_ps(T0, T2, _MM_SHUFFLE(1, 0, 1, 0));
2066 __m256
S1 = _mm256_shuffle_ps(T0, T2, _MM_SHUFFLE(3, 2, 3, 2));
2067 __m256 S2 = _mm256_shuffle_ps(T1, T3, _MM_SHUFFLE(1, 0, 1, 0));
2068 __m256 S3 = _mm256_shuffle_ps(T1, T3, _MM_SHUFFLE(3, 2, 3, 2));
2069 __m256 S4 = _mm256_shuffle_ps(T4, T6, _MM_SHUFFLE(1, 0, 1, 0));
2070 __m256 S5 = _mm256_shuffle_ps(T4, T6, _MM_SHUFFLE(3, 2, 3, 2));
2071 __m256 S6 = _mm256_shuffle_ps(T5, T7, _MM_SHUFFLE(1, 0, 1, 0));
2072 __m256 S7 = _mm256_shuffle_ps(T5, T7, _MM_SHUFFLE(3, 2, 3, 2));
2073 kernel.
packet[0] = _mm256_permute2f128_ps(
S0, S4, 0x20);
2074 kernel.
packet[1] = _mm256_permute2f128_ps(
S1, S5, 0x20);
2075 kernel.
packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20);
2076 kernel.
packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20);
2077 kernel.
packet[4] = _mm256_permute2f128_ps(
S0, S4, 0x31);
2078 kernel.
packet[5] = _mm256_permute2f128_ps(
S1, S5, 0x31);
2079 kernel.
packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31);
2080 kernel.
packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31);
2084 __m256 T0 = _mm256_unpacklo_ps(kernel.
packet[0], kernel.
packet[1]);
2085 __m256 T1 = _mm256_unpackhi_ps(kernel.
packet[0], kernel.
packet[1]);
2086 __m256 T2 = _mm256_unpacklo_ps(kernel.
packet[2], kernel.
packet[3]);
2087 __m256 T3 = _mm256_unpackhi_ps(kernel.
packet[2], kernel.
packet[3]);
2089 __m256
S0 = _mm256_shuffle_ps(T0, T2, _MM_SHUFFLE(1, 0, 1, 0));
2090 __m256
S1 = _mm256_shuffle_ps(T0, T2, _MM_SHUFFLE(3, 2, 3, 2));
2091 __m256 S2 = _mm256_shuffle_ps(T1, T3, _MM_SHUFFLE(1, 0, 1, 0));
2092 __m256 S3 = _mm256_shuffle_ps(T1, T3, _MM_SHUFFLE(3, 2, 3, 2));
2094 kernel.
packet[0] = _mm256_permute2f128_ps(
S0,
S1, 0x20);
2095 kernel.
packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20);
2096 kernel.
packet[2] = _mm256_permute2f128_ps(
S0,
S1, 0x31);
2097 kernel.
packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);
// Helper for shuffling 32-bit integer lanes with the single-precision shuffle:
// A and B are bit-cast to __m256, combined by _mm256_shuffle_ps using the
// immediate M, and the result is bit-cast back to __m256i.
2100 #define MM256_SHUFFLE_EPI32(A, B, M) \
2101 _mm256_castps_si256(_mm256_shuffle_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B), M))
// Interleave helpers for 32-bit integer lanes. When EIGEN_VECTORIZE_AVX2 is
// not defined, the unpack is routed through the float intrinsics
// (_mm256_unpacklo_ps / _mm256_unpackhi_ps) with bit-casts on the way in and
// on the way out.
2103 #ifndef EIGEN_VECTORIZE_AVX2
2104 #define MM256_UNPACKLO_EPI32(A, B) \
2105 _mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B)))
2106 #define MM256_UNPACKHI_EPI32(A, B) \
2107 _mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B)))
// NOTE(review): the two definitions below presumably sit in the other branch
// of the conditional above (its #else/#endif lines are not visible in this
// view) -- confirm. They forward directly to the integer unpack intrinsics.
2109 #define MM256_UNPACKLO_EPI32(A, B) _mm256_unpacklo_epi32(A, B)
2110 #define MM256_UNPACKHI_EPI32(A, B) _mm256_unpackhi_epi32(A, B)
2130 kernel.
packet[0] = _mm256_permute2f128_si256(
S0, S4, 0x20);
2131 kernel.
packet[1] = _mm256_permute2f128_si256(
S1, S5, 0x20);
2132 kernel.
packet[2] = _mm256_permute2f128_si256(S2, S6, 0x20);
2133 kernel.
packet[3] = _mm256_permute2f128_si256(S3, S7, 0x20);
2134 kernel.
packet[4] = _mm256_permute2f128_si256(
S0, S4, 0x31);
2135 kernel.
packet[5] = _mm256_permute2f128_si256(
S1, S5, 0x31);
2136 kernel.
packet[6] = _mm256_permute2f128_si256(S2, S6, 0x31);
2137 kernel.
packet[7] = _mm256_permute2f128_si256(S3, S7, 0x31);
2154 kernel.
packet[0] = _mm256_permute2f128_si256(
S0,
S1, 0x20);
2155 kernel.
packet[1] = _mm256_permute2f128_si256(S2, S3, 0x20);
2156 kernel.
packet[2] = _mm256_permute2f128_si256(
S0,
S1, 0x31);
2157 kernel.
packet[3] = _mm256_permute2f128_si256(S2, S3, 0x31);
2164 __m256d T0 = _mm256_shuffle_pd(kernel.
packet[0], kernel.
packet[1], 15);
2165 __m256d T1 = _mm256_shuffle_pd(kernel.
packet[0], kernel.
packet[1], 0);
2166 __m256d T2 = _mm256_shuffle_pd(kernel.
packet[2], kernel.
packet[3], 15);
2167 __m256d T3 = _mm256_shuffle_pd(kernel.
packet[2], kernel.
packet[3], 0);
2169 kernel.
packet[1] = _mm256_permute2f128_pd(T0, T2, 32);
2170 kernel.
packet[3] = _mm256_permute2f128_pd(T0, T2, 49);
2171 kernel.
packet[0] = _mm256_permute2f128_pd(T1, T3, 32);
2172 kernel.
packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
2176 return _mm256_set_epi64x(0 - ifPacket.
select[3], 0 - ifPacket.
select[2], 0 - ifPacket.
select[1],
2181 return _mm256_set_epi32(0 - ifPacket.
select[7], 0 - ifPacket.
select[6], 0 - ifPacket.
select[5],
2189 const __m256 true_mask = _mm256_castsi256_ps(
avx_blend_mask(ifPacket));
2196 const __m256d true_mask = _mm256_castsi256_pd(
avx_blend_mask(ifPacket));
2201 #ifndef EIGEN_VECTORIZE_AVX512FP16
2218 return _mm_set1_epi16(numext::bit_cast<numext::uint16_t>(from));
2223 return numext::bit_cast<Eigen::half>(
static_cast<numext::uint16_t>(_mm_extract_epi16(from, 0)));
2228 return _mm_load_si128(
reinterpret_cast<const __m128i*
>(from));
2233 return _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(from));
2238 _mm_store_si128(
reinterpret_cast<__m128i*
>(to), from);
2243 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(to), from);
2252 return _mm_set_epi16(d, d,
c,
c,
b,
b,
a,
a);
2259 return _mm_set_epi16(
b,
b,
b,
b,
a,
a,
a,
a);
2264 return _mm_cmpeq_epi32(
a,
a);
2269 const __m128i sign_mask = _mm_set1_epi16(
static_cast<numext::uint16_t>(0x8000));
2270 return _mm_andnot_si128(sign_mask,
a);
2274 #ifdef EIGEN_HAS_FP16_C
2275 return _mm256_cvtph_ps(
a);
2278 _mm256_insertf128_si256(_mm256_castsi128_si256(half2floatsse(
a)), half2floatsse(_mm_srli_si128(
a, 8)), 1));
2284 #ifdef EIGEN_HAS_FP16_C
2285 return _mm256_cvtps_ph(
a, _MM_FROUND_TO_NEAREST_INT);
2287 __m128i lo =
float2half(_mm256_extractf128_ps(
a, 0));
2288 __m128i hi =
float2half(_mm256_extractf128_ps(
a, 1));
2289 return _mm_packus_epi32(lo, hi);
2312 return _mm_or_si128(
a,
b);
2316 return _mm_xor_si128(
a,
b);
2320 return _mm_and_si128(
a,
b);
2324 return _mm_andnot_si128(
b,
a);
2329 return _mm_blendv_epi8(
b,
a, mask);
2385 return _mm_xor_si128(
a, sign_mask);
2388 #ifndef EIGEN_VECTORIZE_AVX512FP16
2424 const numext::uint16_t s0 = numext::bit_cast<numext::uint16_t>(from[0 * stride]);
2425 const numext::uint16_t s1 = numext::bit_cast<numext::uint16_t>(from[1 * stride]);
2426 const numext::uint16_t s2 = numext::bit_cast<numext::uint16_t>(from[2 * stride]);
2427 const numext::uint16_t s3 = numext::bit_cast<numext::uint16_t>(from[3 * stride]);
2428 const numext::uint16_t s4 = numext::bit_cast<numext::uint16_t>(from[4 * stride]);
2429 const numext::uint16_t s5 = numext::bit_cast<numext::uint16_t>(from[5 * stride]);
2430 const numext::uint16_t s6 = numext::bit_cast<numext::uint16_t>(from[6 * stride]);
2431 const numext::uint16_t s7 = numext::bit_cast<numext::uint16_t>(from[7 * stride]);
2432 return _mm_set_epi16(s7, s6, s5, s4, s3, s2, s1, s0);
2439 to[stride * 0] = aux[0];
2440 to[stride * 1] = aux[1];
2441 to[stride * 2] = aux[2];
2442 to[stride * 3] = aux[3];
2443 to[stride * 4] = aux[4];
2444 to[stride * 5] = aux[5];
2445 to[stride * 6] = aux[6];
2446 to[stride * 7] = aux[7];
2449 #ifndef EIGEN_VECTORIZE_AVX512FP16
2481 __m128i
m = _mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
2482 return _mm_shuffle_epi8(
a,
m);
2489 __m128i d = kernel.
packet[3];
2492 __m128i g = kernel.
packet[6];
2493 __m128i h = kernel.
packet[7];
2495 __m128i a03b03 = _mm_unpacklo_epi16(
a,
b);
2496 __m128i c03d03 = _mm_unpacklo_epi16(
c, d);
2497 __m128i e03f03 = _mm_unpacklo_epi16(
e,
f);
2498 __m128i g03h03 = _mm_unpacklo_epi16(g, h);
2499 __m128i a47b47 = _mm_unpackhi_epi16(
a,
b);
2500 __m128i c47d47 = _mm_unpackhi_epi16(
c, d);
2501 __m128i e47f47 = _mm_unpackhi_epi16(
e,
f);
2502 __m128i g47h47 = _mm_unpackhi_epi16(g, h);
2504 __m128i a01b01c01d01 = _mm_unpacklo_epi32(a03b03, c03d03);
2505 __m128i a23b23c23d23 = _mm_unpackhi_epi32(a03b03, c03d03);
2506 __m128i e01f01g01h01 = _mm_unpacklo_epi32(e03f03, g03h03);
2507 __m128i e23f23g23h23 = _mm_unpackhi_epi32(e03f03, g03h03);
2508 __m128i a45b45c45d45 = _mm_unpacklo_epi32(a47b47, c47d47);
2509 __m128i a67b67c67d67 = _mm_unpackhi_epi32(a47b47, c47d47);
2510 __m128i e45f45g45h45 = _mm_unpacklo_epi32(e47f47, g47h47);
2511 __m128i e67f67g67h67 = _mm_unpackhi_epi32(e47f47, g47h47);
2513 __m128i a0b0c0d0e0f0g0h0 = _mm_unpacklo_epi64(a01b01c01d01, e01f01g01h01);
2514 __m128i a1b1c1d1e1f1g1h1 = _mm_unpackhi_epi64(a01b01c01d01, e01f01g01h01);
2515 __m128i a2b2c2d2e2f2g2h2 = _mm_unpacklo_epi64(a23b23c23d23, e23f23g23h23);
2516 __m128i a3b3c3d3e3f3g3h3 = _mm_unpackhi_epi64(a23b23c23d23, e23f23g23h23);
2517 __m128i a4b4c4d4e4f4g4h4 = _mm_unpacklo_epi64(a45b45c45d45, e45f45g45h45);
2518 __m128i a5b5c5d5e5f5g5h5 = _mm_unpackhi_epi64(a45b45c45d45, e45f45g45h45);
2519 __m128i a6b6c6d6e6f6g6h6 = _mm_unpacklo_epi64(a67b67c67d67, e67f67g67h67);
2520 __m128i a7b7c7d7e7f7g7h7 = _mm_unpackhi_epi64(a67b67c67d67, e67f67g67h67);
2522 kernel.
packet[0] = a0b0c0d0e0f0g0h0;
2523 kernel.
packet[1] = a1b1c1d1e1f1g1h1;
2524 kernel.
packet[2] = a2b2c2d2e2f2g2h2;
2525 kernel.
packet[3] = a3b3c3d3e3f3g3h3;
2526 kernel.
packet[4] = a4b4c4d4e4f4g4h4;
2527 kernel.
packet[5] = a5b5c5d5e5f5g5h5;
2528 kernel.
packet[6] = a6b6c6d6e6f6g6h6;
2529 kernel.
packet[7] = a7b7c7d7e7f7g7h7;
2534 pstore<Eigen::half>(in[0], kernel.
packet[0]);
2535 pstore<Eigen::half>(in[1], kernel.
packet[1]);
2536 pstore<Eigen::half>(in[2], kernel.
packet[2]);
2537 pstore<Eigen::half>(in[3], kernel.
packet[3]);
2541 for (
int i = 0;
i < 4; ++
i) {
2542 for (
int j = 0;
j < 4; ++
j) {
2545 for (
int j = 0;
j < 4; ++
j) {
2546 out[
i][
j + 4] = in[
j][2 *
i + 1];
2559 #ifdef EIGEN_VECTORIZE_AVX2
2560 __m256i extend = _mm256_cvtepu16_epi32(
a);
2561 return _mm256_castsi256_ps(_mm256_slli_epi32(extend, 16));
2563 __m128i lo = _mm_cvtepu16_epi32(
a);
2564 __m128i hi = _mm_cvtepu16_epi32(_mm_srli_si128(
a, 8));
2565 __m128i lo_shift = _mm_slli_epi32(lo, 16);
2566 __m128i hi_shift = _mm_slli_epi32(hi, 16);
2567 return _mm256_castsi256_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo_shift), hi_shift, 1));
2573 __m256i input = _mm256_castps_si256(
a);
2575 #ifdef EIGEN_VECTORIZE_AVX2
2577 __m256i
t = _mm256_srli_epi32(input, 16);
2579 t = _mm256_and_si256(
t, _mm256_set1_epi32(1));
2581 t = _mm256_add_epi32(
t, _mm256_set1_epi32(0x7fff));
2583 t = _mm256_add_epi32(
t, input);
2585 t = _mm256_srli_epi32(
t, 16);
2587 __m256 mask = _mm256_cmp_ps(
a,
a, _CMP_ORD_Q);
2588 __m256i nan = _mm256_set1_epi32(0x7fc0);
2589 t = _mm256_blendv_epi8(nan,
t, _mm256_castps_si256(mask));
2591 return _mm_packus_epi32(_mm256_extractf128_si256(
t, 0), _mm256_extractf128_si256(
t, 1));
2594 __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(input, 0), 16);
2595 __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(input, 1), 16);
2597 lo = _mm_and_si128(lo, _mm_set1_epi32(1));
2598 hi = _mm_and_si128(hi, _mm_set1_epi32(1));
2600 lo = _mm_add_epi32(lo, _mm_set1_epi32(0x7fff));
2601 hi = _mm_add_epi32(hi, _mm_set1_epi32(0x7fff));
2603 lo = _mm_add_epi32(lo, _mm256_extractf128_si256(input, 0));
2604 hi = _mm_add_epi32(hi, _mm256_extractf128_si256(input, 1));
2606 lo = _mm_srli_epi32(lo, 16);
2607 hi = _mm_srli_epi32(hi, 16);
2609 __m256 mask = _mm256_cmp_ps(
a,
a, _CMP_ORD_Q);
2610 __m128i nan = _mm_set1_epi32(0x7fc0);
2611 lo = _mm_blendv_epi8(nan, lo, _mm_castps_si128(_mm256_castps256_ps128(mask)));
2612 hi = _mm_blendv_epi8(nan, hi, _mm_castps_si128(_mm256_extractf128_ps(mask, 1)));
2614 return _mm_packus_epi32(lo, hi);
2620 return _mm_set1_epi16(numext::bit_cast<numext::uint16_t>(from));
2625 return numext::bit_cast<bfloat16>(
static_cast<numext::uint16_t>(_mm_extract_epi16(from, 0)));
2630 return _mm_load_si128(
reinterpret_cast<const __m128i*
>(from));
2635 return _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(from));
2640 _mm_store_si128(
reinterpret_cast<__m128i*
>(to), from);
2645 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(to), from);
2654 return _mm_set_epi16(d, d,
c,
c,
b,
b,
a,
a);
2661 return _mm_set_epi16(
b,
b,
b,
b,
a,
a,
a,
a);
2666 return _mm_cmpeq_epi32(
a,
a);
2671 const __m128i sign_mask = _mm_set1_epi16(
static_cast<numext::uint16_t>(0x8000));
2672 return _mm_andnot_si128(sign_mask,
a);
2692 return _mm_or_si128(
a,
b);
2696 return _mm_xor_si128(
a,
b);
2700 return _mm_and_si128(
a,
b);
2704 return _mm_andnot_si128(
b,
a);
2709 return _mm_blendv_epi8(
b,
a, mask);
2765 return _mm_xor_si128(
a, sign_mask);
2790 const numext::uint16_t s0 = numext::bit_cast<numext::uint16_t>(from[0 * stride]);
2791 const numext::uint16_t s1 = numext::bit_cast<numext::uint16_t>(from[1 * stride]);
2792 const numext::uint16_t s2 = numext::bit_cast<numext::uint16_t>(from[2 * stride]);
2793 const numext::uint16_t s3 = numext::bit_cast<numext::uint16_t>(from[3 * stride]);
2794 const numext::uint16_t s4 = numext::bit_cast<numext::uint16_t>(from[4 * stride]);
2795 const numext::uint16_t s5 = numext::bit_cast<numext::uint16_t>(from[5 * stride]);
2796 const numext::uint16_t s6 = numext::bit_cast<numext::uint16_t>(from[6 * stride]);
2797 const numext::uint16_t s7 = numext::bit_cast<numext::uint16_t>(from[7 * stride]);
2798 return _mm_set_epi16(s7, s6, s5, s4, s3, s2, s1, s0);
2805 to[stride * 0] = aux[0];
2806 to[stride * 1] = aux[1];
2807 to[stride * 2] = aux[2];
2808 to[stride * 3] = aux[3];
2809 to[stride * 4] = aux[4];
2810 to[stride * 5] = aux[5];
2811 to[stride * 6] = aux[6];
2812 to[stride * 7] = aux[7];
2837 __m128i
m = _mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
2838 return _mm_shuffle_epi8(
a,
m);
2842 __m128i
a = kernel.packet[0];
2843 __m128i
b = kernel.packet[1];
2844 __m128i
c = kernel.packet[2];
2845 __m128i d = kernel.packet[3];
2846 __m128i
e = kernel.packet[4];
2847 __m128i
f = kernel.packet[5];
2848 __m128i g = kernel.packet[6];
2849 __m128i h = kernel.packet[7];
2851 __m128i a03b03 = _mm_unpacklo_epi16(
a,
b);
2852 __m128i c03d03 = _mm_unpacklo_epi16(
c, d);
2853 __m128i e03f03 = _mm_unpacklo_epi16(
e,
f);
2854 __m128i g03h03 = _mm_unpacklo_epi16(g, h);
2855 __m128i a47b47 = _mm_unpackhi_epi16(
a,
b);
2856 __m128i c47d47 = _mm_unpackhi_epi16(
c, d);
2857 __m128i e47f47 = _mm_unpackhi_epi16(
e,
f);
2858 __m128i g47h47 = _mm_unpackhi_epi16(g, h);
2860 __m128i a01b01c01d01 = _mm_unpacklo_epi32(a03b03, c03d03);
2861 __m128i a23b23c23d23 = _mm_unpackhi_epi32(a03b03, c03d03);
2862 __m128i e01f01g01h01 = _mm_unpacklo_epi32(e03f03, g03h03);
2863 __m128i e23f23g23h23 = _mm_unpackhi_epi32(e03f03, g03h03);
2864 __m128i a45b45c45d45 = _mm_unpacklo_epi32(a47b47, c47d47);
2865 __m128i a67b67c67d67 = _mm_unpackhi_epi32(a47b47, c47d47);
2866 __m128i e45f45g45h45 = _mm_unpacklo_epi32(e47f47, g47h47);
2867 __m128i e67f67g67h67 = _mm_unpackhi_epi32(e47f47, g47h47);
2869 kernel.packet[0] = _mm_unpacklo_epi64(a01b01c01d01, e01f01g01h01);
2870 kernel.packet[1] = _mm_unpackhi_epi64(a01b01c01d01, e01f01g01h01);
2871 kernel.packet[2] = _mm_unpacklo_epi64(a23b23c23d23, e23f23g23h23);
2872 kernel.packet[3] = _mm_unpackhi_epi64(a23b23c23d23, e23f23g23h23);
2873 kernel.packet[4] = _mm_unpacklo_epi64(a45b45c45d45, e45f45g45h45);
2874 kernel.packet[5] = _mm_unpackhi_epi64(a45b45c45d45, e45f45g45h45);
2875 kernel.packet[6] = _mm_unpacklo_epi64(a67b67c67d67, e67f67g67h67);
2876 kernel.packet[7] = _mm_unpackhi_epi64(a67b67c67d67, e67f67g67h67);
2880 __m128i
a = kernel.packet[0];
2881 __m128i
b = kernel.packet[1];
2882 __m128i
c = kernel.packet[2];
2883 __m128i d = kernel.packet[3];
2885 __m128i ab_03 = _mm_unpacklo_epi16(
a,
b);
2886 __m128i cd_03 = _mm_unpacklo_epi16(
c, d);
2887 __m128i ab_47 = _mm_unpackhi_epi16(
a,
b);
2888 __m128i cd_47 = _mm_unpackhi_epi16(
c, d);
2890 kernel.packet[0] = _mm_unpacklo_epi32(ab_03, cd_03);
2891 kernel.packet[1] = _mm_unpackhi_epi32(ab_03, cd_03);
2892 kernel.packet[2] = _mm_unpacklo_epi32(ab_47, cd_47);
2893 kernel.packet[3] = _mm_unpackhi_epi32(ab_47, cd_47);
#define MM256_UNPACKLO_EPI32(A, B)
Definition: AVX/PacketMath.h:2104
#define MM256_SHUFFLE_EPI32(A, B, M)
Definition: AVX/PacketMath.h:2100
#define MM256_UNPACKHI_EPI32(A, B)
Definition: AVX/PacketMath.h:2106
int i
Definition: BiCGSTAB_step_by_step.cpp:9
Array< double, 1, 3 > e(1./3., 0.5, 2.)
#define EIGEN_DEBUG_ALIGNED_STORE
Definition: GenericPacketMath.h:38
#define EIGEN_DEBUG_ALIGNED_LOAD
Definition: GenericPacketMath.h:30
#define EIGEN_DEBUG_UNALIGNED_STORE
Definition: GenericPacketMath.h:42
#define EIGEN_DEBUG_UNALIGNED_LOAD
Definition: GenericPacketMath.h:34
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define EIGEN_FAST_MATH
Definition: Macros.h:51
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition: PartialRedux_count.cpp:3
#define vec4i_swizzle1(v, p, q, r, s)
Definition: SSE/PacketMath.h:98
Scalar * b
Definition: benchVecAdd.cpp:17
@ N
Definition: constructor.cpp:22
static int f(const TensorMap< Tensor< int, 3 > > &tensor)
Definition: cxx11_tensor_map.cpp:237
@ Aligned32
Definition: Constants.h:238
@ Aligned16
Definition: Constants.h:237
const Scalar * a
Definition: level2_cplx_impl.h:32
const char const int const RealScalar const RealScalar * pa
Definition: level2_cplx_impl.h:20
int * m
Definition: level2_cplx_impl.h:294
Eigen::Matrix< Scalar, Dynamic, Dynamic, ColMajor > tmp
Definition: level3_impl.h:365
EIGEN_STRONG_INLINE Packet8bf ptrunc< Packet8bf >(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2368
EIGEN_STRONG_INLINE Packet8i ploadu< Packet8i >(const int *from)
Definition: AVX/PacketMath.h:1515
EIGEN_STRONG_INLINE Packet4d print< Packet4d >(const Packet4d &a)
Definition: AVX/PacketMath.h:1237
EIGEN_STRONG_INLINE Packet8f pmax< PropagateNumbers, Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:1208
__m128d Packet2d
Definition: LSX/PacketMath.h:36
EIGEN_STRONG_INLINE void pstoreu< double >(double *to, const Packet4d &from)
Definition: AVX/PacketMath.h:1628
EIGEN_STRONG_INLINE Packet pminmax_propagate_numbers(const Packet &a, const Packet &b, Op op)
Definition: SSE/PacketMath.h:1118
EIGEN_STRONG_INLINE Packet8ui pandnot< Packet8ui >(const Packet8ui &a, const Packet8ui &b)
Definition: AVX/PacketMath.h:1393
EIGEN_STRONG_INLINE void pstoreu< uint32_t >(uint32_t *to, const Packet8ui &from)
Definition: AVX/PacketMath.h:1636
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf &a)
Definition: AltiVec/Complex.h:268
eigen_packet_wrapper< __m128i, 3 > Packet2l
Definition: LSX/PacketMath.h:41
EIGEN_STRONG_INLINE Packet8bf print< Packet8bf >(const Packet8bf &a)
Definition: AVX/PacketMath.h:2718
EIGEN_STRONG_INLINE Packet8i ploadquad< Packet8i >(const int *from)
Definition: AVX/PacketMath.h:1598
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet8bf pgather< bfloat16, Packet8bf >(const bfloat16 *from, Index stride)
Definition: AltiVec/PacketMath.h:874
EIGEN_STRONG_INLINE Packet8h pmax< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2299
EIGEN_STRONG_INLINE void prefetch< uint32_t >(const uint32_t *addr)
Definition: AVX/PacketMath.h:1758
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:318
EIGEN_STRONG_INLINE Packet8i psub< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: AVX/PacketMath.h:878
EIGEN_STRONG_INLINE Packet8h pmul< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2406
EIGEN_STRONG_INLINE Packet8f pmin< PropagateNumbers, Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:1200
EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f &a)
Definition: AVX/PacketMath.h:2283
EIGEN_STRONG_INLINE Packet8f Bf16ToF32(const Packet8bf &a)
Definition: AVX/PacketMath.h:2558
EIGEN_STRONG_INLINE Packet8f pzero(const Packet8f &)
Definition: AVX/PacketMath.h:774
__vector int Packet4i
Definition: AltiVec/PacketMath.h:34
EIGEN_STRONG_INLINE Packet8f pfloor< Packet8f >(const Packet8f &a)
Definition: AVX/PacketMath.h:1251
EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i &a)
Definition: SSE/PacketMath.h:161
EIGEN_STRONG_INLINE Packet8f pisnan(const Packet8f &a)
Definition: AVX/PacketMath.h:1034
EIGEN_STRONG_INLINE Packet8f pmax< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:1147
EIGEN_STRONG_INLINE Packet4d pset1< Packet4d >(const double &from)
Definition: AVX/PacketMath.h:752
EIGEN_STRONG_INLINE Packet8f pmax< PropagateNaN, Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:1224
EIGEN_STRONG_INLINE Packet8i padd< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: AVX/PacketMath.h:832
EIGEN_STRONG_INLINE void pstore1< Packet8i >(int *to, const int &a)
Definition: AVX/PacketMath.h:1739
EIGEN_STRONG_INLINE Packet4d pfrexp< Packet4d >(const Packet4d &a, Packet4d &exponent)
Definition: AVX/PacketMath.h:1901
EIGEN_STRONG_INLINE bfloat16 predux_mul< Packet8bf >(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2558
EIGEN_STRONG_INLINE Packet8bf pmin< Packet8bf >(const Packet8bf &a, const Packet8bf &b)
Definition: AltiVec/PacketMath.h:2391
EIGEN_STRONG_INLINE float predux_mul< Packet8f >(const Packet8f &a)
Definition: AVX/PacketMath.h:1984
EIGEN_STRONG_INLINE Packet8i pset1< Packet8i >(const int &from)
Definition: AVX/PacketMath.h:756
EIGEN_STRONG_INLINE Packet4d ptrunc< Packet4d >(const Packet4d &a)
Definition: AVX/PacketMath.h:1264
EIGEN_STRONG_INLINE __m128i Pack16To8(Packet8f rf)
Definition: AVX/PacketMath.h:402
EIGEN_STRONG_INLINE Packet4d pldexp_fast< Packet4d >(const Packet4d &a, const Packet4d &exponent)
Definition: AVX/PacketMath.h:1938
EIGEN_STRONG_INLINE Packet4i pset1< Packet4i >(const int &from)
Definition: AltiVec/PacketMath.h:778
EIGEN_STRONG_INLINE Packet8bf pceil< Packet8bf >(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2360
EIGEN_STRONG_INLINE Packet8bf pdiv< Packet8bf >(const Packet8bf &a, const Packet8bf &b)
Definition: AltiVec/PacketMath.h:2293
EIGEN_STRONG_INLINE Packet8f pselect< Packet8f >(const Packet8f &mask, const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:1424
EIGEN_STRONG_INLINE Packet8bf pround< Packet8bf >(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2364
EIGEN_STRONG_INLINE void pstore1< Packet8f >(float *to, const float &a)
Definition: AVX/PacketMath.h:1729
EIGEN_STRONG_INLINE Packet8h print< Packet8h >(const Packet8h &a)
Definition: AVX/PacketMath.h:2338
EIGEN_STRONG_INLINE Packet8h pset1< Packet8h >(const Eigen::half &from)
Definition: AVX/PacketMath.h:2217
EIGEN_STRONG_INLINE Packet8h psub< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2398
EIGEN_STRONG_INLINE Packet8f pfrexp< Packet8f >(const Packet8f &a, Packet8f &exponent)
Definition: AVX/PacketMath.h:1874
EIGEN_STRONG_INLINE Packet8f pxor< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:1352
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition: AltiVec/Complex.h:339
EIGEN_STRONG_INLINE Packet4d pceil< Packet4d >(const Packet4d &a)
Definition: AVX/PacketMath.h:1246
EIGEN_STRONG_INLINE Packet8h pfloor< Packet8h >(const Packet8h &a)
Definition: AVX/PacketMath.h:2348
EIGEN_STRONG_INLINE float predux_min< Packet8f >(const Packet8f &a)
Definition: AVX/PacketMath.h:1998
EIGEN_STRONG_INLINE bool predux_any(const Packet4f &x)
Definition: AltiVec/PacketMath.h:2751
EIGEN_STRONG_INLINE int pfirst< Packet8i >(const Packet8i &a)
Definition: AVX/PacketMath.h:1772
EIGEN_STRONG_INLINE Packet8i por< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: AVX/PacketMath.h:1335
EIGEN_DEVICE_FUNC Packet8ui pgather< uint32_t, Packet8ui >(const uint32_t *from, Index stride)
Definition: AVX/PacketMath.h:1682
EIGEN_STRONG_INLINE Packet8f pset1frombits< Packet8f >(unsigned int from)
Definition: AVX/PacketMath.h:765
EIGEN_STRONG_INLINE Packet8f psub< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:870
EIGEN_DEVICE_FUNC Packet pdiv(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:368
EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i &a)
Definition: SSE/PacketMath.h:164
EIGEN_STRONG_INLINE void pstore< bfloat16 >(bfloat16 *to, const Packet8bf &from)
Definition: AltiVec/PacketMath.h:662
EIGEN_STRONG_INLINE Packet4d pldexp< Packet4d >(const Packet4d &a, const Packet4d &exponent)
Definition: AVX/PacketMath.h:1911
EIGEN_STRONG_INLINE Packet4i pdiv< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1205
EIGEN_STRONG_INLINE Packet4f predux_half_dowto4< Packet8f >(const Packet8f &a)
Definition: AVX/PacketMath.h:1971
EIGEN_STRONG_INLINE Packet8f pmin< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:1099
EIGEN_DEVICE_FUNC Packet8i pgather< int, Packet8i >(const int *from, Index stride)
Definition: AVX/PacketMath.h:1677
EIGEN_STRONG_INLINE Packet4d pmin< PropagateNumbers, Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: AVX/PacketMath.h:1204
EIGEN_STRONG_INLINE Packet8f padd< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:817
EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1983
EIGEN_STRONG_INLINE Eigen::half predux< Packet8h >(const Packet8h &a)
Definition: AVX/PacketMath.h:2451
EIGEN_STRONG_INLINE Packet8ui pset1< Packet8ui >(const uint32_t &from)
Definition: AVX/PacketMath.h:760
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:663
EIGEN_STRONG_INLINE Packet4i pblend(const Selector< 4 > &ifPacket, const Packet4i &thenPacket, const Packet4i &elsePacket)
Definition: AltiVec/PacketMath.h:3075
EIGEN_STRONG_INLINE double predux< Packet4d >(const Packet4d &a)
Definition: AVX/PacketMath.h:1958
EIGEN_STRONG_INLINE Packet8bf padd< Packet8bf >(const Packet8bf &a, const Packet8bf &b)
Definition: AltiVec/PacketMath.h:2283
EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1314
EIGEN_STRONG_INLINE Packet8h plset< Packet8h >(const half &a)
Definition: AVX/PacketMath.h:2304
EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1979
EIGEN_STRONG_INLINE Packet4i predux_half_dowto4< Packet8i >(const Packet8i &a)
Definition: AVX/PacketMath.h:1975
EIGEN_STRONG_INLINE Packet4d pdiv< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: AVX/PacketMath.h:960
EIGEN_STRONG_INLINE Packet pminmax_propagate_nan(const Packet &a, const Packet &b, Op op)
Definition: SSE/PacketMath.h:1127
EIGEN_STRONG_INLINE void pstore1< Packet4d >(double *to, const double &a)
Definition: AVX/PacketMath.h:1734
EIGEN_STRONG_INLINE Packet4ui predux_half_dowto4< Packet8ui >(const Packet8ui &a)
Definition: AVX/PacketMath.h:1979
EIGEN_STRONG_INLINE Packet8h pdiv< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2414
EIGEN_STRONG_INLINE void pstore< int >(int *to, const Packet4i &from)
Definition: AltiVec/PacketMath.h:647
EIGEN_STRONG_INLINE Packet8bf plset< Packet8bf >(const bfloat16 &a)
Definition: AltiVec/PacketMath.h:2428
EIGEN_STRONG_INLINE Packet8h por(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2309
EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1341
EIGEN_STRONG_INLINE Packet8ui pload< Packet8ui >(const uint32_t *from)
Definition: AVX/PacketMath.h:1502
EIGEN_STRONG_INLINE Packet8bf pmul< Packet8bf >(const Packet8bf &a, const Packet8bf &b)
Definition: AltiVec/PacketMath.h:2288
EIGEN_STRONG_INLINE Packet8bf ploaddup< Packet8bf >(const bfloat16 *from)
Definition: AltiVec/PacketMath.h:2423
EIGEN_STRONG_INLINE Packet8f ploaddup< Packet8f >(const float *from)
Definition: AVX/PacketMath.h:1540
EIGEN_STRONG_INLINE Packet8f pandnot< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:1377
__vector unsigned int Packet4ui
Definition: AltiVec/PacketMath.h:35
EIGEN_STRONG_INLINE float predux< Packet8f >(const Packet8f &a)
Definition: AVX/PacketMath.h:1954
EIGEN_STRONG_INLINE Packet8f plset< Packet8f >(const float &a)
Definition: AVX/PacketMath.h:853
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
Definition: AltiVec/Complex.h:303
EIGEN_STRONG_INLINE Packet8i pdiv< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: AVX/PacketMath.h:965
EIGEN_STRONG_INLINE Packet4d pmax< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: AVX/PacketMath.h:1159
EIGEN_STRONG_INLINE Packet8f half2float(const Packet8h &a)
Definition: AVX/PacketMath.h:2273
EIGEN_STRONG_INLINE void pstore< double >(double *to, const Packet4d &from)
Definition: AVX/PacketMath.h:1611
EIGEN_STRONG_INLINE Packet8f ploadu< Packet8f >(const float *from)
Definition: AVX/PacketMath.h:1507
EIGEN_STRONG_INLINE Packet8ui psub< Packet8ui >(const Packet8ui &a, const Packet8ui &b)
Definition: AVX/PacketMath.h:888
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: AltiVec/PacketMath.h:1218
EIGEN_STRONG_INLINE double predux_min< Packet4d >(const Packet4d &a)
Definition: AVX/PacketMath.h:2004
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:88
EIGEN_STRONG_INLINE Packet8ui ploaddup< Packet8ui >(const uint32_t *from)
Definition: AVX/PacketMath.h:1576
EIGEN_STRONG_INLINE Packet8h ptrue(const Packet8h &a)
Definition: AVX/PacketMath.h:2263
EIGEN_STRONG_INLINE Packet4d pmin< PropagateNaN, Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: AVX/PacketMath.h:1220
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:649
EIGEN_STRONG_INLINE Packet8h ptrunc< Packet8h >(const Packet8h &a)
Definition: AVX/PacketMath.h:2353
EIGEN_STRONG_INLINE Eigen::half predux_min< Packet8h >(const Packet8h &a)
Definition: AVX/PacketMath.h:2466
EIGEN_STRONG_INLINE Packet8h pandnot(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2323
EIGEN_STRONG_INLINE double predux_max< Packet4d >(const Packet4d &a)
Definition: AVX/PacketMath.h:2017
EIGEN_STRONG_INLINE Packet8f pload< Packet8f >(const float *from)
Definition: AVX/PacketMath.h:1490
eigen_packet_wrapper< __vector unsigned short int, 0 > Packet8bf
Definition: AltiVec/PacketMath.h:42
EIGEN_STRONG_INLINE Packet4d ptrue< Packet4d >(const Packet4d &a)
Definition: AVX/PacketMath.h:1291
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
Definition: AltiVec/Complex.h:264
EIGEN_STRONG_INLINE void prefetch< float >(const float *addr)
Definition: AltiVec/PacketMath.h:1854
EIGEN_STRONG_INLINE Packet4d ploadu< Packet4d >(const double *from)
Definition: AVX/PacketMath.h:1511
EIGEN_STRONG_INLINE void pstoreu< bfloat16 >(bfloat16 *to, const Packet8bf &from)
Definition: AltiVec/PacketMath.h:1772
EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1975
EIGEN_STRONG_INLINE Packet8i pload< Packet8i >(const int *from)
Definition: AVX/PacketMath.h:1498
EIGEN_STRONG_INLINE Packet4d pfrexp_generic_get_biased_exponent(const Packet4d &a)
Definition: AVX/PacketMath.h:1880
EIGEN_STRONG_INLINE Packet8f ploadquad< Packet8f >(const float *from)
Definition: AVX/PacketMath.h:1593
EIGEN_STRONG_INLINE Packet4d pmul< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: AVX/PacketMath.h:931
EIGEN_STRONG_INLINE uint32_t pfirst< Packet8ui >(const Packet8ui &a)
Definition: AVX/PacketMath.h:1776
EIGEN_STRONG_INLINE Packet8i pxor< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: AVX/PacketMath.h:1360
EIGEN_STRONG_INLINE __m256i avx_blend_mask(const Selector< 4 > &ifPacket)
Definition: AVX/PacketMath.h:2175
EIGEN_STRONG_INLINE Packet4i ploadu< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:1537
EIGEN_STRONG_INLINE Packet8bf psignbit(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:1966
EIGEN_STRONG_INLINE Packet8i plset< Packet8i >(const int &a)
Definition: AVX/PacketMath.h:861
EIGEN_STRONG_INLINE Packet4d ploaddup< Packet4d >(const double *from)
Definition: AVX/PacketMath.h:1556
EIGEN_STRONG_INLINE Packet8f pceil< Packet8f >(const Packet8f &a)
Definition: AVX/PacketMath.h:1242
EIGEN_STRONG_INLINE Packet8ui ploadu< Packet8ui >(const uint32_t *from)
Definition: AVX/PacketMath.h:1519
EIGEN_STRONG_INLINE Packet8f por< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:1327
EIGEN_STRONG_INLINE float pfirst< Packet8f >(const Packet8f &a)
Definition: AVX/PacketMath.h:1764
EIGEN_STRONG_INLINE Packet8h pceil< Packet8h >(const Packet8h &a)
Definition: AVX/PacketMath.h:2343
EIGEN_STRONG_INLINE float predux_max< Packet8f >(const Packet8f &a)
Definition: AVX/PacketMath.h:2010
EIGEN_STRONG_INLINE Packet8f pand< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:1302
EIGEN_STRONG_INLINE Packet8bf pfloor< Packet8bf >(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2356
const char * SsePrefetchPtrType
Definition: SSE/PacketMath.h:1719
EIGEN_STRONG_INLINE void pstore< float >(float *to, const Packet4f &from)
Definition: AltiVec/PacketMath.h:642
EIGEN_STRONG_INLINE Packet4d pselect< Packet4d >(const Packet4d &mask, const Packet4d &a, const Packet4d &b)
Definition: AVX/PacketMath.h:1439
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1936
EIGEN_STRONG_INLINE Packet8ui pand< Packet8ui >(const Packet8ui &a, const Packet8ui &b)
Definition: AVX/PacketMath.h:1318
EIGEN_STRONG_INLINE Packet8f peven_mask(const Packet8f &)
Definition: AVX/PacketMath.h:791
EIGEN_STRONG_INLINE Packet8i pandnot< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: AVX/PacketMath.h:1385
EIGEN_STRONG_INLINE bfloat16 pfirst(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2418
EIGEN_STRONG_INLINE Packet8ui pmin< Packet8ui >(const Packet8ui &a, const Packet8ui &b)
Definition: AVX/PacketMath.h:1136
EIGEN_STRONG_INLINE int predux< Packet8i >(const Packet8i &a)
Definition: AVX/PacketMath.h:1962
eigen_packet_wrapper< __m256i, 0 > Packet8i
Definition: AVX/PacketMath.h:35
EIGEN_STRONG_INLINE Packet4d pround< Packet4d >(const Packet4d &a)
Definition: AVX/PacketMath.h:1417
EIGEN_STRONG_INLINE Packet8f ptrunc< Packet8f >(const Packet8f &a)
Definition: AVX/PacketMath.h:1260
EIGEN_DEVICE_FUNC Packet4d pgather< double, Packet4d >(const double *from, Index stride)
Definition: AVX/PacketMath.h:1673
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
Definition: GenericPacketMath.h:891
EIGEN_STRONG_INLINE Eigen::half predux_max< Packet8h >(const Packet8h &a)
Definition: AVX/PacketMath.h:2459
EIGEN_STRONG_INLINE uint32_t predux< Packet8ui >(const Packet8ui &a)
Definition: AVX/PacketMath.h:1966
EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:835
EIGEN_STRONG_INLINE Packet4d plset< Packet4d >(const double &a)
Definition: AVX/PacketMath.h:857
EIGEN_DEVICE_FUNC unpacket_traits< Packet >::type predux(const Packet &a)
Definition: GenericPacketMath.h:1232
EIGEN_DEVICE_FUNC void pscatter< double, Packet4d >(double *to, const Packet4d &from, Index stride)
Definition: AVX/PacketMath.h:1701
EIGEN_STRONG_INLINE Packet8bf psub< Packet8bf >(const Packet8bf &a, const Packet8bf &b)
Definition: AltiVec/PacketMath.h:2304
EIGEN_STRONG_INLINE bfloat16 predux< Packet8bf >(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2455
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:353
EIGEN_STRONG_INLINE Packet4d pmin< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: AVX/PacketMath.h:1114
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pldexp_generic(const Packet &a, const Packet &exponent)
Definition: GenericPacketMathFunctions.h:226
EIGEN_STRONG_INLINE Packet8bf pload< Packet8bf >(const bfloat16 *from)
Definition: AltiVec/PacketMath.h:522
EIGEN_STRONG_INLINE Packet4f pmsub(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:819
EIGEN_DEVICE_FUNC void pscatter< int, Packet8i >(int *to, const Packet8i &from, Index stride)
Definition: AVX/PacketMath.h:1710
EIGEN_STRONG_INLINE void pstoreu< int >(int *to, const Packet4i &from)
Definition: AltiVec/PacketMath.h:1760
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2319
EIGEN_DEVICE_FUNC void pscatter< uint32_t, Packet8ui >(uint32_t *to, const Packet8ui &from, Index stride)
Definition: AVX/PacketMath.h:1724
EIGEN_STRONG_INLINE Packet8h pxor(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2315
EIGEN_STRONG_INLINE Packet4d pand< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: AVX/PacketMath.h:1306
eigen_packet_wrapper< __m256i, 4 > Packet8ui
Definition: AVX/PacketMath.h:41
EIGEN_STRONG_INLINE Packet8f pround< Packet8f >(const Packet8f &a)
Definition: AVX/PacketMath.h:1411
EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:827
EIGEN_STRONG_INLINE Packet8f pmul< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:927
EIGEN_STRONG_INLINE Packet8f pldexp< Packet8f >(const Packet8f &a, const Packet8f &exponent)
Definition: AVX/PacketMath.h:1906
EIGEN_STRONG_INLINE Packet8ui por< Packet8ui >(const Packet8ui &a, const Packet8ui &b)
Definition: AVX/PacketMath.h:1343
EIGEN_STRONG_INLINE Packet8ui pmul< Packet8ui >(const Packet8ui &a, const Packet8ui &b)
Definition: AVX/PacketMath.h:945
EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1474
EIGEN_STRONG_INLINE Packet8i ploaddup< Packet8i >(const int *from)
Definition: AVX/PacketMath.h:1562
EIGEN_STRONG_INLINE Packet8h pmin< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2294
EIGEN_STRONG_INLINE Packet8ui ploadquad< Packet8ui >(const uint32_t *from)
Definition: AVX/PacketMath.h:1602
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pfrexp_generic(const Packet &a, Packet &exponent)
Definition: GenericPacketMathFunctions.h:184
EIGEN_STRONG_INLINE Packet4d pmax< PropagateNumbers, Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: AVX/PacketMath.h:1212
EIGEN_DEVICE_FUNC Packet psub(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:337
EIGEN_STRONG_INLINE bfloat16 predux_min< Packet8bf >(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2609
EIGEN_STRONG_INLINE Packet4d pload< Packet4d >(const double *from)
Definition: AVX/PacketMath.h:1494
EIGEN_STRONG_INLINE Packet8f pload1< Packet8f >(const float *from)
Definition: AVX/PacketMath.h:808
EIGEN_STRONG_INLINE Packet8ui pxor< Packet8ui >(const Packet8ui &a, const Packet8ui &b)
Definition: AVX/PacketMath.h:1368
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter< bfloat16, Packet8bf >(bfloat16 *to, const Packet8bf &from, Index stride)
Definition: AltiVec/PacketMath.h:977
EIGEN_STRONG_INLINE Packet4d pxor< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: AVX/PacketMath.h:1356
EIGEN_DEVICE_FUNC Packet8f pgather< float, Packet8f >(const float *from, Index stride)
Definition: AVX/PacketMath.h:1668
EIGEN_STRONG_INLINE Packet8i ptrue< Packet8i >(const Packet8i &a)
Definition: AVX/PacketMath.h:1269
EIGEN_STRONG_INLINE Packet8h ploaddup< Packet8h >(const Eigen::half *from)
Definition: AVX/PacketMath.h:2247
EIGEN_STRONG_INLINE Packet8h ploadquad< Packet8h >(const Eigen::half *from)
Definition: AVX/PacketMath.h:2256
EIGEN_STRONG_INLINE Packet8i pselect< Packet8i >(const Packet8i &mask, const Packet8i &a, const Packet8i &b)
Definition: AVX/PacketMath.h:1428
EIGEN_STRONG_INLINE Packet8h pround< Packet8h >(const Packet8h &a)
Definition: AVX/PacketMath.h:2333
EIGEN_STRONG_INLINE Packet8bf ploadquad< Packet8bf >(const bfloat16 *from)
Definition: AltiVec/PacketMath.h:1689
EIGEN_STRONG_INLINE Packet8ui pselect< Packet8ui >(const Packet8ui &mask, const Packet8ui &a, const Packet8ui &b)
Definition: AVX/PacketMath.h:1433
EIGEN_STRONG_INLINE Packet8h ploadu< Packet8h >(const Eigen::half *from)
Definition: AVX/PacketMath.h:2232
EIGEN_STRONG_INLINE double pfirst< Packet4d >(const Packet4d &a)
Definition: AVX/PacketMath.h:1768
EIGEN_STRONG_INLINE Packet8f pmin< PropagateNaN, Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:1216
EIGEN_STRONG_INLINE Packet4d padd< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: AVX/PacketMath.h:828
EIGEN_STRONG_INLINE Packet8ui plset< Packet8ui >(const uint32_t &a)
Definition: AVX/PacketMath.h:865
EIGEN_STRONG_INLINE Packet8f pdiv< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:956
EIGEN_STRONG_INLINE Packet8i pmin< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: AVX/PacketMath.h:1126
__vector float Packet4f
Definition: AltiVec/PacketMath.h:33
EIGEN_STRONG_INLINE Packet8h padd< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2390
__m256 Packet8f
Definition: AVX/PacketMath.h:34
EIGEN_STRONG_INLINE Packet4d pset1frombits< Packet4d >(uint64_t from)
Definition: AVX/PacketMath.h:769
EIGEN_STRONG_INLINE double predux_mul< Packet4d >(const Packet4d &a)
Definition: AVX/PacketMath.h:1991
EIGEN_STRONG_INLINE bfloat16 pfirst< Packet8bf >(const Packet8bf &from)
Definition: AVX/PacketMath.h:2624
EIGEN_STRONG_INLINE Eigen::half pfirst< Packet8h >(const Packet8h &from)
Definition: AVX/PacketMath.h:2222
EIGEN_STRONG_INLINE Packet8bf F32ToBf16(Packet4f p4f)
Definition: AltiVec/PacketMath.h:2059
EIGEN_STRONG_INLINE void pstoreu< int64_t >(int64_t *to, const Packet8l &from)
Definition: AVX512/PacketMath.h:1123
EIGEN_STRONG_INLINE void pstoreu< float >(float *to, const Packet4f &from)
Definition: AltiVec/PacketMath.h:1756
EIGEN_STRONG_INLINE Packet8f pset1< Packet8f >(const float &from)
Definition: AVX/PacketMath.h:748
EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1329
EIGEN_STRONG_INLINE Packet8i pand< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: AVX/PacketMath.h:1310
EIGEN_STRONG_INLINE Packet4d pfloor< Packet4d >(const Packet4d &a)
Definition: AVX/PacketMath.h:1255
EIGEN_STRONG_INLINE Packet8bf ploadu< Packet8bf >(const bfloat16 *from)
Definition: AltiVec/PacketMath.h:1549
EIGEN_STRONG_INLINE Packet4d por< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: AVX/PacketMath.h:1331
EIGEN_STRONG_INLINE void pstore< uint64_t >(uint64_t *to, const Packet2ul &from)
Definition: LSX/PacketMath.h:1569
EIGEN_STRONG_INLINE Packet8bf pmax< Packet8bf >(const Packet8bf &a, const Packet8bf &b)
Definition: AltiVec/PacketMath.h:2396
EIGEN_STRONG_INLINE Packet4ui ploadu< Packet4ui >(const uint32_t *from)
Definition: LSX/PacketMath.h:1476
EIGEN_STRONG_INLINE void pstore< uint32_t >(uint32_t *to, const Packet8ui &from)
Definition: AVX/PacketMath.h:1619
EIGEN_DEVICE_FUNC Packet psign(const Packet &a)
Definition: GenericPacketMath.h:1189
EIGEN_STRONG_INLINE Packet8bf pset1< Packet8bf >(const bfloat16 &from)
Definition: AltiVec/PacketMath.h:808
EIGEN_STRONG_INLINE void prefetch< int >(const int *addr)
Definition: AltiVec/PacketMath.h:1858
EIGEN_STRONG_INLINE Packet4d pload1< Packet4d >(const double *from)
Definition: AVX/PacketMath.h:812
__m256d Packet4d
Definition: AVX/PacketMath.h:36
EIGEN_STRONG_INLINE Packet4d pandnot< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: AVX/PacketMath.h:1381
EIGEN_STRONG_INLINE Packet4d psub< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: AVX/PacketMath.h:874
EIGEN_STRONG_INLINE Packet8i pmax< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: AVX/PacketMath.h:1171
EIGEN_STRONG_INLINE Packet8f ptrue< Packet8f >(const Packet8f &a)
Definition: AVX/PacketMath.h:1280
EIGEN_STRONG_INLINE Packet8i pmul< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: AVX/PacketMath.h:935
EIGEN_STRONG_INLINE bfloat16 predux_max< Packet8bf >(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2689
EIGEN_STRONG_INLINE void pstoreu< uint64_t >(uint64_t *to, const Packet2ul &from)
Definition: LSX/PacketMath.h:1611
eigen_packet_wrapper< __m128i, 2 > Packet8h
Definition: AVX/PacketMath.h:38
EIGEN_STRONG_INLINE Packet8h pload< Packet8h >(const Eigen::half *from)
Definition: AVX/PacketMath.h:2227
EIGEN_DEVICE_FUNC void pscatter< float, Packet8f >(float *to, const Packet8f &from, Index stride)
Definition: AVX/PacketMath.h:1687
EIGEN_STRONG_INLINE Packet4d pmax< PropagateNaN, Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: AVX/PacketMath.h:1228
EIGEN_STRONG_INLINE void pstore< int64_t >(int64_t *to, const Packet8l &from)
Definition: AVX512/PacketMath.h:1106
EIGEN_STRONG_INLINE Packet8f print< Packet8f >(const Packet8f &a)
Definition: AVX/PacketMath.h:1233
EIGEN_STRONG_INLINE void prefetch< double >(const double *addr)
Definition: AVX/PacketMath.h:1750
EIGEN_STRONG_INLINE Packet8ui pmax< Packet8ui >(const Packet8ui &a, const Packet8ui &b)
Definition: AVX/PacketMath.h:1181
EIGEN_STRONG_INLINE Eigen::half predux_mul< Packet8h >(const Packet8h &a)
Definition: AVX/PacketMath.h:2473
EIGEN_STRONG_INLINE Packet8ui padd< Packet8ui >(const Packet8ui &a, const Packet8ui &b)
Definition: AVX/PacketMath.h:842
std::uint8_t uint8_t
Definition: Meta.h:36
std::int64_t int64_t
Definition: Meta.h:43
std::uint16_t uint16_t
Definition: Meta.h:38
std::uint32_t uint32_t
Definition: Meta.h:40
std::uint64_t uint64_t
Definition: Meta.h:42
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
double S0
Strength of source function in inner region.
Definition: stefan_boltzmann.cc:148
double S1
Strength of source function in outer region.
Definition: stefan_boltzmann.cc:151
int c
Definition: calibrate.py:100
Definition: Eigen_Colamd.h:49
list x
Definition: plotDoE.py:28
t
Definition: plotPSD.py:36
Definition: BFloat16.h:101
Definition: GenericPacketMath.h:1407
Packet packet[N]
Definition: GenericPacketMath.h:1408
Definition: GenericPacketMath.h:1421
bool select[N]
Definition: GenericPacketMath.h:1422
Definition: GenericPacketMath.h:45
@ HasASin
Definition: GenericPacketMath.h:84
@ HasATanh
Definition: GenericPacketMath.h:87
@ HasRsqrt
Definition: GenericPacketMath.h:74
@ HasSin
Definition: GenericPacketMath.h:81
@ HasBlend
Definition: GenericPacketMath.h:66
@ HasErfc
Definition: GenericPacketMath.h:96
@ HasACos
Definition: GenericPacketMath.h:85
@ HasNdtri
Definition: GenericPacketMath.h:97
@ HasCos
Definition: GenericPacketMath.h:82
@ HasCmp
Definition: GenericPacketMath.h:69
@ HasReciprocal
Definition: GenericPacketMath.h:72
@ HasShift
Definition: GenericPacketMath.h:50
@ HasLog1p
Definition: GenericPacketMath.h:78
@ HasExp
Definition: GenericPacketMath.h:75
@ HasSqrt
Definition: GenericPacketMath.h:73
@ HasErf
Definition: GenericPacketMath.h:95
@ HasBessel
Definition: GenericPacketMath.h:98
@ HasExpm1
Definition: GenericPacketMath.h:76
@ HasLog
Definition: GenericPacketMath.h:77
@ HasTanh
Definition: GenericPacketMath.h:90
@ HasATan
Definition: GenericPacketMath.h:86
@ HasDiv
Definition: GenericPacketMath.h:71
Definition: GenericPacketMath.h:225
@ value
Definition: Meta.h:146
Packet8h type
Definition: AVX/PacketMath.h:161
Packet8h half
Definition: AVX/PacketMath.h:163
Packet8bf half
Definition: AVX/PacketMath.h:202
Packet8bf type
Definition: AVX/PacketMath.h:199
Packet2d half
Definition: AVX/PacketMath.h:134
Packet4d type
Definition: AVX/PacketMath.h:133
Packet8f type
Definition: AVX/PacketMath.h:101
Packet4f half
Definition: AVX/PacketMath.h:102
Packet8i type
Definition: AVX/PacketMath.h:238
Packet4i half
Definition: AVX/PacketMath.h:239
Packet8ui type
Definition: AVX/PacketMath.h:244
Packet4ui half
Definition: AVX/PacketMath.h:245
Definition: GenericPacketMath.h:108
T type
Definition: GenericPacketMath.h:109
@ size
Definition: GenericPacketMath.h:113
@ AlignedOnScalar
Definition: GenericPacketMath.h:114
@ Vectorizable
Definition: GenericPacketMath.h:112
T half
Definition: GenericPacketMath.h:110
@ HasSub
Definition: GenericPacketMath.h:118
@ HasMax
Definition: GenericPacketMath.h:124
@ HasNegate
Definition: GenericPacketMath.h:120
@ HasMul
Definition: GenericPacketMath.h:119
@ HasAdd
Definition: GenericPacketMath.h:117
@ HasSetLinear
Definition: GenericPacketMath.h:126
@ HasMin
Definition: GenericPacketMath.h:123
@ HasConj
Definition: GenericPacketMath.h:125
@ HasAbs2
Definition: GenericPacketMath.h:122
@ HasAbs
Definition: GenericPacketMath.h:121
Definition: XprHelper.h:883
@ value
Definition: XprHelper.h:884
@ mask
Definition: SSE/PacketMath.h:91
Packet2d half
Definition: AVX/PacketMath.h:325
double type
Definition: AVX/PacketMath.h:324
Packet8bf half
Definition: AVX/PacketMath.h:390
bfloat16 type
Definition: AVX/PacketMath.h:389
uint8_t mask_t
Definition: AVX/PacketMath.h:309
Packet8i integer_packet
Definition: AVX/PacketMath.h:308
Packet4f half
Definition: AVX/PacketMath.h:307
float type
Definition: AVX/PacketMath.h:306
Packet8h half
Definition: AVX/PacketMath.h:2212
Eigen::half type
Definition: AVX/PacketMath.h:2204
Packet4i half
Definition: AVX/PacketMath.h:340
int type
Definition: AVX/PacketMath.h:339
Packet4ui half
Definition: AVX/PacketMath.h:352
uint32_t type
Definition: AVX/PacketMath.h:351
Definition: GenericPacketMath.h:134
numext::get_integer_by_size< sizeof(T)>::signed_type integer_packet
Definition: GenericPacketMath.h:137
T type
Definition: GenericPacketMath.h:135
T half
Definition: GenericPacketMath.h:136
@ masked_load_available
Definition: GenericPacketMath.h:142
@ size
Definition: GenericPacketMath.h:139
@ masked_store_available
Definition: GenericPacketMath.h:143
@ vectorizable
Definition: GenericPacketMath.h:141
@ alignment
Definition: GenericPacketMath.h:140
std::ofstream out("Result.txt")
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2