12 #ifndef EIGEN_PACKET_MATH_NEON_H
13 #define EIGEN_PACKET_MATH_NEON_H
16 #include "../../InternalHeaderCheck.h"
// Default tuning macros for this backend. Each one is introduced by an #ifndef,
// so a definition supplied before this header is included takes precedence.
22 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
23 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
// Defined with no value: it is a presence flag, to be tested with #ifdef/#ifndef.
26 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
27 #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
// NOTE(review): two alternative defaults (32 and 16) follow. The condition that
// selects between them is not visible in this excerpt -- presumably 64-bit vs.
// 32-bit ARM; confirm against the full header.
30 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
32 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
34 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
38 #if EIGEN_COMP_MSVC_STRICT
// Packet type aliases used throughout this file.
//
// Every alias instantiates eigen_packet_wrapper with an underlying storage type
// and an integer tag. The tags are all different (0..17), so each alias is its
// own template instantiation even if two underlying types were to coincide.
// NOTE(review): this list sits under `#if EIGEN_COMP_MSVC_STRICT`; the
// alternative branch is not visible in this excerpt -- presumably it aliases the
// raw NEON vector types directly; confirm.
//
// Naming: Packet<lane count><element suffix>; the storage types below pair
// f with float32, c/uc with int8/uint8, s/us with int16/uint16, i/ui with
// int32/uint32 and l/ul with int64/uint64.
44 typedef eigen_packet_wrapper<float32x2_t, 0>
Packet2f;
45 typedef eigen_packet_wrapper<float32x4_t, 1>
Packet4f;
// Packet4c is stored in a plain int32_t rather than a NEON vector type
// (presumably four 8-bit lanes packed into one 32-bit word -- confirm).
46 typedef eigen_packet_wrapper<int32_t, 2>
Packet4c;
47 typedef eigen_packet_wrapper<int8x8_t, 3>
Packet8c;
48 typedef eigen_packet_wrapper<int8x16_t, 4>
Packet16c;
// Packet4uc is likewise stored in a plain uint32_t.
49 typedef eigen_packet_wrapper<uint32_t, 5>
Packet4uc;
50 typedef eigen_packet_wrapper<uint8x8_t, 6>
Packet8uc;
51 typedef eigen_packet_wrapper<uint8x16_t, 7>
Packet16uc;
52 typedef eigen_packet_wrapper<int16x4_t, 8>
Packet4s;
53 typedef eigen_packet_wrapper<int16x8_t, 9>
Packet8s;
54 typedef eigen_packet_wrapper<uint16x4_t, 10>
Packet4us;
55 typedef eigen_packet_wrapper<uint16x8_t, 11>
Packet8us;
56 typedef eigen_packet_wrapper<int32x2_t, 12>
Packet2i;
57 typedef eigen_packet_wrapper<int32x4_t, 13>
Packet4i;
58 typedef eigen_packet_wrapper<uint32x2_t, 14>
Packet2ui;
59 typedef eigen_packet_wrapper<uint32x4_t, 15>
Packet4ui;
60 typedef eigen_packet_wrapper<int64x2_t, 16>
Packet2l;
61 typedef eigen_packet_wrapper<uint64x2_t, 17>
Packet2ul;
64 float from[4] = {
a,
b,
c, d};
65 return vld1q_f32(from);
69 float from[2] = {
a,
b};
70 return vld1_f32(from);
100 const float*
a =
reinterpret_cast<const float*
>(&
m);
102 make_packet4f(*(
a + (mask & 3)), *(
a + ((mask >> 2) & 3)), *(
a + ((mask >> 4) & 3)), *(
a + ((mask >> 6) & 3)));
110 template <
bool interleave>
112 const float*
a =
reinterpret_cast<const float*
>(&
m);
113 const float*
b =
reinterpret_cast<const float*
>(&
n);
115 make_packet4f(*(
a + (mask & 3)), *(
a + ((mask >> 2) & 3)), *(
b + ((mask >> 4) & 3)), *(
b + ((mask >> 6) & 3)));
121 const float*
a =
reinterpret_cast<const float*
>(&
m);
122 const float*
b =
reinterpret_cast<const float*
>(&
n);
124 make_packet4f(*(
a + (mask & 3)), *(
b + ((mask >> 2) & 3)), *(
a + ((mask >> 4) & 3)), *(
b + ((mask >> 6) & 3)));
129 return ((
s) << 6 | (
r) << 4 | (
q) << 2 | (
p));
// vec4f_duplane(a, p): builds a Packet4f with every lane set to lane `p` of the
// low half of `a`. Because the lane is taken from vget_low_f32(a), `p` indexes
// that two-lane half, not the full four-lane vector.
150 #define vec4f_duplane(a, p) Packet4f(vdupq_lane_f32(vget_low_f32(a), p))
// Declares `const Packet4f p4f_<NAME>` with X passed to pset1<Packet4f>.
152 #define EIGEN_DECLARE_CONST_Packet4f(NAME, X) const Packet4f p4f_##NAME = pset1<Packet4f>(X)
// Declares `const Packet4f p4f_<NAME>` by reinterpreting an integer-built value
// as float lanes.
// NOTE(review): pset1 is instantiated with the scalar type int32_t here, while
// vreinterpretq_f32_u32 takes a uint32x4_t. This looks like it would not compile
// if the macro were ever expanded -- confirm whether it is used, and whether a
// packet type (e.g. Packet4ui) was intended.
154 #define EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME, X) \
155 const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int32_t>(X))
// Declares `const Packet4i p4i_<NAME>` with X passed to pset1<Packet4i>.
157 #define EIGEN_DECLARE_CONST_Packet4i(NAME, X) const Packet4i p4i_##NAME = pset1<Packet4i>(X)
// EIGEN_ARM_PREFETCH(ADDR): issues a prefetch hint for ADDR using whatever
// mechanism the compiler/architecture offers. The variants below are, in order:
// inline `prfm pldl1keep` assembly, __builtin_prefetch, the __pld intrinsic,
// inline `pld` assembly, and an empty expansion (no prefetch).
// NOTE(review): some of the #elif/#else lines selecting the later variants are
// not visible in this excerpt.
// NOTE(review): three variants end with ';' inside the macro while the __pld and
// empty variants do not, so `EIGEN_ARM_PREFETCH(p);` expands to a double
// semicolon on some configurations -- confirm call sites tolerate both forms.
159 #if EIGEN_ARCH_ARM64 && EIGEN_COMP_GNUC
163 #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__("prfm pldl1keep, [%[addr]]\n" ::[addr] "r"(ADDR) :);
164 #elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
165 #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
167 #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
169 #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__("pld [%[addr]]\n" ::[addr] "r"(ADDR) :);
172 #define EIGEN_ARM_PREFETCH(ADDR)
176 struct packet_traits<float> : default_packet_traits {
219 struct packet_traits<
int8_t> : default_packet_traits {
246 struct packet_traits<
uint8_t> : default_packet_traits {
275 struct packet_traits<
int16_t> : default_packet_traits {
302 struct packet_traits<
uint16_t> : default_packet_traits {
330 struct packet_traits<
int32_t> : default_packet_traits {
357 struct packet_traits<
uint32_t> : default_packet_traits {
386 struct packet_traits<
int64_t> : default_packet_traits {
413 struct packet_traits<
uint64_t> : default_packet_traits {
660 return vdup_n_f32(0.0f);
665 return vdupq_n_f32(0.0f);
670 return vdup_n_f32(from);
674 return vdupq_n_f32(from);
678 return vget_lane_s32(vreinterpret_s32_s8(vdup_n_s8(from)), 0);
682 return vdup_n_s8(from);
686 return vdupq_n_s8(from);
690 return vget_lane_u32(vreinterpret_u32_u8(vdup_n_u8(from)), 0);
694 return vdup_n_u8(from);
698 return vdupq_n_u8(from);
702 return vdup_n_s16(from);
706 return vdupq_n_s16(from);
710 return vdup_n_u16(from);
714 return vdupq_n_u16(from);
718 return vdup_n_s32(from);
722 return vdupq_n_s32(from);
726 return vdup_n_u32(from);
730 return vdupq_n_u32(from);
734 return vdupq_n_s64(from);
738 return vdupq_n_u64(from);
743 return vreinterpret_f32_u32(vdup_n_u32(from));
747 return vreinterpretq_f32_u32(vdupq_n_u32(from));
752 const float c[] = {0.0f, 1.0f};
757 const float c[] = {0.0f, 1.0f, 2.0f, 3.0f};
762 return vget_lane_s32(vreinterpret_s32_s8(vadd_s8(vreinterpret_s8_u32(vdup_n_u32(0x03020100)), vdup_n_s8(
a))), 0);
766 const int8_t c[] = {0, 1, 2, 3, 4, 5, 6, 7};
771 const int8_t c[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
776 return vget_lane_u32(vreinterpret_u32_u8(vadd_u8(vreinterpret_u8_u32(vdup_n_u32(0x03020100)), vdup_n_u8(
a))), 0);
780 const uint8_t c[] = {0, 1, 2, 3, 4, 5, 6, 7};
785 const uint8_t c[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
800 const int16_t c[] = {0, 1, 2, 3, 4, 5, 6, 7};
805 const uint16_t c[] = {0, 1, 2, 3, 4, 5, 6, 7};
841 return vadd_f32(
a,
b);
845 return vaddq_f32(
a,
b);
849 return vget_lane_s32(
850 vreinterpret_s32_s8(vadd_s8(vreinterpret_s8_s32(vdup_n_s32(
a)), vreinterpret_s8_s32(vdup_n_s32(
b)))), 0);
854 return vadd_s8(
a,
b);
858 return vaddq_s8(
a,
b);
862 return vget_lane_u32(
863 vreinterpret_u32_u8(vadd_u8(vreinterpret_u8_u32(vdup_n_u32(
a)), vreinterpret_u8_u32(vdup_n_u32(
b)))), 0);
867 return vadd_u8(
a,
b);
871 return vaddq_u8(
a,
b);
875 return vadd_s16(
a,
b);
879 return vaddq_s16(
a,
b);
883 return vadd_u16(
a,
b);
887 return vaddq_u16(
a,
b);
891 return vadd_s32(
a,
b);
895 return vaddq_s32(
a,
b);
899 return vadd_u32(
a,
b);
903 return vaddq_u32(
a,
b);
907 return vaddq_s64(
a,
b);
911 return vaddq_u64(
a,
b);
916 return vsub_f32(
a,
b);
920 return vsubq_f32(
a,
b);
924 return vget_lane_s32(
925 vreinterpret_s32_s8(vsub_s8(vreinterpret_s8_s32(vdup_n_s32(
a)), vreinterpret_s8_s32(vdup_n_s32(
b)))), 0);
929 return vsub_s8(
a,
b);
933 return vsubq_s8(
a,
b);
937 return vget_lane_u32(
938 vreinterpret_u32_u8(vsub_u8(vreinterpret_u8_u32(vdup_n_u32(
a)), vreinterpret_u8_u32(vdup_n_u32(
b)))), 0);
942 return vsub_u8(
a,
b);
946 return vsubq_u8(
a,
b);
950 return vsub_s16(
a,
b);
954 return vsubq_s16(
a,
b);
958 return vsub_u16(
a,
b);
962 return vsubq_u16(
a,
b);
966 return vsub_s32(
a,
b);
970 return vsubq_s32(
a,
b);
974 return vsub_u32(
a,
b);
978 return vsubq_u32(
a,
b);
982 return vsubq_s64(
a,
b);
986 return vsubq_u64(
a,
b);
1000 Packet4f mask =
make_packet4f(numext::bit_cast<float>(0x80000000u), 0.0f, numext::bit_cast<float>(0x80000000u), 0.0f);
1010 return vnegq_f32(
a);
1014 return vget_lane_s32(vreinterpret_s32_s8(vneg_s8(vreinterpret_s8_s32(vdup_n_s32(
a)))), 0);
1030 return vnegq_s16(
a);
1038 return vnegq_s32(
a);
1042 #if EIGEN_ARCH_ARM64
1043 return vnegq_s64(
a);
1045 return vcombine_s64(vdup_n_s64(-vgetq_lane_s64(
a, 0)), vdup_n_s64(-vgetq_lane_s64(
a, 1)));
1124 return vmul_f32(
a,
b);
1128 return vmulq_f32(
a,
b);
1132 return vget_lane_s32(
1133 vreinterpret_s32_s8(vmul_s8(vreinterpret_s8_s32(vdup_n_s32(
a)), vreinterpret_s8_s32(vdup_n_s32(
b)))), 0);
1137 return vmul_s8(
a,
b);
1141 return vmulq_s8(
a,
b);
1145 return vget_lane_u32(
1146 vreinterpret_u32_u8(vmul_u8(vreinterpret_u8_u32(vdup_n_u32(
a)), vreinterpret_u8_u32(vdup_n_u32(
b)))), 0);
1150 return vmul_u8(
a,
b);
1154 return vmulq_u8(
a,
b);
1158 return vmul_s16(
a,
b);
1162 return vmulq_s16(
a,
b);
1166 return vmul_u16(
a,
b);
1170 return vmulq_u16(
a,
b);
1174 return vmul_s32(
a,
b);
1178 return vmulq_s32(
a,
b);
1182 return vmul_u32(
a,
b);
1186 return vmulq_u32(
a,
b);
1190 return vcombine_s64(vdup_n_s64(vgetq_lane_s64(
a, 0) * vgetq_lane_s64(
b, 0)),
1191 vdup_n_s64(vgetq_lane_s64(
a, 1) * vgetq_lane_s64(
b, 1)));
1195 return vcombine_u64(vdup_n_u64(vgetq_lane_u64(
a, 0) * vgetq_lane_u64(
b, 0)),
1196 vdup_n_u64(vgetq_lane_u64(
a, 1) * vgetq_lane_u64(
b, 1)));
1201 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1206 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1211 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1216 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1221 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1226 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1231 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1236 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1241 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1246 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1251 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1256 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1261 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1266 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1271 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1276 eigen_assert(
false &&
"packet integer division are not supported by NEON");
1280 #ifdef EIGEN_VECTORIZE_FMA
1283 return vfmaq_f32(
c,
a,
b);
1287 return vfma_f32(
c,
a,
b);
1292 return vmlaq_f32(
c,
a,
b);
1296 return vmla_f32(
c,
a,
b);
1303 return vget_lane_s32(
1304 vreinterpret_s32_s8(vmla_s8(vreinterpret_s8_s32(vdup_n_s32(
c)), vreinterpret_s8_s32(vdup_n_s32(
a)),
1305 vreinterpret_s8_s32(vdup_n_s32(
b)))),
1310 return vmla_s8(
c,
a,
b);
1314 return vmlaq_s8(
c,
a,
b);
1318 return vget_lane_u32(
1319 vreinterpret_u32_u8(vmla_u8(vreinterpret_u8_u32(vdup_n_u32(
c)), vreinterpret_u8_u32(vdup_n_u32(
a)),
1320 vreinterpret_u8_u32(vdup_n_u32(
b)))),
1325 return vmla_u8(
c,
a,
b);
1329 return vmlaq_u8(
c,
a,
b);
1333 return vmla_s16(
c,
a,
b);
1337 return vmlaq_s16(
c,
a,
b);
1341 return vmla_u16(
c,
a,
b);
1345 return vmlaq_u16(
c,
a,
b);
1349 return vmla_s32(
c,
a,
b);
1353 return vmlaq_s32(
c,
a,
b);
1357 return vmla_u32(
c,
a,
b);
1361 return vmlaq_u32(
c,
a,
b);
1366 return vabd_f32(
a,
b);
1370 return vabdq_f32(
a,
b);
1374 return vget_lane_s32(
1375 vreinterpret_s32_s8(vabd_s8(vreinterpret_s8_s32(vdup_n_s32(
a)), vreinterpret_s8_s32(vdup_n_s32(
b)))), 0);
1379 return vabd_s8(
a,
b);
1383 return vabdq_s8(
a,
b);
1387 return vget_lane_u32(
1388 vreinterpret_u32_u8(vabd_u8(vreinterpret_u8_u32(vdup_n_u32(
a)), vreinterpret_u8_u32(vdup_n_u32(
b)))), 0);
1392 return vabd_u8(
a,
b);
1396 return vabdq_u8(
a,
b);
1400 return vabd_s16(
a,
b);
1404 return vabdq_s16(
a,
b);
1408 return vabd_u16(
a,
b);
1412 return vabdq_u16(
a,
b);
1416 return vabd_s32(
a,
b);
1420 return vabdq_s32(
a,
b);
1424 return vabd_u32(
a,
b);
1428 return vabdq_u32(
a,
b);
1433 return vmin_f32(
a,
b);
1437 return vminq_f32(
a,
b);
1440 #ifdef __ARM_FEATURE_NUMERIC_MAXMIN
1445 return vminnmq_f32(
a,
b);
1449 return vminnm_f32(
a,
b);
1465 return vget_lane_s32(
1466 vreinterpret_s32_s8(vmin_s8(vreinterpret_s8_s32(vdup_n_s32(
a)), vreinterpret_s8_s32(vdup_n_s32(
b)))), 0);
1470 return vmin_s8(
a,
b);
1474 return vminq_s8(
a,
b);
1478 return vget_lane_u32(
1479 vreinterpret_u32_u8(vmin_u8(vreinterpret_u8_u32(vdup_n_u32(
a)), vreinterpret_u8_u32(vdup_n_u32(
b)))), 0);
1483 return vmin_u8(
a,
b);
1487 return vminq_u8(
a,
b);
1491 return vmin_s16(
a,
b);
1495 return vminq_s16(
a,
b);
1499 return vmin_u16(
a,
b);
1503 return vminq_u16(
a,
b);
1507 return vmin_s32(
a,
b);
1511 return vminq_s32(
a,
b);
1515 return vmin_u32(
a,
b);
1519 return vminq_u32(
a,
b);
1523 return vcombine_s64(vdup_n_s64((
std::min)(vgetq_lane_s64(
a, 0), vgetq_lane_s64(
b, 0))),
1524 vdup_n_s64((
std::min)(vgetq_lane_s64(
a, 1), vgetq_lane_s64(
b, 1))));
1528 return vcombine_u64(vdup_n_u64((
std::min)(vgetq_lane_u64(
a, 0), vgetq_lane_u64(
b, 0))),
1529 vdup_n_u64((
std::min)(vgetq_lane_u64(
a, 1), vgetq_lane_u64(
b, 1))));
1534 return vmax_f32(
a,
b);
1538 return vmaxq_f32(
a,
b);
1541 #ifdef __ARM_FEATURE_NUMERIC_MAXMIN
1546 return vmaxnmq_f32(
a,
b);
1550 return vmaxnm_f32(
a,
b);
1566 return vget_lane_s32(
1567 vreinterpret_s32_s8(vmax_s8(vreinterpret_s8_s32(vdup_n_s32(
a)), vreinterpret_s8_s32(vdup_n_s32(
b)))), 0);
1571 return vmax_s8(
a,
b);
1575 return vmaxq_s8(
a,
b);
1579 return vget_lane_u32(
1580 vreinterpret_u32_u8(vmax_u8(vreinterpret_u8_u32(vdup_n_u32(
a)), vreinterpret_u8_u32(vdup_n_u32(
b)))), 0);
1584 return vmax_u8(
a,
b);
1588 return vmaxq_u8(
a,
b);
1592 return vmax_s16(
a,
b);
1596 return vmaxq_s16(
a,
b);
1600 return vmax_u16(
a,
b);
1604 return vmaxq_u16(
a,
b);
1608 return vmax_s32(
a,
b);
1612 return vmaxq_s32(
a,
b);
1616 return vmax_u32(
a,
b);
1620 return vmaxq_u32(
a,
b);
1624 return vcombine_s64(vdup_n_s64((
std::max)(vgetq_lane_s64(
a, 0), vgetq_lane_s64(
b, 0))),
1625 vdup_n_s64((
std::max)(vgetq_lane_s64(
a, 1), vgetq_lane_s64(
b, 1))));
1629 return vcombine_u64(vdup_n_u64((
std::max)(vgetq_lane_u64(
a, 0), vgetq_lane_u64(
b, 0))),
1630 vdup_n_u64((
std::max)(vgetq_lane_u64(
a, 1), vgetq_lane_u64(
b, 1))));
1635 return vreinterpret_f32_u32(vcle_f32(
a,
b));
1639 return vreinterpretq_f32_u32(vcleq_f32(
a,
b));
1643 return vget_lane_s32(
1644 vreinterpret_s32_u8(vcle_s8(vreinterpret_s8_s32(vdup_n_s32(
a)), vreinterpret_s8_s32(vdup_n_s32(
b)))), 0);
1648 return vreinterpret_s8_u8(vcle_s8(
a,
b));
1652 return vreinterpretq_s8_u8(vcleq_s8(
a,
b));
1656 return vget_lane_u32(
1657 vreinterpret_u32_u8(vcle_u8(vreinterpret_u8_u32(vdup_n_u32(
a)), vreinterpret_u8_u32(vdup_n_u32(
b)))), 0);
1661 return vcle_u8(
a,
b);
1665 return vcleq_u8(
a,
b);
1669 return vreinterpret_s16_u16(vcle_s16(
a,
b));
1673 return vreinterpretq_s16_u16(vcleq_s16(
a,
b));
1677 return vcle_u16(
a,
b);
1681 return vcleq_u16(
a,
b);
1685 return vreinterpret_s32_u32(vcle_s32(
a,
b));
1689 return vreinterpretq_s32_u32(vcleq_s32(
a,
b));
1693 return vcle_u32(
a,
b);
1697 return vcleq_u32(
a,
b);
1701 #if EIGEN_ARCH_ARM64
1702 return vreinterpretq_s64_u64(vcleq_s64(
a,
b));
1704 return vcombine_s64(vdup_n_s64(vgetq_lane_s64(
a, 0) <= vgetq_lane_s64(
b, 0) ?
numext::int64_t(-1) : 0),
1705 vdup_n_s64(vgetq_lane_s64(
a, 1) <= vgetq_lane_s64(
b, 1) ?
numext::int64_t(-1) : 0));
1710 #if EIGEN_ARCH_ARM64
1711 return vcleq_u64(
a,
b);
1713 return vcombine_u64(vdup_n_u64(vgetq_lane_u64(
a, 0) <= vgetq_lane_u64(
b, 0) ?
numext::uint64_t(-1) : 0),
1714 vdup_n_u64(vgetq_lane_u64(
a, 1) <= vgetq_lane_u64(
b, 1) ?
numext::uint64_t(-1) : 0));
1720 return vreinterpret_f32_u32(vclt_f32(
a,
b));
1724 return vreinterpretq_f32_u32(vcltq_f32(
a,
b));
1728 return vget_lane_s32(
1729 vreinterpret_s32_u8(vclt_s8(vreinterpret_s8_s32(vdup_n_s32(
a)), vreinterpret_s8_s32(vdup_n_s32(
b)))), 0);
1733 return vreinterpret_s8_u8(vclt_s8(
a,
b));
1737 return vreinterpretq_s8_u8(vcltq_s8(
a,
b));
1741 return vget_lane_u32(
1742 vreinterpret_u32_u8(vclt_u8(vreinterpret_u8_u32(vdup_n_u32(
a)), vreinterpret_u8_u32(vdup_n_u32(
b)))), 0);
1746 return vclt_u8(
a,
b);
1750 return vcltq_u8(
a,
b);
1754 return vreinterpret_s16_u16(vclt_s16(
a,
b));
1758 return vreinterpretq_s16_u16(vcltq_s16(
a,
b));
1762 return vclt_u16(
a,
b);
1766 return vcltq_u16(
a,
b);
1770 return vreinterpret_s32_u32(vclt_s32(
a,
b));
1774 return vreinterpretq_s32_u32(vcltq_s32(
a,
b));
1778 return vclt_u32(
a,
b);
1782 return vcltq_u32(
a,
b);
1786 #if EIGEN_ARCH_ARM64
1787 return vreinterpretq_s64_u64(vcltq_s64(
a,
b));
1789 return vcombine_s64(vdup_n_s64(vgetq_lane_s64(
a, 0) < vgetq_lane_s64(
b, 0) ?
numext::int64_t(-1) : 0),
1790 vdup_n_s64(vgetq_lane_s64(
a, 1) < vgetq_lane_s64(
b, 1) ?
numext::int64_t(-1) : 0));
1795 #if EIGEN_ARCH_ARM64
1796 return vcltq_u64(
a,
b);
1798 return vcombine_u64(vdup_n_u64(vgetq_lane_u64(
a, 0) < vgetq_lane_u64(
b, 0) ?
numext::uint64_t(-1) : 0),
1805 return vreinterpret_f32_u32(vceq_f32(
a,
b));
1809 return vreinterpretq_f32_u32(vceqq_f32(
a,
b));
1813 return vget_lane_s32(
1814 vreinterpret_s32_u8(vceq_s8(vreinterpret_s8_s32(vdup_n_s32(
a)), vreinterpret_s8_s32(vdup_n_s32(
b)))), 0);
1818 return vreinterpret_s8_u8(vceq_s8(
a,
b));
1822 return vreinterpretq_s8_u8(vceqq_s8(
a,
b));
1826 return vget_lane_u32(
1827 vreinterpret_u32_u8(vceq_u8(vreinterpret_u8_u32(vdup_n_u32(
a)), vreinterpret_u8_u32(vdup_n_u32(
b)))), 0);
1831 return vceq_u8(
a,
b);
1835 return vceqq_u8(
a,
b);
1839 return vreinterpret_s16_u16(vceq_s16(
a,
b));
1843 return vreinterpretq_s16_u16(vceqq_s16(
a,
b));
1847 return vceq_u16(
a,
b);
1851 return vceqq_u16(
a,
b);
1855 return vreinterpret_s32_u32(vceq_s32(
a,
b));
1859 return vreinterpretq_s32_u32(vceqq_s32(
a,
b));
1863 return vceq_u32(
a,
b);
1867 return vceqq_u32(
a,
b);
1871 #if EIGEN_ARCH_ARM64
1872 return vreinterpretq_s64_u64(vceqq_s64(
a,
b));
1874 return vcombine_s64(vdup_n_s64(vgetq_lane_s64(
a, 0) == vgetq_lane_s64(
b, 0) ?
numext::int64_t(-1) : 0),
1875 vdup_n_s64(vgetq_lane_s64(
a, 1) == vgetq_lane_s64(
b, 1) ?
numext::int64_t(-1) : 0));
1880 #if EIGEN_ARCH_ARM64
1881 return vceqq_u64(
a,
b);
1883 return vcombine_u64(vdup_n_u64(vgetq_lane_u64(
a, 0) == vgetq_lane_u64(
b, 0) ?
numext::uint64_t(-1) : 0),
1884 vdup_n_u64(vgetq_lane_u64(
a, 1) == vgetq_lane_u64(
b, 1) ?
numext::uint64_t(-1) : 0));
1890 return vreinterpret_f32_u32(vmvn_u32(vcge_f32(
a,
b)));
1894 return vreinterpretq_f32_u32(vmvnq_u32(vcgeq_f32(
a,
b)));
1900 return vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(
a), vreinterpret_u32_f32(
b)));
1904 return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(
a), vreinterpretq_u32_f32(
b)));
1912 return vand_s8(
a,
b);
1916 return vandq_s8(
a,
b);
1924 return vand_u8(
a,
b);
1928 return vandq_u8(
a,
b);
1932 return vand_s16(
a,
b);
1936 return vandq_s16(
a,
b);
1940 return vand_u16(
a,
b);
1944 return vandq_u16(
a,
b);
1948 return vand_s32(
a,
b);
1952 return vandq_s32(
a,
b);
1956 return vand_u32(
a,
b);
1960 return vandq_u32(
a,
b);
1964 return vandq_s64(
a,
b);
1968 return vandq_u64(
a,
b);
1973 return vreinterpret_f32_u32(vorr_u32(vreinterpret_u32_f32(
a), vreinterpret_u32_f32(
b)));
1977 return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(
a), vreinterpretq_u32_f32(
b)));
1985 return vorr_s8(
a,
b);
1989 return vorrq_s8(
a,
b);
1997 return vorr_u8(
a,
b);
2001 return vorrq_u8(
a,
b);
2005 return vorr_s16(
a,
b);
2009 return vorrq_s16(
a,
b);
2013 return vorr_u16(
a,
b);
2017 return vorrq_u16(
a,
b);
2021 return vorr_s32(
a,
b);
2025 return vorrq_s32(
a,
b);
2029 return vorr_u32(
a,
b);
2033 return vorrq_u32(
a,
b);
2037 return vorrq_s64(
a,
b);
2041 return vorrq_u64(
a,
b);
2046 return vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(
a), vreinterpret_u32_f32(
b)));
2050 return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(
a), vreinterpretq_u32_f32(
b)));
2058 return veor_s8(
a,
b);
2062 return veorq_s8(
a,
b);
2070 return veor_u8(
a,
b);
2074 return veorq_u8(
a,
b);
2078 return veor_s16(
a,
b);
2082 return veorq_s16(
a,
b);
2086 return veor_u16(
a,
b);
2090 return veorq_u16(
a,
b);
2094 return veor_s32(
a,
b);
2098 return veorq_s32(
a,
b);
2102 return veor_u32(
a,
b);
2106 return veorq_u32(
a,
b);
2110 return veorq_s64(
a,
b);
2114 return veorq_u64(
a,
b);
2119 return vreinterpret_f32_u32(vbic_u32(vreinterpret_u32_f32(
a), vreinterpret_u32_f32(
b)));
2123 return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(
a), vreinterpretq_u32_f32(
b)));
2131 return vbic_s8(
a,
b);
2135 return vbicq_s8(
a,
b);
2143 return vbic_u8(
a,
b);
2147 return vbicq_u8(
a,
b);
2151 return vbic_s16(
a,
b);
2155 return vbicq_s16(
a,
b);
2159 return vbic_u16(
a,
b);
2163 return vbicq_u16(
a,
b);
2167 return vbic_s32(
a,
b);
2171 return vbicq_s32(
a,
b);
2175 return vbic_u32(
a,
b);
2179 return vbicq_u32(
a,
b);
2183 return vbicq_s64(
a,
b);
2187 return vbicq_u64(
a,
b);
2192 return vget_lane_s32(vreinterpret_s32_s8(vshr_n_s8(vreinterpret_s8_s32(vdup_n_s32(
a)),
N)), 0);
2196 return vshr_n_s8(
a,
N);
2200 return vshrq_n_s8(
a,
N);
2204 return vget_lane_u32(vreinterpret_u32_u8(vshr_n_u8(vreinterpret_u8_u32(vdup_n_u32(
a)),
N)), 0);
2208 return vshr_n_u8(
a,
N);
2212 return vshrq_n_u8(
a,
N);
2216 return vshr_n_s16(
a,
N);
2220 return vshrq_n_s16(
a,
N);
2224 return vshr_n_u16(
a,
N);
2228 return vshrq_n_u16(
a,
N);
2232 return vshr_n_s32(
a,
N);
2236 return vshrq_n_s32(
a,
N);
2240 return vshr_n_u32(
a,
N);
2244 return vshrq_n_u32(
a,
N);
2248 return vshrq_n_s64(
a,
N);
2252 return vshrq_n_u64(
a,
N);
2257 return vget_lane_s32(vreinterpret_s32_u8(vshr_n_u8(vreinterpret_u8_s32(vdup_n_s32(
a)),
N)), 0);
2261 return vreinterpret_s8_u8(vshr_n_u8(vreinterpret_u8_s8(
a),
N));
2265 return vreinterpretq_s8_u8(vshrq_n_u8(vreinterpretq_u8_s8(
a),
N));
// Shifts each unsigned 8-bit lane packed in the 32-bit word `a` right by N bits
// and returns the low 32 bits of the result (the four packed lanes).
// The lanes are unsigned, so the unsigned intrinsic vshr_n_u8 is used: it fills
// the vacated high bits with zeros. A signed shift (vshr_n_s8) would replicate
// the top bit of any lane >= 0x80, e.g. 0x80 >> 1 would yield 0xC0 rather than
// 0x40. This also matches the other unsigned 8-bit right shifts in this file.
2269 return vget_lane_u32(vreinterpret_u32_u8(vshr_n_u8(vreinterpret_u8_u32(vdup_n_u32(
a)),
N)), 0);
2273 return vshr_n_u8(
a,
N);
2277 return vshrq_n_u8(
a,
N);
2281 return vreinterpret_s16_u16(vshr_n_u16(vreinterpret_u16_s16(
a),
N));
2285 return vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(
a),
N));
2289 return vshr_n_u16(
a,
N);
2293 return vshrq_n_u16(
a,
N);
2297 return vreinterpret_s32_u32(vshr_n_u32(vreinterpret_u32_s32(
a),
N));
2301 return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(
a),
N));
2305 return vshr_n_u32(
a,
N);
2309 return vshrq_n_u32(
a,
N);
2313 return vreinterpretq_s64_u64(vshrq_n_u64(vreinterpretq_u64_s64(
a),
N));
2317 return vshrq_n_u64(
a,
N);
2322 return vget_lane_s32(vreinterpret_s32_s8(vshl_n_s8(vreinterpret_s8_s32(vdup_n_s32(
a)),
N)), 0);
2326 return vshl_n_s8(
a,
N);
2330 return vshlq_n_s8(
a,
N);
2334 return vget_lane_u32(vreinterpret_u32_u8(vshl_n_u8(vreinterpret_u8_u32(vdup_n_u32(
a)),
N)), 0);
2338 return vshl_n_u8(
a,
N);
2342 return vshlq_n_u8(
a,
N);
2346 return vshl_n_s16(
a,
N);
2350 return vshlq_n_s16(
a,
N);
2354 return vshl_n_u16(
a,
N);
2358 return vshlq_n_u16(
a,
N);
2362 return vshl_n_s32(
a,
N);
2366 return vshlq_n_s32(
a,
N);
2370 return vshl_n_u32(
a,
N);
2374 return vshlq_n_u32(
a,
N);
2378 return vshlq_n_s64(
a,
N);
2382 return vshlq_n_u64(
a,
N);
2541 return vld1_dup_f32(from);
2545 return vcombine_f32(vld1_dup_f32(from), vld1_dup_f32(from + 1));
2550 return vget_lane_s32(vreinterpret_s32_s8(vzip_s8(
a,
a).
val[0]), 0);
2554 const int8x8_t
a = vld1_s8(from);
2555 return vzip_s8(
a,
a).val[0];
2559 const int8x8_t
a = vld1_s8(from);
2560 const int8x8x2_t
b = vzip_s8(
a,
a);
2561 return vcombine_s8(
b.val[0],
b.val[1]);
2566 return vget_lane_u32(vreinterpret_u32_u8(vzip_u8(
a,
a).
val[0]), 0);
2570 const uint8x8_t
a = vld1_u8(from);
2571 return vzip_u8(
a,
a).val[0];
2575 const uint8x8_t
a = vld1_u8(from);
2576 const uint8x8x2_t
b = vzip_u8(
a,
a);
2577 return vcombine_u8(
b.val[0],
b.val[1]);
2581 return vreinterpret_s16_u32(
2582 vzip_u32(vreinterpret_u32_s16(vld1_dup_s16(from)), vreinterpret_u32_s16(vld1_dup_s16(from + 1))).
val[0]);
2586 const int16x4_t
a = vld1_s16(from);
2587 const int16x4x2_t
b = vzip_s16(
a,
a);
2588 return vcombine_s16(
b.val[0],
b.val[1]);
2592 return vreinterpret_u16_u32(
2593 vzip_u32(vreinterpret_u32_u16(vld1_dup_u16(from)), vreinterpret_u32_u16(vld1_dup_u16(from + 1))).
val[0]);
2597 const uint16x4_t
a = vld1_u16(from);
2598 const uint16x4x2_t
b = vzip_u16(
a,
a);
2599 return vcombine_u16(
b.val[0],
b.val[1]);
2603 return vld1_dup_s32(from);
2607 return vcombine_s32(vld1_dup_s32(from), vld1_dup_s32(from + 1));
2611 return vld1_dup_u32(from);
2615 return vcombine_u32(vld1_dup_u32(from), vld1_dup_u32(from + 1));
2619 return vld1q_dup_s64(from);
2623 return vld1q_dup_u64(from);
2628 return vld1q_dup_f32(from);
2632 return vget_lane_s32(vreinterpret_s32_s8(vld1_dup_s8(from)), 0);
2636 return vreinterpret_s8_u32(
2637 vzip_u32(vreinterpret_u32_s8(vld1_dup_s8(from)), vreinterpret_u32_s8(vld1_dup_s8(from + 1))).
val[0]);
2641 const int8x8_t
a = vreinterpret_s8_u32(
2642 vzip_u32(vreinterpret_u32_s8(vld1_dup_s8(from)), vreinterpret_u32_s8(vld1_dup_s8(from + 1))).
val[0]);
2643 const int8x8_t
b = vreinterpret_s8_u32(
2644 vzip_u32(vreinterpret_u32_s8(vld1_dup_s8(from + 2)), vreinterpret_u32_s8(vld1_dup_s8(from + 3))).
val[0]);
2645 return vcombine_s8(
a,
b);
2649 return vget_lane_u32(vreinterpret_u32_u8(vld1_dup_u8(from)), 0);
2653 return vreinterpret_u8_u32(
2654 vzip_u32(vreinterpret_u32_u8(vld1_dup_u8(from)), vreinterpret_u32_u8(vld1_dup_u8(from + 1))).
val[0]);
2658 const uint8x8_t
a = vreinterpret_u8_u32(
2659 vzip_u32(vreinterpret_u32_u8(vld1_dup_u8(from)), vreinterpret_u32_u8(vld1_dup_u8(from + 1))).
val[0]);
2660 const uint8x8_t
b = vreinterpret_u8_u32(
2661 vzip_u32(vreinterpret_u32_u8(vld1_dup_u8(from + 2)), vreinterpret_u32_u8(vld1_dup_u8(from + 3))).
val[0]);
2662 return vcombine_u8(
a,
b);
2666 return vcombine_s16(vld1_dup_s16(from), vld1_dup_s16(from + 1));
2670 return vcombine_u16(vld1_dup_u16(from), vld1_dup_u16(from + 1));
2674 return vld1q_dup_s32(from);
2678 return vld1q_dup_u32(from);
2691 memcpy(to, &from,
sizeof(from));
2703 memcpy(to, &from,
sizeof(from));
2764 memcpy(to, &from,
sizeof(from));
2776 memcpy(to, &from,
sizeof(from));
2830 res = vld1_lane_f32(from + 1 * stride,
res, 1);
2836 res = vld1q_lane_f32(from + 1 * stride,
res, 1);
2837 res = vld1q_lane_f32(from + 2 * stride,
res, 2);
2838 res = vld1q_lane_f32(from + 3 * stride,
res, 3);
2844 for (
int i = 0;
i != 4;
i++)
reinterpret_cast<int8_t*
>(&
res)[
i] = *(from +
i * stride);
2850 res = vld1_lane_s8(from + 1 * stride,
res, 1);
2851 res = vld1_lane_s8(from + 2 * stride,
res, 2);
2852 res = vld1_lane_s8(from + 3 * stride,
res, 3);
2853 res = vld1_lane_s8(from + 4 * stride,
res, 4);
2854 res = vld1_lane_s8(from + 5 * stride,
res, 5);
2855 res = vld1_lane_s8(from + 6 * stride,
res, 6);
2856 res = vld1_lane_s8(from + 7 * stride,
res, 7);
2862 res = vld1q_lane_s8(from + 1 * stride,
res, 1);
2863 res = vld1q_lane_s8(from + 2 * stride,
res, 2);
2864 res = vld1q_lane_s8(from + 3 * stride,
res, 3);
2865 res = vld1q_lane_s8(from + 4 * stride,
res, 4);
2866 res = vld1q_lane_s8(from + 5 * stride,
res, 5);
2867 res = vld1q_lane_s8(from + 6 * stride,
res, 6);
2868 res = vld1q_lane_s8(from + 7 * stride,
res, 7);
2869 res = vld1q_lane_s8(from + 8 * stride,
res, 8);
2870 res = vld1q_lane_s8(from + 9 * stride,
res, 9);
2871 res = vld1q_lane_s8(from + 10 * stride,
res, 10);
2872 res = vld1q_lane_s8(from + 11 * stride,
res, 11);
2873 res = vld1q_lane_s8(from + 12 * stride,
res, 12);
2874 res = vld1q_lane_s8(from + 13 * stride,
res, 13);
2875 res = vld1q_lane_s8(from + 14 * stride,
res, 14);
2876 res = vld1q_lane_s8(from + 15 * stride,
res, 15);
2882 for (
int i = 0;
i != 4;
i++)
reinterpret_cast<uint8_t*
>(&
res)[
i] = *(from +
i * stride);
2888 res = vld1_lane_u8(from + 1 * stride,
res, 1);
2889 res = vld1_lane_u8(from + 2 * stride,
res, 2);
2890 res = vld1_lane_u8(from + 3 * stride,
res, 3);
2891 res = vld1_lane_u8(from + 4 * stride,
res, 4);
2892 res = vld1_lane_u8(from + 5 * stride,
res, 5);
2893 res = vld1_lane_u8(from + 6 * stride,
res, 6);
2894 res = vld1_lane_u8(from + 7 * stride,
res, 7);
2900 res = vld1q_lane_u8(from + 1 * stride,
res, 1);
2901 res = vld1q_lane_u8(from + 2 * stride,
res, 2);
2902 res = vld1q_lane_u8(from + 3 * stride,
res, 3);
2903 res = vld1q_lane_u8(from + 4 * stride,
res, 4);
2904 res = vld1q_lane_u8(from + 5 * stride,
res, 5);
2905 res = vld1q_lane_u8(from + 6 * stride,
res, 6);
2906 res = vld1q_lane_u8(from + 7 * stride,
res, 7);
2907 res = vld1q_lane_u8(from + 8 * stride,
res, 8);
2908 res = vld1q_lane_u8(from + 9 * stride,
res, 9);
2909 res = vld1q_lane_u8(from + 10 * stride,
res, 10);
2910 res = vld1q_lane_u8(from + 11 * stride,
res, 11);
2911 res = vld1q_lane_u8(from + 12 * stride,
res, 12);
2912 res = vld1q_lane_u8(from + 13 * stride,
res, 13);
2913 res = vld1q_lane_u8(from + 14 * stride,
res, 14);
2914 res = vld1q_lane_u8(from + 15 * stride,
res, 15);
2920 res = vld1_lane_s16(from + 1 * stride,
res, 1);
2921 res = vld1_lane_s16(from + 2 * stride,
res, 2);
2922 res = vld1_lane_s16(from + 3 * stride,
res, 3);
2928 res = vld1q_lane_s16(from + 1 * stride,
res, 1);
2929 res = vld1q_lane_s16(from + 2 * stride,
res, 2);
2930 res = vld1q_lane_s16(from + 3 * stride,
res, 3);
2931 res = vld1q_lane_s16(from + 4 * stride,
res, 4);
2932 res = vld1q_lane_s16(from + 5 * stride,
res, 5);
2933 res = vld1q_lane_s16(from + 6 * stride,
res, 6);
2934 res = vld1q_lane_s16(from + 7 * stride,
res, 7);
2940 res = vld1_lane_u16(from + 1 * stride,
res, 1);
2941 res = vld1_lane_u16(from + 2 * stride,
res, 2);
2942 res = vld1_lane_u16(from + 3 * stride,
res, 3);
2948 res = vld1q_lane_u16(from + 1 * stride,
res, 1);
2949 res = vld1q_lane_u16(from + 2 * stride,
res, 2);
2950 res = vld1q_lane_u16(from + 3 * stride,
res, 3);
2951 res = vld1q_lane_u16(from + 4 * stride,
res, 4);
2952 res = vld1q_lane_u16(from + 5 * stride,
res, 5);
2953 res = vld1q_lane_u16(from + 6 * stride,
res, 6);
2954 res = vld1q_lane_u16(from + 7 * stride,
res, 7);
2960 res = vld1_lane_s32(from + 1 * stride,
res, 1);
2966 res = vld1q_lane_s32(from + 1 * stride,
res, 1);
2967 res = vld1q_lane_s32(from + 2 * stride,
res, 2);
2968 res = vld1q_lane_s32(from + 3 * stride,
res, 3);
2974 res = vld1_lane_u32(from + 1 * stride,
res, 1);
2980 res = vld1q_lane_u32(from + 1 * stride,
res, 1);
2981 res = vld1q_lane_u32(from + 2 * stride,
res, 2);
2982 res = vld1q_lane_u32(from + 3 * stride,
res, 3);
2988 res = vld1q_lane_s64(from + 1 * stride,
res, 1);
2994 res = vld1q_lane_u64(from + 1 * stride,
res, 1);
3000 vst1_lane_f32(to + stride * 0, from, 0);
3001 vst1_lane_f32(to + stride * 1, from, 1);
3005 vst1q_lane_f32(to + stride * 0, from, 0);
3006 vst1q_lane_f32(to + stride * 1, from, 1);
3007 vst1q_lane_f32(to + stride * 2, from, 2);
3008 vst1q_lane_f32(to + stride * 3, from, 3);
3012 for (
int i = 0;
i != 4;
i++) *(to +
i * stride) =
reinterpret_cast<const int8_t*
>(&from)[
i];
3016 vst1_lane_s8(to + stride * 0, from, 0);
3017 vst1_lane_s8(to + stride * 1, from, 1);
3018 vst1_lane_s8(to + stride * 2, from, 2);
3019 vst1_lane_s8(to + stride * 3, from, 3);
3020 vst1_lane_s8(to + stride * 4, from, 4);
3021 vst1_lane_s8(to + stride * 5, from, 5);
3022 vst1_lane_s8(to + stride * 6, from, 6);
3023 vst1_lane_s8(to + stride * 7, from, 7);
3028 vst1q_lane_s8(to + stride * 0, from, 0);
3029 vst1q_lane_s8(to + stride * 1, from, 1);
3030 vst1q_lane_s8(to + stride * 2, from, 2);
3031 vst1q_lane_s8(to + stride * 3, from, 3);
3032 vst1q_lane_s8(to + stride * 4, from, 4);
3033 vst1q_lane_s8(to + stride * 5, from, 5);
3034 vst1q_lane_s8(to + stride * 6, from, 6);
3035 vst1q_lane_s8(to + stride * 7, from, 7);
3036 vst1q_lane_s8(to + stride * 8, from, 8);
3037 vst1q_lane_s8(to + stride * 9, from, 9);
3038 vst1q_lane_s8(to + stride * 10, from, 10);
3039 vst1q_lane_s8(to + stride * 11, from, 11);
3040 vst1q_lane_s8(to + stride * 12, from, 12);
3041 vst1q_lane_s8(to + stride * 13, from, 13);
3042 vst1q_lane_s8(to + stride * 14, from, 14);
3043 vst1q_lane_s8(to + stride * 15, from, 15);
3048 for (
int i = 0;
i != 4;
i++) *(to +
i * stride) =
reinterpret_cast<const uint8_t*
>(&from)[
i];
3053 vst1_lane_u8(to + stride * 0, from, 0);
3054 vst1_lane_u8(to + stride * 1, from, 1);
3055 vst1_lane_u8(to + stride * 2, from, 2);
3056 vst1_lane_u8(to + stride * 3, from, 3);
3057 vst1_lane_u8(to + stride * 4, from, 4);
3058 vst1_lane_u8(to + stride * 5, from, 5);
3059 vst1_lane_u8(to + stride * 6, from, 6);
3060 vst1_lane_u8(to + stride * 7, from, 7);
3065 vst1q_lane_u8(to + stride * 0, from, 0);
3066 vst1q_lane_u8(to + stride * 1, from, 1);
3067 vst1q_lane_u8(to + stride * 2, from, 2);
3068 vst1q_lane_u8(to + stride * 3, from, 3);
3069 vst1q_lane_u8(to + stride * 4, from, 4);
3070 vst1q_lane_u8(to + stride * 5, from, 5);
3071 vst1q_lane_u8(to + stride * 6, from, 6);
3072 vst1q_lane_u8(to + stride * 7, from, 7);
3073 vst1q_lane_u8(to + stride * 8, from, 8);
3074 vst1q_lane_u8(to + stride * 9, from, 9);
3075 vst1q_lane_u8(to + stride * 10, from, 10);
3076 vst1q_lane_u8(to + stride * 11, from, 11);
3077 vst1q_lane_u8(to + stride * 12, from, 12);
3078 vst1q_lane_u8(to + stride * 13, from, 13);
3079 vst1q_lane_u8(to + stride * 14, from, 14);
3080 vst1q_lane_u8(to + stride * 15, from, 15);
3085 vst1_lane_s16(to + stride * 0, from, 0);
3086 vst1_lane_s16(to + stride * 1, from, 1);
3087 vst1_lane_s16(to + stride * 2, from, 2);
3088 vst1_lane_s16(to + stride * 3, from, 3);
3093 vst1q_lane_s16(to + stride * 0, from, 0);
3094 vst1q_lane_s16(to + stride * 1, from, 1);
3095 vst1q_lane_s16(to + stride * 2, from, 2);
3096 vst1q_lane_s16(to + stride * 3, from, 3);
3097 vst1q_lane_s16(to + stride * 4, from, 4);
3098 vst1q_lane_s16(to + stride * 5, from, 5);
3099 vst1q_lane_s16(to + stride * 6, from, 6);
3100 vst1q_lane_s16(to + stride * 7, from, 7);
3105 vst1_lane_u16(to + stride * 0, from, 0);
3106 vst1_lane_u16(to + stride * 1, from, 1);
3107 vst1_lane_u16(to + stride * 2, from, 2);
3108 vst1_lane_u16(to + stride * 3, from, 3);
3113 vst1q_lane_u16(to + stride * 0, from, 0);
3114 vst1q_lane_u16(to + stride * 1, from, 1);
3115 vst1q_lane_u16(to + stride * 2, from, 2);
3116 vst1q_lane_u16(to + stride * 3, from, 3);
3117 vst1q_lane_u16(to + stride * 4, from, 4);
3118 vst1q_lane_u16(to + stride * 5, from, 5);
3119 vst1q_lane_u16(to + stride * 6, from, 6);
3120 vst1q_lane_u16(to + stride * 7, from, 7);
3125 vst1_lane_s32(to + stride * 0, from, 0);
3126 vst1_lane_s32(to + stride * 1, from, 1);
3131 vst1q_lane_s32(to + stride * 0, from, 0);
3132 vst1q_lane_s32(to + stride * 1, from, 1);
3133 vst1q_lane_s32(to + stride * 2, from, 2);
3134 vst1q_lane_s32(to + stride * 3, from, 3);
3139 vst1_lane_u32(to + stride * 0, from, 0);
3140 vst1_lane_u32(to + stride * 1, from, 1);
3145 vst1q_lane_u32(to + stride * 0, from, 0);
3146 vst1q_lane_u32(to + stride * 1, from, 1);
3147 vst1q_lane_u32(to + stride * 2, from, 2);
3148 vst1q_lane_u32(to + stride * 3, from, 3);
3153 vst1q_lane_s64(to + stride * 0, from, 0);
3154 vst1q_lane_s64(to + stride * 1, from, 1);
3159 vst1q_lane_u64(to + stride * 0, from, 0);
3160 vst1q_lane_u64(to + stride * 1, from, 1);
3202 return vget_lane_f32(
a, 0);
3206 return vgetq_lane_f32(
a, 0);
3210 return static_cast<int8_t>(
a & 0xff);
3214 return vget_lane_s8(
a, 0);
3218 return vgetq_lane_s8(
a, 0);
3222 return static_cast<uint8_t>(
a & 0xff);
3226 return vget_lane_u8(
a, 0);
3230 return vgetq_lane_u8(
a, 0);
3234 return vget_lane_s16(
a, 0);
3238 return vgetq_lane_s16(
a, 0);
3242 return vget_lane_u16(
a, 0);
3246 return vgetq_lane_u16(
a, 0);
3250 return vget_lane_s32(
a, 0);
3254 return vgetq_lane_s32(
a, 0);
3258 return vget_lane_u32(
a, 0);
3262 return vgetq_lane_u32(
a, 0);
3266 return vgetq_lane_s64(
a, 0);
3270 return vgetq_lane_u64(
a, 0);
3275 return vrev64_f32(
a);
3279 const float32x4_t a_r64 = vrev64q_f32(
a);
3280 return vcombine_f32(vget_high_f32(a_r64), vget_low_f32(a_r64));
3284 return vget_lane_s32(vreinterpret_s32_s8(vrev64_s8(vreinterpret_s8_s32(vdup_n_s32(
a)))), 0);
3288 return vrev64_s8(
a);
3292 const int8x16_t a_r64 = vrev64q_s8(
a);
3293 return vcombine_s8(vget_high_s8(a_r64), vget_low_s8(a_r64));
3297 return vget_lane_u32(vreinterpret_u32_u8(vrev64_u8(vreinterpret_u8_u32(vdup_n_u32(
a)))), 0);
3301 return vrev64_u8(
a);
3305 const uint8x16_t a_r64 = vrev64q_u8(
a);
3306 return vcombine_u8(vget_high_u8(a_r64), vget_low_u8(a_r64));
3310 return vrev64_s16(
a);
3314 const int16x8_t a_r64 = vrev64q_s16(
a);
3315 return vcombine_s16(vget_high_s16(a_r64), vget_low_s16(a_r64));
3319 return vrev64_u16(
a);
3323 const uint16x8_t a_r64 = vrev64q_u16(
a);
3324 return vcombine_u16(vget_high_u16(a_r64), vget_low_u16(a_r64));
3328 return vrev64_s32(
a);
3332 const int32x4_t a_r64 = vrev64q_s32(
a);
3333 return vcombine_s32(vget_high_s32(a_r64), vget_low_s32(a_r64));
3337 return vrev64_u32(
a);
3341 const uint32x4_t a_r64 = vrev64q_u32(
a);
3342 return vcombine_u32(vget_high_u32(a_r64), vget_low_u32(a_r64));
3346 return vcombine_s64(vget_high_s64(
a), vget_low_s64(
a));
3350 return vcombine_u64(vget_high_u64(
a), vget_low_u64(
a));
3359 return vabsq_f32(
a);
3363 return vget_lane_s32(vreinterpret_s32_s8(vabs_s8(vreinterpret_s8_s32(vdup_n_s32(
a)))), 0);
3391 return vabsq_s16(
a);
3407 return vabsq_s32(
a);
3419 #if EIGEN_ARCH_ARM64
3420 return vabsq_s64(
a);
3422 return vcombine_s64(vdup_n_s64((
std::abs)(vgetq_lane_s64(
a, 0))), vdup_n_s64((
std::abs)(vgetq_lane_s64(
a, 1))));
3432 return vreinterpret_f32_s32(vshr_n_s32(vreinterpret_s32_f32(
a), 31));
3436 return vreinterpretq_f32_s32(vshrq_n_s32(vreinterpretq_s32_f32(
a), 31));
3457 #if EIGEN_ARCH_ARM64
3460 return vaddv_f32(
a);
3464 return vaddvq_f32(
a);
3469 return vget_lane_f32(vpadd_f32(
a,
a), 0);
3473 const float32x2_t sum = vadd_f32(vget_low_f32(
a), vget_high_f32(
a));
3474 return vget_lane_f32(vpadd_f32(sum, sum), 0);
3479 const int8x8_t a_dup = vreinterpret_s8_s32(vdup_n_s32(
a));
3480 int8x8_t sum = vpadd_s8(a_dup, a_dup);
3481 sum = vpadd_s8(sum, sum);
3482 return vget_lane_s8(sum, 0);
3484 #if EIGEN_ARCH_ARM64
3491 return vaddvq_s8(
a);
3496 int8x8_t sum = vpadd_s8(
a,
a);
3497 sum = vpadd_s8(sum, sum);
3498 sum = vpadd_s8(sum, sum);
3499 return vget_lane_s8(sum, 0);
3503 int8x8_t sum = vadd_s8(vget_low_s8(
a), vget_high_s8(
a));
3504 sum = vpadd_s8(sum, sum);
3505 sum = vpadd_s8(sum, sum);
3506 sum = vpadd_s8(sum, sum);
3507 return vget_lane_s8(sum, 0);
3512 const uint8x8_t a_dup = vreinterpret_u8_u32(vdup_n_u32(
a));
3513 uint8x8_t sum = vpadd_u8(a_dup, a_dup);
3514 sum = vpadd_u8(sum, sum);
3515 return vget_lane_u8(sum, 0);
3517 #if EIGEN_ARCH_ARM64
3524 return vaddvq_u8(
a);
3528 return vaddv_s16(
a);
3532 return vaddvq_s16(
a);
3536 return vaddv_u16(
a);
3540 return vaddvq_u16(
a);
3544 return vaddv_s32(
a);
3548 return vaddvq_s32(
a);
3552 return vaddv_u32(
a);
3556 return vaddvq_u32(
a);
3560 return vaddvq_s64(
a);
3564 return vaddvq_u64(
a);
3569 uint8x8_t sum = vpadd_u8(
a,
a);
3570 sum = vpadd_u8(sum, sum);
3571 sum = vpadd_u8(sum, sum);
3572 return vget_lane_u8(sum, 0);
3576 uint8x8_t sum = vadd_u8(vget_low_u8(
a), vget_high_u8(
a));
3577 sum = vpadd_u8(sum, sum);
3578 sum = vpadd_u8(sum, sum);
3579 sum = vpadd_u8(sum, sum);
3580 return vget_lane_u8(sum, 0);
3584 const int16x4_t sum = vpadd_s16(
a,
a);
3585 return vget_lane_s16(vpadd_s16(sum, sum), 0);
3589 int16x4_t sum = vadd_s16(vget_low_s16(
a), vget_high_s16(
a));
3590 sum = vpadd_s16(sum, sum);
3591 sum = vpadd_s16(sum, sum);
3592 return vget_lane_s16(sum, 0);
3596 const uint16x4_t sum = vpadd_u16(
a,
a);
3597 return vget_lane_u16(vpadd_u16(sum, sum), 0);
3601 uint16x4_t sum = vadd_u16(vget_low_u16(
a), vget_high_u16(
a));
3602 sum = vpadd_u16(sum, sum);
3603 sum = vpadd_u16(sum, sum);
3604 return vget_lane_u16(sum, 0);
3608 return vget_lane_s32(vpadd_s32(
a,
a), 0);
3612 const int32x2_t sum = vadd_s32(vget_low_s32(
a), vget_high_s32(
a));
3613 return vget_lane_s32(vpadd_s32(sum, sum), 0);
3617 return vget_lane_u32(vpadd_u32(
a,
a), 0);
3621 const uint32x2_t sum = vadd_u32(vget_low_u32(
a), vget_high_u32(
a));
3622 return vget_lane_u32(vpadd_u32(sum, sum), 0);
3626 return vgetq_lane_s64(
a, 0) + vgetq_lane_s64(
a, 1);
3630 return vgetq_lane_u64(
a, 0) + vgetq_lane_u64(
a, 1);
3636 return vget_lane_s32(vreinterpret_s32_s8(vadd_s8(
a, vreinterpret_s8_s32(vrev64_s32(vreinterpret_s32_s8(
a))))), 0);
3640 return vadd_s8(vget_high_s8(
a), vget_low_s8(
a));
3644 return vget_lane_u32(vreinterpret_u32_u8(vadd_u8(
a, vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(
a))))), 0);
3648 return vadd_u8(vget_high_u8(
a), vget_low_u8(
a));
3652 return vadd_s16(vget_high_s16(
a), vget_low_s16(
a));
3656 return vadd_u16(vget_high_u16(
a), vget_low_u16(
a));
3663 return vget_lane_f32(
a, 0) * vget_lane_f32(
a, 1);
3671 int8x8_t
prod = vreinterpret_s8_s32(vdup_n_s32(
a));
3673 return vget_lane_s8(
prod, 0) * vget_lane_s8(
prod, 2);
3677 int8x8_t
prod = vmul_s8(
a, vrev16_s8(
a));
3679 return vget_lane_s8(
prod, 0) * vget_lane_s8(
prod, 4);
3687 uint8x8_t
prod = vreinterpret_u8_u32(vdup_n_u32(
a));
3689 return vget_lane_u8(
prod, 0) * vget_lane_u8(
prod, 2);
3693 uint8x8_t
prod = vmul_u8(
a, vrev16_u8(
a));
3695 return vget_lane_u8(
prod, 0) * vget_lane_u8(
prod, 4);
3703 const int16x4_t
prod = vmul_s16(
a, vrev32_s16(
a));
3704 return vget_lane_s16(
prod, 0) * vget_lane_s16(
prod, 2);
3711 prod = vmul_s16(vget_low_s16(
a), vget_high_s16(
a));
3715 return vget_lane_s16(
prod, 0) * vget_lane_s16(
prod, 2);
3719 const uint16x4_t
prod = vmul_u16(
a, vrev32_u16(
a));
3720 return vget_lane_u16(
prod, 0) * vget_lane_u16(
prod, 2);
3727 prod = vmul_u16(vget_low_u16(
a), vget_high_u16(
a));
3731 return vget_lane_u16(
prod, 0) * vget_lane_u16(
prod, 2);
3735 return vget_lane_s32(
a, 0) * vget_lane_s32(
a, 1);
3743 return vget_lane_u32(
a, 0) * vget_lane_u32(
a, 1);
3751 return vgetq_lane_s64(
a, 0) * vgetq_lane_s64(
a, 1);
3755 return vgetq_lane_u64(
a, 0) * vgetq_lane_u64(
a, 1);
3759 #if EIGEN_ARCH_ARM64
3762 return vminv_f32(
a);
3766 return vminvq_f32(
a);
3771 return vget_lane_f32(vpmin_f32(
a,
a), 0);
3775 const float32x2_t
min = vmin_f32(vget_low_f32(
a), vget_high_f32(
a));
3776 return vget_lane_f32(vpmin_f32(
min,
min), 0);
3781 const int8x8_t a_dup = vreinterpret_s8_s32(vdup_n_s32(
a));
3782 int8x8_t
min = vpmin_s8(a_dup, a_dup);
3784 return vget_lane_s8(
min, 0);
3786 #if EIGEN_ARCH_ARM64
3793 return vminvq_s8(
a);
3798 int8x8_t
min = vpmin_s8(
a,
a);
3801 return vget_lane_s8(
min, 0);
3805 int8x8_t
min = vmin_s8(vget_low_s8(
a), vget_high_s8(
a));
3809 return vget_lane_s8(
min, 0);
3814 const uint8x8_t a_dup = vreinterpret_u8_u32(vdup_n_u32(
a));
3815 uint8x8_t
min = vpmin_u8(a_dup, a_dup);
3817 return vget_lane_u8(
min, 0);
3819 #if EIGEN_ARCH_ARM64
3826 return vminvq_u8(
a);
3830 return vminv_s16(
a);
3834 return vminvq_s16(
a);
3838 return vminv_u16(
a);
3842 return vminvq_u16(
a);
3846 return vminv_s32(
a);
3850 return vminvq_s32(
a);
3854 return vminv_u32(
a);
3858 return vminvq_u32(
a);
3863 uint8x8_t
min = vpmin_u8(
a,
a);
3866 return vget_lane_u8(
min, 0);
3870 uint8x8_t
min = vmin_u8(vget_low_u8(
a), vget_high_u8(
a));
3874 return vget_lane_u8(
min, 0);
3878 const int16x4_t
min = vpmin_s16(
a,
a);
3879 return vget_lane_s16(vpmin_s16(
min,
min), 0);
3883 int16x4_t
min = vmin_s16(vget_low_s16(
a), vget_high_s16(
a));
3886 return vget_lane_s16(
min, 0);
3890 const uint16x4_t
min = vpmin_u16(
a,
a);
3891 return vget_lane_u16(vpmin_u16(
min,
min), 0);
3895 uint16x4_t
min = vmin_u16(vget_low_u16(
a), vget_high_u16(
a));
3898 return vget_lane_u16(
min, 0);
3902 return vget_lane_s32(vpmin_s32(
a,
a), 0);
3906 const int32x2_t
min = vmin_s32(vget_low_s32(
a), vget_high_s32(
a));
3907 return vget_lane_s32(vpmin_s32(
min,
min), 0);
3911 return vget_lane_u32(vpmin_u32(
a,
a), 0);
3915 const uint32x2_t
min = vmin_u32(vget_low_u32(
a), vget_high_u32(
a));
3916 return vget_lane_u32(vpmin_u32(
min,
min), 0);
3921 return (
std::min)(vgetq_lane_s64(
a, 0), vgetq_lane_s64(
a, 1));
3925 return (
std::min)(vgetq_lane_u64(
a, 0), vgetq_lane_u64(
a, 1));
3929 #if EIGEN_ARCH_ARM64
3932 return vmaxv_f32(
a);
3936 return vmaxvq_f32(
a);
3941 return vget_lane_f32(vpmax_f32(
a,
a), 0);
3945 const float32x2_t
max = vmax_f32(vget_low_f32(
a), vget_high_f32(
a));
3946 return vget_lane_f32(vpmax_f32(
max,
max), 0);
3951 const int8x8_t a_dup = vreinterpret_s8_s32(vdup_n_s32(
a));
3952 int8x8_t
max = vpmax_s8(a_dup, a_dup);
3954 return vget_lane_s8(
max, 0);
3956 #if EIGEN_ARCH_ARM64
3963 return vmaxvq_s8(
a);
3968 int8x8_t
max = vpmax_s8(
a,
a);
3971 return vget_lane_s8(
max, 0);
3975 int8x8_t
max = vmax_s8(vget_low_s8(
a), vget_high_s8(
a));
3979 return vget_lane_s8(
max, 0);
3984 const uint8x8_t a_dup = vreinterpret_u8_u32(vdup_n_u32(
a));
3985 uint8x8_t
max = vpmax_u8(a_dup, a_dup);
3987 return vget_lane_u8(
max, 0);
3989 #if EIGEN_ARCH_ARM64
3996 return vmaxvq_u8(
a);
4000 return vmaxv_s16(
a);
4004 return vmaxvq_s16(
a);
4008 return vmaxv_u16(
a);
4012 return vmaxvq_u16(
a);
4016 return vmaxv_s32(
a);
4020 return vmaxvq_s32(
a);
4024 return vmaxv_u32(
a);
4028 return vmaxvq_u32(
a);
4033 uint8x8_t
max = vpmax_u8(
a,
a);
4036 return vget_lane_u8(
max, 0);
4040 uint8x8_t
max = vmax_u8(vget_low_u8(
a), vget_high_u8(
a));
4044 return vget_lane_u8(
max, 0);
4048 const int16x4_t
max = vpmax_s16(
a,
a);
4049 return vget_lane_s16(vpmax_s16(
max,
max), 0);
4053 int16x4_t
max = vmax_s16(vget_low_s16(
a), vget_high_s16(
a));
4056 return vget_lane_s16(
max, 0);
4060 const uint16x4_t
max = vpmax_u16(
a,
a);
4061 return vget_lane_u16(vpmax_u16(
max,
max), 0);
4065 uint16x4_t
max = vmax_u16(vget_low_u16(
a), vget_high_u16(
a));
4068 return vget_lane_u16(
max, 0);
4072 return vget_lane_s32(vpmax_s32(
a,
a), 0);
4076 const int32x2_t
max = vmax_s32(vget_low_s32(
a), vget_high_s32(
a));
4077 return vget_lane_s32(vpmax_s32(
max,
max), 0);
4081 return vget_lane_u32(vpmax_u32(
a,
a), 0);
4085 const uint32x2_t
max = vmax_u32(vget_low_u32(
a), vget_high_u32(
a));
4086 return vget_lane_u32(vpmax_u32(
max,
max), 0);
4091 return (
std::max)(vgetq_lane_s64(
a, 0), vgetq_lane_s64(
a, 1));
4095 return (
std::max)(vgetq_lane_u64(
a, 0), vgetq_lane_u64(
a, 1));
4100 uint32x2_t
tmp = vorr_u32(vget_low_u32(vreinterpretq_u32_f32(
x)), vget_high_u32(vreinterpretq_u32_f32(
x)));
4101 return vget_lane_u32(vpmax_u32(
tmp,
tmp), 0);
4107 template <
typename Packet>
4112 const float32x2x2_t
tmp = vzip_f32(
p1, p2);
4119 const float32x4x2_t
tmp = vzipq_f32(
p1, p2);
4126 const int8x8x2_t
tmp = vzip_s8(
p1, p2);
4133 const int8x16x2_t
tmp = vzipq_s8(
p1, p2);
4140 const uint8x8x2_t
tmp = vzip_u8(
p1, p2);
4147 const uint8x16x2_t
tmp = vzipq_u8(
p1, p2);
4154 const int32x2x2_t
tmp = vzip_s32(
p1, p2);
4161 const int32x4x2_t
tmp = vzipq_s32(
p1, p2);
4168 const uint32x2x2_t
tmp = vzip_u32(
p1, p2);
4175 const uint32x4x2_t
tmp = vzipq_u32(
p1, p2);
4182 const int16x4x2_t
tmp = vzip_s16(
p1, p2);
4189 const int16x8x2_t
tmp = vzipq_s16(
p1, p2);
4196 const uint16x4x2_t
tmp = vzip_u16(
p1, p2);
4203 const uint16x8x2_t
tmp = vzipq_u16(
p1, p2);
4208 template <
typename Packet>
4213 template <
typename Packet>
4221 template <
typename Packet>
4239 template <
typename Packet>
4242 for (
int i = 0;
i < 4; ++
i) {
4243 const int m = (1 <<
i);
4245 for (
int j = 0;
j <
m; ++
j) {
4246 const int n = (1 << (3 -
i));
4248 for (
int k = 0;
k <
n; ++
k) {
4249 const int idx = 2 *
j *
n +
k;
4266 const int8x8_t
a = vreinterpret_s8_s32(vset_lane_s32(kernel.
packet[2], vdup_n_s32(kernel.
packet[0]), 1));
4267 const int8x8_t
b = vreinterpret_s8_s32(vset_lane_s32(kernel.
packet[3], vdup_n_s32(kernel.
packet[1]), 1));
4269 const int8x8x2_t zip8 = vzip_s8(
a,
b);
4270 const int16x4x2_t zip16 = vzip_s16(vreinterpret_s16_s8(zip8.val[0]), vreinterpret_s16_s8(zip8.val[1]));
4272 kernel.
packet[0] = vget_lane_s32(vreinterpret_s32_s16(zip16.val[0]), 0);
4273 kernel.
packet[1] = vget_lane_s32(vreinterpret_s32_s16(zip16.val[0]), 1);
4274 kernel.
packet[2] = vget_lane_s32(vreinterpret_s32_s16(zip16.val[1]), 0);
4275 kernel.
packet[3] = vget_lane_s32(vreinterpret_s32_s16(zip16.val[1]), 1);
4294 const uint8x8_t
a = vreinterpret_u8_u32(vset_lane_u32(kernel.
packet[2], vdup_n_u32(kernel.
packet[0]), 1));
4295 const uint8x8_t
b = vreinterpret_u8_u32(vset_lane_u32(kernel.
packet[3], vdup_n_u32(kernel.
packet[1]), 1));
4297 const uint8x8x2_t zip8 = vzip_u8(
a,
b);
4298 const uint16x4x2_t zip16 = vzip_u16(vreinterpret_u16_u8(zip8.val[0]), vreinterpret_u16_u8(zip8.val[1]));
4300 kernel.
packet[0] = vget_lane_u32(vreinterpret_u32_u16(zip16.val[0]), 0);
4301 kernel.
packet[1] = vget_lane_u32(vreinterpret_u32_u16(zip16.val[0]), 1);
4302 kernel.
packet[2] = vget_lane_u32(vreinterpret_u32_u16(zip16.val[1]), 0);
4303 kernel.
packet[3] = vget_lane_u32(vreinterpret_u32_u16(zip16.val[1]), 1);
4355 #if EIGEN_ARCH_ARM64
4356 const int64x2_t tmp1 = vzip1q_s64(kernel.packet[0], kernel.packet[1]);
4357 kernel.packet[1] = vzip2q_s64(kernel.packet[0], kernel.packet[1]);
4358 kernel.packet[0] = tmp1;
4360 const int64x1_t
tmp[2][2] = {{vget_low_s64(kernel.packet[0]), vget_high_s64(kernel.packet[0])},
4361 {vget_low_s64(kernel.packet[1]), vget_high_s64(kernel.packet[1])}};
4364 kernel.packet[1] = vcombine_s64(
tmp[0][1],
tmp[1][1]);
4368 #if EIGEN_ARCH_ARM64
4369 const uint64x2_t tmp1 = vzip1q_u64(kernel.packet[0], kernel.packet[1]);
4370 kernel.packet[1] = vzip2q_u64(kernel.packet[0], kernel.packet[1]);
4371 kernel.packet[0] = tmp1;
4373 const uint64x1_t
tmp[2][2] = {{vget_low_u64(kernel.packet[0]), vget_high_u64(kernel.packet[0])},
4374 {vget_low_u64(kernel.packet[1]), vget_high_u64(kernel.packet[1])}};
4377 kernel.packet[1] = vcombine_u64(
tmp[0][1],
tmp[1][1]);
4383 return vbsl_f32(vreinterpret_u32_f32(mask),
a,
b);
4387 return vbslq_f32(vreinterpretq_u32_f32(mask),
a,
b);
4391 return vbsl_s8(vreinterpret_u8_s8(mask),
a,
b);
4395 return vbslq_s8(vreinterpretq_u8_s8(mask),
a,
b);
4399 return vbsl_u8(mask,
a,
b);
4404 return vbslq_u8(mask,
a,
b);
4408 return vbsl_s16(vreinterpret_u16_s16(mask),
a,
b);
4412 return vbslq_s16(vreinterpretq_u16_s16(mask),
a,
b);
4416 return vbsl_u16(mask,
a,
b);
4420 return vbslq_u16(mask,
a,
b);
4424 return vbsl_s32(vreinterpret_u32_s32(mask),
a,
b);
4428 return vbslq_s32(vreinterpretq_u32_s32(mask),
a,
b);
4432 return vbsl_u32(mask,
a,
b);
4436 return vbslq_u32(mask,
a,
b);
4440 return vbslq_s64(vreinterpretq_u64_s64(mask),
a,
b);
4444 return vbslq_u64(mask,
a,
b);
4448 #if EIGEN_ARCH_ARMV8
4451 return vrndn_f32(
a);
4456 return vrndnq_f32(
a);
4461 return vrndm_f32(
a);
4466 return vrndmq_f32(
a);
4471 return vrndp_f32(
a);
4476 return vrndpq_f32(
a);
4481 return vrnda_f32(
a);
4486 return vrndaq_f32(
a);
4496 return vrndq_f32(
a);
4508 uint8x8_t
x = vreinterpret_u8_u32(vdup_n_u32(
a));
4509 uint8x8_t
res = vdup_n_u8(0);
4510 uint8x8_t add = vdup_n_u8(0x8);
4511 for (
int i = 0;
i < 4;
i++) {
4512 const uint8x8_t temp = vorr_u8(
res, add);
4513 res = vbsl_u8(vcge_u8(
x, vmul_u8(temp, temp)), temp,
res);
4514 add = vshr_n_u8(add, 1);
4516 return vget_lane_u32(vreinterpret_u32_u8(
res), 0);
4521 uint8x8_t
res = vdup_n_u8(0);
4522 uint8x8_t add = vdup_n_u8(0x8);
4523 for (
int i = 0;
i < 4;
i++) {
4524 const uint8x8_t temp = vorr_u8(
res, add);
4525 res = vbsl_u8(vcge_u8(
a, vmul_u8(temp, temp)), temp,
res);
4526 add = vshr_n_u8(add, 1);
4533 uint8x16_t
res = vdupq_n_u8(0);
4534 uint8x16_t add = vdupq_n_u8(0x8);
4535 for (
int i = 0;
i < 4;
i++) {
4536 const uint8x16_t temp = vorrq_u8(
res, add);
4537 res = vbslq_u8(vcgeq_u8(
a, vmulq_u8(temp, temp)), temp,
res);
4538 add = vshrq_n_u8(add, 1);
4545 uint16x4_t
res = vdup_n_u16(0);
4546 uint16x4_t add = vdup_n_u16(0x80);
4547 for (
int i = 0;
i < 8;
i++) {
4548 const uint16x4_t temp = vorr_u16(
res, add);
4549 res = vbsl_u16(vcge_u16(
a, vmul_u16(temp, temp)), temp,
res);
4550 add = vshr_n_u16(add, 1);
4557 uint16x8_t
res = vdupq_n_u16(0);
4558 uint16x8_t add = vdupq_n_u16(0x80);
4559 for (
int i = 0;
i < 8;
i++) {
4560 const uint16x8_t temp = vorrq_u16(
res, add);
4561 res = vbslq_u16(vcgeq_u16(
a, vmulq_u16(temp, temp)), temp,
res);
4562 add = vshrq_n_u16(add, 1);
4569 uint32x2_t
res = vdup_n_u32(0);
4570 uint32x2_t add = vdup_n_u32(0x8000);
4571 for (
int i = 0;
i < 16;
i++) {
4572 const uint32x2_t temp = vorr_u32(
res, add);
4573 res = vbsl_u32(vcge_u32(
a, vmul_u32(temp, temp)), temp,
res);
4574 add = vshr_n_u32(add, 1);
4581 uint32x4_t
res = vdupq_n_u32(0);
4582 uint32x4_t add = vdupq_n_u32(0x8000);
4583 for (
int i = 0;
i < 16;
i++) {
4584 const uint32x4_t temp = vorrq_u32(
res, add);
4585 res = vbslq_u32(vcgeq_u32(
a, vmulq_u32(temp, temp)), temp,
res);
4586 add = vshrq_n_u32(add, 1);
4594 float32x4_t result = vrsqrteq_f32(
a);
4595 result = vmulq_f32(vrsqrtsq_f32(vmulq_f32(
a, result), result), result);
4596 result = vmulq_f32(vrsqrtsq_f32(vmulq_f32(
a, result), result), result);
4603 float32x2_t result = vrsqrte_f32(
a);
4604 result = vmul_f32(vrsqrts_f32(vmul_f32(
a, result), result), result);
4605 result = vmul_f32(vrsqrts_f32(vmul_f32(
a, result), result), result);
4609 template <
typename Packet>
4616 result =
pselect(return_inf,
por(cst_inf,
a), result);
4617 result =
pandnot(result, return_zero);
4634 float32x4_t result = vrecpeq_f32(
a);
4635 result = vmulq_f32(vrecpsq_f32(
a, result), result);
4636 result = vmulq_f32(vrecpsq_f32(
a, result), result);
4643 float32x2_t result = vrecpe_f32(
a);
4644 result = vmul_f32(vrecps_f32(
a, result), result);
4645 result = vmul_f32(vrecps_f32(
a, result), result);
4650 #if EIGEN_ARCH_ARM64
4653 return vsqrtq_f32(
a);
4658 return vsqrt_f32(
a);
4663 return vdivq_f32(
a,
b);
4668 return vdiv_f32(
a,
b);
4671 template <
typename Packet>
4679 Packet return_a =
por(a_is_zero, a_is_inf);
4681 result =
pselect(return_a,
a, result);
4695 template <
typename Packet>
4701 const Packet cst_one = pset1<Packet>(1.0f);
4702 const Packet cst_quarter = pset1<Packet>(0.25f);
4734 struct packet_traits<
bfloat16> : default_packet_traits {
4786 const uint16x4x2_t
tmp = vzip_u16(
p1, p2);
4798 Packet4ui lsb = vandq_u32(vshrq_n_u32(input, 16), vdupq_n_u32(1));
4801 Packet4ui rounding_bias = vaddq_u32(lsb, vdupq_n_u32(0x7fff));
4804 input = vaddq_u32(input, rounding_bias);
4807 input = vshrq_n_u32(input, 16);
4810 const Packet4ui bf16_nan = vdupq_n_u32(0x7fc0);
4812 input = vbslq_u32(mask, input, bf16_nan);
4815 return vmovn_u32(input);
4819 return Packet4f(vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(
p), 16)));
5045 #if EIGEN_COMP_CLANGAPPLE
5049 #define EIGEN_APPLE_DOUBLE_NEON_BUG (EIGEN_COMP_CLANGAPPLE < 6010000)
5051 #define EIGEN_APPLE_DOUBLE_NEON_BUG 0
5054 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
5062 template <
typename T>
5063 uint64x2_t vreinterpretq_u64_f64(
T a) {
5064 return (uint64x2_t)
a;
5067 template <
typename T>
5068 float64x2_t vreinterpretq_f64_u64(
T a) {
5069 return (float64x2_t)
a;
5073 #if EIGEN_COMP_MSVC_STRICT
5074 typedef eigen_packet_wrapper<float64x2_t, 18>
Packet2d;
5075 typedef eigen_packet_wrapper<float64x1_t, 19> Packet1d;
5078 double from[2] = {
a,
b};
5079 return vld1q_f64(from);
5084 typedef float64x1_t Packet1d;
5093 const double*
a =
reinterpret_cast<const double*
>(&
m);
5094 const double*
b =
reinterpret_cast<const double*
>(&
n);
5104 #define vec2d_duplane(a, p) Packet2d(vdupq_laneq_f64(a, p))
5107 struct packet_traits<
double> : default_packet_traits {
5133 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
5151 typedef double type;
5165 return vdupq_n_f64(0.0);
5170 return vdupq_n_f64(from);
5175 const double c[] = {0.0, 1.0};
5181 return vaddq_f64(
a,
b);
5186 return vsubq_f64(
a,
b);
5199 return vnegq_f64(
a);
5209 return vmulq_f64(
a,
b);
5214 return vdivq_f64(
a,
b);
5217 #ifdef EIGEN_VECTORIZE_FMA
5221 return vfmaq_f64(
c,
a,
b);
5226 return vmlaq_f64(
c,
a,
b);
5232 return vminq_f64(
a,
b);
5235 #ifdef __ARM_FEATURE_NUMERIC_MAXMIN
5240 return vminnmq_f64(
a,
b);
5244 return vmaxnmq_f64(
a,
b);
5256 return vmaxq_f64(
a,
b);
5267 return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(
a), vreinterpretq_u64_f64(
b)));
5272 return vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(
a), vreinterpretq_u64_f64(
b)));
5277 return vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(
a), vreinterpretq_u64_f64(
b)));
5282 return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(
a), vreinterpretq_u64_f64(
b)));
5287 return vreinterpretq_f64_u64(vcleq_f64(
a,
b));
5292 return vreinterpretq_f64_u64(vcltq_f64(
a,
b));
5297 return vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_u64(vcgeq_f64(
a,
b))));
5302 return vreinterpretq_f64_u64(vceqq_f64(
a,
b));
5317 return vld1q_dup_f64(from);
5332 res = vld1q_lane_f64(from + 0 * stride,
res, 0);
5333 res = vld1q_lane_f64(from + 1 * stride,
res, 1);
5339 vst1q_lane_f64(to + stride * 0, from, 0);
5340 vst1q_lane_f64(to + stride * 1, from, 1);
5351 return vgetq_lane_f64(
a, 0);
5356 return vcombine_f64(vget_high_f64(
a), vget_low_f64(
a));
5361 return vabsq_f64(
a);
5366 return vreinterpretq_f64_s64(vshrq_n_s64(vreinterpretq_s64_f64(
a), 63));
5371 return vaddvq_f64(
a);
5376 #if EIGEN_COMP_CLANGAPPLE
5379 return (vget_low_f64(
a) * vget_high_f64(
a))[0];
5384 return vget_lane_f64(vmul_f64(vget_low_f64(
a), vget_high_f64(
a)), 0);
5391 return vminvq_f64(
a);
5397 return vmaxvq_f64(
a);
5401 const float64x2_t tmp1 = vzip1q_f64(kernel.packet[0], kernel.packet[1]);
5402 const float64x2_t tmp2 = vzip2q_f64(kernel.packet[0], kernel.packet[1]);
5404 kernel.packet[0] = tmp1;
5405 kernel.packet[1] = tmp2;
5410 return vbslq_f64(vreinterpretq_u64_f64(mask),
a,
b);
5415 return vrndnq_f64(
a);
5420 return vrndmq_f64(
a);
5425 return vrndpq_f64(
a);
5430 return vrndaq_f64(
a);
5435 return vrndq_f64(
a);
5450 return vreinterpretq_f64_u64(vdupq_n_u64(from));
5456 return generic_rsqrt_newton_step<
Packet2d, 3>
::run(
a, vrsqrteq_f64(
a));
5461 return vsqrtq_f64(_x);
5467 #if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
5468 typedef float16x4_t Packet4hf;
5469 typedef float16x8_t Packet8hf;
5472 struct packet_traits<
Eigen::half> : default_packet_traits {
5473 typedef Packet8hf
type;
5474 typedef Packet4hf
half;
5513 struct unpacket_traits<Packet4hf> {
5515 typedef Packet4hf
half;
5526 struct unpacket_traits<Packet8hf> {
5528 typedef Packet4hf
half;
5540 return vadd_f16(vget_low_f16(
a), vget_high_f16(
a));
5545 return vdupq_n_f16(from.
x);
5550 return vdup_n_f16(from.
x);
5555 const float16_t
f[] = {0, 1, 2, 3, 4, 5, 6, 7};
5556 Packet8hf countdown = vld1q_f16(
f);
5557 return vaddq_f16(pset1<Packet8hf>(
a), countdown);
5562 const float16_t
f[] = {0, 1, 2, 3};
5563 Packet4hf countdown = vld1_f16(
f);
5564 return vadd_f16(pset1<Packet4hf>(
a), countdown);
5569 return vaddq_f16(
a,
b);
5574 return vadd_f16(
a,
b);
5579 return vsubq_f16(
a,
b);
5584 return vsub_f16(
a,
b);
5589 return vnegq_f16(
a);
5609 return vmulq_f16(
a,
b);
5614 return vmul_f16(
a,
b);
5619 return vdivq_f16(
a,
b);
5624 return vdiv_f16(
a,
b);
5629 return vfmaq_f16(
c,
a,
b);
5634 return vfma_f16(
c,
a,
b);
5639 return vminq_f16(
a,
b);
5644 return vmin_f16(
a,
b);
5647 #ifdef __ARM_FEATURE_NUMERIC_MAXMIN
5651 EIGEN_STRONG_INLINE Packet4hf pmin<PropagateNumbers, Packet4hf>(
const Packet4hf&
a,
const Packet4hf&
b) {
5652 return vminnm_f16(
a,
b);
5655 EIGEN_STRONG_INLINE Packet8hf pmin<PropagateNumbers, Packet8hf>(
const Packet8hf&
a,
const Packet8hf&
b) {
5656 return vminnmq_f16(
a,
b);
5661 EIGEN_STRONG_INLINE Packet4hf pmin<PropagateNaN, Packet4hf>(
const Packet4hf&
a,
const Packet4hf&
b) {
5662 return pmin<Packet4hf>(
a,
b);
5666 EIGEN_STRONG_INLINE Packet8hf pmin<PropagateNaN, Packet8hf>(
const Packet8hf&
a,
const Packet8hf&
b) {
5667 return pmin<Packet8hf>(
a,
b);
5672 return vmaxq_f16(
a,
b);
5677 return vmax_f16(
a,
b);
5680 #ifdef __ARM_FEATURE_NUMERIC_MAXMIN
5684 EIGEN_STRONG_INLINE Packet4hf pmax<PropagateNumbers, Packet4hf>(
const Packet4hf&
a,
const Packet4hf&
b) {
5685 return vmaxnm_f16(
a,
b);
5688 EIGEN_STRONG_INLINE Packet8hf pmax<PropagateNumbers, Packet8hf>(
const Packet8hf&
a,
const Packet8hf&
b) {
5689 return vmaxnmq_f16(
a,
b);
5694 EIGEN_STRONG_INLINE Packet4hf pmax<PropagateNaN, Packet4hf>(
const Packet4hf&
a,
const Packet4hf&
b) {
5695 return pmax<Packet4hf>(
a,
b);
5699 EIGEN_STRONG_INLINE Packet8hf pmax<PropagateNaN, Packet8hf>(
const Packet8hf&
a,
const Packet8hf&
b) {
5700 return pmax<Packet8hf>(
a,
b);
5703 #define EIGEN_MAKE_ARM_FP16_CMP_8(name) \
5705 EIGEN_STRONG_INLINE Packet8hf pcmp_##name(const Packet8hf& a, const Packet8hf& b) { \
5706 return vreinterpretq_f16_u16(vc##name##q_f16(a, b)); \
5709 #define EIGEN_MAKE_ARM_FP16_CMP_4(name) \
5711 EIGEN_STRONG_INLINE Packet4hf pcmp_##name(const Packet4hf& a, const Packet4hf& b) { \
5712 return vreinterpret_f16_u16(vc##name##_f16(a, b)); \
5715 EIGEN_MAKE_ARM_FP16_CMP_8(eq)
5716 EIGEN_MAKE_ARM_FP16_CMP_8(lt)
5717 EIGEN_MAKE_ARM_FP16_CMP_8(le)
5719 EIGEN_MAKE_ARM_FP16_CMP_4(eq)
5720 EIGEN_MAKE_ARM_FP16_CMP_4(lt)
5721 EIGEN_MAKE_ARM_FP16_CMP_4(le)
5723 #undef EIGEN_MAKE_ARM_FP16_CMP_8
5724 #undef EIGEN_MAKE_ARM_FP16_CMP_4
5728 return vreinterpretq_f16_u16(vmvnq_u16(vcgeq_f16(
a,
b)));
5733 return vreinterpret_f16_u16(vmvn_u16(vcge_f16(
a,
b)));
5738 return vrndnq_f16(
a);
5743 return vrndn_f16(
a);
5748 return vrndmq_f16(
a);
5753 return vrndm_f16(
a);
5758 return vrndpq_f16(
a);
5763 return vrndp_f16(
a);
5768 return vrndaq_f16(
a);
5773 return vrnda_f16(
a);
5778 return vrndq_f16(
a);
5788 return vsqrtq_f16(
a);
5793 return vsqrt_f16(
a);
5798 return vreinterpretq_f16_u16(vandq_u16(vreinterpretq_u16_f16(
a), vreinterpretq_u16_f16(
b)));
5803 return vreinterpret_f16_u16(vand_u16(vreinterpret_u16_f16(
a), vreinterpret_u16_f16(
b)));
5808 return vreinterpretq_f16_u16(vorrq_u16(vreinterpretq_u16_f16(
a), vreinterpretq_u16_f16(
b)));
5813 return vreinterpret_f16_u16(vorr_u16(vreinterpret_u16_f16(
a), vreinterpret_u16_f16(
b)));
5818 return vreinterpretq_f16_u16(veorq_u16(vreinterpretq_u16_f16(
a), vreinterpretq_u16_f16(
b)));
5823 return vreinterpret_f16_u16(veor_u16(vreinterpret_u16_f16(
a), vreinterpret_u16_f16(
b)));
5828 return vreinterpretq_f16_u16(vbicq_u16(vreinterpretq_u16_f16(
a), vreinterpretq_u16_f16(
b)));
5833 return vreinterpret_f16_u16(vbic_u16(vreinterpret_u16_f16(
a), vreinterpret_u16_f16(
b)));
5859 packet[0] = from[0].
x;
5860 packet[1] = from[0].
x;
5861 packet[2] = from[1].
x;
5862 packet[3] = from[1].
x;
5863 packet[4] = from[2].
x;
5864 packet[5] = from[2].
x;
5865 packet[6] = from[3].
x;
5866 packet[7] = from[3].
x;
5874 tmp = (float16_t*)&packet;
5885 lo = vld1_dup_f16(
reinterpret_cast<const float16_t*
>(from));
5886 hi = vld1_dup_f16(
reinterpret_cast<const float16_t*
>(from + 1));
5887 return vcombine_f16(lo, hi);
5891 return vsetq_lane_f16(
b.x,
a, 0);
5895 return vset_lane_f16(
b.x,
a, 0);
5900 return vbslq_f16(vreinterpretq_u16_f16(mask),
a,
b);
5905 return vbsl_f16(vreinterpret_u16_f16(mask),
a,
b);
5909 return vsetq_lane_f16(
b.x,
a, 7);
5913 return vset_lane_f16(
b.x,
a, 3);
5939 res = vsetq_lane_f16(from[0 * stride].
x,
res, 0);
5940 res = vsetq_lane_f16(from[1 * stride].
x,
res, 1);
5941 res = vsetq_lane_f16(from[2 * stride].
x,
res, 2);
5942 res = vsetq_lane_f16(from[3 * stride].
x,
res, 3);
5943 res = vsetq_lane_f16(from[4 * stride].
x,
res, 4);
5944 res = vsetq_lane_f16(from[5 * stride].
x,
res, 5);
5945 res = vsetq_lane_f16(from[6 * stride].
x,
res, 6);
5946 res = vsetq_lane_f16(from[7 * stride].
x,
res, 7);
5953 res = vset_lane_f16(from[0 * stride].
x,
res, 0);
5954 res = vset_lane_f16(from[1 * stride].
x,
res, 1);
5955 res = vset_lane_f16(from[2 * stride].
x,
res, 2);
5956 res = vset_lane_f16(from[3 * stride].
x,
res, 3);
5963 to[stride * 0].
x = vgetq_lane_f16(from, 0);
5964 to[stride * 1].
x = vgetq_lane_f16(from, 1);
5965 to[stride * 2].
x = vgetq_lane_f16(from, 2);
5966 to[stride * 3].
x = vgetq_lane_f16(from, 3);
5967 to[stride * 4].
x = vgetq_lane_f16(from, 4);
5968 to[stride * 5].
x = vgetq_lane_f16(from, 5);
5969 to[stride * 6].
x = vgetq_lane_f16(from, 6);
5970 to[stride * 7].
x = vgetq_lane_f16(from, 7);
5976 to[stride * 0].
x = vget_lane_f16(from, 0);
5977 to[stride * 1].
x = vget_lane_f16(from, 1);
5978 to[stride * 2].
x = vget_lane_f16(from, 2);
5979 to[stride * 3].
x = vget_lane_f16(from, 3);
6007 float16x4_t a_lo, a_hi;
6010 a_r64 = vrev64q_f16(
a);
6011 a_lo = vget_low_f16(a_r64);
6012 a_hi = vget_high_f16(a_r64);
6013 return vcombine_f16(a_hi, a_lo);
6018 return vrev64_f16(
a);
6023 return vabsq_f16(
a);
6028 return vreinterpretq_f16_s16(vshrq_n_s16(vreinterpretq_s16_f16(
a), 15));
6038 return vreinterpret_f16_s16(vshr_n_s16(vreinterpret_s16_f16(
a), 15));
6043 float16x4_t a_lo, a_hi, sum;
6045 a_lo = vget_low_f16(
a);
6046 a_hi = vget_high_f16(
a);
6047 sum = vpadd_f16(a_lo, a_hi);
6048 sum = vpadd_f16(sum, sum);
6049 sum = vpadd_f16(sum, sum);
6052 h.
x = vget_lane_f16(sum, 0);
6060 sum = vpadd_f16(
a,
a);
6061 sum = vpadd_f16(sum, sum);
6063 h.
x = vget_lane_f16(sum, 0);
6069 float16x4_t a_lo, a_hi,
prod;
6071 a_lo = vget_low_f16(
a);
6072 a_hi = vget_high_f16(
a);
6073 prod = vmul_f16(a_lo, a_hi);
6077 h.
x = vmulh_f16(vget_lane_f16(
prod, 0), vget_lane_f16(
prod, 1));
6084 prod = vmul_f16(
a, vrev64_f16(
a));
6086 h.
x = vmulh_f16(vget_lane_f16(
prod, 0), vget_lane_f16(
prod, 1));
6093 h.
x = vminvq_f16(
a);
6107 h.
x = vmaxvq_f16(
a);
6119 const float16x8x2_t zip16_1 = vzipq_f16(kernel.packet[0], kernel.packet[1]);
6120 const float16x8x2_t zip16_2 = vzipq_f16(kernel.packet[2], kernel.packet[3]);
6122 const float32x4x2_t zip32_1 = vzipq_f32(vreinterpretq_f32_f16(zip16_1.val[0]), vreinterpretq_f32_f16(zip16_2.val[0]));
6123 const float32x4x2_t zip32_2 = vzipq_f32(vreinterpretq_f32_f16(zip16_1.val[1]), vreinterpretq_f32_f16(zip16_2.val[1]));
6125 kernel.packet[0] = vreinterpretq_f16_f32(zip32_1.val[0]);
6126 kernel.packet[1] = vreinterpretq_f16_f32(zip32_1.val[1]);
6127 kernel.packet[2] = vreinterpretq_f16_f32(zip32_2.val[0]);
6128 kernel.packet[3] = vreinterpretq_f16_f32(zip32_2.val[1]);
6133 float16_t*
tmp = (float16_t*)&kernel;
6134 tmp_x4 = vld4_f16(
tmp);
6136 kernel.packet[0] = tmp_x4.val[0];
6137 kernel.packet[1] = tmp_x4.val[1];
6138 kernel.packet[2] = tmp_x4.val[2];
6139 kernel.packet[3] = tmp_x4.val[3];
6143 float16x8x2_t T_1[4];
6145 T_1[0] = vuzpq_f16(kernel.packet[0], kernel.packet[1]);
6146 T_1[1] = vuzpq_f16(kernel.packet[2], kernel.packet[3]);
6147 T_1[2] = vuzpq_f16(kernel.packet[4], kernel.packet[5]);
6148 T_1[3] = vuzpq_f16(kernel.packet[6], kernel.packet[7]);
6150 float16x8x2_t T_2[4];
6151 T_2[0] = vuzpq_f16(T_1[0].
val[0], T_1[1].
val[0]);
6152 T_2[1] = vuzpq_f16(T_1[0].
val[1], T_1[1].
val[1]);
6153 T_2[2] = vuzpq_f16(T_1[2].
val[0], T_1[3].
val[0]);
6154 T_2[3] = vuzpq_f16(T_1[2].
val[1], T_1[3].
val[1]);
6156 float16x8x2_t T_3[4];
6157 T_3[0] = vuzpq_f16(T_2[0].
val[0], T_2[2].
val[0]);
6158 T_3[1] = vuzpq_f16(T_2[0].
val[1], T_2[2].
val[1]);
6159 T_3[2] = vuzpq_f16(T_2[1].
val[0], T_2[3].
val[0]);
6160 T_3[3] = vuzpq_f16(T_2[1].
val[1], T_2[3].
val[1]);
6162 kernel.packet[0] = T_3[0].val[0];
6163 kernel.packet[1] = T_3[2].val[0];
6164 kernel.packet[2] = T_3[1].val[0];
6165 kernel.packet[3] = T_3[3].val[0];
6166 kernel.packet[4] = T_3[0].val[1];
6167 kernel.packet[5] = T_3[2].val[1];
6168 kernel.packet[6] = T_3[1].val[1];
6169 kernel.packet[7] = T_3[3].val[1];
AnnoyingScalar abs(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:135
int i
Definition: BiCGSTAB_step_by_step.cpp:9
const unsigned n
Definition: CG3DPackingUnitTest.cpp:11
#define EIGEN_DEBUG_ALIGNED_STORE
Definition: GenericPacketMath.h:38
#define EIGEN_DEBUG_ALIGNED_LOAD
Definition: GenericPacketMath.h:30
#define EIGEN_DEBUG_UNALIGNED_STORE
Definition: GenericPacketMath.h:42
#define EIGEN_DEBUG_UNALIGNED_LOAD
Definition: GenericPacketMath.h:34
#define EIGEN_ALWAYS_INLINE
Definition: Macros.h:845
#define EIGEN_UNROLL_LOOP
Definition: Macros.h:1298
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define eigen_assert(x)
Definition: Macros.h:910
#define EIGEN_FAST_MATH
Definition: Macros.h:51
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
Vector3f p1
Definition: MatrixBase_all.cpp:2
#define EIGEN_ARM_PREFETCH(ADDR)
Definition: NEON/PacketMath.h:172
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition: PartialRedux_count.cpp:3
float * p
Definition: Tutorial_Map_using.cpp:9
Scalar * b
Definition: benchVecAdd.cpp:17
EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
Definition: PlainObjectBase.h:247
Tag for template metaprogramming.
Definition: Logger.h:174
@ N
Definition: constructor.cpp:22
static int f(const TensorMap< Tensor< int, 3 > > &tensor)
Definition: cxx11_tensor_map.cpp:237
#define min(a, b)
Definition: datatypes.h:22
#define max(a, b)
Definition: datatypes.h:23
@ Unaligned
Definition: Constants.h:235
@ Aligned16
Definition: Constants.h:237
RealScalar s
Definition: level1_cplx_impl.h:130
const Scalar * a
Definition: level2_cplx_impl.h:32
int * m
Definition: level2_cplx_impl.h:294
char char char int int * k
Definition: level2_impl.h:374
Eigen::Matrix< Scalar, Dynamic, Dynamic, ColMajor > tmp
Definition: level3_impl.h:365
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value)
EIGEN_ALWAYS_INLINE void zip_in_place< Packet8c >(Packet8c &p1, Packet8c &p2)
Definition: NEON/PacketMath.h:4125
void zip_in_place(Packet &p1, Packet &p2)
EIGEN_ALWAYS_INLINE void zip_in_place< Packet2i >(Packet2i &p1, Packet2i &p2)
Definition: NEON/PacketMath.h:4153
EIGEN_ALWAYS_INLINE void zip_in_place< Packet16c >(Packet16c &p1, Packet16c &p2)
Definition: NEON/PacketMath.h:4132
EIGEN_ALWAYS_INLINE void zip_in_place< Packet4i >(Packet4i &p1, Packet4i &p2)
Definition: NEON/PacketMath.h:4160
EIGEN_ALWAYS_INLINE void zip_in_place< Packet8uc >(Packet8uc &p1, Packet8uc &p2)
Definition: NEON/PacketMath.h:4139
EIGEN_ALWAYS_INLINE void zip_in_place< Packet2ui >(Packet2ui &p1, Packet2ui &p2)
Definition: NEON/PacketMath.h:4167
EIGEN_ALWAYS_INLINE void zip_in_place< Packet16uc >(Packet16uc &p1, Packet16uc &p2)
Definition: NEON/PacketMath.h:4146
EIGEN_ALWAYS_INLINE void zip_in_place< Packet4us >(Packet4us &p1, Packet4us &p2)
Definition: NEON/PacketMath.h:4195
EIGEN_ALWAYS_INLINE void zip_in_place< Packet4ui >(Packet4ui &p1, Packet4ui &p2)
Definition: NEON/PacketMath.h:4174
EIGEN_ALWAYS_INLINE void zip_in_place< Packet8s >(Packet8s &p1, Packet8s &p2)
Definition: NEON/PacketMath.h:4188
EIGEN_ALWAYS_INLINE void zip_in_place< Packet8us >(Packet8us &p1, Packet8us &p2)
Definition: NEON/PacketMath.h:4202
EIGEN_ALWAYS_INLINE void zip_in_place< Packet4f >(Packet4f &p1, Packet4f &p2)
Definition: NEON/PacketMath.h:4118
EIGEN_ALWAYS_INLINE void ptranspose_impl(PacketBlock< Packet, 2 > &kernel)
Definition: NEON/PacketMath.h:4209
EIGEN_ALWAYS_INLINE void zip_in_place< Packet4bf >(Packet4bf &p1, Packet4bf &p2)
Definition: NEON/PacketMath.h:4785
EIGEN_ALWAYS_INLINE void zip_in_place< Packet2f >(Packet2f &p1, Packet2f &p2)
Definition: NEON/PacketMath.h:4111
EIGEN_ALWAYS_INLINE void zip_in_place< Packet4s >(Packet4s &p1, Packet4s &p2)
Definition: NEON/PacketMath.h:4181
EIGEN_STRONG_INLINE int64_t predux_min< Packet2l >(const Packet2l &a)
Definition: LSX/PacketMath.h:2095
EIGEN_STRONG_INLINE unsigned char predux< Packet16uc >(const Packet16uc &a)
Definition: AltiVec/PacketMath.h:2515
EIGEN_STRONG_INLINE Packet8uc pmin< Packet8uc >(const Packet8uc &a, const Packet8uc &b)
Definition: NEON/PacketMath.h:1482
EIGEN_STRONG_INLINE Packet4f pandnot< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1465
EIGEN_STRONG_INLINE Packet8s pabsdiff< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:2764
EIGEN_STRONG_INLINE Packet4ui psub< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:634
EIGEN_STRONG_INLINE void pscatter< bfloat16, Packet4bf >(bfloat16 *to, const Packet4bf &from, Index stride)
Definition: NEON/PacketMath.h:4978
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2ui pgather< uint32_t, Packet2ui >(const uint32_t *from, Index stride)
Definition: NEON/PacketMath.h:2972
EIGEN_STRONG_INLINE Packet2i pmax< Packet2i >(const Packet2i &a, const Packet2i &b)
Definition: NEON/PacketMath.h:1607
EIGEN_STRONG_INLINE Packet16c pmin< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: AltiVec/PacketMath.h:1273
EIGEN_STRONG_INLINE Packet8uc pload< Packet8uc >(const uint8_t *from)
Definition: NEON/PacketMath.h:2414
EIGEN_STRONG_INLINE Packet2ui pabsdiff< Packet2ui >(const Packet2ui &a, const Packet2ui &b)
Definition: NEON/PacketMath.h:1423
EIGEN_STRONG_INLINE Packet8us pand< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1418
EIGEN_STRONG_INLINE Packet8c por< Packet8c >(const Packet8c &a, const Packet8c &b)
Definition: NEON/PacketMath.h:1984
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< uint16_t, Packet4us >(uint16_t *to, const Packet4us &from, Index stride)
Definition: NEON/PacketMath.h:3103
EIGEN_STRONG_INLINE Packet2d shuffle(const Packet2d &m, const Packet2d &n, int mask)
Definition: LSX/PacketMath.h:150
eigen_packet_wrapper< uint32_t, 5 > Packet4uc
Definition: NEON/PacketMath.h:80
EIGEN_STRONG_INLINE void pstore< int8_t >(int8_t *to, const Packet16c &from)
Definition: LSX/PacketMath.h:1541
EIGEN_STRONG_INLINE Packet4uc plset< Packet4uc >(const uint8_t &a)
Definition: NEON/PacketMath.h:775
EIGEN_STRONG_INLINE Packet16c pcmp_le< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:1048
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4c pgather< int8_t, Packet4c >(const int8_t *from, Index stride)
Definition: NEON/PacketMath.h:2842
__m128d Packet2d
Definition: LSX/PacketMath.h:36
EIGEN_STRONG_INLINE Packet4ui pset1< Packet4ui >(const uint32_t &from)
Definition: LSX/PacketMath.h:490
EIGEN_STRONG_INLINE void pstoreu< double >(double *to, const Packet4d &from)
Definition: AVX/PacketMath.h:1628
EIGEN_STRONG_INLINE Packet8s pmax< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: AltiVec/PacketMath.h:1297
EIGEN_STRONG_INLINE Packet8c pload< Packet8c >(const int8_t *from)
Definition: NEON/PacketMath.h:2400
EIGEN_STRONG_INLINE float predux< Packet2f >(const Packet2f &a)
Definition: NEON/PacketMath.h:3468
EIGEN_STRONG_INLINE Packet2ui psub< Packet2ui >(const Packet2ui &a, const Packet2ui &b)
Definition: NEON/PacketMath.h:973
EIGEN_STRONG_INLINE short int pfirst< Packet8s >(const Packet8s &a)
Definition: AltiVec/PacketMath.h:1883
EIGEN_STRONG_INLINE double predux< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:1965
EIGEN_STRONG_INLINE void pstoreu< uint32_t >(uint32_t *to, const Packet8ui &from)
Definition: AVX/PacketMath.h:1636
EIGEN_STRONG_INLINE void prefetch< uint64_t >(const uint64_t *addr)
Definition: LSX/PacketMath.h:1868
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf &a)
Definition: AltiVec/Complex.h:268
eigen_packet_wrapper< __m128i, 3 > Packet2l
Definition: LSX/PacketMath.h:41
EIGEN_STRONG_INLINE Packet4c psub< Packet4c >(const Packet4c &a, const Packet4c &b)
Definition: NEON/PacketMath.h:923
EIGEN_STRONG_INLINE Packet2l pdiv< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:794
EIGEN_STRONG_INLINE Packet8c psub< Packet8c >(const Packet8c &a, const Packet8c &b)
Definition: NEON/PacketMath.h:928
EIGEN_STRONG_INLINE Packet2ui pdiv< Packet2ui >(const Packet2ui &, const Packet2ui &)
Definition: NEON/PacketMath.h:1260
EIGEN_STRONG_INLINE Packet4s pcmp_lt< Packet4s >(const Packet4s &a, const Packet4s &b)
Definition: NEON/PacketMath.h:1753
EIGEN_STRONG_INLINE Packet16c pmax< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: AltiVec/PacketMath.h:1305
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet16uc pgather< uint8_t, Packet16uc >(const uint8_t *from, Index stride)
Definition: LSX/PacketMath.h:1676
EIGEN_STRONG_INLINE Packet4s pcmp_eq< Packet4s >(const Packet4s &a, const Packet4s &b)
Definition: NEON/PacketMath.h:1838
EIGEN_STRONG_INLINE Packet4us pset1< Packet4us >(const uint16_t &from)
Definition: NEON/PacketMath.h:709
EIGEN_STRONG_INLINE Packet2i pand< Packet2i >(const Packet2i &a, const Packet2i &b)
Definition: NEON/PacketMath.h:1947
EIGEN_STRONG_INLINE void prefetch< int8_t >(const int8_t *addr)
Definition: LSX/PacketMath.h:1840
EIGEN_STRONG_INLINE Packet8c pand< Packet8c >(const Packet8c &a, const Packet8c &b)
Definition: NEON/PacketMath.h:1911
EIGEN_STRONG_INLINE void prefetch< uint32_t >(const uint32_t *addr)
Definition: AVX/PacketMath.h:1758
EIGEN_STRONG_INLINE Packet2l pandnot< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:1019
EIGEN_STRONG_INLINE int64_t predux< Packet2l >(const Packet2l &a)
Definition: LSX/PacketMath.h:1987
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:318
EIGEN_STRONG_INLINE float predux_min< Packet2f >(const Packet2f &a)
Definition: NEON/PacketMath.h:3770
EIGEN_STRONG_INLINE Packet4us pmin< Packet4us >(const Packet4us &a, const Packet4us &b)
Definition: NEON/PacketMath.h:1498
EIGEN_STRONG_INLINE int8_t predux_min< Packet8c >(const Packet8c &a)
Definition: NEON/PacketMath.h:3797
EIGEN_STRONG_INLINE Packet4us por< Packet4us >(const Packet4us &a, const Packet4us &b)
Definition: NEON/PacketMath.h:2012
EIGEN_STRONG_INLINE Packet8c ploadu< Packet8c >(const int8_t *from)
Definition: NEON/PacketMath.h:2477
EIGEN_STRONG_INLINE Packet8us pabsdiff< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: LSX/PacketMath.h:2816
EIGEN_STRONG_INLINE Packet4f pmin< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1250
EIGEN_STRONG_INLINE Packet2d padd< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:605
EIGEN_STRONG_INLINE Packet2d pandnot< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1003
uint32x2_t Packet2ui
Definition: NEON/PacketMath.h:89
EIGEN_STRONG_INLINE Packet8c padd< Packet8c >(const Packet8c &a, const Packet8c &b)
Definition: NEON/PacketMath.h:853
EIGEN_STRONG_INLINE Packet8f Bf16ToF32(const Packet8bf &a)
Definition: AVX/PacketMath.h:2558
EIGEN_STRONG_INLINE Packet8f pzero(const Packet8f &)
Definition: AVX/PacketMath.h:774
EIGEN_STRONG_INLINE int32_t predux_mul< Packet2i >(const Packet2i &a)
Definition: NEON/PacketMath.h:3734
EIGEN_STRONG_INLINE Packet2f pand< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:1899
EIGEN_STRONG_INLINE Packet2i psub< Packet2i >(const Packet2i &a, const Packet2i &b)
Definition: NEON/PacketMath.h:965
EIGEN_STRONG_INLINE uint32_t predux_max< Packet4ui >(const Packet4ui &a)
Definition: LSX/PacketMath.h:2166
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< uint64_t, Packet2ul >(uint64_t *to, const Packet2ul &from, Index stride)
Definition: LSX/PacketMath.h:1825
__vector int Packet4i
Definition: AltiVec/PacketMath.h:34
EIGEN_STRONG_INLINE Packet16uc ploadu< Packet16uc >(const unsigned char *from)
Definition: AltiVec/PacketMath.h:1557
EIGEN_STRONG_INLINE Packet4us ploadu< Packet4us >(const uint16_t *from)
Definition: NEON/PacketMath.h:2507
EIGEN_STRONG_INLINE Packet4bf pcmp_le< Packet4bf >(const Packet4bf &a, const Packet4bf &b)
Definition: NEON/PacketMath.h:5032
EIGEN_STRONG_INLINE Packet4f vec4f_movelh(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:132
EIGEN_STRONG_INLINE Packet2f pset1< Packet2f >(const float &from)
Definition: NEON/PacketMath.h:669
EIGEN_STRONG_INLINE Packet2l ploadu< Packet2l >(const int64_t *from)
Definition: LSX/PacketMath.h:1464
EIGEN_STRONG_INLINE Packet8c pdiv< Packet8c >(const Packet8c &, const Packet8c &)
Definition: NEON/PacketMath.h:1205
EIGEN_STRONG_INLINE Packet2d pmin< PropagateNaN, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:2729
EIGEN_STRONG_INLINE Packet4f padd< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1066
EIGEN_STRONG_INLINE Packet16uc pmul< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: AltiVec/PacketMath.h:1182
EIGEN_STRONG_INLINE Packet4bf pround< Packet4bf >(const Packet4bf &a)
Definition: NEON/PacketMath.h:4938
EIGEN_STRONG_INLINE Packet4bf pset1< Packet4bf >(const bfloat16 &from)
Definition: NEON/PacketMath.h:4825
EIGEN_STRONG_INLINE Packet4i por< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1431
EIGEN_STRONG_INLINE Packet2d pmax< PropagateNumbers, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: SSE/PacketMath.h:1149
int8x8_t Packet8c
Definition: NEON/PacketMath.h:78
EIGEN_STRONG_INLINE Packet4c ploaddup< Packet4c >(const int8_t *from)
Definition: NEON/PacketMath.h:2548
EIGEN_STRONG_INLINE Packet8c pcmp_eq< Packet8c >(const Packet8c &a, const Packet8c &b)
Definition: NEON/PacketMath.h:1817
EIGEN_STRONG_INLINE short int predux_min< Packet8s >(const Packet8s &a)
Definition: AltiVec/PacketMath.h:2617
EIGEN_STRONG_INLINE Packet16c pdiv< Packet16c >(const Packet16c &, const Packet16c &)
Definition: NEON/PacketMath.h:1210
EIGEN_STRONG_INLINE Packet16c por< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:925
EIGEN_STRONG_INLINE Packet4f pcmp_eq< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:1131
EIGEN_STRONG_INLINE Packet2ul pmin< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:1200
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8us pgather< uint16_t, Packet8us >(const uint16_t *from, Index stride)
Definition: LSX/PacketMath.h:1697
EIGEN_STRONG_INLINE Packet2i pcmp_le< Packet2i >(const Packet2i &a, const Packet2i &b)
Definition: NEON/PacketMath.h:1684
EIGEN_STRONG_INLINE Packet4bf preverse< Packet4bf >(const Packet4bf &a)
Definition: NEON/PacketMath.h:5003
EIGEN_STRONG_INLINE Packet4ui pmul< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:769
__vector unsigned char Packet16uc
Definition: AltiVec/PacketMath.h:41
EIGEN_STRONG_INLINE Packet4f vec4f_swizzle2(const Packet4f &a, const Packet4f &b, int p, int q, int r, int s)
Definition: LSX/PacketMath.h:129
EIGEN_STRONG_INLINE Packet8uc pandnot< Packet8uc >(const Packet8uc &a, const Packet8uc &b)
Definition: NEON/PacketMath.h:2142
EIGEN_STRONG_INLINE Packet4i pset1< Packet4i >(const int &from)
Definition: AltiVec/PacketMath.h:778
EIGEN_STRONG_INLINE Packet16c pload< Packet16c >(const signed char *from)
Definition: AltiVec/PacketMath.h:512
EIGEN_STRONG_INLINE Packet4f pabs< Packet4f >(const Packet4f &a)
Definition: ZVector/PacketMath.h:954
EIGEN_STRONG_INLINE Packet8us pmin< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1269
EIGEN_STRONG_INLINE uint16_t predux_max< Packet4us >(const Packet4us &a)
Definition: NEON/PacketMath.h:4059
EIGEN_STRONG_INLINE Packet2d paddsub< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:661
EIGEN_STRONG_INLINE Packet4us pabsdiff< Packet4us >(const Packet4us &a, const Packet4us &b)
Definition: NEON/PacketMath.h:1407
EIGEN_STRONG_INLINE Packet4f shuffle2(const Packet4f &m, const Packet4f &n, int mask)
Definition: LSX/PacketMath.h:105
EIGEN_STRONG_INLINE Packet4bf pmin< Packet4bf >(const Packet4bf &a, const Packet4bf &b)
Definition: NEON/PacketMath.h:4874
eigen_packet_wrapper< uint16x4_t, 19 > Packet4bf
Definition: NEON/PacketMath.h:4726
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< int8_t, Packet4c >(int8_t *to, const Packet4c &from, Index stride)
Definition: NEON/PacketMath.h:3011
EIGEN_STRONG_INLINE int8_t pfirst< Packet4c >(const Packet4c &a)
Definition: NEON/PacketMath.h:3209
EIGEN_STRONG_INLINE Packet16c psub< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: AltiVec/PacketMath.h:1111
EIGEN_STRONG_INLINE Packet2i ploaddup< Packet2i >(const int32_t *from)
Definition: NEON/PacketMath.h:2602
EIGEN_STRONG_INLINE short int predux_max< Packet8s >(const Packet8s &a)
Definition: AltiVec/PacketMath.h:2697
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8uc pgather< uint8_t, Packet8uc >(const uint8_t *from, Index stride)
Definition: NEON/PacketMath.h:2886
EIGEN_STRONG_INLINE Packet4f pcmp_le< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:1040
EIGEN_STRONG_INLINE Packet4bf pmax< PropagateNaN, Packet4bf >(const Packet4bf &a, const Packet4bf &b)
Definition: NEON/PacketMath.h:4883
EIGEN_STRONG_INLINE Packet4us pload< Packet4us >(const uint16_t *from)
Definition: NEON/PacketMath.h:2430
EIGEN_STRONG_INLINE Packet4c pload< Packet4c >(const int8_t *from)
Definition: NEON/PacketMath.h:2394
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< int32_t, Packet2i >(int32_t *to, const Packet2i &from, Index stride)
Definition: NEON/PacketMath.h:3123
EIGEN_STRONG_INLINE unsigned short int predux_max< Packet8us >(const Packet8us &a)
Definition: AltiVec/PacketMath.h:2712
EIGEN_STRONG_INLINE Packet4c ploadquad< Packet4c >(const int8_t *from)
Definition: NEON/PacketMath.h:2631
EIGEN_STRONG_INLINE Packet2d vec2d_unpackhi(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:161
EIGEN_STRONG_INLINE Packet8s pcmp_eq< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:1143
EIGEN_STRONG_INLINE Packet2ul pxor< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:994
EIGEN_STRONG_INLINE float pfirst< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1863
EIGEN_STRONG_INLINE Packet2d pand< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:880
EIGEN_STRONG_INLINE Packet16uc pmax< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: AltiVec/PacketMath.h:1309
EIGEN_STRONG_INLINE Packet4c pandnot< Packet4c >(const Packet4c &a, const Packet4c &b)
Definition: NEON/PacketMath.h:2126
EIGEN_STRONG_INLINE Packet4f ploadquad< Packet4f >(const float *from)
Definition: LSX/PacketMath.h:2703
EIGEN_STRONG_INLINE Packet4s pset1< Packet4s >(const int16_t &from)
Definition: NEON/PacketMath.h:701
EIGEN_STRONG_INLINE Packet4f shuffle1(const Packet4f &m, int mask)
Definition: LSX/PacketMath.h:97
EIGEN_STRONG_INLINE void prefetch< uint16_t >(const uint16_t *addr)
Definition: LSX/PacketMath.h:1860
EIGEN_STRONG_INLINE Packet2ul pandnot< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:1035
EIGEN_STRONG_INLINE unsigned short int predux_min< Packet8us >(const Packet8us &a)
Definition: AltiVec/PacketMath.h:2632
EIGEN_STRONG_INLINE Packet8us pcmp_le< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: LSX/PacketMath.h:1068
EIGEN_STRONG_INLINE Packet4s pmin< Packet4s >(const Packet4s &a, const Packet4s &b)
Definition: NEON/PacketMath.h:1490
EIGEN_STRONG_INLINE Packet8s por< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: AltiVec/PacketMath.h:1435
EIGEN_STRONG_INLINE Packet8uc pcmp_lt< Packet8uc >(const Packet8uc &a, const Packet8uc &b)
Definition: NEON/PacketMath.h:1745
EIGEN_STRONG_INLINE void prefetch< int64_t >(const int64_t *addr)
Definition: LSX/PacketMath.h:1852
EIGEN_STRONG_INLINE Packet8us psub< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1107
EIGEN_STRONG_INLINE Packet2f pcmp_eq< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:1804
EIGEN_STRONG_INLINE Packet4bf print< Packet4bf >(const Packet4bf &a)
Definition: NEON/PacketMath.h:4923
EIGEN_STRONG_INLINE Packet8c pxor< Packet8c >(const Packet8c &a, const Packet8c &b)
Definition: NEON/PacketMath.h:2057
EIGEN_STRONG_INLINE Packet4bf pmul< Packet4bf >(const Packet4bf &a, const Packet4bf &b)
Definition: NEON/PacketMath.h:4963
EIGEN_STRONG_INLINE Packet2ul pset1< Packet2ul >(const uint64_t &from)
Definition: LSX/PacketMath.h:494
EIGEN_STRONG_INLINE Packet4c pdiv< Packet4c >(const Packet4c &, const Packet4c &)
Definition: NEON/PacketMath.h:1200
EIGEN_STRONG_INLINE Packet4ui padd< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: AltiVec/PacketMath.h:1074
EIGEN_STRONG_INLINE Packet8uc pxor< Packet8uc >(const Packet8uc &a, const Packet8uc &b)
Definition: NEON/PacketMath.h:2069
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition: AltiVec/Complex.h:339
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2l pgather< int64_t, Packet2l >(const int64_t *from, Index stride)
Definition: LSX/PacketMath.h:1669
EIGEN_STRONG_INLINE Packet16c plset< Packet16c >(const signed char &a)
Definition: AltiVec/PacketMath.h:1057
EIGEN_STRONG_INLINE Packet4ui pand< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: AltiVec/PacketMath.h:1414
EIGEN_STRONG_INLINE Packet4uc ploadquad< Packet4uc >(const uint8_t *from)
Definition: NEON/PacketMath.h:2648
EIGEN_STRONG_INLINE signed char pfirst< Packet16c >(const Packet16c &a)
Definition: AltiVec/PacketMath.h:1893
EIGEN_STRONG_INLINE Packet2ul pcmp_eq< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:1167
EIGEN_STRONG_INLINE Packet4bf pmin< PropagateNaN, Packet4bf >(const Packet4bf &a, const Packet4bf &b)
Definition: NEON/PacketMath.h:4869
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8s pgather< int16_t, Packet8s >(const int16_t *from, Index stride)
Definition: LSX/PacketMath.h:1647
EIGEN_STRONG_INLINE uint8_t pfirst< Packet8uc >(const Packet8uc &a)
Definition: NEON/PacketMath.h:3225
EIGEN_STRONG_INLINE Packet16c pabsdiff< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:2758
EIGEN_STRONG_INLINE int32_t predux_min< Packet2i >(const Packet2i &a)
Definition: NEON/PacketMath.h:3901
eigen_packet_wrapper< int32_t, 2 > Packet4c
Definition: NEON/PacketMath.h:77
EIGEN_STRONG_INLINE Packet16uc pset1< Packet16uc >(const unsigned char &from)
Definition: AltiVec/PacketMath.h:798
EIGEN_STRONG_INLINE Packet4bf pmax< Packet4bf >(const Packet4bf &a, const Packet4bf &b)
Definition: NEON/PacketMath.h:4888
EIGEN_STRONG_INLINE Packet4uc pdiv< Packet4uc >(const Packet4uc &, const Packet4uc &)
Definition: NEON/PacketMath.h:1215
EIGEN_STRONG_INLINE signed char predux_mul< Packet16c >(const Packet16c &a)
Definition: AltiVec/PacketMath.h:2566
EIGEN_STRONG_INLINE Packet4i ploaddup< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:1644
EIGEN_STRONG_INLINE bool predux_any(const Packet4f &x)
Definition: AltiVec/PacketMath.h:2751
EIGEN_STRONG_INLINE uint32_t predux_mul< Packet2ui >(const Packet2ui &a)
Definition: NEON/PacketMath.h:3742
EIGEN_STRONG_INLINE Packet4us pcmp_le< Packet4us >(const Packet4us &a, const Packet4us &b)
Definition: NEON/PacketMath.h:1676
EIGEN_STRONG_INLINE Packet4s padd< Packet4s >(const Packet4s &a, const Packet4s &b)
Definition: NEON/PacketMath.h:874
EIGEN_STRONG_INLINE Packet4s pand< Packet4s >(const Packet4s &a, const Packet4s &b)
Definition: NEON/PacketMath.h:1931
EIGEN_STRONG_INLINE float predux_max< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2679
EIGEN_STRONG_INLINE signed char predux_min< Packet16c >(const Packet16c &a)
Definition: AltiVec/PacketMath.h:2647
EIGEN_STRONG_INLINE Packet4bf padd< Packet4bf >(const Packet4bf &a, const Packet4bf &b)
Definition: NEON/PacketMath.h:4953
EIGEN_STRONG_INLINE Packet8us pdiv< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: LSX/PacketMath.h:798
EIGEN_STRONG_INLINE Packet2ui ploadu< Packet2ui >(const uint32_t *from)
Definition: NEON/PacketMath.h:2523
EIGEN_STRONG_INLINE Packet2ul ploaddup< Packet2ul >(const uint64_t *from)
Definition: LSX/PacketMath.h:1528
EIGEN_STRONG_INLINE Packet2d ploaddup< Packet2d >(const double *from)
Definition: LSX/PacketMath.h:1490
EIGEN_STRONG_INLINE Packet8us plset< Packet8us >(const unsigned short int &a)
Definition: AltiVec/PacketMath.h:1053
__vector unsigned short int Packet8us
Definition: AltiVec/PacketMath.h:38
EIGEN_STRONG_INLINE Packet2f pset1frombits< Packet2f >(uint32_t from)
Definition: NEON/PacketMath.h:742
EIGEN_STRONG_INLINE Packet2l pcmp_eq< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:1151
EIGEN_STRONG_INLINE Packet4f vec4f_movehl(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:135
EIGEN_STRONG_INLINE Packet2d pxor< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:962
EIGEN_STRONG_INLINE uint8_t predux_max< Packet4uc >(const Packet4uc &a)
Definition: NEON/PacketMath.h:3983
EIGEN_STRONG_INLINE uint32_t predux_min< Packet2ui >(const Packet2ui &a)
Definition: NEON/PacketMath.h:3910
EIGEN_DEVICE_FUNC Packet pdiv(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:368
EIGEN_STRONG_INLINE Packet2d por< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:921
EIGEN_STRONG_INLINE Packet2ul pload< Packet2ul >(const uint64_t *from)
Definition: LSX/PacketMath.h:1439
EIGEN_STRONG_INLINE Packet4f shuffle2< true >(const Packet4f &m, const Packet4f &n, int mask)
Definition: LSX/PacketMath.h:114
EIGEN_STRONG_INLINE void pstore< bfloat16 >(bfloat16 *to, const Packet8bf &from)
Definition: AltiVec/PacketMath.h:662
EIGEN_STRONG_INLINE Packet2d pldexp< Packet2d >(const Packet2d &a, const Packet2d &exponent)
Definition: LSX/PacketMath.h:2753
EIGEN_STRONG_INLINE Packet2f pmul< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:1123
EIGEN_STRONG_INLINE Packet2f pmax< PropagateNaN, Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:1560
EIGEN_STRONG_INLINE int16_t predux_mul< Packet4s >(const Packet4s &a)
Definition: NEON/PacketMath.h:3702
EIGEN_STRONG_INLINE Packet2ui pand< Packet2ui >(const Packet2ui &a, const Packet2ui &b)
Definition: NEON/PacketMath.h:1955
EIGEN_STRONG_INLINE Packet4us pmax< Packet4us >(const Packet4us &a, const Packet4us &b)
Definition: NEON/PacketMath.h:1599
EIGEN_STRONG_INLINE Packet4i pdiv< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1205
EIGEN_STRONG_INLINE Packet2l padd< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:581
EIGEN_STRONG_INLINE Packet8s ploadu< Packet8s >(const short int *from)
Definition: AltiVec/PacketMath.h:1541
EIGEN_STRONG_INLINE Packet4f pmax< PropagateNumbers, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: SSE/PacketMath.h:1145
EIGEN_STRONG_INLINE Packet4ui pcmp_le< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:1072
EIGEN_STRONG_INLINE Packet2f pmin< PropagateNaN, Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:1459
EIGEN_STRONG_INLINE uint16_t predux_min< Packet4us >(const Packet4us &a)
Definition: NEON/PacketMath.h:3889
EIGEN_STRONG_INLINE Packet2ui padd< Packet2ui >(const Packet2ui &a, const Packet2ui &b)
Definition: NEON/PacketMath.h:898
EIGEN_STRONG_INLINE Packet2l pcmp_lt< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:1101
EIGEN_STRONG_INLINE Packet4uc pmul< Packet4uc >(const Packet4uc &a, const Packet4uc &b)
Definition: NEON/PacketMath.h:1144
EIGEN_STRONG_INLINE Packet16uc pdiv< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:2789
EIGEN_STRONG_INLINE Packet4c pabsdiff< Packet4c >(const Packet4c &a, const Packet4c &b)
Definition: NEON/PacketMath.h:1373
EIGEN_STRONG_INLINE Packet4f ploaddup< Packet4f >(const float *from)
Definition: AltiVec/PacketMath.h:1640
EIGEN_STRONG_INLINE Packet4f por< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1427
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< uint8_t, Packet8uc >(uint8_t *to, const Packet8uc &from, Index stride)
Definition: NEON/PacketMath.h:3051
EIGEN_STRONG_INLINE Packet2l por< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:937
EIGEN_STRONG_INLINE void prefetch< int32_t >(const int32_t *addr)
Definition: LSX/PacketMath.h:1848
EIGEN_STRONG_INLINE Packet8uc pset1< Packet8uc >(const uint8_t &from)
Definition: NEON/PacketMath.h:693
EIGEN_STRONG_INLINE Packet2d vec2d_swizzle2(const Packet2d &a, const Packet2d &b, int mask)
Definition: LSX/PacketMath.h:157
EIGEN_STRONG_INLINE Packet4ui pabsdiff< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:2838
EIGEN_STRONG_INLINE Packet2i pmin< Packet2i >(const Packet2i &a, const Packet2i &b)
Definition: NEON/PacketMath.h:1506
EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1983
EIGEN_STRONG_INLINE Packet8s plset< Packet8s >(const short int &a)
Definition: AltiVec/PacketMath.h:1049
EIGEN_STRONG_INLINE Packet4c pcmp_eq< Packet4c >(const Packet4c &a, const Packet4c &b)
Definition: NEON/PacketMath.h:1812
EIGEN_STRONG_INLINE Packet16uc padd< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: AltiVec/PacketMath.h:1090
EIGEN_STRONG_INLINE int predux_min< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2604
EIGEN_STRONG_INLINE Packet2ui pload< Packet2ui >(const uint32_t *from)
Definition: NEON/PacketMath.h:2446
EIGEN_STRONG_INLINE Packet16uc psub< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: AltiVec/PacketMath.h:1115
EIGEN_STRONG_INLINE Packet8uc plset< Packet8uc >(const uint8_t &a)
Definition: NEON/PacketMath.h:779
EIGEN_STRONG_INLINE Packet2f pcmp_lt_or_nan< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:1889
EIGEN_STRONG_INLINE Packet8c pmul< Packet8c >(const Packet8c &a, const Packet8c &b)
Definition: NEON/PacketMath.h:1136
EIGEN_STRONG_INLINE Packet4bf ploadu< Packet4bf >(const bfloat16 *from)
Definition: NEON/PacketMath.h:4840
EIGEN_STRONG_INLINE Packet8c pabsdiff< Packet8c >(const Packet8c &a, const Packet8c &b)
Definition: NEON/PacketMath.h:1378
EIGEN_STRONG_INLINE Packet4i pxor< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1452
EIGEN_STRONG_INLINE Packet2i padd< Packet2i >(const Packet2i &a, const Packet2i &b)
Definition: NEON/PacketMath.h:890
EIGEN_STRONG_INLINE double predux_max< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2127
EIGEN_STRONG_INLINE int8_t predux_max< Packet8c >(const Packet8c &a)
Definition: NEON/PacketMath.h:3967
EIGEN_STRONG_INLINE Packet4c pcmp_lt< Packet4c >(const Packet4c &a, const Packet4c &b)
Definition: NEON/PacketMath.h:1727
EIGEN_STRONG_INLINE Packet4f pmul< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1162
EIGEN_STRONG_INLINE Packet8us pcmp_lt< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: LSX/PacketMath.h:1109
EIGEN_STRONG_INLINE Packet2f pcmp_lt< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:1719
EIGEN_STRONG_INLINE Packet4s pxor< Packet4s >(const Packet4s &a, const Packet4s &b)
Definition: NEON/PacketMath.h:2077
EIGEN_STRONG_INLINE Packet2l pcmp_le< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:1060
EIGEN_STRONG_INLINE uint32_t predux< Packet4ui >(const Packet4ui &a)
Definition: LSX/PacketMath.h:2004
EIGEN_STRONG_INLINE Packet4i pcmp_eq< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: LSX/PacketMath.h:1147
EIGEN_STRONG_INLINE Packet4ui por< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:949
EIGEN_STRONG_INLINE Packet4c plset< Packet4c >(const int8_t &a)
Definition: NEON/PacketMath.h:761
EIGEN_STRONG_INLINE Packet8uc ploaddup< Packet8uc >(const uint8_t *from)
Definition: NEON/PacketMath.h:2569
EIGEN_STRONG_INLINE Packet4ui pmin< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:1196
EIGEN_STRONG_INLINE Packet4c pmin< Packet4c >(const Packet4c &a, const Packet4c &b)
Definition: NEON/PacketMath.h:1464
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4f pgather< float, Packet4f >(const float *from, Index stride)
Definition: AltiVec/PacketMath.h:853
EIGEN_STRONG_INLINE Packet8us pmax< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1301
EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1314
EIGEN_STRONG_INLINE Packet4f paddsub< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:653
EIGEN_STRONG_INLINE Packet8c plset< Packet8c >(const int8_t &a)
Definition: NEON/PacketMath.h:765
EIGEN_STRONG_INLINE Packet2d pset1< Packet2d >(const double &from)
Definition: LSX/PacketMath.h:503
EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1979
EIGEN_STRONG_INLINE Packet4uc pcmp_eq< Packet4uc >(const Packet4uc &a, const Packet4uc &b)
Definition: NEON/PacketMath.h:1825
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4us pgather< uint16_t, Packet4us >(const uint16_t *from, Index stride)
Definition: NEON/PacketMath.h:2938
EIGEN_STRONG_INLINE Packet2ui pcmp_eq< Packet2ui >(const Packet2ui &a, const Packet2ui &b)
Definition: NEON/PacketMath.h:1862
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< uint32_t, Packet2ui >(uint32_t *to, const Packet2ui &from, Index stride)
Definition: NEON/PacketMath.h:3137
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< uint32_t, Packet4ui >(uint32_t *to, const Packet4ui &from, Index stride)
Definition: LSX/PacketMath.h:1817
EIGEN_STRONG_INLINE unsigned short int predux< Packet8us >(const Packet8us &a)
Definition: AltiVec/PacketMath.h:2483
EIGEN_STRONG_INLINE Packet4f pload< Packet4f >(const float *from)
Definition: AltiVec/PacketMath.h:492
__vector signed char Packet16c
Definition: AltiVec/PacketMath.h:40
EIGEN_STRONG_INLINE Packet16uc ploadquad< Packet16uc >(const unsigned char *from)
Definition: AltiVec/PacketMath.h:1724
EIGEN_STRONG_INLINE int predux_mul< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2529
EIGEN_STRONG_INLINE void pstoreu< uint16_t >(uint16_t *to, const Packet8us &from)
Definition: LSX/PacketMath.h:1603
EIGEN_STRONG_INLINE Packet4uc pload< Packet4uc >(const uint8_t *from)
Definition: NEON/PacketMath.h:2408
EIGEN_STRONG_INLINE Packet4us plset< Packet4us >(const uint16_t &a)
Definition: NEON/PacketMath.h:794
EIGEN_STRONG_INLINE Packet16uc pload< Packet16uc >(const unsigned char *from)
Definition: AltiVec/PacketMath.h:517
EIGEN_STRONG_INLINE Packet4us ploaddup< Packet4us >(const uint16_t *from)
Definition: NEON/PacketMath.h:2591
EIGEN_STRONG_INLINE Packet4s por< Packet4s >(const Packet4s &a, const Packet4s &b)
Definition: NEON/PacketMath.h:2004
EIGEN_STRONG_INLINE Packet8us ploadu< Packet8us >(const unsigned short int *from)
Definition: AltiVec/PacketMath.h:1545
EIGEN_STRONG_INLINE Packet4c por< Packet4c >(const Packet4c &a, const Packet4c &b)
Definition: NEON/PacketMath.h:1980
EIGEN_STRONG_INLINE Packet2f pldexp< Packet2f >(const Packet2f &a, const Packet2f &exponent)
Definition: NEON/PacketMath.h:3449
EIGEN_STRONG_INLINE int16_t predux< Packet4s >(const Packet4s &a)
Definition: NEON/PacketMath.h:3583
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< int32_t, Packet4i >(int32_t *to, const Packet4i &from, Index stride)
Definition: LSX/PacketMath.h:1771
EIGEN_STRONG_INLINE Packet8h por(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2309
EIGEN_STRONG_INLINE Packet2f pmin< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:1432
EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1341
EIGEN_STRONG_INLINE Packet2ui pmin< Packet2ui >(const Packet2ui &a, const Packet2ui &b)
Definition: NEON/PacketMath.h:1514
EIGEN_STRONG_INLINE Packet8us pmul< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1174
EIGEN_STRONG_INLINE uint64_t predux_max< Packet2ul >(const Packet2ul &a)
Definition: LSX/PacketMath.h:2171
EIGEN_STRONG_INLINE Packet8s pand< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:888
__vector unsigned int Packet4ui
Definition: AltiVec/PacketMath.h:35
EIGEN_STRONG_INLINE Packet2d pmax< PropagateNaN, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:2733
EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:1122
EIGEN_STRONG_INLINE Packet4f pmin< PropagateNaN, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:2695
EIGEN_STRONG_INLINE Packet2i pload< Packet2i >(const int32_t *from)
Definition: NEON/PacketMath.h:2438
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
Definition: AltiVec/Complex.h:303
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< int16_t, Packet4s >(int16_t *to, const Packet4s &from, Index stride)
Definition: NEON/PacketMath.h:3083
EIGEN_STRONG_INLINE Packet2ui pmax< Packet2ui >(const Packet2ui &a, const Packet2ui &b)
Definition: NEON/PacketMath.h:1615
EIGEN_STRONG_INLINE Packet8s pcmp_le< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:1052
EIGEN_STRONG_INLINE uint16_t predux< Packet4us >(const Packet4us &a)
Definition: NEON/PacketMath.h:3595
EIGEN_STRONG_INLINE void pstore< double >(double *to, const Packet4d &from)
Definition: AVX/PacketMath.h:1611
EIGEN_STRONG_INLINE Packet4i padd< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1070
EIGEN_STRONG_INLINE Packet4f pfloor< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1497
EIGEN_STRONG_INLINE Packet16c pcmp_eq< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:1139
EIGEN_STRONG_INLINE uint32_t pfirst< Packet4ui >(const Packet4ui &a)
Definition: LSX/PacketMath.h:1910
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< float, Packet2f >(float *to, const Packet2f &from, Index stride)
Definition: NEON/PacketMath.h:2999
EIGEN_STRONG_INLINE Packet4c pand< Packet4c >(const Packet4c &a, const Packet4c &b)
Definition: NEON/PacketMath.h:1907
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: AltiVec/PacketMath.h:1218
EIGEN_STRONG_INLINE Packet8c pcmp_le< Packet8c >(const Packet8c &a, const Packet8c &b)
Definition: NEON/PacketMath.h:1647
EIGEN_STRONG_INLINE Packet16c pcmp_lt< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:1089
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:88
EIGEN_DEVICE_FUNC Packet preciprocal(const Packet &a)
Definition: GenericPacketMath.h:1433
EIGEN_STRONG_INLINE Packet4us padd< Packet4us >(const Packet4us &a, const Packet4us &b)
Definition: NEON/PacketMath.h:882
EIGEN_STRONG_INLINE Packet4us pmul< Packet4us >(const Packet4us &a, const Packet4us &b)
Definition: NEON/PacketMath.h:1165
EIGEN_STRONG_INLINE Packet8s pdiv< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:786
EIGEN_STRONG_INLINE Packet4i pandnot< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1469
EIGEN_STRONG_INLINE unsigned char predux_max< Packet16uc >(const Packet16uc &a)
Definition: AltiVec/PacketMath.h:2739
EIGEN_STRONG_INLINE signed char predux_max< Packet16c >(const Packet16c &a)
Definition: AltiVec/PacketMath.h:2727
EIGEN_STRONG_INLINE uint8_t predux_min< Packet8uc >(const Packet8uc &a)
Definition: NEON/PacketMath.h:3862
EIGEN_STRONG_INLINE Packet8us pandnot< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: LSX/PacketMath.h:1027
EIGEN_STRONG_INLINE Packet2ul pmul< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:773
EIGEN_STRONG_INLINE Packet4ui pandnot< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:1031
EIGEN_STRONG_INLINE Packet8s ploaddup< Packet8s >(const short int *from)
Definition: AltiVec/PacketMath.h:1649
EIGEN_STRONG_INLINE Packet4bf F32MaskToBf16Mask(const Packet4f &p)
Definition: NEON/PacketMath.h:4822
EIGEN_STRONG_INLINE uint32_t predux< Packet2ui >(const Packet2ui &a)
Definition: NEON/PacketMath.h:3616
EIGEN_STRONG_INLINE Packet2ul pmax< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:1233
EIGEN_STRONG_INLINE Packet4f pdiv< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1187
EIGEN_STRONG_INLINE Packet4uc pmin< Packet4uc >(const Packet4uc &a, const Packet4uc &b)
Definition: NEON/PacketMath.h:1477
EIGEN_STRONG_INLINE Packet8h pandnot(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2323
EIGEN_STRONG_INLINE Packet8uc pmax< Packet8uc >(const Packet8uc &a, const Packet8uc &b)
Definition: NEON/PacketMath.h:1583
EIGEN_STRONG_INLINE Packet2d pload< Packet2d >(const double *from)
Definition: LSX/PacketMath.h:1407
EIGEN_STRONG_INLINE Packet4uc pcmp_lt< Packet4uc >(const Packet4uc &a, const Packet4uc &b)
Definition: NEON/PacketMath.h:1740
EIGEN_STRONG_INLINE Packet8uc pmul< Packet8uc >(const Packet8uc &a, const Packet8uc &b)
Definition: NEON/PacketMath.h:1149
EIGEN_STRONG_INLINE bfloat16 predux_max< Packet4bf >(const Packet4bf &a)
Definition: NEON/PacketMath.h:4988
EIGEN_STRONG_INLINE Packet8us pload< Packet8us >(const unsigned short int *from)
Definition: AltiVec/PacketMath.h:507
EIGEN_STRONG_INLINE Packet16uc pcmp_lt< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:1105
EIGEN_STRONG_INLINE Packet4c pcmp_le< Packet4c >(const Packet4c &a, const Packet4c &b)
Definition: NEON/PacketMath.h:1642
EIGEN_STRONG_INLINE Packet2d pmul< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:741
EIGEN_STRONG_INLINE Packet4i pabsdiff< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: LSX/PacketMath.h:2774
EIGEN_STRONG_INLINE Packet8uc psub< Packet8uc >(const Packet8uc &a, const Packet8uc &b)
Definition: NEON/PacketMath.h:941
EIGEN_STRONG_INLINE bfloat16 pfirst< Packet4bf >(const Packet4bf &from)
Definition: NEON/PacketMath.h:4830
EIGEN_STRONG_INLINE Packet4s pmax< Packet4s >(const Packet4s &a, const Packet4s &b)
Definition: NEON/PacketMath.h:1591
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
Definition: AltiVec/Complex.h:264
EIGEN_STRONG_INLINE Packet2ui pandnot< Packet2ui >(const Packet2ui &a, const Packet2ui &b)
Definition: NEON/PacketMath.h:2174
EIGEN_STRONG_INLINE Packet4f pfrexp< Packet4f >(const Packet4f &a, Packet4f &exponent)
Definition: AltiVec/PacketMath.h:2328
EIGEN_STRONG_INLINE float predux_mul< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2522
EIGEN_STRONG_INLINE Packet2ul plset< Packet2ul >(const uint64_t &a)
Definition: LSX/PacketMath.h:553
EIGEN_STRONG_INLINE Packet2d pmin< PropagateNumbers, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: SSE/PacketMath.h:1141
EIGEN_STRONG_INLINE Packet2f pandnot< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:2118
EIGEN_STRONG_INLINE Packet2i pcmp_lt< Packet2i >(const Packet2i &a, const Packet2i &b)
Definition: NEON/PacketMath.h:1769
EIGEN_STRONG_INLINE Packet8uc pcmp_le< Packet8uc >(const Packet8uc &a, const Packet8uc &b)
Definition: NEON/PacketMath.h:1660
EIGEN_STRONG_INLINE Packet2f pmax< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:1533
EIGEN_STRONG_INLINE void prefetch< float >(const float *addr)
Definition: AltiVec/PacketMath.h:1854
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< int16_t, Packet8s >(int16_t *to, const Packet8s &from, Index stride)
Definition: LSX/PacketMath.h:1759
EIGEN_STRONG_INLINE void pstoreu< bfloat16 >(bfloat16 *to, const Packet8bf &from)
Definition: AltiVec/PacketMath.h:1772
EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1975
EIGEN_STRONG_INLINE Packet4us pcmp_eq< Packet4us >(const Packet4us &a, const Packet4us &b)
Definition: NEON/PacketMath.h:1846
EIGEN_STRONG_INLINE Packet4us pxor< Packet4us >(const Packet4us &a, const Packet4us &b)
Definition: NEON/PacketMath.h:2085
EIGEN_STRONG_INLINE int32_t pfirst< Packet2i >(const Packet2i &a)
Definition: NEON/PacketMath.h:3249
EIGEN_STRONG_INLINE Packet4ui pcmp_lt< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:1113
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< int64_t, Packet2l >(int64_t *to, const Packet2l &from, Index stride)
Definition: LSX/PacketMath.h:1779
EIGEN_STRONG_INLINE Packet8s padd< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: AltiVec/PacketMath.h:1078
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< double, Packet2d >(double *to, const Packet2d &from, Index stride)
Definition: LSX/PacketMath.h:1734
EIGEN_STRONG_INLINE Packet4uc psub< Packet4uc >(const Packet4uc &a, const Packet4uc &b)
Definition: NEON/PacketMath.h:936
EIGEN_STRONG_INLINE bfloat16 predux_min< Packet4bf >(const Packet4bf &a)
Definition: NEON/PacketMath.h:4993
EIGEN_STRONG_INLINE Packet4ui pload< Packet4ui >(const uint32_t *from)
Definition: LSX/PacketMath.h:1435
EIGEN_STRONG_INLINE Packet4f prsqrt_float_unsafe(const Packet4f &a)
Definition: NEON/PacketMath.h:4591
EIGEN_STRONG_INLINE Packet2i pmul< Packet2i >(const Packet2i &a, const Packet2i &b)
Definition: NEON/PacketMath.h:1173
EIGEN_STRONG_INLINE Packet16uc pandnot< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:1023
EIGEN_STRONG_INLINE Packet8us ploaddup< Packet8us >(const unsigned short int *from)
Definition: AltiVec/PacketMath.h:1659
EIGEN_STRONG_INLINE Packet4f pmax< PropagateNaN, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:2699
EIGEN_STRONG_INLINE Packet4i ploadu< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:1537
EIGEN_STRONG_INLINE double predux_mul< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2019
__vector short int Packet8s
Definition: AltiVec/PacketMath.h:37
EIGEN_STRONG_INLINE Packet2d pdiv< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:782
EIGEN_STRONG_INLINE Packet2ul ploadu< Packet2ul >(const uint64_t *from)
Definition: LSX/PacketMath.h:1480
EIGEN_STRONG_INLINE Packet2ui pcmp_lt< Packet2ui >(const Packet2ui &a, const Packet2ui &b)
Definition: NEON/PacketMath.h:1777
EIGEN_STRONG_INLINE Packet8bf psignbit(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:1966
EIGEN_STRONG_INLINE Packet2i pset1< Packet2i >(const int32_t &from)
Definition: NEON/PacketMath.h:717
EIGEN_STRONG_INLINE Packet4uc pabsdiff< Packet4uc >(const Packet4uc &a, const Packet4uc &b)
Definition: NEON/PacketMath.h:1386
EIGEN_STRONG_INLINE Packet4c pset1< Packet4c >(const int8_t &from)
Definition: NEON/PacketMath.h:677
EIGEN_STRONG_INLINE uint64_t predux_min< Packet2ul >(const Packet2ul &a)
Definition: LSX/PacketMath.h:2117
EIGEN_STRONG_INLINE Packet4s pandnot< Packet4s >(const Packet4s &a, const Packet4s &b)
Definition: NEON/PacketMath.h:2150
EIGEN_STRONG_INLINE Packet2i pdiv< Packet2i >(const Packet2i &, const Packet2i &)
Definition: NEON/PacketMath.h:1250
EIGEN_STRONG_INLINE double predux_min< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2073
EIGEN_STRONG_INLINE Packet8c pcmp_lt< Packet8c >(const Packet8c &a, const Packet8c &b)
Definition: NEON/PacketMath.h:1732
EIGEN_STRONG_INLINE Packet4s ploadu< Packet4s >(const int16_t *from)
Definition: NEON/PacketMath.h:2499
EIGEN_STRONG_INLINE Packet4f pset1< Packet4f >(const float &from)
Definition: AltiVec/PacketMath.h:773
EIGEN_STRONG_INLINE Packet2ui por< Packet2ui >(const Packet2ui &a, const Packet2ui &b)
Definition: NEON/PacketMath.h:2028
EIGEN_STRONG_INLINE Packet4us pand< Packet4us >(const Packet4us &a, const Packet4us &b)
Definition: NEON/PacketMath.h:1939
EIGEN_STRONG_INLINE Packet4ui pmax< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:1229
EIGEN_STRONG_INLINE int8_t predux_min< Packet4c >(const Packet4c &a)
Definition: NEON/PacketMath.h:3780
EIGEN_STRONG_INLINE Packet8c ploadquad< Packet8c >(const int8_t *from)
Definition: NEON/PacketMath.h:2635
uint16x4_t Packet4us
Definition: NEON/PacketMath.h:85
EIGEN_STRONG_INLINE uint32_t predux_min< Packet4ui >(const Packet4ui &a)
Definition: LSX/PacketMath.h:2112
EIGEN_STRONG_INLINE uint8_t predux_mul< Packet4uc >(const Packet4uc &a)
Definition: NEON/PacketMath.h:3686
EIGEN_STRONG_INLINE Packet4s ploaddup< Packet4s >(const int16_t *from)
Definition: NEON/PacketMath.h:2580
EIGEN_STRONG_INLINE Packet4i psub< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1099
EIGEN_STRONG_INLINE Packet8c pmax< Packet8c >(const Packet8c &a, const Packet8c &b)
Definition: NEON/PacketMath.h:1570
EIGEN_STRONG_INLINE Packet4f pmin< PropagateNumbers, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: SSE/PacketMath.h:1137
EIGEN_STRONG_INLINE Packet4ui plset< Packet4ui >(const uint32_t &a)
Definition: LSX/PacketMath.h:548
EIGEN_STRONG_INLINE Packet4bf pnegate< Packet4bf >(const Packet4bf &a)
Definition: NEON/PacketMath.h:5037
EIGEN_STRONG_INLINE Packet4s plset< Packet4s >(const int16_t &a)
Definition: NEON/PacketMath.h:789
EIGEN_STRONG_INLINE Packet2f preciprocal< Packet2f >(const Packet2f &a)
Definition: NEON/PacketMath.h:4641
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter< float, Packet4f >(float *to, const Packet4f &from, Index stride)
Definition: AltiVec/PacketMath.h:954
EIGEN_STRONG_INLINE uint16_t pfirst< Packet4us >(const Packet4us &a)
Definition: NEON/PacketMath.h:3241
EIGEN_STRONG_INLINE Packet2d plset< Packet2d >(const double &a)
Definition: LSX/PacketMath.h:563
EIGEN_STRONG_INLINE uint64_t predux_mul< Packet2ul >(const Packet2ul &a)
Definition: LSX/PacketMath.h:2063
EIGEN_STRONG_INLINE Packet8uc ploadu< Packet8uc >(const uint8_t *from)
Definition: NEON/PacketMath.h:2491
EIGEN_STRONG_INLINE Packet8s pload< Packet8s >(const short int *from)
Definition: AltiVec/PacketMath.h:502
EIGEN_STRONG_INLINE Packet8uc ploadquad< Packet8uc >(const uint8_t *from)
Definition: NEON/PacketMath.h:2652
EIGEN_STRONG_INLINE Packet2ul padd< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:597
uint8x8_t Packet8uc
Definition: NEON/PacketMath.h:81
EIGEN_STRONG_INLINE Packet8us pxor< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1456
EIGEN_STRONG_INLINE Packet2f plset< Packet2f >(const float &a)
Definition: NEON/PacketMath.h:751
EIGEN_STRONG_INLINE int64_t predux_mul< Packet2l >(const Packet2l &a)
Definition: LSX/PacketMath.h:2041
EIGEN_STRONG_INLINE Packet8us padd< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1082
EIGEN_STRONG_INLINE int16_t predux_min< Packet4s >(const Packet4s &a)
Definition: NEON/PacketMath.h:3877
EIGEN_STRONG_INLINE Packet4f pceil< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1493
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet16c pgather< int8_t, Packet16c >(const int8_t *from, Index stride)
Definition: LSX/PacketMath.h:1626
EIGEN_STRONG_INLINE Packet16uc pand< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:900
EIGEN_STRONG_INLINE void pstore< float >(float *to, const Packet4f &from)
Definition: AltiVec/PacketMath.h:642
EIGEN_STRONG_INLINE Packet2l pset1< Packet2l >(const int64_t &from)
Definition: LSX/PacketMath.h:478
EIGEN_STRONG_INLINE Packet4bf pcmp_lt< Packet4bf >(const Packet4bf &a, const Packet4bf &b)
Definition: NEON/PacketMath.h:5022
EIGEN_STRONG_INLINE Packet8uc por< Packet8uc >(const Packet8uc &a, const Packet8uc &b)
Definition: NEON/PacketMath.h:1996
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1936
EIGEN_STRONG_INLINE Packet2ui pmul< Packet2ui >(const Packet2ui &a, const Packet2ui &b)
Definition: NEON/PacketMath.h:1181
EIGEN_STRONG_INLINE Packet2ui pxor< Packet2ui >(const Packet2ui &a, const Packet2ui &b)
Definition: NEON/PacketMath.h:2101
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< uint8_t, Packet4uc >(uint8_t *to, const Packet4uc &from, Index stride)
Definition: NEON/PacketMath.h:3046
EIGEN_STRONG_INLINE Packet4uc pand< Packet4uc >(const Packet4uc &a, const Packet4uc &b)
Definition: NEON/PacketMath.h:1919
EIGEN_STRONG_INLINE Packet2l ploaddup< Packet2l >(const int64_t *from)
Definition: LSX/PacketMath.h:1509
EIGEN_STRONG_INLINE Packet8uc padd< Packet8uc >(const Packet8uc &a, const Packet8uc &b)
Definition: NEON/PacketMath.h:866
EIGEN_ALWAYS_INLINE Packet2f make_packet2f(float a, float b)
Definition: NEON/PacketMath.h:95
EIGEN_STRONG_INLINE Packet2ul por< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:953
EIGEN_STRONG_INLINE Packet16uc por< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:941
EIGEN_STRONG_INLINE Packet4f ptrunc< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1501
EIGEN_STRONG_INLINE void pstore< uint8_t >(uint8_t *to, const Packet16uc &from)
Definition: LSX/PacketMath.h:1557
EIGEN_STRONG_INLINE Packet8uc pand< Packet8uc >(const Packet8uc &a, const Packet8uc &b)
Definition: NEON/PacketMath.h:1923
EIGEN_STRONG_INLINE Packet4i pcmp_le< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: LSX/PacketMath.h:1056
EIGEN_STRONG_INLINE uint64_t predux< Packet2ul >(const Packet2ul &a)
Definition: LSX/PacketMath.h:2009
EIGEN_STRONG_INLINE Packet8us por< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1439
float32x2_t Packet2f
Definition: NEON/PacketMath.h:75
EIGEN_STRONG_INLINE Packet4ui pdiv< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:802
EIGEN_STRONG_INLINE Packet4uc por< Packet4uc >(const Packet4uc &a, const Packet4uc &b)
Definition: NEON/PacketMath.h:1992
EIGEN_STRONG_INLINE Packet4f pset1frombits< Packet4f >(unsigned int from)
Definition: AltiVec/PacketMath.h:803
EIGEN_STRONG_INLINE Packet4bf plset< Packet4bf >(const bfloat16 &a)
Definition: NEON/PacketMath.h:4893
EIGEN_STRONG_INLINE Packet4us pcmp_lt< Packet4us >(const Packet4us &a, const Packet4us &b)
Definition: NEON/PacketMath.h:1761
EIGEN_STRONG_INLINE uint8_t predux_min< Packet4uc >(const Packet4uc &a)
Definition: NEON/PacketMath.h:3813
EIGEN_STRONG_INLINE Packet4uc pandnot< Packet4uc >(const Packet4uc &a, const Packet4uc &b)
Definition: NEON/PacketMath.h:2138
EIGEN_STRONG_INLINE Packet4uc pxor< Packet4uc >(const Packet4uc &a, const Packet4uc &b)
Definition: NEON/PacketMath.h:2065
EIGEN_STRONG_INLINE Packet4f pldexp< Packet4f >(const Packet4f &a, const Packet4f &exponent)
Definition: AltiVec/PacketMath.h:2319
EIGEN_STRONG_INLINE Packet2d ploadu< Packet2d >(const double *from)
Definition: LSX/PacketMath.h:1448
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< int8_t, Packet8c >(int8_t *to, const Packet8c &from, Index stride)
Definition: NEON/PacketMath.h:3015
EIGEN_STRONG_INLINE Packet4us pdiv< Packet4us >(const Packet4us &, const Packet4us &)
Definition: NEON/PacketMath.h:1240
EIGEN_STRONG_INLINE Packet4f vec4f_unpackhi(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:141
EIGEN_STRONG_INLINE float pfirst< Packet2f >(const Packet2f &a)
Definition: NEON/PacketMath.h:3201
EIGEN_STRONG_INLINE Packet4f pcmp_lt< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:1081
EIGEN_STRONG_INLINE Packet2f pfrexp< Packet2f >(const Packet2f &a, Packet2f &exponent)
Definition: NEON/PacketMath.h:3440
EIGEN_STRONG_INLINE Packet2i pabsdiff< Packet2i >(const Packet2i &a, const Packet2i &b)
Definition: NEON/PacketMath.h:1415
EIGEN_STRONG_INLINE Packet2ul pcmp_le< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:1076
EIGEN_STRONG_INLINE Packet8c pmin< Packet8c >(const Packet8c &a, const Packet8c &b)
Definition: NEON/PacketMath.h:1469
EIGEN_STRONG_INLINE short int predux_mul< Packet8s >(const Packet8s &a)
Definition: AltiVec/PacketMath.h:2536
EIGEN_STRONG_INLINE int8_t predux_mul< Packet4c >(const Packet4c &a)
Definition: NEON/PacketMath.h:3670
EIGEN_STRONG_INLINE Packet4f pxor< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1448
EIGEN_STRONG_INLINE void pstoreu< int32_t >(int32_t *to, const Packet4i &from)
Definition: LSX/PacketMath.h:1591
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< int8_t, Packet16c >(int8_t *to, const Packet16c &from, Index stride)
Definition: LSX/PacketMath.h:1739
EIGEN_STRONG_INLINE Packet4i pmin< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1261
EIGEN_STRONG_INLINE uint32_t pfirst< Packet2ui >(const Packet2ui &a)
Definition: NEON/PacketMath.h:3257
EIGEN_STRONG_INLINE Packet4uc pmax< Packet4uc >(const Packet4uc &a, const Packet4uc &b)
Definition: NEON/PacketMath.h:1578
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4c predux_half_dowto4(const Packet8c &a)
Definition: NEON/PacketMath.h:3635
EIGEN_STRONG_INLINE Packet4uc pcmp_le< Packet4uc >(const Packet4uc &a, const Packet4uc &b)
Definition: NEON/PacketMath.h:1655
EIGEN_STRONG_INLINE void prefetch< uint8_t >(const uint8_t *addr)
Definition: LSX/PacketMath.h:1856
EIGEN_STRONG_INLINE Packet2f ploadu< Packet2f >(const float *from)
Definition: NEON/PacketMath.h:2463
EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f &a)
Definition: LSX/PacketMath.h:2176
EIGEN_STRONG_INLINE Packet2i ploadu< Packet2i >(const int32_t *from)
Definition: NEON/PacketMath.h:2515
EIGEN_STRONG_INLINE Packet16uc plset< Packet16uc >(const unsigned char &a)
Definition: AltiVec/PacketMath.h:1061
EIGEN_STRONG_INLINE Packet16c ploadu< Packet16c >(const signed char *from)
Definition: AltiVec/PacketMath.h:1553
EIGEN_STRONG_INLINE Packet2l pmax< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:1217
EIGEN_STRONG_INLINE Packet2l psub< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:622
EIGEN_STRONG_INLINE uint8_t pfirst< Packet4uc >(const Packet4uc &a)
Definition: NEON/PacketMath.h:3221
EIGEN_STRONG_INLINE Packet2d pfrexp< Packet2d >(const Packet2d &a, Packet2d &exponent)
Definition: LSX/PacketMath.h:2677
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:353
EIGEN_STRONG_INLINE Packet4f vec4f_swizzle1(const Packet4f &a, int p, int q, int r, int s)
Definition: LSX/PacketMath.h:126
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2i pgather< int32_t, Packet2i >(const int32_t *from, Index stride)
Definition: NEON/PacketMath.h:2958
EIGEN_STRONG_INLINE Packet8s pmin< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: AltiVec/PacketMath.h:1265
EIGEN_STRONG_INLINE Packet4uc pset1< Packet4uc >(const uint8_t &from)
Definition: NEON/PacketMath.h:689
EIGEN_STRONG_INLINE Packet8s pset1< Packet8s >(const short int &from)
Definition: AltiVec/PacketMath.h:783
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pldexp_generic(const Packet &a, const Packet &exponent)
Definition: GenericPacketMathFunctions.h:226
EIGEN_STRONG_INLINE Packet8c pset1< Packet8c >(const int8_t &from)
Definition: NEON/PacketMath.h:681
EIGEN_STRONG_INLINE Packet8s pmul< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: AltiVec/PacketMath.h:1170
EIGEN_STRONG_INLINE Packet4c pabs< Packet4c >(const Packet4c &a)
Definition: NEON/PacketMath.h:3362
EIGEN_STRONG_INLINE Packet4f vec4f_unpacklo(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:138
EIGEN_STRONG_INLINE Packet4bf pmax< PropagateNumbers, Packet4bf >(const Packet4bf &a, const Packet4bf &b)
Definition: NEON/PacketMath.h:4879
EIGEN_STRONG_INLINE Packet16uc pcmp_le< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:1064
EIGEN_STRONG_INLINE Packet2l pand< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:896
EIGEN_STRONG_INLINE Packet4ui pxor< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:990
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2319
EIGEN_STRONG_INLINE int8_t predux< Packet4c >(const Packet4c &a)
Definition: NEON/PacketMath.h:3478
EIGEN_STRONG_INLINE unsigned char pfirst< Packet16uc >(const Packet16uc &a)
Definition: AltiVec/PacketMath.h:1898
EIGEN_STRONG_INLINE Packet2i pcmp_eq< Packet2i >(const Packet2i &a, const Packet2i &b)
Definition: NEON/PacketMath.h:1854
EIGEN_STRONG_INLINE Packet8s pcmp_lt< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:1093
EIGEN_STRONG_INLINE Packet2f paddsub< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:992
EIGEN_STRONG_INLINE void pstore< int32_t >(int32_t *to, const Packet4i &from)
Definition: LSX/PacketMath.h:1549
EIGEN_STRONG_INLINE Packet16c pand< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:884
EIGEN_STRONG_INLINE Packet pdiv_float_common(const Packet &a, const Packet &b)
Definition: NEON/PacketMath.h:4696
EIGEN_STRONG_INLINE Packet4bf pabsdiff< Packet4bf >(const Packet4bf &a, const Packet4bf &b)
Definition: NEON/PacketMath.h:5012
EIGEN_STRONG_INLINE Packet2d ptrunc< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2749
EIGEN_STRONG_INLINE Packet4s pload< Packet4s >(const int16_t *from)
Definition: NEON/PacketMath.h:2422
EIGEN_STRONG_INLINE Packet16c pandnot< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:1007
EIGEN_STRONG_INLINE Packet16uc pmin< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: AltiVec/PacketMath.h:1277
EIGEN_STRONG_INLINE Packet8h pxor(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2315
EIGEN_STRONG_INLINE Packet4bf pfloor< Packet4bf >(const Packet4bf &a)
Definition: NEON/PacketMath.h:4928
EIGEN_STRONG_INLINE int pfirst< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1869
EIGEN_STRONG_INLINE Packet4i plset< Packet4i >(const int &a)
Definition: AltiVec/PacketMath.h:1045
EIGEN_STRONG_INLINE Packet2d vec2d_unpacklo(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:160
EIGEN_STRONG_INLINE Packet2ui pcmp_le< Packet2ui >(const Packet2ui &a, const Packet2ui &b)
Definition: NEON/PacketMath.h:1692
static EIGEN_STRONG_INLINE int eigen_neon_shuffle_mask(int p, int q, int r, int s)
Definition: NEON/PacketMath.h:128
EIGEN_STRONG_INLINE float predux< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2435
EIGEN_STRONG_INLINE bfloat16 predux_mul< Packet4bf >(const Packet4bf &a)
Definition: NEON/PacketMath.h:4998
EIGEN_STRONG_INLINE Packet8uc pdiv< Packet8uc >(const Packet8uc &, const Packet8uc &)
Definition: NEON/PacketMath.h:1220
EIGEN_STRONG_INLINE Packet4bf psub< Packet4bf >(const Packet4bf &a, const Packet4bf &b)
Definition: NEON/PacketMath.h:4958
EIGEN_STRONG_INLINE float predux_mul< Packet2f >(const Packet2f &a)
Definition: NEON/PacketMath.h:3662
EIGEN_STRONG_INLINE Packet2ul pcmp_lt< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:1117
EIGEN_STRONG_INLINE Packet2d pceil< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1186
EIGEN_STRONG_INLINE Packet2l pload< Packet2l >(const int64_t *from)
Definition: LSX/PacketMath.h:1423
EIGEN_STRONG_INLINE void pstore< int16_t >(int16_t *to, const Packet8s &from)
Definition: LSX/PacketMath.h:1545
EIGEN_STRONG_INLINE Packet2f padd< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:840
EIGEN_STRONG_INLINE Packet4s pmul< Packet4s >(const Packet4s &a, const Packet4s &b)
Definition: NEON/PacketMath.h:1157
EIGEN_STRONG_INLINE Packet4f ploadu< Packet4f >(const float *from)
Definition: AltiVec/PacketMath.h:1533
EIGEN_STRONG_INLINE Packet4bf ptrunc< Packet4bf >(const Packet4bf &a)
Definition: NEON/PacketMath.h:4943
EIGEN_STRONG_INLINE Packet16uc pcmp_eq< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:1155
EIGEN_STRONG_INLINE Packet2f por< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:1972
EIGEN_STRONG_INLINE void pstoreu< uint8_t >(uint8_t *to, const Packet16uc &from)
Definition: LSX/PacketMath.h:1599
EIGEN_STRONG_INLINE Packet4i pmul< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1166
EIGEN_STRONG_INLINE Packet2d print< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2745
EIGEN_STRONG_INLINE int32_t predux_max< Packet2i >(const Packet2i &a)
Definition: NEON/PacketMath.h:4071
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4ui pgather< uint32_t, Packet4ui >(const uint32_t *from, Index stride)
Definition: LSX/PacketMath.h:1710
EIGEN_STRONG_INLINE Packet4bf pmin< PropagateNumbers, Packet4bf >(const Packet4bf &a, const Packet4bf &b)
Definition: NEON/PacketMath.h:4865
EIGEN_STRONG_INLINE Packet4bf pcmp_lt_or_nan< Packet4bf >(const Packet4bf &a, const Packet4bf &b)
Definition: NEON/PacketMath.h:5027
EIGEN_STRONG_INLINE Packet4i ploadquad< Packet4i >(const int32_t *from)
Definition: LSX/PacketMath.h:2601
EIGEN_STRONG_INLINE void pstoreu< int8_t >(int8_t *to, const Packet16c &from)
Definition: LSX/PacketMath.h:1583
EIGEN_STRONG_INLINE Packet4i pand< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1410
EIGEN_STRONG_INLINE Packet2ui pset1< Packet2ui >(const uint32_t &from)
Definition: NEON/PacketMath.h:725
EIGEN_STRONG_INLINE Packet4bf pload< Packet4bf >(const bfloat16 *from)
Definition: NEON/PacketMath.h:4835
EIGEN_STRONG_INLINE Packet2d pmin< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1244
EIGEN_STRONG_INLINE Packet16uc ploaddup< Packet16uc >(const unsigned char *from)
Definition: AltiVec/PacketMath.h:1704
EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1474
EIGEN_STRONG_INLINE int predux< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2445
EIGEN_STRONG_INLINE Packet8us pcmp_eq< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: LSX/PacketMath.h:1159
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2ul pgather< uint64_t, Packet2ul >(const uint64_t *from, Index stride)
Definition: LSX/PacketMath.h:1719
EIGEN_STRONG_INLINE Packet2f pdiv< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:4717
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pfrexp_generic(const Packet &a, Packet &exponent)
Definition: GenericPacketMathFunctions.h:184
EIGEN_STRONG_INLINE Packet4bf pceil< Packet4bf >(const Packet4bf &a)
Definition: NEON/PacketMath.h:4933
EIGEN_ALWAYS_INLINE Packet2d make_packet2d(double a, double b)
Definition: LSX/PacketMath.h:145
EIGEN_STRONG_INLINE Packet4f pand< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1406
EIGEN_STRONG_INLINE Packet16c ploadquad< Packet16c >(const signed char *from)
Definition: AltiVec/PacketMath.h:1714
EIGEN_STRONG_INLINE int8_t predux_mul< Packet8c >(const Packet8c &a)
Definition: NEON/PacketMath.h:3676
EIGEN_STRONG_INLINE float predux_max< Packet2f >(const Packet2f &a)
Definition: NEON/PacketMath.h:3940
EIGEN_STRONG_INLINE int predux_max< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2684
EIGEN_STRONG_INLINE Packet psqrt_float_common(const Packet &a)
Definition: NEON/PacketMath.h:4672
EIGEN_STRONG_INLINE uint8_t predux< Packet8uc >(const Packet8uc &a)
Definition: NEON/PacketMath.h:3568
EIGEN_STRONG_INLINE Packet2l pmin< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:1184
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< uint8_t, Packet16uc >(uint8_t *to, const Packet16uc &from, Index stride)
Definition: LSX/PacketMath.h:1785
EIGEN_STRONG_INLINE Packet8us pset1< Packet8us >(const unsigned short int &from)
Definition: AltiVec/PacketMath.h:788
EIGEN_STRONG_INLINE bfloat16 predux< Packet4bf >(const Packet4bf &a)
Definition: NEON/PacketMath.h:4983
EIGEN_STRONG_INLINE Packet2ui ploaddup< Packet2ui >(const uint32_t *from)
Definition: NEON/PacketMath.h:2610
EIGEN_STRONG_INLINE Packet8s pandnot< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:1011
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2d pgather< double, Packet2d >(const double *from, Index stride)
Definition: LSX/PacketMath.h:1621
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4s pgather< int16_t, Packet4s >(const int16_t *from, Index stride)
Definition: NEON/PacketMath.h:2918
EIGEN_STRONG_INLINE Packet2f ploaddup< Packet2f >(const float *from)
Definition: NEON/PacketMath.h:2540
EIGEN_STRONG_INLINE Packet4s pabsdiff< Packet4s >(const Packet4s &a, const Packet4s &b)
Definition: NEON/PacketMath.h:1399
EIGEN_STRONG_INLINE Packet16c ploaddup< Packet16c >(const signed char *from)
Definition: AltiVec/PacketMath.h:1694
EIGEN_STRONG_INLINE int8_t predux< Packet8c >(const Packet8c &a)
Definition: NEON/PacketMath.h:3495
EIGEN_STRONG_INLINE Packet8c ploaddup< Packet8c >(const int8_t *from)
Definition: NEON/PacketMath.h:2553
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4i pgather< int32_t, Packet4i >(const int32_t *from, Index stride)
Definition: LSX/PacketMath.h:1660
EIGEN_STRONG_INLINE Packet4bf pdiv< Packet4bf >(const Packet4bf &a, const Packet4bf &b)
Definition: NEON/PacketMath.h:4968
EIGEN_STRONG_INLINE Packet4i pcmp_lt< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: LSX/PacketMath.h:1097
EIGEN_STRONG_INLINE int64_t predux_max< Packet2l >(const Packet2l &a)
Definition: LSX/PacketMath.h:2149
EIGEN_STRONG_INLINE Packet4c pxor< Packet4c >(const Packet4c &a, const Packet4c &b)
Definition: NEON/PacketMath.h:2053
EIGEN_STRONG_INLINE int8_t pfirst< Packet8c >(const Packet8c &a)
Definition: NEON/PacketMath.h:3213
EIGEN_STRONG_INLINE unsigned short int pfirst< Packet8us >(const Packet8us &a)
Definition: AltiVec/PacketMath.h:1888
EIGEN_STRONG_INLINE Packet4i pmax< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1293
EIGEN_STRONG_INLINE int16_t pfirst< Packet4s >(const Packet4s &a)
Definition: NEON/PacketMath.h:3233
EIGEN_STRONG_INLINE Packet8c pandnot< Packet8c >(const Packet8c &a, const Packet8c &b)
Definition: NEON/PacketMath.h:2130
EIGEN_STRONG_INLINE Packet16uc pxor< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:982
EIGEN_STRONG_INLINE signed char predux< Packet16c >(const Packet16c &a)
Definition: AltiVec/PacketMath.h:2510
EIGEN_STRONG_INLINE Packet4f pabsdiff< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:2690
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8c pgather< int8_t, Packet8c >(const int8_t *from, Index stride)
Definition: NEON/PacketMath.h:2848
EIGEN_STRONG_INLINE unsigned short int predux_mul< Packet8us >(const Packet8us &a)
Definition: AltiVec/PacketMath.h:2547
EIGEN_STRONG_INLINE Packet16c padd< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: AltiVec/PacketMath.h:1086
EIGEN_STRONG_INLINE Packet4c padd< Packet4c >(const Packet4c &a, const Packet4c &b)
Definition: NEON/PacketMath.h:848
EIGEN_STRONG_INLINE int8_t predux_max< Packet4c >(const Packet4c &a)
Definition: NEON/PacketMath.h:3950
EIGEN_STRONG_INLINE unsigned char predux_min< Packet16uc >(const Packet16uc &a)
Definition: AltiVec/PacketMath.h:2659
EIGEN_STRONG_INLINE short int predux< Packet8s >(const Packet8s &a)
Definition: AltiVec/PacketMath.h:2478
EIGEN_STRONG_INLINE uint8_t predux_mul< Packet8uc >(const Packet8uc &a)
Definition: NEON/PacketMath.h:3692
EIGEN_STRONG_INLINE uint8_t predux< Packet4uc >(const Packet4uc &a)
Definition: NEON/PacketMath.h:3511
EIGEN_STRONG_INLINE Packet2l pmul< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:757
EIGEN_STRONG_INLINE int16_t predux_max< Packet4s >(const Packet4s &a)
Definition: NEON/PacketMath.h:4047
EIGEN_STRONG_INLINE Packet2i pxor< Packet2i >(const Packet2i &a, const Packet2i &b)
Definition: NEON/PacketMath.h:2093
EIGEN_STRONG_INLINE Packet2d pset1frombits< Packet2d >(uint64_t from)
Definition: LSX/PacketMath.h:513
EIGEN_STRONG_INLINE Packet4i pload< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:497
EIGEN_STRONG_INLINE int32_t predux< Packet2i >(const Packet2i &a)
Definition: NEON/PacketMath.h:3607
__vector float Packet4f
Definition: AltiVec/PacketMath.h:33
EIGEN_STRONG_INLINE Packet2d psub< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:646
EIGEN_STRONG_INLINE Packet4s psub< Packet4s >(const Packet4s &a, const Packet4s &b)
Definition: NEON/PacketMath.h:949
EIGEN_STRONG_INLINE Packet4f psub< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1095
EIGEN_STRONG_INLINE Packet4c pmul< Packet4c >(const Packet4c &a, const Packet4c &b)
Definition: NEON/PacketMath.h:1131
Packet prsqrt_float_common(const Packet &a)
Definition: NEON/PacketMath.h:4610
EIGEN_STRONG_INLINE Packet8s psub< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: AltiVec/PacketMath.h:1103
EIGEN_STRONG_INLINE Packet2f pxor< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:2045
EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f &a)
Definition: LSX/PacketMath.h:2528
EIGEN_STRONG_INLINE Packet4uc padd< Packet4uc >(const Packet4uc &a, const Packet4uc &b)
Definition: NEON/PacketMath.h:861
int32x2_t Packet2i
Definition: NEON/PacketMath.h:87
EIGEN_STRONG_INLINE Packet4f plset< Packet4f >(const float &a)
Definition: AltiVec/PacketMath.h:1041
EIGEN_STRONG_INLINE uint32_t predux_mul< Packet4ui >(const Packet4ui &a)
Definition: LSX/PacketMath.h:2058
EIGEN_STRONG_INLINE Packet2ul psub< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:638
EIGEN_STRONG_INLINE Packet8uc pcmp_eq< Packet8uc >(const Packet8uc &a, const Packet8uc &b)
Definition: NEON/PacketMath.h:1830
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< uint16_t, Packet8us >(uint16_t *to, const Packet8us &from, Index stride)
Definition: LSX/PacketMath.h:1805
EIGEN_STRONG_INLINE Packet8s ploadquad< Packet8s >(const short int *from)
Definition: AltiVec/PacketMath.h:1669
EIGEN_STRONG_INLINE uint64_t pfirst< Packet2ul >(const Packet2ul &a)
Definition: LSX/PacketMath.h:1914
EIGEN_STRONG_INLINE Packet4s pdiv< Packet4s >(const Packet4s &, const Packet4s &)
Definition: NEON/PacketMath.h:1230
EIGEN_STRONG_INLINE Packet8bf F32ToBf16(Packet4f p4f)
Definition: AltiVec/PacketMath.h:2059
EIGEN_STRONG_INLINE Packet4uc ploadu< Packet4uc >(const uint8_t *from)
Definition: NEON/PacketMath.h:2485
EIGEN_STRONG_INLINE void pstoreu< int64_t >(int64_t *to, const Packet8l &from)
Definition: AVX512/PacketMath.h:1123
EIGEN_STRONG_INLINE void pstoreu< float >(float *to, const Packet4f &from)
Definition: AltiVec/PacketMath.h:1756
EIGEN_STRONG_INLINE Packet2d pmax< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1256
EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d)
Definition: LSX/PacketMath.h:92
EIGEN_STRONG_INLINE Packet4f pround< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1479
EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1329
EIGEN_STRONG_INLINE void prefetch< int16_t >(const int16_t *addr)
Definition: LSX/PacketMath.h:1844
EIGEN_STRONG_INLINE Packet4bf pgather< bfloat16, Packet4bf >(const bfloat16 *from, Index stride)
Definition: NEON/PacketMath.h:4973
EIGEN_STRONG_INLINE Packet16c pset1< Packet16c >(const signed char &from)
Definition: AltiVec/PacketMath.h:793
EIGEN_STRONG_INLINE void pstore< uint64_t >(uint64_t *to, const Packet2ul &from)
Definition: LSX/PacketMath.h:1569
EIGEN_STRONG_INLINE uint8_t predux_max< Packet8uc >(const Packet8uc &a)
Definition: NEON/PacketMath.h:4032
EIGEN_STRONG_INLINE Packet2l plset< Packet2l >(const int64_t &a)
Definition: LSX/PacketMath.h:533
EIGEN_STRONG_INLINE Packet2i pandnot< Packet2i >(const Packet2i &a, const Packet2i &b)
Definition: NEON/PacketMath.h:2166
EIGEN_STRONG_INLINE Packet4ui ploadu< Packet4ui >(const uint32_t *from)
Definition: LSX/PacketMath.h:1476
EIGEN_STRONG_INLINE Packet2d pround< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1206
EIGEN_STRONG_INLINE int64_t pfirst< Packet2l >(const Packet2l &a)
Definition: LSX/PacketMath.h:1898
EIGEN_STRONG_INLINE Packet4us pandnot< Packet4us >(const Packet4us &a, const Packet4us &b)
Definition: NEON/PacketMath.h:2158
EIGEN_STRONG_INLINE void pstore< uint32_t >(uint32_t *to, const Packet8ui &from)
Definition: AVX/PacketMath.h:1619
EIGEN_STRONG_INLINE Packet4bf pcmp_eq< Packet4bf >(const Packet4bf &a, const Packet4bf &b)
Definition: NEON/PacketMath.h:5017
EIGEN_STRONG_INLINE Packet4us psub< Packet4us >(const Packet4us &a, const Packet4us &b)
Definition: NEON/PacketMath.h:957
EIGEN_STRONG_INLINE Packet16c pxor< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:966
EIGEN_STRONG_INLINE Packet4ui ploadquad< Packet4ui >(const uint32_t *from)
Definition: LSX/PacketMath.h:2606
EIGEN_STRONG_INLINE Packet2ul pdiv< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:806
EIGEN_STRONG_INLINE Packet2i por< Packet2i >(const Packet2i &a, const Packet2i &b)
Definition: NEON/PacketMath.h:2020
EIGEN_STRONG_INLINE void pstoreu< uint64_t >(uint64_t *to, const Packet2ul &from)
Definition: LSX/PacketMath.h:1611
EIGEN_STRONG_INLINE double pfirst< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:1879
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2f pgather< float, Packet2f >(const float *from, Index stride)
Definition: NEON/PacketMath.h:2828
EIGEN_STRONG_INLINE uint16_t predux_mul< Packet4us >(const Packet4us &a)
Definition: NEON/PacketMath.h:3718
EIGEN_STRONG_INLINE Packet16uc pabsdiff< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:2793
EIGEN_STRONG_INLINE Packet2f pabsdiff< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:1365
EIGEN_STRONG_INLINE unsigned char predux_mul< Packet16uc >(const Packet16uc &a)
Definition: AltiVec/PacketMath.h:2578
EIGEN_STRONG_INLINE void pstoreu< int16_t >(int16_t *to, const Packet8s &from)
Definition: LSX/PacketMath.h:1587
EIGEN_STRONG_INLINE Packet4ui pcmp_eq< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:1163
eigen_packet_wrapper< __m128i, 7 > Packet2ul
Definition: LSX/PacketMath.h:45
EIGEN_STRONG_INLINE Packet2l pxor< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:978
int16x4_t Packet4s
Definition: NEON/PacketMath.h:83
EIGEN_STRONG_INLINE Packet4s pcmp_le< Packet4s >(const Packet4s &a, const Packet4s &b)
Definition: NEON/PacketMath.h:1668
EIGEN_STRONG_INLINE Packet4uc ploaddup< Packet4uc >(const uint8_t *from)
Definition: NEON/PacketMath.h:2564
EIGEN_STRONG_INLINE Packet2ui plset< Packet2ui >(const uint32_t &a)
Definition: NEON/PacketMath.h:819
EIGEN_STRONG_INLINE Packet16c pmul< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: AltiVec/PacketMath.h:1178
EIGEN_STRONG_INLINE Packet4bf ploaddup< Packet4bf >(const bfloat16 *from)
Definition: NEON/PacketMath.h:4855
EIGEN_STRONG_INLINE Packet8s pxor< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:970
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4uc pgather< uint8_t, Packet4uc >(const uint8_t *from, Index stride)
Definition: NEON/PacketMath.h:2880
EIGEN_STRONG_INLINE Packet4ui ploaddup< Packet4ui >(const uint32_t *from)
Definition: LSX/PacketMath.h:1523
EIGEN_STRONG_INLINE Packet2f pcmp_le< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:1634
EIGEN_STRONG_INLINE void pstore< int64_t >(int64_t *to, const Packet8l &from)
Definition: AVX512/PacketMath.h:1106
EIGEN_STRONG_INLINE Packet2f pload< Packet2f >(const float *from)
Definition: NEON/PacketMath.h:2386
EIGEN_STRONG_INLINE Packet8us ploadquad< Packet8us >(const unsigned short int *from)
Definition: AltiVec/PacketMath.h:1679
EIGEN_STRONG_INLINE Packet4f print< Packet4f >(const Packet4f &a)
Definition: LSX/PacketMath.h:2711
EIGEN_STRONG_INLINE Packet2d pfloor< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1167
EIGEN_STRONG_INLINE Packet4f preciprocal< Packet4f >(const Packet4f &a)
Definition: LSX/PacketMath.h:2719
EIGEN_STRONG_INLINE Packet2f psub< Packet2f >(const Packet2f &a, const Packet2f &b)
Definition: NEON/PacketMath.h:915
EIGEN_STRONG_INLINE float predux_min< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2599
EIGEN_STRONG_INLINE void pstore< uint16_t >(uint16_t *to, const Packet8us &from)
Definition: LSX/PacketMath.h:1561
EIGEN_STRONG_INLINE void prefetch< double >(const double *addr)
Definition: AVX/PacketMath.h:1750
EIGEN_STRONG_INLINE uint32_t predux_max< Packet2ui >(const Packet2ui &a)
Definition: NEON/PacketMath.h:4080
EIGEN_STRONG_INLINE Packet4f pmax< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1282
EIGEN_STRONG_INLINE Packet2ul pand< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:912
EIGEN_STRONG_INLINE Packet4c pmax< Packet4c >(const Packet4c &a, const Packet4c &b)
Definition: NEON/PacketMath.h:1565
EIGEN_STRONG_INLINE Packet8uc pabsdiff< Packet8uc >(const Packet8uc &a, const Packet8uc &b)
Definition: NEON/PacketMath.h:1391
EIGEN_STRONG_INLINE Packet4c ploadu< Packet4c >(const int8_t *from)
Definition: NEON/PacketMath.h:2471
EIGEN_STRONG_INLINE Packet2i plset< Packet2i >(const int32_t &a)
Definition: NEON/PacketMath.h:809
std::int32_t int32_t
Definition: Meta.h:41
std::int8_t int8_t
Definition: Meta.h:37
std::uint8_t uint8_t
Definition: Meta.h:36
std::int16_t int16_t
Definition: Meta.h:39
std::int64_t int64_t
Definition: Meta.h:43
EIGEN_DEVICE_FUNC const Scalar & q
Definition: SpecialFunctionsImpl.h:2019
std::uint16_t uint16_t
Definition: Meta.h:38
std::uint32_t uint32_t
Definition: Meta.h:40
std::uint64_t uint64_t
Definition: Meta.h:42
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
auto run(Kernel kernel, Args &&... args) -> decltype(kernel(args...))
Definition: gpu_test_helper.h:414
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
CleanedUpDerType< DerType >::type() min(const AutoDiffScalar< DerType > &x, const T &y)
Definition: AutoDiffScalar.h:494
CleanedUpDerType< DerType >::type() max(const AutoDiffScalar< DerType > &x, const T &y)
Definition: AutoDiffScalar.h:499
const Product< Lhs, Rhs > prod(const Lhs &lhs, const Rhs &rhs)
Definition: evaluators.cpp:7
int c
Definition: calibrate.py:100
val
Definition: calibrate.py:119
Definition: Eigen_Colamd.h:49
list x
Definition: plotDoE.py:28
Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
Definition: NumTraits.h:217
unsigned short value
Definition: BFloat16.h:77
Definition: BFloat16.h:101
numext::uint16_t x
Definition: Half.h:101
Definition: GenericPacketMath.h:1407
Packet packet[N]
Definition: GenericPacketMath.h:1408
@ HasASin
Definition: GenericPacketMath.h:84
@ HasATanh
Definition: GenericPacketMath.h:87
@ HasRsqrt
Definition: GenericPacketMath.h:74
@ HasSin
Definition: GenericPacketMath.h:81
@ HasBlend
Definition: GenericPacketMath.h:66
@ HasErfc
Definition: GenericPacketMath.h:96
@ HasACos
Definition: GenericPacketMath.h:85
@ HasAbsDiff
Definition: GenericPacketMath.h:65
@ HasArg
Definition: GenericPacketMath.h:64
@ HasNdtri
Definition: GenericPacketMath.h:97
@ HasCos
Definition: GenericPacketMath.h:82
@ HasCmp
Definition: GenericPacketMath.h:69
@ HasShift
Definition: GenericPacketMath.h:50
@ HasExp
Definition: GenericPacketMath.h:75
@ HasSqrt
Definition: GenericPacketMath.h:73
@ HasErf
Definition: GenericPacketMath.h:95
@ HasBessel
Definition: GenericPacketMath.h:98
@ HasLog
Definition: GenericPacketMath.h:77
@ HasTanh
Definition: GenericPacketMath.h:90
@ HasATan
Definition: GenericPacketMath.h:86
@ HasDiv
Definition: GenericPacketMath.h:71
Definition: GenericPacketMath.h:225
@ value
Definition: Meta.h:146
Packet4bf half
Definition: NEON/PacketMath.h:4736
Packet4bf type
Definition: NEON/PacketMath.h:4735
Packet2f half
Definition: NEON/PacketMath.h:178
Packet4f type
Definition: NEON/PacketMath.h:177
@ HasTanh
Definition: AltiVec/PacketMath.h:200
Packet8s type
Definition: NEON/PacketMath.h:276
Packet4s half
Definition: NEON/PacketMath.h:277
Packet2i half
Definition: NEON/PacketMath.h:332
Packet4i type
Definition: NEON/PacketMath.h:331
Packet2l half
Definition: NEON/PacketMath.h:388
Packet2l type
Definition: NEON/PacketMath.h:387
Packet8c half
Definition: NEON/PacketMath.h:221
Packet16c type
Definition: NEON/PacketMath.h:220
Packet4us half
Definition: NEON/PacketMath.h:304
Packet8us type
Definition: NEON/PacketMath.h:303
Packet2ui half
Definition: NEON/PacketMath.h:359
Packet4ui type
Definition: NEON/PacketMath.h:358
Packet2ul type
Definition: NEON/PacketMath.h:414
Packet2ul half
Definition: NEON/PacketMath.h:415
Packet16uc type
Definition: NEON/PacketMath.h:247
Packet8uc half
Definition: NEON/PacketMath.h:248
T type
Definition: GenericPacketMath.h:109
@ size
Definition: GenericPacketMath.h:113
@ AlignedOnScalar
Definition: GenericPacketMath.h:114
@ Vectorizable
Definition: GenericPacketMath.h:112
T half
Definition: GenericPacketMath.h:110
@ HasSub
Definition: GenericPacketMath.h:118
@ HasMax
Definition: GenericPacketMath.h:124
@ HasNegate
Definition: GenericPacketMath.h:120
@ HasMul
Definition: GenericPacketMath.h:119
@ HasAdd
Definition: GenericPacketMath.h:117
@ HasSetLinear
Definition: GenericPacketMath.h:126
@ HasMin
Definition: GenericPacketMath.h:123
@ HasConj
Definition: GenericPacketMath.h:125
@ HasAbs2
Definition: GenericPacketMath.h:122
@ HasAbs
Definition: GenericPacketMath.h:121
int8_t type
Definition: NEON/PacketMath.h:491
Packet8c half
Definition: NEON/PacketMath.h:492
uint8_t type
Definition: NEON/PacketMath.h:527
Packet8uc half
Definition: NEON/PacketMath.h:528
Packet2f half
Definition: NEON/PacketMath.h:442
Packet2i integer_packet
Definition: NEON/PacketMath.h:443
float type
Definition: NEON/PacketMath.h:441
int32_t type
Definition: NEON/PacketMath.h:587
Packet2i half
Definition: NEON/PacketMath.h:588
Packet2l half
Definition: NEON/PacketMath.h:636
int64_t type
Definition: NEON/PacketMath.h:635
uint32_t type
Definition: NEON/PacketMath.h:611
Packet2ui half
Definition: NEON/PacketMath.h:612
Packet2ul half
Definition: NEON/PacketMath.h:648
uint64_t type
Definition: NEON/PacketMath.h:647
Packet4bf half
Definition: NEON/PacketMath.h:4773
bfloat16 type
Definition: NEON/PacketMath.h:4772
Packet4c half
Definition: NEON/PacketMath.h:468
int8_t type
Definition: NEON/PacketMath.h:467
Packet4i integer_packet
Definition: NEON/PacketMath.h:456
Packet2f half
Definition: NEON/PacketMath.h:455
float type
Definition: NEON/PacketMath.h:454
int32_t type
Definition: NEON/PacketMath.h:599
Packet2i half
Definition: NEON/PacketMath.h:600
int16_t type
Definition: NEON/PacketMath.h:539
Packet4s half
Definition: NEON/PacketMath.h:540
Packet4uc half
Definition: NEON/PacketMath.h:504
uint8_t type
Definition: NEON/PacketMath.h:503
uint32_t type
Definition: NEON/PacketMath.h:623
Packet2ui half
Definition: NEON/PacketMath.h:624
uint16_t type
Definition: NEON/PacketMath.h:563
Packet4us half
Definition: NEON/PacketMath.h:564
int8_t type
Definition: NEON/PacketMath.h:479
Packet4c half
Definition: NEON/PacketMath.h:480
int16_t type
Definition: NEON/PacketMath.h:551
Packet4s half
Definition: NEON/PacketMath.h:552
uint8_t type
Definition: NEON/PacketMath.h:515
Packet4uc half
Definition: NEON/PacketMath.h:516
Packet4us half
Definition: NEON/PacketMath.h:576
uint16_t type
Definition: NEON/PacketMath.h:575
Definition: GenericPacketMath.h:134
numext::get_integer_by_size< sizeof(T)>::signed_type integer_packet
Definition: GenericPacketMath.h:137
T type
Definition: GenericPacketMath.h:135
T half
Definition: GenericPacketMath.h:136
@ masked_load_available
Definition: GenericPacketMath.h:142
@ size
Definition: GenericPacketMath.h:139
@ masked_store_available
Definition: GenericPacketMath.h:143
@ vectorizable
Definition: GenericPacketMath.h:141
@ alignment
Definition: GenericPacketMath.h:140
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2
Definition: ZVector/PacketMath.h:50