2 #ifndef EIGEN_HVX_PACKET_MATH_H
3 #define EIGEN_HVX_PACKET_MATH_H
7 #if defined __HVX__ && (__HVX_LENGTH__ == 128) && __HVX_ARCH__ >= 68
14 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
15 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
29 __asm__(
"%0 = vmem(%1+#%2)" :
"=v"(
v) :
"r"(
m),
"i"(
D) :
"memory");
32 reinterpret_cast<void*
>((
reinterpret_cast<uintptr_t
>(
m) & ~(__HVX_LENGTH__ - 1)) +
D * __HVX_LENGTH__);
33 memcpy(&
v, aligned_mem, __HVX_LENGTH__);
41 memcpy(&
v,
reinterpret_cast<const HVX_Vector*
>(mem), __HVX_LENGTH__);
48 memcpy(&
v, mem, __HVX_LENGTH__);
52 template <
size_t Size,
size_t Alignment,
typename T>
54 #if defined(EIGEN_HVX_FAST_PARTIAL_VECTOR_LOAD)
57 HVX_Vector v0 = HVX_vmem<0>(mem);
59 uintptr_t mem_addr =
reinterpret_cast<uintptr_t
>(mem);
65 uintptr_t left_off = mem_addr & (__HVX_LENGTH__ - 1);
66 if (left_off + Size *
sizeof(
T) > __HVX_LENGTH__) {
67 v1 = HVX_vmem<1>(mem);
72 return Q6_V_valign_VVR(
v1, v0, mem_addr);
75 memcpy(&
v, mem, Size *
sizeof(
T));
82 memcpy(
reinterpret_cast<HVX_Vector*
>(mem), &
v, __HVX_LENGTH__);
87 memcpy(mem, &
v, __HVX_LENGTH__);
90 template <
size_t Size,
size_t Alignment,
typename T>
92 uintptr_t mem_addr =
reinterpret_cast<uintptr_t
>(mem);
93 HVX_Vector
value = Q6_V_vlalign_VVR(
v,
v, mem_addr);
94 uintptr_t left_off = mem_addr & (__HVX_LENGTH__ - 1);
95 uintptr_t right_off = left_off + Size *
sizeof(
T);
97 HVX_VectorPred ql_not = Q6_Q_vsetq_R(mem_addr);
98 HVX_VectorPred
qr = Q6_Q_vsetq2_R(right_off);
101 if (right_off > __HVX_LENGTH__) {
102 Q6_vmem_QRIV(
qr, mem + __HVX_LENGTH__ /
sizeof(
T),
value);
107 ql_not = Q6_Q_or_QQn(ql_not,
qr);
108 Q6_vmem_QnRIV(ql_not, mem,
value);
112 enum class HVXPacketSize {
// Thin value wrapper around a single HVX_Vector, parameterised on an
// HVXPacketSize tag T. Only the members visible in this listing are
// described; the class header line itself is not part of this excerpt.
121 template <HVXPacketSize T>
// Default construction leaves m_val at its in-class initializer below.
124 HVXPacket() =
default;
// Named factory: the only public way shown here to build a packet from a raw
// HVX_Vector, since the converting constructor is explicit.
125 static HVXPacket Create(HVX_Vector
v) {
return HVXPacket(
v); }
// Returns the wrapped vector by value.
126 HVX_Vector Get()
const {
return m_val; }
// Explicit so that an HVX_Vector never converts to a packet implicitly.
129 explicit HVXPacket(HVX_Vector
v) : m_val(
v) {}
// Wrapped register value; initialised from Q6_V_vzero() when no vector is
// supplied.
130 HVX_Vector m_val = Q6_V_vzero();
133 typedef HVXPacket<HVXPacketSize::Full> Packet32f;
134 typedef HVXPacket<HVXPacketSize::Half>
Packet16f;
135 typedef HVXPacket<HVXPacketSize::Quarter>
Packet8f;
139 struct packet_traits<float> : default_packet_traits {
140 typedef Packet32f
type;
183 struct unpacket_traits<Packet32f> {
226 template <HVXPacketSize T>
228 return HVXPacket<T>::Create(Q6_V_vzero());
232 return pzero_hvx(Packet32f());
// Adds a packet to a rotated copy of itself and returns the result wrapped in
// the packet type named by unpacket_traits<HVXPacket<T>>::half.
//
// a: input packet; its raw vector is read twice through Get().
// Returns: half::Create(...) of the converted sum.
243 template <HVXPacketSize T>
244 EIGEN_STRONG_INLINE typename unpacket_traits<HVXPacket<T>>::half predux_half_dowto4_hvx(
const HVXPacket<T>&
a) {
// Lane count taken from the packet's traits; it only feeds the rotate amount.
245 const Index packet_size = unpacket_traits<HVXPacket<T>>
::size;
// The rotate amount handed to Q6_V_vror_VR is sizeof(float) * packet_size / 2
// bytes, i.e. half of the packet if `size` counts float lanes.
// The add goes through Q6_Vqf32_vadd_VsfVsf and is converted back with
// Q6_Vsf_equals_Vqf32 before being wrapped.
// NOTE(review): presumably the lanes of interest are the low half of the
// result after the rotate-and-add; confirm against the HVX vror definition.
246 return unpacket_traits<HVXPacket<T>>::half::Create(
247 Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_VsfVsf(Q6_V_vror_VR(
a.Get(),
sizeof(
float) * packet_size / 2),
a.Get())));
251 return predux_half_dowto4_hvx(
a);
255 return predux_half_dowto4_hvx(
a);
258 template <HVXPacketSize T>
265 return HVXPacket<T>::Create(Q6_V_vsplat_R(u.i));
269 return pset1_hvx<HVXPacketSize::Full>(from);
273 return pset1_hvx<HVXPacketSize::Half>(from);
277 return pset1_hvx<HVXPacketSize::Quarter>(from);
282 return Packet32f::Create(HVX_load(from));
286 return Packet16f::Create(
291 return Packet8f::Create(
297 return Packet32f::Create(HVX_loadu(from));
310 HVX_store(to, from.Get());
323 HVX_storeu(to, from.Get());
334 template <HVXPacketSize T>
336 return HVXPacket<T>::Create(Q6_Vsf_equals_Vqf32(Q6_Vqf32_vmpy_VsfVsf(
a.Get(),
b.Get())));
340 return pmul_hvx(
a,
b);
344 return pmul_hvx(
a,
b);
348 return pmul_hvx(
a,
b);
351 template <HVXPacketSize T>
353 return HVXPacket<T>::Create(Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_VsfVsf(
a.Get(),
b.Get())));
357 return padd_hvx(
a,
b);
361 return padd_hvx(
a,
b);
365 return padd_hvx(
a,
b);
368 template <HVXPacketSize T>
370 return HVXPacket<T>::Create(Q6_Vsf_equals_Vqf32(Q6_Vqf32_vsub_VsfVsf(
a.Get(),
b.Get())));
374 return psub_hvx(
a,
b);
378 return psub_hvx(
a,
b);
382 return psub_hvx(
a,
b);
385 template <HVXPacketSize T>
387 return HVXPacket<T>::Create(
a.Get() ^ Q6_V_vsplat_R(0x80000000));
391 return pnegate_hvx(
a);
395 return pnegate_hvx(
a);
399 return pnegate_hvx(
a);
402 template <HVXPacketSize T>
404 HVX_Vector v_true = Q6_Vb_vsplat_R(0xff);
405 HVX_VectorPred pred = Q6_Q_vcmp_gt_VsfVsf(
a.Get(),
b.Get());
406 return HVXPacket<T>::Create(Q6_V_vmux_QVV(pred, Q6_V_vzero(), v_true));
410 return pcmp_le_hvx(
a,
b);
414 return pcmp_le_hvx(
a,
b);
418 return pcmp_le_hvx(
a,
b);
421 template <HVXPacketSize T>
423 HVX_Vector v_true = Q6_Vb_vsplat_R(0xff);
424 HVX_VectorPred pred = Q6_Q_vcmp_eq_VwVw(
a.Get(),
b.Get());
425 return HVXPacket<T>::Create(Q6_V_vmux_QVV(pred, v_true, Q6_V_vzero()));
429 return pcmp_eq_hvx(
a,
b);
433 return pcmp_eq_hvx(
a,
b);
437 return pcmp_eq_hvx(
a,
b);
440 template <HVXPacketSize T>
442 HVX_Vector v_true = Q6_Vb_vsplat_R(0xff);
443 HVX_VectorPred pred = Q6_Q_vcmp_gt_VsfVsf(
b.Get(),
a.Get());
444 return HVXPacket<T>::Create(Q6_V_vmux_QVV(pred, v_true, Q6_V_vzero()));
448 return pcmp_lt_hvx(
a,
b);
452 return pcmp_lt_hvx(
a,
b);
456 return pcmp_lt_hvx(
a,
b);
459 template <HVXPacketSize T>
461 HVX_Vector v_true = Q6_Vb_vsplat_R(0xff);
462 HVX_VectorPred pred = Q6_Q_vcmp_gt_VsfVsf(
b.Get(),
a.Get());
463 return HVXPacket<T>::Create(Q6_V_vmux_QVV(pred, v_true, Q6_V_vzero()));
467 return pcmp_lt_or_nan_hvx(
a,
b);
471 return pcmp_lt_or_nan_hvx(
a,
b);
475 return pcmp_lt_or_nan_hvx(
a,
b);
478 template <HVXPacketSize T>
480 return HVXPacket<T>::Create(
a.Get() & Q6_V_vsplat_R(0x7FFFFFFF));
495 template <HVXPacketSize T>
501 HVX_and_array.vector =
a.Get();
502 return HVX_and_array.array[0];
506 return pfirst_hvx(
a);
510 return pfirst_hvx(
a);
514 return pfirst_hvx(
a);
519 HVX_VectorPair v_0_1_0 = Q6_W_vshuff_VVR(kernel.packet[1].Get(), kernel.packet[0].Get(), -4);
520 HVX_VectorPair v_0_3_2 = Q6_W_vshuff_VVR(kernel.packet[3].Get(), kernel.packet[2].Get(), -4);
523 HVX_VectorPair v_1_1_0 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_3_2), HEXAGON_HVX_GET_V0(v_0_1_0), -8);
524 HVX_VectorPair v_1_3_2 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_3_2), HEXAGON_HVX_GET_V1(v_0_1_0), -8);
525 kernel.packet[0] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_1_1_0));
526 kernel.packet[1] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_1_1_0));
527 kernel.packet[2] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_1_3_2));
528 kernel.packet[3] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_1_3_2));
532 HVX_VectorPair v_0_1_0 = Q6_W_vshuff_VVR(kernel.packet[1].Get(), kernel.packet[0].Get(), -4);
533 HVX_VectorPair v_0_3_2 = Q6_W_vshuff_VVR(kernel.packet[3].Get(), kernel.packet[2].Get(), -4);
536 HVX_VectorPair v_1_1_0 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_3_2), HEXAGON_HVX_GET_V0(v_0_1_0), -8);
538 kernel.packet[0] = Packet16f::Create(HEXAGON_HVX_GET_V0(v_1_1_0));
539 kernel.packet[1] = Packet16f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V0(v_1_1_0), HEXAGON_HVX_GET_V0(v_1_1_0), 64));
540 kernel.packet[2] = Packet16f::Create(HEXAGON_HVX_GET_V1(v_1_1_0));
541 kernel.packet[3] = Packet16f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V1(v_1_1_0), HEXAGON_HVX_GET_V1(v_1_1_0), 64));
545 HVX_VectorPair v_0_1_0 = Q6_W_vshuff_VVR(kernel.packet[1].Get(), kernel.packet[0].Get(), -4);
546 HVX_VectorPair v_0_3_2 = Q6_W_vshuff_VVR(kernel.packet[3].Get(), kernel.packet[2].Get(), -4);
549 HVX_VectorPair v_1_1_0 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_3_2), HEXAGON_HVX_GET_V0(v_0_1_0), -8);
551 kernel.packet[0] = Packet8f::Create(HEXAGON_HVX_GET_V0(v_1_1_0));
552 kernel.packet[1] = Packet8f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V0(v_1_1_0), HEXAGON_HVX_GET_V0(v_1_1_0), 32));
553 kernel.packet[2] = Packet8f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V0(v_1_1_0), HEXAGON_HVX_GET_V0(v_1_1_0), 64));
554 kernel.packet[3] = Packet8f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V0(v_1_1_0), HEXAGON_HVX_GET_V0(v_1_1_0), 96));
559 HVX_VectorPair v_0_1_0 = Q6_W_vshuff_VVR(kernel.packet[1].Get(), kernel.packet[0].Get(), -4);
560 HVX_VectorPair v_0_3_2 = Q6_W_vshuff_VVR(kernel.packet[3].Get(), kernel.packet[2].Get(), -4);
561 HVX_VectorPair v_0_5_4 = Q6_W_vshuff_VVR(kernel.packet[5].Get(), kernel.packet[4].Get(), -4);
562 HVX_VectorPair v_0_7_6 = Q6_W_vshuff_VVR(kernel.packet[7].Get(), kernel.packet[6].Get(), -4);
565 HVX_VectorPair v_1_1_0 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_3_2), HEXAGON_HVX_GET_V0(v_0_1_0), -8);
566 HVX_VectorPair v_1_3_2 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_7_6), HEXAGON_HVX_GET_V0(v_0_5_4), -8);
569 v_0_1_0 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_3_2), HEXAGON_HVX_GET_V0(v_1_1_0), -16);
571 kernel.packet[0] = Packet8f::Create(HEXAGON_HVX_GET_V0(v_0_1_0));
572 kernel.packet[1] = Packet8f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V0(v_0_1_0), HEXAGON_HVX_GET_V0(v_0_1_0), 32));
573 kernel.packet[2] = Packet8f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V0(v_0_1_0), HEXAGON_HVX_GET_V0(v_0_1_0), 64));
574 kernel.packet[3] = Packet8f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V0(v_0_1_0), HEXAGON_HVX_GET_V0(v_0_1_0), 96));
575 kernel.packet[4] = Packet8f::Create(HEXAGON_HVX_GET_V1(v_0_1_0));
576 kernel.packet[5] = Packet8f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V1(v_0_1_0), HEXAGON_HVX_GET_V1(v_0_1_0), 32));
577 kernel.packet[6] = Packet8f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V1(v_0_1_0), HEXAGON_HVX_GET_V1(v_0_1_0), 64));
578 kernel.packet[7] = Packet8f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V1(v_0_1_0), HEXAGON_HVX_GET_V1(v_0_1_0), 96));
582 HVX_VectorPair v_0_1_0 = Q6_W_vshuff_VVR(kernel.packet[1].Get(), kernel.packet[0].Get(), -4);
583 HVX_VectorPair v_0_3_2 = Q6_W_vshuff_VVR(kernel.packet[3].Get(), kernel.packet[2].Get(), -4);
584 HVX_VectorPair v_0_5_4 = Q6_W_vshuff_VVR(kernel.packet[5].Get(), kernel.packet[4].Get(), -4);
585 HVX_VectorPair v_0_7_6 = Q6_W_vshuff_VVR(kernel.packet[7].Get(), kernel.packet[6].Get(), -4);
586 HVX_VectorPair v_0_9_8 = Q6_W_vshuff_VVR(kernel.packet[9].Get(), kernel.packet[8].Get(), -4);
587 HVX_VectorPair v_0_11_10 = Q6_W_vshuff_VVR(kernel.packet[11].Get(), kernel.packet[10].Get(), -4);
588 HVX_VectorPair v_0_13_12 = Q6_W_vshuff_VVR(kernel.packet[13].Get(), kernel.packet[12].Get(), -4);
589 HVX_VectorPair v_0_15_14 = Q6_W_vshuff_VVR(kernel.packet[15].Get(), kernel.packet[14].Get(), -4);
592 HVX_VectorPair v_1_1_0 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_3_2), HEXAGON_HVX_GET_V0(v_0_1_0), -8);
593 HVX_VectorPair v_1_3_2 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_7_6), HEXAGON_HVX_GET_V0(v_0_5_4), -8);
594 HVX_VectorPair v_1_5_4 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_11_10), HEXAGON_HVX_GET_V0(v_0_9_8), -8);
595 HVX_VectorPair v_1_7_6 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_15_14), HEXAGON_HVX_GET_V0(v_0_13_12), -8);
598 v_0_1_0 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_3_2), HEXAGON_HVX_GET_V0(v_1_1_0), -16);
599 v_0_3_2 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_3_2), HEXAGON_HVX_GET_V1(v_1_1_0), -16);
600 v_0_9_8 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_7_6), HEXAGON_HVX_GET_V0(v_1_5_4), -16);
601 v_0_11_10 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_7_6), HEXAGON_HVX_GET_V1(v_1_5_4), -16);
604 v_1_1_0 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_9_8), HEXAGON_HVX_GET_V0(v_0_1_0), -32);
605 v_1_3_2 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_9_8), HEXAGON_HVX_GET_V1(v_0_1_0), -32);
606 v_1_5_4 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_11_10), HEXAGON_HVX_GET_V0(v_0_3_2), -32);
607 v_1_7_6 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_11_10), HEXAGON_HVX_GET_V1(v_0_3_2), -32);
609 kernel.packet[0] = Packet16f::Create(HEXAGON_HVX_GET_V0(v_1_1_0));
610 kernel.packet[1] = Packet16f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V0(v_1_1_0), HEXAGON_HVX_GET_V0(v_1_1_0), 64));
611 kernel.packet[2] = Packet16f::Create(HEXAGON_HVX_GET_V1(v_1_1_0));
612 kernel.packet[3] = Packet16f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V1(v_1_1_0), HEXAGON_HVX_GET_V1(v_1_1_0), 64));
613 kernel.packet[4] = Packet16f::Create(HEXAGON_HVX_GET_V0(v_1_3_2));
614 kernel.packet[5] = Packet16f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V0(v_1_3_2), HEXAGON_HVX_GET_V0(v_1_3_2), 64));
615 kernel.packet[6] = Packet16f::Create(HEXAGON_HVX_GET_V1(v_1_3_2));
616 kernel.packet[7] = Packet16f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V1(v_1_3_2), HEXAGON_HVX_GET_V1(v_1_3_2), 64));
617 kernel.packet[8] = Packet16f::Create(HEXAGON_HVX_GET_V0(v_1_5_4));
618 kernel.packet[9] = Packet16f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V0(v_1_5_4), HEXAGON_HVX_GET_V0(v_1_5_4), 64));
619 kernel.packet[10] = Packet16f::Create(HEXAGON_HVX_GET_V1(v_1_5_4));
620 kernel.packet[11] = Packet16f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V1(v_1_5_4), HEXAGON_HVX_GET_V1(v_1_5_4), 64));
621 kernel.packet[12] = Packet16f::Create(HEXAGON_HVX_GET_V0(v_1_7_6));
622 kernel.packet[13] = Packet16f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V0(v_1_7_6), HEXAGON_HVX_GET_V0(v_1_7_6), 64));
623 kernel.packet[14] = Packet16f::Create(HEXAGON_HVX_GET_V1(v_1_7_6));
624 kernel.packet[15] = Packet16f::Create(Q6_V_valign_VVR(HEXAGON_HVX_GET_V1(v_1_7_6), HEXAGON_HVX_GET_V1(v_1_7_6), 64));
628 HVX_VectorPair v_0_1_0 = Q6_W_vshuff_VVR(kernel.packet[1].Get(), kernel.packet[0].Get(), -4);
629 HVX_VectorPair v_0_3_2 = Q6_W_vshuff_VVR(kernel.packet[3].Get(), kernel.packet[2].Get(), -4);
630 HVX_VectorPair v_0_5_4 = Q6_W_vshuff_VVR(kernel.packet[5].Get(), kernel.packet[4].Get(), -4);
631 HVX_VectorPair v_0_7_6 = Q6_W_vshuff_VVR(kernel.packet[7].Get(), kernel.packet[6].Get(), -4);
632 HVX_VectorPair v_0_9_8 = Q6_W_vshuff_VVR(kernel.packet[9].Get(), kernel.packet[8].Get(), -4);
633 HVX_VectorPair v_0_11_10 = Q6_W_vshuff_VVR(kernel.packet[11].Get(), kernel.packet[10].Get(), -4);
634 HVX_VectorPair v_0_13_12 = Q6_W_vshuff_VVR(kernel.packet[13].Get(), kernel.packet[12].Get(), -4);
635 HVX_VectorPair v_0_15_14 = Q6_W_vshuff_VVR(kernel.packet[15].Get(), kernel.packet[14].Get(), -4);
636 HVX_VectorPair v_0_17_16 = Q6_W_vshuff_VVR(kernel.packet[17].Get(), kernel.packet[16].Get(), -4);
637 HVX_VectorPair v_0_19_18 = Q6_W_vshuff_VVR(kernel.packet[19].Get(), kernel.packet[18].Get(), -4);
638 HVX_VectorPair v_0_21_20 = Q6_W_vshuff_VVR(kernel.packet[21].Get(), kernel.packet[20].Get(), -4);
639 HVX_VectorPair v_0_23_22 = Q6_W_vshuff_VVR(kernel.packet[23].Get(), kernel.packet[22].Get(), -4);
640 HVX_VectorPair v_0_25_24 = Q6_W_vshuff_VVR(kernel.packet[25].Get(), kernel.packet[24].Get(), -4);
641 HVX_VectorPair v_0_27_26 = Q6_W_vshuff_VVR(kernel.packet[27].Get(), kernel.packet[26].Get(), -4);
642 HVX_VectorPair v_0_29_28 = Q6_W_vshuff_VVR(kernel.packet[29].Get(), kernel.packet[28].Get(), -4);
643 HVX_VectorPair v_0_31_30 = Q6_W_vshuff_VVR(kernel.packet[31].Get(), kernel.packet[30].Get(), -4);
646 HVX_VectorPair v_1_1_0 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_3_2), HEXAGON_HVX_GET_V0(v_0_1_0), -8);
647 HVX_VectorPair v_1_3_2 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_3_2), HEXAGON_HVX_GET_V1(v_0_1_0), -8);
648 HVX_VectorPair v_1_5_4 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_7_6), HEXAGON_HVX_GET_V0(v_0_5_4), -8);
649 HVX_VectorPair v_1_7_6 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_7_6), HEXAGON_HVX_GET_V1(v_0_5_4), -8);
650 HVX_VectorPair v_1_9_8 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_11_10), HEXAGON_HVX_GET_V0(v_0_9_8), -8);
651 HVX_VectorPair v_1_11_10 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_11_10), HEXAGON_HVX_GET_V1(v_0_9_8), -8);
652 HVX_VectorPair v_1_13_12 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_15_14), HEXAGON_HVX_GET_V0(v_0_13_12), -8);
653 HVX_VectorPair v_1_15_14 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_15_14), HEXAGON_HVX_GET_V1(v_0_13_12), -8);
654 HVX_VectorPair v_1_17_16 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_19_18), HEXAGON_HVX_GET_V0(v_0_17_16), -8);
655 HVX_VectorPair v_1_19_18 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_19_18), HEXAGON_HVX_GET_V1(v_0_17_16), -8);
656 HVX_VectorPair v_1_21_20 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_23_22), HEXAGON_HVX_GET_V0(v_0_21_20), -8);
657 HVX_VectorPair v_1_23_22 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_23_22), HEXAGON_HVX_GET_V1(v_0_21_20), -8);
658 HVX_VectorPair v_1_25_24 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_27_26), HEXAGON_HVX_GET_V0(v_0_25_24), -8);
659 HVX_VectorPair v_1_27_26 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_27_26), HEXAGON_HVX_GET_V1(v_0_25_24), -8);
660 HVX_VectorPair v_1_29_28 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_31_30), HEXAGON_HVX_GET_V0(v_0_29_28), -8);
661 HVX_VectorPair v_1_31_30 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_31_30), HEXAGON_HVX_GET_V1(v_0_29_28), -8);
664 v_0_1_0 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_5_4), HEXAGON_HVX_GET_V0(v_1_1_0), -16);
665 v_0_3_2 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_5_4), HEXAGON_HVX_GET_V1(v_1_1_0), -16);
666 v_0_5_4 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_7_6), HEXAGON_HVX_GET_V0(v_1_3_2), -16);
667 v_0_7_6 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_7_6), HEXAGON_HVX_GET_V1(v_1_3_2), -16);
668 v_0_9_8 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_13_12), HEXAGON_HVX_GET_V0(v_1_9_8), -16);
669 v_0_11_10 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_13_12), HEXAGON_HVX_GET_V1(v_1_9_8), -16);
670 v_0_13_12 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_15_14), HEXAGON_HVX_GET_V0(v_1_11_10), -16);
671 v_0_15_14 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_15_14), HEXAGON_HVX_GET_V1(v_1_11_10), -16);
672 v_0_17_16 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_21_20), HEXAGON_HVX_GET_V0(v_1_17_16), -16);
673 v_0_19_18 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_21_20), HEXAGON_HVX_GET_V1(v_1_17_16), -16);
674 v_0_21_20 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_23_22), HEXAGON_HVX_GET_V0(v_1_19_18), -16);
675 v_0_23_22 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_23_22), HEXAGON_HVX_GET_V1(v_1_19_18), -16);
676 v_0_25_24 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_29_28), HEXAGON_HVX_GET_V0(v_1_25_24), -16);
677 v_0_27_26 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_29_28), HEXAGON_HVX_GET_V1(v_1_25_24), -16);
678 v_0_29_28 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_31_30), HEXAGON_HVX_GET_V0(v_1_27_26), -16);
679 v_0_31_30 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_31_30), HEXAGON_HVX_GET_V1(v_1_27_26), -16);
682 v_1_1_0 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_9_8), HEXAGON_HVX_GET_V0(v_0_1_0), -32);
683 v_1_3_2 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_9_8), HEXAGON_HVX_GET_V1(v_0_1_0), -32);
684 v_1_5_4 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_11_10), HEXAGON_HVX_GET_V0(v_0_3_2), -32);
685 v_1_7_6 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_11_10), HEXAGON_HVX_GET_V1(v_0_3_2), -32);
686 v_1_9_8 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_13_12), HEXAGON_HVX_GET_V0(v_0_5_4), -32);
687 v_1_11_10 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_13_12), HEXAGON_HVX_GET_V1(v_0_5_4), -32);
688 v_1_13_12 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_15_14), HEXAGON_HVX_GET_V0(v_0_7_6), -32);
689 v_1_15_14 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_15_14), HEXAGON_HVX_GET_V1(v_0_7_6), -32);
690 v_1_17_16 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_25_24), HEXAGON_HVX_GET_V0(v_0_17_16), -32);
691 v_1_19_18 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_25_24), HEXAGON_HVX_GET_V1(v_0_17_16), -32);
692 v_1_21_20 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_27_26), HEXAGON_HVX_GET_V0(v_0_19_18), -32);
693 v_1_23_22 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_27_26), HEXAGON_HVX_GET_V1(v_0_19_18), -32);
694 v_1_25_24 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_29_28), HEXAGON_HVX_GET_V0(v_0_21_20), -32);
695 v_1_27_26 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_29_28), HEXAGON_HVX_GET_V1(v_0_21_20), -32);
696 v_1_29_28 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_0_31_30), HEXAGON_HVX_GET_V0(v_0_23_22), -32);
697 v_1_31_30 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_0_31_30), HEXAGON_HVX_GET_V1(v_0_23_22), -32);
700 v_0_1_0 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_17_16), HEXAGON_HVX_GET_V0(v_1_1_0), -64);
701 v_0_3_2 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_17_16), HEXAGON_HVX_GET_V1(v_1_1_0), -64);
702 v_0_5_4 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_19_18), HEXAGON_HVX_GET_V0(v_1_3_2), -64);
703 v_0_7_6 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_19_18), HEXAGON_HVX_GET_V1(v_1_3_2), -64);
704 v_0_9_8 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_21_20), HEXAGON_HVX_GET_V0(v_1_5_4), -64);
705 v_0_11_10 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_21_20), HEXAGON_HVX_GET_V1(v_1_5_4), -64);
706 v_0_13_12 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_23_22), HEXAGON_HVX_GET_V0(v_1_7_6), -64);
707 v_0_15_14 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_23_22), HEXAGON_HVX_GET_V1(v_1_7_6), -64);
708 v_0_17_16 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_25_24), HEXAGON_HVX_GET_V0(v_1_9_8), -64);
709 v_0_19_18 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_25_24), HEXAGON_HVX_GET_V1(v_1_9_8), -64);
710 v_0_21_20 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_27_26), HEXAGON_HVX_GET_V0(v_1_11_10), -64);
711 v_0_23_22 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_27_26), HEXAGON_HVX_GET_V1(v_1_11_10), -64);
712 v_0_25_24 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_29_28), HEXAGON_HVX_GET_V0(v_1_13_12), -64);
713 v_0_27_26 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_29_28), HEXAGON_HVX_GET_V1(v_1_13_12), -64);
714 v_0_29_28 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(v_1_31_30), HEXAGON_HVX_GET_V0(v_1_15_14), -64);
715 v_0_31_30 = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V1(v_1_31_30), HEXAGON_HVX_GET_V1(v_1_15_14), -64);
717 kernel.packet[0] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_1_0));
718 kernel.packet[1] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_1_0));
719 kernel.packet[2] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_3_2));
720 kernel.packet[3] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_3_2));
721 kernel.packet[4] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_5_4));
722 kernel.packet[5] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_5_4));
723 kernel.packet[6] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_7_6));
724 kernel.packet[7] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_7_6));
725 kernel.packet[8] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_9_8));
726 kernel.packet[9] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_9_8));
727 kernel.packet[10] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_11_10));
728 kernel.packet[11] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_11_10));
729 kernel.packet[12] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_13_12));
730 kernel.packet[13] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_13_12));
731 kernel.packet[14] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_15_14));
732 kernel.packet[15] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_15_14));
733 kernel.packet[16] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_17_16));
734 kernel.packet[17] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_17_16));
735 kernel.packet[18] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_19_18));
736 kernel.packet[19] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_19_18));
737 kernel.packet[20] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_21_20));
738 kernel.packet[21] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_21_20));
739 kernel.packet[22] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_23_22));
740 kernel.packet[23] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_23_22));
741 kernel.packet[24] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_25_24));
742 kernel.packet[25] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_25_24));
743 kernel.packet[26] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_27_26));
744 kernel.packet[27] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_27_26));
745 kernel.packet[28] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_29_28));
746 kernel.packet[29] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_29_28));
747 kernel.packet[30] = Packet32f::Create(HEXAGON_HVX_GET_V0(v_0_31_30));
748 kernel.packet[31] = Packet32f::Create(HEXAGON_HVX_GET_V1(v_0_31_30));
751 template <HVXPacketSize T>
753 const Index packet_size = unpacket_traits<HVXPacket<T>>
::size;
754 HVX_Vector vsum = Q6_Vqf32_vadd_VsfVsf(
a.Get(), Q6_V_vror_VR(
a.Get(),
sizeof(
float)));
755 for (
int i = 2;
i < packet_size;
i <<= 1) {
756 vsum = Q6_Vqf32_vadd_Vqf32Vqf32(vsum, Q6_V_vror_VR(vsum,
i *
sizeof(
float)));
758 return pfirst(HVXPacket<T>::Create(Q6_Vsf_equals_Vqf32(vsum)));
762 return predux_hvx(
a);
766 return predux_hvx(
a);
770 return predux_hvx(
a);
773 template <HVXPacketSize T>
776 HVX_Vector
load = HVX_load_partial<size, 0>(from);
777 HVX_VectorPair dup = Q6_W_vshuff_VVR(
load,
load, -4);
778 return HVXPacket<T>::Create(HEXAGON_HVX_GET_V0(dup));
782 return ploaddup_hvx<HVXPacketSize::Full>(from);
786 return ploaddup_hvx<HVXPacketSize::Half>(from);
790 return ploaddup_hvx<HVXPacketSize::Quarter>(from);
793 template <HVXPacketSize T>
796 HVX_Vector
load = HVX_load_partial<size, 0>(from);
797 HVX_VectorPair dup = Q6_W_vshuff_VVR(
load,
load, -4);
798 HVX_VectorPair quad = Q6_W_vshuff_VVR(HEXAGON_HVX_GET_V0(dup), HEXAGON_HVX_GET_V0(dup), -8);
799 return HVXPacket<T>::Create(HEXAGON_HVX_GET_V0(quad));
803 return ploadquad_hvx<HVXPacketSize::Full>(from);
807 return ploadquad_hvx<HVXPacketSize::Half>(from);
811 return ploadquad_hvx<HVXPacketSize::Quarter>(from);
816 HVX_Vector
delta = Q6_Vb_vsplat_R(0x7c);
817 return Packet32f::Create(Q6_V_vdelta_VV(
a.Get(),
delta));
822 HVX_Vector
delta = Q6_Vb_vsplat_R(0x3c);
823 return Packet16f::Create(Q6_V_vdelta_VV(
a.Get(),
delta));
828 HVX_Vector
delta = Q6_Vb_vsplat_R(0x1c);
829 return Packet8f::Create(Q6_V_vdelta_VV(
a.Get(),
delta));
832 template <HVXPacketSize T>
834 return HVXPacket<T>::Create(Q6_Vsf_vmin_VsfVsf(
a.Get(),
b.Get()));
838 return pmin_hvx(
a,
b);
842 return pmin_hvx(
a,
b);
846 return pmin_hvx(
a,
b);
849 template <HVXPacketSize T>
851 return HVXPacket<T>::Create(Q6_Vsf_vmax_VsfVsf(
a.Get(),
b.Get()));
855 return pmax_hvx(
a,
b);
859 return pmax_hvx(
a,
b);
863 return pmax_hvx(
a,
b);
866 template <HVXPacketSize T>
868 return HVXPacket<T>::Create(
a.Get() &
b.Get());
872 return pand_hvx(
a,
b);
876 return pand_hvx(
a,
b);
880 return pand_hvx(
a,
b);
883 template <HVXPacketSize T>
885 return HVXPacket<T>::Create(
a.Get() |
b.Get());
889 return por_hvx(
a,
b);
893 return por_hvx(
a,
b);
897 return por_hvx(
a,
b);
900 template <HVXPacketSize T>
902 return HVXPacket<T>::Create(
a.Get() ^
b.Get());
906 return pxor_hvx(
a,
b);
910 return pxor_hvx(
a,
b);
914 return pxor_hvx(
a,
b);
917 template <HVXPacketSize T>
919 return HVXPacket<T>::Create(~
a.Get());
// Blend of two packets driven by a mask packet.
//
// mask: packet whose raw vector is compared against an all-zero vector.
// a, b: candidate packets; both raw vectors are passed to the mux.
// Returns: a new packet built from the mux result.
934 template <HVXPacketSize T>
935 EIGEN_STRONG_INLINE HVXPacket<T> pselect_hvx(
const HVXPacket<T>& mask,
const HVXPacket<T>&
a,
const HVXPacket<T>&
b) {
// The predicate marks where mask EQUALS zero (Q6_Q_vcmp_eq_VwVw against
// Q6_V_vzero()), so it is the inverse of "mask is set".
936 HVX_VectorPred pred = Q6_Q_vcmp_eq_VwVw(mask.Get(), Q6_V_vzero());
// Because the predicate is inverted, b is passed as the first mux operand and
// a as the second. Assuming Q6_V_vmux_QVV picks its first vector operand
// where the predicate is set (confirm in the HVX reference), positions with a
// zero mask take b and all others take a.
937 return HVXPacket<T>::Create(Q6_V_vmux_QVV(pred,
b.Get(),
a.Get()));
941 return pselect_hvx(mask,
a,
b);
945 return pselect_hvx(mask,
a,
b);
949 return pselect_hvx(mask,
a,
b);
952 template <HVXPacketSize T,
typename Op>
954 const Index packet_size = unpacket_traits<HVXPacket<T>>
::size;
955 HVXPacket<T> vredux =
a;
956 for (
int i = 1;
i < packet_size;
i <<= 1) {
957 vredux =
op(vredux, HVXPacket<T>::Create(Q6_V_vror_VR(vredux.Get(),
i *
sizeof(
float))));
964 return predux_generic(
a, pmax<Packet32f>);
977 return predux_generic(
a, pmin<Packet32f>);
990 return predux_generic(
a, por<Packet32f>) != 0.0f;
// Constant table of the 32 consecutive values 0, 1, ..., 31 stored as floats.
// The aligned attribute requests __HVX_LENGTH__-byte alignment for the array.
// NOTE(review): presumably consumed by the plset_hvx helper declared just
// after it, whose body is not visible in this listing; confirm at the use
// site.
1001 static const float index_vsf[32]
1002 __attribute__((aligned(__HVX_LENGTH__))) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
1003 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
1005 template <HVXPacketSize T>
1011 return plset_hvx<HVXPacketSize::Full>(
a);
1015 return plset_hvx<HVXPacketSize::Half>(
a);
1019 return plset_hvx<HVXPacketSize::Quarter>(
a);
1022 template <HVXPacketSize T>
1024 const Index packet_size = unpacket_traits<HVXPacket<T>>
::size;
1025 float elements[packet_size]
__attribute__((aligned(__HVX_LENGTH__)));
1027 for (
Index i = 0;
i < packet_size; ++
i) {
1028 to[
i * stride] = elements[
i];
1033 pscatter_hvx(to, from, stride);
1037 pscatter_hvx(to, from, stride);
1041 pscatter_hvx(to, from, stride);
1044 template <HVXPacketSize T>
1046 const Index packet_size = unpacket_traits<HVXPacket<T>>
::size;
1047 float elements[packet_size]
__attribute__((aligned(__HVX_LENGTH__)));
1048 for (
Index i = 0;
i < packet_size;
i++) {
1049 elements[
i] = from[
i * stride];
1051 return pload<HVXPacket<T>>(elements);
1055 return pgather_hvx<HVXPacketSize::Full>(from, stride);
1059 return pgather_hvx<HVXPacketSize::Half>(from, stride);
1063 return pgather_hvx<HVXPacketSize::Quarter>(from, stride);
Array< int, Dynamic, 1 > v
Definition: Array_initializer_list_vector_cxx11.cpp:1
int i
Definition: BiCGSTAB_step_by_step.cpp:9
dominoes D
Definition: Domino.cpp:55
Eigen::Triplet< double > T
Definition: EigenUnitTest.cpp:11
HouseholderQR< MatrixXf > qr(A)
#define EIGEN_IF_CONSTEXPR(X)
Definition: Macros.h:1306
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
void load(Archive &ar, ParticleHandler &handl)
Definition: Particles.h:21
M1<< 1, 2, 3, 4, 5, 6, 7, 8, 9;Map< RowVectorXf > v1(M1.data(), M1.size())
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
Scalar * b
Definition: benchVecAdd.cpp:17
static int f(const TensorMap< Tensor< int, 3 > > &tensor)
Definition: cxx11_tensor_map.cpp:237
@ Aligned64
Definition: Constants.h:239
@ Aligned128
Definition: Constants.h:240
@ Aligned32
Definition: Constants.h:238
const Scalar * a
Definition: level2_cplx_impl.h:32
int * m
Definition: level2_cplx_impl.h:294
char char * op
Definition: level2_impl.h:374
EIGEN_DEVICE_FUNC Packet16f pgather< float, Packet16f >(const Packet16f &src, const float *from, Index stride, uint16_t umask)
Definition: AVX512/PacketMath.h:1141
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:318
EIGEN_STRONG_INLINE Packet16f padd< Packet16f >(const Packet16f &a, const Packet16f &b)
Definition: AVX512/PacketMath.h:355
EIGEN_STRONG_INLINE Packet8f pmax< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:1147
EIGEN_STRONG_INLINE Packet16h ploadquad(const Eigen::half *from)
Definition: AVX512/PacketMath.h:2250
EIGEN_STRONG_INLINE Packet16f por< Packet16f >(const Packet16f &a, const Packet16f &b)
Definition: AVX512/PacketMath.h:851
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition: AltiVec/Complex.h:339
EIGEN_STRONG_INLINE bool predux_any(const Packet4f &x)
Definition: AltiVec/PacketMath.h:2751
EIGEN_STRONG_INLINE Packet16f ploadu< Packet16f >(const float *from)
Definition: AVX512/PacketMath.h:986
EIGEN_STRONG_INLINE Packet8f psub< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:870
EIGEN_STRONG_INLINE Packet16f psub< Packet16f >(const Packet16f &a, const Packet16f &b)
Definition: AVX512/PacketMath.h:383
EIGEN_DEVICE_FUNC unpacket_traits< Packet >::type predux_max(const Packet &a)
Definition: GenericPacketMath.h:1258
EIGEN_STRONG_INLINE Packet8f pmin< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:1099
EIGEN_STRONG_INLINE Packet8f padd< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:817
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:663
EIGEN_DEVICE_FUNC Packet pnot(const Packet &a)
Definition: GenericPacketMath.h:572
EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1314
EIGEN_DEVICE_FUNC unpacket_traits< Packet >::type predux_min(const Packet &a)
Definition: GenericPacketMath.h:1245
EIGEN_STRONG_INLINE Packet8h por(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2309
EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1341
EIGEN_STRONG_INLINE float predux< Packet8f >(const Packet8f &a)
Definition: AVX/PacketMath.h:1954
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
Definition: AltiVec/Complex.h:303
EIGEN_STRONG_INLINE Packet8f ploadu< Packet8f >(const float *from)
Definition: AVX/PacketMath.h:1507
EIGEN_STRONG_INLINE float predux< Packet16f >(const Packet16f &a)
Definition: AVX512/PacketMath.h:1456
EIGEN_STRONG_INLINE Packet16f pset1< Packet16f >(const float &from)
Definition: AVX512/PacketMath.h:252
EIGEN_STRONG_INLINE Packet16f pmax< Packet16f >(const Packet16f &a, const Packet16f &b)
Definition: AVX512/PacketMath.h:562
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:649
EIGEN_STRONG_INLINE Packet8f pload< Packet8f >(const float *from)
Definition: AVX/PacketMath.h:1490
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
Definition: AltiVec/Complex.h:264
EIGEN_STRONG_INLINE Packet16f pmul< Packet16f >(const Packet16f &a, const Packet16f &b)
Definition: AVX512/PacketMath.h:443
EIGEN_DEVICE_FUNC void pscatter< float, Packet16f >(float *to, const Packet16f &from, Index stride, uint16_t umask)
Definition: AVX512/PacketMath.h:1197
EIGEN_STRONG_INLINE Packet8f por< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:1327
EIGEN_STRONG_INLINE Packet16f pload< Packet16f >(const float *from)
Definition: AVX512/PacketMath.h:969
EIGEN_STRONG_INLINE void pstore< float >(float *to, const Packet4f &from)
Definition: AltiVec/PacketMath.h:642
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1936
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits< Packet >::type *from)
Definition: GenericPacketMath.h:824
EIGEN_STRONG_INLINE bfloat16 pfirst(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2418
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4c predux_half_dowto4(const Packet8c &a)
Definition: NEON/PacketMath.h:3635
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:353
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2319
EIGEN_STRONG_INLINE Packet8h pxor(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2315
EIGEN_STRONG_INLINE Packet8f pmul< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: AVX/PacketMath.h:927
EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1474
EIGEN_DEVICE_FUNC Packet8f pgather< float, Packet8f >(const float *from, Index stride)
Definition: AVX/PacketMath.h:1668
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_traits< Packet >::type &a)
Returns a packet with coefficients (a,a+1,...,a+packet_size-1).
Definition: GenericPacketMath.h:872
svint32_t PacketXi __attribute__((arm_sve_vector_bits(EIGEN_ARM64_SVE_VL)))
Definition: SVE/PacketMath.h:34
EIGEN_STRONG_INLINE Packet16f pmin< Packet16f >(const Packet16f &a, const Packet16f &b)
Definition: AVX512/PacketMath.h:543
EIGEN_DEVICE_FUNC Packet pload(const typename unpacket_traits< Packet >::type *from)
Definition: GenericPacketMath.h:752
__m256 Packet8f
Definition: AVX/PacketMath.h:34
EIGEN_STRONG_INLINE void pstoreu< float >(float *to, const Packet4f &from)
Definition: AltiVec/PacketMath.h:1756
EIGEN_STRONG_INLINE Packet8f pset1< Packet8f >(const float &from)
Definition: AVX/PacketMath.h:748
EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1329
__m512 Packet16f
Definition: AVX512/PacketMath.h:34
EIGEN_DEVICE_FUNC void pscatter< float, Packet8f >(float *to, const Packet8f &from, Index stride)
Definition: AVX/PacketMath.h:1687
EIGEN_DEVICE_FUNC Packet pset1(const typename unpacket_traits< Packet >::type &a)
Definition: GenericPacketMath.h:804
std::int32_t int32_t
Definition: Meta.h:41
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
std::array< T, N > array
Definition: EmulateArray.h:231
squared absolute value
Definition: GlobalFunctions.h:87
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
int delta
Definition: MultiOpt.py:96
Definition: Eigen_Colamd.h:49
@ HasASin
Definition: GenericPacketMath.h:84
@ HasATanh
Definition: GenericPacketMath.h:87
@ HasRsqrt
Definition: GenericPacketMath.h:74
@ HasSin
Definition: GenericPacketMath.h:81
@ HasBlend
Definition: GenericPacketMath.h:66
@ HasACos
Definition: GenericPacketMath.h:85
@ HasAbsDiff
Definition: GenericPacketMath.h:65
@ HasArg
Definition: GenericPacketMath.h:64
@ HasNdtri
Definition: GenericPacketMath.h:97
@ HasCos
Definition: GenericPacketMath.h:82
@ HasCmp
Definition: GenericPacketMath.h:69
@ HasShift
Definition: GenericPacketMath.h:50
@ HasExp
Definition: GenericPacketMath.h:75
@ HasSqrt
Definition: GenericPacketMath.h:73
@ HasErf
Definition: GenericPacketMath.h:95
@ HasBessel
Definition: GenericPacketMath.h:98
@ HasLog
Definition: GenericPacketMath.h:77
@ HasTanh
Definition: GenericPacketMath.h:90
@ HasATan
Definition: GenericPacketMath.h:86
@ HasDiv
Definition: GenericPacketMath.h:71
T type
Definition: GenericPacketMath.h:109
@ size
Definition: GenericPacketMath.h:113
@ AlignedOnScalar
Definition: GenericPacketMath.h:114
@ Vectorizable
Definition: GenericPacketMath.h:112
T half
Definition: GenericPacketMath.h:110
@ HasSub
Definition: GenericPacketMath.h:118
@ HasMax
Definition: GenericPacketMath.h:124
@ HasNegate
Definition: GenericPacketMath.h:120
@ HasMul
Definition: GenericPacketMath.h:119
@ HasAdd
Definition: GenericPacketMath.h:117
@ HasSetLinear
Definition: GenericPacketMath.h:126
@ HasMin
Definition: GenericPacketMath.h:123
@ HasConj
Definition: GenericPacketMath.h:125
@ HasAbs2
Definition: GenericPacketMath.h:122
@ HasAbs
Definition: GenericPacketMath.h:121
@ size
Definition: AVX512/PacketMath.h:187
@ alignment
Definition: AVX512/PacketMath.h:188
@ alignment
Definition: AVX/PacketMath.h:312
@ size
Definition: AVX/PacketMath.h:311
T type
Definition: GenericPacketMath.h:135
T half
Definition: GenericPacketMath.h:136
@ masked_load_available
Definition: GenericPacketMath.h:142
@ size
Definition: GenericPacketMath.h:139
@ masked_store_available
Definition: GenericPacketMath.h:143
@ vectorizable
Definition: GenericPacketMath.h:141
@ alignment
Definition: GenericPacketMath.h:140