LSX/PacketMath.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2023 Zang Ruochen <zangruochen@loongson.cn>
5 // Copyright (C) 2024 XiWei Gu <guxiwei-hf@loongson.cn>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_PACKET_MATH_LSX_H
12 #define EIGEN_PACKET_MATH_LSX_H
13 
14 // IWYU pragma: private
15 #include "../../InternalHeaderCheck.h"
16 
17 namespace Eigen {
18 
19 namespace internal {
20 
// Backend defaults. Each macro is only defined here when the build has not
// already provided its own value.

// Default value for EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD.
#if !defined(EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD)
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif

// Default register count, set only when targeting 64-bit LoongArch.
#if !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)
#if EIGEN_ARCH_LOONGARCH64
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
#endif
#endif

// Flag macro with no value; this file defines single-instruction pmadd
// overloads (vfmadd / vmadd) further down.
#if !defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif
34 
35 typedef __m128 Packet4f;
36 typedef __m128d Packet2d;
37 
46 
47 template <>
48 struct is_arithmetic<__m128> {
49  enum { value = true };
50 };
51 template <>
52 struct is_arithmetic<__m128i> {
53  enum { value = true };
54 };
55 template <>
56 struct is_arithmetic<__m128d> {
57  enum { value = true };
58 };
59 template <>
61  enum { value = true };
62 };
63 template <>
65  enum { value = true };
66 };
67 template <>
69  enum { value = true };
70 };
71 template <>
73  enum { value = true };
74 };
75 template <>
77  enum { value = false };
78 };
79 template <>
81  enum { value = false };
82 };
83 template <>
85  enum { value = false };
86 };
87 template <>
89  enum { value = false };
90 };
91 
92 EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d) {
93  float from[4] = {a, b, c, d};
94  return (Packet4f)__lsx_vld(from, 0);
95 }
96 
98  const float* a = reinterpret_cast<const float*>(&m);
99  Packet4f res =
100  make_packet4f(*(a + (mask & 3)), *(a + ((mask >> 2) & 3)), *(a + ((mask >> 4) & 3)), *(a + ((mask >> 6) & 3)));
101  return res;
102 }
103 
104 template <bool interleave>
106  const float* a = reinterpret_cast<const float*>(&m);
107  const float* b = reinterpret_cast<const float*>(&n);
108  Packet4f res =
109  make_packet4f(*(a + (mask & 3)), *(a + ((mask >> 2) & 3)), *(b + ((mask >> 4) & 3)), *(b + ((mask >> 6) & 3)));
110  return res;
111 }
112 
113 template <>
115  const float* a = reinterpret_cast<const float*>(&m);
116  const float* b = reinterpret_cast<const float*>(&n);
117  Packet4f res =
118  make_packet4f(*(a + (mask & 3)), *(b + ((mask >> 2) & 3)), *(a + ((mask >> 4) & 3)), *(b + ((mask >> 6) & 3)));
119  return res;
120 }
121 
// Packs four lane selectors into one 8-bit shuffle mask:
//   p -> bits 1:0, q -> bits 3:2, r -> bits 5:4, s -> bits 7:6.
// This is the layout the shuffle helpers in this file decode with
// `(mask >> k) & 3`.
//
// Each selector is reduced to its low two bits before packing. Without that,
// a selector outside 0..3 would set bits belonging to a neighbouring field
// and silently change which lanes the other selectors pick, and a negative
// selector would be left-shifted. For selectors already in 0..3 the result
// is unchanged.
EIGEN_STRONG_INLINE static int eigen_lsx_shuffle_mask(int p, int q, int r, int s) {
  return ((s & 3) << 6) | ((r & 3) << 4) | ((q & 3) << 2) | (p & 3);
}
125 
126 EIGEN_STRONG_INLINE Packet4f vec4f_swizzle1(const Packet4f& a, int p, int q, int r, int s) {
127  return shuffle1(a, eigen_lsx_shuffle_mask(p, q, r, s));
128 }
129 EIGEN_STRONG_INLINE Packet4f vec4f_swizzle2(const Packet4f& a, const Packet4f& b, int p, int q, int r, int s) {
130  return shuffle2<false>(a, b, eigen_lsx_shuffle_mask(p, q, r, s));
131 }
133  return shuffle2<false>(a, b, eigen_lsx_shuffle_mask(0, 1, 0, 1));
134 }
136  return shuffle2<false>(b, a, eigen_lsx_shuffle_mask(2, 3, 2, 3));
137 }
139  return shuffle2<true>(a, b, eigen_lsx_shuffle_mask(0, 0, 1, 1));
140 }
142  return shuffle2<true>(a, b, eigen_lsx_shuffle_mask(2, 2, 3, 3));
143 }
144 
146  double from[2] = {a, b};
147  return (Packet2d)__lsx_vld(from, 0);
148 }
149 
151  const double* a = reinterpret_cast<const double*>(&m);
152  const double* b = reinterpret_cast<const double*>(&n);
153  Packet2d res = make_packet2d(*(a + (mask & 1)), *(b + ((mask >> 1) & 1)));
154  return res;
155 }
156 
158  return shuffle(a, b, mask);
159 }
162 
163 template <>
165  typedef Packet16c type;
166  typedef Packet16c half;
167  enum {
170  size = 16,
171 
172  HasAbs2 = 0,
174  HasCmp = 1,
175  HasBlend = 0
176  };
177 };
178 
179 template <>
181  typedef Packet8s type;
182  typedef Packet8s half;
183  enum {
186  size = 8,
187 
188  HasAbs2 = 0,
190  HasCmp = 1,
191  HasDiv = 1,
192  HasBlend = 0
193  };
194 };
195 
196 template <>
198  typedef Packet4i type;
199  typedef Packet4i half;
200  enum {
203  size = 4,
204 
205  HasAbs2 = 0,
207  HasCmp = 1,
208  HasDiv = 1,
209  HasBlend = 0
210  };
211 };
212 
213 template <>
214 struct packet_traits<int64_t> : default_packet_traits {
215  typedef Packet2l type;
216  typedef Packet2l half;
217  enum {
218  Vectorizable = 1,
219  AlignedOnScalar = 1,
220  size = 2,
221 
222  HasAbs2 = 0,
224  HasCmp = 1,
225  HasDiv = 1,
226  HasBlend = 0
227  };
228 };
229 
230 template <>
232  typedef Packet16uc type;
233  typedef Packet16uc half;
234  enum {
237  size = 16,
238 
239  HasAbs2 = 0,
242  HasCmp = 1,
243  HasBlend = 0
244  };
245 };
246 
247 template <>
249  typedef Packet8us type;
250  typedef Packet8us half;
251  enum {
254  size = 8,
255 
256  HasAbs2 = 0,
259  HasCmp = 1,
260  HasDiv = 1,
261  HasBlend = 0
262  };
263 };
264 
265 template <>
266 struct packet_traits<uint32_t> : default_packet_traits {
267  typedef Packet4ui type;
268  typedef Packet4ui half;
269  enum {
270  Vectorizable = 1,
271  AlignedOnScalar = 1,
272  size = 4,
273 
274  HasAbs2 = 0,
276  HasNegate = 0,
277  HasCmp = 1,
278  HasDiv = 1,
279  HasBlend = 0
280  };
281 };
282 
283 template <>
285  typedef Packet2ul type;
286  typedef Packet2ul half;
287  enum {
290  size = 2,
291 
292  HasAbs2 = 0,
295  HasCmp = 1,
296  HasDiv = 1,
297  HasBlend = 0
298  };
299 };
300 
301 template <>
302 struct packet_traits<float> : default_packet_traits {
303  typedef Packet4f type;
304  typedef Packet4f half;
305  enum {
306  Vectorizable = 1,
307  AlignedOnScalar = 1,
308  size = 4,
309 
310  HasAbs2 = 0,
312  HasBlend = 0,
313  HasSign = 0,
314  HasDiv = 1,
315  HasExp = 1,
316  HasSqrt = 1,
317  HasLog = 1,
318  HasRsqrt = 1
319  };
320 };
321 
322 template <>
323 struct packet_traits<double> : default_packet_traits {
324  typedef Packet2d type;
325  typedef Packet2d half;
326  enum {
327  Vectorizable = 1,
328  AlignedOnScalar = 1,
329  size = 2,
330 
331  HasAbs2 = 0,
333  HasBlend = 0,
334  HasSign = 0,
335  HasDiv = 1,
336  HasSqrt = 1,
337  HasLog = 1,
338  HasRsqrt = 1
339  };
340 };
341 
342 template <>
343 struct unpacket_traits<Packet16c> {
344  typedef int8_t type;
345  typedef Packet16c half;
346  enum {
347  size = 16,
349  vectorizable = true,
350  masked_load_available = false,
351  masked_store_available = false
352  };
353 };
354 template <>
355 struct unpacket_traits<Packet8s> {
356  typedef int16_t type;
357  typedef Packet8s half;
358  enum {
359  size = 8,
361  vectorizable = true,
362  masked_load_available = false,
363  masked_store_available = false
364  };
365 };
366 template <>
367 struct unpacket_traits<Packet4i> {
368  typedef int32_t type;
369  typedef Packet4i half;
370  enum {
371  size = 4,
373  vectorizable = true,
374  masked_load_available = false,
375  masked_store_available = false
376  };
377 };
378 template <>
380  typedef int64_t type;
381  typedef Packet2l half;
382  enum {
383  size = 2,
385  vectorizable = true,
387  masked_store_available = false
388  };
389 };
390 template <>
391 struct unpacket_traits<Packet16uc> {
392  typedef uint8_t type;
393  typedef Packet16uc half;
394  enum {
395  size = 16,
397  vectorizable = true,
398  masked_load_available = false,
399  masked_store_available = false
400  };
401 };
402 template <>
403 struct unpacket_traits<Packet8us> {
404  typedef uint16_t type;
405  typedef Packet8us half;
406  enum {
407  size = 8,
409  vectorizable = true,
410  masked_load_available = false,
411  masked_store_available = false
412  };
413 };
414 template <>
416  typedef uint32_t type;
417  typedef Packet4ui half;
418  enum {
419  size = 4,
421  vectorizable = true,
423  masked_store_available = false
424  };
425 };
426 template <>
428  typedef uint64_t type;
429  typedef Packet2ul half;
430  enum {
431  size = 2,
433  vectorizable = true,
435  masked_store_available = false
436  };
437 };
438 template <>
439 struct unpacket_traits<Packet4f> {
440  typedef float type;
441  typedef Packet4f half;
443  enum {
444  size = 4,
446  vectorizable = true,
447  masked_load_available = false,
448  masked_store_available = false
449  };
450 };
451 template <>
453  typedef double type;
454  typedef Packet2d half;
456  enum {
457  size = 2,
459  vectorizable = true,
461  masked_store_available = false
462  };
463 };
464 
465 template <>
467  return __lsx_vreplgr2vr_b(from);
468 }
469 template <>
471  return __lsx_vreplgr2vr_h(from);
472 }
473 template <>
475  return __lsx_vreplgr2vr_w(from);
476 }
477 template <>
479  return __lsx_vreplgr2vr_d(from);
480 }
481 template <>
483  return __lsx_vreplgr2vr_b(from);
484 }
485 template <>
487  return __lsx_vreplgr2vr_h(from);
488 }
489 template <>
491  return __lsx_vreplgr2vr_w(from);
492 }
493 template <>
495  return __lsx_vreplgr2vr_d(from);
496 }
497 template <>
498 EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
499  Packet4f v = {from, from, from, from};
500  return v;
501 }
502 template <>
504  Packet2d v = {from, from};
505  return v;
506 }
507 
508 template <>
510  return reinterpret_cast<__m128>((__m128i)pset1<Packet4ui>(from));
511 }
512 template <>
514  return reinterpret_cast<__m128d>((__m128i)pset1<Packet2ul>(from));
515 }
516 
517 template <>
519  const int8_t countdown[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
520  return __lsx_vadd_b(pset1<Packet16c>(a), __lsx_vld(countdown, 0));
521 }
522 template <>
524  const int16_t countdown[] = {0, 1, 2, 3, 4, 5, 6, 7};
525  return __lsx_vadd_h(pset1<Packet8s>(a), __lsx_vld(countdown, 0));
526 }
527 template <>
529  const int32_t countdown[] = {0, 1, 2, 3};
530  return __lsx_vadd_w(pset1<Packet4i>(a), __lsx_vld(countdown, 0));
531 }
532 template <>
534  const int64_t countdown[] = {0, 1};
535  return __lsx_vadd_d(pset1<Packet2l>(a), __lsx_vld(countdown, 0));
536 }
537 template <>
539  const uint8_t countdown[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
540  return __lsx_vadd_b(pset1<Packet16uc>(a), __lsx_vld(countdown, 0));
541 }
542 template <>
544  const uint16_t countdown[] = {0, 1, 2, 3, 4, 5, 6, 7};
545  return __lsx_vadd_h(pset1<Packet8us>(a), __lsx_vld(countdown, 0));
546 }
547 template <>
549  const uint32_t countdown[] = {0, 1, 2, 3};
550  return __lsx_vadd_w(pset1<Packet4ui>(a), __lsx_vld(countdown, 0));
551 }
552 template <>
554  const uint64_t countdown[] = {0, 1};
555  return __lsx_vadd_d(pset1<Packet2ul>(a), __lsx_vld(countdown, 0));
556 }
557 template <>
559  static const Packet4f countdown = {0.0f, 1.0f, 2.0f, 3.0f};
560  return __lsx_vfadd_s(pset1<Packet4f>(a), countdown);
561 }
562 template <>
564  static const Packet2d countdown = {0.0f, 1.0f};
565  return __lsx_vfadd_d(pset1<Packet2d>(a), countdown);
566 }
567 
568 template <>
570  return __lsx_vadd_b(a, b);
571 }
572 template <>
574  return __lsx_vadd_h(a, b);
575 }
576 template <>
578  return __lsx_vadd_w(a, b);
579 }
580 template <>
582  return __lsx_vadd_d(a, b);
583 }
584 template <>
586  return __lsx_vadd_b(a, b);
587 }
588 template <>
590  return __lsx_vadd_h(a, b);
591 }
592 template <>
594  return __lsx_vadd_w(a, b);
595 }
596 template <>
598  return __lsx_vadd_d(a, b);
599 }
600 template <>
602  return __lsx_vfadd_s(a, b);
603 }
604 template <>
606  return __lsx_vfadd_d(a, b);
607 }
608 
609 template <>
611  return __lsx_vsub_b(a, b);
612 }
613 template <>
615  return __lsx_vsub_h(a, b);
616 }
617 template <>
619  return __lsx_vsub_w(a, b);
620 }
621 template <>
623  return __lsx_vsub_d(a, b);
624 }
625 template <>
627  return __lsx_vsub_b(a, b);
628 }
629 template <>
631  return __lsx_vsub_h(a, b);
632 }
633 template <>
635  return __lsx_vsub_w(a, b);
636 }
637 template <>
639  return __lsx_vsub_d(a, b);
640 }
641 template <>
643  return __lsx_vfsub_s(a, b);
644 }
645 template <>
647  return __lsx_vfsub_d(a, b);
648 }
649 
650 template <>
652 template <>
654  const Packet4f mask =
655  make_packet4f(numext::bit_cast<float>(0x80000000u), 0.0f, numext::bit_cast<float>(0x80000000u), 0.0f);
656  return padd(a, pxor(mask, b));
657 }
658 template <>
660 template <>
662  const Packet2d mask = make_packet2d(numext::bit_cast<double>(0x8000000000000000ull), 0.0);
663  return padd(a, pxor(mask, b));
664 }
665 
666 template <>
668  Packet4f mask = make_packet4f(numext::bit_cast<float>(0x80000000), numext::bit_cast<float>(0x80000000),
669  numext::bit_cast<float>(0x80000000), numext::bit_cast<float>(0x80000000));
670  return (Packet4f)__lsx_vxor_v(numext::bit_cast<__m128i>(mask), numext::bit_cast<__m128i>(a));
671 }
672 template <>
674  Packet2d mask =
675  make_packet2d(numext::bit_cast<double>(0x8000000000000000), numext::bit_cast<double>(0x8000000000000000));
676  return (Packet2d)__lsx_vxor_v(numext::bit_cast<__m128i>(mask), numext::bit_cast<__m128i>(a));
677 }
678 template <>
680  return __lsx_vneg_b(a);
681 }
682 template <>
684  return __lsx_vneg_h(a);
685 }
686 template <>
688  return __lsx_vneg_w(a);
689 }
690 template <>
692  return __lsx_vneg_d(a);
693 }
694 
695 template <>
697  return a;
698 }
699 template <>
701  return a;
702 }
703 template <>
705  return a;
706 }
707 template <>
709  return a;
710 }
711 template <>
713  return a;
714 }
715 template <>
717  return a;
718 }
719 template <>
721  return a;
722 }
723 template <>
725  return a;
726 }
727 template <>
729  return a;
730 }
731 template <>
733  return a;
734 }
735 
736 template <>
738  return __lsx_vfmul_s(a, b);
739 }
740 template <>
742  return __lsx_vfmul_d(a, b);
743 }
744 template <>
746  return __lsx_vmul_b(a, b);
747 }
748 template <>
750  return __lsx_vmul_h(a, b);
751 }
752 template <>
754  return __lsx_vmul_w(a, b);
755 }
756 template <>
758  return __lsx_vmul_d(a, b);
759 }
760 template <>
762  return __lsx_vmul_b(a, b);
763 }
764 template <>
766  return __lsx_vmul_h(a, b);
767 }
768 template <>
770  return __lsx_vmul_w(a, b);
771 }
772 template <>
774  return __lsx_vmul_d(a, b);
775 }
776 
777 template <>
779  return __lsx_vfdiv_s(a, b);
780 }
781 template <>
783  return __lsx_vfdiv_d(a, b);
784 }
785 template <>
787  return __lsx_vdiv_h(a, b);
788 }
789 template <>
791  return __lsx_vdiv_w(a, b);
792 }
793 template <>
795  return __lsx_vdiv_d(a, b);
796 }
797 template <>
799  return __lsx_vdiv_hu(a, b);
800 }
801 template <>
803  return __lsx_vdiv_wu(a, b);
804 }
805 template <>
807  return __lsx_vdiv_du(a, b);
808 }
809 
810 template <>
811 EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
812  return __lsx_vfmadd_s(a, b, c);
813 }
814 template <>
816  return __lsx_vfmadd_d(a, b, c);
817 }
818 template <>
820  return __lsx_vfmsub_s(a, b, c);
821 }
822 template <>
824  return __lsx_vfmsub_d(a, b, c);
825 }
826 template <>
828  return __lsx_vfnmsub_s(a, b, c);
829 }
830 template <>
832  return __lsx_vfnmsub_d(a, b, c);
833 }
834 template <>
836  return __lsx_vfnmadd_s(a, b, c);
837 }
838 template <>
840  return __lsx_vfnmadd_d(a, b, c);
841 }
842 template <>
844  return __lsx_vmadd_b(c, a, b);
845 }
846 template <>
847 EIGEN_STRONG_INLINE Packet8s pmadd(const Packet8s& a, const Packet8s& b, const Packet8s& c) {
848  return __lsx_vmadd_h(c, a, b);
849 }
850 template <>
851 EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) {
852  return __lsx_vmadd_w(c, a, b);
853 }
854 template <>
856  return __lsx_vmadd_d(c, a, b);
857 }
858 template <>
860  return __lsx_vmadd_b(c, a, b);
861 }
862 template <>
864  return __lsx_vmadd_h(c, a, b);
865 }
866 template <>
868  return __lsx_vmadd_w(c, a, b);
869 }
870 template <>
872  return __lsx_vmadd_d(c, a, b);
873 }
874 
875 template <>
877  return (Packet4f)__lsx_vand_v((__m128i)a, (__m128i)b);
878 }
879 template <>
881  return (Packet2d)__lsx_vand_v((__m128i)a, (__m128i)b);
882 }
883 template <>
885  return __lsx_vand_v(a, b);
886 }
887 template <>
889  return __lsx_vand_v(a, b);
890 }
891 template <>
893  return __lsx_vand_v(a, b);
894 }
895 template <>
897  return __lsx_vand_v(a, b);
898 }
899 template <>
901  return __lsx_vand_v(a, b);
902 }
903 template <>
905  return __lsx_vand_v(a, b);
906 }
907 template <>
909  return __lsx_vand_v(a, b);
910 }
911 template <>
913  return __lsx_vand_v(a, b);
914 }
915 
916 template <>
918  return (Packet4f)__lsx_vor_v((__m128i)a, (__m128i)b);
919 }
920 template <>
922  return (Packet2d)__lsx_vor_v((__m128i)a, (__m128i)b);
923 }
924 template <>
926  return __lsx_vor_v(a, b);
927 }
928 template <>
930  return __lsx_vor_v(a, b);
931 }
932 template <>
934  return __lsx_vor_v(a, b);
935 }
936 template <>
938  return __lsx_vor_v(a, b);
939 }
940 template <>
942  return __lsx_vor_v(a, b);
943 }
944 template <>
946  return __lsx_vor_v(a, b);
947 }
948 template <>
950  return __lsx_vor_v(a, b);
951 }
952 template <>
954  return __lsx_vor_v(a, b);
955 }
956 
957 template <>
959  return (Packet4f)__lsx_vxor_v((__m128i)a, (__m128i)b);
960 }
961 template <>
963  return (Packet2d)__lsx_vxor_v((__m128i)a, (__m128i)b);
964 }
965 template <>
967  return __lsx_vxor_v(a, b);
968 }
969 template <>
971  return __lsx_vxor_v(a, b);
972 }
973 template <>
975  return __lsx_vxor_v(a, b);
976 }
977 template <>
979  return __lsx_vxor_v(a, b);
980 }
981 template <>
983  return __lsx_vxor_v(a, b);
984 }
985 template <>
987  return __lsx_vxor_v(a, b);
988 }
989 template <>
991  return __lsx_vxor_v(a, b);
992 }
993 template <>
995  return __lsx_vxor_v(a, b);
996 }
997 
998 template <>
1000  return (Packet4f)__lsx_vandn_v((__m128i)b, (__m128i)a);
1001 }
1002 template <>
1004  return (Packet2d)__lsx_vandn_v((__m128i)b, (__m128i)a);
1005 }
1006 template <>
1008  return __lsx_vandn_v(b, a);
1009 }
1010 template <>
1012  return __lsx_vandn_v(b, a);
1013 }
1014 template <>
1016  return __lsx_vandn_v(b, a);
1017 }
1018 template <>
1020  return __lsx_vandn_v(b, a);
1021 }
1022 template <>
1024  return __lsx_vandn_v(b, a);
1025 }
1026 template <>
1028  return __lsx_vandn_v(b, a);
1029 }
1030 template <>
1032  return __lsx_vandn_v(b, a);
1033 }
1034 template <>
1036  return __lsx_vandn_v(b, a);
1037 }
1038 
1039 template <>
1041  return (Packet4f)__lsx_vfcmp_cle_s(a, b);
1042 }
1043 template <>
1045  return (Packet2d)__lsx_vfcmp_cle_d(a, b);
1046 }
1047 template <>
1049  return __lsx_vsle_b(a, b);
1050 }
1051 template <>
1053  return __lsx_vsle_h(a, b);
1054 }
1055 template <>
1057  return __lsx_vsle_w(a, b);
1058 }
1059 template <>
1061  return __lsx_vsle_d(a, b);
1062 }
1063 template <>
1065  return __lsx_vsle_bu(a, b);
1066 }
1067 template <>
1069  return __lsx_vsle_hu(a, b);
1070 }
1071 template <>
1073  return __lsx_vsle_wu(a, b);
1074 }
1075 template <>
1077  return __lsx_vsle_du(a, b);
1078 }
1079 
1080 template <>
1082  return (Packet4f)__lsx_vfcmp_clt_s(a, b);
1083 }
1084 template <>
1086  return (Packet2d)__lsx_vfcmp_clt_d(a, b);
1087 }
1088 template <>
1090  return __lsx_vslt_b(a, b);
1091 }
1092 template <>
1094  return __lsx_vslt_h(a, b);
1095 }
1096 template <>
1098  return __lsx_vslt_w(a, b);
1099 }
1100 template <>
1102  return __lsx_vslt_d(a, b);
1103 }
1104 template <>
1106  return __lsx_vslt_bu(a, b);
1107 }
1108 template <>
1110  return __lsx_vslt_hu(a, b);
1111 }
1112 template <>
1114  return __lsx_vslt_wu(a, b);
1115 }
1116 template <>
1118  return __lsx_vslt_du(a, b);
1119 }
1120 
1121 template <>
1123  return (Packet4f)__lsx_vfcmp_sult_s(a, b);
1124 }
1125 template <>
1127  return (Packet2d)__lsx_vfcmp_sult_d(a, b);
1128 }
1129 
1130 template <>
1132  return (Packet4f)__lsx_vfcmp_seq_s(a, b);
1133 }
1134 template <>
1136  return (Packet2d)__lsx_vfcmp_seq_d(a, b);
1137 }
1138 template <>
1140  return __lsx_vseq_b(a, b);
1141 }
1142 template <>
1144  return __lsx_vseq_h(a, b);
1145 }
1146 template <>
1148  return __lsx_vseq_w(a, b);
1149 }
1150 template <>
1152  return __lsx_vseq_d(a, b);
1153 }
1154 template <>
1156  return __lsx_vseq_b(a, b);
1157 }
1158 template <>
1160  return __lsx_vseq_h(a, b);
1161 }
1162 template <>
1164  return __lsx_vseq_w(a, b);
1165 }
1166 template <>
1168  return __lsx_vseq_d(a, b);
1169 }
1170 
1171 template <>
1173  return __lsx_vmin_b(a, b);
1174 }
1175 template <>
1177  return __lsx_vmin_h(a, b);
1178 }
1179 template <>
1181  return __lsx_vmin_w(a, b);
1182 }
1183 template <>
1185  return __lsx_vmin_d(a, b);
1186 }
1187 template <>
1189  return __lsx_vmin_bu(a, b);
1190 }
1191 template <>
1193  return __lsx_vmin_hu(a, b);
1194 }
1195 template <>
1197  return __lsx_vmin_wu(a, b);
1198 }
1199 template <>
1201  return __lsx_vmin_du(a, b);
1202 }
1203 
1204 template <>
1206  return __lsx_vmax_b(a, b);
1207 }
1208 template <>
1210  return __lsx_vmax_h(a, b);
1211 }
1212 template <>
1214  return __lsx_vmax_w(a, b);
1215 }
1216 template <>
1218  return __lsx_vmax_d(a, b);
1219 }
1220 template <>
1222  return __lsx_vmax_bu(a, b);
1223 }
1224 template <>
1226  return __lsx_vmax_hu(a, b);
1227 }
1228 template <>
1230  return __lsx_vmax_wu(a, b);
1231 }
1232 template <>
1234  return __lsx_vmax_du(a, b);
1235 }
1236 
1237 template <>
1239  Packet4i aNaN = __lsx_vfcmp_cun_s(a, a);
1240  Packet4i aMinOrNaN = por<Packet4i>(__lsx_vfcmp_clt_s(a, b), aNaN);
1241  return (Packet4f)__lsx_vbitsel_v((__m128i)b, (__m128i)a, aMinOrNaN);
1242 }
1243 template <>
1245  Packet2l aNaN = __lsx_vfcmp_cun_d(a, a);
1246  Packet2l aMinOrNaN = por<Packet2l>(__lsx_vfcmp_clt_d(a, b), aNaN);
1247  return (Packet2d)__lsx_vbitsel_v((__m128i)b, (__m128i)a, aMinOrNaN);
1248 }
1249 template <>
1251  Packet4i aNaN = __lsx_vfcmp_cun_s(a, a);
1252  Packet4i aMaxOrNaN = por<Packet4i>(__lsx_vfcmp_clt_s(b, a), aNaN);
1253  return (Packet4f)__lsx_vbitsel_v((__m128i)b, (__m128i)a, aMaxOrNaN);
1254 }
1255 template <>
1257  Packet2l aNaN = __lsx_vfcmp_cun_d(a, a);
1258  Packet2l aMaxOrNaN = por<Packet2l>(__lsx_vfcmp_clt_d(b, a), aNaN);
1259  return (Packet2d)__lsx_vbitsel_v((__m128i)b, (__m128i)a, aMaxOrNaN);
1260 }
1261 
1262 template <int N>
1264  return __lsx_vsrai_b((__m128i)a, N);
1265 }
1266 template <int N>
1268  return __lsx_vsrai_h((__m128i)a, N);
1269 }
1270 template <int N>
1272  return __lsx_vsrai_w((__m128i)a, N);
1273 }
1274 template <int N>
1276  return __lsx_vsrai_d((__m128i)a, N);
1277 }
1278 template <int N>
1280  return __lsx_vsrli_b((__m128i)a, N);
1281 }
1282 template <int N>
1284  return __lsx_vsrli_h((__m128i)a, N);
1285 }
1286 template <int N>
1288  return __lsx_vsrli_w((__m128i)a, N);
1289 }
1290 template <int N>
1292  return __lsx_vsrli_d((__m128i)a, N);
1293 }
1294 
1295 template <int N>
1297  return __lsx_vsrli_b((__m128i)a, N);
1298 }
1299 template <int N>
1301  return __lsx_vsrli_h((__m128i)a, N);
1302 }
1303 template <int N>
1305  return __lsx_vsrli_w((__m128i)a, N);
1306 }
1307 template <int N>
1309  return __lsx_vsrli_d((__m128i)a, N);
1310 }
1311 template <int N>
1313  return __lsx_vsrli_b((__m128i)a, N);
1314 }
1315 template <int N>
1317  return __lsx_vsrli_h((__m128i)a, N);
1318 }
1319 template <int N>
1321  return __lsx_vsrli_w((__m128i)a, N);
1322 }
1323 template <int N>
1325  return __lsx_vsrli_d((__m128i)a, N);
1326 }
1327 
1328 template <int N>
1330  return __lsx_vslli_b((__m128i)a, N);
1331 }
1332 template <int N>
1334  return __lsx_vslli_h((__m128i)a, N);
1335 }
1336 template <int N>
1338  return __lsx_vslli_w((__m128i)a, N);
1339 }
1340 template <int N>
1342  return __lsx_vslli_d((__m128i)a, N);
1343 }
1344 template <int N>
1346  return __lsx_vslli_b((__m128i)a, N);
1347 }
1348 template <int N>
1350  return __lsx_vslli_h((__m128i)a, N);
1351 }
1352 template <int N>
1354  return __lsx_vslli_w((__m128i)a, N);
1355 }
1356 template <int N>
1358  return __lsx_vslli_d((__m128i)a, N);
1359 }
1360 
1361 template <>
1363  return (Packet4f)__lsx_vbitclri_w((__m128i)a, 31);
1364 }
1365 template <>
1367  return (Packet2d)__lsx_vbitclri_d((__m128i)a, 63);
1368 }
1369 template <>
1371  return __lsx_vabsd_b(a, pzero(a));
1372 }
1373 template <>
1375  return __lsx_vabsd_h(a, pzero(a));
1376 }
1377 template <>
1379  return __lsx_vabsd_w(a, pzero(a));
1380 }
1381 template <>
1383  return __lsx_vabsd_d(a, pzero(a));
1384 }
1385 template <>
1387  return a;
1388 }
1389 template <>
1391  return a;
1392 }
1393 template <>
1395  return a;
1396 }
1397 template <>
1399  return a;
1400 }
1401 
1402 template <>
1403 EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) {
1404  EIGEN_DEBUG_ALIGNED_LOAD return (Packet4f)__lsx_vld(from, 0);
1405 }
1406 template <>
1408  EIGEN_DEBUG_ALIGNED_LOAD return (Packet2d)__lsx_vld(from, 0);
1409 }
1410 template <>
1412  EIGEN_DEBUG_ALIGNED_LOAD return __lsx_vld(from, 0);
1413 }
1414 template <>
1416  EIGEN_DEBUG_ALIGNED_LOAD return __lsx_vld(from, 0);
1417 }
1418 template <>
1420  EIGEN_DEBUG_ALIGNED_LOAD return __lsx_vld(from, 0);
1421 }
1422 template <>
1424  EIGEN_DEBUG_ALIGNED_LOAD return __lsx_vld(from, 0);
1425 }
1426 template <>
1428  EIGEN_DEBUG_ALIGNED_LOAD return __lsx_vld(from, 0);
1429 }
1430 template <>
1432  EIGEN_DEBUG_ALIGNED_LOAD return __lsx_vld(from, 0);
1433 }
1434 template <>
1436  EIGEN_DEBUG_ALIGNED_LOAD return __lsx_vld(from, 0);
1437 }
1438 template <>
1440  EIGEN_DEBUG_ALIGNED_LOAD return __lsx_vld(from, 0);
1441 }
1442 
1443 template <>
1444 EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
1445  EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4f)__lsx_vld(from, 0);
1446 }
1447 template <>
1449  EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__lsx_vld(from, 0);
1450 }
1451 template <>
1453  EIGEN_DEBUG_UNALIGNED_LOAD return __lsx_vld(from, 0);
1454 }
1455 template <>
1457  EIGEN_DEBUG_UNALIGNED_LOAD return __lsx_vld(from, 0);
1458 }
1459 template <>
1461  EIGEN_DEBUG_UNALIGNED_LOAD return __lsx_vld(from, 0);
1462 }
1463 template <>
1465  EIGEN_DEBUG_UNALIGNED_LOAD return __lsx_vld(from, 0);
1466 }
1467 template <>
1469  EIGEN_DEBUG_UNALIGNED_LOAD return __lsx_vld(from, 0);
1470 }
1471 template <>
1473  EIGEN_DEBUG_UNALIGNED_LOAD return __lsx_vld(from, 0);
1474 }
1475 template <>
1477  EIGEN_DEBUG_UNALIGNED_LOAD return __lsx_vld(from, 0);
1478 }
1479 template <>
1481  EIGEN_DEBUG_UNALIGNED_LOAD return __lsx_vld(from, 0);
1482 }
1483 
1484 template <>
1485 EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) {
1486  float f0 = from[0], f1 = from[1];
1487  return make_packet4f(f0, f0, f1, f1);
1488 }
1489 template <>
1491  return pset1<Packet2d>(from[0]);
1492 }
1493 template <>
1495  Packet16c tmp = pload<Packet16c>(from);
1496  return __lsx_vilvl_b(tmp, tmp);
1497 }
1498 template <>
1500  Packet8s tmp = pload<Packet8s>(from);
1501  return __lsx_vilvl_h(tmp, tmp);
1502 }
1503 template <>
1505  Packet4i tmp = pload<Packet4i>(from);
1506  return __lsx_vilvl_w(tmp, tmp);
1507 }
1508 template <>
1510  return pset1<Packet2l>(from[0]);
1511 }
1512 template <>
1515  return __lsx_vilvl_b(tmp, tmp);
1516 }
1517 template <>
1519  Packet8us tmp = pload<Packet8us>(from);
1520  return __lsx_vilvl_h(tmp, tmp);
1521 }
1522 template <>
1524  Packet4ui tmp = pload<Packet4ui>(from);
1525  return __lsx_vilvl_w(tmp, tmp);
1526 }
1527 template <>
1529  return pset1<Packet2ul>(from[0]);
1530 }
1531 
1532 template <>
1533 EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) {
1534  EIGEN_DEBUG_ALIGNED_STORE __lsx_vst(from, to, 0);
1535 }
1536 template <>
1537 EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) {
1538  EIGEN_DEBUG_ALIGNED_STORE __lsx_vst(from, to, 0);
1539 }
1540 template <>
1542  EIGEN_DEBUG_ALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1543 }
1544 template <>
1546  EIGEN_DEBUG_ALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1547 }
1548 template <>
1550  EIGEN_DEBUG_ALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1551 }
1552 template <>
1554  EIGEN_DEBUG_ALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1555 }
1556 template <>
1558  EIGEN_DEBUG_ALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1559 }
1560 template <>
1562  EIGEN_DEBUG_ALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1563 }
1564 template <>
1566  EIGEN_DEBUG_ALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1567 }
1568 template <>
1570  EIGEN_DEBUG_ALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1571 }
1572 
1573 template <>
1574 EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) {
1575  EIGEN_DEBUG_UNALIGNED_STORE __lsx_vst(from, to, 0);
1576 }
1577 template <>
1578 EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
1579  EIGEN_DEBUG_UNALIGNED_STORE __lsx_vst(from, to, 0);
1580 }
1581 
1582 template <>
1584  EIGEN_DEBUG_UNALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1585 }
1586 template <>
1588  EIGEN_DEBUG_UNALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1589 }
1590 template <>
1592  EIGEN_DEBUG_UNALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1593 }
1594 template <>
1596  EIGEN_DEBUG_UNALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1597 }
1598 template <>
1600  EIGEN_DEBUG_UNALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1601 }
1602 template <>
1604  EIGEN_DEBUG_UNALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1605 }
1606 template <>
1608  EIGEN_DEBUG_UNALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1609 }
1610 template <>
1612  EIGEN_DEBUG_UNALIGNED_STORE __lsx_vst((__m128i)from, to, 0);
1613 }
1614 
1615 template <>
1617  Packet4f v = {from[0], from[stride], from[2 * stride], from[3 * stride]};
1618  return v;
1619 }
1620 template <>
1622  Packet2d v = {from[0], from[stride]};
1623  return v;
1624 }
1625 template <>
1627  int8_t v[16] __attribute__((aligned(16)));
1628  v[0] = from[0];
1629  v[1] = from[stride];
1630  v[2] = from[2 * stride];
1631  v[3] = from[3 * stride];
1632  v[4] = from[4 * stride];
1633  v[5] = from[5 * stride];
1634  v[6] = from[6 * stride];
1635  v[7] = from[7 * stride];
1636  v[8] = from[8 * stride];
1637  v[9] = from[9 * stride];
1638  v[10] = from[10 * stride];
1639  v[11] = from[11 * stride];
1640  v[12] = from[12 * stride];
1641  v[13] = from[13 * stride];
1642  v[14] = from[14 * stride];
1643  v[15] = from[15 * stride];
1644  return __lsx_vld(v, 0);
1645 }
1646 template <>
1648  int16_t v[8] __attribute__((aligned(16)));
1649  v[0] = from[0];
1650  v[1] = from[stride];
1651  v[2] = from[2 * stride];
1652  v[3] = from[3 * stride];
1653  v[4] = from[4 * stride];
1654  v[5] = from[5 * stride];
1655  v[6] = from[6 * stride];
1656  v[7] = from[7 * stride];
1657  return __lsx_vld(v, 0);
1658 }
1659 template <>
1661  int32_t v[4] __attribute__((aligned(16)));
1662  v[0] = from[0];
1663  v[1] = from[stride];
1664  v[2] = from[2 * stride];
1665  v[3] = from[3 * stride];
1666  return __lsx_vld(v, 0);
1667 }
1668 template <>
1670  int64_t v[2] __attribute__((aligned(16)));
1671  v[0] = from[0];
1672  v[1] = from[stride];
1673  return __lsx_vld(v, 0);
1674 }
1675 template <>
1677  uint8_t v[16] __attribute__((aligned(16)));
1678  v[0] = from[0];
1679  v[1] = from[stride];
1680  v[2] = from[2 * stride];
1681  v[3] = from[3 * stride];
1682  v[4] = from[4 * stride];
1683  v[5] = from[5 * stride];
1684  v[6] = from[6 * stride];
1685  v[7] = from[7 * stride];
1686  v[8] = from[8 * stride];
1687  v[9] = from[9 * stride];
1688  v[10] = from[10 * stride];
1689  v[11] = from[11 * stride];
1690  v[12] = from[12 * stride];
1691  v[13] = from[13 * stride];
1692  v[14] = from[14 * stride];
1693  v[15] = from[15 * stride];
1694  return __lsx_vld(v, 0);
1695 }
1696 template <>
1698  uint16_t v[8] __attribute__((aligned(16)));
1699  v[0] = from[0];
1700  v[1] = from[stride];
1701  v[2] = from[2 * stride];
1702  v[3] = from[3 * stride];
1703  v[4] = from[4 * stride];
1704  v[5] = from[5 * stride];
1705  v[6] = from[6 * stride];
1706  v[7] = from[7 * stride];
1707  return __lsx_vld(v, 0);
1708 }
1709 template <>
1711  uint32_t v[4] __attribute__((aligned(16)));
1712  v[0] = from[0];
1713  v[1] = from[stride];
1714  v[2] = from[2 * stride];
1715  v[3] = from[3 * stride];
1716  return __lsx_vld(v, 0);
1717 }
1718 template <>
1720  uint64_t v[2] __attribute__((aligned(16)));
1721  v[0] = from[0];
1722  v[1] = from[stride];
1723  return __lsx_vld(v, 0);
1724 }
1725 
1726 template <>
1727 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride) {
1728  __lsx_vstelm_w(from, to, 0, 0);
1729  __lsx_vstelm_w(from, to + stride * 1, 0, 1);
1730  __lsx_vstelm_w(from, to + stride * 2, 0, 2);
1731  __lsx_vstelm_w(from, to + stride * 3, 0, 3);
1732 }
1733 template <>
1735  __lsx_vstelm_d(from, to, 0, 0);
1736  __lsx_vstelm_d(from, to + stride, 0, 1);
1737 }
1738 template <>
1740  Index stride) {
1741  __lsx_vstelm_b((__m128i)from, to, 0, 0);
1742  __lsx_vstelm_b((__m128i)from, to + stride * 1, 0, 1);
1743  __lsx_vstelm_b((__m128i)from, to + stride * 2, 0, 2);
1744  __lsx_vstelm_b((__m128i)from, to + stride * 3, 0, 3);
1745  __lsx_vstelm_b((__m128i)from, to + stride * 4, 0, 4);
1746  __lsx_vstelm_b((__m128i)from, to + stride * 5, 0, 5);
1747  __lsx_vstelm_b((__m128i)from, to + stride * 6, 0, 6);
1748  __lsx_vstelm_b((__m128i)from, to + stride * 7, 0, 7);
1749  __lsx_vstelm_b((__m128i)from, to + stride * 8, 0, 8);
1750  __lsx_vstelm_b((__m128i)from, to + stride * 9, 0, 9);
1751  __lsx_vstelm_b((__m128i)from, to + stride * 10, 0, 10);
1752  __lsx_vstelm_b((__m128i)from, to + stride * 11, 0, 11);
1753  __lsx_vstelm_b((__m128i)from, to + stride * 12, 0, 12);
1754  __lsx_vstelm_b((__m128i)from, to + stride * 13, 0, 13);
1755  __lsx_vstelm_b((__m128i)from, to + stride * 14, 0, 14);
1756  __lsx_vstelm_b((__m128i)from, to + stride * 15, 0, 15);
1757 }
1758 template <>
1760  Index stride) {
1761  __lsx_vstelm_h((__m128i)from, to, 0, 0);
1762  __lsx_vstelm_h((__m128i)from, to + stride * 1, 0, 1);
1763  __lsx_vstelm_h((__m128i)from, to + stride * 2, 0, 2);
1764  __lsx_vstelm_h((__m128i)from, to + stride * 3, 0, 3);
1765  __lsx_vstelm_h((__m128i)from, to + stride * 4, 0, 4);
1766  __lsx_vstelm_h((__m128i)from, to + stride * 5, 0, 5);
1767  __lsx_vstelm_h((__m128i)from, to + stride * 6, 0, 6);
1768  __lsx_vstelm_h((__m128i)from, to + stride * 7, 0, 7);
1769 }
1770 template <>
1772  Index stride) {
1773  __lsx_vstelm_w((__m128i)from, to, 0, 0);
1774  __lsx_vstelm_w((__m128i)from, to + stride * 1, 0, 1);
1775  __lsx_vstelm_w((__m128i)from, to + stride * 2, 0, 2);
1776  __lsx_vstelm_w((__m128i)from, to + stride * 3, 0, 3);
1777 }
1778 template <>
1780  Index stride) {
1781  __lsx_vstelm_d((__m128i)from, to, 0, 0);
1782  __lsx_vstelm_d((__m128i)from, to + stride * 1, 0, 1);
1783 }
1784 template <>
1786  Index stride) {
1787  __lsx_vstelm_b((__m128i)from, to, 0, 0);
1788  __lsx_vstelm_b((__m128i)from, to + stride * 1, 0, 1);
1789  __lsx_vstelm_b((__m128i)from, to + stride * 2, 0, 2);
1790  __lsx_vstelm_b((__m128i)from, to + stride * 3, 0, 3);
1791  __lsx_vstelm_b((__m128i)from, to + stride * 4, 0, 4);
1792  __lsx_vstelm_b((__m128i)from, to + stride * 5, 0, 5);
1793  __lsx_vstelm_b((__m128i)from, to + stride * 6, 0, 6);
1794  __lsx_vstelm_b((__m128i)from, to + stride * 7, 0, 7);
1795  __lsx_vstelm_b((__m128i)from, to + stride * 8, 0, 8);
1796  __lsx_vstelm_b((__m128i)from, to + stride * 9, 0, 9);
1797  __lsx_vstelm_b((__m128i)from, to + stride * 10, 0, 10);
1798  __lsx_vstelm_b((__m128i)from, to + stride * 11, 0, 11);
1799  __lsx_vstelm_b((__m128i)from, to + stride * 12, 0, 12);
1800  __lsx_vstelm_b((__m128i)from, to + stride * 13, 0, 13);
1801  __lsx_vstelm_b((__m128i)from, to + stride * 14, 0, 14);
1802  __lsx_vstelm_b((__m128i)from, to + stride * 15, 0, 15);
1803 }
1804 template <>
1806  Index stride) {
1807  __lsx_vstelm_h((__m128i)from, to, 0, 0);
1808  __lsx_vstelm_h((__m128i)from, to + stride * 1, 0, 1);
1809  __lsx_vstelm_h((__m128i)from, to + stride * 2, 0, 2);
1810  __lsx_vstelm_h((__m128i)from, to + stride * 3, 0, 3);
1811  __lsx_vstelm_h((__m128i)from, to + stride * 4, 0, 4);
1812  __lsx_vstelm_h((__m128i)from, to + stride * 5, 0, 5);
1813  __lsx_vstelm_h((__m128i)from, to + stride * 6, 0, 6);
1814  __lsx_vstelm_h((__m128i)from, to + stride * 7, 0, 7);
1815 }
1816 template <>
1818  Index stride) {
1819  __lsx_vstelm_w((__m128i)from, to, 0, 0);
1820  __lsx_vstelm_w((__m128i)from, to + stride * 1, 0, 1);
1821  __lsx_vstelm_w((__m128i)from, to + stride * 2, 0, 2);
1822  __lsx_vstelm_w((__m128i)from, to + stride * 3, 0, 3);
1823 }
1824 template <>
1826  Index stride) {
1827  __lsx_vstelm_d((__m128i)from, to, 0, 0);
1828  __lsx_vstelm_d((__m128i)from, to + stride * 1, 0, 1);
1829 }
1830 
1831 template <>
1832 EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) {
1833  __builtin_prefetch(addr);
1834 }
1835 template <>
1836 EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) {
1837  __builtin_prefetch(addr);
1838 }
1839 template <>
1841  __builtin_prefetch(addr);
1842 }
1843 template <>
1845  __builtin_prefetch(addr);
1846 }
1847 template <>
1849  __builtin_prefetch(addr);
1850 }
1851 template <>
1853  __builtin_prefetch(addr);
1854 }
1855 template <>
1857  __builtin_prefetch(addr);
1858 }
1859 template <>
1861  __builtin_prefetch(addr);
1862 }
1863 template <>
1865  __builtin_prefetch(addr);
1866 }
1867 template <>
1869  __builtin_prefetch(addr);
1870 }
1871 
1872 template <>
1874  float v;
1875  __lsx_vstelm_w(a, &v, 0, 0);
1876  return v;
1877 }
1878 template <>
1880  double v;
1881  __lsx_vstelm_d(a, &v, 0, 0);
1882  return v;
1883 }
1884 
1885 template <>
1887  return (int8_t)__lsx_vpickve2gr_b((__m128i)a, 0);
1888 }
1889 template <>
1891  return (int16_t)__lsx_vpickve2gr_h((__m128i)a, 0);
1892 }
1893 template <>
1895  return __lsx_vpickve2gr_w((__m128i)a, 0);
1896 }
1897 template <>
1899  return __lsx_vpickve2gr_d((__m128i)a, 0);
1900 }
1901 template <>
1903  return (uint8_t)__lsx_vpickve2gr_bu((__m128i)a, 0);
1904 }
1905 template <>
1907  return (uint16_t)__lsx_vpickve2gr_hu((__m128i)a, 0);
1908 }
1909 template <>
1911  return __lsx_vpickve2gr_wu((__m128i)a, 0);
1912 }
1913 template <>
1915  return __lsx_vpickve2gr_du((__m128i)a, 0);
1916 }
1917 
1918 template <>
1920  return (Packet4f)__lsx_vshuf4i_w(a, 0x1B);
1921 }
1922 template <>
1924  return (Packet2d)__lsx_vshuf4i_d(a, a, 0x1);
1925 }
1926 template <>
1928  return __lsx_vshuf4i_b(__lsx_vshuf4i_w((__m128i)a, 0x1B), 0x1B);
1929 }
1930 template <>
1932  return __lsx_vshuf4i_h(__lsx_vshuf4i_d((__m128i)a, (__m128i)a, 0x1), 0x1B);
1933 }
1934 template <>
1936  return __lsx_vshuf4i_w((__m128i)a, 0x1B);
1937 }
1938 template <>
1940  return __lsx_vshuf4i_d((__m128i)a, (__m128i)a, 0x1);
1941 }
1942 template <>
1944  return __lsx_vshuf4i_b(__lsx_vshuf4i_w((__m128i)a, 0x1B), 0x1B);
1945 }
1946 template <>
1948  return __lsx_vshuf4i_h(__lsx_vshuf4i_d((__m128i)a, (__m128i)a, 0x1), 0x1B);
1949 }
1950 template <>
1952  return __lsx_vshuf4i_w((__m128i)a, 0x1B);
1953 }
1954 template <>
1956  return __lsx_vshuf4i_d((__m128i)a, (__m128i)a, 0x1);
1957 }
1958 
1959 template <>
1961  Packet4f tmp = __lsx_vfadd_s(a, vec4f_swizzle1(a, 2, 3, 2, 3));
1962  return pfirst<Packet4f>(__lsx_vfadd_s(tmp, vec4f_swizzle1(tmp, 1, 1, 1, 1)));
1963 }
1964 template <>
1966  return pfirst<Packet2d>(__lsx_vfadd_d(a, preverse(a)));
1967 }
1968 template <>
1970  Packet8s tmp1 = __lsx_vhaddw_h_b(a, a);
1971  Packet4i tmp2 = __lsx_vhaddw_w_h(tmp1, tmp1);
1972  Packet2l tmp3 = __lsx_vhaddw_d_w(tmp2, tmp2);
1973  return (int8_t)__lsx_vpickve2gr_d(__lsx_vhaddw_q_d(tmp3, tmp3), 0);
1974 }
1975 template <>
1977  Packet4i tmp1 = __lsx_vhaddw_w_h(a, a);
1978  Packet2l tmp2 = __lsx_vhaddw_d_w(tmp1, tmp1);
1979  return (int16_t)__lsx_vpickve2gr_d(__lsx_vhaddw_q_d(tmp2, tmp2), 0);
1980 }
1981 template <>
1983  Packet2l tmp = __lsx_vhaddw_d_w(a, a);
1984  return (int32_t)__lsx_vpickve2gr_d(__lsx_vhaddw_q_d(tmp, tmp), 0);
1985 }
1986 template <>
1988  return (int64_t)__lsx_vpickve2gr_d(__lsx_vhaddw_q_d(a, a), 0);
1989 }
1990 template <>
1992  Packet8us tmp1 = __lsx_vhaddw_hu_bu(a, a);
1993  Packet4ui tmp2 = __lsx_vhaddw_wu_hu(tmp1, tmp1);
1994  Packet2ul tmp3 = __lsx_vhaddw_du_wu(tmp2, tmp2);
1995  return (uint8_t)__lsx_vpickve2gr_d(__lsx_vhaddw_qu_du(tmp3, tmp3), 0);
1996 }
1997 template <>
1999  Packet4ui tmp1 = __lsx_vhaddw_wu_hu(a, a);
2000  Packet2ul tmp2 = __lsx_vhaddw_du_wu(tmp1, tmp1);
2001  return (uint16_t)__lsx_vpickve2gr_d(__lsx_vhaddw_qu_du(tmp2, tmp2), 0);
2002 }
2003 template <>
2005  Packet2ul tmp = __lsx_vhaddw_du_wu(a, a);
2006  return (uint32_t)__lsx_vpickve2gr_d(__lsx_vhaddw_qu_du(tmp, tmp), 0);
2007 }
2008 template <>
2010  return (uint64_t)__lsx_vpickve2gr_d(__lsx_vhaddw_qu_du(a, a), 0);
2011 }
2012 
2013 template <>
2015  Packet4f tmp = __lsx_vfmul_s(a, vec4f_swizzle1(a, 2, 3, 2, 3));
2016  return pfirst<Packet4f>(__lsx_vfmul_s(tmp, vec4f_swizzle1(tmp, 1, 1, 1, 1)));
2017 }
2018 template <>
2020  return pfirst<Packet2d>(__lsx_vfmul_d(a, preverse(a)));
2021 }
2022 template <>
2024  Packet8s tmp1 = __lsx_vmulwev_h_b(a, preverse(a));
2025  Packet4i tmp2 = __lsx_vmulwev_w_h(tmp1, preverse(tmp1));
2026  Packet2l tmp3 = __lsx_vmulwev_d_w(tmp2, preverse(tmp2));
2027  return (int8_t)__lsx_vpickve2gr_d(__lsx_vmulwev_q_d(tmp3, preverse(tmp3)), 0);
2028 }
2029 template <>
2031  Packet4i tmp1 = __lsx_vmulwev_w_h(a, preverse(a));
2032  Packet2l tmp2 = __lsx_vmulwev_d_w(tmp1, preverse(tmp1));
2033  return (int16_t)__lsx_vpickve2gr_d(__lsx_vmulwev_q_d(tmp2, preverse(tmp2)), 0);
2034 }
2035 template <>
2037  Packet2l tmp = __lsx_vmulwev_d_w(a, preverse(a));
2038  return (int32_t)__lsx_vpickve2gr_d(__lsx_vmulwev_q_d(tmp, preverse(tmp)), 0);
2039 }
2040 template <>
2042  return (int64_t)__lsx_vpickve2gr_d(__lsx_vmulwev_q_d(a, preverse(a)), 0);
2043 }
2044 template <>
2046  Packet8us tmp1 = __lsx_vmulwev_h_bu(a, preverse(a));
2047  Packet4ui tmp2 = __lsx_vmulwev_w_h(tmp1, preverse(tmp1));
2048  Packet2ul tmp3 = __lsx_vmulwev_d_w(tmp2, preverse(tmp2));
2049  return (uint8_t)__lsx_vpickve2gr_d(__lsx_vmulwev_q_d(tmp3, preverse(tmp3)), 0);
2050 }
2051 template <>
2053  Packet4ui tmp1 = __lsx_vmulwev_w_hu(a, preverse(a));
2054  Packet2ul tmp2 = __lsx_vmulwev_d_w(tmp1, preverse(tmp1));
2055  return (uint16_t)__lsx_vpickve2gr_d(__lsx_vmulwev_q_d(tmp2, preverse(tmp2)), 0);
2056 }
2057 template <>
2059  Packet2ul tmp = __lsx_vmulwev_d_wu(a, preverse(a));
2060  return (uint32_t)__lsx_vpickve2gr_d(__lsx_vmulwev_q_d(tmp, preverse(tmp)), 0);
2061 }
2062 template <>
2064  return (uint64_t)__lsx_vpickve2gr_d(__lsx_vmulwev_q_du(a, preverse(a)), 0);
2065 }
2066 
2067 template <>
2069  Packet4f tmp = __lsx_vfmin_s(a, (Packet4f)__lsx_vshuf4i_w(a, 0x4E));
2070  return pfirst(__lsx_vfmin_s(tmp, (Packet4f)__lsx_vshuf4i_w(tmp, 0xB1)));
2071 }
2072 template <>
2074  return pfirst(__lsx_vfmin_d(a, preverse(a)));
2075 }
2076 template <>
2078  Packet16c tmp1 = __lsx_vmin_b(a, __lsx_vshuf4i_w((__m128i)a, 0x4E));
2079  Packet16c tmp2 = __lsx_vmin_b(tmp1, __lsx_vshuf4i_h((__m128i)tmp1, 0x4E));
2080  Packet16c tmp3 = __lsx_vmin_b(tmp2, __lsx_vshuf4i_b((__m128i)tmp2, 0x4E));
2081  return pfirst((Packet16c)__lsx_vmin_b(tmp3, __lsx_vshuf4i_b((__m128i)tmp3, 0xB1)));
2082 }
2083 template <>
2085  Packet8s tmp1 = __lsx_vmin_h(a, __lsx_vshuf4i_w((__m128i)a, 0x4E));
2086  Packet8s tmp2 = __lsx_vmin_h(tmp1, __lsx_vshuf4i_h((__m128i)tmp1, 0x4E));
2087  return pfirst((Packet8s)__lsx_vmin_h(tmp2, __lsx_vshuf4i_h((__m128i)tmp2, 0xB1)));
2088 }
2089 template <>
2091  Packet4i tmp = __lsx_vmin_w(a, __lsx_vshuf4i_w((__m128i)a, 0x4E));
2092  return pfirst((Packet4i)__lsx_vmin_w(tmp, __lsx_vshuf4i_w((__m128i)tmp, 0xB1)));
2093 }
2094 template <>
2096  return pfirst((Packet2l)__lsx_vmin_d(a, preverse(a)));
2097 }
2098 template <>
2100  Packet16uc tmp1 = __lsx_vmin_bu(a, __lsx_vshuf4i_w((__m128i)a, 0x4E));
2101  Packet16uc tmp2 = __lsx_vmin_bu(tmp1, __lsx_vshuf4i_h((__m128i)tmp1, 0x4E));
2102  Packet16uc tmp3 = __lsx_vmin_bu(tmp2, __lsx_vshuf4i_b((__m128i)tmp2, 0x4E));
2103  return pfirst((Packet16uc)__lsx_vmin_bu(tmp3, __lsx_vshuf4i_b((__m128i)tmp3, 0xB1)));
2104 }
2105 template <>
2107  Packet8us tmp1 = __lsx_vmin_hu(a, __lsx_vshuf4i_w((__m128i)a, 0x4E));
2108  Packet8us tmp2 = __lsx_vmin_hu(tmp1, __lsx_vshuf4i_h((__m128i)tmp1, 0x4E));
2109  return pfirst((Packet8us)__lsx_vmin_hu(tmp2, __lsx_vshuf4i_h((__m128i)tmp2, 0xB1)));
2110 }
2111 template <>
2113  Packet4ui tmp = __lsx_vmin_wu(a, __lsx_vshuf4i_w((__m128i)a, 0x4E));
2114  return pfirst((Packet4ui)__lsx_vmin_wu(tmp, __lsx_vshuf4i_w((__m128i)tmp, 0xB1)));
2115 }
2116 template <>
2118  return pfirst((Packet2ul)__lsx_vmin_du(a, preverse(a)));
2119 }
2120 
2121 template <>
2123  Packet4f tmp = __lsx_vfmax_s(a, (Packet4f)__lsx_vshuf4i_w(a, 0x4E));
2124  return pfirst(__lsx_vfmax_s(tmp, (Packet4f)__lsx_vshuf4i_w(tmp, 0xB1)));
2125 }
2126 template <>
2128  return pfirst(__lsx_vfmax_d(a, preverse(a)));
2129 }
2130 template <>
2132  Packet16c tmp1 = __lsx_vmax_b(a, __lsx_vshuf4i_w((__m128i)a, 0x4E));
2133  Packet16c tmp2 = __lsx_vmax_b(tmp1, __lsx_vshuf4i_h((__m128i)tmp1, 0x4E));
2134  Packet16c tmp3 = __lsx_vmax_b(tmp2, __lsx_vshuf4i_b((__m128i)tmp2, 0x4E));
2135  return pfirst((Packet16c)__lsx_vmax_b(tmp3, __lsx_vshuf4i_b((__m128i)tmp3, 0xB1)));
2136 }
2137 template <>
2139  Packet8s tmp1 = __lsx_vmax_h(a, __lsx_vshuf4i_w((__m128i)a, 0x4E));
2140  Packet8s tmp2 = __lsx_vmax_h(tmp1, __lsx_vshuf4i_h((__m128i)tmp1, 0x4E));
2141  return pfirst((Packet8s)__lsx_vmax_h(tmp2, __lsx_vshuf4i_h((__m128i)tmp2, 0xB1)));
2142 }
2143 template <>
2145  Packet4i tmp = __lsx_vmax_w(a, __lsx_vshuf4i_w((__m128i)a, 0x4E));
2146  return pfirst((Packet4i)__lsx_vmax_w(tmp, __lsx_vshuf4i_w((__m128i)tmp, 0xB1)));
2147 }
2148 template <>
2150  return pfirst((Packet2l)__lsx_vmax_d(a, preverse(a)));
2151 }
2152 template <>
2154  Packet16uc tmp1 = __lsx_vmax_bu(a, __lsx_vshuf4i_w((__m128i)a, 0x4E));
2155  Packet16uc tmp2 = __lsx_vmax_bu(tmp1, __lsx_vshuf4i_h((__m128i)tmp1, 0x4E));
2156  Packet16uc tmp3 = __lsx_vmax_bu(tmp2, __lsx_vshuf4i_b((__m128i)tmp2, 0x4E));
2157  return pfirst((Packet16uc)__lsx_vmax_bu(tmp3, __lsx_vshuf4i_b((__m128i)tmp3, 0xB1)));
2158 }
2159 template <>
2161  Packet8us tmp1 = __lsx_vmax_hu(a, __lsx_vshuf4i_w((__m128i)a, 0x4E));
2162  Packet8us tmp2 = __lsx_vmax_hu(tmp1, __lsx_vshuf4i_h((__m128i)tmp1, 0x4E));
2163  return pfirst((Packet8us)__lsx_vmax_hu(tmp2, __lsx_vshuf4i_h((__m128i)tmp2, 0xB1)));
2164 }
2165 template <>
2167  Packet4ui tmp = __lsx_vmax_wu(a, __lsx_vshuf4i_w((__m128i)a, 0x4E));
2168  return pfirst((Packet4ui)__lsx_vmax_wu(tmp, __lsx_vshuf4i_w((__m128i)tmp, 0xB1)));
2169 }
2170 template <>
2172  return pfirst((Packet2ul)__lsx_vmax_du(a, preverse(a)));
2173 }
2174 
2175 template <>
2177  return __lsx_vfsqrt_s(a);
2178 }
2179 template <>
2181  return __lsx_vfsqrt_d(a);
2182 }
2183 
2184 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
2185  Packet4f T0 = (Packet4f)__lsx_vilvl_w((__m128i)kernel.packet[1], (__m128i)kernel.packet[0]);
2186  Packet4f T1 = (Packet4f)__lsx_vilvh_w((__m128i)kernel.packet[1], (__m128i)kernel.packet[0]);
2187  Packet4f T2 = (Packet4f)__lsx_vilvl_w((__m128i)kernel.packet[3], (__m128i)kernel.packet[2]);
2188  Packet4f T3 = (Packet4f)__lsx_vilvh_w((__m128i)kernel.packet[3], (__m128i)kernel.packet[2]);
2189 
2190  kernel.packet[0] = (Packet4f)__lsx_vilvl_d((__m128i)T2, (__m128i)T0);
2191  kernel.packet[1] = (Packet4f)__lsx_vilvh_d((__m128i)T2, (__m128i)T0);
2192  kernel.packet[2] = (Packet4f)__lsx_vilvl_d((__m128i)T3, (__m128i)T1);
2193  kernel.packet[3] = (Packet4f)__lsx_vilvh_d((__m128i)T3, (__m128i)T1);
2194 }
2196  Packet2d tmp = (Packet2d)__lsx_vilvh_d((__m128i)kernel.packet[1], (__m128i)kernel.packet[0]);
2197  kernel.packet[0] = (Packet2d)__lsx_vilvl_d((__m128i)kernel.packet[1], (__m128i)kernel.packet[0]);
2198  kernel.packet[1] = tmp;
2199 }
// In-place transpose of a 16x16 block of 8-bit elements held as sixteen
// Packet16c rows. The block is transposed by four rounds of interleaves whose
// element width doubles each round (8 -> 16 -> 32 -> 64 bits); after the last
// round kernel.packet[c] holds column c of the input.
2200 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16c, 16>& kernel) {
      // Round 1: byte interleave of adjacent row pairs (0,1), (2,3), ... (14,15).
      // The vilvl result covers columns 0-7 of the pair, vilvh columns 8-15.
2201  __m128i t0 = __lsx_vilvl_b(kernel.packet[1], kernel.packet[0]);
2202  __m128i t1 = __lsx_vilvh_b(kernel.packet[1], kernel.packet[0]);
2203  __m128i t2 = __lsx_vilvl_b(kernel.packet[3], kernel.packet[2]);
2204  __m128i t3 = __lsx_vilvh_b(kernel.packet[3], kernel.packet[2]);
2205  __m128i t4 = __lsx_vilvl_b(kernel.packet[5], kernel.packet[4]);
2206  __m128i t5 = __lsx_vilvh_b(kernel.packet[5], kernel.packet[4]);
2207  __m128i t6 = __lsx_vilvl_b(kernel.packet[7], kernel.packet[6]);
2208  __m128i t7 = __lsx_vilvh_b(kernel.packet[7], kernel.packet[6]);
2209  __m128i t8 = __lsx_vilvl_b(kernel.packet[9], kernel.packet[8]);
2210  __m128i t9 = __lsx_vilvh_b(kernel.packet[9], kernel.packet[8]);
2211  __m128i ta = __lsx_vilvl_b(kernel.packet[11], kernel.packet[10]);
2212  __m128i tb = __lsx_vilvh_b(kernel.packet[11], kernel.packet[10]);
2213  __m128i tc = __lsx_vilvl_b(kernel.packet[13], kernel.packet[12]);
2214  __m128i td = __lsx_vilvh_b(kernel.packet[13], kernel.packet[12]);
2215  __m128i te = __lsx_vilvl_b(kernel.packet[15], kernel.packet[14]);
2216  __m128i tf = __lsx_vilvh_b(kernel.packet[15], kernel.packet[14]);
2217 
      // Round 2: 16-bit interleave merges two row pairs, so every 4-byte group
      // now carries one column for a run of four rows (0-3, 4-7, 8-11 or 12-15).
2218  __m128i s0 = __lsx_vilvl_h(t2, t0);
2219  __m128i s1 = __lsx_vilvh_h(t2, t0);
2220  __m128i s2 = __lsx_vilvl_h(t3, t1);
2221  __m128i s3 = __lsx_vilvh_h(t3, t1);
2222  __m128i s4 = __lsx_vilvl_h(t6, t4);
2223  __m128i s5 = __lsx_vilvh_h(t6, t4);
2224  __m128i s6 = __lsx_vilvl_h(t7, t5);
2225  __m128i s7 = __lsx_vilvh_h(t7, t5);
2226  __m128i s8 = __lsx_vilvl_h(ta, t8);
2227  __m128i s9 = __lsx_vilvh_h(ta, t8);
2228  __m128i sa = __lsx_vilvl_h(tb, t9);
2229  __m128i sb = __lsx_vilvh_h(tb, t9);
2230  __m128i sc = __lsx_vilvl_h(te, tc);
2231  __m128i sd = __lsx_vilvh_h(te, tc);
2232  __m128i se = __lsx_vilvl_h(tf, td);
2233  __m128i sf = __lsx_vilvh_h(tf, td);
2234 
      // Round 3: 32-bit interleave merges the four-row runs into eight-row runs:
      // u0..u7 cover rows 0-7, u8..uf cover rows 8-15, two columns per vector.
2235  __m128i u0 = __lsx_vilvl_w(s4, s0);
2236  __m128i u1 = __lsx_vilvh_w(s4, s0);
2237  __m128i u2 = __lsx_vilvl_w(s5, s1);
2238  __m128i u3 = __lsx_vilvh_w(s5, s1);
2239  __m128i u4 = __lsx_vilvl_w(s6, s2);
2240  __m128i u5 = __lsx_vilvh_w(s6, s2);
2241  __m128i u6 = __lsx_vilvl_w(s7, s3);
2242  __m128i u7 = __lsx_vilvh_w(s7, s3);
2243  __m128i u8 = __lsx_vilvl_w(sc, s8);
2244  __m128i u9 = __lsx_vilvh_w(sc, s8);
2245  __m128i ua = __lsx_vilvl_w(sd, s9);
2246  __m128i ub = __lsx_vilvh_w(sd, s9);
2247  __m128i uc = __lsx_vilvl_w(se, sa);
2248  __m128i ud = __lsx_vilvh_w(se, sa);
2249  __m128i ue = __lsx_vilvl_w(sf, sb);
2250  __m128i uf = __lsx_vilvh_w(sf, sb);
2251 
      // Round 4: 64-bit interleave joins the rows 0-7 half with the rows 8-15
      // half of the same column and writes the finished column back.
2252  kernel.packet[0] = __lsx_vilvl_d(u8, u0);
2253  kernel.packet[1] = __lsx_vilvh_d(u8, u0);
2254  kernel.packet[2] = __lsx_vilvl_d(u9, u1);
2255  kernel.packet[3] = __lsx_vilvh_d(u9, u1);
2256  kernel.packet[4] = __lsx_vilvl_d(ua, u2);
2257  kernel.packet[5] = __lsx_vilvh_d(ua, u2);
2258  kernel.packet[6] = __lsx_vilvl_d(ub, u3);
2259  kernel.packet[7] = __lsx_vilvh_d(ub, u3);
2260  kernel.packet[8] = __lsx_vilvl_d(uc, u4);
2261  kernel.packet[9] = __lsx_vilvh_d(uc, u4);
2262  kernel.packet[10] = __lsx_vilvl_d(ud, u5);
2263  kernel.packet[11] = __lsx_vilvh_d(ud, u5);
2264  kernel.packet[12] = __lsx_vilvl_d(ue, u6);
2265  kernel.packet[13] = __lsx_vilvh_d(ue, u6);
2266  kernel.packet[14] = __lsx_vilvl_d(uf, u7);
2267  kernel.packet[15] = __lsx_vilvh_d(uf, u7);
2268 }
2270  __m128i t0 = __lsx_vilvl_b(kernel.packet[1], kernel.packet[0]);
2271  __m128i t1 = __lsx_vilvh_b(kernel.packet[1], kernel.packet[0]);
2272  __m128i t2 = __lsx_vilvl_b(kernel.packet[3], kernel.packet[2]);
2273  __m128i t3 = __lsx_vilvh_b(kernel.packet[3], kernel.packet[2]);
2274  __m128i t4 = __lsx_vilvl_b(kernel.packet[5], kernel.packet[4]);
2275  __m128i t5 = __lsx_vilvh_b(kernel.packet[5], kernel.packet[4]);
2276  __m128i t6 = __lsx_vilvl_b(kernel.packet[7], kernel.packet[6]);
2277  __m128i t7 = __lsx_vilvh_b(kernel.packet[7], kernel.packet[6]);
2278 
2279  __m128i s0 = __lsx_vilvl_h(t2, t0);
2280  __m128i s1 = __lsx_vilvh_h(t2, t0);
2281  __m128i s2 = __lsx_vilvl_h(t3, t1);
2282  __m128i s3 = __lsx_vilvh_h(t3, t1);
2283  __m128i s4 = __lsx_vilvl_h(t6, t4);
2284  __m128i s5 = __lsx_vilvh_h(t6, t4);
2285  __m128i s6 = __lsx_vilvl_h(t7, t5);
2286  __m128i s7 = __lsx_vilvh_h(t7, t5);
2287 
2288  kernel.packet[0] = __lsx_vilvl_w(s4, s0);
2289  kernel.packet[1] = __lsx_vilvh_w(s4, s0);
2290  kernel.packet[2] = __lsx_vilvl_w(s5, s1);
2291  kernel.packet[3] = __lsx_vilvh_w(s5, s1);
2292  kernel.packet[4] = __lsx_vilvl_w(s6, s2);
2293  kernel.packet[5] = __lsx_vilvh_w(s6, s2);
2294  kernel.packet[6] = __lsx_vilvl_w(s7, s3);
2295  kernel.packet[7] = __lsx_vilvh_w(s7, s3);
2296 }
2297 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16c, 4>& kernel) {
2298  __m128i t0 = __lsx_vilvl_b(kernel.packet[1], kernel.packet[0]);
2299  __m128i t1 = __lsx_vilvh_b(kernel.packet[1], kernel.packet[0]);
2300  __m128i t2 = __lsx_vilvl_b(kernel.packet[3], kernel.packet[2]);
2301  __m128i t3 = __lsx_vilvh_b(kernel.packet[3], kernel.packet[2]);
2302 
2303  kernel.packet[0] = __lsx_vilvl_h(t2, t0);
2304  kernel.packet[1] = __lsx_vilvh_h(t2, t0);
2305  kernel.packet[2] = __lsx_vilvl_h(t3, t1);
2306  kernel.packet[3] = __lsx_vilvh_h(t3, t1);
2307 }
// In-place transpose of an 8x8 block of 16-bit elements held as eight Packet8s
// rows. Three rounds of interleaves with doubling element width
// (16 -> 32 -> 64 bits) leave column c of the input in kernel.packet[c].
2308 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8s, 8>& kernel) {
      // Round 1: 16-bit interleave of adjacent row pairs (0,1), (2,3), (4,5), (6,7).
      // The vilvl result covers columns 0-3 of the pair, vilvh columns 4-7.
2309  __m128i t0 = __lsx_vilvl_h(kernel.packet[1], kernel.packet[0]);
2310  __m128i t1 = __lsx_vilvh_h(kernel.packet[1], kernel.packet[0]);
2311  __m128i t2 = __lsx_vilvl_h(kernel.packet[3], kernel.packet[2]);
2312  __m128i t3 = __lsx_vilvh_h(kernel.packet[3], kernel.packet[2]);
2313  __m128i t4 = __lsx_vilvl_h(kernel.packet[5], kernel.packet[4]);
2314  __m128i t5 = __lsx_vilvh_h(kernel.packet[5], kernel.packet[4]);
2315  __m128i t6 = __lsx_vilvl_h(kernel.packet[7], kernel.packet[6]);
2316  __m128i t7 = __lsx_vilvh_h(kernel.packet[7], kernel.packet[6]);
2317 
      // Round 2: 32-bit interleave merges two row pairs: s0..s3 cover rows 0-3,
      // s4..s7 cover rows 4-7, two columns per vector.
2318  __m128i s0 = __lsx_vilvl_w(t2, t0);
2319  __m128i s1 = __lsx_vilvh_w(t2, t0);
2320  __m128i s2 = __lsx_vilvl_w(t3, t1);
2321  __m128i s3 = __lsx_vilvh_w(t3, t1);
2322  __m128i s4 = __lsx_vilvl_w(t6, t4);
2323  __m128i s5 = __lsx_vilvh_w(t6, t4);
2324  __m128i s6 = __lsx_vilvl_w(t7, t5);
2325  __m128i s7 = __lsx_vilvh_w(t7, t5);
2326 
      // Round 3: 64-bit interleave joins the rows 0-3 half with the rows 4-7
      // half of the same column and writes the finished column back.
2327  kernel.packet[0] = __lsx_vilvl_d(s4, s0);
2328  kernel.packet[1] = __lsx_vilvh_d(s4, s0);
2329  kernel.packet[2] = __lsx_vilvl_d(s5, s1);
2330  kernel.packet[3] = __lsx_vilvh_d(s5, s1);
2331  kernel.packet[4] = __lsx_vilvl_d(s6, s2);
2332  kernel.packet[5] = __lsx_vilvh_d(s6, s2);
2333  kernel.packet[6] = __lsx_vilvl_d(s7, s3);
2334  kernel.packet[7] = __lsx_vilvh_d(s7, s3);
2335 }
2336 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8s, 4>& kernel) {
2337  __m128i t0 = __lsx_vilvl_h(kernel.packet[1], kernel.packet[0]);
2338  __m128i t1 = __lsx_vilvh_h(kernel.packet[1], kernel.packet[0]);
2339  __m128i t2 = __lsx_vilvl_h(kernel.packet[3], kernel.packet[2]);
2340  __m128i t3 = __lsx_vilvh_h(kernel.packet[3], kernel.packet[2]);
2341 
2342  kernel.packet[0] = __lsx_vilvl_w(t2, t0);
2343  kernel.packet[1] = __lsx_vilvh_w(t2, t0);
2344  kernel.packet[2] = __lsx_vilvl_w(t3, t1);
2345  kernel.packet[3] = __lsx_vilvh_w(t3, t1);
2346 }
2347 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4i, 4>& kernel) {
2348  __m128i T0 = __lsx_vilvl_w(kernel.packet[1], kernel.packet[0]);
2349  __m128i T1 = __lsx_vilvh_w(kernel.packet[1], kernel.packet[0]);
2350  __m128i T2 = __lsx_vilvl_w(kernel.packet[3], kernel.packet[2]);
2351  __m128i T3 = __lsx_vilvh_w(kernel.packet[3], kernel.packet[2]);
2352 
2353  kernel.packet[0] = __lsx_vilvl_d(T2, T0);
2354  kernel.packet[1] = __lsx_vilvh_d(T2, T0);
2355  kernel.packet[2] = __lsx_vilvl_d(T3, T1);
2356  kernel.packet[3] = __lsx_vilvh_d(T3, T1);
2357 }
2359  __m128i tmp = __lsx_vilvh_d(kernel.packet[1], kernel.packet[0]);
2360  kernel.packet[0] = __lsx_vilvl_d(kernel.packet[1], kernel.packet[0]);
2361  kernel.packet[1] = tmp;
2362 }
// In-place transpose of a 16x16 block of unsigned 8-bit elements held as
// sixteen Packet16uc rows. The block is transposed by four rounds of
// interleaves whose element width doubles each round (8 -> 16 -> 32 -> 64
// bits); after the last round kernel.packet[c] holds column c of the input.
2363 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16uc, 16>& kernel) {
      // Round 1: byte interleave of adjacent row pairs (0,1), (2,3), ... (14,15).
      // The vilvl result covers columns 0-7 of the pair, vilvh columns 8-15.
2364  __m128i t0 = __lsx_vilvl_b(kernel.packet[1], kernel.packet[0]);
2365  __m128i t1 = __lsx_vilvh_b(kernel.packet[1], kernel.packet[0]);
2366  __m128i t2 = __lsx_vilvl_b(kernel.packet[3], kernel.packet[2]);
2367  __m128i t3 = __lsx_vilvh_b(kernel.packet[3], kernel.packet[2]);
2368  __m128i t4 = __lsx_vilvl_b(kernel.packet[5], kernel.packet[4]);
2369  __m128i t5 = __lsx_vilvh_b(kernel.packet[5], kernel.packet[4]);
2370  __m128i t6 = __lsx_vilvl_b(kernel.packet[7], kernel.packet[6]);
2371  __m128i t7 = __lsx_vilvh_b(kernel.packet[7], kernel.packet[6]);
2372  __m128i t8 = __lsx_vilvl_b(kernel.packet[9], kernel.packet[8]);
2373  __m128i t9 = __lsx_vilvh_b(kernel.packet[9], kernel.packet[8]);
2374  __m128i ta = __lsx_vilvl_b(kernel.packet[11], kernel.packet[10]);
2375  __m128i tb = __lsx_vilvh_b(kernel.packet[11], kernel.packet[10]);
2376  __m128i tc = __lsx_vilvl_b(kernel.packet[13], kernel.packet[12]);
2377  __m128i td = __lsx_vilvh_b(kernel.packet[13], kernel.packet[12]);
2378  __m128i te = __lsx_vilvl_b(kernel.packet[15], kernel.packet[14]);
2379  __m128i tf = __lsx_vilvh_b(kernel.packet[15], kernel.packet[14]);
2380 
      // Round 2: 16-bit interleave merges two row pairs, so every 4-byte group
      // now carries one column for a run of four rows (0-3, 4-7, 8-11 or 12-15).
2381  __m128i s0 = __lsx_vilvl_h(t2, t0);
2382  __m128i s1 = __lsx_vilvh_h(t2, t0);
2383  __m128i s2 = __lsx_vilvl_h(t3, t1);
2384  __m128i s3 = __lsx_vilvh_h(t3, t1);
2385  __m128i s4 = __lsx_vilvl_h(t6, t4);
2386  __m128i s5 = __lsx_vilvh_h(t6, t4);
2387  __m128i s6 = __lsx_vilvl_h(t7, t5);
2388  __m128i s7 = __lsx_vilvh_h(t7, t5);
2389  __m128i s8 = __lsx_vilvl_h(ta, t8);
2390  __m128i s9 = __lsx_vilvh_h(ta, t8);
2391  __m128i sa = __lsx_vilvl_h(tb, t9);
2392  __m128i sb = __lsx_vilvh_h(tb, t9);
2393  __m128i sc = __lsx_vilvl_h(te, tc);
2394  __m128i sd = __lsx_vilvh_h(te, tc);
2395  __m128i se = __lsx_vilvl_h(tf, td);
2396  __m128i sf = __lsx_vilvh_h(tf, td);
2397 
      // Round 3: 32-bit interleave merges the four-row runs into eight-row runs:
      // u0..u7 cover rows 0-7, u8..uf cover rows 8-15, two columns per vector.
2398  __m128i u0 = __lsx_vilvl_w(s4, s0);
2399  __m128i u1 = __lsx_vilvh_w(s4, s0);
2400  __m128i u2 = __lsx_vilvl_w(s5, s1);
2401  __m128i u3 = __lsx_vilvh_w(s5, s1);
2402  __m128i u4 = __lsx_vilvl_w(s6, s2);
2403  __m128i u5 = __lsx_vilvh_w(s6, s2);
2404  __m128i u6 = __lsx_vilvl_w(s7, s3);
2405  __m128i u7 = __lsx_vilvh_w(s7, s3);
2406  __m128i u8 = __lsx_vilvl_w(sc, s8);
2407  __m128i u9 = __lsx_vilvh_w(sc, s8);
2408  __m128i ua = __lsx_vilvl_w(sd, s9);
2409  __m128i ub = __lsx_vilvh_w(sd, s9);
2410  __m128i uc = __lsx_vilvl_w(se, sa);
2411  __m128i ud = __lsx_vilvh_w(se, sa);
2412  __m128i ue = __lsx_vilvl_w(sf, sb);
2413  __m128i uf = __lsx_vilvh_w(sf, sb);
2414 
      // Round 4: 64-bit interleave joins the rows 0-7 half with the rows 8-15
      // half of the same column and writes the finished column back.
2415  kernel.packet[0] = __lsx_vilvl_d(u8, u0);
2416  kernel.packet[1] = __lsx_vilvh_d(u8, u0);
2417  kernel.packet[2] = __lsx_vilvl_d(u9, u1);
2418  kernel.packet[3] = __lsx_vilvh_d(u9, u1);
2419  kernel.packet[4] = __lsx_vilvl_d(ua, u2);
2420  kernel.packet[5] = __lsx_vilvh_d(ua, u2);
2421  kernel.packet[6] = __lsx_vilvl_d(ub, u3);
2422  kernel.packet[7] = __lsx_vilvh_d(ub, u3);
2423  kernel.packet[8] = __lsx_vilvl_d(uc, u4);
2424  kernel.packet[9] = __lsx_vilvh_d(uc, u4);
2425  kernel.packet[10] = __lsx_vilvl_d(ud, u5);
2426  kernel.packet[11] = __lsx_vilvh_d(ud, u5);
2427  kernel.packet[12] = __lsx_vilvl_d(ue, u6);
2428  kernel.packet[13] = __lsx_vilvh_d(ue, u6);
2429  kernel.packet[14] = __lsx_vilvl_d(uf, u7);
2430  kernel.packet[15] = __lsx_vilvh_d(uf, u7);
2431 }
2433  __m128i t0 = __lsx_vilvl_b(kernel.packet[1], kernel.packet[0]);
2434  __m128i t1 = __lsx_vilvh_b(kernel.packet[1], kernel.packet[0]);
2435  __m128i t2 = __lsx_vilvl_b(kernel.packet[3], kernel.packet[2]);
2436  __m128i t3 = __lsx_vilvh_b(kernel.packet[3], kernel.packet[2]);
2437  __m128i t4 = __lsx_vilvl_b(kernel.packet[5], kernel.packet[4]);
2438  __m128i t5 = __lsx_vilvh_b(kernel.packet[5], kernel.packet[4]);
2439  __m128i t6 = __lsx_vilvl_b(kernel.packet[7], kernel.packet[6]);
2440  __m128i t7 = __lsx_vilvh_b(kernel.packet[7], kernel.packet[6]);
2441 
2442  __m128i s0 = __lsx_vilvl_h(t2, t0);
2443  __m128i s1 = __lsx_vilvh_h(t2, t0);
2444  __m128i s2 = __lsx_vilvl_h(t3, t1);
2445  __m128i s3 = __lsx_vilvh_h(t3, t1);
2446  __m128i s4 = __lsx_vilvl_h(t6, t4);
2447  __m128i s5 = __lsx_vilvh_h(t6, t4);
2448  __m128i s6 = __lsx_vilvl_h(t7, t5);
2449  __m128i s7 = __lsx_vilvh_h(t7, t5);
2450 
2451  kernel.packet[0] = __lsx_vilvl_w(s4, s0);
2452  kernel.packet[1] = __lsx_vilvh_w(s4, s0);
2453  kernel.packet[2] = __lsx_vilvl_w(s5, s1);
2454  kernel.packet[3] = __lsx_vilvh_w(s5, s1);
2455  kernel.packet[4] = __lsx_vilvl_w(s6, s2);
2456  kernel.packet[5] = __lsx_vilvh_w(s6, s2);
2457  kernel.packet[6] = __lsx_vilvl_w(s7, s3);
2458  kernel.packet[7] = __lsx_vilvh_w(s7, s3);
2459 }
2460 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16uc, 4>& kernel) {
2461  __m128i t0 = __lsx_vilvl_b(kernel.packet[1], kernel.packet[0]);
2462  __m128i t1 = __lsx_vilvh_b(kernel.packet[1], kernel.packet[0]);
2463  __m128i t2 = __lsx_vilvl_b(kernel.packet[3], kernel.packet[2]);
2464  __m128i t3 = __lsx_vilvh_b(kernel.packet[3], kernel.packet[2]);
2465 
2466  kernel.packet[0] = __lsx_vilvl_h(t2, t0);
2467  kernel.packet[1] = __lsx_vilvh_h(t2, t0);
2468  kernel.packet[2] = __lsx_vilvl_h(t3, t1);
2469  kernel.packet[3] = __lsx_vilvh_h(t3, t1);
2470 }
// In-place transpose of an 8x8 block of unsigned 16-bit elements held as eight
// Packet8us rows. Three rounds of interleaves with doubling element width
// (16 -> 32 -> 64 bits) leave column c of the input in kernel.packet[c].
2471 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8us, 8>& kernel) {
      // Round 1: 16-bit interleave of adjacent row pairs (0,1), (2,3), (4,5), (6,7).
      // The vilvl result covers columns 0-3 of the pair, vilvh columns 4-7.
2472  __m128i t0 = __lsx_vilvl_h(kernel.packet[1], kernel.packet[0]);
2473  __m128i t1 = __lsx_vilvh_h(kernel.packet[1], kernel.packet[0]);
2474  __m128i t2 = __lsx_vilvl_h(kernel.packet[3], kernel.packet[2]);
2475  __m128i t3 = __lsx_vilvh_h(kernel.packet[3], kernel.packet[2]);
2476  __m128i t4 = __lsx_vilvl_h(kernel.packet[5], kernel.packet[4]);
2477  __m128i t5 = __lsx_vilvh_h(kernel.packet[5], kernel.packet[4]);
2478  __m128i t6 = __lsx_vilvl_h(kernel.packet[7], kernel.packet[6]);
2479  __m128i t7 = __lsx_vilvh_h(kernel.packet[7], kernel.packet[6]);
2480 
      // Round 2: 32-bit interleave merges two row pairs: s0..s3 cover rows 0-3,
      // s4..s7 cover rows 4-7, two columns per vector.
2481  __m128i s0 = __lsx_vilvl_w(t2, t0);
2482  __m128i s1 = __lsx_vilvh_w(t2, t0);
2483  __m128i s2 = __lsx_vilvl_w(t3, t1);
2484  __m128i s3 = __lsx_vilvh_w(t3, t1);
2485  __m128i s4 = __lsx_vilvl_w(t6, t4);
2486  __m128i s5 = __lsx_vilvh_w(t6, t4);
2487  __m128i s6 = __lsx_vilvl_w(t7, t5);
2488  __m128i s7 = __lsx_vilvh_w(t7, t5);
2489 
      // Round 3: 64-bit interleave joins the rows 0-3 half with the rows 4-7
      // half of the same column and writes the finished column back.
2490  kernel.packet[0] = __lsx_vilvl_d(s4, s0);
2491  kernel.packet[1] = __lsx_vilvh_d(s4, s0);
2492  kernel.packet[2] = __lsx_vilvl_d(s5, s1);
2493  kernel.packet[3] = __lsx_vilvh_d(s5, s1);
2494  kernel.packet[4] = __lsx_vilvl_d(s6, s2);
2495  kernel.packet[5] = __lsx_vilvh_d(s6, s2);
2496  kernel.packet[6] = __lsx_vilvl_d(s7, s3);
2497  kernel.packet[7] = __lsx_vilvh_d(s7, s3);
2498 }
2499 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8us, 4>& kernel) {
2500  __m128i t0 = __lsx_vilvl_h(kernel.packet[1], kernel.packet[0]);
2501  __m128i t1 = __lsx_vilvh_h(kernel.packet[1], kernel.packet[0]);
2502  __m128i t2 = __lsx_vilvl_h(kernel.packet[3], kernel.packet[2]);
2503  __m128i t3 = __lsx_vilvh_h(kernel.packet[3], kernel.packet[2]);
2504 
2505  kernel.packet[0] = __lsx_vilvl_w(t2, t0);
2506  kernel.packet[1] = __lsx_vilvh_w(t2, t0);
2507  kernel.packet[2] = __lsx_vilvl_w(t3, t1);
2508  kernel.packet[3] = __lsx_vilvh_w(t3, t1);
2509 }
2511  __m128i T0 = __lsx_vilvl_w(kernel.packet[1], kernel.packet[0]);
2512  __m128i T1 = __lsx_vilvh_w(kernel.packet[1], kernel.packet[0]);
2513  __m128i T2 = __lsx_vilvl_w(kernel.packet[3], kernel.packet[2]);
2514  __m128i T3 = __lsx_vilvh_w(kernel.packet[3], kernel.packet[2]);
2515 
2516  kernel.packet[0] = __lsx_vilvl_d(T2, T0);
2517  kernel.packet[1] = __lsx_vilvh_d(T2, T0);
2518  kernel.packet[2] = __lsx_vilvl_d(T3, T1);
2519  kernel.packet[3] = __lsx_vilvh_d(T3, T1);
2520 }
2522  __m128i tmp = __lsx_vilvh_d(kernel.packet[1], kernel.packet[0]);
2523  kernel.packet[0] = __lsx_vilvl_d(kernel.packet[1], kernel.packet[0]);
2524  kernel.packet[1] = tmp;
2525 }
2526 
2527 template <>
2529  return __lsx_vfrsqrt_s(a);
2530 }
2531 template <>
2533  return __lsx_vfrsqrt_d(a);
2534 }
2535 
2536 template <>
2538  return __lsx_vfrintrm_s(a);
2539 }
2540 template <>
2542  return __lsx_vfrintrm_d(a);
2543 }
2544 
2545 template <>
2547  return __lsx_vfrintrp_s(a);
2548 }
2549 template <>
2551  return __lsx_vfrintrp_d(a);
2552 }
2553 
// Rounding body for Packet4f (single-precision `_s` intrinsic).
// NOTE(review): the declaration line (listing line 2555) is absent from this extract.
// `mask` (0x80000000) isolates the sign bit of each lane of `a`; combining it with
// `prev0dot5` (0x3EFFFFFF, the bit pattern just below that of 0.5f) yields a
// sign-matched offset that is added to `a` before __lsx_vfrintrz_s is applied.
// Because pand(a, mask) can only have the sign bit set and `prev0dot5` has it clear,
// pxor here produces the same bits as por would.
2554 template <>
2556  const Packet4f mask = pset1frombits<Packet4f>(static_cast<numext::uint32_t>(0x80000000u));
2557  const Packet4f prev0dot5 = pset1frombits<Packet4f>(static_cast<numext::uint32_t>(0x3EFFFFFFu));
2558  return __lsx_vfrintrz_s(padd(pxor(pand(a, mask), prev0dot5), a));
2559 }
// Rounding body for Packet2d (double-precision `_d` intrinsic).
// NOTE(review): the declaration line (listing line 2561) is absent from this extract.
// `mask` isolates the sign bit of each 64-bit lane of `a`; OR-ing it into
// `prev0dot5` (0x3FDFFFFFFFFFFFFF, the bit pattern just below that of 0.5) yields a
// sign-matched offset that is added to `a` before __lsx_vfrintrz_d is applied.
2560 template <>
2562  const Packet2d mask = pset1frombits<Packet2d>(static_cast<numext::uint64_t>(0x8000000000000000ull));
2563  const Packet2d prev0dot5 = pset1frombits<Packet2d>(static_cast<numext::uint64_t>(0x3FDFFFFFFFFFFFFFull));
2564  return __lsx_vfrintrz_d(padd(por(pand(a, mask), prev0dot5), a));
2565 }
2566 
2567 template <>
2569  return (Packet4f)__lsx_vbitsel_v((__m128i)b, (__m128i)a, (__m128i)mask);
2570 }
2571 template <>
2573  return (Packet16c)__lsx_vbitsel_v((__m128i)b, (__m128i)a, (__m128i)mask);
2574 }
2575 
2576 template <>
2578  int8_t tmp[16] = {*from, *from, *from, *from, *(from + 1), *(from + 1),
2579  *(from + 1), *(from + 1), *(from + 2), *(from + 2), *(from + 2), *(from + 2),
2580  *(from + 3), *(from + 3), *(from + 3), *(from + 3)};
2581  return __lsx_vld(tmp, 0);
2582 }
2583 template <>
2585  uint8_t tmp[16] = {*from, *from, *from, *from, *(from + 1), *(from + 1),
2586  *(from + 1), *(from + 1), *(from + 2), *(from + 2), *(from + 2), *(from + 2),
2587  *(from + 3), *(from + 3), *(from + 3), *(from + 3)};
2588  return __lsx_vld(tmp, 0);
2589 }
2590 template <>
2592  int16_t tmp[8] = {*from, *from, *from, *from, *(from + 1), *(from + 1), *(from + 1), *(from + 1)};
2593  return __lsx_vld(tmp, 0);
2594 }
2595 template <>
2597  uint16_t tmp[8] = {*from, *from, *from, *from, *(from + 1), *(from + 1), *(from + 1), *(from + 1)};
2598  return __lsx_vld(tmp, 0);
2599 }
2600 template <>
2602  int32_t tmp[4] = {*from, *from, *from, *from};
2603  return __lsx_vld(tmp, 0);
2604 }
2605 template <>
2607  uint32_t tmp[4] = {*from, *from, *from, *from};
2608  return __lsx_vld(tmp, 0);
2609 }
2610 
2611 template <>
2613  return __lsx_vmsub_b(pnegate(c), a, b);
2614 }
2615 template <>
2617  return __lsx_vmsub_h(pnegate(c), a, b);
2618 }
2619 template <>
2621  return __lsx_vmsub_w(pnegate(c), a, b);
2622 }
2623 template <>
2625  return __lsx_vmsub_d(pnegate(c), a, b);
2626 }
2627 
2628 template <>
2630  return __lsx_vmadd_b(pnegate(c), a, b);
2631 }
2632 template <>
2634  return __lsx_vmadd_h(pnegate(c), a, b);
2635 }
2636 template <>
2638  return __lsx_vmadd_w(pnegate(c), a, b);
2639 }
2640 template <>
2642  return __lsx_vmadd_d(pnegate(c), a, b);
2643 }
2644 
2645 template <>
2647  return __lsx_vmsub_b(c, a, b);
2648 }
2649 template <>
2651  return __lsx_vmsub_h(c, a, b);
2652 }
2653 template <>
2655  return __lsx_vmsub_w(c, a, b);
2656 }
2657 template <>
2659  return __lsx_vmsub_d(c, a, b);
2660 }
2661 
2662 template <>
2664  return pexp_float(_x);
2665 }
2666 template <>
2668  return pexp_double(_x);
2669 }
2670 
2671 template <>
2673  return pldexp_generic(a, exponent);
2674 }
2675 
2676 template <>
2678  return pfrexp_generic(a, exponent);
2679 }
2680 template <>
2682  return pfrexp_generic(a, exponent);
2683 }
2684 template <>
2686  Packet4f v = {0.0f, 0.0f, 0.0f, 0.0f};
2687  return v;
2688 }
2689 template <>
2691  Packet4f v = psub(a, b);
2692  return pabs(v);
2693 }
2694 template <>
2696  return pmin<Packet4f>(a, b);
2697 }
2698 template <>
2700  return pmax<Packet4f>(a, b);
2701 }
2702 template <>
2704  return (__m128)__lsx_vldrepl_w(from, 0);
2705 }
2706 template <>
2708  return (__m128)__lsx_vsrai_w((__m128i)a, 31);
2709 }
2710 template <>
2712  return __lsx_vfrintrne_s(a);
2713 }
2714 template <>
2716  return __lsx_vfrintrz_s(a);
2717 }
2718 template <>
2720  return __lsx_vfrecip_s(a);
2721 }
2722 
2723 template <>
2725  Packet2d v = {0.0, 0.0};
2726  return v;
2727 }
2728 template <>
2730  return pmin<Packet2d>(a, b);
2731 }
2732 template <>
2734  return pmax<Packet2d>(a, b);
2735 }
2736 template <>
2738  return (__m128d)(__lsx_vsrai_d((__m128i)a, 63));
2739 }
2740 template <>
2742  return (Packet2d)__lsx_vbitsel_v((__m128i)b, (__m128i)a, (__m128i)mask);
2743 }
2744 template <>
2746  return __lsx_vfrintrne_d(a);
2747 }
2748 template <>
2750  return __lsx_vfrintrz_d(a);
2751 }
2752 template <>
2754  return pldexp_generic(a, exponent);
2755 }
2756 
2757 template <>
2759  Packet16c v = psub(a, b);
2760  return pabs(v);
2761 }
2762 
2763 template <>
2765  Packet8s v = psub(a, b);
2766  return pabs(v);
2767 }
2768 template <>
2770  return __lsx_vbitsel_v(b, a, mask);
2771 }
2772 
2773 template <>
2775  Packet4i v = psub(a, b);
2776  return pabs(v);
2777 }
2778 template <>
2780  return __lsx_vbitsel_v(b, a, mask);
2781 }
2782 
2783 template <>
2785  return __lsx_vbitsel_v(b, a, mask);
2786 }
2787 
2788 template <>
2790  return __lsx_vdiv_bu(a, b);
2791 }
// Absolute difference |a - b| on unsigned 8-bit lanes.
// NOTE(review): the declaration line (listing line 2793) is absent from this extract.
// The previous body computed pabs(psub(a, b)). On unsigned lanes psub wraps modulo 2^8
// whenever b > a, and the wrapped value alone cannot be mapped back to |a - b|
// (e.g. 0 - 200 and 56 - 0 both give 56), so the result was wrong for b > a.
// The LSX unsigned absolute-difference instruction computes the value directly.
2792 template <>
2794  return __lsx_vabsd_bu(a, b);
2796 }
2797 template <>
2799  const Packet16uc& b) {
2800  return __lsx_vbitsel_v(b, a, mask);
2801 }
// Bit-by-bit integer square root on unsigned 8-bit lanes (`_bu` / `_b` intrinsics).
// NOTE(review): the declaration line (listing line 2803) is absent from this extract.
// `add` holds the probe bit in every byte, starting at 0x08 and shifted right by one
// each pass. Each pass forms temp = res | add, squares it (widening even/odd multiplies,
// then packing the low bytes back into place) and adopts `temp` as the new `res` in the
// lanes where temp*temp <= a. Four passes cover probe bits 0x08, 0x04, 0x02, 0x01.
2802 template <>
2804  __m128i res = {0, 0};
2805  __m128i add = {0x0808080808080808, 0x0808080808080808};
2806  for (int i = 0; i < 4; i++) {
2807  const __m128i temp = __lsx_vor_v(res, add);
2808  const __m128i tmul = __lsx_vpackev_b(__lsx_vmulwod_h_bu(temp, temp), __lsx_vmulwev_h_bu(temp, temp));
2809  res = __lsx_vbitsel_v(res, temp, __lsx_vsle_bu(tmul, a));
2810  add = __lsx_vsrli_b(add, 1);
2811  }
2812  return res;
2813 }
2814 
// Absolute difference |a - b| on unsigned 16-bit lanes.
// NOTE(review): the declaration line (listing line 2816) is absent from this extract.
// The previous body computed pabs(psub(a, b)). On unsigned lanes psub wraps modulo 2^16
// whenever b > a, and the wrapped value alone cannot be mapped back to |a - b|,
// so the result was wrong for b > a. Use the LSX unsigned absolute-difference
// instruction instead.
2815 template <>
2817  return __lsx_vabsd_hu(a, b);
2819 }
2820 template <>
2822  return __lsx_vbitsel_v(b, a, mask);
2823 }
// Bit-by-bit integer square root on unsigned 16-bit lanes (`_hu` / `_h` intrinsics).
// NOTE(review): the declaration line (listing line 2825) is absent from this extract.
// `add` holds the probe bit in every 16-bit lane, starting at 0x0080 and shifted right
// by one each pass. Each pass forms temp = res | add, squares it (widening even/odd
// multiplies, then packing the low halves back into place) and adopts `temp` as the new
// `res` in the lanes where temp*temp <= a.
// The root of a 16-bit value has up to 8 significant bits, so 8 passes are required;
// the previous 4 passes never tested bits 3..0 (e.g. sqrt(9) evaluated to 0).
2824 template <>
2826  __m128i res = {0, 0};
2827  __m128i add = {0x0080008000800080, 0x0080008000800080};
2828  for (int i = 0; i < 8; i++) {
2829  const __m128i temp = __lsx_vor_v(res, add);
2830  const __m128i tmul = __lsx_vpackev_h(__lsx_vmulwod_w_hu(temp, temp), __lsx_vmulwev_w_hu(temp, temp));
2831  res = __lsx_vbitsel_v(res, temp, __lsx_vsle_hu(tmul, a));
2832  add = __lsx_vsrli_h(add, 1);
2833  }
2834  return res;
2835 }
2836 
// Absolute difference |a - b| on unsigned 32-bit lanes.
// NOTE(review): the declaration line (listing line 2838) is absent from this extract.
// The previous body computed pabs(psub(a, b)). On unsigned lanes psub wraps modulo 2^32
// whenever b > a, and the wrapped value alone cannot be mapped back to |a - b|,
// so the result was wrong for b > a. Use the LSX unsigned absolute-difference
// instruction instead.
2837 template <>
2839  return __lsx_vabsd_wu(a, b);
2841 }
2842 template <>
2844  return __lsx_vbitsel_v(b, a, mask);
2845 }
// Bit-by-bit integer square root on unsigned 32-bit lanes (`_wu` / `_w` intrinsics).
// NOTE(review): the declaration line (listing line 2847) is absent from this extract.
// `add` holds the probe bit in every 32-bit lane, starting at 0x00008000 and shifted
// right by one each pass. Each pass forms temp = res | add, squares it (widening even/odd
// multiplies, then packing the low words back into place) and adopts `temp` as the new
// `res` in the lanes where temp*temp <= a.
// The root of a 32-bit value has up to 16 significant bits, so 16 passes are required;
// the previous 4 passes never tested bits 11..0.
2846 template <>
2848  __m128i res = {0, 0};
2849  __m128i add = {0x0000800000008000, 0x0000800000008000};
2850  for (int i = 0; i < 16; i++) {
2851  const __m128i temp = __lsx_vor_v(res, add);
2852  const __m128i tmul = __lsx_vpackev_w(__lsx_vmulwod_d_wu(temp, temp), __lsx_vmulwev_d_wu(temp, temp));
2853  res = __lsx_vbitsel_v(res, temp, __lsx_vsle_wu(tmul, a));
2854  add = __lsx_vsrli_w(add, 1);
2855  }
2856  return res;
2857 }
2858 
2859 template <>
2861  return __lsx_vbitsel_v(b, a, mask);
2862 }
2863 
2864 } // namespace internal
2865 } // namespace Eigen
2866 #endif
Array< int, Dynamic, 1 > v
Definition: Array_initializer_list_vector_cxx11.cpp:1
int i
Definition: BiCGSTAB_step_by_step.cpp:9
const unsigned n
Definition: CG3DPackingUnitTest.cpp:11
#define EIGEN_DEBUG_ALIGNED_STORE
Definition: GenericPacketMath.h:38
#define EIGEN_DEBUG_ALIGNED_LOAD
Definition: GenericPacketMath.h:30
#define EIGEN_DEBUG_UNALIGNED_STORE
Definition: GenericPacketMath.h:42
#define EIGEN_DEBUG_UNALIGNED_LOAD
Definition: GenericPacketMath.h:34
#define EIGEN_ALWAYS_INLINE
Definition: Macros.h:845
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition: PartialRedux_count.cpp:3
float * p
Definition: Tutorial_Map_using.cpp:9
Scalar * b
Definition: benchVecAdd.cpp:17
@ N
Definition: constructor.cpp:22
@ Aligned16
Definition: Constants.h:237
RealScalar s
Definition: level1_cplx_impl.h:130
const Scalar * a
Definition: level2_cplx_impl.h:32
int * m
Definition: level2_cplx_impl.h:294
Eigen::Matrix< Scalar, Dynamic, Dynamic, ColMajor > tmp
Definition: level3_impl.h:365
EIGEN_STRONG_INLINE int64_t predux_min< Packet2l >(const Packet2l &a)
Definition: LSX/PacketMath.h:2095
EIGEN_STRONG_INLINE unsigned char predux< Packet16uc >(const Packet16uc &a)
Definition: AltiVec/PacketMath.h:2515
EIGEN_STRONG_INLINE Packet4f pandnot< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1465
EIGEN_STRONG_INLINE Packet8s pabsdiff< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:2764
EIGEN_STRONG_INLINE Packet4ui psub< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:634
EIGEN_STRONG_INLINE Packet16c pmin< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: AltiVec/PacketMath.h:1273
EIGEN_STRONG_INLINE Packet8us pand< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1418
EIGEN_STRONG_INLINE Packet2d shuffle(const Packet2d &m, const Packet2d &n, int mask)
Definition: LSX/PacketMath.h:150
EIGEN_STRONG_INLINE void pstore< int8_t >(int8_t *to, const Packet16c &from)
Definition: LSX/PacketMath.h:1541
EIGEN_STRONG_INLINE Packet16c pcmp_le< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:1048
__m128d Packet2d
Definition: LSX/PacketMath.h:36
EIGEN_STRONG_INLINE Packet4ui pset1< Packet4ui >(const uint32_t &from)
Definition: LSX/PacketMath.h:490
EIGEN_STRONG_INLINE void pstoreu< double >(double *to, const Packet4d &from)
Definition: AVX/PacketMath.h:1628
EIGEN_STRONG_INLINE Packet8s pmax< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: AltiVec/PacketMath.h:1297
EIGEN_STRONG_INLINE short int pfirst< Packet8s >(const Packet8s &a)
Definition: AltiVec/PacketMath.h:1883
EIGEN_STRONG_INLINE double predux< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:1965
EIGEN_STRONG_INLINE void pstoreu< uint32_t >(uint32_t *to, const Packet8ui &from)
Definition: AVX/PacketMath.h:1636
EIGEN_STRONG_INLINE void prefetch< uint64_t >(const uint64_t *addr)
Definition: LSX/PacketMath.h:1868
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf &a)
Definition: AltiVec/Complex.h:268
eigen_packet_wrapper< __m128i, 3 > Packet2l
Definition: LSX/PacketMath.h:41
EIGEN_STRONG_INLINE Packet2l pdiv< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:794
EIGEN_STRONG_INLINE Packet16c pmax< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: AltiVec/PacketMath.h:1305
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet16uc pgather< uint8_t, Packet16uc >(const uint8_t *from, Index stride)
Definition: LSX/PacketMath.h:1676
EIGEN_STRONG_INLINE Packet2d pcmp_eq< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1135
EIGEN_STRONG_INLINE void prefetch< int8_t >(const int8_t *addr)
Definition: LSX/PacketMath.h:1840
EIGEN_STRONG_INLINE void prefetch< uint32_t >(const uint32_t *addr)
Definition: AVX/PacketMath.h:1758
EIGEN_STRONG_INLINE Packet2l pandnot< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:1019
EIGEN_STRONG_INLINE int64_t predux< Packet2l >(const Packet2l &a)
Definition: LSX/PacketMath.h:1987
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:318
EIGEN_STRONG_INLINE Packet8us pabsdiff< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: LSX/PacketMath.h:2816
EIGEN_STRONG_INLINE Packet4f pmin< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1250
EIGEN_STRONG_INLINE Packet2d padd< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:605
EIGEN_STRONG_INLINE Packet2d pandnot< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1003
EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1126
EIGEN_STRONG_INLINE Packet8f pzero(const Packet8f &)
Definition: AVX/PacketMath.h:774
EIGEN_STRONG_INLINE uint32_t predux_max< Packet4ui >(const Packet4ui &a)
Definition: LSX/PacketMath.h:2166
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< uint64_t, Packet2ul >(uint64_t *to, const Packet2ul &from, Index stride)
Definition: LSX/PacketMath.h:1825
__vector int Packet4i
Definition: AltiVec/PacketMath.h:34
EIGEN_STRONG_INLINE Packet16uc ploadu< Packet16uc >(const unsigned char *from)
Definition: AltiVec/PacketMath.h:1557
EIGEN_STRONG_INLINE Packet4f vec4f_movelh(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:132
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_double(const Packet _x)
Definition: GenericPacketMathFunctions.h:561
EIGEN_STRONG_INLINE Packet2l ploadu< Packet2l >(const int64_t *from)
Definition: LSX/PacketMath.h:1464
EIGEN_STRONG_INLINE Packet2d pmin< PropagateNaN, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:2729
EIGEN_STRONG_INLINE Packet4f padd< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1066
EIGEN_STRONG_INLINE Packet16uc pmul< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: AltiVec/PacketMath.h:1182
EIGEN_STRONG_INLINE Packet4i por< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1431
EIGEN_STRONG_INLINE short int predux_min< Packet8s >(const Packet8s &a)
Definition: AltiVec/PacketMath.h:2617
EIGEN_STRONG_INLINE Packet16c por< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:925
EIGEN_STRONG_INLINE Packet4f pcmp_eq< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:1131
EIGEN_STRONG_INLINE Packet2ul pmin< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:1200
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8us pgather< uint16_t, Packet8us >(const uint16_t *from, Index stride)
Definition: LSX/PacketMath.h:1697
EIGEN_STRONG_INLINE Packet4ui pmul< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:769
__vector unsigned char Packet16uc
Definition: AltiVec/PacketMath.h:41
EIGEN_STRONG_INLINE Packet4f vec4f_swizzle2(const Packet4f &a, const Packet4f &b, int p, int q, int r, int s)
Definition: LSX/PacketMath.h:129
EIGEN_STRONG_INLINE Packet4i pset1< Packet4i >(const int &from)
Definition: AltiVec/PacketMath.h:778
EIGEN_STRONG_INLINE Packet16c pload< Packet16c >(const signed char *from)
Definition: AltiVec/PacketMath.h:512
EIGEN_STRONG_INLINE Packet8us pmin< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1269
EIGEN_STRONG_INLINE Packet2d paddsub< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:661
EIGEN_STRONG_INLINE Packet4f shuffle2(const Packet4f &m, const Packet4f &n, int mask)
Definition: LSX/PacketMath.h:105
EIGEN_STRONG_INLINE Packet16c psub< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: AltiVec/PacketMath.h:1111
EIGEN_STRONG_INLINE short int predux_max< Packet8s >(const Packet8s &a)
Definition: AltiVec/PacketMath.h:2697
EIGEN_STRONG_INLINE Packet4f pcmp_le< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:1040
EIGEN_STRONG_INLINE unsigned short int predux_max< Packet8us >(const Packet8us &a)
Definition: AltiVec/PacketMath.h:2712
EIGEN_STRONG_INLINE Packet2d vec2d_unpackhi(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:161
EIGEN_STRONG_INLINE Packet8s pcmp_eq< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:1143
EIGEN_STRONG_INLINE Packet2ul pxor< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:994
EIGEN_STRONG_INLINE float pfirst< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1863
EIGEN_STRONG_INLINE Packet2d pand< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:880
EIGEN_STRONG_INLINE Packet16uc pmax< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: AltiVec/PacketMath.h:1309
EIGEN_STRONG_INLINE Packet4f ploadquad< Packet4f >(const float *from)
Definition: LSX/PacketMath.h:2703
EIGEN_STRONG_INLINE Packet4f shuffle1(const Packet4f &m, int mask)
Definition: LSX/PacketMath.h:97
EIGEN_STRONG_INLINE void prefetch< uint16_t >(const uint16_t *addr)
Definition: LSX/PacketMath.h:1860
EIGEN_STRONG_INLINE Packet2ul pandnot< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:1035
EIGEN_STRONG_INLINE unsigned short int predux_min< Packet8us >(const Packet8us &a)
Definition: AltiVec/PacketMath.h:2632
EIGEN_STRONG_INLINE Packet8us pcmp_le< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: LSX/PacketMath.h:1068
EIGEN_STRONG_INLINE Packet8s por< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: AltiVec/PacketMath.h:1435
EIGEN_STRONG_INLINE void prefetch< int64_t >(const int64_t *addr)
Definition: LSX/PacketMath.h:1852
EIGEN_STRONG_INLINE Packet8us psub< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1107
EIGEN_STRONG_INLINE Packet2ul pset1< Packet2ul >(const uint64_t &from)
Definition: LSX/PacketMath.h:494
EIGEN_STRONG_INLINE Packet4ui padd< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: AltiVec/PacketMath.h:1074
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition: AltiVec/Complex.h:339
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2l pgather< int64_t, Packet2l >(const int64_t *from, Index stride)
Definition: LSX/PacketMath.h:1669
EIGEN_STRONG_INLINE Packet16c plset< Packet16c >(const signed char &a)
Definition: AltiVec/PacketMath.h:1057
EIGEN_STRONG_INLINE Packet4ui pand< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: AltiVec/PacketMath.h:1414
EIGEN_STRONG_INLINE signed char pfirst< Packet16c >(const Packet16c &a)
Definition: AltiVec/PacketMath.h:1893
EIGEN_STRONG_INLINE Packet2ul pcmp_eq< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:1167
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8s pgather< int16_t, Packet8s >(const int16_t *from, Index stride)
Definition: LSX/PacketMath.h:1647
EIGEN_STRONG_INLINE Packet16c pabsdiff< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:2758
EIGEN_STRONG_INLINE Packet16uc pset1< Packet16uc >(const unsigned char &from)
Definition: AltiVec/PacketMath.h:798
EIGEN_STRONG_INLINE signed char predux_mul< Packet16c >(const Packet16c &a)
Definition: AltiVec/PacketMath.h:2566
EIGEN_STRONG_INLINE Packet4i ploaddup< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:1644
EIGEN_STRONG_INLINE float predux_max< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2679
EIGEN_STRONG_INLINE signed char predux_min< Packet16c >(const Packet16c &a)
Definition: AltiVec/PacketMath.h:2647
EIGEN_STRONG_INLINE Packet8us pdiv< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: LSX/PacketMath.h:798
EIGEN_STRONG_INLINE Packet2ul ploaddup< Packet2ul >(const uint64_t *from)
Definition: LSX/PacketMath.h:1528
EIGEN_STRONG_INLINE Packet2d ploaddup< Packet2d >(const double *from)
Definition: LSX/PacketMath.h:1490
EIGEN_STRONG_INLINE Packet8us plset< Packet8us >(const unsigned short int &a)
Definition: AltiVec/PacketMath.h:1053
__vector unsigned short int Packet8us
Definition: AltiVec/PacketMath.h:38
EIGEN_STRONG_INLINE Packet2l pcmp_eq< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:1151
EIGEN_STRONG_INLINE Packet4f vec4f_movehl(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:135
EIGEN_STRONG_INLINE Packet2d pxor< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:962
EIGEN_STRONG_INLINE Packet2d por< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:921
EIGEN_STRONG_INLINE Packet2ul pload< Packet2ul >(const uint64_t *from)
Definition: LSX/PacketMath.h:1439
EIGEN_STRONG_INLINE Packet4f shuffle2< true >(const Packet4f &m, const Packet4f &n, int mask)
Definition: LSX/PacketMath.h:114
EIGEN_STRONG_INLINE Packet2d pldexp< Packet2d >(const Packet2d &a, const Packet2d &exponent)
Definition: LSX/PacketMath.h:2753
EIGEN_STRONG_INLINE Packet4i pdiv< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1205
EIGEN_STRONG_INLINE Packet2l padd< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:581
EIGEN_STRONG_INLINE Packet8s ploadu< Packet8s >(const short int *from)
Definition: AltiVec/PacketMath.h:1541
EIGEN_STRONG_INLINE Packet4ui pcmp_le< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:1072
EIGEN_STRONG_INLINE Packet2l pcmp_lt< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:1101
EIGEN_STRONG_INLINE Packet16uc pdiv< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:2789
EIGEN_STRONG_INLINE Packet2d pcmp_le< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1044
EIGEN_STRONG_INLINE Packet4f ploaddup< Packet4f >(const float *from)
Definition: AltiVec/PacketMath.h:1640
EIGEN_STRONG_INLINE Packet4f por< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1427
EIGEN_STRONG_INLINE Packet2l por< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:937
EIGEN_STRONG_INLINE void prefetch< int32_t >(const int32_t *addr)
Definition: LSX/PacketMath.h:1848
EIGEN_STRONG_INLINE Packet2d vec2d_swizzle2(const Packet2d &a, const Packet2d &b, int mask)
Definition: LSX/PacketMath.h:157
EIGEN_STRONG_INLINE Packet4ui pabsdiff< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:2838
EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1983
EIGEN_STRONG_INLINE Packet8s plset< Packet8s >(const short int &a)
Definition: AltiVec/PacketMath.h:1049
EIGEN_STRONG_INLINE Packet16uc padd< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: AltiVec/PacketMath.h:1090
EIGEN_STRONG_INLINE int predux_min< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2604
EIGEN_STRONG_INLINE Packet16uc psub< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: AltiVec/PacketMath.h:1115
EIGEN_STRONG_INLINE Packet4i pxor< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1452
EIGEN_STRONG_INLINE double predux_max< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2127
EIGEN_STRONG_INLINE Packet4f pmul< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1162
EIGEN_STRONG_INLINE Packet8us pcmp_lt< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: LSX/PacketMath.h:1109
EIGEN_STRONG_INLINE Packet2l pcmp_le< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:1060
EIGEN_STRONG_INLINE uint32_t predux< Packet4ui >(const Packet4ui &a)
Definition: LSX/PacketMath.h:2004
EIGEN_STRONG_INLINE Packet4i pcmp_eq< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: LSX/PacketMath.h:1147
EIGEN_STRONG_INLINE Packet4ui por< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:949
EIGEN_STRONG_INLINE Packet4ui pmin< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:1196
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4f pgather< float, Packet4f >(const float *from, Index stride)
Definition: AltiVec/PacketMath.h:853
EIGEN_STRONG_INLINE Packet8us pmax< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1301
EIGEN_STRONG_INLINE Packet4f paddsub< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:653
EIGEN_STRONG_INLINE Packet2d pset1< Packet2d >(const double &from)
Definition: LSX/PacketMath.h:503
EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1979
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< uint32_t, Packet4ui >(uint32_t *to, const Packet4ui &from, Index stride)
Definition: LSX/PacketMath.h:1817
EIGEN_STRONG_INLINE unsigned short int predux< Packet8us >(const Packet8us &a)
Definition: AltiVec/PacketMath.h:2483
EIGEN_STRONG_INLINE Packet4f pload< Packet4f >(const float *from)
Definition: AltiVec/PacketMath.h:492
__vector signed char Packet16c
Definition: AltiVec/PacketMath.h:40
EIGEN_STRONG_INLINE Packet16uc ploadquad< Packet16uc >(const unsigned char *from)
Definition: AltiVec/PacketMath.h:1724
EIGEN_STRONG_INLINE int predux_mul< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2529
EIGEN_STRONG_INLINE void pstoreu< uint16_t >(uint16_t *to, const Packet8us &from)
Definition: LSX/PacketMath.h:1603
EIGEN_STRONG_INLINE Packet16uc pload< Packet16uc >(const unsigned char *from)
Definition: AltiVec/PacketMath.h:517
EIGEN_STRONG_INLINE Packet8us ploadu< Packet8us >(const unsigned short int *from)
Definition: AltiVec/PacketMath.h:1545
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< int32_t, Packet4i >(int32_t *to, const Packet4i &from, Index stride)
Definition: LSX/PacketMath.h:1771
EIGEN_STRONG_INLINE Packet8h por(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2309
EIGEN_STRONG_INLINE Packet8us pmul< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1174
EIGEN_STRONG_INLINE uint64_t predux_max< Packet2ul >(const Packet2ul &a)
Definition: LSX/PacketMath.h:2171
EIGEN_STRONG_INLINE Packet8s pand< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:888
__vector unsigned int Packet4ui
Definition: AltiVec/PacketMath.h:35
EIGEN_STRONG_INLINE Packet2d pmax< PropagateNaN, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:2733
EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:1122
EIGEN_STRONG_INLINE Packet4f pmin< PropagateNaN, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:2695
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
Definition: AltiVec/Complex.h:303
EIGEN_STRONG_INLINE Packet8s pcmp_le< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:1052
EIGEN_STRONG_INLINE void pstore< double >(double *to, const Packet4d &from)
Definition: AVX/PacketMath.h:1611
EIGEN_STRONG_INLINE Packet4i padd< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1070
EIGEN_STRONG_INLINE Packet16c pcmp_eq< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:1139
EIGEN_STRONG_INLINE uint32_t pfirst< Packet4ui >(const Packet4ui &a)
Definition: LSX/PacketMath.h:1910
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: AltiVec/PacketMath.h:1218
EIGEN_STRONG_INLINE Packet16c pcmp_lt< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:1089
EIGEN_STRONG_INLINE Packet8s pdiv< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:786
EIGEN_STRONG_INLINE Packet4i pandnot< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1469
EIGEN_STRONG_INLINE unsigned char predux_max< Packet16uc >(const Packet16uc &a)
Definition: AltiVec/PacketMath.h:2739
EIGEN_STRONG_INLINE signed char predux_max< Packet16c >(const Packet16c &a)
Definition: AltiVec/PacketMath.h:2727
static EIGEN_STRONG_INLINE int eigen_lsx_shuffle_mask(int p, int q, int r, int s)
Definition: LSX/PacketMath.h:122
EIGEN_STRONG_INLINE Packet8us pandnot< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: LSX/PacketMath.h:1027
EIGEN_STRONG_INLINE Packet2ul pmul< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:773
EIGEN_STRONG_INLINE Packet4ui pandnot< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:1031
EIGEN_STRONG_INLINE Packet8s ploaddup< Packet8s >(const short int *from)
Definition: AltiVec/PacketMath.h:1649
EIGEN_STRONG_INLINE Packet2ul pmax< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:1233
EIGEN_STRONG_INLINE Packet4f pdiv< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1187
EIGEN_STRONG_INLINE Packet2d pload< Packet2d >(const double *from)
Definition: LSX/PacketMath.h:1407
EIGEN_STRONG_INLINE Packet8us pload< Packet8us >(const unsigned short int *from)
Definition: AltiVec/PacketMath.h:507
EIGEN_STRONG_INLINE Packet16uc pcmp_lt< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:1105
EIGEN_STRONG_INLINE Packet2d pmul< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:741
EIGEN_STRONG_INLINE Packet4i pabsdiff< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: LSX/PacketMath.h:2774
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
Definition: AltiVec/Complex.h:264
EIGEN_STRONG_INLINE Packet4f pfrexp< Packet4f >(const Packet4f &a, Packet4f &exponent)
Definition: AltiVec/PacketMath.h:2328
EIGEN_STRONG_INLINE float predux_mul< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2522
EIGEN_STRONG_INLINE Packet2ul plset< Packet2ul >(const uint64_t &a)
Definition: LSX/PacketMath.h:553
EIGEN_STRONG_INLINE void prefetch< float >(const float *addr)
Definition: AltiVec/PacketMath.h:1854
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< int16_t, Packet8s >(int16_t *to, const Packet8s &from, Index stride)
Definition: LSX/PacketMath.h:1759
EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1975
EIGEN_STRONG_INLINE Packet4ui pcmp_lt< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:1113
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< int64_t, Packet2l >(int64_t *to, const Packet2l &from, Index stride)
Definition: LSX/PacketMath.h:1779
EIGEN_STRONG_INLINE Packet8s padd< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: AltiVec/PacketMath.h:1078
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< double, Packet2d >(double *to, const Packet2d &from, Index stride)
Definition: LSX/PacketMath.h:1734
EIGEN_STRONG_INLINE Packet4ui pload< Packet4ui >(const uint32_t *from)
Definition: LSX/PacketMath.h:1435
EIGEN_STRONG_INLINE Packet2d pcmp_lt< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1085
EIGEN_STRONG_INLINE Packet16uc pandnot< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:1023
EIGEN_STRONG_INLINE Packet8us ploaddup< Packet8us >(const unsigned short int *from)
Definition: AltiVec/PacketMath.h:1659
EIGEN_STRONG_INLINE Packet4f pmax< PropagateNaN, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:2699
EIGEN_STRONG_INLINE Packet4i ploadu< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:1537
EIGEN_STRONG_INLINE double predux_mul< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2019
__vector short int Packet8s
Definition: AltiVec/PacketMath.h:37
EIGEN_STRONG_INLINE Packet4f pceil(const Packet4f &a)
Definition: LSX/PacketMath.h:2546
EIGEN_STRONG_INLINE Packet2d pdiv< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:782
EIGEN_STRONG_INLINE Packet2ul ploadu< Packet2ul >(const uint64_t *from)
Definition: LSX/PacketMath.h:1480
EIGEN_STRONG_INLINE Packet8bf psignbit(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:1966
EIGEN_STRONG_INLINE uint64_t predux_min< Packet2ul >(const Packet2ul &a)
Definition: LSX/PacketMath.h:2117
EIGEN_STRONG_INLINE double predux_min< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2073
EIGEN_STRONG_INLINE Packet4f pset1< Packet4f >(const float &from)
Definition: AltiVec/PacketMath.h:773
EIGEN_STRONG_INLINE Packet4ui pmax< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:1229
EIGEN_STRONG_INLINE uint32_t predux_min< Packet4ui >(const Packet4ui &a)
Definition: LSX/PacketMath.h:2112
EIGEN_STRONG_INLINE Packet4i psub< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1099
EIGEN_STRONG_INLINE Packet4ui plset< Packet4ui >(const uint32_t &a)
Definition: LSX/PacketMath.h:548
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter< float, Packet4f >(float *to, const Packet4f &from, Index stride)
Definition: AltiVec/PacketMath.h:954
EIGEN_STRONG_INLINE Packet2d plset< Packet2d >(const double &a)
Definition: LSX/PacketMath.h:563
EIGEN_STRONG_INLINE uint64_t predux_mul< Packet2ul >(const Packet2ul &a)
Definition: LSX/PacketMath.h:2063
EIGEN_STRONG_INLINE Packet8s pload< Packet8s >(const short int *from)
Definition: AltiVec/PacketMath.h:502
EIGEN_STRONG_INLINE Packet2ul padd< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:597
EIGEN_STRONG_INLINE Packet8us pxor< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1456
EIGEN_STRONG_INLINE int64_t predux_mul< Packet2l >(const Packet2l &a)
Definition: LSX/PacketMath.h:2041
EIGEN_STRONG_INLINE Packet8us padd< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1082
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet16c pgather< int8_t, Packet16c >(const int8_t *from, Index stride)
Definition: LSX/PacketMath.h:1626
EIGEN_STRONG_INLINE Packet16uc pand< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:900
EIGEN_STRONG_INLINE void pstore< float >(float *to, const Packet4f &from)
Definition: AltiVec/PacketMath.h:642
EIGEN_STRONG_INLINE Packet2l pset1< Packet2l >(const int64_t &from)
Definition: LSX/PacketMath.h:478
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1936
EIGEN_STRONG_INLINE Packet2l ploaddup< Packet2l >(const int64_t *from)
Definition: LSX/PacketMath.h:1509
EIGEN_STRONG_INLINE Packet2ul por< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:953
EIGEN_STRONG_INLINE Packet16uc por< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:941
EIGEN_STRONG_INLINE Packet4f ptrunc< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1501
EIGEN_STRONG_INLINE void pstore< uint8_t >(uint8_t *to, const Packet16uc &from)
Definition: LSX/PacketMath.h:1557
EIGEN_STRONG_INLINE bfloat16 pfirst(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2418
EIGEN_STRONG_INLINE Packet4i pcmp_le< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: LSX/PacketMath.h:1056
EIGEN_STRONG_INLINE uint64_t predux< Packet2ul >(const Packet2ul &a)
Definition: LSX/PacketMath.h:2009
EIGEN_STRONG_INLINE Packet8us por< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: AltiVec/PacketMath.h:1439
EIGEN_STRONG_INLINE Packet4ui pdiv< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:802
EIGEN_STRONG_INLINE Packet4f pset1frombits< Packet4f >(unsigned int from)
Definition: AltiVec/PacketMath.h:803
EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:835
EIGEN_STRONG_INLINE Packet4f pldexp< Packet4f >(const Packet4f &a, const Packet4f &exponent)
Definition: AltiVec/PacketMath.h:2319
EIGEN_STRONG_INLINE Packet2d ploadu< Packet2d >(const double *from)
Definition: LSX/PacketMath.h:1448
EIGEN_STRONG_INLINE Packet4f vec4f_unpackhi(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:141
EIGEN_STRONG_INLINE Packet4f pcmp_lt< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:1081
EIGEN_STRONG_INLINE Packet2ul pcmp_le< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:1076
EIGEN_STRONG_INLINE short int predux_mul< Packet8s >(const Packet8s &a)
Definition: AltiVec/PacketMath.h:2536
EIGEN_STRONG_INLINE Packet4f pxor< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1448
EIGEN_STRONG_INLINE void pstoreu< int32_t >(int32_t *to, const Packet4i &from)
Definition: LSX/PacketMath.h:1591
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< int8_t, Packet16c >(int8_t *to, const Packet16c &from, Index stride)
Definition: LSX/PacketMath.h:1739
EIGEN_STRONG_INLINE Packet4i pmin< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1261
EIGEN_STRONG_INLINE void prefetch< uint8_t >(const uint8_t *addr)
Definition: LSX/PacketMath.h:1856
EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f &a)
Definition: LSX/PacketMath.h:2176
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_float(const Packet _x)
Definition: GenericPacketMathFunctions.h:509
EIGEN_STRONG_INLINE Packet16uc plset< Packet16uc >(const unsigned char &a)
Definition: AltiVec/PacketMath.h:1061
EIGEN_STRONG_INLINE Packet16c ploadu< Packet16c >(const signed char *from)
Definition: AltiVec/PacketMath.h:1553
EIGEN_STRONG_INLINE Packet2l pmax< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:1217
EIGEN_STRONG_INLINE Packet2l psub< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:622
EIGEN_STRONG_INLINE Packet2d pfrexp< Packet2d >(const Packet2d &a, Packet2d &exponent)
Definition: LSX/PacketMath.h:2677
EIGEN_STRONG_INLINE Packet4f vec4f_swizzle1(const Packet4f &a, int p, int q, int r, int s)
Definition: LSX/PacketMath.h:126
EIGEN_STRONG_INLINE Packet8s pmin< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: AltiVec/PacketMath.h:1265
EIGEN_STRONG_INLINE Packet8s pset1< Packet8s >(const short int &from)
Definition: AltiVec/PacketMath.h:783
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pldexp_generic(const Packet &a, const Packet &exponent)
Definition: GenericPacketMathFunctions.h:226
EIGEN_STRONG_INLINE Packet8s pmul< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: AltiVec/PacketMath.h:1170
EIGEN_STRONG_INLINE Packet4f vec4f_unpacklo(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:138
EIGEN_STRONG_INLINE Packet4f pmsub(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:819
EIGEN_STRONG_INLINE Packet16uc pcmp_le< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:1064
EIGEN_STRONG_INLINE Packet2l pand< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:896
EIGEN_STRONG_INLINE Packet4ui pxor< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:990
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2319
EIGEN_STRONG_INLINE unsigned char pfirst< Packet16uc >(const Packet16uc &a)
Definition: AltiVec/PacketMath.h:1898
EIGEN_STRONG_INLINE Packet8s pcmp_lt< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:1093
EIGEN_STRONG_INLINE void pstore< int32_t >(int32_t *to, const Packet4i &from)
Definition: LSX/PacketMath.h:1549
EIGEN_STRONG_INLINE Packet16c pand< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:884
EIGEN_STRONG_INLINE Packet2d ptrunc< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2749
EIGEN_STRONG_INLINE Packet16c pandnot< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:1007
EIGEN_STRONG_INLINE Packet16uc pmin< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: AltiVec/PacketMath.h:1277
EIGEN_STRONG_INLINE Packet8h pxor(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2315
EIGEN_STRONG_INLINE int pfirst< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1869
EIGEN_STRONG_INLINE Packet4i plset< Packet4i >(const int &a)
Definition: AltiVec/PacketMath.h:1045
EIGEN_STRONG_INLINE Packet2d vec2d_unpacklo(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:160
EIGEN_STRONG_INLINE float predux< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2435
EIGEN_STRONG_INLINE Packet2ul pcmp_lt< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:1117
EIGEN_STRONG_INLINE Packet2l pload< Packet2l >(const int64_t *from)
Definition: LSX/PacketMath.h:1423
EIGEN_STRONG_INLINE void pstore< int16_t >(int16_t *to, const Packet8s &from)
Definition: LSX/PacketMath.h:1545
EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:827
EIGEN_STRONG_INLINE Packet4f ploadu< Packet4f >(const float *from)
Definition: AltiVec/PacketMath.h:1533
EIGEN_STRONG_INLINE Packet16uc pcmp_eq< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:1155
EIGEN_STRONG_INLINE void pstoreu< uint8_t >(uint8_t *to, const Packet16uc &from)
Definition: LSX/PacketMath.h:1599
EIGEN_STRONG_INLINE Packet4i pmul< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1166
EIGEN_STRONG_INLINE Packet2d print< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2745
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4ui pgather< uint32_t, Packet4ui >(const uint32_t *from, Index stride)
Definition: LSX/PacketMath.h:1710
EIGEN_STRONG_INLINE Packet4i ploadquad< Packet4i >(const int32_t *from)
Definition: LSX/PacketMath.h:2601
EIGEN_STRONG_INLINE void pstoreu< int8_t >(int8_t *to, const Packet16c &from)
Definition: LSX/PacketMath.h:1583
EIGEN_STRONG_INLINE Packet4i pand< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1410
EIGEN_STRONG_INLINE Packet2d pmin< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1244
EIGEN_STRONG_INLINE Packet16uc ploaddup< Packet16uc >(const unsigned char *from)
Definition: AltiVec/PacketMath.h:1704
EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1474
EIGEN_STRONG_INLINE int predux< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2445
EIGEN_STRONG_INLINE Packet8us pcmp_eq< Packet8us >(const Packet8us &a, const Packet8us &b)
Definition: LSX/PacketMath.h:1159
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2ul pgather< uint64_t, Packet2ul >(const uint64_t *from, Index stride)
Definition: LSX/PacketMath.h:1719
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pfrexp_generic(const Packet &a, Packet &exponent)
Definition: GenericPacketMathFunctions.h:184
EIGEN_ALWAYS_INLINE Packet2d make_packet2d(double a, double b)
Definition: LSX/PacketMath.h:145
EIGEN_STRONG_INLINE Packet4f pand< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1406
EIGEN_STRONG_INLINE Packet16c ploadquad< Packet16c >(const signed char *from)
Definition: AltiVec/PacketMath.h:1714
EIGEN_DEVICE_FUNC Packet psub(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:337
EIGEN_STRONG_INLINE int predux_max< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2684
EIGEN_STRONG_INLINE Packet2l pmin< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:1184
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< uint8_t, Packet16uc >(uint8_t *to, const Packet16uc &from, Index stride)
Definition: LSX/PacketMath.h:1785
EIGEN_STRONG_INLINE Packet8us pset1< Packet8us >(const unsigned short int &from)
Definition: AltiVec/PacketMath.h:788
EIGEN_STRONG_INLINE Packet8s pandnot< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:1011
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2d pgather< double, Packet2d >(const double *from, Index stride)
Definition: LSX/PacketMath.h:1621
EIGEN_STRONG_INLINE Packet16c ploaddup< Packet16c >(const signed char *from)
Definition: AltiVec/PacketMath.h:1694
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4i pgather< int32_t, Packet4i >(const int32_t *from, Index stride)
Definition: LSX/PacketMath.h:1660
EIGEN_STRONG_INLINE Packet4f pround(const Packet4f &a)
Definition: LSX/PacketMath.h:2555
EIGEN_STRONG_INLINE Packet4i pcmp_lt< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: LSX/PacketMath.h:1097
EIGEN_STRONG_INLINE int64_t predux_max< Packet2l >(const Packet2l &a)
Definition: LSX/PacketMath.h:2149
EIGEN_STRONG_INLINE unsigned short int pfirst< Packet8us >(const Packet8us &a)
Definition: AltiVec/PacketMath.h:1888
svint32_t PacketXi __attribute__((arm_sve_vector_bits(EIGEN_ARM64_SVE_VL)))
Definition: SVE/PacketMath.h:34
EIGEN_STRONG_INLINE Packet4i pmax< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1293
EIGEN_STRONG_INLINE Packet4f pfloor(const Packet4f &a)
Definition: LSX/PacketMath.h:2537
EIGEN_STRONG_INLINE Packet16uc pxor< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:982
EIGEN_STRONG_INLINE signed char predux< Packet16c >(const Packet16c &a)
Definition: AltiVec/PacketMath.h:2510
EIGEN_STRONG_INLINE Packet4f pabsdiff< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: LSX/PacketMath.h:2690
EIGEN_STRONG_INLINE unsigned short int predux_mul< Packet8us >(const Packet8us &a)
Definition: AltiVec/PacketMath.h:2547
EIGEN_STRONG_INLINE Packet16c padd< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: AltiVec/PacketMath.h:1086
EIGEN_STRONG_INLINE unsigned char predux_min< Packet16uc >(const Packet16uc &a)
Definition: AltiVec/PacketMath.h:2659
EIGEN_STRONG_INLINE short int predux< Packet8s >(const Packet8s &a)
Definition: AltiVec/PacketMath.h:2478
EIGEN_STRONG_INLINE Packet2l pmul< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:757
EIGEN_STRONG_INLINE Packet2d pset1frombits< Packet2d >(uint64_t from)
Definition: LSX/PacketMath.h:513
EIGEN_STRONG_INLINE Packet4i pload< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:497
__vector float Packet4f
Definition: AltiVec/PacketMath.h:33
EIGEN_STRONG_INLINE Packet2d psub< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:646
EIGEN_STRONG_INLINE Packet4f psub< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1095
EIGEN_STRONG_INLINE Packet8s psub< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: AltiVec/PacketMath.h:1103
EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f &a)
Definition: LSX/PacketMath.h:2528
EIGEN_STRONG_INLINE Packet4f plset< Packet4f >(const float &a)
Definition: AltiVec/PacketMath.h:1041
EIGEN_STRONG_INLINE uint32_t predux_mul< Packet4ui >(const Packet4ui &a)
Definition: LSX/PacketMath.h:2058
EIGEN_STRONG_INLINE Packet2ul psub< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:638
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< uint16_t, Packet8us >(uint16_t *to, const Packet8us &from, Index stride)
Definition: LSX/PacketMath.h:1805
EIGEN_STRONG_INLINE Packet8s ploadquad< Packet8s >(const short int *from)
Definition: AltiVec/PacketMath.h:1669
EIGEN_STRONG_INLINE uint64_t pfirst< Packet2ul >(const Packet2ul &a)
Definition: LSX/PacketMath.h:1914
EIGEN_STRONG_INLINE void pstoreu< int64_t >(int64_t *to, const Packet8l &from)
Definition: AVX512/PacketMath.h:1123
EIGEN_STRONG_INLINE void pstoreu< float >(float *to, const Packet4f &from)
Definition: AltiVec/PacketMath.h:1756
EIGEN_STRONG_INLINE Packet2d pmax< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1256
EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d)
Definition: LSX/PacketMath.h:92
EIGEN_STRONG_INLINE void prefetch< int16_t >(const int16_t *addr)
Definition: LSX/PacketMath.h:1844
EIGEN_STRONG_INLINE Packet16c pset1< Packet16c >(const signed char &from)
Definition: AltiVec/PacketMath.h:793
EIGEN_STRONG_INLINE void pstore< uint64_t >(uint64_t *to, const Packet2ul &from)
Definition: LSX/PacketMath.h:1569
EIGEN_STRONG_INLINE Packet2l plset< Packet2l >(const int64_t &a)
Definition: LSX/PacketMath.h:533
EIGEN_STRONG_INLINE Packet4ui ploadu< Packet4ui >(const uint32_t *from)
Definition: LSX/PacketMath.h:1476
EIGEN_STRONG_INLINE int64_t pfirst< Packet2l >(const Packet2l &a)
Definition: LSX/PacketMath.h:1898
EIGEN_STRONG_INLINE void pstore< uint32_t >(uint32_t *to, const Packet8ui &from)
Definition: AVX/PacketMath.h:1619
EIGEN_STRONG_INLINE Packet16c pxor< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: LSX/PacketMath.h:966
EIGEN_STRONG_INLINE Packet4ui ploadquad< Packet4ui >(const uint32_t *from)
Definition: LSX/PacketMath.h:2606
EIGEN_STRONG_INLINE Packet2ul pdiv< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:806
EIGEN_STRONG_INLINE void pstoreu< uint64_t >(uint64_t *to, const Packet2ul &from)
Definition: LSX/PacketMath.h:1611
EIGEN_STRONG_INLINE double pfirst< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:1879
EIGEN_STRONG_INLINE Packet16uc pabsdiff< Packet16uc >(const Packet16uc &a, const Packet16uc &b)
Definition: LSX/PacketMath.h:2793
EIGEN_STRONG_INLINE unsigned char predux_mul< Packet16uc >(const Packet16uc &a)
Definition: AltiVec/PacketMath.h:2578
EIGEN_STRONG_INLINE void pstoreu< int16_t >(int16_t *to, const Packet8s &from)
Definition: LSX/PacketMath.h:1587
EIGEN_STRONG_INLINE Packet4f pexp(const Packet4f &_x)
Definition: LSX/PacketMath.h:2663
EIGEN_STRONG_INLINE Packet4ui pcmp_eq< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: LSX/PacketMath.h:1163
eigen_packet_wrapper< __m128i, 7 > Packet2ul
Definition: LSX/PacketMath.h:45
EIGEN_STRONG_INLINE Packet2l pxor< Packet2l >(const Packet2l &a, const Packet2l &b)
Definition: LSX/PacketMath.h:978
EIGEN_STRONG_INLINE Packet16c pmul< Packet16c >(const Packet16c &a, const Packet16c &b)
Definition: AltiVec/PacketMath.h:1178
EIGEN_STRONG_INLINE Packet8s pxor< Packet8s >(const Packet8s &a, const Packet8s &b)
Definition: LSX/PacketMath.h:970
EIGEN_STRONG_INLINE Packet4ui ploaddup< Packet4ui >(const uint32_t *from)
Definition: LSX/PacketMath.h:1523
EIGEN_STRONG_INLINE void pstore< int64_t >(int64_t *to, const Packet8l &from)
Definition: AVX512/PacketMath.h:1106
EIGEN_STRONG_INLINE Packet8us ploadquad< Packet8us >(const unsigned short int *from)
Definition: AltiVec/PacketMath.h:1679
EIGEN_STRONG_INLINE Packet4f print< Packet4f >(const Packet4f &a)
Definition: LSX/PacketMath.h:2711
EIGEN_STRONG_INLINE Packet4f preciprocal< Packet4f >(const Packet4f &a)
Definition: LSX/PacketMath.h:2719
EIGEN_STRONG_INLINE float predux_min< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2599
EIGEN_STRONG_INLINE void pstore< uint16_t >(uint16_t *to, const Packet8us &from)
Definition: LSX/PacketMath.h:1561
EIGEN_STRONG_INLINE void prefetch< double >(const double *addr)
Definition: AVX/PacketMath.h:1750
EIGEN_STRONG_INLINE Packet4f pmax< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1282
EIGEN_STRONG_INLINE Packet2ul pand< Packet2ul >(const Packet2ul &a, const Packet2ul &b)
Definition: LSX/PacketMath.h:912
std::int32_t int32_t
Definition: Meta.h:41
std::int8_t int8_t
Definition: Meta.h:37
std::uint8_t uint8_t
Definition: Meta.h:36
std::int16_t int16_t
Definition: Meta.h:39
std::int64_t int64_t
Definition: Meta.h:43
EIGEN_DEVICE_FUNC const Scalar & q
Definition: SpecialFunctionsImpl.h:2019
std::uint16_t uint16_t
Definition: Meta.h:38
std::uint32_t uint32_t
Definition: Meta.h:40
std::uint64_t uint64_t
Definition: Meta.h:42
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
double f1(const Vector< double > &coord)
f1 function, in front of the C1 unknown
Definition: poisson/poisson_with_singularity/two_d_poisson.cc:147
r
Definition: UniformPSDSelfTest.py:20
int c
Definition: calibrate.py:100
Definition: Eigen_Colamd.h:49
Definition: GenericPacketMath.h:1407
Packet packet[N]
Definition: GenericPacketMath.h:1408
Definition: GenericPacketMath.h:45
@ HasSign
Definition: GenericPacketMath.h:59
@ HasRsqrt
Definition: GenericPacketMath.h:74
@ HasBlend
Definition: GenericPacketMath.h:66
@ HasCmp
Definition: GenericPacketMath.h:69
@ HasExp
Definition: GenericPacketMath.h:75
@ HasSqrt
Definition: GenericPacketMath.h:73
@ HasLog
Definition: GenericPacketMath.h:77
@ HasDiv
Definition: GenericPacketMath.h:71
Definition: GenericPacketMath.h:225
Definition: Meta.h:145
@ value
Definition: Meta.h:146
Packet2d half
Definition: LSX/PacketMath.h:325
Packet2d type
Definition: LSX/PacketMath.h:324
Packet4f type
Definition: LSX/PacketMath.h:303
Packet4f half
Definition: LSX/PacketMath.h:304
Packet8s type
Definition: LSX/PacketMath.h:181
Packet8s half
Definition: LSX/PacketMath.h:182
Packet4i half
Definition: LSX/PacketMath.h:199
Packet4i type
Definition: LSX/PacketMath.h:198
Packet2l half
Definition: LSX/PacketMath.h:216
Packet2l type
Definition: LSX/PacketMath.h:215
Packet16c type
Definition: LSX/PacketMath.h:165
Packet16c half
Definition: LSX/PacketMath.h:166
Packet8us half
Definition: LSX/PacketMath.h:250
Packet8us type
Definition: LSX/PacketMath.h:249
Packet4ui type
Definition: LSX/PacketMath.h:267
Packet4ui half
Definition: LSX/PacketMath.h:268
Packet2ul type
Definition: LSX/PacketMath.h:285
Packet2ul half
Definition: LSX/PacketMath.h:286
Packet16uc type
Definition: LSX/PacketMath.h:232
Packet16uc half
Definition: LSX/PacketMath.h:233
Definition: GenericPacketMath.h:108
@ size
Definition: GenericPacketMath.h:113
@ AlignedOnScalar
Definition: GenericPacketMath.h:114
@ Vectorizable
Definition: GenericPacketMath.h:112
@ HasNegate
Definition: GenericPacketMath.h:120
@ HasSetLinear
Definition: GenericPacketMath.h:126
@ HasAbs2
Definition: GenericPacketMath.h:122
int8_t type
Definition: LSX/PacketMath.h:344
Packet16c half
Definition: LSX/PacketMath.h:345
Packet16uc half
Definition: LSX/PacketMath.h:393
uint8_t type
Definition: LSX/PacketMath.h:392
double type
Definition: LSX/PacketMath.h:453
Packet2l integer_packet
Definition: LSX/PacketMath.h:455
Packet2d half
Definition: LSX/PacketMath.h:454
Packet2l half
Definition: LSX/PacketMath.h:381
int64_t type
Definition: LSX/PacketMath.h:380
Packet2ul half
Definition: LSX/PacketMath.h:429
uint64_t type
Definition: LSX/PacketMath.h:428
Packet4i integer_packet
Definition: LSX/PacketMath.h:442
Packet4f half
Definition: LSX/PacketMath.h:441
float type
Definition: LSX/PacketMath.h:440
int32_t type
Definition: LSX/PacketMath.h:368
Packet4i half
Definition: LSX/PacketMath.h:369
uint32_t type
Definition: LSX/PacketMath.h:416
Packet4ui half
Definition: LSX/PacketMath.h:417
int16_t type
Definition: LSX/PacketMath.h:356
Packet8s half
Definition: LSX/PacketMath.h:357
uint16_t type
Definition: LSX/PacketMath.h:404
Packet8us half
Definition: LSX/PacketMath.h:405
Definition: GenericPacketMath.h:134
@ masked_load_available
Definition: GenericPacketMath.h:142
@ size
Definition: GenericPacketMath.h:139
@ masked_store_available
Definition: GenericPacketMath.h:143
@ vectorizable
Definition: GenericPacketMath.h:141
@ alignment
Definition: GenericPacketMath.h:140