MSA/PacketMath.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2018 Wave Computing, Inc.
5 // Written by:
6 // Chris Larsen
7 // Alexey Frunze (afrunze@wavecomp.com)
8 //
9 // This Source Code Form is subject to the terms of the Mozilla
10 // Public License v. 2.0. If a copy of the MPL was not distributed
11 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
12 
13 #ifndef EIGEN_PACKET_MATH_MSA_H
14 #define EIGEN_PACKET_MATH_MSA_H
15 
16 #include <iostream>
17 #include <string>
18 
19 // IWYU pragma: private
20 #include "../../InternalHeaderCheck.h"
21 
22 namespace Eigen {
23 
24 namespace internal {
25 
// Backend tunables. Each macro is only defined here when the including
// translation unit has not already provided its own definition.

// Fallback value of the cache-friendly product threshold.
#if !defined(EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD)
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif

// Flag macro (no value): advertises a single-instruction multiply-add.
#if !defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif

// Fallback register count assumed for this architecture.
#if !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
#endif
37 
// Tracing hook. Flip the condition below to 1 to make EIGEN_MSA_DEBUG print
// "<file>:<line>:<function>" to std::cout the first time the enclosing
// function is executed; with the condition at 0 the macro expands to nothing.
#if 0
#define EIGEN_MSA_DEBUG                                                             \
  static bool firstTime = true;                                                     \
  do {                                                                              \
    if (firstTime) {                                                                \
      std::cout << __FILE__ << ':' << __LINE__ << ':' << __FUNCTION__ << std::endl; \
      firstTime = false;                                                            \
    }                                                                               \
  } while (0)
#else
#define EIGEN_MSA_DEBUG
#endif
50 
// Packs four lane selectors into one 8-bit shuffle immediate:
// `a` occupies bits 0-1, `b` bits 2-3, `c` bits 4-5 and `d` bits 6-7.
#define EIGEN_MSA_SHF_I8(a, b, c, d) ((a) | ((b) << 2) | ((c) << 4) | ((d) << 6))
52 
53 typedef v4f32 Packet4f;
54 typedef v4i32 Packet4i;
55 typedef v4u32 Packet4ui;
56 
57 #define EIGEN_DECLARE_CONST_Packet4f(NAME, X) const Packet4f p4f_##NAME = {X, X, X, X}
58 #define EIGEN_DECLARE_CONST_Packet4i(NAME, X) const Packet4i p4i_##NAME = {X, X, X, X}
59 #define EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = {X, X, X, X}
60 
61 inline std::ostream& operator<<(std::ostream& os, const Packet4f& value) {
62  os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
63  return os;
64 }
65 
66 inline std::ostream& operator<<(std::ostream& os, const Packet4i& value) {
67  os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
68  return os;
69 }
70 
71 inline std::ostream& operator<<(std::ostream& os, const Packet4ui& value) {
72  os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
73  return os;
74 }
75 
76 template <>
77 struct packet_traits<float> : default_packet_traits {
78  typedef Packet4f type;
79  typedef Packet4f half; // Packet2f intrinsics not implemented yet
80  enum {
81  Vectorizable = 1,
82  AlignedOnScalar = 1,
83  size = 4,
84  // FIXME check the Has*
85  HasDiv = 1,
90  HasLog = 1,
91  HasExp = 1,
92  HasSqrt = 1,
93  HasRsqrt = 1,
94  HasBlend = 1
95  };
96 };
97 
98 template <>
99 struct packet_traits<int32_t> : default_packet_traits {
100  typedef Packet4i type;
101  typedef Packet4i half; // Packet2i intrinsics not implemented yet
102  enum {
103  Vectorizable = 1,
104  AlignedOnScalar = 1,
105  size = 4,
106  // FIXME check the Has*
107  HasDiv = 1,
108  HasBlend = 1
109  };
110 };
111 
112 template <>
113 struct unpacket_traits<Packet4f> {
114  typedef float type;
115  enum {
116  size = 4,
118  vectorizable = true,
119  masked_load_available = false,
120  masked_store_available = false
121  };
122  typedef Packet4f half;
123 };
124 
125 template <>
126 struct unpacket_traits<Packet4i> {
127  typedef int32_t type;
128  enum {
129  size = 4,
131  vectorizable = true,
132  masked_load_available = false,
133  masked_store_available = false
134  };
135  typedef Packet4i half;
136 };
137 
138 template <>
139 EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
141 
142  Packet4f v = {from, from, from, from};
143  return v;
144 }
145 
146 template <>
149 
150  return __builtin_msa_fill_w(from);
151 }
152 
153 template <>
156 
157  float f = *from;
158  Packet4f v = {f, f, f, f};
159  return v;
160 }
161 
162 template <>
165 
166  return __builtin_msa_fill_w(*from);
167 }
168 
169 template <>
172 
173  return __builtin_msa_fadd_w(a, b);
174 }
175 
176 template <>
179 
180  return __builtin_msa_addv_w(a, b);
181 }
182 
183 template <>
186 
187  static const Packet4f countdown = {0.0f, 1.0f, 2.0f, 3.0f};
188  return padd(pset1<Packet4f>(a), countdown);
189 }
190 
191 template <>
194 
195  static const Packet4i countdown = {0, 1, 2, 3};
196  return padd(pset1<Packet4i>(a), countdown);
197 }
198 
199 template <>
202 
203  return __builtin_msa_fsub_w(a, b);
204 }
205 
206 template <>
209 
210  return __builtin_msa_subv_w(a, b);
211 }
212 
213 template <>
216 
217  return (Packet4f)__builtin_msa_bnegi_w((v4u32)a, 31);
218 }
219 
220 template <>
223 
224  return __builtin_msa_addvi_w((v4i32)__builtin_msa_nori_b((v16u8)a, 0), 1);
225 }
226 
227 template <>
230 
231  return a;
232 }
233 
234 template <>
237 
238  return a;
239 }
240 
241 template <>
244 
245  return __builtin_msa_fmul_w(a, b);
246 }
247 
248 template <>
251 
252  return __builtin_msa_mulv_w(a, b);
253 }
254 
255 template <>
258 
259  return __builtin_msa_fdiv_w(a, b);
260 }
261 
262 template <>
265 
266  return __builtin_msa_div_s_w(a, b);
267 }
268 
269 template <>
270 EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
272 
273  return __builtin_msa_fmadd_w(c, a, b);
274 }
275 
276 template <>
277 EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) {
279 
280  // Use "asm" construct to avoid __builtin_msa_maddv_w GNU C bug.
281  Packet4i value = c;
282  __asm__("maddv.w %w[value], %w[a], %w[b]\n"
283  // Outputs
284  : [value] "+f"(value)
285  // Inputs
286  : [a] "f"(a), [b] "f"(b));
287  return value;
288 }
289 
290 template <>
293 
294  return (Packet4f)__builtin_msa_and_v((v16u8)a, (v16u8)b);
295 }
296 
297 template <>
300 
301  return (Packet4i)__builtin_msa_and_v((v16u8)a, (v16u8)b);
302 }
303 
304 template <>
307 
308  return (Packet4f)__builtin_msa_or_v((v16u8)a, (v16u8)b);
309 }
310 
311 template <>
314 
315  return (Packet4i)__builtin_msa_or_v((v16u8)a, (v16u8)b);
316 }
317 
318 template <>
321 
322  return (Packet4f)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
323 }
324 
325 template <>
328 
329  return (Packet4i)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
330 }
331 
332 template <>
335 
336  return pand(a, (Packet4f)__builtin_msa_xori_b((v16u8)b, 255));
337 }
338 
339 template <>
342 
343  return pand(a, (Packet4i)__builtin_msa_xori_b((v16u8)b, 255));
344 }
345 
346 template <>
349 
350 #if EIGEN_FAST_MATH
351  // This prefers numbers to NaNs.
352  return __builtin_msa_fmin_w(a, b);
353 #else
354  // This prefers NaNs to numbers.
355  Packet4i aNaN = __builtin_msa_fcun_w(a, a);
356  Packet4i aMinOrNaN = por(__builtin_msa_fclt_w(a, b), aNaN);
357  return (Packet4f)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, (v16u8)a);
358 #endif
359 }
360 
361 template <>
364 
365  return __builtin_msa_min_s_w(a, b);
366 }
367 
368 template <>
371 
372 #if EIGEN_FAST_MATH
373  // This prefers numbers to NaNs.
374  return __builtin_msa_fmax_w(a, b);
375 #else
376  // This prefers NaNs to numbers.
377  Packet4i aNaN = __builtin_msa_fcun_w(a, a);
378  Packet4i aMaxOrNaN = por(__builtin_msa_fclt_w(b, a), aNaN);
379  return (Packet4f)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);
380 #endif
381 }
382 
383 template <>
386 
387  return __builtin_msa_max_s_w(a, b);
388 }
389 
390 template <>
391 EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) {
393 
394  EIGEN_DEBUG_ALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);
395 }
396 
397 template <>
400 
401  EIGEN_DEBUG_ALIGNED_LOAD return __builtin_msa_ld_w(const_cast<int32_t*>(from), 0);
402 }
403 
404 template <>
405 EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
407 
408  EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);
409 }
410 
411 template <>
414 
415  EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4i)__builtin_msa_ld_w(const_cast<int32_t*>(from), 0);
416 }
417 
418 template <>
421 
422  float f0 = from[0], f1 = from[1];
423  Packet4f v0 = {f0, f0, f0, f0};
424  Packet4f v1 = {f1, f1, f1, f1};
425  return (Packet4f)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);
426 }
427 
428 template <>
431 
432  int32_t i0 = from[0], i1 = from[1];
433  Packet4i v0 = {i0, i0, i0, i0};
434  Packet4i v1 = {i1, i1, i1, i1};
435  return (Packet4i)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);
436 }
437 
438 template <>
439 EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) {
441 
442  EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);
443 }
444 
445 template <>
446 EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) {
448 
449  EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w(from, to, 0);
450 }
451 
452 template <>
453 EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) {
455 
456  EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);
457 }
458 
459 template <>
460 EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) {
462 
463  EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w(from, to, 0);
464 }
465 
466 template <>
467 EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) {
469 
470  float f = *from;
471  Packet4f v = {f, f, f, f};
472  v[1] = from[stride];
473  v[2] = from[2 * stride];
474  v[3] = from[3 * stride];
475  return v;
476 }
477 
478 template <>
481 
482  int32_t i = *from;
483  Packet4i v = {i, i, i, i};
484  v[1] = from[stride];
485  v[2] = from[2 * stride];
486  v[3] = from[3 * stride];
487  return v;
488 }
489 
490 template <>
491 EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride) {
493 
494  *to = from[0];
495  to += stride;
496  *to = from[1];
497  to += stride;
498  *to = from[2];
499  to += stride;
500  *to = from[3];
501 }
502 
503 template <>
504 EIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from, Index stride) {
506 
507  *to = from[0];
508  to += stride;
509  *to = from[1];
510  to += stride;
511  *to = from[2];
512  to += stride;
513  *to = from[3];
514 }
515 
516 template <>
517 EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) {
519 
520  __builtin_prefetch(addr);
521 }
522 
523 template <>
526 
527  __builtin_prefetch(addr);
528 }
529 
530 template <>
533 
534  return a[0];
535 }
536 
537 template <>
540 
541  return a[0];
542 }
543 
544 template <>
547 
548  return (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));
549 }
550 
551 template <>
554 
555  return __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));
556 }
557 
558 template <>
561 
562  return (Packet4f)__builtin_msa_bclri_w((v4u32)a, 31);
563 }
564 
565 template <>
568 
569  Packet4i zero = __builtin_msa_ldi_w(0);
570  return __builtin_msa_add_a_w(zero, a);
571 }
572 
573 template <>
576 
577  Packet4f s = padd(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
578  s = padd(s, (Packet4f)__builtin_msa_shf_w((v4i32)s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
579  return s[0];
580 }
581 
582 template <>
585 
586  Packet4i s = padd(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
587  s = padd(s, __builtin_msa_shf_w(s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
588  return s[0];
589 }
590 
591 // Other reduction functions:
592 // mul
593 template <>
596 
597  Packet4f p = pmul(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
598  p = pmul(p, (Packet4f)__builtin_msa_shf_w((v4i32)p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
599  return p[0];
600 }
601 
602 template <>
605 
606  Packet4i p = pmul(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
607  p = pmul(p, __builtin_msa_shf_w(p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
608  return p[0];
609 }
610 
611 // min
612 template <>
615 
616  // Swap 64-bit halves of a.
617  Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
618 #if !EIGEN_FAST_MATH
619  // Detect presence of NaNs from pairs a[0]-a[2] and a[1]-a[3] as two 32-bit
620  // masks of all zeroes/ones in low 64 bits.
621  v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);
622  // Combine the two masks into one: 64 ones if no NaNs, otherwise 64 zeroes.
623  unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);
624 #endif
625  // Continue with min computation.
626  Packet4f v = __builtin_msa_fmin_w(a, swapped);
627  v = __builtin_msa_fmin_w(v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
628 #if !EIGEN_FAST_MATH
629  // Based on the mask select between v and 4 qNaNs.
630  v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);
631  v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);
632 #endif
633  return v[0];
634 }
635 
636 template <>
639 
640  Packet4i m = pmin(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
641  m = pmin(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
642  return m[0];
643 }
644 
645 // max
646 template <>
649 
650  // Swap 64-bit halves of a.
651  Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
652 #if !EIGEN_FAST_MATH
653  // Detect presence of NaNs from pairs a[0]-a[2] and a[1]-a[3] as two 32-bit
654  // masks of all zeroes/ones in low 64 bits.
655  v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);
656  // Combine the two masks into one: 64 ones if no NaNs, otherwise 64 zeroes.
657  unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);
658 #endif
659  // Continue with max computation.
660  Packet4f v = __builtin_msa_fmax_w(a, swapped);
661  v = __builtin_msa_fmax_w(v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
662 #if !EIGEN_FAST_MATH
663  // Based on the mask select between v and 4 qNaNs.
664  v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);
665  v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);
666 #endif
667  return v[0];
668 }
669 
670 template <>
673 
674  Packet4i m = pmax(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
675  m = pmax(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
676  return m[0];
677 }
678 
679 inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4f, 4>& value) {
680  os << "[ " << value.packet[0] << "," << std::endl
681  << " " << value.packet[1] << "," << std::endl
682  << " " << value.packet[2] << "," << std::endl
683  << " " << value.packet[3] << " ]";
684  return os;
685 }
686 
687 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
689 
690  v4i32 tmp1, tmp2, tmp3, tmp4;
691 
692  tmp1 = __builtin_msa_ilvr_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);
693  tmp2 = __builtin_msa_ilvr_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);
694  tmp3 = __builtin_msa_ilvl_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);
695  tmp4 = __builtin_msa_ilvl_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);
696 
697  kernel.packet[0] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
698  kernel.packet[1] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);
699  kernel.packet[2] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);
700  kernel.packet[3] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);
701 }
702 
703 inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4i, 4>& value) {
704  os << "[ " << value.packet[0] << "," << std::endl
705  << " " << value.packet[1] << "," << std::endl
706  << " " << value.packet[2] << "," << std::endl
707  << " " << value.packet[3] << " ]";
708  return os;
709 }
710 
711 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4i, 4>& kernel) {
713 
714  v4i32 tmp1, tmp2, tmp3, tmp4;
715 
716  tmp1 = __builtin_msa_ilvr_w(kernel.packet[1], kernel.packet[0]);
717  tmp2 = __builtin_msa_ilvr_w(kernel.packet[3], kernel.packet[2]);
718  tmp3 = __builtin_msa_ilvl_w(kernel.packet[1], kernel.packet[0]);
719  tmp4 = __builtin_msa_ilvl_w(kernel.packet[3], kernel.packet[2]);
720 
721  kernel.packet[0] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
722  kernel.packet[1] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);
723  kernel.packet[2] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);
724  kernel.packet[3] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);
725 }
726 
727 template <>
730 
731  return __builtin_msa_fsqrt_w(a);
732 }
733 
734 template <>
737 
738 #if EIGEN_FAST_MATH
739  return __builtin_msa_frsqrt_w(a);
740 #else
741  Packet4f ones = __builtin_msa_ffint_s_w(__builtin_msa_ldi_w(1));
742  return pdiv(ones, psqrt(a));
743 #endif
744 }
745 
746 template <>
748  Packet4f v = a;
749  int32_t old_mode, new_mode;
750  asm volatile(
751  "cfcmsa %[old_mode], $1\n"
752  "ori %[new_mode], %[old_mode], 3\n" // 3 = round towards -INFINITY.
753  "ctcmsa $1, %[new_mode]\n"
754  "frint.w %w[v], %w[v]\n"
755  "ctcmsa $1, %[old_mode]\n"
756  : // outputs
757  [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
758  [v] "+f"(v)
759  : // inputs
760  : // clobbers
761  );
762  return v;
763 }
764 
765 template <>
767  Packet4f v = a;
768  int32_t old_mode, new_mode;
769  asm volatile(
770  "cfcmsa %[old_mode], $1\n"
771  "ori %[new_mode], %[old_mode], 3\n"
772  "xori %[new_mode], %[new_mode], 1\n" // 2 = round towards +INFINITY.
773  "ctcmsa $1, %[new_mode]\n"
774  "frint.w %w[v], %w[v]\n"
775  "ctcmsa $1, %[old_mode]\n"
776  : // outputs
777  [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
778  [v] "+f"(v)
779  : // inputs
780  : // clobbers
781  );
782  return v;
783 }
784 
785 template <>
787  Packet4f v = a;
788  int32_t old_mode, new_mode;
789  asm volatile(
790  "cfcmsa %[old_mode], $1\n"
791  "ori %[new_mode], %[old_mode], 3\n"
792  "xori %[new_mode], %[new_mode], 3\n" // 0 = round to nearest, ties to even.
793  "ctcmsa $1, %[new_mode]\n"
794  "frint.w %w[v], %w[v]\n"
795  "ctcmsa $1, %[old_mode]\n"
796  : // outputs
797  [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
798  [v] "+f"(v)
799  : // inputs
800  : // clobbers
801  );
802  return v;
803 }
804 
805 template <>
806 EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket,
807  const Packet4f& elsePacket) {
808  Packet4ui select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3]};
809  Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);
810  return (Packet4f)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
811 }
812 
813 template <>
814 EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket,
815  const Packet4i& elsePacket) {
816  Packet4ui select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3]};
817  Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);
818  return (Packet4i)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
819 }
820 
821 //---------- double ----------
822 
823 typedef v2f64 Packet2d;
824 typedef v2i64 Packet2l;
825 typedef v2u64 Packet2ul;
826 
827 #define EIGEN_DECLARE_CONST_Packet2d(NAME, X) const Packet2d p2d_##NAME = {X, X}
828 #define EIGEN_DECLARE_CONST_Packet2l(NAME, X) const Packet2l p2l_##NAME = {X, X}
829 #define EIGEN_DECLARE_CONST_Packet2ul(NAME, X) const Packet2ul p2ul_##NAME = {X, X}
830 
831 inline std::ostream& operator<<(std::ostream& os, const Packet2d& value) {
832  os << "[ " << value[0] << ", " << value[1] << " ]";
833  return os;
834 }
835 
836 inline std::ostream& operator<<(std::ostream& os, const Packet2l& value) {
837  os << "[ " << value[0] << ", " << value[1] << " ]";
838  return os;
839 }
840 
841 inline std::ostream& operator<<(std::ostream& os, const Packet2ul& value) {
842  os << "[ " << value[0] << ", " << value[1] << " ]";
843  return os;
844 }
845 
846 template <>
847 struct packet_traits<double> : default_packet_traits {
848  typedef Packet2d type;
849  typedef Packet2d half;
850  enum {
851  Vectorizable = 1,
852  AlignedOnScalar = 1,
853  size = 2,
854  // FIXME check the Has*
855  HasDiv = 1,
856  HasExp = 1,
857  HasSqrt = 1,
858  HasRsqrt = 1,
859  HasBlend = 1
860  };
861 };
862 
863 template <>
864 struct unpacket_traits<Packet2d> {
865  typedef double type;
866  enum {
867  size = 2,
869  vectorizable = true,
870  masked_load_available = false,
871  masked_store_available = false
872  };
873  typedef Packet2d half;
874 };
875 
876 template <>
877 EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
879 
880  Packet2d value = {from, from};
881  return value;
882 }
883 
884 template <>
887 
888  return __builtin_msa_fadd_d(a, b);
889 }
890 
891 template <>
894 
895  static const Packet2d countdown = {0.0, 1.0};
896  return padd(pset1<Packet2d>(a), countdown);
897 }
898 
899 template <>
902 
903  return __builtin_msa_fsub_d(a, b);
904 }
905 
906 template <>
909 
910  return (Packet2d)__builtin_msa_bnegi_d((v2u64)a, 63);
911 }
912 
913 template <>
916 
917  return a;
918 }
919 
920 template <>
923 
924  return __builtin_msa_fmul_d(a, b);
925 }
926 
927 template <>
930 
931  return __builtin_msa_fdiv_d(a, b);
932 }
933 
934 template <>
935 EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) {
937 
938  return __builtin_msa_fmadd_d(c, a, b);
939 }
940 
// Bitwise logical operations are not available on floating-point vectors, so the
// operands are reinterpreted as byte vectors and combined with the MSA bitwise intrinsics.
943 template <>
946 
947  return (Packet2d)__builtin_msa_and_v((v16u8)a, (v16u8)b);
948 }
949 
950 template <>
953 
954  return (Packet2d)__builtin_msa_or_v((v16u8)a, (v16u8)b);
955 }
956 
957 template <>
960 
961  return (Packet2d)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
962 }
963 
964 template <>
967 
968  return pand(a, (Packet2d)__builtin_msa_xori_b((v16u8)b, 255));
969 }
970 
971 template <>
972 EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) {
974 
975  EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);
976 }
977 
978 template <>
981 
982 #if EIGEN_FAST_MATH
983  // This prefers numbers to NaNs.
984  return __builtin_msa_fmin_d(a, b);
985 #else
986  // This prefers NaNs to numbers.
987  v2i64 aNaN = __builtin_msa_fcun_d(a, a);
988  v2i64 aMinOrNaN = por(__builtin_msa_fclt_d(a, b), aNaN);
989  return (Packet2d)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, (v16u8)a);
990 #endif
991 }
992 
993 template <>
996 
997 #if EIGEN_FAST_MATH
998  // This prefers numbers to NaNs.
999  return __builtin_msa_fmax_d(a, b);
1000 #else
1001  // This prefers NaNs to numbers.
1002  v2i64 aNaN = __builtin_msa_fcun_d(a, a);
1003  v2i64 aMaxOrNaN = por(__builtin_msa_fclt_d(b, a), aNaN);
1004  return (Packet2d)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);
1005 #endif
1006 }
1007 
1008 template <>
1009 EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) {
1011 
1012  EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);
1013 }
1014 
1015 template <>
1016 EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from) {
1018 
1019  Packet2d value = {*from, *from};
1020  return value;
1021 }
1022 
1023 template <>
1024 EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) {
1026 
1027  EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);
1028 }
1029 
1030 template <>
1031 EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
1033 
1034  EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);
1035 }
1036 
1037 template <>
1038 EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) {
1040 
1041  Packet2d value;
1042  value[0] = *from;
1043  from += stride;
1044  value[1] = *from;
1045  return value;
1046 }
1047 
1048 template <>
1049 EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride) {
1051 
1052  *to = from[0];
1053  to += stride;
1054  *to = from[1];
1055 }
1056 
1057 template <>
1058 EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) {
1060 
1061  __builtin_prefetch(addr);
1062 }
1063 
1064 template <>
1067 
1068  return a[0];
1069 }
1070 
1071 template <>
1074 
1075  return (Packet2d)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1076 }
1077 
1078 template <>
1081 
1082  return (Packet2d)__builtin_msa_bclri_d((v2u64)a, 63);
1083 }
1084 
1085 template <>
1088 
1089  Packet2d s = padd(a, preverse(a));
1090  return s[0];
1091 }
1092 
1093 // Other reduction functions:
1094 // mul
1095 template <>
1098 
1099  Packet2d p = pmul(a, preverse(a));
1100  return p[0];
1101 }
1102 
1103 // min
1104 template <>
1107 
1108 #if EIGEN_FAST_MATH
1109  Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1110  Packet2d v = __builtin_msa_fmin_d(a, swapped);
1111  return v[0];
1112 #else
1113  double a0 = a[0], a1 = a[1];
1114  return ((numext::isnan)(a0) || a0 < a1) ? a0 : a1;
1115 #endif
1116 }
1117 
1118 // max
1119 template <>
1122 
1123 #if EIGEN_FAST_MATH
1124  Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1125  Packet2d v = __builtin_msa_fmax_d(a, swapped);
1126  return v[0];
1127 #else
1128  double a0 = a[0], a1 = a[1];
1129  return ((numext::isnan)(a0) || a0 > a1) ? a0 : a1;
1130 #endif
1131 }
1132 
1133 template <>
1136 
1137  return __builtin_msa_fsqrt_d(a);
1138 }
1139 
1140 template <>
1143 
1144 #if EIGEN_FAST_MATH
1145  return __builtin_msa_frsqrt_d(a);
1146 #else
1147  Packet2d ones = __builtin_msa_ffint_s_d(__builtin_msa_ldi_d(1));
1148  return pdiv(ones, psqrt(a));
1149 #endif
1150 }
1151 
1152 inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet2d, 2>& value) {
1153  os << "[ " << value.packet[0] << "," << std::endl << " " << value.packet[1] << " ]";
1154  return os;
1155 }
1156 
1157 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2d, 2>& kernel) {
1159 
1160  Packet2d trn1 = (Packet2d)__builtin_msa_ilvev_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);
1161  Packet2d trn2 = (Packet2d)__builtin_msa_ilvod_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);
1162  kernel.packet[0] = trn1;
1163  kernel.packet[1] = trn2;
1164 }
1165 
1166 template <>
1168  Packet2d v = a;
1169  int32_t old_mode, new_mode;
1170  asm volatile(
1171  "cfcmsa %[old_mode], $1\n"
1172  "ori %[new_mode], %[old_mode], 3\n" // 3 = round towards -INFINITY.
1173  "ctcmsa $1, %[new_mode]\n"
1174  "frint.d %w[v], %w[v]\n"
1175  "ctcmsa $1, %[old_mode]\n"
1176  : // outputs
1177  [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1178  [v] "+f"(v)
1179  : // inputs
1180  : // clobbers
1181  );
1182  return v;
1183 }
1184 
1185 template <>
1187  Packet2d v = a;
1188  int32_t old_mode, new_mode;
1189  asm volatile(
1190  "cfcmsa %[old_mode], $1\n"
1191  "ori %[new_mode], %[old_mode], 3\n"
1192  "xori %[new_mode], %[new_mode], 1\n" // 2 = round towards +INFINITY.
1193  "ctcmsa $1, %[new_mode]\n"
1194  "frint.d %w[v], %w[v]\n"
1195  "ctcmsa $1, %[old_mode]\n"
1196  : // outputs
1197  [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1198  [v] "+f"(v)
1199  : // inputs
1200  : // clobbers
1201  );
1202  return v;
1203 }
1204 
1205 template <>
1207  Packet2d v = a;
1208  int32_t old_mode, new_mode;
1209  asm volatile(
1210  "cfcmsa %[old_mode], $1\n"
1211  "ori %[new_mode], %[old_mode], 3\n"
1212  "xori %[new_mode], %[new_mode], 3\n" // 0 = round to nearest, ties to even.
1213  "ctcmsa $1, %[new_mode]\n"
1214  "frint.d %w[v], %w[v]\n"
1215  "ctcmsa $1, %[old_mode]\n"
1216  : // outputs
1217  [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1218  [v] "+f"(v)
1219  : // inputs
1220  : // clobbers
1221  );
1222  return v;
1223 }
1224 
1225 template <>
1226 EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket,
1227  const Packet2d& elsePacket) {
1228  Packet2ul select = {ifPacket.select[0], ifPacket.select[1]};
1229  Packet2l mask = __builtin_msa_ceqi_d((Packet2l)select, 0);
1230  return (Packet2d)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
1231 }
1232 
1233 } // end namespace internal
1234 
1235 } // end namespace Eigen
1236 
1237 #endif // EIGEN_PACKET_MATH_MSA_H
Array< int, Dynamic, 1 > v
Definition: Array_initializer_list_vector_cxx11.cpp:1
int i
Definition: BiCGSTAB_step_by_step.cpp:9
MatrixXcf ones
Definition: ComplexEigenSolver_eigenvalues.cpp:1
#define EIGEN_DEBUG_ALIGNED_STORE
Definition: GenericPacketMath.h:38
#define EIGEN_DEBUG_ALIGNED_LOAD
Definition: GenericPacketMath.h:30
#define EIGEN_DEBUG_UNALIGNED_STORE
Definition: GenericPacketMath.h:42
#define EIGEN_DEBUG_UNALIGNED_LOAD
Definition: GenericPacketMath.h:34
#define EIGEN_MSA_SHF_I8(a, b, c, d)
Definition: MSA/PacketMath.h:51
#define EIGEN_MSA_DEBUG
Definition: MSA/PacketMath.h:48
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define EIGEN_FAST_MATH
Definition: Macros.h:51
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
float * p
Definition: Tutorial_Map_using.cpp:9
M1<< 1, 2, 3, 4, 5, 6, 7, 8, 9;Map< RowVectorXf > v1(M1.data(), M1.size())
Scalar * b
Definition: benchVecAdd.cpp:17
static int f(const TensorMap< Tensor< int, 3 > > &tensor)
Definition: cxx11_tensor_map.cpp:237
@ Aligned16
Definition: Constants.h:237
RealScalar s
Definition: level1_cplx_impl.h:130
const Scalar * a
Definition: level2_cplx_impl.h:32
int * m
Definition: level2_cplx_impl.h:294
EIGEN_STRONG_INLINE Packet4f pandnot< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1465
std::ostream & operator<<(std::ostream &s, const Packet16c &v)
Definition: AltiVec/PacketMath.h:427
__m128d Packet2d
Definition: LSX/PacketMath.h:36
EIGEN_STRONG_INLINE void pstoreu< double >(double *to, const Packet4d &from)
Definition: AVX/PacketMath.h:1628
EIGEN_STRONG_INLINE double predux< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:1965
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf &a)
Definition: AltiVec/Complex.h:268
eigen_packet_wrapper< __m128i, 3 > Packet2l
Definition: LSX/PacketMath.h:41
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:318
EIGEN_STRONG_INLINE Packet4f pmin< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1250
EIGEN_STRONG_INLINE Packet2d padd< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:605
EIGEN_STRONG_INLINE Packet2d pandnot< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1003
EIGEN_STRONG_INLINE Packet4i pload1< Packet4i >(const int32_t *from)
Definition: MSA/PacketMath.h:163
__vector int Packet4i
Definition: AltiVec/PacketMath.h:34
EIGEN_STRONG_INLINE Packet4f padd< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1066
EIGEN_STRONG_INLINE Packet4i por< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1431
EIGEN_STRONG_INLINE Packet4i pset1< Packet4i >(const int &from)
Definition: AltiVec/PacketMath.h:778
EIGEN_STRONG_INLINE float pfirst< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1863
EIGEN_STRONG_INLINE Packet2d pand< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:880
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition: AltiVec/Complex.h:339
EIGEN_STRONG_INLINE Packet4i ploaddup< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:1644
EIGEN_STRONG_INLINE float predux_max< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2679
EIGEN_STRONG_INLINE Packet2d ploaddup< Packet2d >(const double *from)
Definition: LSX/PacketMath.h:1490
EIGEN_STRONG_INLINE Packet2d pxor< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:962
EIGEN_DEVICE_FUNC Packet pdiv(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:368
EIGEN_STRONG_INLINE Packet2d por< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:921
EIGEN_STRONG_INLINE Packet4i pdiv< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1205
EIGEN_STRONG_INLINE Packet4f ploaddup< Packet4f >(const float *from)
Definition: AltiVec/PacketMath.h:1640
EIGEN_STRONG_INLINE Packet4f por< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1427
EIGEN_STRONG_INLINE void prefetch< int32_t >(const int32_t *addr)
Definition: LSX/PacketMath.h:1848
EIGEN_STRONG_INLINE int predux_min< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2604
EIGEN_STRONG_INLINE Packet4i pxor< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1452
EIGEN_STRONG_INLINE double predux_max< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2127
EIGEN_STRONG_INLINE Packet4f pmul< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1162
EIGEN_STRONG_INLINE Packet4f pload1< Packet4f >(const float *from)
Definition: MSA/PacketMath.h:154
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:663
EIGEN_STRONG_INLINE Packet4i pblend(const Selector< 4 > &ifPacket, const Packet4i &thenPacket, const Packet4i &elsePacket)
Definition: AltiVec/PacketMath.h:3075
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4f pgather< float, Packet4f >(const float *from, Index stride)
Definition: AltiVec/PacketMath.h:853
EIGEN_STRONG_INLINE Packet2d pset1< Packet2d >(const double &from)
Definition: LSX/PacketMath.h:503
EIGEN_STRONG_INLINE Packet4f pload< Packet4f >(const float *from)
Definition: AltiVec/PacketMath.h:492
EIGEN_STRONG_INLINE int predux_mul< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2529
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< int32_t, Packet4i >(int32_t *to, const Packet4i &from, Index stride)
Definition: LSX/PacketMath.h:1771
EIGEN_STRONG_INLINE Packet8h por(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2309
__vector unsigned int Packet4ui
Definition: AltiVec/PacketMath.h:35
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
Definition: AltiVec/Complex.h:303
EIGEN_STRONG_INLINE void pstore< double >(double *to, const Packet4d &from)
Definition: AVX/PacketMath.h:1611
EIGEN_STRONG_INLINE Packet4i padd< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1070
EIGEN_STRONG_INLINE Packet4f pfloor< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1497
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: AltiVec/PacketMath.h:1218
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:88
EIGEN_STRONG_INLINE Packet4i pandnot< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1469
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:649
EIGEN_STRONG_INLINE Packet4f pdiv< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1187
EIGEN_STRONG_INLINE Packet2d pload< Packet2d >(const double *from)
Definition: LSX/PacketMath.h:1407
EIGEN_STRONG_INLINE Packet2d pmul< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:741
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
Definition: AltiVec/Complex.h:264
EIGEN_STRONG_INLINE float predux_mul< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2522
EIGEN_STRONG_INLINE void prefetch< float >(const float *addr)
Definition: AltiVec/PacketMath.h:1854
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter< double, Packet2d >(double *to, const Packet2d &from, Index stride)
Definition: LSX/PacketMath.h:1734
EIGEN_STRONG_INLINE Packet4i ploadu< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:1537
EIGEN_STRONG_INLINE double predux_mul< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2019
EIGEN_STRONG_INLINE Packet2d pdiv< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:782
EIGEN_STRONG_INLINE double predux_min< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:2073
EIGEN_STRONG_INLINE Packet4f pset1< Packet4f >(const float &from)
Definition: AltiVec/PacketMath.h:773
EIGEN_STRONG_INLINE Packet4i psub< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1099
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter< float, Packet4f >(float *to, const Packet4f &from, Index stride)
Definition: AltiVec/PacketMath.h:954
EIGEN_STRONG_INLINE Packet2d plset< Packet2d >(const double &a)
Definition: LSX/PacketMath.h:563
EIGEN_STRONG_INLINE Packet4f pceil< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1493
EIGEN_STRONG_INLINE void pstore< float >(float *to, const Packet4f &from)
Definition: AltiVec/PacketMath.h:642
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1936
EIGEN_STRONG_INLINE Packet2d ploadu< Packet2d >(const double *from)
Definition: LSX/PacketMath.h:1448
EIGEN_STRONG_INLINE Packet4f pxor< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1448
EIGEN_STRONG_INLINE void pstoreu< int32_t >(int32_t *to, const Packet4i &from)
Definition: LSX/PacketMath.h:1591
EIGEN_STRONG_INLINE Packet4i pmin< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1261
EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f &a)
Definition: LSX/PacketMath.h:2176
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2319
EIGEN_STRONG_INLINE void pstore< int32_t >(int32_t *to, const Packet4i &from)
Definition: LSX/PacketMath.h:1549
EIGEN_STRONG_INLINE int pfirst< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1869
EIGEN_STRONG_INLINE Packet4i plset< Packet4i >(const int &a)
Definition: AltiVec/PacketMath.h:1045
EIGEN_STRONG_INLINE float predux< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2435
EIGEN_STRONG_INLINE Packet2d pceil< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1186
EIGEN_STRONG_INLINE Packet4f ploadu< Packet4f >(const float *from)
Definition: AltiVec/PacketMath.h:1533
EIGEN_STRONG_INLINE Packet4i pmul< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1166
EIGEN_STRONG_INLINE Packet4i pand< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1410
EIGEN_STRONG_INLINE Packet2d pmin< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1244
EIGEN_STRONG_INLINE int predux< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2445
EIGEN_STRONG_INLINE Packet4f pand< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1406
EIGEN_STRONG_INLINE int predux_max< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:2684
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2d pgather< double, Packet2d >(const double *from, Index stride)
Definition: LSX/PacketMath.h:1621
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4i pgather< int32_t, Packet4i >(const int32_t *from, Index stride)
Definition: LSX/PacketMath.h:1660
EIGEN_STRONG_INLINE Packet4i pmax< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:1293
EIGEN_STRONG_INLINE Packet4i pload< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:497
__vector float Packet4f
Definition: AltiVec/PacketMath.h:33
EIGEN_STRONG_INLINE Packet2d psub< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:646
EIGEN_STRONG_INLINE Packet4f psub< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1095
EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f &a)
Definition: LSX/PacketMath.h:2528
EIGEN_STRONG_INLINE Packet4f plset< Packet4f >(const float &a)
Definition: AltiVec/PacketMath.h:1041
EIGEN_STRONG_INLINE void pstoreu< float >(float *to, const Packet4f &from)
Definition: AltiVec/PacketMath.h:1756
EIGEN_STRONG_INLINE Packet2d pmax< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:1256
EIGEN_STRONG_INLINE Packet4f pround< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1479
EIGEN_STRONG_INLINE Packet2d pround< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1206
EIGEN_STRONG_INLINE double pfirst< Packet2d >(const Packet2d &a)
Definition: LSX/PacketMath.h:1879
eigen_packet_wrapper< __m128i, 7 > Packet2ul
Definition: LSX/PacketMath.h:45
EIGEN_STRONG_INLINE Packet2d pfloor< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1167
EIGEN_STRONG_INLINE float predux_min< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:2599
EIGEN_STRONG_INLINE void prefetch< double >(const double *addr)
Definition: AVX/PacketMath.h:1750
EIGEN_STRONG_INLINE Packet4f pmax< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1282
std::int32_t int32_t
Definition: Meta.h:41
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool() isnan(const Eigen::bfloat16 &h)
Definition: BFloat16.h:742
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
squared absolute value
Definition: GlobalFunctions.h:87
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
double f1(const Vector< double > &coord)
f1 function, in front of the C1 unknown
Definition: poisson/poisson_with_singularity/two_d_poisson.cc:147
int c
Definition: calibrate.py:100
Definition: Eigen_Colamd.h:49
Definition: GenericPacketMath.h:1407
Definition: GenericPacketMath.h:1421
bool select[N]
Definition: GenericPacketMath.h:1422
@ HasRsqrt
Definition: GenericPacketMath.h:74
@ HasSin
Definition: GenericPacketMath.h:81
@ HasBlend
Definition: GenericPacketMath.h:66
@ HasCos
Definition: GenericPacketMath.h:82
@ HasExp
Definition: GenericPacketMath.h:75
@ HasSqrt
Definition: GenericPacketMath.h:73
@ HasErf
Definition: GenericPacketMath.h:95
@ HasLog
Definition: GenericPacketMath.h:77
@ HasTanh
Definition: GenericPacketMath.h:90
@ HasDiv
Definition: GenericPacketMath.h:71
Packet2d half
Definition: MSA/PacketMath.h:849
Packet2d type
Definition: MSA/PacketMath.h:848
Packet4f type
Definition: MSA/PacketMath.h:78
Packet4f half
Definition: MSA/PacketMath.h:79
Packet4i half
Definition: MSA/PacketMath.h:101
Packet4i type
Definition: MSA/PacketMath.h:100
@ size
Definition: GenericPacketMath.h:113
@ AlignedOnScalar
Definition: GenericPacketMath.h:114
@ Vectorizable
Definition: GenericPacketMath.h:112
double type
Definition: MSA/PacketMath.h:865
Packet2d half
Definition: MSA/PacketMath.h:873
Packet4f half
Definition: MSA/PacketMath.h:122
float type
Definition: MSA/PacketMath.h:114
int32_t type
Definition: MSA/PacketMath.h:127
Packet4i half
Definition: MSA/PacketMath.h:135
Definition: GenericPacketMath.h:134
@ masked_load_available
Definition: GenericPacketMath.h:142
@ size
Definition: GenericPacketMath.h:139
@ masked_store_available
Definition: GenericPacketMath.h:143
@ vectorizable
Definition: GenericPacketMath.h:141
@ alignment
Definition: GenericPacketMath.h:140
EIGEN_DONT_INLINE Scalar zero()
Definition: svd_common.h:232