10 #ifndef EIGEN_COMPLEX_SSE_H
11 #define EIGEN_COMPLEX_SSE_H
14 #include "../../InternalHeaderCheck.h"
29 #ifndef EIGEN_VECTORIZE_AVX
31 struct packet_traits<std::
complex<float> > : default_packet_traits {
58 struct unpacket_traits<Packet2cf> {
59 typedef std::complex<float>
type;
73 return Packet2cf(_mm_add_ps(
a.v,
b.v));
77 return Packet2cf(_mm_sub_ps(
a.v,
b.v));
82 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000));
83 return Packet2cf(_mm_xor_ps(
a.v, mask));
87 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000, 0x80000000, 0x00000000, 0x80000000));
88 return Packet2cf(_mm_xor_ps(
a.v, mask));
93 #ifdef EIGEN_VECTORIZE_SSE3
94 __m128 tmp1 = _mm_mul_ps(_mm_movehdup_ps(
a.v),
vec4f_swizzle1(
b.v, 1, 0, 3, 2));
95 __m128 tmp2 = _mm_moveldup_ps(
a.v);
100 #ifdef EIGEN_VECTORIZE_FMA
101 __m128 result = _mm_fmaddsub_ps(tmp2,
b.v, tmp1);
103 #ifdef EIGEN_VECTORIZE_SSE3
104 __m128 result = _mm_addsub_ps(_mm_mul_ps(tmp2,
b.v), tmp1);
106 const __m128 mask = _mm_setr_ps(-0.0f, 0.0f, -0.0f, 0.0f);
107 __m128 result = _mm_add_ps(_mm_mul_ps(tmp2,
b.v), _mm_xor_ps(tmp1, mask));
119 return Packet2cf(_mm_and_ps(
a.v,
b.v));
123 return Packet2cf(_mm_or_ps(
a.v,
b.v));
127 return Packet2cf(_mm_xor_ps(
a.v,
b.v));
131 return Packet2cf(_mm_andnot_ps(
b.v,
a.v));
147 return Packet2cf(_mm_set_ps(im, re, im, re));
156 EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to,
const Packet2cf& from) {
160 EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to,
const Packet2cf& from) {
165 EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(
const std::complex<float>* from,
172 EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to,
const Packet2cf& from,
174 to[stride * 0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
175 _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
176 to[stride * 1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
177 _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
187 alignas(
alignof(__m64)) std::complex<float>
res;
188 _mm_storel_pi((__m64*)&
res,
a.v);
199 return pfirst(Packet2cf(_mm_add_ps(
a.v, _mm_movehl_ps(
a.v,
a.v))));
227 #ifndef EIGEN_VECTORIZE_AVX
229 struct packet_traits<std::
complex<double> > : default_packet_traits {
254 struct unpacket_traits<Packet1cd> {
255 typedef std::complex<double>
type;
269 return Packet1cd(_mm_add_pd(
a.v,
b.v));
273 return Packet1cd(_mm_sub_pd(
a.v,
b.v));
281 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000, 0x0, 0x0, 0x0));
282 return Packet1cd(_mm_xor_pd(
a.v, mask));
288 #ifdef EIGEN_VECTORIZE_SSE3
289 __m128d tmp2 = _mm_movedup_pd(
a.v);
291 __m128d tmp2 = _mm_unpacklo_pd(
a.v,
a.v);
293 #ifdef EIGEN_VECTORIZE_FMA
294 __m128d result = _mm_fmaddsub_pd(tmp2,
b.v, tmp1);
296 #ifdef EIGEN_VECTORIZE_SSE3
297 __m128d result = _mm_addsub_pd(_mm_mul_pd(tmp2,
b.v), tmp1);
299 const __m128d mask = _mm_setr_pd(-0.0, 0.0);
300 __m128d result = _mm_add_pd(_mm_mul_pd(tmp2,
b.v), _mm_xor_pd(tmp1, mask));
312 return Packet1cd(_mm_and_pd(
a.v,
b.v));
316 return Packet1cd(_mm_or_pd(
a.v,
b.v));
320 return Packet1cd(_mm_xor_pd(
a.v,
b.v));
324 return Packet1cd(_mm_andnot_pd(
b.v,
a.v));
349 EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double>* to,
const Packet1cd& from) {
353 EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double>* to,
const Packet1cd& from) {
358 EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(
const std::complex<double>* addr) {
365 _mm_store_pd(
res,
a.v);
366 return std::complex<double>(
res[0],
res[1]);
396 __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
397 __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
399 __m128
tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
400 kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
401 kernel.packet[1].v =
tmp;
406 __m128 eq = _mm_cmpeq_ps(
a.v,
b.v);
412 __m128d eq = _mm_cmpeq_pd(
a.v,
b.v);
418 const Packet2cf& elsePacket) {
419 __m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
420 return Packet2cf(_mm_castpd_ps(result));
425 return psqrt_complex<Packet1cd>(
a);
430 return psqrt_complex<Packet2cf>(
a);
435 return plog_complex<Packet1cd>(
a);
440 return plog_complex<Packet2cf>(
a);
445 return pexp_complex<Packet2cf>(
a);
448 #ifdef EIGEN_VECTORIZE_FMA
452 __m128 a_odd = _mm_movehdup_ps(
a.v);
453 __m128 a_even = _mm_moveldup_ps(
a.v);
454 __m128 b_swap = _mm_permute_ps(
b.v, _MM_SHUFFLE(2, 3, 0, 1));
455 __m128 result = _mm_fmaddsub_ps(a_even,
b.v, _mm_fmaddsub_ps(a_odd, b_swap,
c.v));
456 return Packet2cf(result);
460 __m128 a_odd = _mm_movehdup_ps(
a.v);
461 __m128 a_even = _mm_moveldup_ps(
a.v);
462 __m128 b_swap = _mm_permute_ps(
b.v, _MM_SHUFFLE(2, 3, 0, 1));
463 __m128 result = _mm_fmaddsub_ps(a_even,
b.v, _mm_fmsubadd_ps(a_odd, b_swap,
c.v));
464 return Packet2cf(result);
468 __m128 a_odd = _mm_movehdup_ps(
a.v);
469 __m128 a_even = _mm_moveldup_ps(
a.v);
470 __m128 b_swap = _mm_permute_ps(
b.v, _MM_SHUFFLE(2, 3, 0, 1));
471 __m128 result = _mm_fmaddsub_ps(a_odd, b_swap, _mm_fmaddsub_ps(a_even,
b.v,
c.v));
472 return Packet2cf(result);
476 __m128 a_odd = _mm_movehdup_ps(
a.v);
477 __m128 a_even = _mm_moveldup_ps(
a.v);
478 __m128 b_swap = _mm_permute_ps(
b.v, _MM_SHUFFLE(2, 3, 0, 1));
479 __m128 result = _mm_fmaddsub_ps(a_odd, b_swap, _mm_fmsubadd_ps(a_even,
b.v,
c.v));
480 return Packet2cf(result);
485 __m128d a_odd = _mm_permute_pd(
a.v, 0x3);
486 __m128d a_even = _mm_movedup_pd(
a.v);
487 __m128d b_swap = _mm_permute_pd(
b.v, 0x1);
488 __m128d result = _mm_fmaddsub_pd(a_even,
b.v, _mm_fmaddsub_pd(a_odd, b_swap,
c.v));
489 return Packet1cd(result);
493 __m128d a_odd = _mm_permute_pd(
a.v, 0x3);
494 __m128d a_even = _mm_movedup_pd(
a.v);
495 __m128d b_swap = _mm_permute_pd(
b.v, 0x1);
496 __m128d result = _mm_fmaddsub_pd(a_even,
b.v, _mm_fmsubadd_pd(a_odd, b_swap,
c.v));
497 return Packet1cd(result);
501 __m128d a_odd = _mm_permute_pd(
a.v, 0x3);
502 __m128d a_even = _mm_movedup_pd(
a.v);
503 __m128d b_swap = _mm_permute_pd(
b.v, 0x1);
504 __m128d result = _mm_fmaddsub_pd(a_odd, b_swap, _mm_fmaddsub_pd(a_even,
b.v,
c.v));
505 return Packet1cd(result);
509 __m128d a_odd = _mm_permute_pd(
a.v, 0x3);
510 __m128d a_even = _mm_movedup_pd(
a.v);
511 __m128d b_swap = _mm_permute_pd(
b.v, 0x1);
512 __m128d result = _mm_fmaddsub_pd(a_odd, b_swap, _mm_fmsubadd_pd(a_even,
b.v,
c.v));
513 return Packet1cd(result);
AnnoyingScalar imag(const AnnoyingScalar &)
Definition: AnnoyingScalar.h:132
#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL)
Definition: ConjHelper.h:14
#define EIGEN_DEBUG_ALIGNED_STORE
Definition: GenericPacketMath.h:38
#define EIGEN_DEBUG_ALIGNED_LOAD
Definition: GenericPacketMath.h:30
#define EIGEN_DEBUG_UNALIGNED_STORE
Definition: GenericPacketMath.h:42
#define EIGEN_DEBUG_UNALIGNED_LOAD
Definition: GenericPacketMath.h:34
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition: PartialRedux_count.cpp:3
#define vec2d_swizzle1(v, p, q)
Definition: SSE/PacketMath.h:102
Scalar * b
Definition: benchVecAdd.cpp:17
float real
Definition: datatypes.h:10
@ Aligned16
Definition: Constants.h:237
const Scalar * a
Definition: level2_cplx_impl.h:32
Eigen::Matrix< Scalar, Dynamic, Dynamic, ColMajor > tmp
Definition: level3_impl.h:365
__m128d Packet2d
Definition: LSX/PacketMath.h:36
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf &a)
Definition: AltiVec/Complex.h:268
EIGEN_STRONG_INLINE std::complex< float > predux_mul< Packet2cf >(const Packet2cf &a)
Definition: AltiVec/Complex.h:318
EIGEN_STRONG_INLINE std::complex< float > predux< Packet2cf >(const Packet2cf &a)
Definition: AltiVec/Complex.h:310
EIGEN_STRONG_INLINE Packet2cf psqrt< Packet2cf >(const Packet2cf &a)
Definition: AltiVec/Complex.h:370
EIGEN_STRONG_INLINE std::complex< double > predux_mul< Packet1cd >(const Packet1cd &a)
Definition: LSX/Complex.h:420
EIGEN_STRONG_INLINE Packet2cf ptrue< Packet2cf >(const Packet2cf &a)
Definition: LSX/Complex.h:106
EIGEN_STRONG_INLINE Packet2cf pandnot< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:285
EIGEN_STRONG_INLINE Packet2d pand< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: LSX/PacketMath.h:880
EIGEN_STRONG_INLINE Packet1cd psqrt< Packet1cd >(const Packet1cd &a)
Definition: LSX/Complex.h:462
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition: AltiVec/Complex.h:339
EIGEN_STRONG_INLINE Packet1cd plog< Packet1cd >(const Packet1cd &a)
Definition: LSX/Complex.h:472
EIGEN_STRONG_INLINE Packet2cf ploaddup< Packet2cf >(const std::complex< float > *from)
Definition: AltiVec/Complex.h:162
EIGEN_STRONG_INLINE Packet4i pblend(const Selector< 4 > &ifPacket, const Packet4i &thenPacket, const Packet4i &elsePacket)
Definition: AltiVec/PacketMath.h:3075
EIGEN_STRONG_INLINE std::complex< float > pfirst< Packet2cf >(const Packet2cf &a)
Definition: AltiVec/Complex.h:295
EIGEN_STRONG_INLINE Packet1cd ploadu< Packet1cd >(const std::complex< double > *from)
Definition: LSX/Complex.h:373
EIGEN_STRONG_INLINE Packet2cf por< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:277
EIGEN_STRONG_INLINE Packet2cf pset1< Packet2cf >(const std::complex< float > &from)
Definition: AltiVec/Complex.h:125
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
Definition: AltiVec/Complex.h:303
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: AltiVec/PacketMath.h:1218
EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf &x)
Definition: LSX/Complex.h:218
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:88
EIGEN_STRONG_INLINE Packet8h ptrue(const Packet8h &a)
Definition: AVX/PacketMath.h:2263
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pdiv_complex(const Packet &x, const Packet &y)
Definition: GenericPacketMathFunctions.h:1318
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
Definition: AltiVec/Complex.h:264
EIGEN_STRONG_INLINE Packet1cd pxor< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition: LSX/Complex.h:355
EIGEN_STRONG_INLINE Packet2cf pdiv< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:330
EIGEN_STRONG_INLINE Packet1cd padd< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition: LSX/Complex.h:305
EIGEN_STRONG_INLINE Packet2cf ploadu< Packet2cf >(const std::complex< float > *from)
Definition: AltiVec/Complex.h:148
EIGEN_STRONG_INLINE Packet1cd ploaddup< Packet1cd >(const std::complex< double > *from)
Definition: LSX/Complex.h:383
const char * SsePrefetchPtrType
Definition: SSE/PacketMath.h:1719
EIGEN_STRONG_INLINE Packet2cf pload< Packet2cf >(const std::complex< float > *from)
Definition: AltiVec/Complex.h:144
EIGEN_STRONG_INLINE bfloat16 pfirst(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2418
EIGEN_STRONG_INLINE std::complex< double > predux< Packet1cd >(const Packet1cd &a)
Definition: LSX/Complex.h:415
EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:835
EIGEN_STRONG_INLINE Packet1cd pload< Packet1cd >(const std::complex< double > *from)
Definition: LSX/Complex.h:369
EIGEN_STRONG_INLINE Packet1cd pand< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition: LSX/Complex.h:343
EIGEN_STRONG_INLINE Packet2cf pand< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:273
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:353
EIGEN_STRONG_INLINE Packet4f vec4f_swizzle1(const Packet4f &a, int p, int q, int r, int s)
Definition: LSX/PacketMath.h:126
EIGEN_STRONG_INLINE Packet2cf pexp< Packet2cf >(const Packet2cf &a)
Definition: AltiVec/Complex.h:380
EIGEN_STRONG_INLINE Packet4f pmsub(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:819
EIGEN_STRONG_INLINE Packet1cd pandnot< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition: LSX/Complex.h:361
EIGEN_STRONG_INLINE Packet2cf pxor< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:281
EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:827
EIGEN_STRONG_INLINE Packet2cf psub< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:260
EIGEN_STRONG_INLINE Packet1cd ptrue< Packet1cd >(const Packet1cd &a)
Definition: LSX/Complex.h:339
EIGEN_STRONG_INLINE Packet4f pand< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:1406
EIGEN_STRONG_INLINE Packet2cf plog< Packet2cf >(const Packet2cf &a)
Definition: AltiVec/Complex.h:375
EIGEN_STRONG_INLINE Packet2cf padd< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:256
EIGEN_STRONG_INLINE Packet1cd por< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition: LSX/Complex.h:349
__vector float Packet4f
Definition: AltiVec/PacketMath.h:33
EIGEN_STRONG_INLINE Packet1cd pset1< Packet1cd >(const std::complex< double > &from)
Definition: LSX/Complex.h:378
EIGEN_STRONG_INLINE std::complex< double > pfirst< Packet1cd >(const Packet1cd &a)
Definition: LSX/Complex.h:403
EIGEN_STRONG_INLINE Packet1cd pdiv< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition: LSX/Complex.h:427
EIGEN_STRONG_INLINE Packet1cd psub< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition: LSX/Complex.h:309
EIGEN_DEVICE_FUNC internal::add_const_on_value_type_t< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar)> real_ref(const Scalar &x)
Definition: MathFunctions.h:1051
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
int c
Definition: calibrate.py:100
Definition: Eigen_Colamd.h:49
list x
Definition: plotDoE.py:28
Definition: LSX/Complex.h:260
EIGEN_STRONG_INLINE Packet1cd()
Definition: SSE/Complex.h:220
Packet2d v
Definition: LSX/Complex.h:263
EIGEN_STRONG_INLINE Packet1cd(const __m128d &a)
Definition: SSE/Complex.h:221
Definition: AltiVec/Complex.h:38
Packet4f v
Definition: AltiVec/Complex.h:78
EIGEN_STRONG_INLINE Packet2cf(const __m128 &a)
Definition: SSE/Complex.h:23
EIGEN_STRONG_INLINE Packet2cf()
Definition: SSE/Complex.h:22
@ HasBlend
Definition: GenericPacketMath.h:66
@ HasExp
Definition: GenericPacketMath.h:75
@ HasSqrt
Definition: GenericPacketMath.h:73
@ HasLog
Definition: GenericPacketMath.h:77
@ HasDiv
Definition: GenericPacketMath.h:71
Packet1cd half
Definition: SSE/Complex.h:231
Packet1cd type
Definition: SSE/Complex.h:230
Packet2cf half
Definition: SSE/Complex.h:33
Packet2cf type
Definition: SSE/Complex.h:32
@ size
Definition: GenericPacketMath.h:113
@ AlignedOnScalar
Definition: GenericPacketMath.h:114
@ Vectorizable
Definition: GenericPacketMath.h:112
@ HasSub
Definition: GenericPacketMath.h:118
@ HasMax
Definition: GenericPacketMath.h:124
@ HasNegate
Definition: GenericPacketMath.h:120
@ HasMul
Definition: GenericPacketMath.h:119
@ HasAdd
Definition: GenericPacketMath.h:117
@ HasSetLinear
Definition: GenericPacketMath.h:126
@ HasMin
Definition: GenericPacketMath.h:123
@ HasAbs2
Definition: GenericPacketMath.h:122
@ HasAbs
Definition: GenericPacketMath.h:121
Packet2d as_real
Definition: SSE/Complex.h:257
Packet1cd half
Definition: SSE/Complex.h:256
std::complex< double > type
Definition: SSE/Complex.h:255
std::complex< float > type
Definition: SSE/Complex.h:59
Packet4f as_real
Definition: SSE/Complex.h:61
Packet2cf half
Definition: SSE/Complex.h:60
@ masked_load_available
Definition: GenericPacketMath.h:142
@ size
Definition: GenericPacketMath.h:139
@ masked_store_available
Definition: GenericPacketMath.h:143
@ vectorizable
Definition: GenericPacketMath.h:141
@ alignment
Definition: GenericPacketMath.h:140
Definition: datatypes.h:12