10 #ifndef EIGEN_COMPLEX_AVX_H
11 #define EIGEN_COMPLEX_AVX_H
14 #include "../../InternalHeaderCheck.h"
27 #ifndef EIGEN_VECTORIZE_AVX512
56 typedef std::complex<float>
type;
82 const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000, 0x80000000, 0x00000000, 0x80000000, 0x00000000,
83 0x80000000, 0x00000000, 0x80000000));
89 __m256 tmp1 = _mm256_mul_ps(_mm256_movehdup_ps(
a.v), _mm256_permute_ps(
b.v, _MM_SHUFFLE(2, 3, 0, 1)));
90 __m256 tmp2 = _mm256_moveldup_ps(
a.v);
91 #ifdef EIGEN_VECTORIZE_FMA
92 __m256 result = _mm256_fmaddsub_ps(tmp2,
b.v, tmp1);
94 __m256 result = _mm256_addsub_ps(_mm256_mul_ps(tmp2,
b.v), tmp1);
101 __m256 eq = _mm256_cmp_ps(
a.v,
b.v, _CMP_EQ_OQ);
102 return Packet4cf(_mm256_and_ps(eq, _mm256_permute_ps(eq, 0xb1)));
139 return Packet4cf(_mm256_set_ps(im, re, im, re, im, re, im, re));
147 return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(
a.v),
b.v, 1));
170 __m128 low = _mm256_extractf128_ps(from.v, 0);
172 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)), _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));
174 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)), _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));
176 __m128 high = _mm256_extractf128_ps(from.v, 1);
178 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)), _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));
180 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)), _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));
190 __m128 low = _mm256_extractf128_ps(
a.v, 0);
191 __m128 high = _mm256_extractf128_ps(
a.v, 1);
192 __m128d lowd = _mm_castps_pd(low);
193 __m128d highd = _mm_castps_pd(high);
194 low = _mm_castpd_ps(_mm_shuffle_pd(lowd, lowd, 0x1));
195 high = _mm_castpd_ps(_mm_shuffle_pd(highd, highd, 0x1));
196 __m256 result = _mm256_setzero_ps();
197 result = _mm256_insertf128_ps(result, low, 1);
198 result = _mm256_insertf128_ps(result, high, 0);
221 return Packet4cf(_mm256_shuffle_ps(
x.v,
x.v, _MM_SHUFFLE(2, 3, 0, 1)));
231 #ifndef EIGEN_VECTORIZE_AVX512
259 typedef std::complex<double>
type;
285 const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000, 0x0, 0x0, 0x0, 0x80000000, 0x0, 0x0, 0x0));
291 __m256d tmp1 = _mm256_mul_pd(_mm256_permute_pd(
a.v, 0xF), _mm256_permute_pd(
b.v, 0x5));
292 __m256d tmp2 = _mm256_movedup_pd(
a.v);
293 #ifdef EIGEN_VECTORIZE_FMA
294 __m256d result = _mm256_fmaddsub_pd(tmp2,
b.v, tmp1);
296 __m256d result = _mm256_addsub_pd(_mm256_mul_pd(tmp2,
b.v), tmp1);
303 __m256d eq = _mm256_cmp_pd(
a.v,
b.v, _CMP_EQ_OQ);
341 return Packet2cd(_mm256_broadcast_pd((
const __m128d*)(
const void*)&from));
368 __m128d low = _mm256_extractf128_pd(from.v, 0);
369 to[stride * 0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
370 __m128d high = _mm256_extractf128_pd(from.v, 1);
371 to[stride * 1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
376 __m128d low = _mm256_extractf128_pd(
a.v, 0);
378 _mm_store_pd(
res, low);
379 return std::complex<double>(
res[0],
res[1]);
384 __m256d result = _mm256_permute2f128_pd(
a.v,
a.v, 1);
407 return Packet2cd(_mm256_shuffle_pd(
x.v,
x.v, 0x5));
411 __m256d
P0 = _mm256_castps_pd(kernel.
packet[0].v);
412 __m256d P1 = _mm256_castps_pd(kernel.
packet[1].v);
413 __m256d P2 = _mm256_castps_pd(kernel.
packet[2].v);
414 __m256d P3 = _mm256_castps_pd(kernel.
packet[3].v);
416 __m256d T0 = _mm256_shuffle_pd(
P0, P1, 15);
417 __m256d T1 = _mm256_shuffle_pd(
P0, P1, 0);
418 __m256d T2 = _mm256_shuffle_pd(P2, P3, 15);
419 __m256d T3 = _mm256_shuffle_pd(P2, P3, 0);
421 kernel.
packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));
422 kernel.
packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));
423 kernel.
packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));
424 kernel.
packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));
428 __m256d
tmp = _mm256_permute2f128_pd(kernel.
packet[0].v, kernel.
packet[1].v, 0 + (2 << 4));
429 kernel.
packet[1].v = _mm256_permute2f128_pd(kernel.
packet[0].v, kernel.
packet[1].v, 1 + (3 << 4));
435 return psqrt_complex<Packet2cd>(
a);
440 return psqrt_complex<Packet4cf>(
a);
445 return plog_complex<Packet2cd>(
a);
450 return plog_complex<Packet4cf>(
a);
455 return pexp_complex<Packet4cf>(
a);
458 #ifdef EIGEN_VECTORIZE_FMA
462 __m256 a_odd = _mm256_movehdup_ps(
a.v);
463 __m256 a_even = _mm256_moveldup_ps(
a.v);
464 __m256 b_swap = _mm256_permute_ps(
b.v, _MM_SHUFFLE(2, 3, 0, 1));
465 __m256 result = _mm256_fmaddsub_ps(a_even,
b.v, _mm256_fmaddsub_ps(a_odd, b_swap,
c.v));
466 return Packet4cf(result);
470 __m256 a_odd = _mm256_movehdup_ps(
a.v);
471 __m256 a_even = _mm256_moveldup_ps(
a.v);
472 __m256 b_swap = _mm256_permute_ps(
b.v, _MM_SHUFFLE(2, 3, 0, 1));
473 __m256 result = _mm256_fmaddsub_ps(a_even,
b.v, _mm256_fmsubadd_ps(a_odd, b_swap,
c.v));
474 return Packet4cf(result);
478 __m256 a_odd = _mm256_movehdup_ps(
a.v);
479 __m256 a_even = _mm256_moveldup_ps(
a.v);
480 __m256 b_swap = _mm256_permute_ps(
b.v, _MM_SHUFFLE(2, 3, 0, 1));
481 __m256 result = _mm256_fmaddsub_ps(a_odd, b_swap, _mm256_fmaddsub_ps(a_even,
b.v,
c.v));
482 return Packet4cf(result);
486 __m256 a_odd = _mm256_movehdup_ps(
a.v);
487 __m256 a_even = _mm256_moveldup_ps(
a.v);
488 __m256 b_swap = _mm256_permute_ps(
b.v, _MM_SHUFFLE(2, 3, 0, 1));
489 __m256 result = _mm256_fmaddsub_ps(a_odd, b_swap, _mm256_fmsubadd_ps(a_even,
b.v,
c.v));
490 return Packet4cf(result);
495 __m256d a_odd = _mm256_permute_pd(
a.v, 0xF);
496 __m256d a_even = _mm256_movedup_pd(
a.v);
497 __m256d b_swap = _mm256_permute_pd(
b.v, 0x5);
498 __m256d result = _mm256_fmaddsub_pd(a_even,
b.v, _mm256_fmaddsub_pd(a_odd, b_swap,
c.v));
499 return Packet2cd(result);
503 __m256d a_odd = _mm256_permute_pd(
a.v, 0xF);
504 __m256d a_even = _mm256_movedup_pd(
a.v);
505 __m256d b_swap = _mm256_permute_pd(
b.v, 0x5);
506 __m256d result = _mm256_fmaddsub_pd(a_even,
b.v, _mm256_fmsubadd_pd(a_odd, b_swap,
c.v));
507 return Packet2cd(result);
511 __m256d a_odd = _mm256_permute_pd(
a.v, 0xF);
512 __m256d a_even = _mm256_movedup_pd(
a.v);
513 __m256d b_swap = _mm256_permute_pd(
b.v, 0x5);
514 __m256d result = _mm256_fmaddsub_pd(a_odd, b_swap, _mm256_fmaddsub_pd(a_even,
b.v,
c.v));
515 return Packet2cd(result);
519 __m256d a_odd = _mm256_permute_pd(
a.v, 0xF);
520 __m256d a_even = _mm256_movedup_pd(
a.v);
521 __m256d b_swap = _mm256_permute_pd(
b.v, 0x5);
522 __m256d result = _mm256_fmaddsub_pd(a_odd, b_swap, _mm256_fmsubadd_pd(a_even,
b.v,
c.v));
523 return Packet2cd(result);
AnnoyingScalar imag(const AnnoyingScalar &)
Definition: AnnoyingScalar.h:132
#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL)
Definition: ConjHelper.h:14
#define EIGEN_DEBUG_ALIGNED_STORE
Definition: GenericPacketMath.h:38
#define EIGEN_DEBUG_ALIGNED_LOAD
Definition: GenericPacketMath.h:30
#define EIGEN_DEBUG_UNALIGNED_STORE
Definition: GenericPacketMath.h:42
#define EIGEN_DEBUG_UNALIGNED_LOAD
Definition: GenericPacketMath.h:34
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition: PartialRedux_count.cpp:3
Scalar * b
Definition: benchVecAdd.cpp:17
float real
Definition: datatypes.h:10
@ Aligned32
Definition: Constants.h:238
const Scalar * a
Definition: level2_cplx_impl.h:32
Eigen::Matrix< Scalar, Dynamic, Dynamic, ColMajor > tmp
Definition: level3_impl.h:365
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf &a)
Definition: AltiVec/Complex.h:268
EIGEN_STRONG_INLINE Packet4cf ploadu< Packet4cf >(const std::complex< float > *from)
Definition: AVX/Complex.h:131
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:318
EIGEN_STRONG_INLINE std::complex< double > predux_mul< Packet2cd >(const Packet2cd &a)
Definition: AVX/Complex.h:394
EIGEN_STRONG_INLINE Packet2cd pcplxflip< Packet2cd >(const Packet2cd &x)
Definition: AVX/Complex.h:406
EIGEN_STRONG_INLINE Packet4cf psub< Packet4cf >(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:73
EIGEN_STRONG_INLINE Packet2cd pset1< Packet2cd >(const std::complex< double > &from)
Definition: AVX/Complex.h:338
EIGEN_STRONG_INLINE Packet2cd ploaddup< Packet2cd >(const std::complex< double > *from)
Definition: AVX/Complex.h:345
EIGEN_STRONG_INLINE Packet2cd psqrt< Packet2cd >(const Packet2cd &a)
Definition: AVX/Complex.h:434
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition: AltiVec/Complex.h:339
EIGEN_STRONG_INLINE Packet2cf ploaddup< Packet2cf >(const std::complex< float > *from)
Definition: AltiVec/Complex.h:162
EIGEN_STRONG_INLINE Packet2cd pand< Packet2cd >(const Packet2cd &a, const Packet2cd &b)
Definition: AVX/Complex.h:312
EIGEN_STRONG_INLINE Packet4cf pxor< Packet4cf >(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:118
EIGEN_STRONG_INLINE Packet2cd ploadu< Packet2cd >(const std::complex< double > *from)
Definition: AVX/Complex.h:333
EIGEN_STRONG_INLINE Packet2cd por< Packet2cd >(const Packet2cd &a, const Packet2cd &b)
Definition: AVX/Complex.h:316
EIGEN_STRONG_INLINE Packet2cd ptrue< Packet2cd >(const Packet2cd &a)
Definition: AVX/Complex.h:308
EIGEN_STRONG_INLINE Packet4cf ptrue< Packet4cf >(const Packet4cf &a)
Definition: AVX/Complex.h:106
EIGEN_STRONG_INLINE Packet4cf pand< Packet4cf >(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:110
EIGEN_STRONG_INLINE Packet2cd pdiv< Packet2cd >(const Packet2cd &a, const Packet2cd &b)
Definition: AVX/Complex.h:401
EIGEN_STRONG_INLINE Packet4cf pset1< Packet4cf >(const std::complex< float > &from)
Definition: AVX/Complex.h:136
EIGEN_STRONG_INLINE Packet2cd pload< Packet2cd >(const std::complex< double > *from)
Definition: AVX/Complex.h:329
EIGEN_STRONG_INLINE std::complex< float > predux< Packet4cf >(const Packet4cf &a)
Definition: AVX/Complex.h:203
EIGEN_STRONG_INLINE std::complex< float > pfirst< Packet4cf >(const Packet4cf &a)
Definition: AVX/Complex.h:184
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
Definition: AltiVec/Complex.h:303
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: AltiVec/PacketMath.h:1218
EIGEN_STRONG_INLINE Packet2cd pandnot< Packet2cd >(const Packet2cd &a, const Packet2cd &b)
Definition: AVX/Complex.h:324
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:88
EIGEN_STRONG_INLINE Packet8h ptrue(const Packet8h &a)
Definition: AVX/PacketMath.h:2263
EIGEN_STRONG_INLINE Packet2cd psub< Packet2cd >(const Packet2cd &a, const Packet2cd &b)
Definition: AVX/Complex.h:276
EIGEN_STRONG_INLINE Packet4cf pexp< Packet4cf >(const Packet4cf &a)
Definition: AVX/Complex.h:454
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pdiv_complex(const Packet &x, const Packet &y)
Definition: GenericPacketMathFunctions.h:1318
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
Definition: AltiVec/Complex.h:264
EIGEN_STRONG_INLINE std::complex< double > predux< Packet2cd >(const Packet2cd &a)
Definition: AVX/Complex.h:389
EIGEN_STRONG_INLINE Packet4cf ploaddup< Packet4cf >(const std::complex< float > *from)
Definition: AVX/Complex.h:143
EIGEN_STRONG_INLINE std::complex< float > predux_mul< Packet4cf >(const Packet4cf &a)
Definition: AVX/Complex.h:208
EIGEN_STRONG_INLINE Packet4cf pandnot< Packet4cf >(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:122
EIGEN_STRONG_INLINE bfloat16 pfirst(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2418
EIGEN_STRONG_INLINE Packet4cf pdiv< Packet4cf >(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:215
EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:835
EIGEN_STRONG_INLINE Packet2cd pxor< Packet2cd >(const Packet2cd &a, const Packet2cd &b)
Definition: AVX/Complex.h:320
EIGEN_STRONG_INLINE Packet4cf padd< Packet4cf >(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:69
EIGEN_DEVICE_FUNC unpacket_traits< Packet >::type predux(const Packet &a)
Definition: GenericPacketMath.h:1232
EIGEN_STRONG_INLINE Packet4cf plog< Packet4cf >(const Packet4cf &a)
Definition: AVX/Complex.h:449
EIGEN_STRONG_INLINE std::complex< double > pfirst< Packet2cd >(const Packet2cd &a)
Definition: AVX/Complex.h:375
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Definition: AltiVec/Complex.h:353
EIGEN_STRONG_INLINE Packet4f pmsub(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:819
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:2319
EIGEN_STRONG_INLINE Packet2cd plog< Packet2cd >(const Packet2cd &a)
Definition: AVX/Complex.h:444
EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: LSX/PacketMath.h:827
EIGEN_STRONG_INLINE Packet4cf pcplxflip< Packet4cf >(const Packet4cf &x)
Definition: AVX/Complex.h:220
EIGEN_STRONG_INLINE Packet4cf pload< Packet4cf >(const std::complex< float > *from)
Definition: AVX/Complex.h:127
__m256 Packet8f
Definition: AVX/PacketMath.h:34
EIGEN_STRONG_INLINE Packet4cf por< Packet4cf >(const Packet4cf &a, const Packet4cf &b)
Definition: AVX/Complex.h:114
EIGEN_STRONG_INLINE Packet2cd padd< Packet2cd >(const Packet2cd &a, const Packet2cd &b)
Definition: AVX/Complex.h:272
__m256d Packet4d
Definition: AVX/PacketMath.h:36
EIGEN_STRONG_INLINE Packet4cf psqrt< Packet4cf >(const Packet4cf &a)
Definition: AVX/Complex.h:439
EIGEN_DEVICE_FUNC unpacket_traits< Packet >::type predux_mul(const Packet &a)
Definition: GenericPacketMath.h:1238
EIGEN_DEVICE_FUNC internal::add_const_on_value_type_t< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar)> real_ref(const Scalar &x)
Definition: MathFunctions.h:1051
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
double P0
Definition: two_dim.cc:101
int c
Definition: calibrate.py:100
Definition: Eigen_Colamd.h:49
list x
Definition: plotDoE.py:28
Definition: LSX/Complex.h:260
Definition: AVX/Complex.h:225
EIGEN_STRONG_INLINE Packet2cd()
Definition: AVX/Complex.h:226
__m256d v
Definition: AVX/Complex.h:228
EIGEN_STRONG_INLINE Packet2cd(const __m256d &a)
Definition: AVX/Complex.h:227
Definition: AltiVec/Complex.h:38
Definition: AVX/Complex.h:21
EIGEN_STRONG_INLINE Packet4cf(const __m256 &a)
Definition: AVX/Complex.h:23
__m256 v
Definition: AVX/Complex.h:24
EIGEN_STRONG_INLINE Packet4cf()
Definition: AVX/Complex.h:22
Definition: GenericPacketMath.h:1407
Packet packet[N]
Definition: GenericPacketMath.h:1408
Definition: GenericPacketMath.h:45
@ HasExp
Definition: GenericPacketMath.h:75
@ HasSqrt
Definition: GenericPacketMath.h:73
@ HasLog
Definition: GenericPacketMath.h:77
@ HasDiv
Definition: GenericPacketMath.h:71
Packet1cd half
Definition: AVX/Complex.h:235
Packet2cd type
Definition: AVX/Complex.h:234
Packet2cf half
Definition: AVX/Complex.h:31
Packet4cf type
Definition: AVX/Complex.h:30
Definition: GenericPacketMath.h:108
@ size
Definition: GenericPacketMath.h:113
@ AlignedOnScalar
Definition: GenericPacketMath.h:114
@ Vectorizable
Definition: GenericPacketMath.h:112
@ HasSub
Definition: GenericPacketMath.h:118
@ HasMax
Definition: GenericPacketMath.h:124
@ HasNegate
Definition: GenericPacketMath.h:120
@ HasMul
Definition: GenericPacketMath.h:119
@ HasAdd
Definition: GenericPacketMath.h:117
@ HasSetLinear
Definition: GenericPacketMath.h:126
@ HasMin
Definition: GenericPacketMath.h:123
@ HasAbs2
Definition: GenericPacketMath.h:122
@ HasAbs
Definition: GenericPacketMath.h:121
Packet1cd half
Definition: AVX/Complex.h:260
Packet4d as_real
Definition: AVX/Complex.h:261
std::complex< double > type
Definition: AVX/Complex.h:259
std::complex< float > type
Definition: AVX/Complex.h:56
Packet2cf half
Definition: AVX/Complex.h:57
Packet8f as_real
Definition: AVX/Complex.h:58
Definition: GenericPacketMath.h:134
@ masked_load_available
Definition: GenericPacketMath.h:142
@ size
Definition: GenericPacketMath.h:139
@ masked_store_available
Definition: GenericPacketMath.h:143
@ vectorizable
Definition: GenericPacketMath.h:141
@ alignment
Definition: GenericPacketMath.h:140
Definition: datatypes.h:12