11 #ifndef EIGEN_CONFIGURE_VECTORIZATION_H
12 #define EIGEN_CONFIGURE_VECTORIZATION_H
// Alignment helper macros.
// With EIGEN_CUDACC defined, the compiler-specific __align__/__alignof
// spellings are used; the alignas/alignof pair below is the alternative.
// NOTE(review): the #else/#endif lines separating the two pairs are not
// visible in this listing.
33 #if (defined EIGEN_CUDACC)
34 #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
35 #define EIGEN_ALIGNOF(x) __alignof(x)
37 #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
38 #define EIGEN_ALIGNOF(x) alignof(x)
// EIGEN_ALIGN_TO_AVOID_FALSE_SHARING: uses
// std::hardware_destructive_interference_size when the feature-test macro
// __cpp_lib_hardware_interference_size is defined; the 128-byte definition is
// the other alternative (presumably the #else side - not visible here).
43 #ifdef __cpp_lib_hardware_interference_size
45 #define EIGEN_ALIGN_TO_AVOID_FALSE_SHARING EIGEN_ALIGN_TO_BOUNDARY(std::hardware_destructive_interference_size)
48 #define EIGEN_ALIGN_TO_AVOID_FALSE_SHARING EIGEN_ALIGN_TO_BOUNDARY(128)
// EIGEN_IDEAL_MAX_ALIGN_BYTES is picked from the enabled instruction set:
//   - EIGEN_DONT_VECTORIZE: 16 or 0, selected by the EIGEN_GPUCC test
//   - __AVX512F__: 64
//   - __AVX__: 32
//   - __HVX__ with __HVX_LENGTH__ == 128: 128
//   - remaining case: 16
// NOTE(review): the #else/#endif lines are not visible in this listing, so
// which of 16/0 belongs to the GPU branch and that the final 16 is the
// fallback are read from line order only - confirm against the full file.
52 #if defined(EIGEN_DONT_VECTORIZE)
53 #if defined(EIGEN_GPUCC)
56 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
58 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
60 #elif defined(__AVX512F__)
62 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
63 #elif defined(__AVX__)
65 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
66 #elif defined __HVX__ && (__HVX_LENGTH__ == 128)
67 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 128
69 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
// EIGEN_MIN_ALIGN_BYTES is fixed at 16.
73 #define EIGEN_MIN_ALIGN_BYTES 16
// Derivation of EIGEN_MAX_STATIC_ALIGN_BYTES.
// Reject contradictory user settings: alignment disabled through
// EIGEN_DONT_ALIGN_STATICALLY / EIGEN_DONT_ALIGN while
// EIGEN_MAX_STATIC_ALIGN_BYTES is set to a value greater than 0.
79 #if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && \
80 EIGEN_MAX_STATIC_ALIGN_BYTES > 0
81 #error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
// When either "don't align" macro is defined, any previous value is dropped
// and EIGEN_MAX_STATIC_ALIGN_BYTES becomes 0.
86 #if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
87 #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
88 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
90 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
// If the user supplied no value, one is derived below.
// NOTE(review): presumably this is the #else side of the test above; the
// #else/#endif lines are not visible in this listing.
93 #ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
// GCC-compatible compilers on an architecture outside the listed set
// (x86/x86-64, ARM/ARM64, PPC, IA64, MIPS, LoongArch64) are flagged as not
// wanting stack alignment.
103 #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || \
104 EIGEN_ARCH_MIPS || EIGEN_ARCH_LOONGARCH64)
105 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
107 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
// Stack alignment is wanted only if the flag above is 0 and the toolchain is
// neither SunCC nor targeting QNX.
111 #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT && !EIGEN_COMP_SUNCC && !EIGEN_OS_QNX
112 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
114 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
// With stack alignment wanted the ideal alignment is used; 0 is the other
// candidate value.
117 #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
118 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
120 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
// A user-supplied EIGEN_MAX_ALIGN_BYTES that is smaller caps the static value.
126 #if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES < EIGEN_MAX_STATIC_ALIGN_BYTES
127 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
128 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
// With no static alignment, EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT is defined
// unless the user already defined it.
131 #if EIGEN_MAX_STATIC_ALIGN_BYTES == 0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
132 #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
// Fixed-size alignment shorthands built on EIGEN_ALIGN_TO_BOUNDARY.
141 #define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
142 #define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
143 #define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
144 #define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
// EIGEN_ALIGN_MAX aligns to EIGEN_MAX_STATIC_ALIGN_BYTES when that is positive;
// the empty definition is the alternative (presumably the #else side, whose
// #else/#endif lines are not visible in this listing).
145 #if EIGEN_MAX_STATIC_ALIGN_BYTES > 0
146 #define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
148 #define EIGEN_ALIGN_MAX
// Derivation of EIGEN_MAX_ALIGN_BYTES and related defaults.
// Reject EIGEN_DONT_ALIGN combined with a positive EIGEN_MAX_ALIGN_BYTES.
153 #if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES > 0
154 #error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
// EIGEN_DONT_ALIGN forces 0; otherwise a missing user value defaults to the
// ideal alignment.
157 #ifdef EIGEN_DONT_ALIGN
158 #ifdef EIGEN_MAX_ALIGN_BYTES
159 #undef EIGEN_MAX_ALIGN_BYTES
161 #define EIGEN_MAX_ALIGN_BYTES 0
162 #elif !defined(EIGEN_MAX_ALIGN_BYTES)
163 #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
// EIGEN_DEFAULT_ALIGN_BYTES takes the ideal value when it exceeds
// EIGEN_MAX_ALIGN_BYTES; EIGEN_MAX_ALIGN_BYTES is the other candidate
// (the #else/#endif lines are not visible in this listing).
166 #if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
167 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
169 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
// EIGEN_UNALIGNED_VECTORIZE defaults to 1 unless the user set it.
172 #ifndef EIGEN_UNALIGNED_VECTORIZE
173 #define EIGEN_UNALIGNED_VECTORIZE 1
// A maximum alignment of 0 implies EIGEN_DONT_VECTORIZE.
180 #if EIGEN_MAX_ALIGN_BYTES == 0
181 #ifndef EIGEN_DONT_VECTORIZE
182 #define EIGEN_DONT_VECTORIZE
// x86 SIMD feature selection.
// NOTE(review): many guard lines (#if/#ifdef/#else/#endif) of this section
// are not visible in this listing; the comments below describe only what the
// visible lines establish.
// SSE2 availability flag derived from _M_IX86_FP >= 2 or an x86-64 target.
191 #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
192 #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
// SSE2 availability flag derived from the __SSE2__ predefined macro.
195 #if defined(__SSE2__)
196 #define EIGEN_SSE2_ON_NON_MSVC
// Vectorization is only configured when it was not disabled and this is not a
// GPU compilation.
200 #if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))
// Baseline: either SSE2 flag enables EIGEN_VECTORIZE with SSE and SSE2.
202 #if defined(EIGEN_SSE2_ON_NON_MSVC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
207 #define EIGEN_VECTORIZE
208 #define EIGEN_VECTORIZE_SSE
209 #define EIGEN_VECTORIZE_SSE2
// Individual SSE levels (each guard is not visible in this listing).
216 #define EIGEN_VECTORIZE_SSE3
219 #define EIGEN_VECTORIZE_SSSE3
222 #define EIGEN_VECTORIZE_SSE4_1
225 #define EIGEN_VECTORIZE_SSE4_2
// AVX group: EIGEN_VECTORIZE_AVX is skipped when EIGEN_USE_SYCL is defined;
// the SSE3..SSE4.2 flags are listed alongside it.
228 #ifndef EIGEN_USE_SYCL
229 #define EIGEN_VECTORIZE_AVX
231 #define EIGEN_VECTORIZE_SSE3
232 #define EIGEN_VECTORIZE_SSSE3
233 #define EIGEN_VECTORIZE_SSE4_1
234 #define EIGEN_VECTORIZE_SSE4_2
// AVX2 group: AVX2 and AVX are skipped when EIGEN_USE_SYCL is defined.
237 #ifndef EIGEN_USE_SYCL
238 #define EIGEN_VECTORIZE_AVX2
239 #define EIGEN_VECTORIZE_AVX
241 #define EIGEN_VECTORIZE_SSE3
242 #define EIGEN_VECTORIZE_SSSE3
243 #define EIGEN_VECTORIZE_SSE4_1
244 #define EIGEN_VECTORIZE_SSE4_2
// FMA: enabled by __FMA__, or by MSVC together with __AVX2__.
246 #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
249 #define EIGEN_VECTORIZE_FMA
// AVX512F requires FMA to have been enabled above; otherwise compilation
// stops with one of the two messages (the test choosing between them is not
// visible in this listing).
251 #if defined(__AVX512F__)
252 #ifndef EIGEN_VECTORIZE_FMA
254 #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
256 #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
// AVX512 group: AVX512/AVX2/AVX are skipped when EIGEN_USE_SYCL is defined.
259 #ifndef EIGEN_USE_SYCL
260 #define EIGEN_VECTORIZE_AVX512
261 #define EIGEN_VECTORIZE_AVX2
262 #define EIGEN_VECTORIZE_AVX
264 #define EIGEN_VECTORIZE_FMA
265 #define EIGEN_VECTORIZE_SSE3
266 #define EIGEN_VECTORIZE_SSSE3
267 #define EIGEN_VECTORIZE_SSE4_1
268 #define EIGEN_VECTORIZE_SSE4_2
// AVX512 sub-extensions; the DQ/ER/VL guards are not visible in this listing.
269 #ifndef EIGEN_USE_SYCL
271 #define EIGEN_VECTORIZE_AVX512DQ
274 #define EIGEN_VECTORIZE_AVX512ER
276 #ifdef __AVX512BF16__
277 #define EIGEN_VECTORIZE_AVX512BF16
280 #define EIGEN_VECTORIZE_AVX512VL
// AVX512-FP16: the error messages state that AVX512-VL must also be enabled.
282 #ifdef __AVX512FP16__
284 #define EIGEN_VECTORIZE_AVX512FP16
287 #error Please add -mavx512vl to your compiler flags: compiling with -mavx512fp16 alone without AVX512-VL is not supported.
289 #error Please enable AVX512-VL in your compiler flags (e.g. -mavx512vl): compiling with AVX512-FP16 alone without AVX512-VL is not supported.
// Workaround for one specific Apple clang build (11000033) with a macOS 10.15
// deployment target: AVX, AVX2, FMA and the AVX512 flags are undefined again.
297 #if (EIGEN_COMP_CLANGAPPLE == 11000033) && (__MAC_OS_X_VERSION_MIN_REQUIRED == 101500)
300 #ifdef EIGEN_VECTORIZE_AVX
301 #undef EIGEN_VECTORIZE_AVX
// NOTE(review): the directive that emits this message is not visible in this listing.
303 "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
304 #ifdef EIGEN_VECTORIZE_AVX2
305 #undef EIGEN_VECTORIZE_AVX2
307 #ifdef EIGEN_VECTORIZE_FMA
308 #undef EIGEN_VECTORIZE_FMA
310 #ifdef EIGEN_VECTORIZE_AVX512
311 #undef EIGEN_VECTORIZE_AVX512
313 #ifdef EIGEN_VECTORIZE_AVX512DQ
314 #undef EIGEN_VECTORIZE_AVX512DQ
316 #ifdef EIGEN_VECTORIZE_AVX512ER
317 #undef EIGEN_VECTORIZE_AVX512ER
// x86 intrinsic headers.
// ICC >= 11.10 and Emscripten include <immintrin.h> directly; the individual
// headers below are the alternative (presumably the #else side - the
// #else/#endif lines are not visible in this listing).
342 #if EIGEN_COMP_ICC >= 1110 || EIGEN_COMP_EMSCRIPTEN
343 #include <immintrin.h>
// Unconditional headers on this path.
345 #include <mmintrin.h>
346 #include <emmintrin.h>
347 #include <xmmintrin.h>
// One additional header per enabled SSE level.
348 #ifdef EIGEN_VECTORIZE_SSE3
349 #include <pmmintrin.h>
351 #ifdef EIGEN_VECTORIZE_SSSE3
352 #include <tmmintrin.h>
354 #ifdef EIGEN_VECTORIZE_SSE4_1
355 #include <smmintrin.h>
357 #ifdef EIGEN_VECTORIZE_SSE4_2
358 #include <nmmintrin.h>
// AVX or AVX512 pulls in <immintrin.h>.
360 #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
361 #include <immintrin.h>
// Non-x86 branches of the architecture selection chain.
// VSX (when __APPLE__ is not defined): vectorization with FMA.
366 #elif defined(__VSX__) && !defined(__APPLE__)
368 #define EIGEN_VECTORIZE
369 #define EIGEN_VECTORIZE_VSX 1
370 #define EIGEN_VECTORIZE_FMA
// AltiVec: vectorization with FMA.
378 #elif defined __ALTIVEC__
380 #define EIGEN_VECTORIZE
381 #define EIGEN_VECTORIZE_ALTIVEC
382 #define EIGEN_VECTORIZE_FMA
// ARM NEON, used only when EIGEN_ARM64_USE_SVE is not defined.
390 #elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)
392 #define EIGEN_VECTORIZE
393 #define EIGEN_VECTORIZE_NEON
394 #include <arm_neon.h>
398 #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)
400 #define EIGEN_VECTORIZE
401 #define EIGEN_VECTORIZE_SVE
406 #if defined __ARM_FEATURE_SVE_BITS
407 #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
409 #error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
// s390x with the vector facility (__VEC__).
412 #elif (defined __s390x__ && defined __VEC__)
414 #define EIGEN_VECTORIZE
415 #define EIGEN_VECTORIZE_ZVECTOR
416 #include <vecintrin.h>
// MIPS MSA: the little-endian test guards the lines that follow; the pointer
// model (__LP64__) selects between EIGEN_MIPS_64 and EIGEN_MIPS_32.
// NOTE(review): the #else/#endif lines are not visible in this listing, so the
// exact extent of each conditional cannot be confirmed here.
418 #elif defined __mips_msa
422 #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
423 #if defined(__LP64__)
424 #define EIGEN_MIPS_64
426 #define EIGEN_MIPS_32
428 #define EIGEN_VECTORIZE
429 #define EIGEN_VECTORIZE_MSA
// LoongArch64 with the SX extension.
433 #elif (defined __loongarch64 && defined __loongarch_sx)
435 #define EIGEN_VECTORIZE
436 #define EIGEN_VECTORIZE_LSX
437 #include <lsxintrin.h>
// Hexagon HVX, only for a 128-byte vector length.
439 #elif defined __HVX__ && (__HVX_LENGTH__ == 128)
441 #define EIGEN_VECTORIZE
442 #define EIGEN_VECTORIZE_HVX
443 #include <hexagon_types.h>
// Half-precision and GPU configuration.
// ARM64 scalar FP16 arithmetic needs <arm_fp16.h>.
451 #if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
452 #include <arm_fp16.h>
// ARM FMA support also sets EIGEN_VECTORIZE_FMA.
456 #if defined(__ARM_FEATURE_FMA)
457 #define EIGEN_VECTORIZE_FMA
// F16C conversions: enabled outside GPU compilation, and for strict clang
// only from version 3.8.0 on.
460 #if defined(__F16C__) && !defined(EIGEN_GPUCC) && (!EIGEN_COMP_CLANG_STRICT || EIGEN_CLANG_STRICT_AT_LEAST(3, 8, 0))
462 #define EIGEN_HAS_FP16_C
// NOTE(review): the guard around this include is not visible in this listing.
470 #include <immintrin.h>
// CUDA: GPU vector types, plus FP16 support when EIGEN_CUDA_SDK_VER >= 70500.
474 #if defined EIGEN_CUDACC
475 #define EIGEN_VECTORIZE_GPU
476 #include <vector_types.h>
477 #if EIGEN_CUDA_SDK_VER >= 70500
478 #define EIGEN_HAS_CUDA_FP16
482 #if defined(EIGEN_HAS_CUDA_FP16)
483 #include <cuda_runtime_api.h>
484 #include <cuda_fp16.h>
// HIP: GPU vector types with FP16 and BF16 support and their headers.
487 #if defined(EIGEN_HIPCC)
488 #define EIGEN_VECTORIZE_GPU
489 #include <hip/hip_vector_types.h>
490 #define EIGEN_HAS_HIP_FP16
491 #include <hip/hip_fp16.h>
492 #define EIGEN_HAS_HIP_BF16
493 #include <hip/hip_bfloat16.h>
498 #include "../InternalHeaderCheck.h"
// Body of SimdInstructionSetsInUse(): returns a human-readable,
// comma-separated list of the SIMD instruction sets selected above. The first
// matching EIGEN_VECTORIZE_* macro, from most to least capable, decides the
// string.
// NOTE(review): the function header and several return lines are not visible
// in this listing.
503 #if defined(EIGEN_VECTORIZE_AVX512)
504 return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
505 #elif defined(EIGEN_VECTORIZE_AVX)
// A comma separates AVX from SSE, matching the other lists.
506 return "AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
507 #elif defined(EIGEN_VECTORIZE_SSE4_2)
508 return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
509 #elif defined(EIGEN_VECTORIZE_SSE4_1)
510 return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
511 #elif defined(EIGEN_VECTORIZE_SSSE3)
512 return "SSE, SSE2, SSE3, SSSE3";
513 #elif defined(EIGEN_VECTORIZE_SSE3)
514 return "SSE, SSE2, SSE3";
515 #elif defined(EIGEN_VECTORIZE_SSE2)
517 #elif defined(EIGEN_VECTORIZE_ALTIVEC)
519 #elif defined(EIGEN_VECTORIZE_VSX)
521 #elif defined(EIGEN_VECTORIZE_NEON)
523 #elif defined(EIGEN_VECTORIZE_SVE)
525 #elif defined(EIGEN_VECTORIZE_ZVECTOR)
526 return "S390X ZVECTOR";
527 #elif defined(EIGEN_VECTORIZE_MSA)
529 #elif defined(EIGEN_VECTORIZE_LSX)
530 return "LOONGARCH64 LSX";
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
static const char * SimdInstructionSetsInUse(void)
Definition: ConfigureVectorization.h:502