de/d4f/ConfigureVectorization_8h_source.html

 // This file is part of Eigen, a lightweight C++ template library

 // for linear algebra.

 //

 // Copyright (C) 2008-2018 Gael Guennebaud <gael.guennebaud@inria.fr>

 // Copyright (C) 2020, Arm Limited and Contributors

 //

 // This Source Code Form is subject to the terms of the Mozilla

 // Public License v. 2.0. If a copy of the MPL was not distributed

 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


 #ifndef EIGEN_CONFIGURE_VECTORIZATION_H

 #define EIGEN_CONFIGURE_VECTORIZATION_H


 //------------------------------------------------------------------------------------------

 // Static and dynamic alignment control

 //

 // The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES

 // as the maximal boundary in bytes on which dynamically and statically allocated data may be aligned, respectively.

 // The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,

 // a default value is automatically computed based on architecture, compiler, and OS.

 //

 // This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}

 // to be used to declare statically aligned buffers.

 //------------------------------------------------------------------------------------------


 // EIGEN_ALIGN_TO_BOUNDARY(n) requests n-byte alignment for the declaration it decorates, and
 // EIGEN_ALIGNOF(x) queries the alignment of x. Both exist to satisfy SIMD requirements.
 //
 // The alignment request is emitted EVEN when vectorization (EIGEN_VECTORIZE) is disabled, so that
 // toggling vectorization never changes binary compatibility: if alignment depended on
 // EIGEN_VECTORIZE, vectorized and non-vectorized code could not be linked together.
 //
 // The standard keywords are used everywhere except when EIGEN_CUDACC is defined, where the
 // __align__ / __alignof spellings are used instead.
 #ifndef EIGEN_CUDACC
 #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
 #define EIGEN_ALIGNOF(x) alignof(x)
 #else
 #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
 #define EIGEN_ALIGNOF(x) __alignof(x)
 #endif


 // Align to the boundary that avoids false sharing.
 // https://en.cppreference.com/w/cpp/thread/hardware_destructive_interference_size
 //
 // EIGEN_ALIGN_TO_AVOID_FALSE_SHARING expands to EIGEN_ALIGN_TO_BOUNDARY(N), where N is
 // std::hardware_destructive_interference_size when __cpp_lib_hardware_interference_size is
 // already defined at this point, and a fixed 128 bytes otherwise.
 //
 // NOTE(review): the feature-test macro is tested BEFORE <new> is included here, so the first
 // branch is presumably only taken when an earlier header has already pulled in <new> or
 // <version>. That would make the chosen alignment depend on include order -- confirm whether
 // this is intended before relying on the value across translation units.
 #ifdef __cpp_lib_hardware_interference_size
 #include <new>
 #define EIGEN_ALIGN_TO_AVOID_FALSE_SHARING EIGEN_ALIGN_TO_BOUNDARY(std::hardware_destructive_interference_size)
 #else
 // Overalign for the cache line size of 128 bytes (Apple M1)
 #define EIGEN_ALIGN_TO_AVOID_FALSE_SHARING EIGEN_ALIGN_TO_BOUNDARY(128)
 #endif


 // EIGEN_IDEAL_MAX_ALIGN_BYTES: the alignment (in bytes) that would be ideal for the instruction
 // sets enabled at compile time.
 #if !defined(EIGEN_DONT_VECTORIZE)
 // Vectorization is allowed: pick the widest alignment the enabled instruction set asks for.
 // 64 and 32 byte static alignments are preferred only if really required.
 #if defined(__AVX512F__)
 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
 #elif defined(__AVX__)
 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
 #elif defined(__HVX__) && (__HVX_LENGTH__ == 128)
 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 128
 #else
 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
 #endif
 #elif defined(EIGEN_GPUCC)
 // The user explicitly disabled vectorization, but GPU code is always vectorized and requires
 // memory alignment for statically allocated buffers.
 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
 #else
 // The user explicitly disabled vectorization, so alignment is disabled as well.
 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
 #endif

 // EIGEN_MIN_ALIGN_BYTES: the smallest value for which the notion of explicit alignment makes sense.
 #define EIGEN_MIN_ALIGN_BYTES 16


 // Defines the boundary (in bytes) on which the data needs to be aligned. Note
 // that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
 // aligned at all regardless of the value of this #define.

 // Reject contradictory user settings: asking for "no static alignment" through one of the
 // EIGEN_DONT_ALIGN* macros while also requesting a non-zero EIGEN_MAX_STATIC_ALIGN_BYTES.
 #if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && \
     EIGEN_MAX_STATIC_ALIGN_BYTES > 0
 #error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
 #endif

 // EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated
 // They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
 #if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
 // Only a user-supplied value of 0 can reach this point (anything larger hit the #error above);
 // drop it so the macro can be redefined without a redefinition warning.
 #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
 #endif
 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
 #endif


 // Everything in this section only runs when neither the user nor the deprecated
 // EIGEN_DONT_ALIGN* macros have fixed EIGEN_MAX_STATIC_ALIGN_BYTES already.
 #ifndef EIGEN_MAX_STATIC_ALIGN_BYTES

 // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES

 // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
 // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
 // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
 // certain common platform (compiler+architecture combinations) to avoid these problems.
 // Only static alignment is really problematic (relies on nonstandard compiler extensions),
 // try to keep heap alignment even when we have to disable static alignment.
 //
 // The flag below is 1 for GCC-compatible compilers targeting an architecture that is NOT in the
 // allow-list (x86/x86-64, ARM/ARM64, PPC, IA64, MIPS, LoongArch64).
 #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || \
                          EIGEN_ARCH_MIPS || EIGEN_ARCH_LOONGARCH64)
 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
 #else
 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
 #endif

 // static alignment is completely disabled for the GCC/architecture combinations flagged above,
 // for Sun Studio (EIGEN_COMP_SUNCC), and on QNX (EIGEN_OS_QNX)
 #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT && !EIGEN_COMP_SUNCC && !EIGEN_OS_QNX
 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
 #else
 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
 #endif

 // Default: the ideal alignment where the platform tolerates static alignment, 0 (disabled) elsewhere.
 #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
 #else
 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
 #endif

 #endif


 // If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_STATIC_ALIGN_BYTES
 #if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES < EIGEN_MAX_STATIC_ALIGN_BYTES
 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
 #endif

 // With static alignment disabled, EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT is defined automatically
 // (unless the user already defined it).
 #if EIGEN_MAX_STATIC_ALIGN_BYTES == 0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
 #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
 #endif

 // At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
 // It takes into account both the user choice to explicitly enable/disable alignment (by setting
 // EIGEN_MAX_STATIC_ALIGN_BYTES) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only
 // EIGEN_MAX_STATIC_ALIGN_BYTES should be used.

 // Shortcuts to EIGEN_ALIGN_TO_BOUNDARY
 #define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
 #define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
 #define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
 #define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
 // EIGEN_ALIGN_MAX aligns to the final static limit, and expands to nothing when static
 // alignment is disabled.
 #if EIGEN_MAX_STATIC_ALIGN_BYTES > 0
 #define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
 #else
 #define EIGEN_ALIGN_MAX
 #endif


 // Dynamic alignment control

 // Reject contradictory user settings: EIGEN_DONT_ALIGN together with a non-zero EIGEN_MAX_ALIGN_BYTES.
 #if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES > 0
 #error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
 #endif

 // Resolve EIGEN_MAX_ALIGN_BYTES: EIGEN_DONT_ALIGN forces 0, an explicit user value is kept as is,
 // and otherwise the ideal alignment is used.
 #ifdef EIGEN_DONT_ALIGN
 #ifdef EIGEN_MAX_ALIGN_BYTES
 #undef EIGEN_MAX_ALIGN_BYTES
 #endif
 #define EIGEN_MAX_ALIGN_BYTES 0
 #elif !defined(EIGEN_MAX_ALIGN_BYTES)
 #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
 #endif

 // EIGEN_DEFAULT_ALIGN_BYTES is the larger of EIGEN_IDEAL_MAX_ALIGN_BYTES and EIGEN_MAX_ALIGN_BYTES.
 #if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
 #else
 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
 #endif

 // EIGEN_UNALIGNED_VECTORIZE defaults to 1 unless the user supplied a value.
 #ifndef EIGEN_UNALIGNED_VECTORIZE
 #define EIGEN_UNALIGNED_VECTORIZE 1
 #endif


 //----------------------------------------------------------------------

 // if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
 // account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
 #if EIGEN_MAX_ALIGN_BYTES == 0
 #ifndef EIGEN_DONT_VECTORIZE
 #define EIGEN_DONT_VECTORIZE
 #endif
 #endif

 // SSE2 availability is recorded in one of two helper macros, depending on the compiler:
 // EIGEN_SSE2_ON_MSVC_2008_OR_LATER for MSVC and EIGEN_SSE2_ON_NON_MSVC for everything else.
 //
 // The following (except #include <malloc.h> and _M_IX86_FP ??) can likely be
 // removed as gcc 4.1 and msvc 2008 are not supported anyways.
 #if EIGEN_COMP_MSVC
 #include <malloc.h>  // for _aligned_malloc -- need it regardless of whether vectorization is enabled
 // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
 // Hence SSE2 is also assumed whenever the target is x86-64.
 #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
 #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
 #endif
 #else
 #if defined(__SSE2__)
 #define EIGEN_SSE2_ON_NON_MSVC
 #endif
 #endif


 #if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))


 #if defined(EIGEN_SSE2_ON_NON_MSVC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)


 // Defines symbols for compile-time detection of which instructions are
 // used.
 // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
 #define EIGEN_VECTORIZE
 #define EIGEN_VECTORIZE_SSE
 #define EIGEN_VECTORIZE_SSE2

 // Detect sse3/ssse3/sse4:
 // gcc and icc defines __SSE3__, ...
 // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
 // want to force the use of those instructions with msvc.
 #ifdef __SSE3__
 #define EIGEN_VECTORIZE_SSE3
 #endif
 #ifdef __SSSE3__
 #define EIGEN_VECTORIZE_SSSE3
 #endif
 #ifdef __SSE4_1__
 #define EIGEN_VECTORIZE_SSE4_1
 #endif
 #ifdef __SSE4_2__
 #define EIGEN_VECTORIZE_SSE4_2
 #endif
 // AVX marks every SSE level up to SSE4.2 as in use. The AVX macro itself is not defined when
 // EIGEN_USE_SYCL is set.
 #ifdef __AVX__
 #ifndef EIGEN_USE_SYCL
 #define EIGEN_VECTORIZE_AVX
 #endif
 #define EIGEN_VECTORIZE_SSE3
 #define EIGEN_VECTORIZE_SSSE3
 #define EIGEN_VECTORIZE_SSE4_1
 #define EIGEN_VECTORIZE_SSE4_2
 #endif
 // AVX2 additionally enables the AVX2 macro (and AVX), again skipped under EIGEN_USE_SYCL.
 #ifdef __AVX2__
 #ifndef EIGEN_USE_SYCL
 #define EIGEN_VECTORIZE_AVX2
 #define EIGEN_VECTORIZE_AVX
 #endif
 #define EIGEN_VECTORIZE_SSE3
 #define EIGEN_VECTORIZE_SSSE3
 #define EIGEN_VECTORIZE_SSE4_1
 #define EIGEN_VECTORIZE_SSE4_2
 #endif
 #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
 // MSVC does not expose a switch dedicated for FMA
 // For MSVC, AVX2 => FMA
 #define EIGEN_VECTORIZE_FMA
 #endif

 // AVX512 detection. FMA must already have been detected at this point, otherwise compilation
 // stops with an explanatory #error.
 #if defined(__AVX512F__)
 #ifndef EIGEN_VECTORIZE_FMA
 #if EIGEN_COMP_GNUC
 #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
 #else
 #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
 #endif
 #endif
 // The AVX-family macros are not defined when EIGEN_USE_SYCL is set.
 #ifndef EIGEN_USE_SYCL
 #define EIGEN_VECTORIZE_AVX512
 #define EIGEN_VECTORIZE_AVX2
 #define EIGEN_VECTORIZE_AVX
 #endif
 // EIGEN_VECTORIZE_FMA is necessarily defined already (see the check above); the identical
 // redefinition is harmless.
 #define EIGEN_VECTORIZE_FMA
 #define EIGEN_VECTORIZE_SSE3
 #define EIGEN_VECTORIZE_SSSE3
 #define EIGEN_VECTORIZE_SSE4_1
 #define EIGEN_VECTORIZE_SSE4_2
 // Optional AVX512 extensions, each mirrored from the corresponding compiler macro.
 #ifndef EIGEN_USE_SYCL
 #ifdef __AVX512DQ__
 #define EIGEN_VECTORIZE_AVX512DQ
 #endif
 #ifdef __AVX512ER__
 #define EIGEN_VECTORIZE_AVX512ER
 #endif
 #ifdef __AVX512BF16__
 #define EIGEN_VECTORIZE_AVX512BF16
 #endif
 #ifdef __AVX512VL__
 #define EIGEN_VECTORIZE_AVX512VL
 #endif
 // AVX512-FP16 is only accepted together with AVX512-VL.
 #ifdef __AVX512FP16__
 #ifdef __AVX512VL__
 #define EIGEN_VECTORIZE_AVX512FP16
 #else
 #if EIGEN_COMP_GNUC
 #error Please add -mavx512vl to your compiler flags: compiling with -mavx512fp16 alone without AVX512-VL is not supported.
 #else
 #error Please enable AVX512-VL in your compiler flags (e.g. -mavx512vl): compiling with AVX512-FP16 alone without AVX512-VL is not supported.
 #endif
 #endif
 #endif
 #endif
 #endif


 // Disable AVX support on broken xcode versions
 #if (EIGEN_COMP_CLANGAPPLE == 11000033) && (__MAC_OS_X_VERSION_MIN_REQUIRED == 101500)
 // The clang compiler shipped with XCode 11.0 has a nasty bug that shows up in a common
 // configuration: deployment target macOS 10.15 with AVX enabled.
 // See https://trac.macports.org/ticket/58776#no1
 #ifdef EIGEN_VECTORIZE_AVX
 #undef EIGEN_VECTORIZE_AVX
 #warning \
     "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
 // With AVX gone, every macro that builds on it has to be withdrawn as well, including all
 // AVX512 extension macros that may have been defined earlier in this file.
 #ifdef EIGEN_VECTORIZE_AVX2
 #undef EIGEN_VECTORIZE_AVX2
 #endif
 #ifdef EIGEN_VECTORIZE_FMA
 #undef EIGEN_VECTORIZE_FMA
 #endif
 #ifdef EIGEN_VECTORIZE_AVX512
 #undef EIGEN_VECTORIZE_AVX512
 #endif
 #ifdef EIGEN_VECTORIZE_AVX512DQ
 #undef EIGEN_VECTORIZE_AVX512DQ
 #endif
 #ifdef EIGEN_VECTORIZE_AVX512ER
 #undef EIGEN_VECTORIZE_AVX512ER
 #endif
 #ifdef EIGEN_VECTORIZE_AVX512BF16
 #undef EIGEN_VECTORIZE_AVX512BF16
 #endif
 #ifdef EIGEN_VECTORIZE_AVX512VL
 #undef EIGEN_VECTORIZE_AVX512VL
 #endif
 #ifdef EIGEN_VECTORIZE_AVX512FP16
 #undef EIGEN_VECTORIZE_AVX512FP16
 #endif
 #endif
 // NOTE: Confirmed test failures in XCode 11.0 and XCode 11.2 with -macosx-version-min=10.15 and AVX.
 // NOTE: Using -macosx-version-min=10.15 with XCode 11.0 results in runtime segmentation faults in
 //       many tests; XCode 11.2 produces core dumps in 3 tests.
 // NOTE: Using -macosx-version-min=10.14 produces functioning and passing tests in all cases.
 // NOTE: __clang_version__ "11.0.0 (clang-1100.0.33.8)"  XCode 11.0 <- Produces many segfaulting and
 //       core dumping tests with -macosx-version-min=10.15 and AVX
 // NOTE: __clang_version__ "11.0.0 (clang-1100.0.33.12)" XCode 11.2 <- Produces 3 core dumping tests
 //       with -macosx-version-min=10.15 and AVX
 #endif


 // include files

 // This extern "C" works around a MINGW-w64 compilation issue
 // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
 // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
 // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
 // with conflicting linkage.  The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
 // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
 // notice that since these are C headers, the extern "C" is theoretically needed anyways.
 extern "C" {
 // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
 // Doing so triggers some issues with ICC. However old gcc versions seem to not have this file, thus:
 #if EIGEN_COMP_ICC >= 1110 || EIGEN_COMP_EMSCRIPTEN
 #include <immintrin.h>
 #else
 // Include the baseline headers, then one extra header per instruction set detected earlier.
 #include <mmintrin.h>
 #include <emmintrin.h>
 #include <xmmintrin.h>
 #ifdef EIGEN_VECTORIZE_SSE3
 #include <pmmintrin.h>
 #endif
 #ifdef EIGEN_VECTORIZE_SSSE3
 #include <tmmintrin.h>
 #endif
 #ifdef EIGEN_VECTORIZE_SSE4_1
 #include <smmintrin.h>
 #endif
 #ifdef EIGEN_VECTORIZE_SSE4_2
 #include <nmmintrin.h>
 #endif
 #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
 #include <immintrin.h>
 #endif
 #endif
 }  // end extern "C"


 // Each of the following branches selects one non-SSE backend: it defines EIGEN_VECTORIZE plus a
 // backend-specific EIGEN_VECTORIZE_* macro and includes the matching intrinsics header.
 #elif defined(__VSX__) && !defined(__APPLE__)

 // VSX backend (not selected when __APPLE__ is defined); it also reports FMA as available.
 #define EIGEN_VECTORIZE
 #define EIGEN_VECTORIZE_VSX 1
 #define EIGEN_VECTORIZE_FMA
 #include <altivec.h>
 // We need to #undef all these ugly tokens defined in <altivec.h>
 // => use __vector instead of vector
 #undef bool
 #undef vector
 #undef pixel

 #elif defined __ALTIVEC__

 // AltiVec backend; it also reports FMA as available.
 #define EIGEN_VECTORIZE
 #define EIGEN_VECTORIZE_ALTIVEC
 #define EIGEN_VECTORIZE_FMA
 #include <altivec.h>
 // We need to #undef all these ugly tokens defined in <altivec.h>
 // => use __vector instead of vector
 #undef bool
 #undef vector
 #undef pixel

 #elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)

 // NEON backend, unless the user explicitly asked for SVE through EIGEN_ARM64_USE_SVE.
 #define EIGEN_VECTORIZE
 #define EIGEN_VECTORIZE_NEON
 #include <arm_neon.h>

 // We currently require SVE to be enabled explicitly via EIGEN_ARM64_USE_SVE and
 // will not select the backend automatically
 #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)

 #define EIGEN_VECTORIZE
 #define EIGEN_VECTORIZE_SVE
 #include <arm_sve.h>


 // Since we depend on knowing SVE vector lengths at compile-time, we need

 // to ensure a fixed lengths is set

 #if defined __ARM_FEATURE_SVE_BITS

 #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS

 #else

 #error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."

 #endif


 #elif (defined __s390x__ && defined __VEC__)

 // ZVECTOR backend.
 #define EIGEN_VECTORIZE
 #define EIGEN_VECTORIZE_ZVECTOR
 #include <vecintrin.h>

 #elif defined __mips_msa

 // Limit MSA optimizations to little-endian CPUs for now.
 // TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
 #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
 // EIGEN_MIPS_64 / EIGEN_MIPS_32 record whether __LP64__ is defined.
 #if defined(__LP64__)
 #define EIGEN_MIPS_64
 #else
 #define EIGEN_MIPS_32
 #endif
 #define EIGEN_VECTORIZE
 #define EIGEN_VECTORIZE_MSA
 #include <msa.h>
 #endif

 #elif (defined __loongarch64 && defined __loongarch_sx)

 // LSX backend.
 #define EIGEN_VECTORIZE
 #define EIGEN_VECTORIZE_LSX
 #include <lsxintrin.h>

 #elif defined __HVX__ && (__HVX_LENGTH__ == 128)

 // HVX backend, selected only when __HVX_LENGTH__ is 128.
 #define EIGEN_VECTORIZE
 #define EIGEN_VECTORIZE_HVX
 #include <hexagon_types.h>

 #endif  // backend selection chain
 #endif  // !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))


 // Following the Arm ACLE arm_neon.h should also include arm_fp16.h but not all
 // compilers seem to follow this. We therefore include it explicitly.
 // See also: https://bugs.llvm.org/show_bug.cgi?id=47955
 #if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
 #include <arm_fp16.h>
 #endif

 // Enable FMA for ARM.
 // Note that this check is not nested inside the EIGEN_DONT_VECTORIZE guard used for backend selection.
 #if defined(__ARM_FEATURE_FMA)
 #define EIGEN_VECTORIZE_FMA
 #endif

 #if defined(__F16C__) && !defined(EIGEN_GPUCC) && (!EIGEN_COMP_CLANG_STRICT || EIGEN_CLANG_STRICT_AT_LEAST(3, 8, 0))
 // We can use the optimized fp16 to float and float to fp16 conversion routines
 #define EIGEN_HAS_FP16_C

 #if EIGEN_COMP_GNUC
 // Make sure immintrin.h is included, even if e.g. vectorization is
 // explicitly disabled (see also issue #2395).
 // Note that FP16C intrinsics for gcc and clang are included by immintrin.h,
 // as opposed to emmintrin.h as suggested by Intel:
 // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711
 #include <immintrin.h>
 #endif
 #endif

 // CUDA: GPU vector types are always pulled in; half-precision support is flagged when
 // EIGEN_CUDA_SDK_VER is at least 70500.
 #if defined EIGEN_CUDACC
 #define EIGEN_VECTORIZE_GPU
 #include <vector_types.h>
 #if EIGEN_CUDA_SDK_VER >= 70500
 #define EIGEN_HAS_CUDA_FP16
 #endif
 #endif

 // Kept outside the EIGEN_CUDACC block above, so a user-defined EIGEN_HAS_CUDA_FP16 also
 // triggers these includes.
 #if defined(EIGEN_HAS_CUDA_FP16)
 #include <cuda_runtime_api.h>
 #include <cuda_fp16.h>
 #endif

 // HIP: vector types plus unconditional half and bfloat16 support.
 #if defined(EIGEN_HIPCC)
 #define EIGEN_VECTORIZE_GPU
 #include <hip/hip_vector_types.h>
 #define EIGEN_HAS_HIP_FP16
 #include <hip/hip_fp16.h>
 #define EIGEN_HAS_HIP_BF16
 #include <hip/hip_bfloat16.h>
 #endif


 // IWYU pragma: private

 #include "../InternalHeaderCheck.h"


 namespace Eigen {

 // Returns a human-readable, comma-separated description of the SIMD instruction sets selected
 // at compile time, or "None" when no EIGEN_VECTORIZE_* backend macro is defined.
 //
 // The answer is fixed by the preprocessor: the branches are tested from the most capable x86
 // level downwards, followed by the non-x86 backends, and exactly one string literal is compiled
 // in. The returned pointer refers to that literal and must not be freed or modified.
 inline static const char *SimdInstructionSetsInUse(void) {
 #if defined(EIGEN_VECTORIZE_AVX512)
   return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
 #elif defined(EIGEN_VECTORIZE_AVX)
   return "AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
 #elif defined(EIGEN_VECTORIZE_SSE4_2)
   return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
 #elif defined(EIGEN_VECTORIZE_SSE4_1)
   return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
 #elif defined(EIGEN_VECTORIZE_SSSE3)
   return "SSE, SSE2, SSE3, SSSE3";
 #elif defined(EIGEN_VECTORIZE_SSE3)
   return "SSE, SSE2, SSE3";
 #elif defined(EIGEN_VECTORIZE_SSE2)
   return "SSE, SSE2";
 #elif defined(EIGEN_VECTORIZE_ALTIVEC)
   return "AltiVec";
 #elif defined(EIGEN_VECTORIZE_VSX)
   return "VSX";
 #elif defined(EIGEN_VECTORIZE_NEON)
   return "ARM NEON";
 #elif defined(EIGEN_VECTORIZE_SVE)
   return "ARM SVE";
 #elif defined(EIGEN_VECTORIZE_ZVECTOR)
   return "S390X ZVECTOR";
 #elif defined(EIGEN_VECTORIZE_MSA)
   return "MIPS MSA";
 #elif defined(EIGEN_VECTORIZE_LSX)
   return "LOONGARCH64 LSX";
 #elif defined(EIGEN_VECTORIZE_HVX)
   // The HVX backend is selected earlier in this file, so report it instead of "None".
   return "HEXAGON HVX";
 #else
   return "None";
 #endif
 }

 }  // end namespace Eigen


 #endif  // EIGEN_CONFIGURE_VECTORIZATION_H

Eigen
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70

Eigen::SimdInstructionSetsInUse
static const char * SimdInstructionSetsInUse(void)
Definition: ConfigureVectorization.h:502