db/db7/SSE_2TypeCasting_8h_source.html

 // This file is part of Eigen, a lightweight C++ template library

 // for linear algebra.

 //

 // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>

 //

 // This Source Code Form is subject to the terms of the Mozilla

 // Public License v. 2.0. If a copy of the MPL was not distributed

 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


 #ifndef EIGEN_TYPE_CASTING_SSE_H

 #define EIGEN_TYPE_CASTING_SSE_H


 // IWYU pragma: private

 #include "../../InternalHeaderCheck.h"


 namespace Eigen {


 namespace internal {


 // Cast-trait specializations for the scalar pairs handled by this file.
 // They are only declared when EIGEN_VECTORIZE_AVX is not defined
 // (presumably the AVX backend declares its own versions -- confirm there).
 // Each specialization simply inherits from vectorized_type_casting_traits
 // instantiated with the same <Src, Tgt> pair.
 #ifndef EIGEN_VECTORIZE_AVX

 // float <-> bool
 template <>

 struct type_casting_traits<float, bool> : vectorized_type_casting_traits<float, bool> {};

 template <>

 struct type_casting_traits<bool, float> : vectorized_type_casting_traits<bool, float> {};


 // float <-> int
 template <>

 struct type_casting_traits<float, int> : vectorized_type_casting_traits<float, int> {};

 template <>

 struct type_casting_traits<int, float> : vectorized_type_casting_traits<int, float> {};


 // float <-> double
 template <>

 struct type_casting_traits<float, double> : vectorized_type_casting_traits<float, double> {};

 template <>

 struct type_casting_traits<double, float> : vectorized_type_casting_traits<double, float> {};


 // double <-> int
 template <>

 struct type_casting_traits<double, int> : vectorized_type_casting_traits<double, int> {};

 template <>

 struct type_casting_traits<int, double> : vectorized_type_casting_traits<int, double> {};


 // double <-> int64_t: additionally skipped when EIGEN_VECTORIZE_AVX2 is defined.
 #ifndef EIGEN_VECTORIZE_AVX2

 template <>

 struct type_casting_traits<double, int64_t> : vectorized_type_casting_traits<double, int64_t> {};

 template <>

 struct type_casting_traits<int64_t, double> : vectorized_type_casting_traits<int64_t, double> {};

 #endif

 #endif


 // Casts four packets of 4 floats (16 values, in the order a, b, c, d) to a
 // single packet of 16 bytes, where each byte is 1 if the corresponding float
 // compared not-equal to zero and 0 otherwise.
 template <>
 EIGEN_STRONG_INLINE Packet16b pcast<Packet4f, Packet16b>(const Packet4f& a, const Packet4f& b, const Packet4f& c,
                                                          const Packet4f& d) {
   const __m128 zeros = pzero(a);

   // Per-lane 32-bit masks: all bits set where the lane is != 0.
   const __m128i mask_a = _mm_castps_si128(_mm_cmpneq_ps(a, zeros));
   const __m128i mask_b = _mm_castps_si128(_mm_cmpneq_ps(b, zeros));
   const __m128i mask_c = _mm_castps_si128(_mm_cmpneq_ps(c, zeros));
   const __m128i mask_d = _mm_castps_si128(_mm_cmpneq_ps(d, zeros));

   // Narrow the masks with signed saturation (0 stays 0, -1 stays -1):
   // 32 -> 16 bits for each pair, then 16 -> 8 bits for all sixteen lanes.
   const __m128i mask_ab16 = _mm_packs_epi32(mask_a, mask_b);
   const __m128i mask_cd16 = _mm_packs_epi32(mask_c, mask_d);
   const __m128i mask_abcd8 = _mm_packs_epi16(mask_ab16, mask_cd16);

   // Reduce every 0xFF byte to 0x01.
   const __m128i low_bit = _mm_set1_epi8(1);
   return _mm_and_si128(mask_abcd8, low_bit);
 }


 // Casts the first four byte lanes of `a` to floats: a lane equal to zero
 // yields 0.0f, any other lane yields 1.0f. The remaining twelve lanes of the
 // input are not used.
 template <>
 EIGEN_STRONG_INLINE Packet4f pcast<Packet16b, Packet4f>(const Packet16b& a) {
   const __m128i zeros = _mm_setzero_si128();
 #ifdef EIGEN_VECTORIZE_SSE4_1
   // Widen the low four bytes to 32-bit lanes, then flag the lanes that are zero.
   const __m128i is_zero32 = _mm_cmpeq_epi32(_mm_cvtepi8_epi32(a), zeros);
 #else
   // Flag the zero bytes, then duplicate each byte mask twice (8 -> 16 -> 32
   // bits) so that the low four byte masks each fill one 32-bit lane.
   const __m128i is_zero8 = _mm_cmpeq_epi8(a, zeros);
   const __m128i is_zero16 = _mm_unpacklo_epi8(is_zero8, is_zero8);
   const __m128i is_zero32 = _mm_unpacklo_epi8(is_zero16, is_zero16);
 #endif
   // andnot: lanes flagged as zero are cleared, all others receive 1.0f.
   const __m128 ones = _mm_set_ps1(1.0f);
   return _mm_andnot_ps(_mm_castsi128_ps(is_zero32), ones);
 }


 // Casts four floats to four 32-bit integers using the truncating conversion
 // intrinsic (_mm_cvttps_epi32).
 template <>
 EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
   const __m128i truncated = _mm_cvttps_epi32(a);
   return truncated;
 }


 // Casts two packets of 2 doubles to one packet of 4 32-bit integers
 // (truncating conversion); the values of `a` come first, then those of `b`.
 template <>
 EIGEN_STRONG_INLINE Packet4i pcast<Packet2d, Packet4i>(const Packet2d& a, const Packet2d& b) {
   // Each conversion leaves its two integers in the low 64 bits of the register.
   const __m128 from_a = _mm_castsi128_ps(_mm_cvttpd_epi32(a));
   const __m128 from_b = _mm_castsi128_ps(_mm_cvttpd_epi32(b));
   // Selector 0x44 takes lanes 0 and 1 of from_a followed by lanes 0 and 1 of from_b.
   const __m128 joined = _mm_shuffle_ps(from_a, from_b, (1 << 2) | (1 << 6));
   return _mm_castps_si128(joined);
 }


 // Casts two doubles to two 64-bit integers, one lane at a time. On x86-64 the
 // truncating scalar intrinsic is used; otherwise each lane goes through a
 // plain static_cast.
 template <>
 EIGEN_STRONG_INLINE Packet2l pcast<Packet2d, Packet2l>(const Packet2d& a) {
   // The scalar conversions only read the first lane, so the reversed packet
   // is used to reach the other element.
   const Packet2d reversed = preverse(a);
 #if EIGEN_ARCH_x86_64
   const int64_t upper = _mm_cvttsd_si64(reversed);
   const int64_t lower = _mm_cvttsd_si64(a);
 #else
   const int64_t upper = static_cast<int64_t>(pfirst(reversed));
   const int64_t lower = static_cast<int64_t>(pfirst(a));
 #endif
   // _mm_set_epi64x takes the high element first.
   return _mm_set_epi64x(upper, lower);
 }


 // Casts two 64-bit integers to two doubles by spilling the packet to an
 // aligned buffer and converting each element with a scalar static_cast.
 template <>
 EIGEN_STRONG_INLINE Packet2d pcast<Packet2l, Packet2d>(const Packet2l& a) {
   EIGEN_ALIGN16 int64_t lanes[2];
   pstore(lanes, a);
   const double lower = static_cast<double>(lanes[0]);
   const double upper = static_cast<double>(lanes[1]);
   // _mm_set_pd takes the high element first.
   return _mm_set_pd(upper, lower);
 }


 // Casts four 32-bit integers to four floats (_mm_cvtepi32_ps).
 template <>
 EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
   const __m128 converted = _mm_cvtepi32_ps(a);
   return converted;
 }


 // Casts two packets of 2 doubles to one packet of 4 floats; the values of `a`
 // occupy the first two lanes and those of `b` the last two.
 template <>
 EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
   // Each conversion places its two floats in lanes 0 and 1.
   const __m128 from_a = _mm_cvtpd_ps(a);
   const __m128 from_b = _mm_cvtpd_ps(b);
   // Selector 0x44 takes lanes 0 and 1 of from_a followed by lanes 0 and 1 of from_b.
   return _mm_shuffle_ps(from_a, from_b, (1 << 2) | (1 << 6));
 }


 // Casts the first two 32-bit integers of `a` to doubles.
 template <>
 EIGEN_STRONG_INLINE Packet2d pcast<Packet4i, Packet2d>(const Packet4i& a) {
   // Only the first half of the input is converted; the second half is dropped.
   const __m128d converted = _mm_cvtepi32_pd(a);
   return converted;
 }


 // Casts the first two floats of `a` to doubles.
 template <>
 EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
   // Only the first half of the input is converted; the second half is dropped.
   const __m128d converted = _mm_cvtps_pd(a);
   return converted;
 }


 // Reinterprets the bits of a 4-float packet as a 2-double packet; no value
 // conversion takes place.
 template <>
 EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4f>(const Packet4f& a) {
   const __m128d bits = _mm_castps_pd(a);
   return bits;
 }


 // Reinterprets the bits of a 2-double packet as a 4-float packet; no value
 // conversion takes place.
 template <>
 EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet2d>(const Packet2d& a) {
   const __m128 bits = _mm_castpd_ps(a);
   return bits;
 }


 // Reinterprets the bits of a 4-float packet as a 4-int packet; no value
 // conversion takes place.
 template <>
 EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4f>(const Packet4f& a) {
   const __m128i bits = _mm_castps_si128(a);
   return bits;
 }


 // Reinterprets the bits of a 4-int packet as a 4-float packet; no value
 // conversion takes place.
 template <>
 EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4i>(const Packet4i& a) {
   const __m128 bits = _mm_castsi128_ps(a);
   return bits;
 }


 // Reinterprets the bits of a 4-int packet as a 2-double packet; no value
 // conversion takes place.
 template <>
 EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4i>(const Packet4i& a) {
   const __m128d bits = _mm_castsi128_pd(a);
   return bits;
 }


 // Reinterprets the bits of a 2-int64 packet as a 2-double packet; no value
 // conversion takes place.
 template <>
 EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2l>(const Packet2l& a) {
   const __m128d bits = _mm_castsi128_pd(a);
   return bits;
 }

 // Reinterprets the bits of a 2-double packet as a 2-int64 packet; no value
 // conversion takes place.
 template <>
 EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2d>(const Packet2d& a) {
   const __m128i bits = _mm_castpd_si128(a);
   return bits;
 }


 // Reinterprets the bits of a 2-double packet as a 4-int packet; no value
 // conversion takes place.
 template <>
 EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet2d>(const Packet2d& a) {
   const __m128i bits = _mm_castpd_si128(a);
   return bits;
 }


 // Re-labels a Packet4i as a Packet4ui through an explicit conversion between
 // the two packet types.
 template <>
 EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4i>(const Packet4i& a) {
   return static_cast<Packet4ui>(a);
 }


 // Re-labels a Packet4ui as a Packet4i through an explicit conversion between
 // the two packet types.
 template <>
 EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4ui>(const Packet4ui& a) {
   return static_cast<Packet4i>(a);
 }


 // Disable the following code since it's broken on too many platforms / compilers.
 // Everything between `#if 0` and the matching `#endif` is never compiled; it
 // is kept for reference only. The commented-out condition below is the guard
 // this section used before it was disabled.

 // #elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)

 #if 0


 // half -> float cast traits: vectorized, one source packet per target packet.
 template <>

 struct type_casting_traits<Eigen::half, float> {

   enum {

     VectorizedCast = 1,

     SrcCoeffRatio = 1,

     TgtCoeffRatio = 1

   };

 };


 // Converts four halves to four floats one element at a time: the packet's
 // 64-bit payload (a.x) is read as an integer, each 16-bit field is extracted
 // by shifting (0, 16, 32, 48 bits), turned into a half and cast to float.
 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) {

   __int64_t a64 = _mm_cvtm64_si64(a.x);

   Eigen::half h = raw_uint16_to_half(static_cast<unsigned short>(a64));

   float f1 = static_cast<float>(h);

   h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));

   float f2 = static_cast<float>(h);

   h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));

   float f3 = static_cast<float>(h);

   h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));

   float f4 = static_cast<float>(h);

   // _mm_set_ps takes the highest lane first, so f1 ends up in lane 0.
   return _mm_set_ps(f4, f3, f2, f1);

 }


 // float -> half cast traits: vectorized, one source packet per target packet.
 template <>

 struct type_casting_traits<float, Eigen::half> {

   enum {

     VectorizedCast = 1,

     SrcCoeffRatio = 1,

     TgtCoeffRatio = 1

   };

 };


 // Converts four floats to four halves one element at a time: the packet is
 // spilled to an aligned buffer, each float constructs an Eigen::half, and the
 // four raw 16-bit members (.x) are packed into the result.
 template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a) {

   EIGEN_ALIGN16 float aux[4];

   pstore(aux, a);

   Eigen::half h0(aux[0]);

   Eigen::half h1(aux[1]);

   Eigen::half h2(aux[2]);

   Eigen::half h3(aux[3]);


   Packet4h result;

   // _mm_set_pi16 takes the highest lane first, so h0 ends up in lane 0.
   result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x);

   return result;

 }


 #endif


 }  // end namespace internal


 }  // end namespace Eigen


 #endif  // EIGEN_TYPE_CASTING_SSE_H

EIGEN_ALIGN16
#define EIGEN_ALIGN16
Definition: ConfigureVectorization.h:142

EIGEN_STRONG_INLINE
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834

b
Scalar * b
Definition: benchVecAdd.cpp:17

bool

double

int
return int(ret)+1

a
const Scalar * a
Definition: level2_cplx_impl.h:32

Eigen::half_impl::raw_uint16_to_half
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x)
Definition: Half.h:496

Eigen::internal::Packet2d
__m128d Packet2d
Definition: LSX/PacketMath.h:36

Eigen::internal::Packet2l
eigen_packet_wrapper< __m128i, 3 > Packet2l
Definition: LSX/PacketMath.h:41

Eigen::internal::pcast< Packet4i, Packet2d >
EIGEN_STRONG_INLINE Packet2d pcast< Packet4i, Packet2d >(const Packet4i &a)
Definition: LSX/TypeCasting.h:506

Eigen::internal::pcast< Packet2d, Packet4i >
EIGEN_STRONG_INLINE Packet4i pcast< Packet2d, Packet4i >(const Packet2d &a, const Packet2d &b)
Definition: LSX/TypeCasting.h:441

Eigen::internal::pzero
EIGEN_STRONG_INLINE Packet8f pzero(const Packet8f &)
Definition: AVX/PacketMath.h:774

Eigen::internal::Packet4i
__vector int Packet4i
Definition: AltiVec/PacketMath.h:34

Eigen::internal::preinterpret< Packet4i, Packet2d >
EIGEN_STRONG_INLINE Packet4i preinterpret< Packet4i, Packet2d >(const Packet2d &a)
Definition: LSX/TypeCasting.h:61

Eigen::internal::preinterpret< Packet2d, Packet2l >
EIGEN_STRONG_INLINE Packet2d preinterpret< Packet2d, Packet2l >(const Packet2l &a)
Definition: LSX/TypeCasting.h:33

Eigen::internal::pcast< Packet4f, Packet2d >
EIGEN_STRONG_INLINE Packet2d pcast< Packet4f, Packet2d >(const Packet4f &a)
Definition: LSX/TypeCasting.h:480

Eigen::internal::pcast< Packet2d, Packet2l >
EIGEN_STRONG_INLINE Packet2l pcast< Packet2d, Packet2l >(const Packet2d &a)
Definition: LSX/TypeCasting.h:433

Eigen::internal::preinterpret< Packet4f, Packet2d >
EIGEN_STRONG_INLINE Packet4f preinterpret< Packet4f, Packet2d >(const Packet2d &a)
Definition: SSE/TypeCasting.h:133

Eigen::internal::preinterpret< Packet4i, Packet4ui >
EIGEN_STRONG_INLINE Packet4i preinterpret< Packet4i, Packet4ui >(const Packet4ui &a)
Definition: LSX/TypeCasting.h:57

Eigen::internal::pcast< Packet16b, Packet4f >
EIGEN_STRONG_INLINE Packet4f pcast< Packet16b, Packet4f >(const Packet16b &a)
Definition: SSE/TypeCasting.h:64

Eigen::internal::Packet4ui
__vector unsigned int Packet4ui
Definition: AltiVec/PacketMath.h:35

Eigen::internal::preverse
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
Definition: AltiVec/Complex.h:303

Eigen::internal::pcast< Packet4i, Packet4f >
EIGEN_STRONG_INLINE Packet4f pcast< Packet4i, Packet4f >(const Packet4i &a)
Definition: AltiVec/TypeCasting.h:51

Eigen::internal::preinterpret< Packet2d, Packet4i >
EIGEN_STRONG_INLINE Packet2d preinterpret< Packet2d, Packet4i >(const Packet4i &a)
Definition: LSX/TypeCasting.h:41

Eigen::internal::preinterpret< Packet4ui, Packet4i >
EIGEN_STRONG_INLINE Packet4ui preinterpret< Packet4ui, Packet4i >(const Packet4i &a)
Definition: LSX/TypeCasting.h:81

Eigen::internal::pcast< Packet2l, Packet2d >
EIGEN_STRONG_INLINE Packet2d pcast< Packet2l, Packet2d >(const Packet2l &a)
Definition: LSX/TypeCasting.h:514

Eigen::internal::pfirst
EIGEN_STRONG_INLINE bfloat16 pfirst(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2418

Eigen::internal::pstore
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
Definition: GenericPacketMath.h:891

Eigen::internal::preinterpret< Packet2l, Packet2d >
EIGEN_STRONG_INLINE Packet2l preinterpret< Packet2l, Packet2d >(const Packet2d &a)
Definition: LSX/TypeCasting.h:65

Eigen::internal::pcast< Packet4f, Packet4i >
EIGEN_STRONG_INLINE Packet4i pcast< Packet4f, Packet4i >(const Packet4f &a)
Definition: AltiVec/TypeCasting.h:41

Eigen::internal::preinterpret< Packet4i, Packet4f >
EIGEN_STRONG_INLINE Packet4i preinterpret< Packet4i, Packet4f >(const Packet4f &a)
Definition: AltiVec/TypeCasting.h:122

Eigen::internal::preinterpret< Packet4f, Packet4i >
EIGEN_STRONG_INLINE Packet4f preinterpret< Packet4f, Packet4i >(const Packet4i &a)
Definition: AltiVec/TypeCasting.h:127

Eigen::internal::Packet4f
__vector float Packet4f
Definition: AltiVec/PacketMath.h:33

Eigen::internal::preinterpret< Packet2d, Packet4f >
EIGEN_STRONG_INLINE Packet2d preinterpret< Packet2d, Packet4f >(const Packet4f &a)
Definition: SSE/TypeCasting.h:128

Eigen::internal::pcast< Packet2d, Packet4f >
EIGEN_STRONG_INLINE Packet4f pcast< Packet2d, Packet4f >(const Packet2d &a, const Packet2d &b)
Definition: LSX/TypeCasting.h:429

Eigen::internal::pcast< Packet4f, Packet16b >
EIGEN_STRONG_INLINE Packet16b pcast< Packet4f, Packet16b >(const Packet4f &a, const Packet4f &b, const Packet4f &c, const Packet4f &d)
Definition: SSE/TypeCasting.h:50

Eigen::numext::int64_t
std::int64_t int64_t
Definition: Meta.h:43

Eigen
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70

Global_parameters::f2
double f2(const Vector< double > &coord)
f2 function, in front of the C2 unknown
Definition: poisson/poisson_with_singularity/two_d_poisson.cc:233

Global_parameters::f1
double f1(const Vector< double > &coord)
f1 function, in front of the C1 unknown
Definition: poisson/poisson_with_singularity/two_d_poisson.cc:147

calibrate.c
int c
Definition: calibrate.py:100

internal
Definition: Eigen_Colamd.h:49

Eigen::half
Definition: Half.h:139

Eigen::internal::eigen_packet_wrapper
Definition: GenericPacketMath.h:225

Eigen::internal::type_casting_traits::TgtCoeffRatio
@ TgtCoeffRatio
Definition: GenericPacketMath.h:206

Eigen::internal::type_casting_traits::VectorizedCast
@ VectorizedCast
Definition: GenericPacketMath.h:203

Eigen::internal::type_casting_traits::SrcCoeffRatio
@ SrcCoeffRatio
Definition: GenericPacketMath.h:205

zero
EIGEN_DONT_INLINE Scalar zero()
Definition: svd_common.h:232