SSE/TypeCasting.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_TYPE_CASTING_SSE_H
11 #define EIGEN_TYPE_CASTING_SSE_H
12 
13 // IWYU pragma: private
14 #include "../../InternalHeaderCheck.h"
15 
16 namespace Eigen {
17 
18 namespace internal {
19 
// Cast-trait specializations for the scalar pairs listed below. Each
// type_casting_traits<Src, Tgt> is an empty struct that inherits everything
// from vectorized_type_casting_traits<Src, Tgt> (declared outside this file).
// The whole group is compiled only when EIGEN_VECTORIZE_AVX is NOT defined.
// NOTE(review): presumably the AVX backend provides its own specializations
// for these pairs -- confirm against AVX/TypeCasting.h.
20 #ifndef EIGEN_VECTORIZE_AVX
21 template <>
22 struct type_casting_traits<float, bool> : vectorized_type_casting_traits<float, bool> {};
23 template <>
24 struct type_casting_traits<bool, float> : vectorized_type_casting_traits<bool, float> {};
25 
26 template <>
27 struct type_casting_traits<float, int> : vectorized_type_casting_traits<float, int> {};
28 template <>
29 struct type_casting_traits<int, float> : vectorized_type_casting_traits<int, float> {};
30 
31 template <>
32 struct type_casting_traits<float, double> : vectorized_type_casting_traits<float, double> {};
33 template <>
34 struct type_casting_traits<double, float> : vectorized_type_casting_traits<double, float> {};
35 
36 template <>
37 struct type_casting_traits<double, int> : vectorized_type_casting_traits<double, int> {};
38 template <>
39 struct type_casting_traits<int, double> : vectorized_type_casting_traits<int, double> {};
40 
// The double <-> int64_t pairs are nested inside the AVX guard above and are
// additionally skipped when EIGEN_VECTORIZE_AVX2 is defined.
41 #ifndef EIGEN_VECTORIZE_AVX2
42 template <>
43 struct type_casting_traits<double, int64_t> : vectorized_type_casting_traits<double, int64_t> {};
44 template <>
45 struct type_casting_traits<int64_t, double> : vectorized_type_casting_traits<int64_t, double> {};
46 #endif  // EIGEN_VECTORIZE_AVX2
47 #endif  // EIGEN_VECTORIZE_AVX
48 
49 template <>
51  const Packet4f& d) {
52  __m128 zero = pzero(a);
53  __m128 nonzero_a = _mm_cmpneq_ps(a, zero);
54  __m128 nonzero_b = _mm_cmpneq_ps(b, zero);
55  __m128 nonzero_c = _mm_cmpneq_ps(c, zero);
56  __m128 nonzero_d = _mm_cmpneq_ps(d, zero);
57  __m128i ab_bytes = _mm_packs_epi32(_mm_castps_si128(nonzero_a), _mm_castps_si128(nonzero_b));
58  __m128i cd_bytes = _mm_packs_epi32(_mm_castps_si128(nonzero_c), _mm_castps_si128(nonzero_d));
59  __m128i merged = _mm_packs_epi16(ab_bytes, cd_bytes);
60  return _mm_and_si128(merged, _mm_set1_epi8(1));
61 }
62 
63 template <>
65  const __m128 cst_one = _mm_set_ps1(1.0f);
66 #ifdef EIGEN_VECTORIZE_SSE4_1
67  __m128i a_extended = _mm_cvtepi8_epi32(a);
68  __m128i abcd = _mm_cmpeq_epi32(a_extended, _mm_setzero_si128());
69 #else
70  __m128i abcd_efhg_ijkl_mnop = _mm_cmpeq_epi8(a, _mm_setzero_si128());
71  __m128i aabb_ccdd_eeff_gghh = _mm_unpacklo_epi8(abcd_efhg_ijkl_mnop, abcd_efhg_ijkl_mnop);
72  __m128i abcd = _mm_unpacklo_epi8(aabb_ccdd_eeff_gghh, aabb_ccdd_eeff_gghh);
73 #endif
74  __m128 result = _mm_andnot_ps(_mm_castsi128_ps(abcd), cst_one);
75  return result;
76 }
77 
78 template <>
80  return _mm_cvttps_epi32(a);
81 }
82 
83 template <>
85  return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm_cvttpd_epi32(a)), _mm_castsi128_ps(_mm_cvttpd_epi32(b)),
86  (1 << 2) | (1 << 6)));
87 }
88 
89 template <>
91 #if EIGEN_ARCH_x86_64
92  return _mm_set_epi64x(_mm_cvttsd_si64(preverse(a)), _mm_cvttsd_si64(a));
93 #else
94  return _mm_set_epi64x(static_cast<int64_t>(pfirst(preverse(a))), static_cast<int64_t>(pfirst(a)));
95 #endif
96 }
97 
98 template <>
100  EIGEN_ALIGN16 int64_t aux[2];
101  pstore(aux, a);
102  return _mm_set_pd(static_cast<double>(aux[1]), static_cast<double>(aux[0]));
103 }
104 
105 template <>
107  return _mm_cvtepi32_ps(a);
108 }
109 
110 template <>
112  return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
113 }
114 
115 template <>
117  // Simply discard the second half of the input
118  return _mm_cvtepi32_pd(a);
119 }
120 
121 template <>
123  // Simply discard the second half of the input
124  return _mm_cvtps_pd(a);
125 }
126 
127 template <>
129  return _mm_castps_pd(a);
130 }
131 
132 template <>
134  return _mm_castpd_ps(a);
135 }
136 
137 template <>
139  return _mm_castps_si128(a);
140 }
141 
142 template <>
144  return _mm_castsi128_ps(a);
145 }
146 
147 template <>
149  return _mm_castsi128_pd(a);
150 }
151 
152 template <>
154  return _mm_castsi128_pd(a);
155 }
156 template <>
158  return _mm_castpd_si128(a);
159 }
160 
161 template <>
163  return _mm_castpd_si128(a);
164 }
165 
166 template <>
168  return Packet4ui(a);
169 }
170 
171 template <>
173  return Packet4i(a);
174 }
175 
176 // Disable the following code since it's broken on too many platforms / compilers.
177 // #elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
178 #if 0
179 
// Cast traits for Eigen::half -> float. This specialization sits inside the
// `#if 0` region, so it is currently never compiled.
// It sets VectorizedCast = 1 and both coefficient ratios to 1.
// NOTE(review): presumably a 1:1 ratio means one source packet produces one
// target packet -- confirm against the enum's definition in GenericPacketMath.h.
180 template <>
181 struct type_casting_traits<Eigen::half, float> {
182  enum {
183  VectorizedCast = 1,
184  SrcCoeffRatio = 1,
185  TgtCoeffRatio = 1
186  };
187 };
188 
// Packet4h -> Packet4f cast. This specialization sits inside the `#if 0`
// region, so it is currently never compiled.
//
// Reads the 64 bits held in `a.x` as one integer, treats each 16-bit field as a
// raw half bit pattern, converts every field to float through Eigen::half, and
// returns the four floats with the lowest 16-bit field in the lowest lane.
189 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) {
// Use the standard int64_t (already used elsewhere in this file) instead of the
// non-standard glibc typedef __int64_t, which not every toolchain defines.
190  int64_t a64 = _mm_cvtm64_si64(a.x);
// The cast to unsigned short keeps only the low 16 bits, so each shift below
// selects the next 16-bit field regardless of the sign of a64.
191  Eigen::half h = raw_uint16_to_half(static_cast<unsigned short>(a64));
192  float f1 = static_cast<float>(h);
193  h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
194  float f2 = static_cast<float>(h);
195  h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));
196  float f3 = static_cast<float>(h);
197  h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));
198  float f4 = static_cast<float>(h);
// _mm_set_ps takes its arguments from the highest lane to the lowest, so f1
// (from bits 0..15) ends up in lane 0.
199  return _mm_set_ps(f4, f3, f2, f1);
200 }
201 
// Cast traits for float -> Eigen::half. This specialization sits inside the
// `#if 0` region, so it is currently never compiled.
// It sets VectorizedCast = 1 and both coefficient ratios to 1.
// NOTE(review): presumably a 1:1 ratio means one source packet produces one
// target packet -- confirm against the enum's definition in GenericPacketMath.h.
202 template <>
203 struct type_casting_traits<float, Eigen::half> {
204  enum {
205  VectorizedCast = 1,
206  SrcCoeffRatio = 1,
207  TgtCoeffRatio = 1
208  };
209 };
210 
// Packet4f -> Packet4h cast. This specialization sits inside the `#if 0`
// region, so it is currently never compiled.
//
// Spills the four floats to an aligned scratch array, converts each one to
// Eigen::half via its constructor, and packs the four `.x` members of those
// halves into `result.x`.
211 template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a) {
// 16-byte aligned scratch buffer that receives the four lanes through pstore.
212  EIGEN_ALIGN16 float aux[4];
213  pstore(aux, a);
214  Eigen::half h0(aux[0]);
215  Eigen::half h1(aux[1]);
216  Eigen::half h2(aux[2]);
217  Eigen::half h3(aux[3]);
218 
219  Packet4h result;
// _mm_set_pi16 takes its arguments from the highest 16-bit field to the lowest,
// so h0 (from aux[0]) lands in the lowest field.
220  result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x);
221  return result;
222 }
223 
224 #endif
225 
226 } // end namespace internal
227 
228 } // end namespace Eigen
229 
230 #endif // EIGEN_TYPE_CASTING_SSE_H
#define EIGEN_ALIGN16
Definition: ConfigureVectorization.h:142
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
Scalar * b
Definition: benchVecAdd.cpp:17
return int(ret)+1
const Scalar * a
Definition: level2_cplx_impl.h:32
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x)
Definition: Half.h:496
__m128d Packet2d
Definition: LSX/PacketMath.h:36
eigen_packet_wrapper< __m128i, 3 > Packet2l
Definition: LSX/PacketMath.h:41
EIGEN_STRONG_INLINE Packet2d pcast< Packet4i, Packet2d >(const Packet4i &a)
Definition: LSX/TypeCasting.h:506
EIGEN_STRONG_INLINE Packet4i pcast< Packet2d, Packet4i >(const Packet2d &a, const Packet2d &b)
Definition: LSX/TypeCasting.h:441
EIGEN_STRONG_INLINE Packet8f pzero(const Packet8f &)
Definition: AVX/PacketMath.h:774
__vector int Packet4i
Definition: AltiVec/PacketMath.h:34
EIGEN_STRONG_INLINE Packet4i preinterpret< Packet4i, Packet2d >(const Packet2d &a)
Definition: LSX/TypeCasting.h:61
EIGEN_STRONG_INLINE Packet2d preinterpret< Packet2d, Packet2l >(const Packet2l &a)
Definition: LSX/TypeCasting.h:33
EIGEN_STRONG_INLINE Packet2d pcast< Packet4f, Packet2d >(const Packet4f &a)
Definition: LSX/TypeCasting.h:480
EIGEN_STRONG_INLINE Packet2l pcast< Packet2d, Packet2l >(const Packet2d &a)
Definition: LSX/TypeCasting.h:433
EIGEN_STRONG_INLINE Packet4f preinterpret< Packet4f, Packet2d >(const Packet2d &a)
Definition: SSE/TypeCasting.h:133
EIGEN_STRONG_INLINE Packet4i preinterpret< Packet4i, Packet4ui >(const Packet4ui &a)
Definition: LSX/TypeCasting.h:57
EIGEN_STRONG_INLINE Packet4f pcast< Packet16b, Packet4f >(const Packet16b &a)
Definition: SSE/TypeCasting.h:64
__vector unsigned int Packet4ui
Definition: AltiVec/PacketMath.h:35
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
Definition: AltiVec/Complex.h:303
EIGEN_STRONG_INLINE Packet4f pcast< Packet4i, Packet4f >(const Packet4i &a)
Definition: AltiVec/TypeCasting.h:51
EIGEN_STRONG_INLINE Packet2d preinterpret< Packet2d, Packet4i >(const Packet4i &a)
Definition: LSX/TypeCasting.h:41
EIGEN_STRONG_INLINE Packet4ui preinterpret< Packet4ui, Packet4i >(const Packet4i &a)
Definition: LSX/TypeCasting.h:81
EIGEN_STRONG_INLINE Packet2d pcast< Packet2l, Packet2d >(const Packet2l &a)
Definition: LSX/TypeCasting.h:514
EIGEN_STRONG_INLINE bfloat16 pfirst(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2418
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
Definition: GenericPacketMath.h:891
EIGEN_STRONG_INLINE Packet2l preinterpret< Packet2l, Packet2d >(const Packet2d &a)
Definition: LSX/TypeCasting.h:65
EIGEN_STRONG_INLINE Packet4i pcast< Packet4f, Packet4i >(const Packet4f &a)
Definition: AltiVec/TypeCasting.h:41
EIGEN_STRONG_INLINE Packet4i preinterpret< Packet4i, Packet4f >(const Packet4f &a)
Definition: AltiVec/TypeCasting.h:122
EIGEN_STRONG_INLINE Packet4f preinterpret< Packet4f, Packet4i >(const Packet4i &a)
Definition: AltiVec/TypeCasting.h:127
__vector float Packet4f
Definition: AltiVec/PacketMath.h:33
EIGEN_STRONG_INLINE Packet2d preinterpret< Packet2d, Packet4f >(const Packet4f &a)
Definition: SSE/TypeCasting.h:128
EIGEN_STRONG_INLINE Packet4f pcast< Packet2d, Packet4f >(const Packet2d &a, const Packet2d &b)
Definition: LSX/TypeCasting.h:429
EIGEN_STRONG_INLINE Packet16b pcast< Packet4f, Packet16b >(const Packet4f &a, const Packet4f &b, const Packet4f &c, const Packet4f &d)
Definition: SSE/TypeCasting.h:50
std::int64_t int64_t
Definition: Meta.h:43
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
double f2(const Vector< double > &coord)
f2 function, in front of the C2 unknown
Definition: poisson/poisson_with_singularity/two_d_poisson.cc:233
double f1(const Vector< double > &coord)
f1 function, in front of the C1 unknown
Definition: poisson/poisson_with_singularity/two_d_poisson.cc:147
int c
Definition: calibrate.py:100
Definition: Eigen_Colamd.h:49
Definition: Half.h:139
Definition: GenericPacketMath.h:225
@ TgtCoeffRatio
Definition: GenericPacketMath.h:206
@ VectorizedCast
Definition: GenericPacketMath.h:203
@ SrcCoeffRatio
Definition: GenericPacketMath.h:205
EIGEN_DONT_INLINE Scalar zero()
Definition: svd_common.h:232