SYCL/PacketMath.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Mehdi Goli Codeplay Software Ltd.
5 // Ralph Potter Codeplay Software Ltd.
6 // Luke Iwanski Codeplay Software Ltd.
7 // Contact: <eigen@codeplay.com>
8 //
9 // This Source Code Form is subject to the terms of the Mozilla
10 // Public License v. 2.0. If a copy of the MPL was not distributed
11 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
12 
13 /*****************************************************************
14  * PacketMath.h
15  *
16  * \brief:
17  * PacketMath
18  *
19  *****************************************************************/
20 
21 #ifndef EIGEN_PACKET_MATH_SYCL_H
22 #define EIGEN_PACKET_MATH_SYCL_H
23 #include <type_traits>
24 
25 // IWYU pragma: private
26 #include "../../InternalHeaderCheck.h"
27 
28 namespace Eigen {
29 
30 namespace internal {
31 #ifdef SYCL_DEVICE_ONLY
32 #define SYCL_PLOAD(packet_type, AlignedType) \
33  template <> \
34  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType<packet_type>( \
35  const typename unpacket_traits<packet_type>::type* from) { \
36  auto ptr = \
37  cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>( \
38  from); \
39  packet_type res{}; \
40  res.load(0, ptr); \
41  return res; \
42  }
43 
44 SYCL_PLOAD(cl::sycl::cl_float4, u)
45 SYCL_PLOAD(cl::sycl::cl_float4, )
46 SYCL_PLOAD(cl::sycl::cl_double2, u)
47 SYCL_PLOAD(cl::sycl::cl_double2, )
48 #undef SYCL_PLOAD
49 
50 template <>
51 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pload<cl::sycl::cl_half8>(
52  const typename unpacket_traits<cl::sycl::cl_half8>::type* from) {
53  auto ptr =
54  cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
55  reinterpret_cast<const cl::sycl::cl_half*>(from));
56  cl::sycl::cl_half8 res{};
57  res.load(0, ptr);
58  return res;
59 }
60 
61 template <>
62 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 ploadu<cl::sycl::cl_half8>(
63  const typename unpacket_traits<cl::sycl::cl_half8>::type* from) {
64  auto ptr =
65  cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
66  reinterpret_cast<const cl::sycl::cl_half*>(from));
67  cl::sycl::cl_half8 res{};
68  res.load(0, ptr);
69  return res;
70 }
71 
72 #define SYCL_PSTORE(scalar, packet_type, alignment) \
73  template <> \
74  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment(scalar* to, const packet_type& from) { \
75  auto ptr = \
76  cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>( \
77  to); \
78  from.store(0, ptr); \
79  }
80 
81 SYCL_PSTORE(float, cl::sycl::cl_float4, )
82 SYCL_PSTORE(float, cl::sycl::cl_float4, u)
83 SYCL_PSTORE(double, cl::sycl::cl_double2, )
84 SYCL_PSTORE(double, cl::sycl::cl_double2, u)
85 #undef SYCL_PSTORE
86 
87 template <>
88 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoreu(Eigen::half* to, const cl::sycl::cl_half8& from) {
89  auto ptr =
90  cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
91  reinterpret_cast<cl::sycl::cl_half*>(to));
92  from.store(0, ptr);
93 }
94 
95 template <>
96 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore(Eigen::half* to, const cl::sycl::cl_half8& from) {
97  auto ptr =
98  cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
99  reinterpret_cast<cl::sycl::cl_half*>(to));
100  from.store(0, ptr);
101 }
102 
103 #define SYCL_PSET1(packet_type) \
104  template <> \
105  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>( \
106  const typename unpacket_traits<packet_type>::type& from) { \
107  return packet_type(from); \
108  }
109 
110 // global space
111 SYCL_PSET1(cl::sycl::cl_half8)
112 SYCL_PSET1(cl::sycl::cl_float4)
113 SYCL_PSET1(cl::sycl::cl_double2)
114 
115 #undef SYCL_PSET1
116 
117 template <typename packet_type>
118 struct get_base_packet {
119  template <typename sycl_multi_pointer>
120  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_ploaddup(sycl_multi_pointer) {}
121 
122  template <typename sycl_multi_pointer>
123  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_pgather(sycl_multi_pointer, Index) {}
124 };
125 
126 template <>
127 struct get_base_packet<cl::sycl::cl_half8> {
128  template <typename sycl_multi_pointer>
129  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 get_ploaddup(sycl_multi_pointer from) {
130  return cl::sycl::cl_half8(static_cast<cl::sycl::half>(from[0]), static_cast<cl::sycl::half>(from[0]),
131  static_cast<cl::sycl::half>(from[1]), static_cast<cl::sycl::half>(from[1]),
132  static_cast<cl::sycl::half>(from[2]), static_cast<cl::sycl::half>(from[2]),
133  static_cast<cl::sycl::half>(from[3]), static_cast<cl::sycl::half>(from[3]));
134  }
135  template <typename sycl_multi_pointer>
136  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 get_pgather(sycl_multi_pointer from, Index stride) {
137  return cl::sycl::cl_half8(
138  static_cast<cl::sycl::half>(from[0 * stride]), static_cast<cl::sycl::half>(from[1 * stride]),
139  static_cast<cl::sycl::half>(from[2 * stride]), static_cast<cl::sycl::half>(from[3 * stride]),
140  static_cast<cl::sycl::half>(from[4 * stride]), static_cast<cl::sycl::half>(from[5 * stride]),
141  static_cast<cl::sycl::half>(from[6 * stride]), static_cast<cl::sycl::half>(from[7 * stride]));
142  }
143 
144  template <typename sycl_multi_pointer>
145  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to, const cl::sycl::cl_half8& from,
146  Index stride) {
147  auto tmp = stride;
148  to[0] = Eigen::half(from.s0());
149  to[tmp] = Eigen::half(from.s1());
150  to[tmp += stride] = Eigen::half(from.s2());
151  to[tmp += stride] = Eigen::half(from.s3());
152  to[tmp += stride] = Eigen::half(from.s4());
153  to[tmp += stride] = Eigen::half(from.s5());
154  to[tmp += stride] = Eigen::half(from.s6());
155  to[tmp += stride] = Eigen::half(from.s7());
156  }
157  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 set_plset(const cl::sycl::half& a) {
158  return cl::sycl::cl_half8(static_cast<cl::sycl::half>(a), static_cast<cl::sycl::half>(a + 1),
159  static_cast<cl::sycl::half>(a + 2), static_cast<cl::sycl::half>(a + 3),
160  static_cast<cl::sycl::half>(a + 4), static_cast<cl::sycl::half>(a + 5),
161  static_cast<cl::sycl::half>(a + 6), static_cast<cl::sycl::half>(a + 7));
162  }
163 };
164 
165 template <>
166 struct get_base_packet<cl::sycl::cl_float4> {
167  template <typename sycl_multi_pointer>
168  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(sycl_multi_pointer from) {
169  return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
170  }
171  template <typename sycl_multi_pointer>
172  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(sycl_multi_pointer from, Index stride) {
173  return cl::sycl::cl_float4(from[0 * stride], from[1 * stride], from[2 * stride], from[3 * stride]);
174  }
175 
176  template <typename sycl_multi_pointer>
177  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to, const cl::sycl::cl_float4& from,
178  Index stride) {
179  auto tmp = stride;
180  to[0] = from.x();
181  to[tmp] = from.y();
182  to[tmp += stride] = from.z();
183  to[tmp += stride] = from.w();
184  }
185  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(const float& a) {
186  return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1), static_cast<float>(a + 2),
187  static_cast<float>(a + 3));
188  }
189 };
190 
191 template <>
192 struct get_base_packet<cl::sycl::cl_double2> {
193  template <typename sycl_multi_pointer>
194  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_ploaddup(const sycl_multi_pointer from) {
195  return cl::sycl::cl_double2(from[0], from[0]);
196  }
197 
198  template <typename sycl_multi_pointer, typename Index>
199  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(const sycl_multi_pointer from,
200  Index stride) {
201  return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);
202  }
203 
204  template <typename sycl_multi_pointer>
205  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to,
206  const cl::sycl::cl_double2& from, Index stride) {
207  to[0] = from.x();
208  to[stride] = from.y();
209  }
210 
211  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(const double& a) {
212  return cl::sycl::cl_double2(static_cast<double>(a), static_cast<double>(a + 1));
213  }
214 };
215 
216 #define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \
217  template <> \
218  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \
219  const typename unpacket_traits<packet_type>::type* from) { \
220  return get_base_packet<packet_type>::get_ploaddup(from); \
221  }
222 
223 SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_half8)
224 SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
225 SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
226 
227 #undef SYCL_PLOAD_DUP_SPECILIZE
228 
229 #define SYCL_PLSET(packet_type) \
230  template <> \
231  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>( \
232  const typename unpacket_traits<packet_type>::type& a) { \
233  return get_base_packet<packet_type>::set_plset(a); \
234  }
235 SYCL_PLSET(cl::sycl::cl_float4)
236 SYCL_PLSET(cl::sycl::cl_double2)
237 #undef SYCL_PLSET
238 
239 template <>
240 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 plset<cl::sycl::cl_half8>(
242  return get_base_packet<cl::sycl::cl_half8>::set_plset((const cl::sycl::half&)a);
243 }
244 
245 #define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \
246  template <> \
247  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pgather<scalar, packet_type>( \
248  const typename unpacket_traits<packet_type>::type* from, Index stride) { \
249  return get_base_packet<packet_type>::get_pgather(from, stride); \
250  }
251 
252 SYCL_PGATHER_SPECILIZE(Eigen::half, cl::sycl::cl_half8)
253 SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
254 SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
255 #undef SYCL_PGATHER_SPECILIZE
256 
257 #define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \
258  template <> \
259  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
260  typename unpacket_traits<packet_type>::type * to, const packet_type& from, Index stride) { \
261  get_base_packet<packet_type>::set_pscatter(to, from, stride); \
262  }
263 
264 SYCL_PSCATTER_SPECILIZE(Eigen::half, cl::sycl::cl_half8)
265 SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
266 SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
267 
268 #undef SYCL_PSCATTER_SPECILIZE
269 
270 #define SYCL_PMAD(packet_type) \
271  template <> \
272  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd(const packet_type& a, const packet_type& b, \
273  const packet_type& c) { \
274  return cl::sycl::mad(a, b, c); \
275  }
276 
277 SYCL_PMAD(cl::sycl::cl_half8)
278 SYCL_PMAD(cl::sycl::cl_float4)
279 SYCL_PMAD(cl::sycl::cl_double2)
280 #undef SYCL_PMAD
281 
282 template <>
283 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half pfirst<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
284  return Eigen::half(a.s0());
285 }
286 template <>
287 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
288  return a.x();
289 }
290 template <>
291 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
292  return a.x();
293 }
294 
295 template <>
296 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
297  return Eigen::half(a.s0() + a.s1() + a.s2() + a.s3() + a.s4() + a.s5() + a.s6() + a.s7());
298 }
299 
300 template <>
301 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
302  return a.x() + a.y() + a.z() + a.w();
303 }
304 
305 template <>
306 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
307  return a.x() + a.y();
308 }
309 
310 template <>
311 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_max<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
312  return Eigen::half(cl::sycl::fmax(cl::sycl::fmax(cl::sycl::fmax(a.s0(), a.s1()), cl::sycl::fmax(a.s2(), a.s3())),
313  cl::sycl::fmax(cl::sycl::fmax(a.s4(), a.s5()), cl::sycl::fmax(a.s6(), a.s7()))));
314 }
315 template <>
316 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
317  return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()), cl::sycl::fmax(a.z(), a.w()));
318 }
319 template <>
320 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
321  return cl::sycl::fmax(a.x(), a.y());
322 }
323 
324 template <>
325 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_min<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
326  return Eigen::half(cl::sycl::fmin(cl::sycl::fmin(cl::sycl::fmin(a.s0(), a.s1()), cl::sycl::fmin(a.s2(), a.s3())),
327  cl::sycl::fmin(cl::sycl::fmin(a.s4(), a.s5()), cl::sycl::fmin(a.s6(), a.s7()))));
328 }
329 template <>
330 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
331  return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()), cl::sycl::fmin(a.z(), a.w()));
332 }
333 template <>
334 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
335  return cl::sycl::fmin(a.x(), a.y());
336 }
337 
338 template <>
339 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_mul<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
340  return Eigen::half(a.s0() * a.s1() * a.s2() * a.s3() * a.s4() * a.s5() * a.s6() * a.s7());
341 }
342 template <>
343 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
344  return a.x() * a.y() * a.z() * a.w();
345 }
346 template <>
347 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
348  return a.x() * a.y();
349 }
350 
351 template <>
352 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pabs<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
353  return cl::sycl::cl_half8(cl::sycl::fabs(a.s0()), cl::sycl::fabs(a.s1()), cl::sycl::fabs(a.s2()),
354  cl::sycl::fabs(a.s3()), cl::sycl::fabs(a.s4()), cl::sycl::fabs(a.s5()),
355  cl::sycl::fabs(a.s6()), cl::sycl::fabs(a.s7()));
356 }
357 template <>
358 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
359  return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()), cl::sycl::fabs(a.z()),
360  cl::sycl::fabs(a.w()));
361 }
362 template <>
363 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
364  return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
365 }
366 
367 template <typename Packet>
368 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet& a, const Packet& b) {
369  return (a <= b).template as<Packet>();
370 }
371 
372 template <typename Packet>
373 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet& a, const Packet& b) {
374  return (a < b).template as<Packet>();
375 }
376 
377 template <typename Packet>
378 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet& a, const Packet& b) {
379  return (a == b).template as<Packet>();
380 }
381 
382 #define SYCL_PCMP(OP, TYPE) \
383  template <> \
384  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE& a, const TYPE& b) { \
385  return sycl_pcmp_##OP<TYPE>(a, b); \
386  }
387 
388 SYCL_PCMP(le, cl::sycl::cl_half8)
389 SYCL_PCMP(lt, cl::sycl::cl_half8)
390 SYCL_PCMP(eq, cl::sycl::cl_half8)
391 SYCL_PCMP(le, cl::sycl::cl_float4)
392 SYCL_PCMP(lt, cl::sycl::cl_float4)
393 SYCL_PCMP(eq, cl::sycl::cl_float4)
394 SYCL_PCMP(le, cl::sycl::cl_double2)
395 SYCL_PCMP(lt, cl::sycl::cl_double2)
396 SYCL_PCMP(eq, cl::sycl::cl_double2)
397 #undef SYCL_PCMP
398 
399 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock<cl::sycl::cl_half8, 8>& kernel) {
400  cl::sycl::cl_half tmp = kernel.packet[0].s1();
401  kernel.packet[0].s1() = kernel.packet[1].s0();
402  kernel.packet[1].s0() = tmp;
403 
404  tmp = kernel.packet[0].s2();
405  kernel.packet[0].s2() = kernel.packet[2].s0();
406  kernel.packet[2].s0() = tmp;
407 
408  tmp = kernel.packet[0].s3();
409  kernel.packet[0].s3() = kernel.packet[3].s0();
410  kernel.packet[3].s0() = tmp;
411 
412  tmp = kernel.packet[0].s4();
413  kernel.packet[0].s4() = kernel.packet[4].s0();
414  kernel.packet[4].s0() = tmp;
415 
416  tmp = kernel.packet[0].s5();
417  kernel.packet[0].s5() = kernel.packet[5].s0();
418  kernel.packet[5].s0() = tmp;
419 
420  tmp = kernel.packet[0].s6();
421  kernel.packet[0].s6() = kernel.packet[6].s0();
422  kernel.packet[6].s0() = tmp;
423 
424  tmp = kernel.packet[0].s7();
425  kernel.packet[0].s7() = kernel.packet[7].s0();
426  kernel.packet[7].s0() = tmp;
427 
428  tmp = kernel.packet[1].s2();
429  kernel.packet[1].s2() = kernel.packet[2].s1();
430  kernel.packet[2].s1() = tmp;
431 
432  tmp = kernel.packet[1].s3();
433  kernel.packet[1].s3() = kernel.packet[3].s1();
434  kernel.packet[3].s1() = tmp;
435 
436  tmp = kernel.packet[1].s4();
437  kernel.packet[1].s4() = kernel.packet[4].s1();
438  kernel.packet[4].s1() = tmp;
439 
440  tmp = kernel.packet[1].s5();
441  kernel.packet[1].s5() = kernel.packet[5].s1();
442  kernel.packet[5].s1() = tmp;
443 
444  tmp = kernel.packet[1].s6();
445  kernel.packet[1].s6() = kernel.packet[6].s1();
446  kernel.packet[6].s1() = tmp;
447 
448  tmp = kernel.packet[1].s7();
449  kernel.packet[1].s7() = kernel.packet[7].s1();
450  kernel.packet[7].s1() = tmp;
451 
452  tmp = kernel.packet[2].s3();
453  kernel.packet[2].s3() = kernel.packet[3].s2();
454  kernel.packet[3].s2() = tmp;
455 
456  tmp = kernel.packet[2].s4();
457  kernel.packet[2].s4() = kernel.packet[4].s2();
458  kernel.packet[4].s2() = tmp;
459 
460  tmp = kernel.packet[2].s5();
461  kernel.packet[2].s5() = kernel.packet[5].s2();
462  kernel.packet[5].s2() = tmp;
463 
464  tmp = kernel.packet[2].s6();
465  kernel.packet[2].s6() = kernel.packet[6].s2();
466  kernel.packet[6].s2() = tmp;
467 
468  tmp = kernel.packet[2].s7();
469  kernel.packet[2].s7() = kernel.packet[7].s2();
470  kernel.packet[7].s2() = tmp;
471 
472  tmp = kernel.packet[3].s4();
473  kernel.packet[3].s4() = kernel.packet[4].s3();
474  kernel.packet[4].s3() = tmp;
475 
476  tmp = kernel.packet[3].s5();
477  kernel.packet[3].s5() = kernel.packet[5].s3();
478  kernel.packet[5].s3() = tmp;
479 
480  tmp = kernel.packet[3].s6();
481  kernel.packet[3].s6() = kernel.packet[6].s3();
482  kernel.packet[6].s3() = tmp;
483 
484  tmp = kernel.packet[3].s7();
485  kernel.packet[3].s7() = kernel.packet[7].s3();
486  kernel.packet[7].s3() = tmp;
487 
488  tmp = kernel.packet[4].s5();
489  kernel.packet[4].s5() = kernel.packet[5].s4();
490  kernel.packet[5].s4() = tmp;
491 
492  tmp = kernel.packet[4].s6();
493  kernel.packet[4].s6() = kernel.packet[6].s4();
494  kernel.packet[6].s4() = tmp;
495 
496  tmp = kernel.packet[4].s7();
497  kernel.packet[4].s7() = kernel.packet[7].s4();
498  kernel.packet[7].s4() = tmp;
499 
500  tmp = kernel.packet[5].s6();
501  kernel.packet[5].s6() = kernel.packet[6].s5();
502  kernel.packet[6].s5() = tmp;
503 
504  tmp = kernel.packet[5].s7();
505  kernel.packet[5].s7() = kernel.packet[7].s5();
506  kernel.packet[7].s5() = tmp;
507 
508  tmp = kernel.packet[6].s7();
509  kernel.packet[6].s7() = kernel.packet[7].s6();
510  kernel.packet[7].s6() = tmp;
511 }
512 
513 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
514  float tmp = kernel.packet[0].y();
515  kernel.packet[0].y() = kernel.packet[1].x();
516  kernel.packet[1].x() = tmp;
517 
518  tmp = kernel.packet[0].z();
519  kernel.packet[0].z() = kernel.packet[2].x();
520  kernel.packet[2].x() = tmp;
521 
522  tmp = kernel.packet[0].w();
523  kernel.packet[0].w() = kernel.packet[3].x();
524  kernel.packet[3].x() = tmp;
525 
526  tmp = kernel.packet[1].z();
527  kernel.packet[1].z() = kernel.packet[2].y();
528  kernel.packet[2].y() = tmp;
529 
530  tmp = kernel.packet[1].w();
531  kernel.packet[1].w() = kernel.packet[3].y();
532  kernel.packet[3].y() = tmp;
533 
534  tmp = kernel.packet[2].w();
535  kernel.packet[2].w() = kernel.packet[3].z();
536  kernel.packet[3].z() = tmp;
537 }
538 
539 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
540  double tmp = kernel.packet[0].y();
541  kernel.packet[0].y() = kernel.packet[1].x();
542  kernel.packet[1].x() = tmp;
543 }
544 
545 template <>
546 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pblend(
547  const Selector<unpacket_traits<cl::sycl::cl_half8>::size>& ifPacket, const cl::sycl::cl_half8& thenPacket,
548  const cl::sycl::cl_half8& elsePacket) {
549  cl::sycl::cl_short8 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, ifPacket.select[2] ? 0 : -1,
550  ifPacket.select[3] ? 0 : -1, ifPacket.select[4] ? 0 : -1, ifPacket.select[5] ? 0 : -1,
551  ifPacket.select[6] ? 0 : -1, ifPacket.select[7] ? 0 : -1);
552  return cl::sycl::select(thenPacket, elsePacket, condition);
553 }
554 
555 template <>
556 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend(
557  const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket, const cl::sycl::cl_float4& thenPacket,
558  const cl::sycl::cl_float4& elsePacket) {
559  cl::sycl::cl_int4 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, ifPacket.select[2] ? 0 : -1,
560  ifPacket.select[3] ? 0 : -1);
561  return cl::sycl::select(thenPacket, elsePacket, condition);
562 }
563 
564 template <>
565 inline cl::sycl::cl_double2 pblend(const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
566  const cl::sycl::cl_double2& thenPacket, const cl::sycl::cl_double2& elsePacket) {
567  cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1);
568  return cl::sycl::select(thenPacket, elsePacket, condition);
569 }
570 #endif // SYCL_DEVICE_ONLY
571 
572 } // end namespace internal
573 
574 } // end namespace Eigen
575 
576 #endif // EIGEN_PACKET_MATH_SYCL_H
#define EIGEN_ALWAYS_INLINE
Definition: Macros.h:845
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition: PartialRedux_count.cpp:3
Scalar * b
Definition: benchVecAdd.cpp:17
EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
Definition: PlainObjectBase.h:247
const Scalar * a
Definition: level2_cplx_impl.h:32
Eigen::Matrix< Scalar, Dynamic, Dynamic, ColMajor > tmp
Definition: level3_impl.h:365
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:664
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:670
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition: AltiVec/Complex.h:339
EIGEN_STRONG_INLINE Packet4i pblend(const Selector< 4 > &ifPacket, const Packet4i &thenPacket, const Packet4i &elsePacket)
Definition: AltiVec/PacketMath.h:3075
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
Definition: GenericPacketMath.h:891
EIGEN_DEVICE_FUNC void pstoreu(Scalar *to, const Packet &from)
Definition: GenericPacketMath.h:911
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
Real fabs(const Real &a)
Definition: boostmultiprec.cpp:117
Definition: Eigen_Colamd.h:49
Definition: Half.h:139
T type
Definition: GenericPacketMath.h:135
@ size
Definition: GenericPacketMath.h:139