GPU/TypeCasting.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_TYPE_CASTING_GPU_H
11 #define EIGEN_TYPE_CASTING_GPU_H
12 
13 // IWYU pragma: private
14 #include "../../InternalHeaderCheck.h"
15 
16 namespace Eigen {
17 
18 namespace internal {
19 
20 #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
21  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
22 
23 template <>
24 struct type_casting_traits<Eigen::half, float> {
25  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
26 };
27 
28 template <>
29 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
30  float2 r1 = __half22float2(a);
31  float2 r2 = __half22float2(b);
32  return make_float4(r1.x, r1.y, r2.x, r2.y);
33 }
34 
35 template <>
36 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcast<float4, Packet4h2>(const float4& a, const float4& b) {
37  Packet4h2 r;
38  half2* r_alias = reinterpret_cast<half2*>(&r);
39  r_alias[0] = __floats2half2_rn(a.x, a.y);
40  r_alias[1] = __floats2half2_rn(a.z, a.w);
41  r_alias[2] = __floats2half2_rn(b.x, b.y);
42  r_alias[3] = __floats2half2_rn(b.z, b.w);
43  return r;
44 }
45 
46 template <>
47 struct type_casting_traits<float, Eigen::half> {
48  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
49 };
50 
51 template <>
52 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<Packet4h2, float4>(const Packet4h2& a) {
53  // Simply discard the second half of the input
54  float4 r;
55  const half2* a_alias = reinterpret_cast<const half2*>(&a);
56  float2 r1 = __half22float2(a_alias[0]);
57  float2 r2 = __half22float2(a_alias[1]);
58  r.x = static_cast<float>(r1.x);
59  r.y = static_cast<float>(r1.y);
60  r.z = static_cast<float>(r2.x);
61  r.w = static_cast<float>(r2.y);
62  return r;
63 }
64 
65 template <>
66 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
67  // Simply discard the second half of the input
68  return __floats2half2_rn(a.x, a.y);
69 }
70 
71 #endif
72 
73 } // end namespace internal
74 
75 } // end namespace Eigen
76 
77 #endif // EIGEN_TYPE_CASTING_GPU_H
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
Scalar * b
Definition: benchVecAdd.cpp:17
const Scalar * a
Definition: level2_cplx_impl.h:32
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
r
Definition: UniformPSDSelfTest.py:20
Definition: Eigen_Colamd.h:49
@ TgtCoeffRatio
Definition: GenericPacketMath.h:206
@ VectorizedCast
Definition: GenericPacketMath.h:203
@ SrcCoeffRatio
Definition: GenericPacketMath.h:205