AltiVec/TypeCasting.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2019 Rasmus Munk Larsen <rmlarsen@google.com>
5 // Copyright (C) 2023 Chip Kerchner (chip.kerchner@ibm.com)
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_TYPE_CASTING_ALTIVEC_H
12 #define EIGEN_TYPE_CASTING_ALTIVEC_H
13 
14 // IWYU pragma: private
15 #include "../../InternalHeaderCheck.h"
16 
17 namespace Eigen {
18 
19 namespace internal {
20 template <>
21 struct type_casting_traits<float, int> {
22  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
23 };
24 
25 template <>
26 struct type_casting_traits<int, float> {
27  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
28 };
29 
30 template <>
32  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
33 };
34 
35 template <>
37  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
38 };
39 
40 template <>
42  return vec_cts(a, 0);
43 }
44 
45 template <>
47  return vec_ctu(a, 0);
48 }
49 
50 template <>
52  return vec_ctf(a, 0);
53 }
54 
55 template <>
57  return vec_ctf(a, 0);
58 }
59 
60 template <>
62  Packet4f float_even = Bf16ToF32Even(a);
63  Packet4f float_odd = Bf16ToF32Odd(a);
64  Packet4ui int_even = pcast<Packet4f, Packet4ui>(float_even);
65  Packet4ui int_odd = pcast<Packet4f, Packet4ui>(float_odd);
66  const EIGEN_DECLARE_CONST_FAST_Packet4ui(low_mask, 0x0000FFFF);
67  Packet4ui low_even = pand<Packet4ui>(int_even, p4ui_low_mask);
68  Packet4ui low_odd = pand<Packet4ui>(int_odd, p4ui_low_mask);
69 
70  // Check values that are bigger than USHRT_MAX (0xFFFF)
71  Packet4bi overflow_selector;
72  if (vec_any_gt(int_even, p4ui_low_mask)) {
73  overflow_selector = vec_cmpgt(int_even, p4ui_low_mask);
74  low_even = vec_sel(low_even, p4ui_low_mask, overflow_selector);
75  }
76  if (vec_any_gt(int_odd, p4ui_low_mask)) {
77  overflow_selector = vec_cmpgt(int_odd, p4ui_low_mask);
78  low_odd = vec_sel(low_even, p4ui_low_mask, overflow_selector);
79  }
80 
81  return pmerge(low_even, low_odd);
82 }
83 
84 template <>
86  // short -> int -> float -> bfloat16
87  const EIGEN_DECLARE_CONST_FAST_Packet4ui(low_mask, 0x0000FFFF);
88  Packet4ui int_cast = reinterpret_cast<Packet4ui>(a);
89  Packet4ui int_even = pand<Packet4ui>(int_cast, p4ui_low_mask);
90  Packet4ui int_odd = plogical_shift_right<16>(int_cast);
91  Packet4f float_even = pcast<Packet4ui, Packet4f>(int_even);
92  Packet4f float_odd = pcast<Packet4ui, Packet4f>(int_odd);
93  return F32ToBf16(float_even, float_odd);
94 }
95 
96 template <>
97 struct type_casting_traits<bfloat16, float> {
98  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
99 };
100 
101 template <>
104 #ifdef _BIG_ENDIAN
105  return reinterpret_cast<Packet4f>(vec_mergeh(a.m_val, z));
106 #else
107  return reinterpret_cast<Packet4f>(vec_mergeh(z, a.m_val));
108 #endif
109 }
110 
111 template <>
114 };
115 
116 template <>
118  return F32ToBf16Both(a, b);
119 }
120 
121 template <>
123  return reinterpret_cast<Packet4i>(a);
124 }
125 
126 template <>
128  return reinterpret_cast<Packet4f>(a);
129 }
130 
131 #ifdef EIGEN_VECTORIZE_VSX
132 // VSX support varies between different compilers and even different
133 // versions of the same compiler. For gcc version >= 4.9.3, we can use
134 // vec_cts to efficiently convert Packet2d to Packet2l. Otherwise, use
135 // a slow version that works with older compilers.
136 // Update: apparently vec_cts/vec_ctf intrinsics for 64-bit doubles
137 // are buggy, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70963
138 template <>
140 #if EIGEN_GNUC_STRICT_AT_LEAST(7, 1, 0)
141  return vec_cts(x, 0); // TODO: check clang version.
142 #else
143  double tmp[2];
144  memcpy(tmp, &x, sizeof(tmp));
145  Packet2l l = {static_cast<long long>(tmp[0]), static_cast<long long>(tmp[1])};
146  return l;
147 #endif
148 }
149 
150 template <>
152  unsigned long long tmp[2];
153  memcpy(tmp, &x, sizeof(tmp));
154  Packet2d d = {static_cast<double>(tmp[0]), static_cast<double>(tmp[1])};
155  return d;
156 }
157 #endif
158 
159 } // end namespace internal
160 
161 } // end namespace Eigen
162 
163 #endif // EIGEN_TYPE_CASTING_ALTIVEC_H
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
Scalar * b
Definition: benchVecAdd.cpp:17
return int(ret)+1
const Scalar * a
Definition: level2_cplx_impl.h:32
Eigen::Matrix< Scalar, Dynamic, Dynamic, ColMajor > tmp
Definition: level3_impl.h:365
__m128d Packet2d
Definition: LSX/PacketMath.h:36
eigen_packet_wrapper< __m128i, 3 > Packet2l
Definition: LSX/PacketMath.h:41
__vector int Packet4i
Definition: AltiVec/PacketMath.h:34
EIGEN_STRONG_INLINE Packet4f pcast< Packet8bf, Packet4f >(const Packet8bf &a)
Definition: AltiVec/TypeCasting.h:102
EIGEN_STRONG_INLINE Packet4ui pand< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Definition: AltiVec/PacketMath.h:1414
EIGEN_ALWAYS_INLINE Packet8us pmerge(Packet4ui even, Packet4ui odd)
Definition: AltiVec/PacketMath.h:2032
__vector unsigned short int Packet8us
Definition: AltiVec/PacketMath.h:38
EIGEN_STRONG_INLINE Packet2l pcast< Packet2d, Packet2l >(const Packet2d &a)
Definition: LSX/TypeCasting.h:433
EIGEN_STRONG_INLINE Packet8us pcast< Packet8bf, Packet8us >(const Packet8bf &a)
Definition: AltiVec/TypeCasting.h:61
static EIGEN_DECLARE_CONST_FAST_Packet4ui(SIGN, 0x80000000u)
__vector unsigned int Packet4ui
Definition: AltiVec/PacketMath.h:35
__vector __bool int Packet4bi
Definition: AltiVec/PacketMath.h:36
EIGEN_STRONG_INLINE Packet8bf pcast< Packet8us, Packet8bf >(const Packet8us &a)
Definition: AltiVec/TypeCasting.h:85
EIGEN_STRONG_INLINE Packet4f pcast< Packet4i, Packet4f >(const Packet4i &a)
Definition: AltiVec/TypeCasting.h:51
EIGEN_STRONG_INLINE Packet8bf F32ToBf16Both(Packet4f lo, Packet4f hi)
Definition: AltiVec/PacketMath.h:2237
EIGEN_STRONG_INLINE Packet2d pcast< Packet2l, Packet2d >(const Packet2l &a)
Definition: LSX/TypeCasting.h:514
EIGEN_STRONG_INLINE Packet4f Bf16ToF32Even(const Packet8bf &bf)
Definition: AltiVec/PacketMath.h:2023
EIGEN_STRONG_INLINE Packet8bf pcast< Packet4f, Packet8bf >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/TypeCasting.h:117
EIGEN_STRONG_INLINE Packet4i pcast< Packet4f, Packet4i >(const Packet4f &a)
Definition: AltiVec/TypeCasting.h:41
EIGEN_STRONG_INLINE Packet4f Bf16ToF32Odd(const Packet8bf &bf)
Definition: AltiVec/PacketMath.h:2027
EIGEN_STRONG_INLINE Packet4i preinterpret< Packet4i, Packet4f >(const Packet4f &a)
Definition: AltiVec/TypeCasting.h:122
EIGEN_STRONG_INLINE Packet4f preinterpret< Packet4f, Packet4i >(const Packet4i &a)
Definition: AltiVec/TypeCasting.h:127
EIGEN_STRONG_INLINE Packet8us pset1< Packet8us >(const unsigned short int &from)
Definition: AltiVec/PacketMath.h:788
__vector float Packet4f
Definition: AltiVec/PacketMath.h:33
EIGEN_STRONG_INLINE Packet4f pcast< Packet4ui, Packet4f >(const Packet4ui &a)
Definition: AltiVec/TypeCasting.h:56
EIGEN_STRONG_INLINE Packet8bf F32ToBf16(Packet4f p4f)
Definition: AltiVec/PacketMath.h:2059
EIGEN_STRONG_INLINE Packet4ui pcast< Packet4f, Packet4ui >(const Packet4f &a)
Definition: AltiVec/TypeCasting.h:46
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
Definition: Eigen_Colamd.h:49
list x
Definition: plotDoE.py:28
Definition: BFloat16.h:101
Definition: GenericPacketMath.h:225
Definition: GenericPacketMath.h:201
@ TgtCoeffRatio
Definition: GenericPacketMath.h:206
@ VectorizedCast
Definition: GenericPacketMath.h:203
@ SrcCoeffRatio
Definition: GenericPacketMath.h:205