Assign_MKL.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2011, Intel Corporation. All rights reserved.
3  Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
4 
5  Redistribution and use in source and binary forms, with or without modification,
6  are permitted provided that the following conditions are met:
7 
8  * Redistributions of source code must retain the above copyright notice, this
9  list of conditions and the following disclaimer.
10  * Redistributions in binary form must reproduce the above copyright notice,
11  this list of conditions and the following disclaimer in the documentation
12  and/or other materials provided with the distribution.
13  * Neither the name of Intel Corporation nor the names of its contributors may
14  be used to endorse or promote products derived from this software without
15  specific prior written permission.
16 
17  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
21  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
24  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28  ********************************************************************************
29  * Content : Eigen bindings to Intel(R) MKL
30  * MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
31  ********************************************************************************
32 */
33 
34 #ifndef EIGEN_ASSIGN_VML_H
35 #define EIGEN_ASSIGN_VML_H
36 
37 // IWYU pragma: private
38 #include "./InternalHeaderCheck.h"
39 
40 namespace Eigen {
41 
42 namespace internal {
43 
// Compile-time traits used by the Assignment specializations below (as
// vml_assign_traits<Dst, Src>::EnableVml and ::Traversal) to decide whether
// and how an assignment is routed to MKL VML.
//
// NOTE(review): this listing is an incomplete extraction. Listing lines 45,
// 48, 49, 59, 61, 62 and 67 are absent here. According to the cross-reference
// index at the end of this page they hold, respectively: the class header,
// DstHasDirectAccess, SrcHasDirectAccess, the start of MightEnableVml,
// MightLinearize, VmlSize, and the public EnableVml / Traversal enumerators.
// Consult the real header before relying on their exact expressions.
44 template <typename Dst, typename Src>
46  private:
47  enum {
// True when destination and source use the same storage order.
50  StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
// Compile-time inner extent of Dst: the whole size for vectors, otherwise the
// column count when RowMajorBit is set and the row count when it is not.
51  InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
52  : int(Dst::Flags) & RowMajorBit ? int(Dst::ColsAtCompileTime)
53  : int(Dst::RowsAtCompileTime),
// Same selection as InnerSize, but using the Max*AtCompileTime bounds.
54  InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
55  : int(Dst::Flags) & RowMajorBit ? int(Dst::MaxColsAtCompileTime)
56  : int(Dst::MaxRowsAtCompileTime),
57  MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
58 
// Tail of an enumerator whose first line (listing line 59, MightEnableVml per
// the index) is missing: both operands must have unit inner stride.
60  Src::InnerStrideAtCompileTime == 1 && Dst::InnerStrideAtCompileTime == 1,
// Sizes unknown at compile time (Dynamic) always qualify; fixed sizes must
// reach EIGEN_MKL_VML_THRESHOLD.
63  LargeEnough = VmlSize == Dynamic || VmlSize >= EIGEN_MKL_VML_THRESHOLD
64  };
65 
66  public:
68 };
69 
// Expands to its argument unchanged. The declaration macros below wrap the
// pasted name EIGEN_VMLMODE_EXPAND_x##VMLMODE in it.
70 #define EIGEN_PP_EXPAND(ARG) ARG
// Trailing accuracy-mode argument appended to VML calls when VMLMODE is LA:
// VML_HA unless EIGEN_FAST_MATH is defined and equal to 1, in which case VML_LA.
71 #if !defined(EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
72 #define EIGEN_VMLMODE_EXPAND_xLA , VML_HA
73 #else
74 #define EIGEN_VMLMODE_EXPAND_xLA , VML_LA
75 #endif
76 
// When VMLMODE is `_`, no extra argument is appended to the call.
77 #define EIGEN_VMLMODE_EXPAND_x_
78 
// Function-name prefix selected by VMLMODE: `vm` for LA, `v` for `_`.
79 #define EIGEN_VMLMODE_PREFIX_xLA vm
80 #define EIGEN_VMLMODE_PREFIX_x_ v
81 #define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_x, VMLMODE)
82 
// Declares a partial specialization of Assignment for
//   dst = CwiseUnaryOp<scalar_<EIGENOP>_op<EIGENTYPE>, SrcXprNested>
// with assign_op<EIGENTYPE, EIGENTYPE> and Dense2Dense, enabled only when
// vml_assign_traits<DstXprType, SrcXprNested>::EnableVml is non-zero.
//
// Parameters:
//   EIGENOP   - pasted into the functor name scalar_<EIGENOP>_op.
//   VMLOP     - name of the function that run() calls.
//   EIGENTYPE - scalar type on the Eigen side.
//   VMLTYPE   - element type the data pointers are cast to before the call.
//   VMLMODE   - suffix pasted onto EIGEN_VMLMODE_EXPAND_x to produce the
//               optional trailing call argument.
//
// run() resizes dst if allowed, asserts that dst and src have the same shape,
// then either issues a single VMLOP call over dst.size() elements (when
// Traversal equals LinearTraversal) or one VMLOP call of dst.innerSize()
// elements per outer index, addressing each slice through coeffRef.
83 #define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
84  template <typename DstXprType, typename SrcXprNested> \
85  struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, \
86  assign_op<EIGENTYPE, EIGENTYPE>, Dense2Dense, \
87  std::enable_if_t<vml_assign_traits<DstXprType, SrcXprNested>::EnableVml>> { \
88  typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
89  static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE, EIGENTYPE> &func) { \
90  resize_if_allowed(dst, src, func); \
91  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
92  if (vml_assign_traits<DstXprType, SrcXprNested>::Traversal == (int)LinearTraversal) { \
93  VMLOP(dst.size(), (const VMLTYPE *)src.nestedExpression().data(), \
94  (VMLTYPE *)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
95  } else { \
96  const Index outerSize = dst.outerSize(); \
97  for (Index outer = 0; outer < outerSize; ++outer) { \
98  const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer, 0)) \
99  : &(src.nestedExpression().coeffRef(0, outer)); \
100  EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer, 0)) : &(dst.coeffRef(0, outer)); \
101  VMLOP(dst.innerSize(), (const VMLTYPE *)src_ptr, \
102  (VMLTYPE *)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
103  } \
104  } \
105  } \
106  };
107 
// Declares the float and double specializations for EIGENOP. The called
// function name is EIGEN_VMLMODE_PREFIX(VMLMODE) concatenated with s<VMLOP>
// (float) or d<VMLOP> (double).
108 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
109  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), s##VMLOP), float, float, VMLMODE) \
110  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), d##VMLOP), double, double, VMLMODE)
111 
// Declares the complex specializations for EIGENOP: scalar type scomplex with
// element type MKL_Complex8 (function c<VMLOP>) and dcomplex with
// MKL_Complex16 (function z<VMLOP>), each prefixed per VMLMODE.
112 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \
113  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), c##VMLOP), scomplex, \
114  MKL_Complex8, VMLMODE) \
115  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), z##VMLOP), dcomplex, \
116  MKL_Complex16, VMLMODE)
117 
// Declares both the real and the complex specializations for EIGENOP.
118 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \
119  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
120  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
121 
131 // EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs, _)
136 
143 
// Declares a partial specialization of Assignment for
//   dst = CwiseBinaryOp<scalar_<EIGENOP>_op<EIGENTYPE, EIGENTYPE>, SrcXprNested,
//                       const CwiseNullaryOp<scalar_constant_op<EIGENTYPE>, Plain>>
// i.e. a binary expression whose right-hand side is a constant, enabled only
// when vml_assign_traits<DstXprType, SrcXprNested>::EnableVml is non-zero.
//
// Parameters:
//   EIGENOP   - pasted into the functor name scalar_<EIGENOP>_op.
//   VMLOP     - name of the function that run() calls; it receives the element
//               count, the source pointer, the exponent by value, the
//               destination pointer and the optional mode argument.
//   EIGENTYPE - scalar type on the Eigen side.
//   VMLTYPE   - type the exponent and the data pointers are reinterpreted as.
//   VMLMODE   - suffix pasted onto EIGEN_VMLMODE_EXPAND_x to produce the
//               optional trailing call argument.
//
// run() resizes dst if allowed, asserts that dst and src have the same shape,
// reads the exponent from the constant functor's m_other, then either issues a
// single VMLOP call over dst.size() elements or one call of dst.innerSize()
// elements per outer index.
//
// Traversal is a member of an unnamed enum while LinearTraversal belongs to a
// different enumeration, so the comparison casts to int, exactly as
// EIGEN_MKL_VML_DECLARE_UNARY_CALL does, to avoid comparing two distinct
// enumeration types.
144 #define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
145  template <typename DstXprType, typename SrcXprNested, typename Plain> \
146  struct Assignment<DstXprType, \
147  CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE, EIGENTYPE>, SrcXprNested, \
148  const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>, Plain>>, \
149  assign_op<EIGENTYPE, EIGENTYPE>, Dense2Dense, \
150  std::enable_if_t<vml_assign_traits<DstXprType, SrcXprNested>::EnableVml>> { \
151  typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE, EIGENTYPE>, SrcXprNested, \
152  const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>, Plain>> \
153  SrcXprType; \
154  static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE, EIGENTYPE> &func) { \
155  resize_if_allowed(dst, src, func); \
156  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
157  VMLTYPE exponent = reinterpret_cast<const VMLTYPE &>(src.rhs().functor().m_other); \
158  if (vml_assign_traits<DstXprType, SrcXprNested>::Traversal == (int)LinearTraversal) { \
159  VMLOP(dst.size(), (const VMLTYPE *)src.lhs().data(), exponent, \
160  (VMLTYPE *)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
161  } else { \
162  const Index outerSize = dst.outerSize(); \
163  for (Index outer = 0; outer < outerSize; ++outer) { \
164  const EIGENTYPE *src_ptr = \
165  src.IsRowMajor ? &(src.lhs().coeffRef(outer, 0)) : &(src.lhs().coeffRef(0, outer)); \
166  EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer, 0)) : &(dst.coeffRef(0, outer)); \
167  VMLOP(dst.innerSize(), (const VMLTYPE *)src_ptr, exponent, \
168  (VMLTYPE *)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
169  } \
170  } \
171  } \
172  };
173 
// Instantiate the pow-with-constant-exponent specializations for float,
// double, scomplex and dcomplex. Each names its vm?Powx function explicitly
// and uses the LA mode, so the trailing argument is VML_HA or VML_LA depending
// on EIGEN_FAST_MATH.
174 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA)
175 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA)
176 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA)
177 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA)
178 
179 } // end namespace internal
180 
181 } // end namespace Eigen
182 
183 #endif // EIGEN_ASSIGN_VML_H
AnnoyingScalar cos(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:136
AnnoyingScalar acos(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:138
AnnoyingScalar sin(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:137
AnnoyingScalar sqrt(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:134
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE)
Definition: Assign_MKL.h:108
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE)
Definition: Assign_MKL.h:118
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)
Definition: Assign_MKL.h:144
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
Definition: Assign_MKL.h:112
static int Round(int n)
Definition: benchmark_main.cc:39
#define _(A, B)
Definition: cfortran.h:132
Definition: Assign_MKL.h:45
@ SrcHasDirectAccess
Definition: Assign_MKL.h:49
@ InnerSize
Definition: Assign_MKL.h:51
@ StorageOrdersAgree
Definition: Assign_MKL.h:50
@ MaxSizeAtCompileTime
Definition: Assign_MKL.h:57
@ InnerMaxSize
Definition: Assign_MKL.h:54
@ MightEnableVml
Definition: Assign_MKL.h:59
@ DstHasDirectAccess
Definition: Assign_MKL.h:48
@ MightLinearize
Definition: Assign_MKL.h:61
@ VmlSize
Definition: Assign_MKL.h:62
@ EnableVml
Definition: Assign_MKL.h:67
@ Traversal
Definition: Assign_MKL.h:67
@ DefaultTraversal
Definition: Constants.h:279
@ LinearTraversal
Definition: Constants.h:281
const unsigned int LinearAccessBit
Definition: Constants.h:133
const unsigned int DirectAccessBit
Definition: Constants.h:159
const unsigned int RowMajorBit
Definition: Constants.h:70
return int(ret)+1
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16 &a)
Definition: BFloat16.h:639
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16 &a)
Definition: BFloat16.h:643
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cosh(const bfloat16 &a)
Definition: BFloat16.h:638
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16 &a)
Definition: BFloat16.h:644
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asin(const bfloat16 &a)
Definition: BFloat16.h:634
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log10(const bfloat16 &a)
Definition: BFloat16.h:620
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan(const bfloat16 &a)
Definition: BFloat16.h:636
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16 &a)
Definition: BFloat16.h:615
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 pow(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:625
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 round(const bfloat16 &a)
Definition: BFloat16.h:646
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16 &a)
Definition: BFloat16.h:618
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sinh(const bfloat16 &a)
Definition: BFloat16.h:637
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tan(const bfloat16 &a)
Definition: BFloat16.h:633
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cbrt(const T &x)
Definition: MathFunctions.h:1320
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
std::complex< double > dcomplex
Definition: MKL_support.h:128
std::complex< float > scomplex
Definition: MKL_support.h:129
squared absolute sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2 square(power 2)
const int Dynamic
Definition: Constants.h:25
Extend namespace for flags.
Definition: fsi_chan_precond_driver.cc:56
Definition: Eigen_Colamd.h:49