34 #ifndef EIGEN_ASSIGN_VML_H
35 #define EIGEN_ASSIGN_VML_H
44 template <
typename Dst,
typename Src>
51 InnerSize =
int(Dst::IsVectorAtCompileTime) ?
int(Dst::SizeAtCompileTime)
53 :
int(Dst::RowsAtCompileTime),
56 :
int(Dst::MaxRowsAtCompileTime),
60 Src::InnerStrideAtCompileTime == 1 && Dst::InnerStrideAtCompileTime == 1,
// Identity macro: expands to its argument unchanged.
// NOTE(review): presumably exists to force one more preprocessor expansion pass over the pasted
// EIGEN_VMLMODE_EXPAND_x##VMLMODE token at its call sites - confirm.
70 #define EIGEN_PP_EXPAND(ARG) ARG
// Trailing mode argument appended to VML calls declared with the "LA" mode suffix.
// When EIGEN_FAST_MATH is undefined or not equal to 1, the suffix expands to `, VML_HA`.
71 #if !defined(EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
72 #define EIGEN_VMLMODE_EXPAND_xLA , VML_HA
// NOTE(review): listing lines 73 and 75 are not visible in this view; presumably an `#else` precedes the
// next definition and an `#endif` follows it, making `, VML_LA` the EIGEN_FAST_MATH == 1 case - confirm
// against the complete header. VML_HA / VML_LA are presumably MKL's high-/low-accuracy mode flags.
74 #define EIGEN_VMLMODE_EXPAND_xLA , VML_LA
// With the "_" mode suffix the macro expands to nothing, so no extra argument is appended to the call.
77 #define EIGEN_VMLMODE_EXPAND_x_
// Function-name prefix selected by the mode suffix: `vm` for "LA", `v` for "_".
79 #define EIGEN_VMLMODE_PREFIX_xLA vm
80 #define EIGEN_VMLMODE_PREFIX_x_ v
// Resolves the prefix for VMLMODE by joining EIGEN_VMLMODE_PREFIX_x with the mode name through EIGEN_CAT
// (defined elsewhere; presumably token concatenation performed after argument expansion - confirm).
81 #define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_x, VMLMODE)
// Declares a partial specialization of Assignment for
//   dst = CwiseUnaryOp<scalar_<EIGENOP>_op<EIGENTYPE>, SrcXprNested>
// that is enabled only when vml_assign_traits<DstXprType, SrcXprNested>::EnableVml holds, and whose
// run() forwards the element-wise operation to VMLOP.
//   EIGENOP   - operation name spliced into scalar_##EIGENOP##_op
//   VMLOP     - function invoked with (count, source pointer, destination pointer [, mode argument])
//   EIGENTYPE - scalar type used by the Eigen expressions
//   VMLTYPE   - type the source/destination pointers are cast to before calling VMLOP
//   VMLMODE   - mode suffix; selects the trailing argument via EIGEN_VMLMODE_EXPAND_x##VMLMODE
// run() first calls resize_if_allowed and asserts that dst and src have equal rows and columns. When the
// traits report LinearTraversal it issues a single VMLOP call over dst.size() elements; the visible loop
// instead issues one VMLOP call per outer index over dst.innerSize() elements, taking the slice start from
// coeffRef(outer, 0) for row-major and coeffRef(0, outer) otherwise.
// NOTE(review): listing lines 95 and 103-107 are not visible in this view (presumably the `else` that
// introduces the loop and the closing braces) - confirm against the complete header.
83 #define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
84 template <typename DstXprType, typename SrcXprNested> \
85 struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, \
86 assign_op<EIGENTYPE, EIGENTYPE>, Dense2Dense, \
87 std::enable_if_t<vml_assign_traits<DstXprType, SrcXprNested>::EnableVml>> { \
88 typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
89 static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE, EIGENTYPE> &func) { \
90 resize_if_allowed(dst, src, func); \
91 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
92 if (vml_assign_traits<DstXprType, SrcXprNested>::Traversal == (int)LinearTraversal) { \
93 VMLOP(dst.size(), (const VMLTYPE *)src.nestedExpression().data(), \
94 (VMLTYPE *)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
96 const Index outerSize = dst.outerSize(); \
97 for (Index outer = 0; outer < outerSize; ++outer) { \
98 const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer, 0)) \
99 : &(src.nestedExpression().coeffRef(0, outer)); \
100 EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer, 0)) : &(dst.coeffRef(0, outer)); \
101 VMLOP(dst.innerSize(), (const VMLTYPE *)src_ptr, \
102 (VMLTYPE *)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
// Instantiates EIGEN_MKL_VML_DECLARE_UNARY_CALL for the two real scalar types: float and double.
// The called function name is assembled as <mode prefix> + `s`/`d` + VMLOP, where the mode prefix comes
// from EIGEN_VMLMODE_PREFIX(VMLMODE); `s` is used for float and `d` for double.
108 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
109 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), s##VMLOP), float, float, VMLMODE) \
110 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), d##VMLOP), double, double, VMLMODE)
// Instantiates EIGEN_MKL_VML_DECLARE_UNARY_CALL for the two complex scalar types.
// scomplex data is passed as MKL_Complex8 to the `c`-prefixed function and dcomplex data as
// MKL_Complex16 to the `z`-prefixed function; the mode prefix comes from EIGEN_VMLMODE_PREFIX(VMLMODE).
112 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \
113 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), c##VMLOP), scomplex, \
114 MKL_Complex8, VMLMODE) \
115 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), z##VMLOP), dcomplex, \
116 MKL_Complex16, VMLMODE)
// Convenience wrapper: declares the unary specializations for every supported scalar type by expanding
// both the real (float, double) and the complex (scomplex, dcomplex) variants for EIGENOP / VMLOP.
118 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \
119 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
120 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
// Declares a partial specialization of Assignment for a binary expression whose right-hand side is a
// constant:
//   dst = CwiseBinaryOp<scalar_<EIGENOP>_op<EIGENTYPE, EIGENTYPE>, SrcXprNested,
//                       const CwiseNullaryOp<scalar_constant_op<EIGENTYPE>, Plain>>
// It is enabled only when vml_assign_traits<DstXprType, SrcXprNested>::EnableVml holds.
//   EIGENOP   - operation name spliced into scalar_##EIGENOP##_op
//   VMLOP     - function invoked with (count, source pointer, exponent, destination pointer [, mode argument])
//   EIGENTYPE - scalar type used by the Eigen expressions
//   VMLTYPE   - type the pointers and the exponent are reinterpreted as before calling VMLOP
//   VMLMODE   - mode suffix; selects the trailing argument via EIGEN_VMLMODE_EXPAND_x##VMLMODE
// run() calls resize_if_allowed, asserts matching dimensions, and reads the constant operand from
// src.rhs().functor().m_other, reinterpreting it as VMLTYPE. When the traits report LinearTraversal it
// issues a single VMLOP call over dst.size() elements of src.lhs(); the visible loop instead issues one
// VMLOP call per outer index over dst.innerSize() elements.
// NOTE(review): listing lines 153, 161 and everything from 169 onward are not visible in this view
// (presumably the typedef name `SrcXprType;`, the `else`, and the closing braces) - confirm against the
// complete header.
144 #define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
145 template <typename DstXprType, typename SrcXprNested, typename Plain> \
146 struct Assignment<DstXprType, \
147 CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE, EIGENTYPE>, SrcXprNested, \
148 const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>, Plain>>, \
149 assign_op<EIGENTYPE, EIGENTYPE>, Dense2Dense, \
150 std::enable_if_t<vml_assign_traits<DstXprType, SrcXprNested>::EnableVml>> { \
151 typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE, EIGENTYPE>, SrcXprNested, \
152 const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>, Plain>> \
154 static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE, EIGENTYPE> &func) { \
155 resize_if_allowed(dst, src, func); \
156 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
157 VMLTYPE exponent = reinterpret_cast<const VMLTYPE &>(src.rhs().functor().m_other); \
158 if (vml_assign_traits<DstXprType, SrcXprNested>::Traversal == LinearTraversal) { \
159 VMLOP(dst.size(), (const VMLTYPE *)src.lhs().data(), exponent, \
160 (VMLTYPE *)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
162 const Index outerSize = dst.outerSize(); \
163 for (Index outer = 0; outer < outerSize; ++outer) { \
164 const EIGENTYPE *src_ptr = \
165 src.IsRowMajor ? &(src.lhs().coeffRef(outer, 0)) : &(src.lhs().coeffRef(0, outer)); \
166 EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer, 0)) : &(dst.coeffRef(0, outer)); \
167 VMLOP(dst.innerSize(), (const VMLTYPE *)src_ptr, exponent, \
168 (VMLTYPE *)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
AnnoyingScalar cos(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:136
AnnoyingScalar acos(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:138
AnnoyingScalar sin(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:137
AnnoyingScalar sqrt(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:134
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE)
Definition: Assign_MKL.h:108
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE)
Definition: Assign_MKL.h:118
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)
Definition: Assign_MKL.h:144
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
Definition: Assign_MKL.h:112
static int Round(int n)
Definition: benchmark_main.cc:39
#define _(A, B)
Definition: cfortran.h:132
Definition: Assign_MKL.h:45
@ SrcHasDirectAccess
Definition: Assign_MKL.h:49
@ InnerSize
Definition: Assign_MKL.h:51
@ StorageOrdersAgree
Definition: Assign_MKL.h:50
@ MaxSizeAtCompileTime
Definition: Assign_MKL.h:57
@ InnerMaxSize
Definition: Assign_MKL.h:54
@ MightEnableVml
Definition: Assign_MKL.h:59
@ DstHasDirectAccess
Definition: Assign_MKL.h:48
@ MightLinearize
Definition: Assign_MKL.h:61
@ VmlSize
Definition: Assign_MKL.h:62
@ EnableVml
Definition: Assign_MKL.h:67
@ Traversal
Definition: Assign_MKL.h:67
@ DefaultTraversal
Definition: Constants.h:279
@ LinearTraversal
Definition: Constants.h:281
const unsigned int LinearAccessBit
Definition: Constants.h:133
const unsigned int DirectAccessBit
Definition: Constants.h:159
const unsigned int RowMajorBit
Definition: Constants.h:70
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16 &a)
Definition: BFloat16.h:639
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16 &a)
Definition: BFloat16.h:643
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cosh(const bfloat16 &a)
Definition: BFloat16.h:638
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16 &a)
Definition: BFloat16.h:644
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asin(const bfloat16 &a)
Definition: BFloat16.h:634
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log10(const bfloat16 &a)
Definition: BFloat16.h:620
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan(const bfloat16 &a)
Definition: BFloat16.h:636
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16 &a)
Definition: BFloat16.h:615
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 pow(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:625
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 round(const bfloat16 &a)
Definition: BFloat16.h:646
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16 &a)
Definition: BFloat16.h:618
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sinh(const bfloat16 &a)
Definition: BFloat16.h:637
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tan(const bfloat16 &a)
Definition: BFloat16.h:633
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cbrt(const T &x)
Definition: MathFunctions.h:1320
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
std::complex< double > dcomplex
Definition: MKL_support.h:128
std::complex< float > scomplex
Definition: MKL_support.h:129
squared absolute sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2 square(power 2)
const int Dynamic
Definition: Constants.h:25
Extend namespace for flags.
Definition: fsi_chan_precond_driver.cc:56
Definition: Eigen_Colamd.h:49