SparseDenseProduct.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_SPARSEDENSEPRODUCT_H
11 #define EIGEN_SPARSEDENSEPRODUCT_H
12 
13 // IWYU pragma: private
14 #include "./InternalHeaderCheck.h"
15 
16 namespace Eigen {
17 
18 namespace internal {
19 
20 template <>
22  typedef Sparse ret;
23 };
24 template <>
26  typedef Sparse ret;
27 };
28 
29 template <typename SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType,
30  int LhsStorageOrder = ((SparseLhsType::Flags & RowMajorBit) == RowMajorBit) ? RowMajor : ColMajor,
31  bool ColPerCol = ((DenseRhsType::Flags & RowMajorBit) == 0) || DenseRhsType::ColsAtCompileTime == 1>
33 
// Specialization used when LhsStorageOrder == RowMajor and ColPerCol == true, with alpha typed as
// DenseResType::Scalar. For every column c of rhs and every outer index i of lhs, it adds
// alpha * (sum over the stored entries of lhs row i of value * rhs(index, c)) into res(i, c).
// NOTE(review): listing lines 37-41 are absent from this extract; Res, LhsEval and LhsInnerIterator
// used below are presumably typedef'd there -- confirm against the upstream header.
34 template <typename SparseLhsType, typename DenseRhsType, typename DenseResType>
35 struct sparse_time_dense_product_impl<SparseLhsType, DenseRhsType, DenseResType, typename DenseResType::Scalar,
36  RowMajor, true> {
// Entry point. res is updated in place with += (see processRow); it is not cleared here.
42  static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res,
43  const typename Res::Scalar& alpha) {
44  LhsEval lhsEval(lhs);
45 
46  Index n = lhs.outerSize();
47 #ifdef EIGEN_HAS_OPENMP
48  Index threads = Eigen::nbThreads();
49 #endif
50 
// One pass over all outer indices of lhs per rhs column.
51  for (Index c = 0; c < rhs.cols(); ++c) {
52 #ifdef EIGEN_HAS_OPENMP
53  // This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
54  // It basically represents the minimal amount of work to be done to be worth it.
// The parallel branch hands out rows dynamically in chunks of ceil(n / (4 * threads)).
55  if (threads > 1 && lhsEval.nonZerosEstimate() > 20000) {
56 #pragma omp parallel for schedule(dynamic, (n + threads * 4 - 1) / (threads * 4)) num_threads(threads)
57  for (Index i = 0; i < n; ++i) processRow(lhsEval, rhs, res, alpha, i, c);
58  } else
59 #endif
60  {
61  for (Index i = 0; i < n; ++i) processRow(lhsEval, rhs, res, alpha, i, c);
62  }
63  }
64  }
65 
// Computes the contribution of lhs outer index i to res(i, col) and adds alpha times it.
// Each call writes only res.coeffRef(i, col).
66  static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, DenseResType& res,
67  const typename Res::Scalar& alpha, Index i, Index col) {
68  // Two accumulators, which breaks the dependency chain on the accumulator
69  // and allows more instruction-level parallelism in the following loop
70  typename Res::Scalar tmp_a(0);
71  typename Res::Scalar tmp_b(0);
// Each loop pass consumes up to two stored entries: the first goes to tmp_a, then the
// explicit ++it below advances to the second, which goes to tmp_b if it exists.
// NOTE(review): when the entry count is odd, the for-statement's own ++it runs once more on an
// iterator that already tested false; this relies on LhsInnerIterator tolerating that -- confirm.
72  for (LhsInnerIterator it(lhsEval, i); it; ++it) {
73  tmp_a += it.value() * rhs.coeff(it.index(), col);
74  ++it;
75  if (it) {
76  tmp_b += it.value() * rhs.coeff(it.index(), col);
77  }
78  }
79  res.coeffRef(i, col) += alpha * (tmp_a + tmp_b);
80  }
81 };
82 
83 // FIXME: what is the purpose of the following specialization? Is it for the BlockedSparse format?
84 // -> let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators
85 // template<typename T1, typename T2/*, int Options_, typename StrideType_*/>
86 // struct ScalarBinaryOpTraits<T1, Ref<T2/*, Options_, StrideType_*/> >
87 // {
88 // enum {
89 // Defined = 1
90 // };
91 // typedef typename CwiseUnaryOp<scalar_multiple2_op<T1, typename T2::Scalar>, T2>::PlainObject ReturnType;
92 // };
93 
// Specialization used when LhsStorageOrder == ColMajor and ColPerCol == true, for an arbitrary AlphaType.
// For each rhs column c and each outer index j of lhs, every stored entry of lhs at outer index j
// adds it.value() * rhs_j into res(it.index(), c).
// NOTE(review): listing lines 96-99 and 106 are absent from this extract. Line 106 presumably declares
// rhs_j; the commented-out line 105 suggests it holds alpha * rhs.coeff(j, c) -- confirm upstream.
// LhsEval is likewise presumably typedef'd in the missing lines.
94 template <typename SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType>
95 struct sparse_time_dense_product_impl<SparseLhsType, DenseRhsType, DenseResType, AlphaType, ColMajor, true> {
100  typedef typename LhsEval::InnerIterator LhsInnerIterator;
// Entry point. res is updated in place with +=; no parallel branch is visible in this variant.
101  static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha) {
102  LhsEval lhsEval(lhs);
103  for (Index c = 0; c < rhs.cols(); ++c) {
104  for (Index j = 0; j < lhs.outerSize(); ++j) {
105  // typename Res::Scalar rhs_j = alpha * rhs.coeff(j,c);
// Scatter: the destination row is the inner index of each stored entry.
107  for (LhsInnerIterator it(lhsEval, j); it; ++it) res.coeffRef(it.index(), c) += it.value() * rhs_j;
108  }
109  }
110  }
111 };
112 
// Specialization used when LhsStorageOrder == RowMajor and ColPerCol == false, with alpha typed as
// DenseResType::Scalar. It works on whole rows: for each row i of lhs and each stored entry in it,
// (alpha * it.value()) * rhs.row(it.index()) is added to row i of res.
// NOTE(review): listing lines 116-119 are absent from this extract; Res and LhsEval are presumably
// typedef'd there -- confirm against the upstream header.
113 template <typename SparseLhsType, typename DenseRhsType, typename DenseResType>
114 struct sparse_time_dense_product_impl<SparseLhsType, DenseRhsType, DenseResType, typename DenseResType::Scalar,
115  RowMajor, false> {
120  typedef typename LhsEval::InnerIterator LhsInnerIterator;
// Entry point. res is updated in place with += (see processRow).
121  static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res,
122  const typename Res::Scalar& alpha) {
123  Index n = lhs.rows();
124  LhsEval lhsEval(lhs);
125 
126 #ifdef EIGEN_HAS_OPENMP
127  Index threads = Eigen::nbThreads();
128  // This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
129  // It basically represents the minimal amount of work to be done to be worth it.
// Here the work estimate is nonZerosEstimate() scaled by the number of rhs columns.
// The parallel branch hands out rows dynamically in chunks of ceil(n / (4 * threads)).
130  if (threads > 1 && lhsEval.nonZerosEstimate() * rhs.cols() > 20000) {
131 #pragma omp parallel for schedule(dynamic, (n + threads * 4 - 1) / (threads * 4)) num_threads(threads)
132  for (Index i = 0; i < n; ++i) processRow(lhsEval, rhs, res, alpha, i);
133  } else
134 #endif
135  {
136  for (Index i = 0; i < n; ++i) processRow(lhsEval, rhs, res, alpha, i);
137  }
138  }
139 
// Accumulates into row i of res the scaled rhs rows selected by the stored entries of lhs row i.
140  static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, Res& res, const typename Res::Scalar& alpha,
141  Index i) {
// res_i is built from res.row(i); each call touches only that row.
142  typename Res::RowXpr res_i(res.row(i));
143  for (LhsInnerIterator it(lhsEval, i); it; ++it) res_i += (alpha * it.value()) * rhs.row(it.index());
144  }
145 };
146 
// Specialization used when LhsStorageOrder == ColMajor and ColPerCol == false, with alpha typed as
// DenseResType::Scalar. For each outer index j of lhs, row j of rhs is taken once and, for every
// stored entry at that outer index, (alpha * it.value()) * rhs_j is added to res.row(it.index()).
// NOTE(review): listing lines 150-153 are absent from this extract; Res, Rhs, Lhs and
// LhsInnerIterator are presumably typedef'd there -- confirm against the upstream header.
147 template <typename SparseLhsType, typename DenseRhsType, typename DenseResType>
148 struct sparse_time_dense_product_impl<SparseLhsType, DenseRhsType, DenseResType, typename DenseResType::Scalar,
149  ColMajor, false> {
// Entry point. res is updated in place with +=; no parallel branch is visible in this variant.
154  static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res,
155  const typename Res::Scalar& alpha) {
156  evaluator<Lhs> lhsEval(lhs);
157  for (Index j = 0; j < lhs.outerSize(); ++j) {
158  typename Rhs::ConstRowXpr rhs_j(rhs.row(j));
// Scatter: the destination row is the inner index of each stored entry.
159  for (LhsInnerIterator it(lhsEval, j); it; ++it) res.row(it.index()) += (alpha * it.value()) * rhs_j;
160  }
161  }
162 };
163 
// Free-function entry point taking a sparse lhs, a dense rhs, the destination res and a scaling
// factor alpha; it is what the generic_product_impl::scaleAndAddTo overloads in this file call.
// NOTE(review): the body (listing line 167) is absent from this extract, so the function looks empty
// here. It presumably forwards to sparse_time_dense_product_impl<...>::run(lhs, rhs, res, alpha) --
// confirm against the upstream header before relying on this listing.
164 template <typename SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType>
165 inline void sparse_time_dense_product(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res,
166  const AlphaType& alpha) {
168 }
169 
170 } // end namespace internal
171 
172 namespace internal {
173 
// Product implementation for the (SparseShape, DenseShape) pair, as the base-class argument below shows.
// NOTE(review): the struct declarator (listing line 175) and the Scalar typedef (line 177) are absent
// from this extract; the page's cross-reference index gives line 177 as
// "Product< Lhs, Rhs >::Scalar Scalar".
174 template <typename Lhs, typename Rhs, int ProductType>
176  : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType> > {
178 
// Adds the alpha-scaled product of lhs and rhs into dst by delegating to
// internal::sparse_time_dense_product on locally nested operands.
179  template <typename Dest>
180  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
// The second nested_eval argument for each operand is chosen from the other operand's RowMajorBit.
181  typedef typename nested_eval<Lhs, ((Rhs::Flags & RowMajorBit) == 0) ? 1 : Rhs::ColsAtCompileTime>::type LhsNested;
182  typedef typename nested_eval<Rhs, ((Lhs::Flags & RowMajorBit) == 0) ? 1 : Dynamic>::type RhsNested;
183  LhsNested lhsNested(lhs);
184  RhsNested rhsNested(rhs);
185  internal::sparse_time_dense_product(lhsNested, rhsNested, dst, alpha);
186  }
187 };
188 
189 template <typename Lhs, typename Rhs, int ProductType>
191  : generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType> {};
192 
// Product implementation for the (DenseShape, SparseShape) pair, as the base-class argument below shows.
// It reuses the sparse-times-dense kernel by swapping and transposing the operands.
// NOTE(review): the struct declarator (listing line 194) and the Scalar typedef (line 196) are absent
// from this extract; the page's cross-reference index gives line 196 as
// "Product< Lhs, Rhs >::Scalar Scalar".
193 template <typename Lhs, typename Rhs, int ProductType>
195  : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType> > {
197 
// Adds the alpha-scaled product of lhs and rhs into dst, computed in transposed form.
198  template <typename Dst>
199  static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
// The second nested_eval argument for each operand is chosen from a RowMajorBit test.
200  typedef typename nested_eval<Lhs, ((Rhs::Flags & RowMajorBit) == 0) ? Dynamic : 1>::type LhsNested;
201  typedef typename nested_eval<Rhs, ((Lhs::Flags & RowMajorBit) == RowMajorBit) ? 1 : Lhs::RowsAtCompileTime>::type
202  RhsNested;
203  LhsNested lhsNested(lhs);
204  RhsNested rhsNested(rhs);
205 
206  // transpose everything
// rhs^T is passed as the sparse left operand and lhs^T as the dense right operand; the result is
// accumulated through dstT, a Transpose<Dst> built from dst.
207  Transpose<Dst> dstT(dst);
208  internal::sparse_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha);
209  }
210 };
211 
212 template <typename Lhs, typename Rhs, int ProductType>
214  : generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType> {};
215 
// Evaluator shared by the two outer-product product_evaluator specializations in this file.
// NeedToTranspose swaps the roles of LhsT and RhsT (see Lhs1 / ActualRhs) and selects the Flags value.
// NOTE(review): many declaration lines are absent from this extract (217, 221, 226-227, 229-230,
// 232-234, 242, 248, 252, 265, 267, 270, 272, 276, 278, 283-284). The page's cross-reference index
// still lists most of them: ProdXprType = Product<LhsT, RhsT, DefaultProduct>; ActualLhs / LhsArg fall
// back to SparseView<Lhs1> when Lhs1 is not Sparse; LhsEval = evaluator<ActualLhs>;
// RhsEval = evaluator<ActualRhs>; LhsIterator = evaluator<ActualLhs>::InnerIterator.
216 template <typename LhsT, typename RhsT, bool NeedToTranspose>
218  protected:
// With NeedToTranspose the operand roles are exchanged.
219  typedef std::conditional_t<NeedToTranspose, RhsT, LhsT> Lhs1;
220  typedef std::conditional_t<NeedToTranspose, LhsT, RhsT> ActualRhs;
222 
223  // if the actual left-hand side is a dense vector,
224  // then build a sparse-view so that we can seamlessly iterate over it.
225  typedef std::conditional_t<is_same<typename internal::traits<Lhs1>::StorageKind, Sparse>::value, Lhs1,
228  typedef std::conditional_t<is_same<typename internal::traits<Lhs1>::StorageKind, Sparse>::value, Lhs1 const&,
231 
235  typedef typename ProdXprType::Scalar Scalar;
236 
237  public:
238  enum { Flags = NeedToTranspose ? RowMajorBit : 0, CoeffReadCost = HugeCost };
239 
// Iterator over one outer slice of the product. It walks the (actual) lhs through LhsIterator and
// keeps one factor taken from the rhs at the requested outer index.
240  class InnerIterator : public LhsIterator {
241  public:
// The base iterator always starts on outer index 0 of the lhs evaluator; the requested outer index
// is used only for m_outer and for picking the rhs factor via get(), dispatched on ActualRhs's
// StorageKind.
// NOTE(review): get(..., Sparse) may assign m_empty, which is initialised just above it in this
// list; this is only correct if m_empty is declared before m_factor. m_factor's declaration line
// is not in the extract -- confirm.
243  : LhsIterator(xprEval.m_lhsXprImpl, 0),
244  m_outer(outer),
245  m_empty(false),
246  m_factor(get(xprEval.m_rhsXprImpl, outer, typename internal::traits<ActualRhs>::StorageKind())) {}
247 
// row()/col(): the lhs inner index and the stored outer index swap places under NeedToTranspose.
249  EIGEN_STRONG_INLINE Index row() const { return NeedToTranspose ? m_outer : LhsIterator::index(); }
250  EIGEN_STRONG_INLINE Index col() const { return NeedToTranspose ? LhsIterator::index() : m_outer; }
251 
// The iterator is exhausted when the lhs iterator is, or immediately if get() flagged m_empty.
253  EIGEN_STRONG_INLINE operator bool() const { return LhsIterator::operator bool() && (!m_empty); }
254 
255  protected:
// Dense rhs: the factor is simply the coefficient at the outer index.
256  Scalar get(const RhsEval& rhs, Index outer, Dense = Dense()) const { return rhs.coeff(outer); }
257 
// Sparse rhs: the factor is the first stored entry at this outer index, provided its inner index
// is 0 and its value is non-zero; otherwise the slice is marked empty and Scalar(0) is returned.
258  Scalar get(const RhsEval& rhs, Index outer, Sparse = Sparse()) {
259  typename RhsEval::InnerIterator it(rhs, outer);
260  if (it && it.index() == 0 && it.value() != Scalar(0)) return it.value();
261  m_empty = true;
262  return Scalar(0);
263  }
264 
266  bool m_empty;
268  };
269 
// Constructor initialiser list; per the cross-reference index the signature on line 270 is
// sparse_dense_outer_product_evaluator(const Lhs1 &lhs, const ActualRhs &rhs).
// m_lhsXprImpl is built from the stored m_lhs member rather than from the argument.
271  : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs) {
273  }
274 
275  // transpose case
// Per the cross-reference index the signature on line 276 takes the arguments in the opposite
// order: (const ActualRhs &rhs, const Lhs1 &lhs).
277  : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs) {
279  }
280 
281  protected:
282  const LhsArg m_lhs;
285 };
286 
287 // sparse * dense outer product
// product_evaluator specialization built on sparse_dense_outer_product_evaluator, with
// NeedToTranspose taken from Lhs::IsRowMajor.
// NOTE(review): the struct declarator (listing line 289) and the Base / XprType typedefs (lines 291
// and 293) are absent from this extract; the page's cross-reference index gives Base as
// sparse_dense_outer_product_evaluator< Lhs, Rhs, Lhs::IsRowMajor >.
288 template <typename Lhs, typename Rhs>
290  : sparse_dense_outer_product_evaluator<Lhs, Rhs, Lhs::IsRowMajor> {
292 
294  typedef typename XprType::PlainObject PlainObject;
295 
// Forwards both operands of the product expression to the base evaluator.
296  explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs()) {}
297 };
298 
// product_evaluator specialization built on sparse_dense_outer_product_evaluator, with
// NeedToTranspose taken from Rhs::IsRowMajor (the sibling specialization above uses Lhs::IsRowMajor).
// NOTE(review): the struct declarator (listing line 300) and the Base / XprType typedefs (lines 302
// and 304) are absent from this extract, so the shape pair this specialization matches is not
// visible; presumably it is the dense * sparse outer product -- confirm against the upstream header.
299 template <typename Lhs, typename Rhs>
301  : sparse_dense_outer_product_evaluator<Lhs, Rhs, Rhs::IsRowMajor> {
303 
305  typedef typename XprType::PlainObject PlainObject;
306 
// Forwards both operands of the product expression to the base evaluator.
307  explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs()) {}
308 };
309 
310 } // end namespace internal
311 
312 } // end namespace Eigen
313 
314 #endif // EIGEN_SPARSEDENSEPRODUCT_H
int i
Definition: BiCGSTAB_step_by_step.cpp:9
const unsigned n
Definition: CG3DPackingUnitTest.cpp:11
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
m col(1)
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition: PartialRedux_count.cpp:3
#define EIGEN_INTERNAL_CHECK_COST_VALUE(C)
Definition: StaticAssert.h:101
SCALAR Scalar
Definition: bench_gemm.cpp:45
Expression of the product of two arbitrary matrices or vectors.
Definition: Product.h:202
Expression of a dense or sparse matrix with zero or too small values removed.
Definition: SparseView.h:45
Expression of the transpose of a matrix.
Definition: Transpose.h:56
EIGEN_STRONG_INLINE Index outer() const
Definition: SparseDenseProduct.h:248
Scalar get(const RhsEval &rhs, Index outer, Sparse=Sparse())
Definition: SparseDenseProduct.h:258
EIGEN_STRONG_INLINE Scalar value() const
Definition: SparseDenseProduct.h:252
InnerIterator(const sparse_dense_outer_product_evaluator &xprEval, Index outer)
Definition: SparseDenseProduct.h:242
EIGEN_STRONG_INLINE Index col() const
Definition: SparseDenseProduct.h:250
Scalar get(const RhsEval &rhs, Index outer, Dense=Dense()) const
Definition: SparseDenseProduct.h:256
EIGEN_STRONG_INLINE Index row() const
Definition: SparseDenseProduct.h:249
@ DefaultProduct
Definition: Constants.h:503
@ OuterProduct
Definition: Constants.h:508
@ ColMajor
Definition: Constants.h:318
@ RowMajor
Definition: Constants.h:320
const unsigned int RowMajorBit
Definition: Constants.h:70
RealScalar alpha
Definition: level1_cplx_impl.h:151
@ Lhs
Definition: TensorContractionMapper.h:20
@ Rhs
Definition: TensorContractionMapper.h:20
typename remove_all< T >::type remove_all_t
Definition: Meta.h:142
void sparse_time_dense_product(const SparseLhsType &lhs, const DenseRhsType &rhs, DenseResType &res, const AlphaType &alpha)
Definition: SparseDenseProduct.h:165
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
squared absolute value
Definition: GlobalFunctions.h:87
const int HugeCost
Definition: Constants.h:48
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
int nbThreads()
Definition: Parallelizer.h:54
const int Dynamic
Definition: Constants.h:25
Extend namespace for flags.
Definition: fsi_chan_precond_driver.cc:56
int c
Definition: calibrate.py:100
type
Definition: compute_granudrum_aor.py:141
Definition: Eigen_Colamd.h:49
Definition: Constants.h:540
Definition: Constants.h:519
Determines whether the given binary operation of two numeric types is allowed and what the scalar ret...
Definition: XprHelper.h:1043
Definition: Constants.h:570
Definition: Constants.h:522
Definition: SparseUtil.h:152
Definition: CoreEvaluators.h:104
static void scaleAndAddTo(Dst &dst, const Lhs &lhs, const Rhs &rhs, const Scalar &alpha)
Definition: SparseDenseProduct.h:199
Product< Lhs, Rhs >::Scalar Scalar
Definition: SparseDenseProduct.h:196
static void scaleAndAddTo(Dest &dst, const Lhs &lhs, const Rhs &rhs, const Scalar &alpha)
Definition: SparseDenseProduct.h:180
Product< Lhs, Rhs >::Scalar Scalar
Definition: SparseDenseProduct.h:177
Definition: ProductEvaluators.h:341
Definition: ProductEvaluators.h:78
Definition: MoreMeta.h:202
Definition: XprHelper.h:533
sparse_dense_outer_product_evaluator< Lhs, Rhs, Lhs::IsRowMajor > Base
Definition: SparseDenseProduct.h:291
sparse_dense_outer_product_evaluator< Lhs, Rhs, Rhs::IsRowMajor > Base
Definition: SparseDenseProduct.h:302
Definition: ForwardDeclarations.h:221
Definition: SparseDenseProduct.h:217
evaluator< ActualLhs > m_lhsXprImpl
Definition: SparseDenseProduct.h:283
evaluator< ActualRhs > m_rhsXprImpl
Definition: SparseDenseProduct.h:284
ProdXprType::Scalar Scalar
Definition: SparseDenseProduct.h:235
std::conditional_t< is_same< typename internal::traits< Lhs1 >::StorageKind, Sparse >::value, Lhs1, SparseView< Lhs1 > > ActualLhs
Definition: SparseDenseProduct.h:227
sparse_dense_outer_product_evaluator(const Lhs1 &lhs, const ActualRhs &rhs)
Definition: SparseDenseProduct.h:270
sparse_dense_outer_product_evaluator(const ActualRhs &rhs, const Lhs1 &lhs)
Definition: SparseDenseProduct.h:276
std::conditional_t< is_same< typename internal::traits< Lhs1 >::StorageKind, Sparse >::value, Lhs1 const &, SparseView< Lhs1 > > LhsArg
Definition: SparseDenseProduct.h:230
evaluator< ActualRhs > RhsEval
Definition: SparseDenseProduct.h:233
std::conditional_t< NeedToTranspose, LhsT, RhsT > ActualRhs
Definition: SparseDenseProduct.h:220
const LhsArg m_lhs
Definition: SparseDenseProduct.h:282
@ CoeffReadCost
Definition: SparseDenseProduct.h:238
evaluator< ActualLhs >::InnerIterator LhsIterator
Definition: SparseDenseProduct.h:234
Product< LhsT, RhsT, DefaultProduct > ProdXprType
Definition: SparseDenseProduct.h:221
evaluator< ActualLhs > LhsEval
Definition: SparseDenseProduct.h:232
std::conditional_t< NeedToTranspose, RhsT, LhsT > Lhs1
Definition: SparseDenseProduct.h:219
static void run(const SparseLhsType &lhs, const DenseRhsType &rhs, DenseResType &res, const AlphaType &alpha)
Definition: SparseDenseProduct.h:101
static void run(const SparseLhsType &lhs, const DenseRhsType &rhs, DenseResType &res, const typename Res::Scalar &alpha)
Definition: SparseDenseProduct.h:154
static void processRow(const LhsEval &lhsEval, const DenseRhsType &rhs, DenseResType &res, const typename Res::Scalar &alpha, Index i, Index col)
Definition: SparseDenseProduct.h:66
static void run(const SparseLhsType &lhs, const DenseRhsType &rhs, DenseResType &res, const typename Res::Scalar &alpha)
Definition: SparseDenseProduct.h:42
static void run(const SparseLhsType &lhs, const DenseRhsType &rhs, DenseResType &res, const typename Res::Scalar &alpha)
Definition: SparseDenseProduct.h:121
static void processRow(const LhsEval &lhsEval, const DenseRhsType &rhs, Res &res, const typename Res::Scalar &alpha, Index i)
Definition: SparseDenseProduct.h:140
Definition: SparseDenseProduct.h:32
Definition: ForwardDeclarations.h:21
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2
void run(const string &dir_name, LinearSolver *linear_solver_pt, const unsigned nel_1d, bool mess_up_order)
Definition: two_d_poisson_compare_solvers.cc:317