GeneralMatrixMatrixTriangular.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
11 #define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
12 
13 // IWYU pragma: private
14 #include "../InternalHeaderCheck.h"
15 
16 namespace Eigen {
17 
18 template <typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
20 
21 namespace internal {
22 
23 /**********************************************************************
24  * This file implements a general A * B product while
25  * evaluating only one triangular part of the product.
26  * This is a more general version of self adjoint product (C += A A^T)
27  * as the level 3 SYRK Blas routine.
28  **********************************************************************/
29 
30 // forward declarations (defined at the end of this file)
31 template <typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs,
32  int ResInnerStride, int UpLo>
33 struct tribb_kernel;
34 
35 /* Optimized matrix-matrix product evaluating only one triangular half */
36 template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar,
37  int RhsStorageOrder, bool ConjugateRhs, int ResStorageOrder, int ResInnerStride, int UpLo,
38  int Version = Specialized>
40 
41 // as usual if the result is row major => we transpose the product
42 template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar,
43  int RhsStorageOrder, bool ConjugateRhs, int ResInnerStride, int UpLo, int Version>
44 struct general_matrix_matrix_triangular_product<Index, LhsScalar, LhsStorageOrder, ConjugateLhs, RhsScalar,
45  RhsStorageOrder, ConjugateRhs, RowMajor, ResInnerStride, UpLo,
46  Version> {
48  static EIGEN_STRONG_INLINE void run(Index size, Index depth, const LhsScalar* lhs, Index lhsStride,
49  const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resIncr,
50  Index resStride, const ResScalar& alpha,
52  general_matrix_matrix_triangular_product<Index, RhsScalar, RhsStorageOrder == RowMajor ? ColMajor : RowMajor,
53  ConjugateRhs, LhsScalar, LhsStorageOrder == RowMajor ? ColMajor : RowMajor,
54  ConjugateLhs, ColMajor, ResInnerStride,
55  UpLo == Lower ? Upper : Lower>::run(size, depth, rhs, rhsStride, lhs,
56  lhsStride, res, resIncr, resStride,
57  alpha, blocking);
58  }
59 };
60 
61 template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar,
62  int RhsStorageOrder, bool ConjugateRhs, int ResInnerStride, int UpLo, int Version>
63 struct general_matrix_matrix_triangular_product<Index, LhsScalar, LhsStorageOrder, ConjugateLhs, RhsScalar,
64  RhsStorageOrder, ConjugateRhs, ColMajor, ResInnerStride, UpLo,
65  Version> {
67  static EIGEN_STRONG_INLINE void run(Index size, Index depth, const LhsScalar* lhs_, Index lhsStride,
68  const RhsScalar* rhs_, Index rhsStride, ResScalar* res_, Index resIncr,
69  Index resStride, const ResScalar& alpha,
71  if (size == 0) {
72  return;
73  }
74 
75  typedef gebp_traits<LhsScalar, RhsScalar> Traits;
76 
80  LhsMapper lhs(lhs_, lhsStride);
81  RhsMapper rhs(rhs_, rhsStride);
82  ResMapper res(res_, resStride, resIncr);
83 
84  Index kc = blocking.kc();
85  // Ensure that mc >= nr and <= size
86  Index mc = (std::min)(size, (std::max)(static_cast<decltype(blocking.mc())>(Traits::nr), blocking.mc()));
87 
88  // !!! mc must be a multiple of nr
89  if (mc > Traits::nr) {
90  using UnsignedIndex = typename make_unsigned<Index>::type;
91  mc = (UnsignedIndex(mc) / Traits::nr) * Traits::nr;
92  }
93 
94  std::size_t sizeA = kc * mc;
95  std::size_t sizeB = kc * size;
96 
97  ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
98  ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
99 
100  gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
101  LhsStorageOrder>
102  pack_lhs;
106  sybb;
107 
108  for (Index k2 = 0; k2 < depth; k2 += kc) {
109  const Index actual_kc = (std::min)(k2 + kc, depth) - k2;
110 
111  // note that the actual rhs is the transpose/adjoint of mat
112  pack_rhs(blockB, rhs.getSubMapper(k2, 0), actual_kc, size);
113 
114  for (Index i2 = 0; i2 < size; i2 += mc) {
115  const Index actual_mc = (std::min)(i2 + mc, size) - i2;
116 
117  pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
118 
119  // the selected actual_mc * size panel of res is split into three different part:
120  // 1 - before the diagonal => processed with gebp or skipped
121  // 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel
122  // 3 - after the diagonal => processed with gebp or skipped
123  if (UpLo == Lower)
124  gebp(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, (std::min)(size, i2), alpha, -1, -1, 0,
125  0);
126 
127  sybb(res_ + resStride * i2 + resIncr * i2, resIncr, resStride, blockA, blockB + actual_kc * i2, actual_mc,
128  actual_kc, alpha);
129 
130  if (UpLo == Upper) {
131  Index j2 = i2 + actual_mc;
132  gebp(res.getSubMapper(i2, j2), blockA, blockB + actual_kc * j2, actual_mc, actual_kc,
133  (std::max)(Index(0), size - j2), alpha, -1, -1, 0, 0);
134  }
135  }
136  }
137  }
138 };
139 
140 // Optimized packed Block * packed Block product kernel evaluating only one given triangular part
141 // This kernel is built on top of the gebp kernel:
142 // - the current destination block is processed per panel of actual_mc x BlockSize
143 // where BlockSize is set to the minimal value allowing gebp to be as fast as possible
144 // - then, as usual, each panel is split into three parts along the diagonal,
145 // the sub blocks above and below the diagonal are processed as usual,
146 // while the triangular block overlapping the diagonal is evaluated into a
147 // small temporary buffer which is then accumulated into the result using a
148 // triangular traversal.
149 template <typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs,
150  int ResInnerStride, int UpLo>
151 struct tribb_kernel {
153  typedef typename Traits::ResScalar ResScalar;
154 
156  void operator()(ResScalar* res_, Index resIncr, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB,
157  Index size, Index depth, const ResScalar& alpha) {
160  ResMapper res(res_, resStride, resIncr);
163 
165 
166  // let's process the block per panel of actual_mc x BlockSize,
167  // again, each is split into three parts, etc.
168  for (Index j = 0; j < size; j += BlockSize) {
169  Index actualBlockSize = std::min<Index>(BlockSize, size - j);
170  const RhsScalar* actual_b = blockB + j * depth;
171 
172  if (UpLo == Upper)
173  gebp_kernel1(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha, -1, -1, 0, 0);
174 
175  // selfadjoint micro block
176  {
177  Index i = j;
178  buffer.setZero();
179  // 1 - apply the kernel on the temporary buffer
180  gebp_kernel2(BufferMapper(buffer.data(), BlockSize), blockA + depth * i, actual_b, actualBlockSize, depth,
181  actualBlockSize, alpha, -1, -1, 0, 0);
182 
183  // 2 - triangular accumulation
184  for (Index j1 = 0; j1 < actualBlockSize; ++j1) {
185  typename ResMapper::LinearMapper r = res.getLinearMapper(i, j + j1);
186  for (Index i1 = UpLo == Lower ? j1 : 0; UpLo == Lower ? i1 < actualBlockSize : i1 <= j1; ++i1)
187  r(i1) += buffer(i1, j1);
188  }
189  }
190 
191  if (UpLo == Lower) {
192  Index i = j + actualBlockSize;
193  gebp_kernel1(res.getSubMapper(i, j), blockA + depth * i, actual_b, size - i, depth, actualBlockSize, alpha, -1,
194  -1, 0, 0);
195  }
196  }
197  }
198 };
199 
200 } // end namespace internal
201 
202 // high level API
203 
204 template <typename MatrixType, typename ProductType, int UpLo, bool IsOuterProduct>
206 
207 template <typename MatrixType, typename ProductType, int UpLo>
208 struct general_product_to_triangular_selector<MatrixType, ProductType, UpLo, true> {
209  static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta) {
210  typedef typename MatrixType::Scalar Scalar;
211 
213  typedef internal::blas_traits<Lhs> LhsBlasTraits;
214  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
215  typedef internal::remove_all_t<ActualLhs> ActualLhs_;
216  internal::add_const_on_value_type_t<ActualLhs> actualLhs = LhsBlasTraits::extract(prod.lhs());
217 
219  typedef internal::blas_traits<Rhs> RhsBlasTraits;
220  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
221  typedef internal::remove_all_t<ActualRhs> ActualRhs_;
222  internal::add_const_on_value_type_t<ActualRhs> actualRhs = RhsBlasTraits::extract(prod.rhs());
223 
224  Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) *
225  RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
226 
227  if (!beta) mat.template triangularView<UpLo>().setZero();
228 
229  enum {
231  UseLhsDirectly = ActualLhs_::InnerStrideAtCompileTime == 1,
232  UseRhsDirectly = ActualRhs_::InnerStrideAtCompileTime == 1
233  };
234 
236  static_lhs;
238  Scalar, actualLhsPtr, actualLhs.size(),
239  (UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data()));
240  if (!UseLhsDirectly) Map<typename ActualLhs_::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs;
241 
243  static_rhs;
245  Scalar, actualRhsPtr, actualRhs.size(),
246  (UseRhsDirectly ? const_cast<Scalar*>(actualRhs.data()) : static_rhs.data()));
247  if (!UseRhsDirectly) Map<typename ActualRhs_::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
248 
250  Scalar, Index, StorageOrder, UpLo, LhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
251  RhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex>::run(actualLhs.size(), mat.data(),
252  mat.outerStride(), actualLhsPtr,
253  actualRhsPtr, actualAlpha);
254  }
255 };
256 
257 template <typename MatrixType, typename ProductType, int UpLo>
258 struct general_product_to_triangular_selector<MatrixType, ProductType, UpLo, false> {
259  static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta) {
261  typedef internal::blas_traits<Lhs> LhsBlasTraits;
262  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
263  typedef internal::remove_all_t<ActualLhs> ActualLhs_;
264  internal::add_const_on_value_type_t<ActualLhs> actualLhs = LhsBlasTraits::extract(prod.lhs());
265 
267  typedef internal::blas_traits<Rhs> RhsBlasTraits;
268  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
269  typedef internal::remove_all_t<ActualRhs> ActualRhs_;
270  internal::add_const_on_value_type_t<ActualRhs> actualRhs = RhsBlasTraits::extract(prod.rhs());
271 
272  typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) *
273  RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
274 
275  if (!beta) mat.template triangularView<UpLo>().setZero();
276 
277  enum {
278  IsRowMajor = (internal::traits<MatrixType>::Flags & RowMajorBit) ? 1 : 0,
279  LhsIsRowMajor = ActualLhs_::Flags & RowMajorBit ? 1 : 0,
280  RhsIsRowMajor = ActualRhs_::Flags & RowMajorBit ? 1 : 0,
281  SkipDiag = (UpLo & (UnitDiag | ZeroDiag)) != 0
282  };
283 
284  Index size = mat.cols();
285  if (SkipDiag) size--;
286  Index depth = actualLhs.cols();
287 
288  typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor, typename Lhs::Scalar, typename Rhs::Scalar,
289  MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime,
290  ActualRhs_::MaxColsAtCompileTime>
291  BlockingType;
292 
293  BlockingType blocking(size, size, depth, 1, false);
294 
296  Index, typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
297  typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
298  IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime,
299  UpLo&(Lower | Upper)>::run(size, depth, &actualLhs.coeffRef(SkipDiag && (UpLo & Lower) == Lower ? 1 : 0, 0),
300  actualLhs.outerStride(),
301  &actualRhs.coeffRef(0, SkipDiag && (UpLo & Upper) == Upper ? 1 : 0),
302  actualRhs.outerStride(),
303  mat.data() +
304  (SkipDiag ? (bool(IsRowMajor) != ((UpLo & Lower) == Lower) ? mat.innerStride()
305  : mat.outerStride())
306  : 0),
307  mat.innerStride(), mat.outerStride(), actualAlpha, blocking);
308  }
309 };
310 
311 template <typename MatrixType, unsigned int UpLo>
312 template <typename ProductType>
313 EIGEN_DEVICE_FUNC TriangularView<MatrixType, UpLo>& TriangularViewImpl<MatrixType, UpLo, Dense>::_assignProduct(
314  const ProductType& prod, const Scalar& alpha, bool beta) {
315  EIGEN_STATIC_ASSERT((UpLo & UnitDiag) == 0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED);
316  eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols());
317 
318  general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize == 1>::
319  run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta);
320 
321  return derived();
322 }
323 
324 } // end namespace Eigen
325 
326 #endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define eigen_assert(x)
Definition: Macros.h:910
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
Definition: Memory.h:806
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition: PartialRedux_count.cpp:3
#define EIGEN_STATIC_ASSERT(X, MSG)
Definition: StaticAssert.h:26
int rows
Definition: Tutorial_commainit_02.cpp:1
int cols
Definition: Tutorial_commainit_02.cpp:1
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
SCALAR Scalar
Definition: bench_gemm.cpp:45
MatrixXf MatrixType
Definition: benchmark-blocking-sizes.cpp:52
A matrix or vector expression mapping an existing array of data.
Definition: Map.h:96
The matrix class, also used for vectors and row-vectors.
Definition: Eigen/Eigen/src/Core/Matrix.h:186
constexpr EIGEN_DEVICE_FUNC const Scalar * data() const
Definition: PlainObjectBase.h:273
EIGEN_DEVICE_FUNC Derived & setZero(Index size)
Definition: CwiseNullaryOp.h:569
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT
Definition: Product.h:227
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const LhsNestedCleaned & lhs() const
Definition: Product.h:230
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const RhsNestedCleaned & rhs() const
Definition: Product.h:231
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT
Definition: Product.h:228
void setZero()
Definition: SparseMatrix.h:303
Index cols() const
Definition: SparseMatrix.h:161
constexpr Storage & data()
Definition: SparseMatrix.h:205
Definition: BlasUtil.h:304
Definition: BlasUtil.h:443
Definition: products/GeneralBlockPanelKernel.h:397
ScalarBinaryOpTraits< LhsScalar, RhsScalar >::ReturnType ResScalar
Definition: products/GeneralBlockPanelKernel.h:401
Definition: GeneralMatrixMatrix.h:223
Definition: GeneralMatrixMatrix.h:226
RhsScalar * blockB()
Definition: GeneralMatrixMatrix.h:246
LhsScalar * blockA()
Definition: GeneralMatrixMatrix.h:245
Index mc() const
Definition: GeneralMatrixMatrix.h:241
Index kc() const
Definition: GeneralMatrixMatrix.h:243
#define min(a, b)
Definition: datatypes.h:22
#define max(a, b)
Definition: datatypes.h:23
@ UnitDiag
Definition: Constants.h:215
@ ZeroDiag
Definition: Constants.h:217
@ Lower
Definition: Constants.h:211
@ Upper
Definition: Constants.h:213
@ Specialized
Definition: Constants.h:311
@ ColMajor
Definition: Constants.h:318
@ RowMajor
Definition: Constants.h:320
const unsigned int RowMajorBit
Definition: Constants.h:70
Eigen::DenseIndex ret
Definition: level1_cplx_impl.h:43
RealScalar alpha
Definition: level1_cplx_impl.h:151
Scalar beta
Definition: level2_cplx_impl.h:36
if(code !=NOTR) std const Scalar * actual_b
Definition: level2_impl.h:67
constexpr int plain_enum_min(A a, B b)
Definition: Meta.h:649
@ Lhs
Definition: TensorContractionMapper.h:20
@ Rhs
Definition: TensorContractionMapper.h:20
constexpr int plain_enum_max(A a, B b)
Definition: Meta.h:656
typename remove_all< T >::type remove_all_t
Definition: Meta.h:142
typename add_const_on_value_type< T >::type add_const_on_value_type_t
Definition: Meta.h:274
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
auto run(Kernel kernel, Args &&... args) -> decltype(kernel(args...))
Definition: gpu_test_helper.h:414
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
const Product< Lhs, Rhs > prod(const Lhs &lhs, const Rhs &rhs)
Definition: evaluators.cpp:7
r
Definition: UniformPSDSelfTest.py:20
type
Definition: compute_granudrum_aor.py:141
Definition: Eigen_Colamd.h:49
Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
Definition: NumTraits.h:217
Determines whether the given binary operation of two numeric types is allowed and what the scalar ret...
Definition: XprHelper.h:1043
static void run(MatrixType &mat, const ProductType &prod, const typename MatrixType::Scalar &alpha, bool beta)
Definition: GeneralMatrixMatrixTriangular.h:259
static void run(MatrixType &mat, const ProductType &prod, const typename MatrixType::Scalar &alpha, bool beta)
Definition: GeneralMatrixMatrixTriangular.h:209
Definition: GeneralMatrixMatrixTriangular.h:205
Definition: BlasUtil.h:459
Definition: products/GeneralBlockPanelKernel.h:960
Definition: BlasUtil.h:34
Definition: BlasUtil.h:30
Definition: GeneralProduct.h:228
static EIGEN_STRONG_INLINE void run(Index size, Index depth, const LhsScalar *lhs_, Index lhsStride, const RhsScalar *rhs_, Index rhsStride, ResScalar *res_, Index resIncr, Index resStride, const ResScalar &alpha, level3_blocking< LhsScalar, RhsScalar > &blocking)
Definition: GeneralMatrixMatrixTriangular.h:67
static EIGEN_STRONG_INLINE void run(Index size, Index depth, const LhsScalar *lhs, Index lhsStride, const RhsScalar *rhs, Index rhsStride, ResScalar *res, Index resIncr, Index resStride, const ResScalar &alpha, level3_blocking< RhsScalar, LhsScalar > &blocking)
Definition: GeneralMatrixMatrixTriangular.h:48
Definition: GeneralMatrixMatrixTriangular.h:39
Definition: ForwardDeclarations.h:21
Definition: GeneralMatrixMatrixTriangular.h:151
Traits::ResScalar ResScalar
Definition: GeneralMatrixMatrixTriangular.h:153
@ BlockSize
Definition: GeneralMatrixMatrixTriangular.h:155
void operator()(ResScalar *res_, Index resIncr, Index resStride, const LhsScalar *blockA, const RhsScalar *blockB, Index size, Index depth, const ResScalar &alpha)
Definition: GeneralMatrixMatrixTriangular.h:156
gebp_traits< LhsScalar, RhsScalar, ConjLhs, ConjRhs > Traits
Definition: GeneralMatrixMatrixTriangular.h:152
Definition: GeneralMatrixMatrixTriangular.h:19
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2