10 #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H
11 #define EIGEN_TRIANGULAR_MATRIX_MATRIX_H
14 #include "../InternalHeaderCheck.h"
47 template <
typename Scalar,
typename Index,
int Mode,
bool LhsIsTriangular,
int LhsStorageOrder,
bool ConjugateLhs,
48 int RhsStorageOrder,
bool ConjugateRhs,
int ResStorageOrder,
int ResInnerStride,
int Version =
Specialized>
51 template <
typename Scalar,
typename Index,
int Mode,
bool LhsIsTriangular,
int LhsStorageOrder,
bool ConjugateLhs,
52 int RhsStorageOrder,
bool ConjugateRhs,
int ResInnerStride,
int Version>
54 RhsStorageOrder, ConjugateRhs,
RowMajor, ResInnerStride, Version> {
62 res, resIncr, resStride,
alpha, blocking);
67 template <
typename Scalar,
typename Index,
int Mode,
int LhsStorageOrder,
bool ConjugateLhs,
int RhsStorageOrder,
68 bool ConjugateRhs,
int ResInnerStride,
int Version>
70 ConjugateRhs,
ColMajor, ResInnerStride, Version> {
83 template <
typename Scalar,
typename Index,
int Mode,
int LhsStorageOrder,
bool ConjugateLhs,
int RhsStorageOrder,
84 bool ConjugateRhs,
int ResInnerStride,
int Version>
86 Scalar,
Index, Mode,
true, LhsStorageOrder, ConjugateLhs, RhsStorageOrder, ConjugateRhs,
ColMajor, ResInnerStride,
93 Index depth = IsLower ? diagSize : _depth;
99 LhsMapper lhs(lhs_, lhsStride);
100 RhsMapper rhs(rhs_, rhsStride);
101 ResMapper
res(res_, resStride, resIncr);
110 std::size_t sizeA = kc * mc;
111 std::size_t sizeB = kc *
cols;
119 triangularBuffer.diagonal().
setZero();
121 triangularBuffer.diagonal().
setOnes();
129 for (
Index k2 = IsLower ? depth : 0; IsLower ? k2 > 0 : k2 < depth; IsLower ? k2 -= kc : k2 += kc) {
131 Index actual_k2 = IsLower ? k2 - actual_kc : k2;
134 if ((!IsLower) && (k2 <
rows) && (k2 + actual_kc >
rows)) {
135 actual_kc =
rows - k2;
136 k2 = k2 + actual_kc - kc;
139 pack_rhs(blockB, rhs.getSubMapper(actual_k2, 0), actual_kc,
cols);
147 if (IsLower || actual_k2 <
rows) {
149 for (
Index k1 = 0; k1 < actual_kc; k1 += panelWidth) {
150 Index actualPanelWidth = std::min<Index>(actual_kc - k1, panelWidth);
151 Index lengthTarget = IsLower ? actual_kc - k1 - actualPanelWidth : k1;
152 Index startBlock = actual_k2 + k1;
153 Index blockBOffset = k1;
158 for (
Index k = 0;
k < actualPanelWidth; ++
k) {
159 if (SetDiag) triangularBuffer.
coeffRef(
k,
k) = lhs(startBlock +
k, startBlock +
k);
160 for (
Index i = IsLower ?
k + 1 : 0; IsLower ?
i < actualPanelWidth :
i <
k; ++
i)
161 triangularBuffer.
coeffRef(
i,
k) = lhs(startBlock +
i, startBlock +
k);
163 pack_lhs(blockA, LhsMapper(triangularBuffer.
data(), triangularBuffer.
outerStride()), actualPanelWidth,
167 actualPanelWidth, actual_kc, 0, blockBOffset);
170 if (lengthTarget > 0) {
171 Index startTarget = IsLower ? actual_k2 + k1 + actualPanelWidth : actual_k2;
173 pack_lhs(blockA, lhs.getSubMapper(startTarget, startBlock), actualPanelWidth, lengthTarget);
176 actualPanelWidth, actual_kc, 0, blockBOffset);
187 LhsStorageOrder,
false>()(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
189 gebp_kernel(
res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc,
cols,
alpha, -1, -1, 0, 0);
196 template <
typename Scalar,
typename Index,
int Mode,
int LhsStorageOrder,
bool ConjugateLhs,
int RhsStorageOrder,
197 bool ConjugateRhs,
int ResInnerStride,
int Version>
199 ConjugateRhs,
ColMajor, ResInnerStride, Version> {
212 template <
typename Scalar,
typename Index,
int Mode,
int LhsStorageOrder,
bool ConjugateLhs,
int RhsStorageOrder,
213 bool ConjugateRhs,
int ResInnerStride,
int Version>
215 Scalar,
Index, Mode,
false, LhsStorageOrder, ConjugateLhs, RhsStorageOrder, ConjugateRhs,
ColMajor, ResInnerStride,
223 Index depth = IsLower ? _depth : diagSize;
229 LhsMapper lhs(lhs_, lhsStride);
230 RhsMapper rhs(rhs_, rhsStride);
231 ResMapper
res(res_, resStride, resIncr);
236 std::size_t sizeA = kc * mc;
245 triangularBuffer.diagonal().
setZero();
247 triangularBuffer.diagonal().
setOnes();
256 for (
Index k2 = IsLower ? 0 : depth; IsLower ? k2 < depth : k2 > 0; IsLower ? k2 += kc : k2 -= kc) {
258 Index actual_k2 = IsLower ? k2 : k2 - actual_kc;
261 if (IsLower && (k2 <
cols) && (actual_k2 + actual_kc >
cols)) {
262 actual_kc =
cols - k2;
263 k2 = actual_k2 + actual_kc - kc;
269 Index ts = (IsLower && actual_k2 >=
cols) ? 0 : actual_kc;
271 Scalar* geb = blockB + ts * ts;
272 geb = geb + internal::first_aligned<PacketBytes>(geb, PacketBytes /
sizeof(
Scalar));
274 pack_rhs(geb, rhs.getSubMapper(actual_k2, IsLower ? 0 : k2), actual_kc, rs);
278 for (
Index j2 = 0; j2 < actual_kc; j2 += SmallPanelWidth) {
279 Index actualPanelWidth = std::min<Index>(actual_kc - j2, SmallPanelWidth);
280 Index actual_j2 = actual_k2 + j2;
281 Index panelOffset = IsLower ? j2 + actualPanelWidth : 0;
282 Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;
284 pack_rhs_panel(blockB + j2 * actual_kc, rhs.getSubMapper(actual_k2 + panelOffset, actual_j2), panelLength,
285 actualPanelWidth, actual_kc, panelOffset);
288 for (
Index j = 0;
j < actualPanelWidth; ++
j) {
289 if (SetDiag) triangularBuffer.
coeffRef(
j,
j) = rhs(actual_j2 +
j, actual_j2 +
j);
290 for (
Index k = IsLower ?
j + 1 : 0; IsLower ?
k < actualPanelWidth :
k <
j; ++
k)
291 triangularBuffer.
coeffRef(
k,
j) = rhs(actual_j2 +
k, actual_j2 +
j);
294 pack_rhs_panel(blockB + j2 * actual_kc, RhsMapper(triangularBuffer.
data(), triangularBuffer.
outerStride()),
295 actualPanelWidth, actualPanelWidth, actual_kc, j2);
299 for (
Index i2 = 0; i2 <
rows; i2 += mc) {
301 pack_lhs(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
305 for (
Index j2 = 0; j2 < actual_kc; j2 += SmallPanelWidth) {
306 Index actualPanelWidth = std::min<Index>(actual_kc - j2, SmallPanelWidth);
307 Index panelLength = IsLower ? actual_kc - j2 : j2 + actualPanelWidth;
308 Index blockOffset = IsLower ? j2 : 0;
310 gebp_kernel(
res.getSubMapper(i2, actual_k2 + j2), blockA, blockB + j2 * actual_kc, actual_mc, panelLength,
311 actualPanelWidth,
alpha, actual_kc, actual_kc,
312 blockOffset, blockOffset);
315 gebp_kernel(
res.getSubMapper(i2, IsLower ? 0 : k2), blockA, geb, actual_mc, actual_kc, rs,
alpha, -1, -1, 0, 0);
327 template <
int Mode,
bool LhsIsTriangular,
typename Lhs,
typename Rhs>
329 template <
typename Dest>
336 typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
339 typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
347 if (lhs.size() == 0 || rhs.size() == 0) {
351 LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
352 RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
353 Scalar actualAlpha =
alpha * lhs_alpha * rhs_alpha;
356 Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime,
357 Lhs::MaxColsAtCompileTime, 4>
361 Index stripedRows = ((!LhsIsTriangular) || (IsLower)) ? lhs.rows() : (
std::min)(lhs.rows(), lhs.cols());
362 Index stripedCols = ((LhsIsTriangular) || (!IsLower)) ? rhs.cols() : (
std::min)(rhs.cols(), rhs.rows());
363 Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (
std::min)(lhs.cols(), lhs.rows()))
364 : ((IsLower) ? rhs.rows() : (
std::min)(rhs.rows(), rhs.cols()));
366 BlockingType blocking(stripedRows, stripedCols, stripedDepth, 1,
false);
371 LhsBlasTraits::NeedToConjugate,
374 Dest::InnerStrideAtCompileTime>::
run(stripedRows, stripedCols, stripedDepth,
375 &lhs.coeffRef(0, 0), lhs.outerStride(),
376 &rhs.coeffRef(0, 0), rhs.outerStride(),
377 &dst.coeffRef(0, 0), dst.innerStride(), dst.outerStride(),
378 actualAlpha, blocking);
384 dst.topRows(diagSize) -= ((lhs_alpha - LhsScalar(1)) * a_rhs).topRows(diagSize);
387 dst.leftCols(diagSize) -= (rhs_alpha - RhsScalar(1)) * a_lhs.leftCols(diagSize);
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define EIGEN_DONT_INLINE
Definition: Macros.h:853
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
Definition: Memory.h:806
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition: PartialRedux_count.cpp:3
int rows
Definition: Tutorial_commainit_02.cpp:1
int cols
Definition: Tutorial_commainit_02.cpp:1
SCALAR Scalar
Definition: bench_gemm.cpp:45
The matrix class, also used for vectors and row-vectors.
Definition: Eigen/Eigen/src/Core/Matrix.h:186
EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar & coeffRef(Index rowId, Index colId)
Definition: PlainObjectBase.h:217
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerStride() const EIGEN_NOEXCEPT
Definition: Eigen/Eigen/src/Core/Matrix.h:390
EIGEN_DEVICE_FUNC Derived & setOnes(Index size)
Definition: CwiseNullaryOp.h:708
constexpr EIGEN_DEVICE_FUNC const Scalar * data() const
Definition: PlainObjectBase.h:273
EIGEN_DEVICE_FUNC Derived & setZero(Index size)
Definition: CwiseNullaryOp.h:569
Definition: BlasUtil.h:304
Definition: BlasUtil.h:443
Definition: products/GeneralBlockPanelKernel.h:397
LhsPacket LhsPacket4Packing
Definition: products/GeneralBlockPanelKernel.h:440
Definition: GeneralMatrixMatrix.h:223
Definition: GeneralMatrixMatrix.h:226
RhsScalar * blockB()
Definition: GeneralMatrixMatrix.h:246
LhsScalar * blockA()
Definition: GeneralMatrixMatrix.h:245
Index mc() const
Definition: GeneralMatrixMatrix.h:241
Index kc() const
Definition: GeneralMatrixMatrix.h:243
#define min(a, b)
Definition: datatypes.h:22
static constexpr lastp1_t end
Definition: IndexedViewHelper.h:79
@ UnitDiag
Definition: Constants.h:215
@ ZeroDiag
Definition: Constants.h:217
@ Lower
Definition: Constants.h:211
@ Upper
Definition: Constants.h:213
@ Specialized
Definition: Constants.h:311
@ ColMajor
Definition: Constants.h:318
@ RowMajor
Definition: Constants.h:320
const unsigned int RowMajorBit
Definition: Constants.h:70
RealScalar alpha
Definition: level1_cplx_impl.h:151
char char char int int * k
Definition: level2_impl.h:374
@ Lhs
Definition: TensorContractionMapper.h:20
@ Rhs
Definition: TensorContractionMapper.h:20
constexpr int plain_enum_max(A a, B b)
Definition: Meta.h:656
typename remove_all< T >::type remove_all_t
Definition: Meta.h:142
typename add_const_on_value_type< T >::type add_const_on_value_type_t
Definition: Meta.h:274
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool is_exactly_one(const X &x)
Definition: Meta.h:601
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
auto run(Kernel kernel, Args &&... args) -> decltype(kernel(args...))
Definition: gpu_test_helper.h:414
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
Definition: Eigen_Colamd.h:49
void start(const unsigned &i)
(Re-)start i-th timer
Definition: oomph_utilities.cc:243
Definition: BlasUtil.h:459
Definition: products/GeneralBlockPanelKernel.h:960
Definition: BlasUtil.h:34
Definition: BlasUtil.h:30
Definition: GenericPacketMath.h:108
Eigen::internal::product_triangular_matrix_matrix< Scalar, Index, Mode, true, LhsStorageOrder, ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, ResInnerStride, Version >::Traits gebp_traits< Scalar, Scalar > Traits
Definition: TriangularMatrixMatrix.h:71
Eigen::internal::product_triangular_matrix_matrix< Scalar, Index, Mode, false, LhsStorageOrder, ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, ResInnerStride, Version >::Traits gebp_traits< Scalar, Scalar > Traits
Definition: TriangularMatrixMatrix.h:200
Eigen::internal::product_triangular_matrix_matrix< Scalar, Index, Mode, LhsIsTriangular, LhsStorageOrder, ConjugateLhs, RhsStorageOrder, ConjugateRhs, RowMajor, ResInnerStride, Version >::run static EIGEN_STRONG_INLINE void run(Index rows, Index cols, Index depth, const Scalar *lhs, Index lhsStride, const Scalar *rhs, Index rhsStride, Scalar *res, Index resIncr, Index resStride, const Scalar &alpha, level3_blocking< Scalar, Scalar > &blocking)
Definition: TriangularMatrixMatrix.h:55
Definition: TriangularMatrixMatrix.h:49
Definition: ForwardDeclarations.h:21
static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar &alpha)
Definition: TriangularMatrixMatrix.h:330
Definition: ProductEvaluators.h:738
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2