10 #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
11 #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
14 #include "../InternalHeaderCheck.h"
21 template <
typename Scalar,
typename Index,
int Pack1,
int Pack2_dummy,
int StorageOrder>
23 template <
int BlockRows>
28 for (
Index w = 0;
w < BlockRows;
w++) blockA[count++] = lhs(
i +
w,
k);
36 for (
Index w = h + 1;
w < BlockRows;
w++) blockA[count++] = lhs(
i +
w,
k);
51 HasHalf = (
int)HalfPacketSize < (
int)PacketSize,
52 HasQuarter = (
int)QuarterPacketSize < (
int)HalfPacketSize
59 const Index peeled_mc3 = Pack1 >= 3 * PacketSize ? (
rows / (3 * PacketSize)) * (3 * PacketSize) : 0;
60 const Index peeled_mc2 =
61 Pack1 >= 2 * PacketSize ? peeled_mc3 + ((
rows - peeled_mc3) / (2 * PacketSize)) * (2 * PacketSize) : 0;
62 const Index peeled_mc1 =
63 Pack1 >= 1 * PacketSize ? peeled_mc2 + ((
rows - peeled_mc2) / (1 * PacketSize)) * (1 * PacketSize) : 0;
64 const Index peeled_mc_half =
65 Pack1 >= HalfPacketSize ? peeled_mc1 + ((
rows - peeled_mc1) / (HalfPacketSize)) * (HalfPacketSize) : 0;
66 const Index peeled_mc_quarter =
67 Pack1 >= QuarterPacketSize
68 ? peeled_mc_half + ((
rows - peeled_mc_half) / (QuarterPacketSize)) * (QuarterPacketSize)
71 if (Pack1 >= 3 * PacketSize)
72 for (
Index i = 0; i < peeled_mc3; i += 3 * PacketSize) pack<3 * PacketSize>(blockA, lhs,
cols,
i, count);
74 if (Pack1 >= 2 * PacketSize)
75 for (
Index i = peeled_mc3; i < peeled_mc2; i += 2 * PacketSize) pack<2 * PacketSize>(blockA, lhs,
cols,
i, count);
77 if (Pack1 >= 1 * PacketSize)
78 for (
Index i = peeled_mc2; i < peeled_mc1; i += 1 * PacketSize) pack<1 * PacketSize>(blockA, lhs,
cols,
i, count);
80 if (HasHalf && Pack1 >= HalfPacketSize)
81 for (
Index i = peeled_mc1;
i < peeled_mc_half;
i += HalfPacketSize)
82 pack<HalfPacketSize>(blockA, lhs,
cols,
i, count);
84 if (HasQuarter && Pack1 >= QuarterPacketSize)
85 for (
Index i = peeled_mc_half;
i < peeled_mc_quarter;
i += QuarterPacketSize)
86 pack<QuarterPacketSize>(blockA, lhs,
cols,
i, count);
90 for (
Index k = 0;
k <
i;
k++) blockA[count++] = lhs(
i,
k);
99 template <
typename Scalar,
typename Index,
int nr,
int StorageOrder>
106 Index packet_cols8 = nr >= 8 ? (
cols / 8) * 8 : 0;
107 Index packet_cols4 = nr >= 4 ? (
cols / 4) * 4 : 0;
110 for (
Index j2 = 0; j2 < k2; j2 += nr) {
111 for (
Index k = k2;
k < end_k;
k++) {
112 blockB[count + 0] = rhs(
k, j2 + 0);
113 blockB[count + 1] = rhs(
k, j2 + 1);
115 blockB[count + 2] = rhs(
k, j2 + 2);
116 blockB[count + 3] = rhs(
k, j2 + 3);
119 blockB[count + 4] = rhs(
k, j2 + 4);
120 blockB[count + 5] = rhs(
k, j2 + 5);
121 blockB[count + 6] = rhs(
k, j2 + 6);
122 blockB[count + 7] = rhs(
k, j2 + 7);
131 for (
Index j2 = k2; j2 < end8; j2 += 8) {
147 for (
Index k = j2;
k < j2 + 8;
k++) {
149 for (
Index w = 0;
w < h; ++
w) blockB[count +
w] = rhs(
k, j2 +
w);
159 for (
Index k = j2 + 8;
k < end_k;
k++) {
160 blockB[count + 0] = rhs(
k, j2 + 0);
161 blockB[count + 1] = rhs(
k, j2 + 1);
162 blockB[count + 2] = rhs(
k, j2 + 2);
163 blockB[count + 3] = rhs(
k, j2 + 3);
164 blockB[count + 4] = rhs(
k, j2 + 4);
165 blockB[count + 5] = rhs(
k, j2 + 5);
166 blockB[count + 6] = rhs(
k, j2 + 6);
167 blockB[count + 7] = rhs(
k, j2 + 7);
185 for (
Index k = j2;
k < j2 + 4;
k++) {
187 for (
Index w = 0;
w < h; ++
w) blockB[count +
w] = rhs(
k, j2 +
w);
197 for (
Index k = j2 + 4;
k < end_k;
k++) {
198 blockB[count + 0] = rhs(
k, j2 + 0);
199 blockB[count + 1] = rhs(
k, j2 + 1);
200 blockB[count + 2] = rhs(
k, j2 + 2);
201 blockB[count + 3] = rhs(
k, j2 + 3);
209 for (
Index j2 = k2 +
rows; j2 < packet_cols8; j2 += 8) {
210 for (
Index k = k2;
k < end_k;
k++) {
224 for (
Index j2 = (
std::max)(packet_cols8, k2 +
rows); j2 < packet_cols4; j2 += 4) {
225 for (
Index k = k2;
k < end_k;
k++) {
236 for (
Index j2 = packet_cols4; j2 <
cols; ++j2) {
252 blockB[count] = rhs(
k, j2);
262 template <
typename Scalar,
typename Index,
int LhsStorageOrder,
bool LhsSelfAdjoint,
bool ConjugateLhs,
263 int RhsStorageOrder,
bool RhsSelfAdjoint,
bool ConjugateRhs,
int ResStorageOrder,
int ResInnerStride>
266 template <
typename Scalar,
typename Index,
int LhsStorageOrder,
bool LhsSelfAdjoint,
bool ConjugateLhs,
267 int RhsStorageOrder,
bool RhsSelfAdjoint,
bool ConjugateRhs,
int ResInnerStride>
269 RhsSelfAdjoint, ConjugateRhs,
RowMajor, ResInnerStride> {
278 ResInnerStride>
::run(
cols,
rows, rhs, rhsStride, lhs, lhsStride,
res, resIncr, resStride,
alpha, blocking);
282 template <
typename Scalar,
typename Index,
int LhsStorageOrder,
bool ConjugateLhs,
int RhsStorageOrder,
283 bool ConjugateRhs,
int ResInnerStride>
285 ConjugateRhs,
ColMajor, ResInnerStride> {
291 template <
typename Scalar,
typename Index,
int LhsStorageOrder,
bool ConjugateLhs,
int RhsStorageOrder,
292 bool ConjugateRhs,
int ResInnerStride>
307 LhsMapper lhs(lhs_, lhsStride);
308 LhsTransposeMapper lhs_transpose(lhs_, lhsStride);
309 RhsMapper rhs(rhs_, rhsStride);
310 ResMapper
res(res_, resStride, resIncr);
316 std::size_t sizeA = kc * mc;
317 std::size_t sizeB = kc *
cols;
324 gemm_pack_lhs<
Scalar,
Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress,
typename Traits::LhsPacket4Packing,
328 for (
Index k2 = 0; k2 <
size; k2 += kc) {
334 pack_rhs(blockB, rhs.getSubMapper(k2, 0), actual_kc,
cols);
340 for (
Index i2 = 0; i2 < k2; i2 += mc) {
343 pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
351 pack_lhs(blockA, &lhs(k2, k2), lhsStride, actual_kc, actual_mc);
356 for (
Index i2 = k2 + kc; i2 <
size; i2 += mc) {
359 LhsStorageOrder,
false>()(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
367 template <
typename Scalar,
typename Index,
int LhsStorageOrder,
bool ConjugateLhs,
int RhsStorageOrder,
368 bool ConjugateRhs,
int ResInnerStride>
370 ConjugateRhs,
ColMajor, ResInnerStride> {
376 template <
typename Scalar,
typename Index,
int LhsStorageOrder,
bool ConjugateLhs,
int RhsStorageOrder,
377 bool ConjugateRhs,
int ResInnerStride>
390 LhsMapper lhs(lhs_, lhsStride);
391 ResMapper
res(res_, resStride, resIncr);
395 std::size_t sizeA = kc * mc;
396 std::size_t sizeB = kc *
cols;
406 for (
Index k2 = 0; k2 <
size; k2 += kc) {
409 pack_rhs(blockB, rhs_, rhsStride, actual_kc,
cols, k2);
412 for (
Index i2 = 0; i2 <
rows; i2 += mc) {
414 pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
429 template <
typename Lhs,
int LhsMode,
typename Rhs,
int RhsMode>
445 template <
typename Dest>
447 eigen_assert(dst.rows() == a_lhs.rows() && dst.cols() == a_rhs.cols());
452 Scalar actualAlpha =
alpha * LhsBlasTraits::extractScalarFactor(a_lhs) * RhsBlasTraits::extractScalarFactor(a_rhs);
455 Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime,
456 Lhs::MaxColsAtCompileTime, 1>
459 BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1,
false);
470 Dest::InnerStrideAtCompileTime>
::run(lhs.rows(), rhs.cols(),
471 &lhs.coeffRef(0, 0), lhs.outerStride(),
472 &rhs.coeffRef(0, 0), rhs.outerStride(),
473 &dst.coeffRef(0, 0), dst.innerStride(), dst.outerStride(),
474 actualAlpha, blocking
AnnoyingScalar conj(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:133
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define EIGEN_DONT_INLINE
Definition: Macros.h:853
#define eigen_assert(x)
Definition: Macros.h:910
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
RowVector3d w
Definition: Matrix_resize_int.cpp:3
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
Definition: Memory.h:806
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition: PartialRedux_count.cpp:3
int rows
Definition: Tutorial_commainit_02.cpp:1
int cols
Definition: Tutorial_commainit_02.cpp:1
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
SCALAR Scalar
Definition: bench_gemm.cpp:45
Expression of the product of two arbitrary matrices or vectors.
Definition: Product.h:202
Definition: BlasUtil.h:304
Definition: BlasUtil.h:443
Definition: products/GeneralBlockPanelKernel.h:397
Definition: GeneralMatrixMatrix.h:223
Definition: GeneralMatrixMatrix.h:226
RhsScalar * blockB()
Definition: GeneralMatrixMatrix.h:246
LhsScalar * blockA()
Definition: GeneralMatrixMatrix.h:245
Index mc() const
Definition: GeneralMatrixMatrix.h:241
Index kc() const
Definition: GeneralMatrixMatrix.h:243
float real
Definition: datatypes.h:10
#define min(a, b)
Definition: datatypes.h:22
#define max(a, b)
Definition: datatypes.h:23
@ SelfAdjoint
Definition: Constants.h:227
@ Lower
Definition: Constants.h:211
@ Upper
Definition: Constants.h:213
@ ColMajor
Definition: Constants.h:318
@ RowMajor
Definition: Constants.h:320
const unsigned int RowMajorBit
Definition: Constants.h:70
RealScalar alpha
Definition: level1_cplx_impl.h:151
char char char int int * k
Definition: level2_impl.h:374
@ Lhs
Definition: TensorContractionMapper.h:20
@ Rhs
Definition: TensorContractionMapper.h:20
constexpr bool logical_xor(bool a, bool b)
Definition: Meta.h:737
typename add_const_on_value_type< T >::type add_const_on_value_type_t
Definition: Meta.h:274
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
auto run(Kernel kernel, Args &&... args) -> decltype(kernel(args...))
Definition: gpu_test_helper.h:414
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
Definition: Eigen_Colamd.h:49
Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
Definition: NumTraits.h:217
Definition: BlasUtil.h:459
std::conditional_t< bool(HasUsableDirectAccess), ExtractType, typename ExtractType_::PlainObject > DirectLinearAccessType
Definition: BlasUtil.h:475
Definition: products/GeneralBlockPanelKernel.h:960
Definition: BlasUtil.h:34
Definition: BlasUtil.h:30
Definition: GenericPacketMath.h:108
static EIGEN_STRONG_INLINE void run(Index rows, Index cols, const Scalar *lhs, Index lhsStride, const Scalar *rhs, Index rhsStride, Scalar *res, Index resIncr, Index resStride, const Scalar &alpha, level3_blocking< Scalar, Scalar > &blocking)
Definition: SelfadjointMatrixMatrix.h:270
Definition: SelfadjointMatrixMatrix.h:264
internal::blas_traits< Lhs > LhsBlasTraits
Definition: SelfadjointMatrixMatrix.h:433
Product< Lhs, Rhs >::Scalar Scalar
Definition: SelfadjointMatrixMatrix.h:431
RhsBlasTraits::DirectLinearAccessType ActualRhsType
Definition: SelfadjointMatrixMatrix.h:436
LhsBlasTraits::DirectLinearAccessType ActualLhsType
Definition: SelfadjointMatrixMatrix.h:434
static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar &alpha)
Definition: SelfadjointMatrixMatrix.h:446
internal::blas_traits< Rhs > RhsBlasTraits
Definition: SelfadjointMatrixMatrix.h:435
Definition: ProductEvaluators.h:768
Definition: SelfadjointMatrixMatrix.h:22
void pack(Scalar *blockA, const const_blas_data_mapper< Scalar, Index, StorageOrder > &lhs, Index cols, Index i, Index &count)
Definition: SelfadjointMatrixMatrix.h:24
void operator()(Scalar *blockA, const Scalar *lhs_, Index lhsStride, Index cols, Index rows)
Definition: SelfadjointMatrixMatrix.h:43
Definition: SelfadjointMatrixMatrix.h:100
@ PacketSize
Definition: SelfadjointMatrixMatrix.h:101
void operator()(Scalar *blockB, const Scalar *rhs_, Index rhsStride, Index rows, Index cols, Index k2)
Definition: SelfadjointMatrixMatrix.h:102
Definition: ForwardDeclarations.h:21
Definition: GenericPacketMath.h:134