d7/df8/SelfadjointMatrixVector_8h_source.html

 // This file is part of Eigen, a lightweight C++ template library

 // for linear algebra.

 //

 // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>

 //

 // This Source Code Form is subject to the terms of the Mozilla

 // Public License v. 2.0. If a copy of the MPL was not distributed

 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


 #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H

 #define EIGEN_SELFADJOINT_MATRIX_VECTOR_H


 // IWYU pragma: private

 #include "../InternalHeaderCheck.h"


 namespace Eigen {


 namespace internal {


 /* Optimized selfadjoint matrix * vector product:
  * This algorithm processes 2 columns at once, which both halves the number
  * of loads/stores of the result and reduces instruction dependencies.
  */


 // Forward declaration of the selfadjoint matrix * vector kernel.
 // This is the only place where the default for Version (Specialized) is given.
 template <typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs,

           int Version = Specialized>

 struct selfadjoint_matrix_vector_product;


 // Primary definition of the kernel class. It only declares the static entry
 // point run(); the body of run() is defined out of class further down in this file.
 //
 // Template parameters, as used by that body:
 //   StorageOrder              : compared against RowMajor
 //   UpLo                      : compared against Lower
 //   ConjugateLhs/ConjugateRhs : forwarded to the conj_helper instances
 template <typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs,

           int Version>

 struct selfadjoint_matrix_vector_product


 {

   // Accumulates into res (with +=) the product of the selfadjoint matrix stored at lhs
   // with the vector rhs, scaled by alpha.
   //   size      : number of entries of rhs/res, and of outer vectors of lhs, that are traversed
   //   lhs       : matrix coefficients; outer vector j starts at lhs + j * lhsStride
   //   lhsStride : distance, in Scalars, between the starts of consecutive outer vectors
   //   rhs       : input vector, read as rhs[i]
   //   res       : result vector, updated in place as res[i] += ...
   //   alpha     : scale factor applied to the product
   static EIGEN_DONT_INLINE EIGEN_DEVICE_FUNC void run(Index size, const Scalar* lhs, Index lhsStride, const Scalar* rhs,

                                                       Scalar* res, Scalar alpha);

 };


 // Kernel body: res += alpha * A * rhs, where A is the selfadjoint matrix stored at lhs.
 //
 // For every outer vector j (the Scalars starting at lhs + j * lhsStride) only the diagonal
 // entry and the entries on one side of it are read. Each such off-diagonal entry is used
 // twice: once to update res[i], and once in a sum that is finally added to res[j].
 // Outer vectors are processed two at a time in the first loop; the ones left over are
 // processed one at a time in the second loop.
 //
 // rhs and res are indexed directly (rhs[i], res[i]); res is only ever updated with +=.
 template <typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs,

           int Version>

 EIGEN_DONT_INLINE EIGEN_DEVICE_FUNC void

 selfadjoint_matrix_vector_product<Scalar, Index, StorageOrder, UpLo, ConjugateLhs, ConjugateRhs, Version>::run(

     Index size, const Scalar* lhs, Index lhsStride, const Scalar* rhs, Scalar* res, Scalar alpha) {

   typedef typename packet_traits<Scalar>::type Packet;

   typedef typename NumTraits<Scalar>::Real RealScalar;

   // Number of Scalar coefficients held by one Packet.
   const Index PacketSize = sizeof(Packet) / sizeof(Scalar);


   enum {

     IsRowMajor = StorageOrder == RowMajor ? 1 : 0,

     IsLower = UpLo == Lower ? 1 : 0,

     // When set, entries i in [0, j) of outer vector j are traversed; otherwise the
     // entries after the diagonal, up to size (see starti/endi and the last loop).
     FirstTriangular = IsRowMajor == IsLower

   };


   // All multiplications below go through these helpers. cj0/pcj0 and cj1/pcj1 differ only
   // in their third template argument, which for complex Scalar is
   // (ConjugateLhs xor IsRowMajor) and (ConjugateLhs xor !IsRowMajor) respectively, so
   // exactly one of the two is set. cj0 is used when updating res[i], cj1 when accumulating
   // into t2/t3. (Presumably this flag requests conjugation of the matrix entry, as needed
   // for the mirrored half of a selfadjoint matrix; confirm against conj_helper.)
   conj_helper<Scalar, Scalar, NumTraits<Scalar>::IsComplex && logical_xor(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0;

   conj_helper<Scalar, Scalar, NumTraits<Scalar>::IsComplex && logical_xor(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;

   // Used for the diagonal: multiplies a RealScalar (real part of the entry) by a Scalar.
   conj_helper<RealScalar, Scalar, false, ConjugateRhs> cjd;


   // Packet counterparts of cj0 and cj1.
   conj_helper<Packet, Packet, NumTraits<Scalar>::IsComplex && logical_xor(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0;

   conj_helper<Packet, Packet, NumTraits<Scalar>::IsComplex && logical_xor(ConjugateLhs, !IsRowMajor), ConjugateRhs>

       pcj1;


   // alpha, conjugated when ConjugateRhs is set. It pre-scales rhs[j] before that value is
   // passed as the second operand of the helpers; the plain alpha scales the t2/t3 sums.
   Scalar cjAlpha = ConjugateRhs ? numext::conj(alpha) : alpha;


   // max(0, size - 8) with its lowest bit cleared, i.e. an even count of outer vectors
   // for the two-at-a-time loop.
   // NOTE(review): the mask is a 32-bit literal, so for an Index wider than 32 bits the
   // bits above bit 31 are cleared as well.
   Index bound = numext::maxi(Index(0), size - 8) & 0xfffffffe;

   // With FirstTriangular the paired loop covers the last outer vectors, [bound, size),
   // instead of the first ones, [0, bound).
   if (FirstTriangular) bound = size - bound;


   // Two outer vectors (j and j + 1) per iteration.
   for (Index j = FirstTriangular ? bound : 0; j < (FirstTriangular ? size : bound); j += 2) {

     const Scalar* EIGEN_RESTRICT A0 = lhs + j * lhsStride;

     const Scalar* EIGEN_RESTRICT A1 = lhs + (j + 1) * lhsStride;


     // Scaled rhs entries for the two outer vectors, also broadcast into packets.
     Scalar t0 = cjAlpha * rhs[j];

     Packet ptmp0 = pset1<Packet>(t0);

     Scalar t1 = cjAlpha * rhs[j + 1];

     Packet ptmp1 = pset1<Packet>(t1);


     // Accumulators for the sums that end up in res[j] (t2/ptmp2) and res[j + 1]
     // (t3/ptmp3); scalar and packet parts are combined after the loops.
     Scalar t2(0);

     Packet ptmp2 = pset1<Packet>(t2);

     Scalar t3(0);

     Packet ptmp3 = pset1<Packet>(t3);


     // [starti, endi) is the index range shared by both outer vectors. It is split into
     // [starti, alignedStart), [alignedStart, alignedEnd) and [alignedEnd, endi);
     // the middle part has a length that is a multiple of PacketSize.
     Index starti = FirstTriangular ? 0 : j + 2;

     Index endi = FirstTriangular ? j : size;

     Index alignedStart = (starti) + internal::first_default_aligned(&res[starti], endi - starti);

     Index alignedEnd = alignedStart + ((endi - alignedStart) / (PacketSize)) * (PacketSize);


     // Diagonal entries: only their real part is used.
     res[j] += cjd.pmul(numext::real(A0[j]), t0);

     res[j + 1] += cjd.pmul(numext::real(A1[j + 1]), t1);

     // The single off-diagonal entry of the 2x2 diagonal block lies outside
     // [starti, endi) and is handled here; it contributes to both res entries of the pair.
     if (FirstTriangular) {

       res[j] += cj0.pmul(A1[j], t1);

       t3 += cj1.pmul(A1[j], rhs[j]);

     } else {

       res[j + 1] += cj0.pmul(A0[j + 1], t0);

       t2 += cj1.pmul(A0[j + 1], rhs[j + 1]);

     }


     // Scalar head, up to alignedStart.
     for (Index i = starti; i < alignedStart; ++i) {

       res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i], t1);

       t2 += cj1.pmul(A0[i], rhs[i]);

       t3 += cj1.pmul(A1[i], rhs[i]);

     }

     // Yes, this is an optimization for gcc 4.3 and 4.4 (=> huge speed up):
     // the packet loop walks explicit pointers instead of indexing with i.

     // gcc 4.2 does this optimization automatically.

     const Scalar* EIGEN_RESTRICT a0It = A0 + alignedStart;

     const Scalar* EIGEN_RESTRICT a1It = A1 + alignedStart;

     const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart;

     Scalar* EIGEN_RESTRICT resIt = res + alignedStart;

     // Packet body: res goes through pload/pstore, while lhs and rhs go through ploadu;
     // alignedStart was derived from the address of res only.
     for (Index i = alignedStart; i < alignedEnd; i += PacketSize) {

       Packet A0i = ploadu<Packet>(a0It);

       a0It += PacketSize;

       Packet A1i = ploadu<Packet>(a1It);

       a1It += PacketSize;

       Packet Bi = ploadu<Packet>(rhsIt);

       rhsIt += PacketSize;  // FIXME should be aligned in most cases

       Packet Xi = pload<Packet>(resIt);


       // Same three updates as in the scalar loops, PacketSize entries at a time.
       Xi = pcj0.pmadd(A0i, ptmp0, pcj0.pmadd(A1i, ptmp1, Xi));

       ptmp2 = pcj1.pmadd(A0i, Bi, ptmp2);

       ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3);

       pstore(resIt, Xi);

       resIt += PacketSize;

     }

     // Scalar tail, from alignedEnd to endi.
     for (Index i = alignedEnd; i < endi; i++) {

       res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i], t1);

       t2 += cj1.pmul(A0[i], rhs[i]);

       t3 += cj1.pmul(A1[i], rhs[i]);

     }


     // Combine the scalar and packet accumulators (predux collapses a packet to a
     // scalar) and add the scaled sums to the two res entries of this pair.
     res[j] += alpha * (t2 + predux(ptmp2));

     res[j + 1] += alpha * (t3 + predux(ptmp3));

   }

   // Remaining outer vectors, one at a time and without packets: [0, bound) when
   // FirstTriangular, [bound, size) otherwise.
   for (Index j = FirstTriangular ? 0 : bound; j < (FirstTriangular ? bound : size); j++) {

     const Scalar* EIGEN_RESTRICT A0 = lhs + j * lhsStride;


     Scalar t1 = cjAlpha * rhs[j];

     Scalar t2(0);

     // Diagonal entry: real part only.
     res[j] += cjd.pmul(numext::real(A0[j]), t1);

     // Off-diagonal entries on the traversed side of the diagonal.
     for (Index i = FirstTriangular ? 0 : j + 1; i < (FirstTriangular ? j : size); i++) {

       res[i] += cj0.pmul(A0[i], t1);

       t2 += cj1.pmul(A0[i], rhs[i]);

     }

     res[j] += alpha * t2;

   }

 }


 }  // end namespace internal


 /***************************************************************************

  * Wrapper to product_selfadjoint_vector

  ***************************************************************************/


 namespace internal {


 /**
  * Selfadjoint-matrix (lhs) times vector (rhs) product, accumulated into dest through
  * selfadjoint_matrix_vector_product.
  *
  * The kernel receives raw pointers for the rhs and the result. When dest, or the
  * rhs obtained through blas_traits, does not have a compile-time inner stride of 1,
  * a temporary buffer is used instead: the data is copied into it before the kernel
  * runs and, for dest, copied back afterwards.
  */
 template <typename Lhs, int LhsMode, typename Rhs>
 struct selfadjoint_product_impl<Lhs, LhsMode, false, Rhs, 0, true> {
   using Scalar = typename Product<Lhs, Rhs>::Scalar;

   using LhsBlasTraits = internal::blas_traits<Lhs>;
   using ActualLhsType = typename LhsBlasTraits::DirectLinearAccessType;
   using ActualLhsTypeCleaned = internal::remove_all_t<ActualLhsType>;

   using RhsBlasTraits = internal::blas_traits<Rhs>;
   using ActualRhsType = typename RhsBlasTraits::DirectLinearAccessType;
   using ActualRhsTypeCleaned = internal::remove_all_t<ActualRhsType>;

   // Only the Upper/Lower bits of the mode are forwarded to the kernel.
   enum { LhsUpLo = LhsMode & (Upper | Lower) };

   /**
    * @param dest   destination vector, read and then updated by the kernel
    * @param a_lhs  selfadjoint matrix expression
    * @param a_rhs  vector expression
    * @param alpha  scale factor; combined with the scalar factors extracted from both operands
    */
   template <typename Dest>
   static EIGEN_DEVICE_FUNC void run(Dest& dest, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha) {
     using DestScalar = typename Dest::Scalar;
     using VecScalar = typename Rhs::Scalar;
     using DestMap =
         Map<Matrix<DestScalar, Dynamic, 1>, plain_enum_min(AlignedMax, internal::packet_traits<DestScalar>::size)>;
     using RhsMap = Map<typename ActualRhsTypeCleaned::PlainObject>;
     using Kernel = internal::selfadjoint_matrix_vector_product<
         Scalar, Index, (internal::traits<ActualLhsTypeCleaned>::Flags & RowMajorBit) ? RowMajor : ColMajor,
         int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>;

     // Compile-time switches: can the kernel work directly on the caller's storage?
     enum {
       DestUsableInPlace = (Dest::InnerStrideAtCompileTime == 1),
       RhsUsableInPlace = (ActualRhsTypeCleaned::InnerStrideAtCompileTime == 1)
     };

     eigen_assert(dest.rows() == a_lhs.rows() && dest.cols() == a_rhs.cols());

     add_const_on_value_type_t<ActualLhsType> actualLhs = LhsBlasTraits::extract(a_lhs);
     add_const_on_value_type_t<ActualRhsType> actualRhs = RhsBlasTraits::extract(a_rhs);

     Scalar scale = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) * RhsBlasTraits::extractScalarFactor(a_rhs);

     internal::gemv_static_vector_if<DestScalar, Dest::SizeAtCompileTime, Dest::MaxSizeAtCompileTime,
                                     !DestUsableInPlace>
         destStorage;
     internal::gemv_static_vector_if<VecScalar, ActualRhsTypeCleaned::SizeAtCompileTime,
                                     ActualRhsTypeCleaned::MaxSizeAtCompileTime, !RhsUsableInPlace>
         rhsStorage;

     // Each macro declares the named pointer, taking the buffer given as last argument.
     ei_declare_aligned_stack_constructed_variable(DestScalar, destPtr, dest.size(),
                                                   DestUsableInPlace ? dest.data() : destStorage.data());

     ei_declare_aligned_stack_constructed_variable(
         VecScalar, rhsPtr, actualRhs.size(),
         RhsUsableInPlace ? const_cast<VecScalar*>(actualRhs.data()) : rhsStorage.data());

     if (!DestUsableInPlace) {
 #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
       // Size and size keep their names: the plugin macro is expanded in this scope
       // and presumably refers to them.
       constexpr int Size = Dest::SizeAtCompileTime;
       Index size = dest.size();
       EIGEN_DENSE_STORAGE_CTOR_PLUGIN
 #endif
       // Seed the temporary with the current content of dest.
       DestMap(destPtr, dest.size()) = dest;
     }

     if (!RhsUsableInPlace) {
 #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
       constexpr int Size = ActualRhsTypeCleaned::SizeAtCompileTime;
       Index size = actualRhs.size();
       EIGEN_DENSE_STORAGE_CTOR_PLUGIN
 #endif
       // Pack the rhs into the temporary buffer.
       RhsMap(rhsPtr, actualRhs.size()) = actualRhs;
     }

     Kernel::run(actualLhs.rows(),                                      // size
                 &actualLhs.coeffRef(0, 0), actualLhs.outerStride(),    // lhs info
                 rhsPtr,                                                // rhs info
                 destPtr,                                               // result info
                 scale                                                  // scale factor
     );

     // Write the temporary result back to the caller's destination.
     if (!DestUsableInPlace) {
       dest = DestMap(destPtr, dest.size());
     }
   }
 };


 template <typename Lhs, typename Rhs, int RhsMode>

 struct selfadjoint_product_impl<Lhs, 0, true, Rhs, RhsMode, false> {

   typedef typename Product<Lhs, Rhs>::Scalar Scalar;

   enum { RhsUpLo = RhsMode & (Upper | Lower) };


   template <typename Dest>

   static void run(Dest& dest, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha) {

     // let's simply transpose the product

     Transpose<Dest> destT(dest);

     selfadjoint_product_impl<Transpose<const Rhs>, int(RhsUpLo) == Upper ? Lower : Upper, false, Transpose<const Lhs>,

                              0, true>::run(destT, a_rhs.transpose(), a_lhs.transpose(), alpha);

   }

 };


 }  // end namespace internal


 }  // end namespace Eigen


 #endif  // EIGEN_SELFADJOINT_MATRIX_VECTOR_H

conj
AnnoyingScalar conj(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:133

i
int i
Definition: BiCGSTAB_step_by_step.cpp:9

EIGEN_RESTRICT
#define EIGEN_RESTRICT
Definition: Macros.h:1067

EIGEN_DEVICE_FUNC
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892

EIGEN_DONT_INLINE
#define EIGEN_DONT_INLINE
Definition: Macros.h:853

eigen_assert
#define eigen_assert(x)
Definition: Macros.h:910

ei_declare_aligned_stack_constructed_variable
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
Definition: Memory.h:806

res
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition: PartialRedux_count.cpp:3

size
Scalar Scalar int size
Definition: benchVecAdd.cpp:17

Scalar
SCALAR Scalar
Definition: bench_gemm.cpp:45

RealScalar
NumTraits< Scalar >::Real RealScalar
Definition: bench_gemm.cpp:46

Packet
internal::packet_traits< Scalar >::type Packet
Definition: benchmark-blocking-sizes.cpp:54

Eigen::Map
A matrix or vector expression mapping an existing array of data.
Definition: Map.h:96

Eigen::Product
Expression of the product of two arbitrary matrices or vectors.
Definition: Product.h:202

Eigen::Transpose
Expression of the transpose of a matrix.
Definition: Transpose.h:56

bool

real
float real
Definition: datatypes.h:10

Eigen::Lower
@ Lower
Definition: Constants.h:211

Eigen::Upper
@ Upper
Definition: Constants.h:213

Eigen::AlignedMax
@ AlignedMax
Definition: Constants.h:254

Eigen::Specialized
@ Specialized
Definition: Constants.h:311

Eigen::ColMajor
@ ColMajor
Definition: Constants.h:318

Eigen::RowMajor
@ RowMajor
Definition: Constants.h:320

Eigen::RowMajorBit
const unsigned int RowMajorBit
Definition: Constants.h:70

int
return int(ret)+1

alpha
RealScalar alpha
Definition: level1_cplx_impl.h:151

Eigen::internal::plain_enum_min
constexpr int plain_enum_min(A a, B b)
Definition: Meta.h:649

Eigen::internal::Lhs
@ Lhs
Definition: TensorContractionMapper.h:20

Eigen::internal::Rhs
@ Rhs
Definition: TensorContractionMapper.h:20

Eigen::internal::logical_xor
constexpr bool logical_xor(bool a, bool b)
Definition: Meta.h:737

Eigen::internal::first_default_aligned
static Index first_default_aligned(const DenseBase< Derived > &m)
Definition: DenseCoeffsBase.h:539

Eigen::internal::remove_all_t
typename remove_all< T >::type remove_all_t
Definition: Meta.h:142

Eigen::internal::pstore
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
Definition: GenericPacketMath.h:891

Eigen::internal::predux
EIGEN_DEVICE_FUNC unpacket_traits< Packet >::type predux(const Packet &a)
Definition: GenericPacketMath.h:1232

Eigen::internal::add_const_on_value_type_t
typename add_const_on_value_type< T >::type add_const_on_value_type_t
Definition: Meta.h:274

Eigen::numext::maxi
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T &x, const T &y)
Definition: MathFunctions.h:926

Eigen
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70

Eigen::run
auto run(Kernel kernel, Args &&... args) -> decltype(kernel(args...))
Definition: gpu_test_helper.h:414

Eigen::Index
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83

Global_Physical_Variables::Bi
double Bi
Biot number.
Definition: thermal_fibre.cc:85

internal
Definition: Eigen_Colamd.h:49

Eigen::NumTraits
Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
Definition: NumTraits.h:217

Eigen::internal::blas_traits
Definition: BlasUtil.h:459

Eigen::internal::blas_traits::DirectLinearAccessType
std::conditional_t< bool(HasUsableDirectAccess), ExtractType, typename ExtractType_::PlainObject > DirectLinearAccessType
Definition: BlasUtil.h:475

Eigen::internal::conj_helper
Definition: ConjHelper.h:71

Eigen::internal::conj_helper::pmadd
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType pmadd(const LhsType &x, const RhsType &y, const ResultType &c) const
Definition: ConjHelper.h:74

Eigen::internal::conj_helper::pmul
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType pmul(const LhsType &x, const RhsType &y) const
Definition: ConjHelper.h:79

Eigen::internal::gemv_static_vector_if
Definition: GeneralProduct.h:228

Eigen::internal::packet_traits
Definition: GenericPacketMath.h:108

Eigen::internal::selfadjoint_matrix_vector_product
Definition: SelfadjointMatrixVector.h:34

Eigen::internal::selfadjoint_matrix_vector_product::run
static EIGEN_DONT_INLINE EIGEN_DEVICE_FUNC void run(Index size, const Scalar *lhs, Index lhsStride, const Scalar *rhs, Scalar *res, Scalar alpha)
Definition: SelfadjointMatrixVector.h:42

Eigen::internal::selfadjoint_product_impl< Lhs, 0, true, Rhs, RhsMode, false >::run
static void run(Dest &dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar &alpha)
Definition: SelfadjointMatrixVector.h:236

Eigen::internal::selfadjoint_product_impl< Lhs, 0, true, Rhs, RhsMode, false >::Scalar
Product< Lhs, Rhs >::Scalar Scalar
Definition: SelfadjointMatrixVector.h:232

Eigen::internal::selfadjoint_product_impl< Lhs, LhsMode, false, Rhs, 0, true >::run
static EIGEN_DEVICE_FUNC void run(Dest &dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar &alpha)
Definition: SelfadjointMatrixVector.h:168

Eigen::internal::selfadjoint_product_impl< Lhs, LhsMode, false, Rhs, 0, true >::Scalar
Product< Lhs, Rhs >::Scalar Scalar
Definition: SelfadjointMatrixVector.h:155

Eigen::internal::selfadjoint_product_impl< Lhs, LhsMode, false, Rhs, 0, true >::ActualLhsType
LhsBlasTraits::DirectLinearAccessType ActualLhsType
Definition: SelfadjointMatrixVector.h:158

Eigen::internal::selfadjoint_product_impl< Lhs, LhsMode, false, Rhs, 0, true >::LhsBlasTraits
internal::blas_traits< Lhs > LhsBlasTraits
Definition: SelfadjointMatrixVector.h:157

Eigen::internal::selfadjoint_product_impl< Lhs, LhsMode, false, Rhs, 0, true >::ActualRhsType
RhsBlasTraits::DirectLinearAccessType ActualRhsType
Definition: SelfadjointMatrixVector.h:162

Eigen::internal::selfadjoint_product_impl< Lhs, LhsMode, false, Rhs, 0, true >::ActualRhsTypeCleaned
internal::remove_all_t< ActualRhsType > ActualRhsTypeCleaned
Definition: SelfadjointMatrixVector.h:163

Eigen::internal::selfadjoint_product_impl< Lhs, LhsMode, false, Rhs, 0, true >::ActualLhsTypeCleaned
internal::remove_all_t< ActualLhsType > ActualLhsTypeCleaned
Definition: SelfadjointMatrixVector.h:159

Eigen::internal::selfadjoint_product_impl< Lhs, LhsMode, false, Rhs, 0, true >::RhsBlasTraits
internal::blas_traits< Rhs > RhsBlasTraits
Definition: SelfadjointMatrixVector.h:161

Eigen::internal::selfadjoint_product_impl
Definition: ProductEvaluators.h:768

Eigen::internal::traits
Definition: ForwardDeclarations.h:21

j
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2

Eigen::internal::Packet
Definition: ZVector/PacketMath.h:50