MatrixVectorProduct.h File Reference


Classes

struct  loadColData_impl< RhsMapper, linear >
 
struct  loadColData_impl< RhsMapper, true >
 
struct  UseStride< RhsMapper, LhsMapper, typename >
 
struct  UseStride< RhsMapper, LhsMapper, std::enable_if_t< std::is_member_function_pointer< decltype(&RhsMapper::stride)>::value > >
 
struct  alpha_store< PResPacket, ResPacket, ResScalar, Scalar >
 
struct  alpha_store< PResPacket, ResPacket, ResScalar, Scalar >::ri
 
struct  ScalarBlock< Scalar, N >
 

Macros

#define EIGEN_POWER_GEMV_PREFETCH(p)
 
#define GEMV_BUILDPAIR_MMA(dst, src1, src2)    __builtin_vsx_assemble_pair(&dst, (__vector unsigned char)src1, (__vector unsigned char)src2)
 
#define GEMV_IS_COMPLEX_COMPLEX   ((sizeof(LhsPacket) == 16) && (sizeof(RhsPacket) == 16))
 
#define GEMV_IS_FLOAT   (ResPacketSize == (16 / sizeof(float)))
 
#define GEMV_IS_SCALAR   (sizeof(ResPacket) != 16)
 
#define GEMV_IS_COMPLEX_FLOAT   (ResPacketSize == (16 / sizeof(std::complex<float>)))
 
#define GEMV_UNROLL(func, N)   func(0, N) func(1, N) func(2, N) func(3, N) func(4, N) func(5, N) func(6, N) func(7, N)
 
#define GEMV_UNROLL_HALF(func, N)   func(0, 0, 1, N) func(1, 2, 3, N) func(2, 4, 5, N) func(3, 6, 7, N)
 
#define GEMV_GETN(N)   (((N) * ResPacketSize) >> 2)
 
#define GEMV_LOADPACKET_COL(iter)   lhs.template load<LhsPacket, LhsAlignment>(i + ((iter) * LhsPacketSize), j)
 
#define GEMV_INIT(iter, N)
 
#define GEMV_PREFETCH(iter, N)
 
#define GEMV_WORK_COL(iter, N)
 
#define GEMV_STORE_COL(iter, N)
 
#define GEMV_PROCESS_COL_ONE(N)
 
#define GEMV_PROCESS_COL(N)   GEMV_PROCESS_COL_ONE(N)
 
#define MAX_BFLOAT16_VEC_ACC_VSX   8
 
#define COMPLEX_DELTA   2
 
#define GEMV_MULT_COMPLEX_COMPLEX(LhsType, RhsType, ResType)
 
#define GEMV_MULT_REAL_COMPLEX(LhsType, RhsType, ResType)
 
#define GEMV_MULT_COMPLEX_REAL(LhsType, RhsType, ResType1, ResType2)
 
#define GEMV_GETN_COMPLEX(N)   (((N) * ResPacketSize) >> 1)
 
#define GEMV_LOADPACKET_COL_COMPLEX(iter)    loadLhsPacket<Scalar, LhsScalar, LhsMapper, PLhsPacket>(lhs, i + ((iter) * ResPacketSize), j)
 
#define GEMV_LOADPACKET_COL_COMPLEX_DATA(iter)   convertReal(GEMV_LOADPACKET_COL_COMPLEX(iter))
 
#define GEMV_INIT_COMPLEX(iter, N)
 
#define GEMV_WORK_COL_COMPLEX(iter, N)
 
#define GEMV_STORE_COL_COMPLEX(iter, N)
 
#define GEMV_PROCESS_COL_COMPLEX_ONE(N)
 
#define GEMV_PROCESS_COL_COMPLEX(N)   GEMV_PROCESS_COL_COMPLEX_ONE(N)
 
#define GEMV_UNROLL_ROW(func, N)   func(0, N) func(1, N) func(2, N) func(3, N) func(4, N) func(5, N) func(6, N) func(7, N)
 
#define GEMV_UNROLL_ROW_HALF(func, N)   func(0, 0, 1, N) func(1, 2, 3, N) func(2, 4, 5, N) func(3, 6, 7, N)
 
#define GEMV_LOADPACKET_ROW(iter)   lhs.template load<LhsPacket, Unaligned>(i + (iter), j)
 
#define GEMV_INIT_ROW(iter, N)
 
#define GEMV_WORK_ROW(iter, N)
 
#define GEMV_PREDUX2(iter1, iter2, iter3, N)
 
#define GEMV_MULT(iter1, iter2, iter3, N)
 
#define GEMV_STORE_ROW(iter1, iter2, iter3, N)
 
#define GEMV_PROCESS_ROW(N)
 
#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL(Scalar)
 
#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW(Scalar)
 
#define gemv_bf16_col   gemv_bfloat16_col
 
#define gemv_bf16_row   gemv_bfloat16_row
 
#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL_BFLOAT16()
 
#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW_BFLOAT16()
 
#define GEMV_LOADPACKET_ROW_COMPLEX(iter)   loadLhsPacket<Scalar, LhsScalar, LhsMapper, PLhsPacket>(lhs, i + (iter), j)
 
#define GEMV_LOADPACKET_ROW_COMPLEX_DATA(iter)   convertReal(GEMV_LOADPACKET_ROW_COMPLEX(iter))
 
#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_WORK(which, N)
 
#define GEMV_PROCESS_END_ROW_COMPLEX(N)
 
#define GEMV_WORK_ROW_COMPLEX(iter, N)
 
#define GEMV_PREDUX4_COMPLEX(iter1, iter2, iter3, N)
 
#define GEMV_MULT_COMPLEX(iter1, iter2, iter3, N)
 
#define GEMV_STORE_ROW_COMPLEX(iter1, iter2, iter3, N)
 
#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW(N)
 
#define GEMV_PROCESS_ROW_COMPLEX_ONE_NEW(N)
 
#define GEMV_PROCESS_ROW_COMPLEX_PREDUX_NEW(iter)
 
#define GEMV_LOADPACKET_ROW_COMPLEX_OLD(iter)   lhs.template load<LhsPacket, LhsAlignment>(i + (iter), j)
 
#define GEMV_INIT_COMPLEX_OLD(iter, N)
 
#define GEMV_WORK_ROW_COMPLEX_OLD(iter, N)
 
#define GEMV_PREDUX4_COMPLEX_OLD(iter1, iter2, iter3, N)
 
#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD(N)
 
#define GEMV_PROCESS_ROW_COMPLEX_ONE_OLD(N)
 
#define GEMV_PROCESS_ROW_COMPLEX_PREDUX_OLD(iter)   dd0 = predux(c1##iter);
 
#define GEMV_PROCESS_ROW_COMPLEX_IS_NEW   (sizeof(Scalar) == sizeof(float)) || GEMV_IS_COMPLEX_COMPLEX
 
#define GEMV_PROCESS_ROW_COMPLEX_SINGLE(N)
 
#define GEMV_PROCESS_ROW_COMPLEX_ONE(N)
 
#define GEMV_PROCESS_ROW_COMPLEX_PREDUX(iter)
 
#define GEMV_PROCESS_ROW_COMPLEX(N)   GEMV_PROCESS_ROW_COMPLEX_ONE(N)
 
#define EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL(Scalar, LhsScalar, RhsScalar)
 
#define EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW(Scalar, LhsScalar, RhsScalar)
 

Functions

template<typename ResPacket , typename ResScalar >
EIGEN_ALWAYS_INLINE void storeMaddData (ResScalar *res, ResPacket &palpha, ResPacket &data)
 
template<typename ResScalar >
EIGEN_ALWAYS_INLINE void storeMaddData (ResScalar *res, ResScalar &alpha, ResScalar &data)
 
template<typename LhsScalar , typename LhsMapper , typename RhsScalar , typename RhsMapper , typename ResScalar >
EIGEN_STRONG_INLINE void gemv_col (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, ResScalar *res, Index resIncr, ResScalar alpha)
 
template<bool extraRows>
EIGEN_ALWAYS_INLINE void outputVecCol (Packet4f acc, float *result, Packet4f pAlpha, Index extra_rows)
 
template<Index num_acc, bool extraRows, Index size>
EIGEN_ALWAYS_INLINE void outputVecColResults (Packet4f(&acc)[num_acc][size], float *result, Packet4f pAlpha, Index extra_rows)
 
template<Index num_acc, typename LhsMapper , bool zero>
EIGEN_ALWAYS_INLINE void loadVecLoopVSX (Index k, LhsMapper &lhs, Packet4f(&a0)[num_acc][2])
 
template<Index num_acc, bool zero>
EIGEN_ALWAYS_INLINE void multVecVSX (Packet4f(&acc)[num_acc][2], Packet4f(&a0)[num_acc][2], Packet4f(&b0)[2])
 
template<typename RhsMapper , bool linear>
EIGEN_ALWAYS_INLINE Packet8bf loadColData (RhsMapper &rhs, Index j)
 
template<Index num_acc, typename LhsMapper , typename RhsMapper , bool zero, bool linear>
EIGEN_ALWAYS_INLINE void vecColLoopVSX (Index j, LhsMapper &lhs, RhsMapper &rhs, Packet4f(&acc)[num_acc][2])
 
template<Index num_acc>
EIGEN_ALWAYS_INLINE void addResultsVSX (Packet4f(&acc)[num_acc][2])
 
template<const Index num_acc, typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>
void colVSXVecColLoopBody (Index &row, Index cend, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<const Index num_acc, typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>
EIGEN_ALWAYS_INLINE void colVSXVecColLoopBodyExtraN (Index &row, Index cend, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>
EIGEN_ALWAYS_INLINE void colVSXVecColLoopBodyExtra (Index &row, Index cend, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<typename LhsMapper , typename RhsMapper , bool linear>
EIGEN_ALWAYS_INLINE void calcVSXVecColLoops (Index cend, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<const Index size, bool inc, Index delta>
EIGEN_ALWAYS_INLINE void storeBF16fromResult (bfloat16 *dst, Packet8bf data, Index resInc, Index extra)
 
template<const Index size, bool inc = false>
EIGEN_ALWAYS_INLINE void convertPointerF32toBF16VSX (Index &i, float *result, Index rows, bfloat16 *&dst, Index resInc=1)
 
template<bool inc = false>
EIGEN_ALWAYS_INLINE void convertArrayPointerF32toBF16VSX (float *result, Index rows, bfloat16 *dst, Index resInc=1)
 
template<typename LhsMapper , typename RhsMapper >
void gemv_bfloat16_col (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, bfloat16 *res, Index resIncr, bfloat16 alpha)
 
template<Index num_acc, Index size>
EIGEN_ALWAYS_INLINE void outputVecResults (Packet4f(&acc)[num_acc][size], float *result, Packet4f pAlpha)
 
template<Index num_acc>
EIGEN_ALWAYS_INLINE void preduxVecResults2VSX (Packet4f(&acc)[num_acc][2], Index k)
 
template<Index num_acc>
EIGEN_ALWAYS_INLINE void preduxVecResultsVSX (Packet4f(&acc)[num_acc][2])
 
EIGEN_ALWAYS_INLINE Packet8us loadPacketPartialZero (Packet8us data, Index extra_cols)
 
template<Index num_acc, typename LhsMapper , typename RhsMapper , bool extra>
EIGEN_ALWAYS_INLINE void multVSXVecLoop (Packet4f(&acc)[num_acc][2], const LhsMapper &lhs, RhsMapper &rhs, Index j, Index extra_cols)
 
template<Index num_acc, typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void vecVSXLoop (Index cols, const LhsMapper &lhs, RhsMapper &rhs, Packet4f(&acc)[num_acc][2], Index extra_cols)
 
template<const Index num_acc, typename LhsMapper , typename RhsMapper >
void colVSXVecLoopBody (Index &row, Index cols, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<const Index num_acc, typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void colVSXVecLoopBodyExtraN (Index &row, Index cols, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void colVSXVecLoopBodyExtra (Index &row, Index cols, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void calcVSXVecLoops (Index cols, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<typename LhsMapper , typename RhsMapper >
EIGEN_STRONG_INLINE void gemv_bfloat16_row (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, bfloat16 *res, Index resIncr, bfloat16 alpha)
 
EIGEN_ALWAYS_INLINE Packet2cf pconj2 (const Packet2cf &a)
 
EIGEN_ALWAYS_INLINE Packet1cd pconj2 (const Packet1cd &a)
 
EIGEN_ALWAYS_INLINE Packet2cf pconjinv (const Packet2cf &a)
 
EIGEN_ALWAYS_INLINE Packet1cd pconjinv (const Packet1cd &a)
 
EIGEN_ALWAYS_INLINE Packet2cf pcplxflipconj (Packet2cf a)
 
EIGEN_ALWAYS_INLINE Packet1cd pcplxflipconj (Packet1cd a)
 
EIGEN_ALWAYS_INLINE Packet2cf pcplxconjflip (Packet2cf a)
 
EIGEN_ALWAYS_INLINE Packet1cd pcplxconjflip (Packet1cd a)
 
EIGEN_ALWAYS_INLINE Packet2cf pnegate2 (Packet2cf a)
 
EIGEN_ALWAYS_INLINE Packet1cd pnegate2 (Packet1cd a)
 
EIGEN_ALWAYS_INLINE Packet2cf pcplxflipnegate (Packet2cf a)
 
EIGEN_ALWAYS_INLINE Packet1cd pcplxflipnegate (Packet1cd a)
 
EIGEN_ALWAYS_INLINE Packet2cf pcplxflip2 (Packet2cf a)
 
EIGEN_ALWAYS_INLINE Packet1cd pcplxflip2 (Packet1cd a)
 
EIGEN_ALWAYS_INLINE Packet4f pload_complex_half (std::complex< float > *src)
 
template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag (RhsScalar *src, Packet4f &r, Packet4f &i)
 
template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag (RhsScalar *src, Packet2d &r, Packet2d &i)
 
template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag_row (RhsScalar *src, Packet4f &r, Packet4f &i)
 
template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag_row (RhsScalar *src, Packet2d &r, Packet2d &i)
 
EIGEN_ALWAYS_INLINE Packet4f pload_realimag_combine (std::complex< float > *src)
 
EIGEN_ALWAYS_INLINE Packet2d pload_realimag_combine (std::complex< double > *src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_realimag_combine_row (std::complex< float > *src)
 
EIGEN_ALWAYS_INLINE Packet2d pload_realimag_combine_row (std::complex< double > *src)
 
template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet4f pload_complex (std::complex< float > *src)
 
template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet2d pload_complex (std::complex< double > *src)
 
template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet4f pload_complex (Packet2cf *src)
 
template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet2d pload_complex (Packet1cd *src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_complex_full (std::complex< float > *src)
 
EIGEN_ALWAYS_INLINE Packet2d pload_complex_full (std::complex< double > *src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_complex_full_row (std::complex< float > *src)
 
EIGEN_ALWAYS_INLINE Packet2d pload_complex_full_row (std::complex< double > *src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_real (float *src)
 
EIGEN_ALWAYS_INLINE Packet2d pload_real (double *src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_real (Packet4f &src)
 
EIGEN_ALWAYS_INLINE Packet2d pload_real (Packet2d &src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_real_full (float *src)
 
EIGEN_ALWAYS_INLINE Packet2d pload_real_full (double *src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_real_full (std::complex< float > *src)
 
EIGEN_ALWAYS_INLINE Packet2d pload_real_full (std::complex< double > *src)
 
template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet4f pload_real_row (float *src)
 
template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet2d pload_real_row (double *src)
 
EIGEN_ALWAYS_INLINE Packet2cf padd (Packet2cf &a, std::complex< float > &b)
 
EIGEN_ALWAYS_INLINE Packet1cd padd (Packet1cd &a, std::complex< double > &b)
 
template<typename Scalar , typename ResScalar >
EIGEN_ALWAYS_INLINE Scalar pset1_realimag (ResScalar &alpha, int which, int conj)
 
template<typename Scalar , typename ResScalar , typename ResPacket , int which>
EIGEN_ALWAYS_INLINE Packet2cf pset1_complex (std::complex< float > &alpha)
 
template<typename Scalar , typename ResScalar , typename ResPacket , int which>
EIGEN_ALWAYS_INLINE Packet1cd pset1_complex (std::complex< double > &alpha)
 
template<typename Packet >
EIGEN_ALWAYS_INLINE Packet pset_zero ()
 
template<>
EIGEN_ALWAYS_INLINE Packet2cf pset_zero< Packet2cf > ()
 
template<>
EIGEN_ALWAYS_INLINE Packet1cd pset_zero< Packet1cd > ()
 
template<typename Packet , typename LhsPacket , typename RhsPacket >
EIGEN_ALWAYS_INLINE Packet pset_init (Packet &c1)
 
template<typename ScalarPacket , typename AlphaData >
EIGEN_ALWAYS_INLINE ScalarPacket pmadd_complex (ScalarPacket &c0, ScalarPacket &c2, ScalarPacket &c4, AlphaData &b0)
 
template<typename Scalar , typename ScalarPacket , typename PResPacket , typename ResPacket , typename ResScalar , typename AlphaData >
EIGEN_ALWAYS_INLINE void pstoreu_pmadd_complex (PResPacket &c0, AlphaData &b0, ResScalar *res)
 
template<typename ScalarPacket , typename PResPacket , typename ResPacket , typename ResScalar , typename AlphaData , Index ResPacketSize, Index iter2>
EIGEN_ALWAYS_INLINE void pstoreu_pmadd_complex (PResPacket &c0, PResPacket &c1, AlphaData &b0, ResScalar *res)
 
template<typename Scalar , typename LhsScalar , typename LhsMapper , typename LhsPacket >
EIGEN_ALWAYS_INLINE LhsPacket loadLhsPacket (LhsMapper &lhs, Index i, Index j)
 
template<typename ComplexPacket , typename RealPacket , bool ConjugateLhs, bool ConjugateRhs, bool Negate>
EIGEN_ALWAYS_INLINE RealPacket pmadd_complex_complex (RealPacket &a, RealPacket &b, RealPacket &c)
 
template<typename ComplexPacket , typename RealPacket , bool Conjugate>
EIGEN_ALWAYS_INLINE RealPacket pmadd_complex_real (RealPacket &a, RealPacket &b, RealPacket &c)
 
template<typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_generic (LhsPacket &a0, RhsScalar *b, PResPacket &c0)
 
template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_complex_complex (LhsPacket &a0, RhsScalar *b, PResPacket &c0, ResPacket &c1)
 
template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_real_complex (LhsPacket &a0, RhsScalar *b, PResPacket &c0)
 
template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_complex_real (LhsPacket &a0, RhsScalar *b, PResPacket &c0)
 
template<typename Scalar , typename LhsScalar , typename LhsMapper , bool ConjugateLhs, bool LhsIsReal, typename RhsScalar , typename RhsMapper , bool ConjugateRhs, bool RhsIsReal, typename ResScalar >
EIGEN_STRONG_INLINE void gemv_complex_col (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, ResScalar *res, Index resIncr, ResScalar alpha)
 
template<typename ResScalar , typename ResPacket >
EIGEN_ALWAYS_INLINE ScalarBlock< ResScalar, 2 > predux_real (ResPacket &a, ResPacket &b)
 
template<typename ResScalar , typename ResPacket >
EIGEN_ALWAYS_INLINE ScalarBlock< ResScalar, 2 > predux_complex (ResPacket &a, ResPacket &b)
 
template<typename LhsScalar , typename LhsMapper , typename RhsScalar , typename RhsMapper , typename ResScalar >
EIGEN_STRONG_INLINE void gemv_row (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, ResScalar *res, Index resIncr, ResScalar alpha)
 
template<typename ResScalar , typename PResPacket , typename ResPacket , typename LhsPacket , typename RhsPacket >
EIGEN_ALWAYS_INLINE ScalarBlock< ResScalar, 2 > predux_complex (PResPacket &a0, PResPacket &b0, ResPacket &a1, ResPacket &b1)
 
template<typename Scalar , typename LhsScalar , typename LhsMapper , bool ConjugateLhs, bool LhsIsReal, typename RhsScalar , typename RhsMapper , bool ConjugateRhs, bool RhsIsReal, typename ResScalar >
EIGEN_STRONG_INLINE void gemv_complex_row (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, ResScalar *res, Index resIncr, ResScalar alpha)
 

Variables

static Packet16uc p16uc_MERGE16_32_V1 = {0, 1, 16, 17, 0, 1, 16, 17, 0, 1, 16, 17, 0, 1, 16, 17}
 
static Packet16uc p16uc_MERGE16_32_V2 = {2, 3, 18, 19, 2, 3, 18, 19, 2, 3, 18, 19, 2, 3, 18, 19}
 
const Packet16uc p16uc_COMPLEX32_XORFLIP
 
const Packet16uc p16uc_COMPLEX64_XORFLIP
 
const Packet16uc p16uc_COMPLEX32_CONJ_XOR
 
const Packet16uc p16uc_COMPLEX64_CONJ_XOR
 
const Packet16uc p16uc_COMPLEX32_CONJ_XOR2
 
const Packet16uc p16uc_COMPLEX64_CONJ_XOR2
 
const Packet16uc p16uc_COMPLEX32_NEGATE
 
const Packet16uc p16uc_COMPLEX64_NEGATE
 
const Packet16uc p16uc_MERGEE
 
const Packet16uc p16uc_MERGEO
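
These two merge masks are vec_perm selector vectors used when widening bfloat16 data: byte i of a vec_perm result is taken from the first operand when mask[i] < 16 and from the second operand otherwise. A portable sketch of that selection rule applied to p16uc_MERGE16_32_V1 (an illustration of the mask's effect only; the real code uses the VSX intrinsic on Packet16uc vectors):

#include <cstdint>
#include <cstdio>

// Scalar model of vec_perm's byte selection.
void perm16(const uint8_t a[16], const uint8_t b[16], const uint8_t sel[16], uint8_t out[16]) {
  for (int i = 0; i < 16; i++) out[i] = (sel[i] < 16) ? a[sel[i]] : b[sel[i] - 16];
}

int main() {
  const uint8_t v1[16] = {0, 1, 16, 17, 0, 1, 16, 17, 0, 1, 16, 17, 0, 1, 16, 17};  // p16uc_MERGE16_32_V1
  uint8_t a[16], b[16], out[16];
  for (int i = 0; i < 16; i++) {
    a[i] = static_cast<uint8_t>(i);         // bytes of the first source vector
    b[i] = static_cast<uint8_t>(0x40 + i);  // bytes of the second source vector
  }
  perm16(a, b, v1, out);
  for (int i = 0; i < 16; i++) std::printf("%02x ", out[i]);  // "00 01 40 41" repeated four times:
  std::printf("\n");  // every 32-bit lane pairs the first 16-bit element of a with the first of b
  return 0;
}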
 

Macro Definition Documentation

◆ COMPLEX_DELTA

#define COMPLEX_DELTA   2

◆ EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL

#define EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL (   Scalar,
  LhsScalar,
  RhsScalar 
)
Value:
template <typename Index, typename LhsMapper, bool ConjugateLhs, typename RhsMapper, bool ConjugateRhs, int Version> \
struct general_matrix_vector_product<Index, LhsScalar, LhsMapper, ColMajor, ConjugateLhs, RhsScalar, RhsMapper, \
ConjugateRhs, Version> { \
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar; \
\
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run(Index rows, Index cols, const LhsMapper& lhs, \
const RhsMapper& rhs, ResScalar* res, Index resIncr, \
ResScalar alpha) { \
gemv_complex_col<Scalar, LhsScalar, LhsMapper, ConjugateLhs, sizeof(Scalar) == sizeof(LhsScalar), RhsScalar, \
RhsMapper, ConjugateRhs, sizeof(Scalar) == sizeof(RhsScalar), ResScalar>(rows, cols, lhs, rhs, \
res, resIncr, alpha); \
} \
};

◆ EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW

#define EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW (   Scalar,
  LhsScalar,
  RhsScalar 
)
Value:
template <typename Index, typename LhsMapper, bool ConjugateLhs, typename RhsMapper, bool ConjugateRhs, int Version> \
struct general_matrix_vector_product<Index, LhsScalar, LhsMapper, RowMajor, ConjugateLhs, RhsScalar, RhsMapper, \
ConjugateRhs, Version> { \
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar; \
\
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run(Index rows, Index cols, const LhsMapper& lhs, \
const RhsMapper& rhs, ResScalar* res, Index resIncr, \
ResScalar alpha) { \
gemv_complex_row<Scalar, LhsScalar, LhsMapper, ConjugateLhs, sizeof(Scalar) == sizeof(LhsScalar), RhsScalar, \
RhsMapper, ConjugateRhs, sizeof(Scalar) == sizeof(RhsScalar), ResScalar>(rows, cols, lhs, rhs, \
res, resIncr, alpha); \
} \
};

◆ EIGEN_POWER_GEMV_PREFETCH

#define EIGEN_POWER_GEMV_PREFETCH (   p)

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL

#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL (   Scalar)
Value:
template <typename Index, typename LhsMapper, bool ConjugateLhs, typename RhsMapper, bool ConjugateRhs, int Version> \
struct general_matrix_vector_product<Index, Scalar, LhsMapper, ColMajor, ConjugateLhs, Scalar, RhsMapper, \
ConjugateRhs, Version> { \
typedef typename ScalarBinaryOpTraits<Scalar, Scalar>::ReturnType ResScalar; \
\
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run(Index rows, Index cols, const LhsMapper& lhs, \
const RhsMapper& rhs, ResScalar* res, Index resIncr, \
ResScalar alpha) { \
gemv_col<Scalar, LhsMapper, Scalar, RhsMapper, ResScalar>(rows, cols, lhs, rhs, res, resIncr, alpha); \
} \
};

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL_BFLOAT16

#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL_BFLOAT16 ( )
Value:
template <typename Index, typename LhsMapper, bool ConjugateLhs, typename RhsMapper, bool ConjugateRhs, int Version> \
struct general_matrix_vector_product<Index, bfloat16, LhsMapper, ColMajor, ConjugateLhs, bfloat16, RhsMapper, \
ConjugateRhs, Version> { \
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run(Index rows, Index cols, const LhsMapper& lhs, \
const RhsMapper& rhs, bfloat16* res, Index resIncr, \
bfloat16 alpha) { \
gemv_bf16_col<LhsMapper, RhsMapper>(rows, cols, lhs, rhs, res, resIncr, alpha); \
} \
};

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW

#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW (   Scalar)
Value:
template <typename Index, typename LhsMapper, bool ConjugateLhs, typename RhsMapper, bool ConjugateRhs, int Version> \
struct general_matrix_vector_product<Index, Scalar, LhsMapper, RowMajor, ConjugateLhs, Scalar, RhsMapper, \
ConjugateRhs, Version> { \
typedef typename ScalarBinaryOpTraits<Scalar, Scalar>::ReturnType ResScalar; \
\
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run(Index rows, Index cols, const LhsMapper& lhs, \
const RhsMapper& rhs, ResScalar* res, Index resIncr, \
ResScalar alpha) { \
gemv_row<Scalar, LhsMapper, Scalar, RhsMapper, ResScalar>(rows, cols, lhs, rhs, res, resIncr, alpha); \
} \
};

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW_BFLOAT16

#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW_BFLOAT16 ( )
Value:
template <typename Index, typename LhsMapper, bool ConjugateLhs, typename RhsMapper, bool ConjugateRhs, int Version> \
struct general_matrix_vector_product<Index, bfloat16, LhsMapper, RowMajor, ConjugateLhs, bfloat16, RhsMapper, \
ConjugateRhs, Version> { \
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run(Index rows, Index cols, const LhsMapper& lhs, \
const RhsMapper& rhs, bfloat16* res, Index resIncr, \
bfloat16 alpha) { \
gemv_bf16_row<LhsMapper, RhsMapper>(rows, cols, lhs, rhs, res, resIncr, alpha); \
} \
};

◆ gemv_bf16_col

#define gemv_bf16_col   gemv_bfloat16_col

◆ gemv_bf16_row

#define gemv_bf16_row   gemv_bfloat16_row

◆ GEMV_BUILDPAIR_MMA

#define GEMV_BUILDPAIR_MMA (   dst,
  src1,
  src2 
)     __builtin_vsx_assemble_pair(&dst, (__vector unsigned char)src1, (__vector unsigned char)src2)

◆ GEMV_GETN

#define GEMV_GETN (   N)    (((N) * ResPacketSize) >> 2)

◆ GEMV_GETN_COMPLEX

#define GEMV_GETN_COMPLEX (   N)    (((N) * ResPacketSize) >> 1)

◆ GEMV_INIT

#define GEMV_INIT (   iter,
  N 
)
Value:
if (N > iter) { \
c##iter = pset1<ResPacket>(ResScalar(0)); \
} else { \
EIGEN_UNUSED_VARIABLE(c##iter); \
}

◆ GEMV_INIT_COMPLEX

#define GEMV_INIT_COMPLEX (   iter,
  N 
)
Value:
if (N > iter) { \
c0##iter = pset_zero<PResPacket>(); \
c1##iter = pset_init<ResPacket, LhsPacket, RhsPacket>(c1##iter); \
} else { \
EIGEN_UNUSED_VARIABLE(c0##iter); \
EIGEN_UNUSED_VARIABLE(c1##iter); \
}

◆ GEMV_INIT_COMPLEX_OLD

#define GEMV_INIT_COMPLEX_OLD (   iter,
  N 
)
Value:
EIGEN_UNUSED_VARIABLE(c0##iter); \
if (N > iter) { \
c1##iter = pset_zero<ResPacket>(); \
} else { \
EIGEN_UNUSED_VARIABLE(c1##iter); \
}

◆ GEMV_INIT_ROW

#define GEMV_INIT_ROW (   iter,
  N 
)
Value:
if (N > iter) { \
c##iter = pset1<ResPacket>(ResScalar(0)); \
} else { \
EIGEN_UNUSED_VARIABLE(c##iter); \
}

◆ GEMV_IS_COMPLEX_COMPLEX

#define GEMV_IS_COMPLEX_COMPLEX   ((sizeof(LhsPacket) == 16) && (sizeof(RhsPacket) == 16))

◆ GEMV_IS_COMPLEX_FLOAT

#define GEMV_IS_COMPLEX_FLOAT   (ResPacketSize == (16 / sizeof(std::complex<float>)))

◆ GEMV_IS_FLOAT

#define GEMV_IS_FLOAT   (ResPacketSize == (16 / sizeof(float)))

◆ GEMV_IS_SCALAR

#define GEMV_IS_SCALAR   (sizeof(ResPacket) != 16)
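
These predicates are compile-time size checks on the instantiated packet types (16 bytes is the full VSX vector width), so each GEMV instantiation folds the corresponding branches away. A self-contained sketch of the idea, using stand-in packet types rather than the real Packet4f/Packet2d:

#include <cstdio>

template <typename ResPacket, typename ResScalar>
void describe(const char* name) {
  constexpr int ResPacketSize = int(sizeof(ResPacket) / sizeof(ResScalar));
  constexpr bool is_scalar = sizeof(ResPacket) != 16;                  // GEMV_IS_SCALAR
  constexpr bool is_float = ResPacketSize == int(16 / sizeof(float));  // GEMV_IS_FLOAT
  std::printf("%s: lanes=%d scalar-fallback=%d float-packet=%d\n", name, ResPacketSize, int(is_scalar), int(is_float));
}

struct FakePacket4f { float v[4]; };   // stand-in for Packet4f (16 bytes, 4 lanes)
struct FakePacket2d { double v[2]; };  // stand-in for Packet2d (16 bytes, 2 lanes)

int main() {
  describe<FakePacket4f, float>("float GEMV");    // lanes=4, float-packet=1
  describe<FakePacket2d, double>("double GEMV");  // lanes=2, float-packet=0
  describe<float, float>("scalar fallback");      // lanes=1, scalar-fallback=1
  return 0;
}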

◆ GEMV_LOADPACKET_COL

#define GEMV_LOADPACKET_COL (   iter)    lhs.template load<LhsPacket, LhsAlignment>(i + ((iter) * LhsPacketSize), j)

◆ GEMV_LOADPACKET_COL_COMPLEX

#define GEMV_LOADPACKET_COL_COMPLEX (   iter)     loadLhsPacket<Scalar, LhsScalar, LhsMapper, PLhsPacket>(lhs, i + ((iter) * ResPacketSize), j)

◆ GEMV_LOADPACKET_COL_COMPLEX_DATA

#define GEMV_LOADPACKET_COL_COMPLEX_DATA (   iter)    convertReal(GEMV_LOADPACKET_COL_COMPLEX(iter))

◆ GEMV_LOADPACKET_ROW

#define GEMV_LOADPACKET_ROW (   iter)    lhs.template load<LhsPacket, Unaligned>(i + (iter), j)

◆ GEMV_LOADPACKET_ROW_COMPLEX

#define GEMV_LOADPACKET_ROW_COMPLEX (   iter)    loadLhsPacket<Scalar, LhsScalar, LhsMapper, PLhsPacket>(lhs, i + (iter), j)

◆ GEMV_LOADPACKET_ROW_COMPLEX_DATA

#define GEMV_LOADPACKET_ROW_COMPLEX_DATA (   iter)    convertReal(GEMV_LOADPACKET_ROW_COMPLEX(iter))

◆ GEMV_LOADPACKET_ROW_COMPLEX_OLD

#define GEMV_LOADPACKET_ROW_COMPLEX_OLD (   iter)    lhs.template load<LhsPacket, LhsAlignment>(i + (iter), j)

◆ GEMV_MULT

#define GEMV_MULT (   iter1,
  iter2,
  iter3,
  N 
)
Value:
if (N > iter1) { \
cc##iter1.scalar[0] += cj.pmul(lhs(i + iter2, j), a0); \
cc##iter1.scalar[1] += cj.pmul(lhs(i + iter3, j), a0); \
}

◆ GEMV_MULT_COMPLEX

#define GEMV_MULT_COMPLEX (   iter1,
  iter2,
  iter3,
  N 
)
Value:
if (N > iter1) { \
cc##iter1.scalar[0] += cj.pmul(lhs(i + iter2, j), b0); \
cc##iter1.scalar[1] += cj.pmul(lhs(i + iter3, j), b0); \
}

◆ GEMV_MULT_COMPLEX_COMPLEX

#define GEMV_MULT_COMPLEX_COMPLEX (   LhsType,
  RhsType,
  ResType 
)
Value:
template <typename ScalarPacket, typename LhsPacket, typename RhsScalar, typename RhsPacket, typename PResPacket, \
typename ResPacket, bool ConjugateLhs, bool ConjugateRhs, int StorageOrder> \
EIGEN_ALWAYS_INLINE void gemv_mult_complex(LhsType& a0, RhsType* b, ResType& c0, ResType& c1) { \
gemv_mult_complex_complex<ScalarPacket, LhsPacket, RhsScalar, RhsPacket, PResPacket, ResPacket, ConjugateLhs, \
ConjugateRhs, StorageOrder>(a0, b, c0, c1); \
}

◆ GEMV_MULT_COMPLEX_REAL

#define GEMV_MULT_COMPLEX_REAL (   LhsType,
  RhsType,
  ResType1,
  ResType2 
)
Value:
template <typename ScalarPacket, typename LhsPacket, typename RhsScalar, typename RhsPacket, typename PResPacket, \
typename ResPacket, bool ConjugateLhs, bool ConjugateRhs, int StorageOrder> \
EIGEN_ALWAYS_INLINE void gemv_mult_complex(LhsType& a0, RhsType* b, ResType1& c0, ResType2&) { \
gemv_mult_complex_real<ScalarPacket, LhsPacket, RhsScalar, RhsPacket, PResPacket, ResPacket, ConjugateLhs, \
ConjugateRhs, StorageOrder>(a0, b, c0); \
}

◆ GEMV_MULT_REAL_COMPLEX

#define GEMV_MULT_REAL_COMPLEX (   LhsType,
  RhsType,
  ResType 
)
Value:
template <typename ScalarPacket, typename LhsPacket, typename RhsScalar, typename RhsPacket, typename PResPacket, \
typename ResPacket, bool ConjugateLhs, bool ConjugateRhs, int StorageOrder> \
EIGEN_ALWAYS_INLINE void gemv_mult_complex(LhsType& a0, RhsType* b, ResType& c0, RhsType&) { \
gemv_mult_real_complex<ScalarPacket, LhsPacket, RhsScalar, RhsPacket, PResPacket, ResPacket, ConjugateLhs, \
ConjugateRhs, StorageOrder>(a0, b, c0); \
}

◆ GEMV_PREDUX2

#define GEMV_PREDUX2 (   iter1,
  iter2,
  iter3,
  N 
)
Value:
if (N > iter1) { \
cc##iter1 = predux_real<ResScalar, ResPacket>(c##iter2, c##iter3); \
} else { \
EIGEN_UNUSED_VARIABLE(cc##iter1); \
}

◆ GEMV_PREDUX4_COMPLEX

#define GEMV_PREDUX4_COMPLEX (   iter1,
  iter2,
  iter3,
  N 
)
Value:
if (N > iter1) { \
cc##iter1 = predux_complex<ResScalar, PResPacket, ResPacket, LhsPacket, RhsPacket>(c0##iter2, c0##iter3, \
c1##iter2, c1##iter3); \
} else { \
EIGEN_UNUSED_VARIABLE(cc##iter1); \
}

◆ GEMV_PREDUX4_COMPLEX_OLD

#define GEMV_PREDUX4_COMPLEX_OLD (   iter1,
  iter2,
  iter3,
  N 
)
Value:
if (N > iter1) { \
cc##iter1.scalar[0] = predux(c1##iter2); \
cc##iter1.scalar[1] = predux(c1##iter3); \
} else { \
EIGEN_UNUSED_VARIABLE(cc##iter1); \
}

◆ GEMV_PREFETCH

#define GEMV_PREFETCH (   iter,
  N 
)

◆ GEMV_PROCESS_COL

#define GEMV_PROCESS_COL (   N)    GEMV_PROCESS_COL_ONE(N)

◆ GEMV_PROCESS_COL_COMPLEX

#define GEMV_PROCESS_COL_COMPLEX (   N)    GEMV_PROCESS_COL_COMPLEX_ONE(N)

◆ GEMV_PROCESS_COL_COMPLEX_ONE

#define GEMV_PROCESS_COL_COMPLEX_ONE (   N)
Value:
GEMV_UNROLL(GEMV_INIT_COMPLEX, N) \
Index j = j2; \
do { \
const RhsScalar& b1 = rhs2(j, 0); \
RhsScalar* b = const_cast<RhsScalar*>(&b1); \
GEMV_UNROLL(GEMV_PREFETCH, N) \
GEMV_UNROLL(GEMV_WORK_COL_COMPLEX, N) \
} while (++j < jend); \
GEMV_UNROLL(GEMV_STORE_COL_COMPLEX, N) \
i += (ResPacketSize * N);

main macro for gemv_complex_col - initialize accumulators, multiply and add inputs, and store results

◆ GEMV_PROCESS_COL_ONE

#define GEMV_PROCESS_COL_ONE (   N)
Value:
GEMV_UNROLL(GEMV_INIT, N) \
Index j = j2; \
do { \
RhsPacket a0 = pset1<RhsPacket>(rhs2(j, 0)); \
GEMV_UNROLL(GEMV_PREFETCH, N) \
GEMV_UNROLL(GEMV_WORK_COL, N) \
} while (++j < jend); \
GEMV_UNROLL(GEMV_STORE_COL, N) \
i += (ResPacketSize * N);

main macro for gemv_col - initialize accumulators, multiply and add inputs, and store results
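
For intuition, a hand-expanded scalar model of GEMV_PROCESS_COL_ONE(2), with one float standing in for each vector register and a column-major lhs indexed explicitly (a sketch of the expansion's shape, not the actual generated code):

// c0/c1 model two result packets; a0 models the broadcast rhs value.
void process_col_one_2(const float* lhs, int lhsStride, const float* rhs,
                       float* res, int i, int j2, int jend, float alpha) {
  float c0 = 0.f, c1 = 0.f;                   // GEMV_UNROLL(GEMV_INIT, 2)
  for (int j = j2; j < jend; ++j) {           // do { ... } while (++j < jend)
    float a0 = rhs[j];                        // pset1<RhsPacket>(rhs2(j, 0))
    c0 += lhs[(i + 0) + j * lhsStride] * a0;  // GEMV_WORK_COL(0, 2)
    c1 += lhs[(i + 1) + j * lhsStride] * a0;  // GEMV_WORK_COL(1, 2)
  }
  res[i + 0] += alpha * c0;                   // GEMV_STORE_COL(0, 2)
  res[i + 1] += alpha * c1;                   // GEMV_STORE_COL(1, 2)
}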

◆ GEMV_PROCESS_END_ROW_COMPLEX

#define GEMV_PROCESS_END_ROW_COMPLEX (   N)
Value:
for (; j < cols; ++j) { \
RhsScalar b0 = rhs2(j); \
GEMV_UNROLL_ROW_HALF(GEMV_MULT_COMPLEX, (N >> 1)) \
} \
GEMV_UNROLL_ROW_HALF(GEMV_STORE_ROW_COMPLEX, (N >> 1))

◆ GEMV_PROCESS_ROW

#define GEMV_PROCESS_ROW (   N)
Value:
for (; i < n##N; i += N) { \
GEMV_UNROLL_ROW(GEMV_INIT_ROW, N) \
Index j = 0; \
for (; j + LhsPacketSize <= cols; j += LhsPacketSize) { \
RhsPacket a0 = rhs2.template load<RhsPacket, Unaligned>(j); \
GEMV_UNROLL_ROW(GEMV_WORK_ROW, N) \
} \
GEMV_UNROLL_ROW_HALF(GEMV_PREDUX2, (N >> 1)) \
for (; j < cols; ++j) { \
RhsScalar a0 = rhs2(j); \
GEMV_UNROLL_ROW_HALF(GEMV_MULT, (N >> 1)) \
} \
GEMV_UNROLL_ROW_HALF(GEMV_STORE_ROW, (N >> 1)) \
}

main macro for gemv_row - initialize accumulators, multiply and add inputs, predux and store results
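
The row kernel follows the same shape, but each accumulator holds partial sums of one row's dot product, which the GEMV_PREDUX2 step reduces to scalars before the strided store. A scalar model for a pair of rows (illustrative only; the real code vectorizes the inner loop and keeps a separate scalar tail):

void process_row_pair(const float* lhs, int lhsStride, const float* rhs,
                      float* res, int resIncr, int i, int cols, float alpha) {
  float c0 = 0.f, c1 = 0.f;         // accumulators for rows i and i + 1
  for (int j = 0; j < cols; ++j) {  // vector loop plus scalar tail, merged
    c0 += lhs[(i + 0) * lhsStride + j] * rhs[j];
    c1 += lhs[(i + 1) * lhsStride + j] * rhs[j];
  }
  res[(i + 0) * resIncr] += alpha * c0;  // GEMV_STORE_ROW via storeMaddData
  res[(i + 1) * resIncr] += alpha * c1;
}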

◆ GEMV_PROCESS_ROW_COMPLEX

#define GEMV_PROCESS_ROW_COMPLEX (   N)    GEMV_PROCESS_ROW_COMPLEX_ONE(N)

◆ GEMV_PROCESS_ROW_COMPLEX_IS_NEW

#define GEMV_PROCESS_ROW_COMPLEX_IS_NEW   (sizeof(Scalar) == sizeof(float)) || GEMV_IS_COMPLEX_COMPLEX

◆ GEMV_PROCESS_ROW_COMPLEX_ONE

#define GEMV_PROCESS_ROW_COMPLEX_ONE (   N)
Value:
if (GEMV_PROCESS_ROW_COMPLEX_IS_NEW) { \
GEMV_PROCESS_ROW_COMPLEX_ONE_NEW(N) \
} else { \
GEMV_PROCESS_ROW_COMPLEX_ONE_OLD(N) \
}

◆ GEMV_PROCESS_ROW_COMPLEX_ONE_NEW

#define GEMV_PROCESS_ROW_COMPLEX_ONE_NEW (   N)
Value:
for (; i < n##N; i += N) { \
GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW(N) \
GEMV_UNROLL_ROW_HALF(GEMV_PREDUX4_COMPLEX, (N >> 1)) \
GEMV_PROCESS_END_ROW_COMPLEX(N); \
}

main macro for gemv_complex_row - initialize accumulators, multiply and add inputs, predux and store results

◆ GEMV_PROCESS_ROW_COMPLEX_ONE_OLD

#define GEMV_PROCESS_ROW_COMPLEX_ONE_OLD (   N)
Value:
for (; i < n##N; i += N) { \
GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD(N) \
GEMV_UNROLL_ROW_HALF(GEMV_PREDUX4_COMPLEX_OLD, (N >> 1)) \
GEMV_PROCESS_END_ROW_COMPLEX(N) \
}

◆ GEMV_PROCESS_ROW_COMPLEX_PREDUX

#define GEMV_PROCESS_ROW_COMPLEX_PREDUX (   iter)
Value:
if (GEMV_PROCESS_ROW_COMPLEX_IS_NEW) { \
GEMV_PROCESS_ROW_COMPLEX_PREDUX_NEW(iter) \
} else { \
GEMV_PROCESS_ROW_COMPLEX_PREDUX_OLD(iter) \
}

◆ GEMV_PROCESS_ROW_COMPLEX_PREDUX_NEW

#define GEMV_PROCESS_ROW_COMPLEX_PREDUX_NEW (   iter)
Value:
if (GEMV_IS_COMPLEX_COMPLEX) { \
c0##iter = padd(c0##iter, c1##iter); \
} \
dd0 = predux(c0##iter);

◆ GEMV_PROCESS_ROW_COMPLEX_PREDUX_OLD

#define GEMV_PROCESS_ROW_COMPLEX_PREDUX_OLD (   iter)    dd0 = predux(c1##iter);

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE

#define GEMV_PROCESS_ROW_COMPLEX_SINGLE (   N)
Value:
if (GEMV_PROCESS_ROW_COMPLEX_IS_NEW) { \
GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW(N) \
} else { \
GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD(N) \
}

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW

#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW (   N)
Value:
GEMV_UNROLL_ROW(GEMV_INIT_COMPLEX, N) \
GEMV_PROCESS_ROW_COMPLEX_SINGLE_WORK(GEMV_WORK_ROW_COMPLEX, N)

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD

#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD (   N)
Value:
GEMV_UNROLL_ROW(GEMV_INIT_COMPLEX_OLD, N) \
j = 0; \
for (; j + LhsPacketSize <= cols; j += LhsPacketSize) { \
RhsPacket b0 = rhs2.template load<RhsPacket, Unaligned>(j); \
GEMV_UNROLL_ROW(GEMV_WORK_ROW_COMPLEX_OLD, N) \
}

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE_WORK

#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_WORK (   which,
  N 
)
Value:
j = 0; \
for (; j + LhsPacketSize <= cols; j += LhsPacketSize) { \
const RhsScalar& b1 = rhs2(j); \
RhsScalar* b = const_cast<RhsScalar*>(&b1); \
GEMV_UNROLL_ROW(which, N) \
}

◆ GEMV_STORE_COL

#define GEMV_STORE_COL (   iter,
  N 
)
Value:
if (N > iter) { \
pstoreu(res + i + (iter * ResPacketSize), \
pmadd(c##iter, palpha, ploadu<ResPacket>(res + i + (iter * ResPacketSize)))); \
}

◆ GEMV_STORE_COL_COMPLEX

#define GEMV_STORE_COL_COMPLEX (   iter,
  N 
)
Value:
if (N > iter) { \
if (GEMV_IS_COMPLEX_COMPLEX) { \
c0##iter = padd(c0##iter, c1##iter); \
} \
pstoreu_pmadd_complex<Scalar, ScalarPacket, PResPacket, ResPacket, ResScalar, AlphaData>( \
c0##iter, alpha_data, res + i + (iter * ResPacketSize)); \
}

◆ GEMV_STORE_ROW

#define GEMV_STORE_ROW (   iter1,
  iter2,
  iter3,
  N 
)
Value:
if (N > iter1) { \
storeMaddData<ResScalar>(res + ((i + iter2) * resIncr), alpha, cc##iter1.scalar[0]); \
storeMaddData<ResScalar>(res + ((i + iter3) * resIncr), alpha, cc##iter1.scalar[1]); \
}

◆ GEMV_STORE_ROW_COMPLEX

#define GEMV_STORE_ROW_COMPLEX (   iter1,
  iter2,
  iter3,
  N 
)
Value:
if (N > iter1) { \
storeMaddData<ResScalar>(res + ((i + iter2) * resIncr), alpha, cc##iter1.scalar[0]); \
storeMaddData<ResScalar>(res + ((i + iter3) * resIncr), alpha, cc##iter1.scalar[1]); \
}

◆ GEMV_UNROLL

#define GEMV_UNROLL (   func,
  N 
)    func(0, N) func(1, N) func(2, N) func(3, N) func(4, N) func(5, N) func(6, N) func(7, N)
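
GEMV_UNROLL is the classic X-macro unrolling idiom: the iteration index is token-pasted into register names, and the constant "if (N > iter)" guard inside each func lets the compiler discard unused lanes. A minimal self-contained demo of the pattern (simplified stand-in macros, not the Eigen ones):

#include <cstdio>

#define DEMO_UNROLL(func, N) func(0, N) func(1, N) func(2, N) func(3, N)
#define DEMO_INIT(iter, N) float c##iter = 0.f;
#define DEMO_ADD(iter, N) \
  if ((N) > (iter)) c##iter += in[iter];

int main() {
  const float in[4] = {1.f, 2.f, 3.f, 4.f};
  DEMO_UNROLL(DEMO_INIT, 2)  // declares c0..c3
  DEMO_UNROLL(DEMO_ADD, 2)   // "(N) > (iter)" is a compile-time constant, so
                             // the c2/c3 updates are dead code and fold away
  std::printf("%g %g\n", c0, c1);  // prints: 1 2
  (void)c2; (void)c3;              // silence unused warnings for the idle lanes
  return 0;
}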

◆ GEMV_UNROLL_HALF

#define GEMV_UNROLL_HALF (   func,
  N 
)    func(0, 0, 1, N) func(1, 2, 3, N) func(2, 4, 5, N) func(3, 6, 7, N)

◆ GEMV_UNROLL_ROW

#define GEMV_UNROLL_ROW (   func,
  N 
)    func(0, N) func(1, N) func(2, N) func(3, N) func(4, N) func(5, N) func(6, N) func(7, N)

◆ GEMV_UNROLL_ROW_HALF

#define GEMV_UNROLL_ROW_HALF (   func,
  N 
)    func(0, 0, 1, N) func(1, 2, 3, N) func(2, 4, 5, N) func(3, 6, 7, N)

◆ GEMV_WORK_COL

#define GEMV_WORK_COL (   iter,
  N 
)
Value:
if (N > iter) { \
c##iter = pcj.pmadd(GEMV_LOADPACKET_COL(iter), a0, c##iter); \
}

◆ GEMV_WORK_COL_COMPLEX

#define GEMV_WORK_COL_COMPLEX (   iter,
  N 
)
Value:
if (N > iter) { \
f##iter = GEMV_LOADPACKET_COL_COMPLEX(iter); \
gemv_mult_complex<ScalarPacket, PLhsPacket, RhsScalar, RhsPacket, PResPacket, ResPacket, ConjugateLhs, \
ConjugateRhs, ColMajor>(f##iter, b, c0##iter, c1##iter); \
} else { \
EIGEN_UNUSED_VARIABLE(f##iter); \
}

◆ GEMV_WORK_ROW

#define GEMV_WORK_ROW (   iter,
  N 
)
Value:
if (N > iter) { \
c##iter = pcj.pmadd(GEMV_LOADPACKET_ROW(iter), a0, c##iter); \
}

◆ GEMV_WORK_ROW_COMPLEX

#define GEMV_WORK_ROW_COMPLEX (   iter,
  N 
)
Value:
if (N > iter) { \
PLhsPacket a##iter = GEMV_LOADPACKET_ROW_COMPLEX(iter); \
gemv_mult_complex<ScalarPacket, PLhsPacket, RhsScalar, RhsPacket, PResPacket, ResPacket, ConjugateLhs, \
ConjugateRhs, RowMajor>(a##iter, b, c0##iter, c1##iter); \
}

◆ GEMV_WORK_ROW_COMPLEX_OLD

#define GEMV_WORK_ROW_COMPLEX_OLD (   iter,
  N 
)
Value:
if (N > iter) { \
LhsPacket a##iter = GEMV_LOADPACKET_ROW_COMPLEX_OLD(iter); \
c1##iter = pcj.pmadd(a##iter, b0, c1##iter); \
}

◆ MAX_BFLOAT16_VEC_ACC_VSX

#define MAX_BFLOAT16_VEC_ACC_VSX   8

Function Documentation

◆ addResultsVSX()

template<Index num_acc>
EIGEN_ALWAYS_INLINE void addResultsVSX ( Packet4f(&acc)[num_acc][2] )
{
  for (Index i = 0; i < num_acc; i++) {
    acc[i][0] = acc[i][0] + acc[i][1];
  }
}


◆ calcVSXVecColLoops()

template<typename LhsMapper , typename RhsMapper , bool linear>
EIGEN_ALWAYS_INLINE void calcVSXVecColLoops ( Index  cend,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)
{
  Index row = 0;
  if (rows >= (MAX_BFLOAT16_VEC_ACC_VSX * 4)) {
    colVSXVecColLoopBody<MAX_BFLOAT16_VEC_ACC_VSX, LhsMapper, RhsMapper, false, linear>(row, cend, rows, lhs, rhs,
                                                                                        pAlpha, result);
    result += row;
  }
  if (rows & 3) {
    colVSXVecColLoopBodyExtra<LhsMapper, RhsMapper, true, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
  } else {
    colVSXVecColLoopBodyExtra<LhsMapper, RhsMapper, false, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
  }
}

References MAX_BFLOAT16_VEC_ACC_VSX.
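
The strategy here is worth noting: the widest kernel (MAX_BFLOAT16_VEC_ACC_VSX accumulators, four float rows each) loops while full blocks remain, then the remainder is dispatched once to a right-sized kernel through the switch in colVSXVecColLoopBodyExtra. A schematic of that dispatch (types and kernel bodies elided):

template <int NumAcc>
void kernel(long& row) {
  // process NumAcc * 4 rows starting at 'row' (body elided)
  row += NumAcc * 4;
}

void dispatch(long rows) {
  long row = 0;
  while (rows - row >= 8 * 4) kernel<8>(row);  // widest kernel while it fits
  switch ((rows - row) >> 2) {                 // remainder handled in one call
    case 7: kernel<7>(row); break;
    case 6: kernel<6>(row); break;
    case 5: kernel<5>(row); break;
    case 4: kernel<4>(row); break;
    case 3: kernel<3>(row); break;
    case 2: kernel<2>(row); break;
    case 1: kernel<1>(row); break;
    default: break;  // fewer than 4 rows left: handled by the extraRows path
  }
}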

◆ calcVSXVecLoops()

template<typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void calcVSXVecLoops ( Index  cols,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)
{
  Index row = 0;
  if (rows >= MAX_BFLOAT16_VEC_ACC_VSX) {
    colVSXVecLoopBody<MAX_BFLOAT16_VEC_ACC_VSX, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
    result += row;
  }
  colVSXVecLoopBodyExtra<LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
}

References MAX_BFLOAT16_VEC_ACC_VSX.

◆ colVSXVecColLoopBody()

template<const Index num_acc, typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>
void colVSXVecColLoopBody ( Index &  row,
Index  cend,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)
{
  constexpr Index step = (num_acc * 4);
  const Index extra_rows = (extraRows) ? (rows & 3) : 0;
  constexpr bool multiIters = !extraRows && (num_acc == MAX_BFLOAT16_VEC_ACC_VSX);

  do {
    Packet4f acc[num_acc][2];

    zeroAccumulators<num_acc, 2>(acc);

    using LhsSubMapper = typename LhsMapper::SubMapper;

    LhsSubMapper lhs2 = lhs.getSubMapper(row, 0);
    for (Index j = 0; j + 2 <= cend; j += 2) {
      vecColLoopVSX<num_acc, LhsSubMapper, RhsMapper, false, linear>(j, lhs2, rhs, acc);
    }
    if (cend & 1) {
      vecColLoopVSX<num_acc, LhsSubMapper, RhsMapper, true, linear>(cend - 1, lhs2, rhs, acc);
    }

    addResultsVSX<num_acc>(acc);

    outputVecColResults<num_acc, extraRows, 2>(acc, result, pAlpha, extra_rows);

    result += step;
  } while (multiIters && (step <= rows - (row += step)));
}

References MAX_BFLOAT16_VEC_ACC_VSX.

◆ colVSXVecColLoopBodyExtra()

template<typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>
EIGEN_ALWAYS_INLINE void colVSXVecColLoopBodyExtra ( Index &  row,
Index  cend,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)
{
  switch ((rows - row) >> 2) {
    case 7:
      colVSXVecColLoopBodyExtraN<7, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
      break;
    case 6:
      colVSXVecColLoopBodyExtraN<6, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
      break;
    case 5:
      colVSXVecColLoopBodyExtraN<5, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
      break;
    case 4:
      colVSXVecColLoopBodyExtraN<4, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
      break;
    case 3:
      colVSXVecColLoopBodyExtraN<3, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
      break;
    case 2:
      colVSXVecColLoopBodyExtraN<2, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
      break;
    case 1:
      colVSXVecColLoopBodyExtraN<1, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
      break;
    default:
      if (extraRows) {
        colVSXVecColLoopBody<1, LhsMapper, RhsMapper, true, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
      }
      break;
  }
}


◆ colVSXVecColLoopBodyExtraN()

template<const Index num_acc, typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>
EIGEN_ALWAYS_INLINE void colVSXVecColLoopBodyExtraN ( Index &  row,
Index  cend,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)
{
  if (MAX_BFLOAT16_VEC_ACC_VSX > num_acc) {
    colVSXVecColLoopBody<num_acc + (extraRows ? 1 : 0), LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs,
                                                                                                 rhs, pAlpha, result);
  }
}

References MAX_BFLOAT16_VEC_ACC_VSX.

◆ colVSXVecLoopBody()

template<const Index num_acc, typename LhsMapper , typename RhsMapper >
void colVSXVecLoopBody ( Index &  row,
Index  cols,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)
{
  constexpr bool multiIters = (num_acc == MAX_BFLOAT16_VEC_ACC_VSX);
  const Index extra_cols = (cols & 7);

  do {
    Packet4f acc[num_acc][2];

    zeroAccumulators<num_acc, 2>(acc);

    const LhsMapper lhs2 = lhs.getSubMapper(row, 0);
    vecVSXLoop<num_acc, LhsMapper, RhsMapper>(cols, lhs2, rhs, acc, extra_cols);

    addResultsVSX<num_acc>(acc);

    preduxVecResultsVSX<num_acc>(acc);

    outputVecResults<num_acc, 2>(acc, result, pAlpha);

    result += num_acc;
  } while (multiIters && (num_acc <= rows - (row += num_acc)));
}

References MAX_BFLOAT16_VEC_ACC_VSX.

◆ colVSXVecLoopBodyExtra()

template<typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void colVSXVecLoopBodyExtra ( Index &  row,
Index  cols,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)
{
  switch (rows - row) {
    case 7:
      colVSXVecLoopBodyExtraN<7, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
      break;
    case 6:
      colVSXVecLoopBodyExtraN<6, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
      break;
    case 5:
      colVSXVecLoopBodyExtraN<5, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
      break;
    case 4:
      colVSXVecLoopBodyExtraN<4, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
      break;
    case 3:
      colVSXVecLoopBodyExtraN<3, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
      break;
    case 2:
      colVSXVecLoopBodyExtraN<2, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
      break;
    case 1:
      colVSXVecLoopBodyExtraN<1, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
      break;
  }
}


◆ colVSXVecLoopBodyExtraN()

template<const Index num_acc, typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void colVSXVecLoopBodyExtraN ( Index &  row,
Index  cols,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)
{
  if (MAX_BFLOAT16_VEC_ACC_VSX > num_acc) {
    colVSXVecLoopBody<num_acc, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
  }
}

References MAX_BFLOAT16_VEC_ACC_VSX.

◆ convertArrayPointerF32toBF16VSX()

template<bool inc = false>
EIGEN_ALWAYS_INLINE void convertArrayPointerF32toBF16VSX ( float *  result,
Index  rows,
bfloat16 *  dst,
Index  resInc = 1 
)
{
  Index i = 0;
  convertPointerF32toBF16VSX<32, inc>(i, result, rows, dst, resInc);
  convertPointerF32toBF16VSX<16, inc>(i, result, rows, dst, resInc);
  convertPointerF32toBF16VSX<8, inc>(i, result, rows, dst, resInc);
  convertPointerF32toBF16VSX<1, inc>(i, result, rows, dst, resInc);
}


Referenced by gemv_bfloat16_col(), and gemv_bfloat16_row().

◆ convertPointerF32toBF16VSX()

template<const Index size, bool inc = false>
EIGEN_ALWAYS_INLINE void convertPointerF32toBF16VSX ( Index &  i,
float *  result,
Index  rows,
bfloat16 *&  dst,
Index  resInc = 1 
)
{
  constexpr Index extra = ((size < 8) ? 8 : size);
  while (i + size <= rows) {
    PacketBlock<Packet8bf, (size + 7) / 8> r32;
    r32.packet[0] = convertF32toBF16VSX(result + i + 0);
    if (size >= 16) {
      r32.packet[1] = convertF32toBF16VSX(result + i + 8);
    }
    if (size >= 32) {
      r32.packet[2] = convertF32toBF16VSX(result + i + 16);
      r32.packet[3] = convertF32toBF16VSX(result + i + 24);
    }
    storeBF16fromResult<size, inc, 0>(dst, r32.packet[0], resInc, rows & 7);
    if (size >= 16) {
      storeBF16fromResult<size, inc, 8>(dst, r32.packet[1], resInc);
    }
    if (size >= 32) {
      storeBF16fromResult<size, inc, 16>(dst, r32.packet[2], resInc);
      storeBF16fromResult<size, inc, 24>(dst, r32.packet[3], resInc);
    }
    i += extra;
    dst += extra * resInc;
    if (size != 32) break;
  }
}

References Eigen::internal::convertF32toBF16VSX().
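
For reference, the scalar conversion these helpers batch: a bfloat16 is the upper half of an IEEE-754 binary32. Eigen's convertF32toBF16VSX rounds to nearest-even; the sketch below truncates for brevity:

#include <cstdint>
#include <cstring>
#include <cstdio>

uint16_t f32_to_bf16_trunc(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  return static_cast<uint16_t>(bits >> 16);  // keep sign, exponent, top 7 mantissa bits
}

int main() {
  std::printf("%04x\n", f32_to_bf16_trunc(1.0f));  // prints: 3f80
  return 0;
}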

◆ gemv_bfloat16_col()

template<typename LhsMapper , typename RhsMapper >
void gemv_bfloat16_col ( Index  rows,
Index  cols,
const LhsMapper &  alhs,
const RhsMapper &  rhs,
bfloat16 *  res,
Index  resIncr,
bfloat16  alpha 
)
{
  EIGEN_UNUSED_VARIABLE(resIncr);
  eigen_internal_assert(resIncr == 1);

  // The following copy tells the compiler that lhs's attributes are not modified outside this function
  // This helps GCC to generate proper code.
  LhsMapper lhs(alhs);
  RhsMapper rhs2(rhs);

  const Index lhsStride = lhs.stride();

  // TODO: improve the following heuristic:
  const Index block_cols = cols < 128 ? cols : (lhsStride * sizeof(bfloat16) < 16000 ? 16 : 8);
  float falpha = Eigen::bfloat16_impl::bfloat16_to_float(alpha);
  Packet4f pAlpha = pset1<Packet4f>(falpha);

  ei_declare_aligned_stack_constructed_variable(float, result, rows, 0);

  convertArrayPointerBF16toF32(result, 1, rows, res);

  for (Index j2 = 0; j2 < cols; j2 += block_cols) {
    Index jend = numext::mini(j2 + block_cols, cols);

    using LhsSubMapper = typename LhsMapper::SubMapper;

    LhsSubMapper lhs2 = lhs.getSubMapper(0, j2);
    UseStride<RhsMapper, LhsSubMapper>::run(j2, jend, rows, lhs2, rhs2, pAlpha, result);
  }

  convertArrayPointerF32toBF16VSX(result, rows, res);
}

References Eigen::bfloat16_impl::bfloat16_to_float(), Eigen::internal::convertArrayPointerBF16toF32(), convertArrayPointerF32toBF16VSX(), ei_declare_aligned_stack_constructed_variable, eigen_internal_assert, EIGEN_UNUSED_VARIABLE, Eigen::numext::mini(), Eigen::internal::pset1< Packet4f >(), and UseStride< RhsMapper, LhsMapper, typename >::run().
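
How this kernel is typically reached (assuming an Eigen build targeting POWER/VSX): a bfloat16 matrix-vector product lowers to general_matrix_vector_product, whose ColMajor bfloat16 specialization calls gemv_bf16_col, i.e. this function:

#include <Eigen/Dense>
using Eigen::bfloat16;

int main() {
  Eigen::Matrix<bfloat16, Eigen::Dynamic, Eigen::Dynamic> A(64, 32);  // ColMajor by default
  Eigen::Matrix<bfloat16, Eigen::Dynamic, 1> x(32), y(64);
  A.setConstant(bfloat16(0.5f));
  x.setConstant(bfloat16(2.0f));
  y.setZero();
  y.noalias() += bfloat16(1.5f) * (A * x);  // dispatches to the bfloat16 GEMV path
  return 0;
}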

◆ gemv_bfloat16_row()

template<typename LhsMapper , typename RhsMapper >
EIGEN_STRONG_INLINE void gemv_bfloat16_row ( Index  rows,
Index  cols,
const LhsMapper &  alhs,
const RhsMapper &  rhs,
bfloat16 *  res,
Index  resIncr,
bfloat16  alpha 
)
{
  typedef typename RhsMapper::LinearMapper LinearMapper;

  // The following copy tells the compiler that lhs's attributes are not modified outside this function
  // This helps GCC to generate proper code.
  LhsMapper lhs(alhs);
  LinearMapper rhs2 = rhs.getLinearMapper(0, 0);

  eigen_internal_assert(rhs.stride() == 1);

  float falpha = Eigen::bfloat16_impl::bfloat16_to_float(alpha);
  const Packet4f pAlpha = pset1<Packet4f>(falpha);

  ei_declare_aligned_stack_constructed_variable(float, result, rows, 0);
  if (resIncr == 1) {
    convertArrayPointerBF16toF32(result, 1, rows, res);
  } else {
    convertArrayPointerBF16toF32<true>(result, 1, rows, res, resIncr);
  }
  calcVSXVecLoops<LhsMapper, LinearMapper>(cols, rows, lhs, rhs2, pAlpha, result);
  if (resIncr == 1) {
    convertArrayPointerF32toBF16VSX(result, rows, res);
  } else {
    convertArrayPointerF32toBF16VSX<true>(result, rows, res, resIncr);
  }
}

References Eigen::bfloat16_impl::bfloat16_to_float(), Eigen::internal::convertArrayPointerBF16toF32(), convertArrayPointerF32toBF16VSX(), ei_declare_aligned_stack_constructed_variable, eigen_internal_assert, and Eigen::internal::pset1< Packet4f >().

◆ gemv_col()

template<typename LhsScalar , typename LhsMapper , typename RhsScalar , typename RhsMapper , typename ResScalar >
EIGEN_STRONG_INLINE void gemv_col ( Index  rows,
Index  cols,
const LhsMapper &  alhs,
const RhsMapper &  rhs,
ResScalar *  res,
Index  resIncr,
ResScalar  alpha 
)

perform a column-major matrix-vector product and accumulate: res += alpha * (lhs * rhs)

{
  typedef gemv_traits<LhsScalar, RhsScalar> Traits;

  typedef typename Traits::LhsPacket LhsPacket;
  typedef typename Traits::RhsPacket RhsPacket;
  typedef typename Traits::ResPacket ResPacket;

  EIGEN_UNUSED_VARIABLE(resIncr);
  eigen_internal_assert(resIncr == 1);

  // The following copy tells the compiler that lhs's attributes are not modified outside this function
  // This helps GCC to generate proper code.
  LhsMapper lhs(alhs);
  RhsMapper rhs2(rhs);

  conj_helper<LhsScalar, RhsScalar, false, false> cj;
  conj_helper<LhsPacket, RhsPacket, false, false> pcj;

  const Index lhsStride = lhs.stride();
  // TODO: for padded aligned inputs, we could enable aligned reads
  enum {
    LhsAlignment = Unaligned,
    ResPacketSize = Traits::ResPacketSize,
    LhsPacketSize = Traits::LhsPacketSize,
    RhsPacketSize = Traits::RhsPacketSize,
  };

#ifndef GCC_ONE_VECTORPAIR_BUG
  const Index n8 = rows - 8 * ResPacketSize + 1;
  const Index n4 = rows - 4 * ResPacketSize + 1;
  const Index n2 = rows - 2 * ResPacketSize + 1;
#endif
  const Index n1 = rows - 1 * ResPacketSize + 1;
#ifdef EIGEN_POWER_USE_GEMV_PREFETCH
  const Index prefetch_dist = 64 * LhsPacketSize;
#endif

  // TODO: improve the following heuristic:
  const Index block_cols = cols < 128 ? cols : (lhsStride * sizeof(LhsScalar) < 16000 ? 16 : 8);
  ResPacket palpha = pset1<ResPacket>(alpha);

  for (Index j2 = 0; j2 < cols; j2 += block_cols) {
    Index jend = numext::mini(j2 + block_cols, cols);
    Index i = 0;
    ResPacket c0, c1, c2, c3, c4, c5, c6, c7;
#ifdef USE_GEMV_MMA
    __vector_quad e0, e1, e2, e3, e4, e5, e6, e7;
    PacketBlock<ResPacket, 4> result0, result1, result2, result3, result4, result5, result6, result7;
    GEMV_UNUSED(8, e)
    GEMV_UNUSED(8, result)
    GEMV_UNUSED_EXTRA(1, c)
#endif
#ifndef GCC_ONE_VECTORPAIR_BUG
    while (i < n8) {
      GEMV_PROCESS_COL(8)
    }
    if (i < n4) {
      GEMV_PROCESS_COL(4)
    }
    if (i < n2) {
      GEMV_PROCESS_COL(2)
    }
    if (i < n1)
#else
    while (i < n1)
#endif
    {
      GEMV_PROCESS_COL_ONE(1)
    }
    for (; i < rows; ++i) {
      ResScalar d0(0);
      Index j = j2;
      do {
        d0 += cj.pmul(lhs(i, j), rhs2(j, 0));
      } while (++j < jend);
      res[i] += alpha * d0;
    }
  }
}

References eigen_internal_assert, EIGEN_UNUSED_VARIABLE, GEMV_PROCESS_COL, GEMV_PROCESS_COL_ONE, Eigen::numext::mini(), and Eigen::Unaligned.
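
Typical entry point (assuming an Eigen build targeting POWER/VSX): a column-major float GEMV lowers to general_matrix_vector_product, whose real-scalar ColMajor specialization (EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL) calls gemv_col:

#include <Eigen/Dense>

int main() {
  Eigen::MatrixXf A = Eigen::MatrixXf::Random(256, 128);  // ColMajor by default
  Eigen::VectorXf x = Eigen::VectorXf::Random(128);
  Eigen::VectorXf y = Eigen::VectorXf::Zero(256);
  float alpha = 2.0f;
  y.noalias() += alpha * (A * x);  // res[i] += alpha * dot(A.row(i), x)
  return 0;
}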

◆ gemv_complex_col()

template<typename Scalar , typename LhsScalar , typename LhsMapper , bool ConjugateLhs, bool LhsIsReal, typename RhsScalar , typename RhsMapper , bool ConjugateRhs, bool RhsIsReal, typename ResScalar >
EIGEN_STRONG_INLINE void gemv_complex_col ( Index  rows,
Index  cols,
const LhsMapper &  alhs,
const RhsMapper &  rhs,
ResScalar *  res,
Index  resIncr,
ResScalar  alpha 
)
{
  typedef gemv_traits<LhsScalar, RhsScalar> Traits;

  typedef typename Traits::LhsPacket LhsPacket;
  typedef typename Traits::RhsPacket RhsPacket;
  typedef typename Traits::ResPacket ResPacket;

  typedef typename packet_traits<Scalar>::type ScalarPacket;
  typedef typename packet_traits<LhsScalar>::type PLhsPacket;
  typedef typename packet_traits<ResScalar>::type PResPacket;
  typedef gemv_traits<ResPacket, ResPacket> PTraits;

  EIGEN_UNUSED_VARIABLE(resIncr);
  eigen_internal_assert(resIncr == 1);

  // The following copy tells the compiler that lhs's attributes are not modified outside this function
  // This helps GCC to generate proper code.
  LhsMapper lhs(alhs);
  RhsMapper rhs2(rhs);

  conj_helper<LhsScalar, RhsScalar, ConjugateLhs, ConjugateRhs> cj;

  const Index lhsStride = lhs.stride();
  // TODO: for padded aligned inputs, we could enable aligned reads
  enum {
    LhsAlignment = Unaligned,
    ResPacketSize = PTraits::ResPacketSize,
    LhsPacketSize = PTraits::LhsPacketSize,
    RhsPacketSize = PTraits::RhsPacketSize,
  };
#ifdef EIGEN_POWER_USE_GEMV_PREFETCH
  const Index prefetch_dist = 64 * LhsPacketSize;
#endif

#ifndef GCC_ONE_VECTORPAIR_BUG
  const Index n8 = rows - 8 * ResPacketSize + 1;
  const Index n4 = rows - 4 * ResPacketSize + 1;
  const Index n2 = rows - 2 * ResPacketSize + 1;
#endif
  const Index n1 = rows - 1 * ResPacketSize + 1;

  // TODO: improve the following heuristic:
  const Index block_cols = cols < 128 ? cols : (lhsStride * sizeof(LhsScalar) < 16000 ? 16 : 8);

  typedef alpha_store<PResPacket, ResPacket, ResScalar, Scalar> AlphaData;
  AlphaData alpha_data(alpha);

  for (Index j2 = 0; j2 < cols; j2 += block_cols) {
    Index jend = numext::mini(j2 + block_cols, cols);
    Index i = 0;
    PResPacket c00, c01, c02, c03, c04, c05, c06, c07;
    ResPacket c10, c11, c12, c13, c14, c15, c16, c17;
    PLhsPacket f0, f1, f2, f3, f4, f5, f6, f7;
#ifdef USE_GEMV_MMA
    __vector_quad e00, e01, e02, e03, e04, e05, e06, e07;
    __vector_pair a0, a1, a2, a3, a4, a5, a6, a7;
    PacketBlock<ScalarPacket, 4> result00, result01, result02, result03, result04, result05, result06, result07;
    GEMV_UNUSED(8, e0)
    GEMV_UNUSED(8, result0)
    GEMV_UNUSED(8, a)
    GEMV_UNUSED(8, f)
#if !defined(GCC_ONE_VECTORPAIR_BUG) && defined(USE_GEMV_COL_COMPLEX_MMA)
    if (GEMV_IS_COMPLEX_COMPLEX || GEMV_IS_COMPLEX_FLOAT)
#endif
#endif
#ifndef GCC_ONE_VECTORPAIR_BUG
    {
      while (i < n8) {
        GEMV_PROCESS_COL_COMPLEX(8)
      }
    }
    while (i < n4) {
      GEMV_PROCESS_COL_COMPLEX(4)
    }
    if (i < n2) {
      GEMV_PROCESS_COL_COMPLEX(2)
    }
    if (i < n1)
#else
    while (i < n1)
#endif
    {
      GEMV_PROCESS_COL_COMPLEX_ONE(1)
    }
    for (; i < rows; ++i) {
      ResScalar d0(0);
      Index j = j2;
      do {
        d0 += cj.pmul(lhs(i, j), rhs2(j, 0));
      } while (++j < jend);
      res[i] += alpha * d0;
    }
  }
}

References a, alpha, cols, eigen_internal_assert, EIGEN_UNUSED_VARIABLE, f, GEMV_IS_COMPLEX_COMPLEX, GEMV_IS_COMPLEX_FLOAT, GEMV_PROCESS_COL_COMPLEX, GEMV_PROCESS_COL_COMPLEX_ONE, i, j, Eigen::numext::mini(), res, rows, and Eigen::Unaligned.
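
The four ConjugateLhs/ConjugateRhs combinations only change which operand is conjugated before the multiply. A scalar model of what cj.pmul resolves to (cj_pmul is an illustrative name, not part of this file):

#include <complex>

template <bool ConjugateLhs, bool ConjugateRhs, typename T>
std::complex<T> cj_pmul(std::complex<T> a, std::complex<T> b) {
  if (ConjugateLhs) a = std::conj(a);  // conjugate the matrix element
  if (ConjugateRhs) b = std::conj(b);  // conjugate the vector element
  return a * b;
}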

◆ gemv_complex_row()

template<typename Scalar , typename LhsScalar , typename LhsMapper , bool ConjugateLhs, bool LhsIsReal, typename RhsScalar , typename RhsMapper , bool ConjugateRhs, bool RhsIsReal, typename ResScalar >
EIGEN_STRONG_INLINE void gemv_complex_row ( Index  rows,
Index  cols,
const LhsMapper &  alhs,
const RhsMapper &  rhs,
ResScalar *  res,
Index  resIncr,
ResScalar  alpha 
)
{
  typedef gemv_traits<LhsScalar, RhsScalar> Traits;

  typedef typename Traits::LhsPacket LhsPacket;
  typedef typename Traits::RhsPacket RhsPacket;
  typedef typename Traits::ResPacket ResPacket;

  typedef typename packet_traits<Scalar>::type ScalarPacket;
  typedef typename packet_traits<LhsScalar>::type PLhsPacket;
  typedef typename packet_traits<ResScalar>::type PResPacket;
  typedef gemv_traits<ResPacket, ResPacket> PTraits;

  // The following copy tells the compiler that lhs's attributes are not modified outside this function
  // This helps GCC to generate proper code.
  LhsMapper lhs(alhs);
  typename RhsMapper::LinearMapper rhs2 = rhs.getLinearMapper(0, 0);

  eigen_internal_assert(rhs.stride() == 1);
  conj_helper<LhsScalar, RhsScalar, ConjugateLhs, ConjugateRhs> cj;
#if !EIGEN_COMP_LLVM
  conj_helper<LhsPacket, RhsPacket, ConjugateLhs, ConjugateRhs> pcj;
#endif

  // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large,
  // processing 8 rows at once might be counter productive wrt cache.
#ifndef GCC_ONE_VECTORPAIR_BUG
  const Index n8 = lhs.stride() * sizeof(LhsScalar) > 32000 ? (rows - 7) : (rows - 7);
  const Index n4 = rows - 3;
  const Index n2 = rows - 1;
#endif

  // TODO: for padded aligned inputs, we could enable aligned reads
  enum {
    LhsAlignment = Unaligned,
    ResPacketSize = PTraits::ResPacketSize,
    LhsPacketSize = PTraits::LhsPacketSize,
    RhsPacketSize = PTraits::RhsPacketSize,
  };

  Index i = 0, j;
  PResPacket c00, c01, c02, c03, c04, c05, c06, c07;
  ResPacket c10, c11, c12, c13, c14, c15, c16, c17;
#ifdef USE_GEMV_MMA
  __vector_quad e00, e01, e02, e03, e04, e05, e06, e07;
  GEMV_UNUSED_ROW(8, e0)
  GEMV_UNUSED_EXTRA(1, c0)
  GEMV_UNUSED_EXTRA(1, c1)
#endif
  ResScalar dd0;
#ifndef GCC_ONE_VECTORPAIR_BUG
  ScalarBlock<ResScalar, 2> cc0, cc1, cc2, cc3;
#ifdef USE_GEMV_MMA
  if (GEMV_IS_COMPLEX_COMPLEX)
#endif
  {
    GEMV_PROCESS_ROW_COMPLEX(8)
  }
  GEMV_PROCESS_ROW_COMPLEX(4)
  GEMV_PROCESS_ROW_COMPLEX(2)
#endif
  for (; i < rows; ++i) {
    GEMV_PROCESS_ROW_COMPLEX_SINGLE(1)
    GEMV_PROCESS_ROW_COMPLEX_PREDUX(0)
    for (; j < cols; ++j) {
      dd0 += cj.pmul(lhs(i, j), rhs2(j));
    }
    res[i * resIncr] += alpha * dd0;
  }
}

References alpha, cols, eigen_internal_assert, GEMV_IS_COMPLEX_COMPLEX, GEMV_PROCESS_ROW_COMPLEX, GEMV_PROCESS_ROW_COMPLEX_PREDUX, GEMV_PROCESS_ROW_COMPLEX_SINGLE, i, j, res, rows, and Eigen::Unaligned.

◆ gemv_mult_complex_complex()

template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_complex_complex ( LhsPacket &  a0,
RhsScalar *  b,
PResPacket &  c0,
ResPacket &  c1 
)

core multiply operation for vectors - complex times complex

{
  ScalarPacket br, bi;
  if (StorageOrder == ColMajor) {
    pload_realimag<RhsScalar>(b, br, bi);
  } else {
    pload_realimag_row<RhsScalar>(b, br, bi);
  }
  if (ConjugateLhs && !ConjugateRhs) a0 = pconj2(a0);
  LhsPacket a1 = pcplxflipconj(a0);
  ScalarPacket cr = pmadd_complex_complex<LhsPacket, ScalarPacket, ConjugateLhs, ConjugateRhs, false>(a0.v, br, c0.v);
  ScalarPacket ci = pmadd_complex_complex<LhsPacket, ScalarPacket, ConjugateLhs, ConjugateRhs, true>(a1.v, bi, c1.v);
  c1 = ResPacket(ci);
  c0 = PResPacket(cr);
}

References b, Eigen::ColMajor, pconj2(), and pcplxflipconj().
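
The split into br/bi and the flipped copy a1 is the usual real/imaginary decomposition of a complex product into two lane-wise fused multiply-adds; c0 and c1 hold the two halves, and their lane-wise sum is the complex result. A scalar sketch of the arithmetic (ignoring the conjugation flags):

#include <complex>

// With a = ar + i*ai and b = br + i*bi:
//   c0 accumulates a  * br = (ar*br, ai*br)
//   c1 accumulates a' * bi = (-ai*bi, ar*bi)   where a' = pcplxflipconj(a) = (-ai, ar)
// so c0 + c1 = (ar*br - ai*bi, ai*br + ar*bi), the complex product.
inline void cplx_cplx_madd(std::complex<float> a, std::complex<float> b, float& cr, float& ci) {
  cr += a.real() * b.real() - a.imag() * b.imag();
  ci += a.imag() * b.real() + a.real() * b.imag();
}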

◆ gemv_mult_complex_real()

template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_complex_real ( LhsPacket &  a0,
RhsScalar *  b,
PResPacket &  c0 
)

core multiply operation for vectors - complex times real

{
  ScalarPacket a1 = pload_complex<ResPacket>(&a0);
  ScalarPacket b0;
  if (StorageOrder == ColMajor) {
    b0 = pload_real(b);
  } else {
    b0 = pload_real_row<ResPacket>(b);
  }
  ScalarPacket cri = pmadd_complex_real<PResPacket, ScalarPacket, ConjugateLhs>(a1, b0, c0.v);
  c0 = PResPacket(cri);
}

References b, Eigen::ColMajor, and pload_real().

◆ gemv_mult_generic()

template<typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_generic ( LhsPacket &  a0,
RhsScalar *  b,
PResPacket &  c0 
)
{
  conj_helper<LhsPacket, RhsPacket, ConjugateLhs, ConjugateRhs> pcj;
  RhsPacket b0;
  if (StorageOrder == ColMajor) {
    b0 = pset1<RhsPacket>(*b);
  } else {
    b0 = ploadu<RhsPacket>(b);
  }
  c0 = pcj.pmadd(a0, b0, c0);
}

References b, and Eigen::ColMajor.

◆ gemv_mult_real_complex()

template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_real_complex ( LhsPacket &  a0,
RhsScalar *  b,
PResPacket &  c0 
)

core multiply operation for vectors - real times complex

{
  ScalarPacket b0;
  if (StorageOrder == ColMajor) {
    b0 = pload_complex_full(b);
  } else {
    b0 = pload_complex_full_row(b);
  }
  ScalarPacket cri = pmadd_complex_real<PResPacket, ScalarPacket, ConjugateRhs>(a0, b0, c0.v);
  c0 = PResPacket(cri);
}

References b, Eigen::ColMajor, pload_complex_full(), and pload_complex_full_row().

◆ gemv_row()

template<typename LhsScalar , typename LhsMapper , typename RhsScalar , typename RhsMapper , typename ResScalar >
EIGEN_STRONG_INLINE void gemv_row ( Index  rows,
Index  cols,
const LhsMapper &  alhs,
const RhsMapper &  rhs,
ResScalar *  res,
Index  resIncr,
ResScalar  alpha 
)
{
  typedef gemv_traits<LhsScalar, RhsScalar> Traits;

  typedef typename Traits::LhsPacket LhsPacket;
  typedef typename Traits::RhsPacket RhsPacket;
  typedef typename Traits::ResPacket ResPacket;

  // The following copy tells the compiler that lhs's attributes are not modified outside this function
  // This helps GCC to generate proper code.
  LhsMapper lhs(alhs);
  typename RhsMapper::LinearMapper rhs2 = rhs.getLinearMapper(0, 0);

  eigen_internal_assert(rhs.stride() == 1);
  conj_helper<LhsScalar, RhsScalar, false, false> cj;
  conj_helper<LhsPacket, RhsPacket, false, false> pcj;

  // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large,
  // processing 8 rows at once might be counter productive wrt cache.
#ifndef GCC_ONE_VECTORPAIR_BUG
  const Index n8 = lhs.stride() * sizeof(LhsScalar) > 32000 ? (rows - 7) : (rows - 7);
  const Index n4 = rows - 3;
  const Index n2 = rows - 1;
#endif

  // TODO: for padded aligned inputs, we could enable aligned reads
  enum {
    LhsAlignment = Unaligned,
    ResPacketSize = Traits::ResPacketSize,
    LhsPacketSize = Traits::LhsPacketSize,
    RhsPacketSize = Traits::RhsPacketSize,
  };

  Index i = 0;
#ifdef USE_GEMV_MMA
  __vector_quad c0, c1, c2, c3, c4, c5, c6, c7;
  GEMV_UNUSED_ROW(8, c)
#else
  ResPacket c0, c1, c2, c3, c4, c5, c6, c7;
#endif
#ifndef GCC_ONE_VECTORPAIR_BUG
  ScalarBlock<ResScalar, 2> cc0, cc1, cc2, cc3;
  GEMV_PROCESS_ROW(8)
  GEMV_PROCESS_ROW(4)
  GEMV_PROCESS_ROW(2)
#endif
  for (; i < rows; ++i) {
    ResPacket d0 = pset1<ResPacket>(ResScalar(0));
    Index j = 0;
    for (; j + LhsPacketSize <= cols; j += LhsPacketSize) {
      RhsPacket b0 = rhs2.template load<RhsPacket, Unaligned>(j);

      d0 = pcj.pmadd(lhs.template load<LhsPacket, LhsAlignment>(i + 0, j), b0, d0);
    }
    ResScalar dd0 = predux(d0);
    for (; j < cols; ++j) {
      dd0 += cj.pmul(lhs(i, j), rhs2(j));
    }
    res[i * resIncr] += alpha * dd0;
  }
}

References alpha, c, cols, eigen_internal_assert, GEMV_PROCESS_ROW, i, j, Eigen::internal::predux(), res, rows, and Eigen::Unaligned.
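
Each output row here is an independent dot product; predux is the horizontal reduction that collapses a packet accumulator into the scalar dd0. A plain-C++ sketch of the contract (gemv_row_reference is an illustrative name; the real kernel processes 8/4/2 rows at a time through GEMV_PROCESS_ROW):

template <typename Scalar>
void gemv_row_reference(int rows, int cols, const Scalar* lhs,  // row-major, leading dimension = cols
                        const Scalar* rhs, Scalar* res, int resIncr, Scalar alpha) {
  for (int i = 0; i < rows; ++i) {
    Scalar dd0(0);
    for (int j = 0; j < cols; ++j) dd0 += lhs[i * cols + j] * rhs[j];  // one full dot product per row
    res[i * resIncr] += alpha * dd0;
  }
}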

◆ loadColData()

template<typename RhsMapper , bool linear>
EIGEN_ALWAYS_INLINE Packet8bf loadColData ( RhsMapper &  rhs,
Index  j 
)
{
  return loadColData_impl<RhsMapper, linear>::run(rhs, j);
}

References j, and loadColData_impl< RhsMapper, linear >::run().

◆ loadLhsPacket()

template<typename Scalar , typename LhsScalar , typename LhsMapper , typename LhsPacket >
EIGEN_ALWAYS_INLINE LhsPacket loadLhsPacket ( LhsMapper &  lhs,
Index  i,
Index  j 
)

load lhs packet

{
  if (sizeof(Scalar) == sizeof(LhsScalar)) {
    const LhsScalar& src = lhs(i + 0, j);
    return LhsPacket(pload_real_full(const_cast<LhsScalar*>(&src)));
  }
  return lhs.template load<LhsPacket, Unaligned>(i + 0, j);
}

References i, j, and pload_real_full().

◆ loadPacketPartialZero()

EIGEN_ALWAYS_INLINE Packet8us loadPacketPartialZero ( Packet8us  data,
Index  extra_cols 
)
{
  Packet16uc shift = pset1<Packet16uc>(8 * 2 * (8 - extra_cols));
#ifdef _BIG_ENDIAN
  return reinterpret_cast<Packet8us>(vec_slo(vec_sro(reinterpret_cast<Packet16uc>(data), shift), shift));
#else
  return reinterpret_cast<Packet8us>(vec_sro(vec_slo(reinterpret_cast<Packet16uc>(data), shift), shift));
#endif
}

References data, and Eigen::internal::pset1< Packet16uc >().

Referenced by multVSXVecLoop().
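
The shift constant encodes 2*(8 - extra_cols) bytes, so the vec_sro/vec_slo pair pushes the unused 16-bit lanes out of the register and back in, leaving zeros behind. A scalar model of the effect (zero_tail_lanes is an illustrative name):

#include <cstdint>

inline void zero_tail_lanes(uint16_t lanes[8], int extra_cols) {
  // keep the first extra_cols lanes, force the rest to zero -
  // what the vector version achieves with two octet shifts instead of a loop
  for (int k = extra_cols; k < 8; ++k) lanes[k] = 0;
}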

◆ loadVecLoopVSX()

template<Index num_acc, typename LhsMapper , bool zero>
EIGEN_ALWAYS_INLINE void loadVecLoopVSX ( Index  k,
LhsMapper &  lhs,
Packet4f(&)  a0[num_acc][2] 
)
{
  Packet8bf c0 = lhs.template loadPacket<Packet8bf>(k * 4, 0);
  Packet8bf b1;
  if (!zero) {
    b1 = lhs.template loadPacket<Packet8bf>(k * 4, 1);

    a0[k + 0][1] = oneConvertBF16Hi(b1.m_val);
  }
  a0[k + 0][0] = oneConvertBF16Hi(c0.m_val);

  if (num_acc > (k + 1)) {
    a0[k + 1][0] = oneConvertBF16Lo(c0.m_val);
    if (!zero) {
      a0[k + 1][1] = oneConvertBF16Lo(b1.m_val);
    }
  }
}

References k, zero, Eigen::internal::oneConvertBF16Hi(), and Eigen::internal::oneConvertBF16Lo().

◆ multVecVSX()

template<Index num_acc, bool zero>
EIGEN_ALWAYS_INLINE void multVecVSX ( Packet4f(&)  acc[num_acc][2],
Packet4f(&)  a0[num_acc][2],
Packet4f(&)  b0[2] 
)
{
  for (Index k = 0; k < num_acc; k++) {
    for (Index i = 0; i < (zero ? 1 : 2); i++) {
      acc[k][i] = pmadd(b0[i], a0[k][i], acc[k][i]);
    }
  }
}

References i, k, zero, and Eigen::internal::pmadd().

◆ multVSXVecLoop()

template<Index num_acc, typename LhsMapper , typename RhsMapper , bool extra>
EIGEN_ALWAYS_INLINE void multVSXVecLoop ( Packet4f(&)  acc[num_acc][2],
const LhsMapper &  lhs,
RhsMapper &  rhs,
Index  j,
Index  extra_cols 
)
{
  Packet4f a0[num_acc][2], b0[2];
  Packet8bf a1, b1;

  if (extra) {
    b1 = rhs.template loadPacketPartial<Packet8bf>(j, extra_cols);
#ifndef _ARCH_PWR9
    b1 = loadPacketPartialZero(b1.m_val, extra_cols);
#endif
  } else {
    b1 = rhs.template loadPacket<Packet8bf>(j);
  }
  b0[0] = oneConvertBF16Hi(b1.m_val);
  b0[1] = oneConvertBF16Lo(b1.m_val);

  const LhsMapper lhs2 = lhs.getSubMapper(0, j);
  for (Index k = 0; k < num_acc; k++) {
    if (extra) {
      a1 = lhs2.template loadPacketPartial<Packet8bf>(k, 0, extra_cols);
#ifndef _ARCH_PWR9
      a1 = loadPacketPartialZero(a1.m_val, extra_cols);
#endif
    } else {
      a1 = lhs2.template loadPacket<Packet8bf>(k, 0);
    }
    a0[k][0] = oneConvertBF16Hi(a1.m_val);
    a0[k][1] = oneConvertBF16Lo(a1.m_val);
  }

  multVecVSX<num_acc, false>(acc, a0, b0);
}

References j, k, loadPacketPartialZero(), Eigen::internal::oneConvertBF16Hi(), and Eigen::internal::oneConvertBF16Lo().
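
All of the bfloat16 paths rest on the same widening trick: a bf16 value is exactly the top 16 bits of an IEEE-754 float32, so oneConvertBF16Hi/Lo only have to permute each 16-bit lane into the high half of a 32-bit lane before ordinary float madds run. A self-contained scalar model (bf16_to_float and bf16_madd are illustrative names):

#include <cstdint>
#include <cstring>

inline float bf16_to_float(uint16_t b) {
  uint32_t u = uint32_t(b) << 16;  // bf16 occupies the top 16 bits of a float32
  float f;
  std::memcpy(&f, &u, sizeof(f));
  return f;
}

inline void bf16_madd(const uint16_t* a, const uint16_t* b, float* acc, int n) {
  for (int k = 0; k < n; ++k) acc[k] += bf16_to_float(a[k]) * bf16_to_float(b[k]);
}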

◆ outputVecCol()

template<bool extraRows>
EIGEN_ALWAYS_INLINE void outputVecCol ( Packet4f  acc,
float *  result,
Packet4f  pAlpha,
Index  extra_rows 
)
{
  Packet4f d0 = ploadu<Packet4f>(result);
  d0 = pmadd(acc, pAlpha, d0);
  if (extraRows) {
    pstoreu_partial(result, d0, extra_rows);
  } else {
    pstoreu(result, d0);
  }
}

References Eigen::internal::ploadu< Packet4f >(), Eigen::internal::pmadd(), Eigen::internal::pstoreu(), and Eigen::internal::pstoreu_partial().

◆ outputVecColResults()

template<Index num_acc, bool extraRows, Index size>
EIGEN_ALWAYS_INLINE void outputVecColResults ( Packet4f(&)  acc[num_acc][size],
float *  result,
Packet4f  pAlpha,
Index  extra_rows 
)
{
  constexpr Index real_acc = (num_acc - (extraRows ? 1 : 0));
  for (Index k = 0; k < real_acc; k++) {
    outputVecCol<false>(acc[k][0], result + k * 4, pAlpha, extra_rows);
  }
  if (extraRows) {
    outputVecCol<true>(acc[real_acc][0], result + real_acc * 4, pAlpha, extra_rows);
  }
}

References k.

◆ outputVecResults()

template<Index num_acc, Index size>
EIGEN_ALWAYS_INLINE void outputVecResults ( Packet4f(&)  acc[num_acc][size],
float *  result,
Packet4f  pAlpha 
)
{
  constexpr Index extra = num_acc & 3;

  for (Index k = 0; k < num_acc; k += 4) {
    Packet4f d0 = ploadu<Packet4f>(result + k);
    d0 = pmadd(acc[k + 0][0], pAlpha, d0);

    if (num_acc > (k + 3)) {
      pstoreu(result + k, d0);
    } else {
      if (extra == 3) {
        pstoreu_partial(result + k, d0, extra);
      } else {
        memcpy((void*)(result + k), (void*)(&d0), sizeof(float) * extra);
      }
    }
  }
}

References k, Eigen::internal::ploadu< Packet4f >(), Eigen::internal::pmadd(), Eigen::internal::pstoreu(), and Eigen::internal::pstoreu_partial().

◆ padd() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd padd ( Packet1cd &  a,
std::complex< double > &  b 
)
{
  EIGEN_UNUSED_VARIABLE(b);
  return a;  // Just for compilation
}

References a, b, and EIGEN_UNUSED_VARIABLE.

◆ padd() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf padd ( Packet2cf &  a,
std::complex< float > &  b 
)

◆ pconj2() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pconj2 ( const Packet1cd &  a)
{
  return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p16uc_COMPLEX64_CONJ_XOR)));
}

References a, p16uc_COMPLEX64_CONJ_XOR, and Eigen::internal::pxor().

◆ pconj2() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pconj2 ( const Packet2cf &  a)

packet conjugate (same as pconj but uses the constants in pcplxflipconj for better code generation)

{
  return Packet2cf(pxor(a.v, reinterpret_cast<Packet4f>(p16uc_COMPLEX32_CONJ_XOR)));
}

References a, p16uc_COMPLEX32_CONJ_XOR, and Eigen::internal::pxor().

Referenced by gemv_mult_complex_complex(), pcplxconjflip(), pcplxflipconj(), pmadd_complex_complex(), and pmadd_complex_real().
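
These conjugation helpers work because negating an IEEE-754 float is a single sign-bit XOR; the p16uc_COMPLEX*_CONJ_XOR masks simply place 0x80 in the top byte of every imaginary lane. A scalar model of pconj2 for the (re0, im0, re1, im1) float layout:

#include <cstdint>
#include <cstring>

inline float flip_sign(float x) {
  uint32_t u;
  std::memcpy(&u, &x, sizeof(u));
  u ^= 0x80000000u;  // XOR the IEEE-754 sign bit
  std::memcpy(&x, &u, sizeof(x));
  return x;
}

inline void conj_lanes(float lanes[4]) {  // lanes = (re0, im0, re1, im1)
  lanes[1] = flip_sign(lanes[1]);
  lanes[3] = flip_sign(lanes[3]);
}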

◆ pconjinv() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pconjinv ( const Packet1cd &  a)
{
  return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p16uc_COMPLEX64_CONJ_XOR2)));
}

References a, p16uc_COMPLEX64_CONJ_XOR2, and Eigen::internal::pxor().

◆ pconjinv() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pconjinv ( const Packet2cf &  a)

packet conjugate with real & imaginary operation inverted

{
#ifdef __POWER8_VECTOR__
  return Packet2cf(Packet4f(vec_neg(Packet2d(a.v))));
#else
  return Packet2cf(pxor(a.v, reinterpret_cast<Packet4f>(p16uc_COMPLEX32_CONJ_XOR2)));
#endif
}

References a, p16uc_COMPLEX32_CONJ_XOR2, and Eigen::internal::pxor().

◆ pcplxconjflip() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pcplxconjflip ( Packet1cd  a)
{
#ifdef PERMXOR_GOOD
  return Packet1cd(Packet2d(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX64_CONJ_XOR2, p16uc_COMPLEX64_XORFLIP)));
#else
  return pconj2(pcplxflip(a));
#endif
}

References a, p16uc_COMPLEX64_CONJ_XOR2, p16uc_COMPLEX64_XORFLIP, pconj2(), and Eigen::internal::pcplxflip().

◆ pcplxconjflip() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pcplxconjflip ( Packet2cf  a)

packet conjugate and flip the real & imaginary results

{
#ifdef PERMXOR_GOOD
  return Packet2cf(Packet4f(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX32_CONJ_XOR2, p16uc_COMPLEX32_XORFLIP)));
#else
  return pconj2(pcplxflip(a));
#endif
}

References a, p16uc_COMPLEX32_CONJ_XOR2, p16uc_COMPLEX32_XORFLIP, pconj2(), and Eigen::internal::pcplxflip().

◆ pcplxflip2() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pcplxflip2 ( Packet1cd  a)
{
#ifdef EIGEN_VECTORIZE_VSX
  return Packet1cd(__builtin_vsx_xxpermdi(a.v, a.v, 2));
#else
  return Packet1cd(Packet2d(vec_perm(Packet16uc(a.v), Packet16uc(a.v), p16uc_COMPLEX64_XORFLIP)));
#endif
}

References a, and p16uc_COMPLEX64_XORFLIP.

◆ pcplxflip2() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pcplxflip2 ( Packet2cf  a)

flip the real & imaginary results

{
  return Packet2cf(Packet4f(vec_perm(Packet16uc(a.v), Packet16uc(a.v), p16uc_COMPLEX32_XORFLIP)));
}

References a, and p16uc_COMPLEX32_XORFLIP.

◆ pcplxflipconj() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pcplxflipconj ( Packet1cd  a)
{
#ifdef PERMXOR_GOOD
  return Packet1cd(Packet2d(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX64_CONJ_XOR, p16uc_COMPLEX64_XORFLIP)));
#else
  return pcplxflip(pconj2(a));
#endif
}

References a, p16uc_COMPLEX64_CONJ_XOR, p16uc_COMPLEX64_XORFLIP, pconj2(), and Eigen::internal::pcplxflip().

◆ pcplxflipconj() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pcplxflipconj ( Packet2cf  a)

flip the real & imaginary results and packet conjugate

{
#ifdef PERMXOR_GOOD
  return Packet2cf(Packet4f(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX32_CONJ_XOR, p16uc_COMPLEX32_XORFLIP)));
#else
  return pcplxflip(pconj2(a));
#endif
}

References a, p16uc_COMPLEX32_CONJ_XOR, p16uc_COMPLEX32_XORFLIP, pconj2(), and Eigen::internal::pcplxflip().

Referenced by gemv_mult_complex_complex(), and pstoreu_pmadd_complex().

◆ pcplxflipnegate() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pcplxflipnegate ( Packet1cd  a)
{
#ifdef PERMXOR_GOOD
  return Packet1cd(Packet2d(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX64_NEGATE, p16uc_COMPLEX64_XORFLIP)));
#else
  return pcplxflip(pnegate2(a));
#endif
}

References a, p16uc_COMPLEX64_NEGATE, p16uc_COMPLEX64_XORFLIP, Eigen::internal::pcplxflip(), and pnegate2().

◆ pcplxflipnegate() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pcplxflipnegate ( Packet2cf  a)

flip the real & imaginary results and negate

{
#ifdef PERMXOR_GOOD
  return Packet2cf(Packet4f(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX32_NEGATE, p16uc_COMPLEX32_XORFLIP)));
#else
  return pcplxflip(pnegate2(a));
#endif
}

References a, p16uc_COMPLEX32_NEGATE, p16uc_COMPLEX32_XORFLIP, Eigen::internal::pcplxflip(), and pnegate2().

◆ pload_complex() [1/4]

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet2d pload_complex ( Packet1cd *  src)
{
  return src->v;
}

◆ pload_complex() [2/4]

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet4f pload_complex ( Packet2cf *  src)

load from a complex vector and convert to a real vector

{
  return src->v;
}

◆ pload_complex() [3/4]

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet2d pload_complex ( std::complex< double > *  src)
{
  return ploadu<Packet2d>(reinterpret_cast<double*>(src));
}

References Eigen::internal::ploadu< Packet2d >().

◆ pload_complex() [4/4]

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet4f pload_complex ( std::complex< float > *  src)

load a scalar or a vector from complex location

{
  if (GEMV_IS_SCALAR) {
    return pload_complex_half(src);
  } else {
    return ploadu<Packet4f>(reinterpret_cast<float*>(src));
  }
}

References GEMV_IS_SCALAR, pload_complex_half(), and Eigen::internal::ploadu< Packet4f >().

◆ pload_complex_full() [1/2]

EIGEN_ALWAYS_INLINE Packet2d pload_complex_full ( std::complex< double > *  src)
{ return ploadu<Packet1cd>(src).v; }

References Eigen::internal::ploadu< Packet1cd >(), and Eigen::internal::Packet1cd::v.

◆ pload_complex_full() [2/2]

EIGEN_ALWAYS_INLINE Packet4f pload_complex_full ( std::complex< float > *  src)

load a full vector from complex location - column-wise

{
  return Packet4f(ploaddup<Packet2d>(reinterpret_cast<double*>(src)));
}

References Eigen::internal::ploaddup< Packet2d >().

Referenced by gemv_mult_real_complex(), pload_complex_full_row(), and pload_real_full().

◆ pload_complex_full_row() [1/2]

EIGEN_ALWAYS_INLINE Packet2d pload_complex_full_row ( std::complex< double > *  src)
{ return pload_complex_full(src); }

References pload_complex_full().

◆ pload_complex_full_row() [2/2]

EIGEN_ALWAYS_INLINE Packet4f pload_complex_full_row ( std::complex< float > *  src)

load a full vector from complex location - row-wise

{ return ploadu<Packet2cf>(src).v; }

References Eigen::internal::ploadu< Packet2cf >(), and Eigen::internal::Packet2cf::v.

Referenced by gemv_mult_real_complex().

◆ pload_complex_half()

EIGEN_ALWAYS_INLINE Packet4f pload_complex_half ( std::complex< float > *  src)

load half a vector with one complex value

{
  Packet4f t;
#ifdef EIGEN_VECTORIZE_VSX
  // Load float64/two float32 (doubleword alignment)
  __asm__("lxsdx %x0,%y1" : "=wa"(t) : "Z"(*src));
#else
  *reinterpret_cast<std::complex<float>*>(reinterpret_cast<float*>(&t) + COMPLEX_DELTA) = *src;
#endif
  return t;
}

References COMPLEX_DELTA.

Referenced by pload_complex(), and pload_realimag().

◆ pload_real() [1/4]

EIGEN_ALWAYS_INLINE Packet2d pload_real ( double src)
{ return pset1<Packet2d>(*src); }

References Eigen::internal::pset1< Packet2d >().

◆ pload_real() [2/4]

EIGEN_ALWAYS_INLINE Packet4f pload_real ( float *  src)

load a vector from a real-only scalar location - column-wise

{ return pset1<Packet4f>(*src); }

References Eigen::internal::pset1< Packet4f >().

Referenced by gemv_mult_complex_real(), pload_real_full(), and pload_real_row().

◆ pload_real() [3/4]

EIGEN_ALWAYS_INLINE Packet2d pload_real ( Packet2d &  src)
{ return src; }

◆ pload_real() [4/4]

EIGEN_ALWAYS_INLINE Packet4f pload_real ( Packet4f &  src)
{ return src; }

◆ pload_real_full() [1/4]

EIGEN_ALWAYS_INLINE Packet2d pload_real_full ( double src)
{ return pload_real(src); }

References pload_real().

◆ pload_real_full() [2/4]

EIGEN_ALWAYS_INLINE Packet4f pload_real_full ( float *  src)

load a vector from a real-only vector location

{
  Packet4f ret = ploadu<Packet4f>(src);
  return vec_mergeh(ret, ret);
}

References Eigen::internal::ploadu< Packet4f >(), and ret.

Referenced by loadLhsPacket(), and pload_real_row().

◆ pload_real_full() [3/4]

EIGEN_ALWAYS_INLINE Packet2d pload_real_full ( std::complex< double > *  src)
{
  return pload_complex_full(src);  // Just for compilation
}

References pload_complex_full().

◆ pload_real_full() [4/4]

EIGEN_ALWAYS_INLINE Packet4f pload_real_full ( std::complex< float > *  src)
{
  return pload_complex_full(src);  // Just for compilation
}

References pload_complex_full().

◆ pload_real_row() [1/2]

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet2d pload_real_row ( double src)
{
  return pload_real(src);
}

References pload_real().

◆ pload_real_row() [2/2]

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet4f pload_real_row ( float *  src)

load a vector from a real-only scalar location - row-wise

{
  if (GEMV_IS_SCALAR) {
    return pload_real_full(src);
  } else {
    return ploadu<Packet4f>(src);
  }
}

References GEMV_IS_SCALAR, pload_real_full(), and Eigen::internal::ploadu< Packet4f >().

◆ pload_realimag() [1/2]

template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag ( RhsScalar *  src,
Packet2d &  r,
Packet2d &  i 
)
{
#ifdef EIGEN_VECTORIZE_VSX
  __asm__("lxvdsx %x0,%y1" : "=wa"(r) : "Z"(*(reinterpret_cast<double*>(src) + 0)));
  __asm__("lxvdsx %x0,%y1" : "=wa"(i) : "Z"(*(reinterpret_cast<double*>(src) + 1)));
#else
  Packet2d t = ploadu<Packet2d>(reinterpret_cast<double*>(src));
  r = vec_splat(t, 0);
  i = vec_splat(t, 1);
#endif
}

References i, and Eigen::internal::ploadu< Packet2d >().

◆ pload_realimag() [2/2]

template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag ( RhsScalar *  src,
Packet4f &  r,
Packet4f &  i 
)

load two vectors from the real and imaginary portions of a complex value

{
#ifdef _ARCH_PWR9
  __asm__("lxvwsx %x0,%y1" : "=wa"(r) : "Z"(*(reinterpret_cast<float*>(src) + 0)));
  __asm__("lxvwsx %x0,%y1" : "=wa"(i) : "Z"(*(reinterpret_cast<float*>(src) + 1)));
#else
  Packet4f t = pload_complex_half(src);
  r = vec_splat(t, COMPLEX_DELTA + 0);
  i = vec_splat(t, COMPLEX_DELTA + 1);
#endif
}

References COMPLEX_DELTA, i, and pload_complex_half().

Referenced by pload_realimag_row().

◆ pload_realimag_combine() [1/2]

EIGEN_ALWAYS_INLINE Packet2d pload_realimag_combine ( std::complex< double > *  src)

◆ pload_realimag_combine() [2/2]

EIGEN_ALWAYS_INLINE Packet4f pload_realimag_combine ( std::complex< float > *  src)

load and splat a complex value into a vector - column-wise

{
#ifdef EIGEN_VECTORIZE_VSX
  Packet4f ret;
  __asm__("lxvdsx %x0,%y1" : "=wa"(ret) : "Z"(*(reinterpret_cast<double*>(src) + 0)));
  return ret;
#else
  return Packet4f(ploaddup<Packet2d>(reinterpret_cast<double*>(src)));
#endif
}

References Eigen::internal::ploaddup< Packet2d >(), and ret.

◆ pload_realimag_combine_row() [1/2]

EIGEN_ALWAYS_INLINE Packet2d pload_realimag_combine_row ( std::complex< double > *  src)

◆ pload_realimag_combine_row() [2/2]

EIGEN_ALWAYS_INLINE Packet4f pload_realimag_combine_row ( std::complex< float > *  src)

load a complex value into a vector - row-wise

{ return ploadu<Packet2cf>(src).v; }

References Eigen::internal::ploadu< Packet2cf >(), and Eigen::internal::Packet2cf::v.

◆ pload_realimag_row() [1/2]

template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag_row ( RhsScalar *  src,
Packet2d &  r,
Packet2d &  i 
)
{
  return pload_realimag(src, r, i);
}

References i, and pload_realimag().

◆ pload_realimag_row() [2/2]

template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag_row ( RhsScalar *  src,
Packet4f &  r,
Packet4f &  i 
)

load two vectors from the interleaved real & imaginary values of src

{
  Packet4f t = ploadu<Packet4f>(reinterpret_cast<float*>(src));
#ifdef __POWER8_VECTOR__
  r = vec_mergee(t, t);
  i = vec_mergeo(t, t);
#else
  r = vec_perm(t, t, p16uc_MERGEE);
  i = vec_perm(t, t, p16uc_MERGEO);
#endif
}

References i, p16uc_MERGEE, p16uc_MERGEO, and Eigen::internal::ploadu< Packet4f >().
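
vec_mergee/vec_mergeo (or the equivalent p16uc_MERGEE/p16uc_MERGEO permutes) gather the even and odd 32-bit lanes, so interleaved complex storage splits into a vector of duplicated real parts and one of duplicated imaginary parts. A scalar model of the lane movement:

// src = (re0, im0, re1, im1) as it sits in memory; after the split:
//   r = (re0, re0, re1, re1), i = (im0, im0, im1, im1)
inline void split_realimag(const float src[4], float r[4], float i[4]) {
  r[0] = r[1] = src[0];  i[0] = i[1] = src[1];
  r[2] = r[3] = src[2];  i[2] = i[3] = src[3];
}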

◆ pmadd_complex()

template<typename ScalarPacket , typename AlphaData >
EIGEN_ALWAYS_INLINE ScalarPacket pmadd_complex ( ScalarPacket &  c0,
ScalarPacket &  c2,
ScalarPacket &  c4,
AlphaData &  b0 
)

multiply and add for complex math

{
  return pmadd(c2, b0.separate.i.v, pmadd(c0, b0.separate.r.v, c4));
}

References Eigen::internal::pmadd().

◆ pmadd_complex_complex()

template<typename ComplexPacket , typename RealPacket , bool ConjugateLhs, bool ConjugateRhs, bool Negate>
EIGEN_ALWAYS_INLINE RealPacket pmadd_complex_complex ( RealPacket &  a,
RealPacket &  b,
RealPacket &  c 
)

madd for complex times complex

{
  if (ConjugateLhs && ConjugateRhs) {
    return vec_madd(a, pconj2(ComplexPacket(b)).v, c);
  } else if (Negate && !ConjugateLhs && ConjugateRhs) {
    return vec_nmsub(a, b, c);
  } else {
    return vec_madd(a, b, c);
  }
}

References a, b, c, and pconj2().

◆ pmadd_complex_real()

template<typename ComplexPacket , typename RealPacket , bool Conjugate>
EIGEN_ALWAYS_INLINE RealPacket pmadd_complex_real ( RealPacket &  a,
RealPacket &  b,
RealPacket &  c 
)

madd for complex times real

{
  if (Conjugate) {
    return vec_madd(a, pconj2(ComplexPacket(b)).v, c);
  } else {
    return vec_madd(a, b, c);
  }
}

References a, b, c, and pconj2().

◆ pnegate2() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pnegate2 ( Packet1cd  a)
{
#ifdef __POWER8_VECTOR__
  return Packet1cd(vec_neg(a.v));
#else
  return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p16uc_COMPLEX64_NEGATE)));
#endif
}

References a, p16uc_COMPLEX64_NEGATE, and Eigen::internal::pxor().

◆ pnegate2() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pnegate2 ( Packet2cf  a)

packet negate

{
#ifdef __POWER8_VECTOR__
  return Packet2cf(vec_neg(a.v));
#else
  return Packet2cf(pxor(a.v, reinterpret_cast<Packet4f>(p16uc_COMPLEX32_NEGATE)));
#endif
}

References a, p16uc_COMPLEX32_NEGATE, and Eigen::internal::pxor().

Referenced by pcplxflipnegate().

◆ predux_complex() [1/2]

template<typename ResScalar , typename PResPacket , typename ResPacket , typename LhsPacket , typename RhsPacket >
EIGEN_ALWAYS_INLINE ScalarBlock<ResScalar, 2> predux_complex ( PResPacket &  a0,
PResPacket &  b0,
ResPacket &  a1,
ResPacket &  b1 
)
{
  if (GEMV_IS_COMPLEX_COMPLEX) {
    a0 = padd(a0, a1);
    b0 = padd(b0, b1);
  }
  return predux_complex<ResScalar, PResPacket>(a0, b0);
}

References GEMV_IS_COMPLEX_COMPLEX, and padd().

◆ predux_complex() [2/2]

template<typename ResScalar , typename ResPacket >
EIGEN_ALWAYS_INLINE ScalarBlock<ResScalar, 2> predux_complex ( ResPacket &  a,
ResPacket &  b 
)
{
  return predux_real<ResScalar, ResPacket>(a, b);
}

References a, and b.

◆ predux_real()

template<typename ResScalar , typename ResPacket >
EIGEN_ALWAYS_INLINE ScalarBlock<ResScalar, 2> predux_real ( ResPacket &  a,
ResPacket &  b 
)
{
  ScalarBlock<ResScalar, 2> cc0;
  cc0.scalar[0] = predux(a);
  cc0.scalar[1] = predux(b);
  return cc0;
}

References a, b, Eigen::internal::predux(), and ScalarBlock< Scalar, N >::scalar.

◆ preduxVecResults2VSX()

template<Index num_acc>
EIGEN_ALWAYS_INLINE void preduxVecResults2VSX ( Packet4f(&)  acc[num_acc][2],
Index  k 
)
{
  if (num_acc > (k + 1)) {
    acc[k][1] = vec_mergel(acc[k + 0][0], acc[k + 1][0]);
    acc[k][0] = vec_mergeh(acc[k + 0][0], acc[k + 1][0]);
    acc[k][0] = acc[k][0] + acc[k][1];
    acc[k][0] += vec_sld(acc[k][0], acc[k][0], 8);
  } else {
    acc[k][0] += vec_sld(acc[k][0], acc[k][0], 8);
#ifdef _BIG_ENDIAN
    acc[k][0] += vec_sld(acc[k][0], acc[k][0], 12);
#else
    acc[k][0] += vec_sld(acc[k][0], acc[k][0], 4);
#endif
  }
}

References k.

◆ preduxVecResultsVSX()

template<Index num_acc>
EIGEN_ALWAYS_INLINE void preduxVecResultsVSX ( Packet4f(&)  acc[num_acc][2])
{
  for (Index k = 0; k < num_acc; k += 4) {
    preduxVecResults2VSX<num_acc>(acc, k + 0);
    if (num_acc > (k + 2)) {
      preduxVecResults2VSX<num_acc>(acc, k + 2);
#ifdef EIGEN_VECTORIZE_VSX
      acc[k + 0][0] = reinterpret_cast<Packet4f>(
          vec_mergeh(reinterpret_cast<Packet2ul>(acc[k + 0][0]), reinterpret_cast<Packet2ul>(acc[k + 2][0])));
#else
      acc[k + 0][0] = reinterpret_cast<Packet4f>(vec_perm(acc[k + 0][0], acc[k + 2][0], p16uc_TRANSPOSE64_HI));
#endif
    }
  }
}

References k, and Eigen::internal::p16uc_TRANSPOSE64_HI.

◆ pset1_complex() [1/2]

template<typename Scalar , typename ResScalar , typename ResPacket , int which>
EIGEN_ALWAYS_INLINE Packet1cd pset1_complex ( std::complex< double > &  alpha)
{
  Packet1cd ret;
  ret.v[0] = pset1_realimag<Scalar, ResScalar>(alpha, (which & 0x01), (which & 0x04));
  ret.v[1] = pset1_realimag<Scalar, ResScalar>(alpha, (which & 0x02), (which & 0x08));
  return ret;
}

References alpha, and ret.

◆ pset1_complex() [2/2]

template<typename Scalar , typename ResScalar , typename ResPacket , int which>
EIGEN_ALWAYS_INLINE Packet2cf pset1_complex ( std::complex< float > &  alpha)

set a vector from complex location

{
  Packet2cf ret;
  ret.v[COMPLEX_DELTA + 0] = pset1_realimag<Scalar, ResScalar>(alpha, (which & 0x01), (which & 0x04));
  ret.v[COMPLEX_DELTA + 1] = pset1_realimag<Scalar, ResScalar>(alpha, (which & 0x02), (which & 0x08));
  ret.v[2 - COMPLEX_DELTA] = ret.v[COMPLEX_DELTA + 0];
  ret.v[3 - COMPLEX_DELTA] = ret.v[COMPLEX_DELTA + 1];
  return ret;
}

References alpha, COMPLEX_DELTA, and ret.

◆ pset1_realimag()

template<typename Scalar , typename ResScalar >
EIGEN_ALWAYS_INLINE Scalar pset1_realimag ( ResScalar &  alpha,
int  which,
int  conj 
)

set a scalar from complex location

{
  return (which) ? ((conj) ? -alpha.real() : alpha.real()) : ((conj) ? -alpha.imag() : alpha.imag());
}

References alpha, and conj.

◆ pset_init()

template<typename Packet , typename LhsPacket , typename RhsPacket >
EIGEN_ALWAYS_INLINE Packet pset_init ( Packet c1)

initialize a vector from another vector

{
  if (GEMV_IS_COMPLEX_COMPLEX) {
    EIGEN_UNUSED_VARIABLE(c1);
    return pset_zero<Packet>();
  } else {
    return c1;  // Intentionally left uninitialized
  }
}

References EIGEN_UNUSED_VARIABLE, and GEMV_IS_COMPLEX_COMPLEX.

◆ pset_zero()

template<typename Packet >
EIGEN_ALWAYS_INLINE Packet pset_zero ( )

zero out a vector for real or complex forms

{
  return pset1<Packet>(__UNPACK_TYPE__(Packet)(0));
}

References __UNPACK_TYPE__.

◆ pset_zero< Packet1cd >()

template<>
EIGEN_ALWAYS_INLINE Packet1cd pset_zero< Packet1cd > ( )
{
  return Packet1cd(pset1<Packet2d>(double(0)));
}

References Eigen::internal::pset1< Packet2d >().

◆ pset_zero< Packet2cf >()

template<>
EIGEN_ALWAYS_INLINE Packet2cf pset_zero< Packet2cf > ( )
{
  return Packet2cf(pset1<Packet4f>(float(0)));
}

References Eigen::internal::pset1< Packet4f >().

◆ pstoreu_pmadd_complex() [1/2]

template<typename Scalar , typename ScalarPacket , typename PResPacket , typename ResPacket , typename ResScalar , typename AlphaData >
EIGEN_ALWAYS_INLINE void pstoreu_pmadd_complex ( PResPacket &  c0,
AlphaData &  b0,
ResScalar *  res 
)

store and madd for complex math

{
  PResPacket c2 = pcplxflipconj(c0);
  if (GEMV_IS_SCALAR) {
    ScalarPacket c4 = ploadu<ScalarPacket>(reinterpret_cast<Scalar*>(res));
    ScalarPacket c3 = pmadd_complex<ScalarPacket, AlphaData>(c0.v, c2.v, c4, b0);
    pstoreu(reinterpret_cast<Scalar*>(res), c3);
  } else {
    ScalarPacket c4 = pload_complex<ResPacket>(res);
    PResPacket c3 = PResPacket(pmadd_complex<ScalarPacket, AlphaData>(c0.v, c2.v, c4, b0));
    pstoreu(res, c3);
  }
}

References GEMV_IS_SCALAR, pcplxflipconj(), Eigen::internal::pstoreu(), and res.

◆ pstoreu_pmadd_complex() [2/2]

template<typename ScalarPacket , typename PResPacket , typename ResPacket , typename ResScalar , typename AlphaData , Index ResPacketSize, Index iter2>
EIGEN_ALWAYS_INLINE void pstoreu_pmadd_complex ( PResPacket &  c0,
PResPacket &  c1,
AlphaData &  b0,
ResScalar *  res 
)
{
  PResPacket c2 = pcplxflipconj(c0);
  PResPacket c3 = pcplxflipconj(c1);
#if !defined(_ARCH_PWR10)
  ScalarPacket c4 = pload_complex<ResPacket>(res + (iter2 * ResPacketSize));
  ScalarPacket c5 = pload_complex<ResPacket>(res + ((iter2 + 1) * ResPacketSize));
  PResPacket c6 = PResPacket(pmadd_complex<ScalarPacket, AlphaData>(c0.v, c2.v, c4, b0));
  PResPacket c7 = PResPacket(pmadd_complex<ScalarPacket, AlphaData>(c1.v, c3.v, c5, b0));
  pstoreu(res + (iter2 * ResPacketSize), c6);
  pstoreu(res + ((iter2 + 1) * ResPacketSize), c7);
#else
  __vector_pair a = *reinterpret_cast<__vector_pair*>(res + (iter2 * ResPacketSize));
#if EIGEN_COMP_LLVM
  PResPacket c6[2];
  __builtin_vsx_disassemble_pair(reinterpret_cast<void*>(c6), &a);
  c6[0] = PResPacket(pmadd_complex<ScalarPacket, AlphaData>(c0.v, c2.v, c6[0].v, b0));
  c6[1] = PResPacket(pmadd_complex<ScalarPacket, AlphaData>(c1.v, c3.v, c6[1].v, b0));
  GEMV_BUILDPAIR_MMA(a, c6[0].v, c6[1].v);
#else
  if (GEMV_IS_COMPLEX_FLOAT) {
    __asm__("xvmaddasp %L0,%x1,%x2\n\txvmaddasp %0,%x1,%x3" : "+&d"(a) : "wa"(b0.separate.r.v), "wa"(c0.v), "wa"(c1.v));
    __asm__("xvmaddasp %L0,%x1,%x2\n\txvmaddasp %0,%x1,%x3" : "+&d"(a) : "wa"(b0.separate.i.v), "wa"(c2.v), "wa"(c3.v));
  } else {
    __asm__("xvmaddadp %L0,%x1,%x2\n\txvmaddadp %0,%x1,%x3" : "+&d"(a) : "wa"(b0.separate.r.v), "wa"(c0.v), "wa"(c1.v));
    __asm__("xvmaddadp %L0,%x1,%x2\n\txvmaddadp %0,%x1,%x3" : "+&d"(a) : "wa"(b0.separate.i.v), "wa"(c2.v), "wa"(c3.v));
  }
#endif
  *reinterpret_cast<__vector_pair*>(res + (iter2 * ResPacketSize)) = a;
#endif
}

References a, GEMV_BUILDPAIR_MMA, GEMV_IS_COMPLEX_FLOAT, pcplxflipconj(), Eigen::internal::pstoreu(), and res.
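
The alpha scaling done here is the same two-madd decomposition used in the multiply helpers: one madd against the broadcast real part of alpha and one against the broadcast imaginary part applied to pcplxflipconj(c). A scalar view of res += alpha * c (madd_alpha is an illustrative name):

#include <complex>

inline void madd_alpha(std::complex<float>& res, std::complex<float> c, std::complex<float> alpha) {
  res += std::complex<float>(c.real() * alpha.real() - c.imag() * alpha.imag(),   // madd with Re(alpha) ...
                             c.imag() * alpha.real() + c.real() * alpha.imag());  // ... plus flipped-conj madd with Im(alpha)
}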

◆ storeBF16fromResult()

template<const Index size, bool inc, Index delta>
EIGEN_ALWAYS_INLINE void storeBF16fromResult ( bfloat16 *  dst,
Packet8bf  data,
Index  resInc,
Index  extra 
)
{
  if (inc) {
    if (size < 8) {
      pscatter_partial(dst + delta * resInc, data, resInc, extra);
    } else {
      pscatter(dst + delta * resInc, data, resInc);
    }
  } else {
    if (size < 8) {
      pstoreu_partial(dst + delta, data, extra);
    } else {
      pstoreu(dst + delta, data);
    }
  }
}

References data, delta, Eigen::internal::pscatter(), Eigen::internal::pscatter_partial(), Eigen::internal::pstoreu(), Eigen::internal::pstoreu_partial(), and size.

◆ storeMaddData() [1/2]

template<typename ResPacket , typename ResScalar >
EIGEN_ALWAYS_INLINE void storeMaddData ( ResScalar *  res,
ResPacket &  palpha,
ResPacket &  data 
)

multiply and add and store results

{
  pstoreu(res, pmadd(data, palpha, ploadu<ResPacket>(res)));
}

References data, palpha, Eigen::internal::pmadd(), Eigen::internal::pstoreu(), and res.

◆ storeMaddData() [2/2]

template<typename ResScalar >
EIGEN_ALWAYS_INLINE void storeMaddData ( ResScalar *  res,
ResScalar &  alpha,
ResScalar &  data 
)
{
  *res += (alpha * data);
}

References alpha, data, and res.

◆ vecColLoopVSX()

template<Index num_acc, typename LhsMapper , typename RhsMapper , bool zero, bool linear>
EIGEN_ALWAYS_INLINE void vecColLoopVSX ( Index  j,
LhsMapper &  lhs,
RhsMapper &  rhs,
Packet4f(&)  acc[num_acc][2] 
)
{
  Packet4f a0[num_acc][2], b0[2];
  Packet8bf b2 = loadColData<RhsMapper, linear>(rhs, j);

  b0[0] = oneConvertBF16Perm(b2.m_val, p16uc_MERGE16_32_V1);
  if (!zero) {
    b0[1] = oneConvertBF16Perm(b2.m_val, p16uc_MERGE16_32_V2);
  }

  using LhsSubMapper = typename LhsMapper::SubMapper;

  LhsSubMapper lhs2 = lhs.getSubMapper(0, j);
  for (Index k = 0; k < num_acc; k += 2) {
    loadVecLoopVSX<num_acc, LhsSubMapper, zero>(k, lhs2, a0);
  }

  multVecVSX<num_acc, zero>(acc, a0, b0);
}

References j, k, zero, Eigen::internal::oneConvertBF16Perm(), p16uc_MERGE16_32_V1, and p16uc_MERGE16_32_V2.

◆ vecVSXLoop()

template<Index num_acc, typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void vecVSXLoop ( Index  cols,
const LhsMapper &  lhs,
RhsMapper &  rhs,
Packet4f(&)  acc[num_acc][2],
Index  extra_cols 
)
{
  Index j = 0;
  for (; j + 8 <= cols; j += 8) {
    multVSXVecLoop<num_acc, LhsMapper, RhsMapper, false>(acc, lhs, rhs, j, extra_cols);
  }

  if (extra_cols) {
    multVSXVecLoop<num_acc, LhsMapper, RhsMapper, true>(acc, lhs, rhs, j, extra_cols);
  }
}

References cols, and j.

Variable Documentation

◆ p16uc_COMPLEX32_CONJ_XOR

const Packet16uc p16uc_COMPLEX32_CONJ_XOR
Initial value:
= {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}

Referenced by pconj2(), and pcplxflipconj().

◆ p16uc_COMPLEX32_CONJ_XOR2

const Packet16uc p16uc_COMPLEX32_CONJ_XOR2
Initial value:
= {0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00}

Referenced by pconjinv(), and pcplxconjflip().

◆ p16uc_COMPLEX32_NEGATE

const Packet16uc p16uc_COMPLEX32_NEGATE
Initial value:
= {0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80}

Referenced by pcplxflipnegate(), and pnegate2().

◆ p16uc_COMPLEX32_XORFLIP

const Packet16uc p16uc_COMPLEX32_XORFLIP
Initial value:
= {0x44, 0x55, 0x66, 0x77, 0x00, 0x11, 0x22, 0x33,
0xcc, 0xdd, 0xee, 0xff, 0x88, 0x99, 0xaa, 0xbb}

Referenced by pcplxconjflip(), pcplxflip2(), pcplxflipconj(), and pcplxflipnegate().

◆ p16uc_COMPLEX64_CONJ_XOR

const Packet16uc p16uc_COMPLEX64_CONJ_XOR
Initial value:
= {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}

Referenced by pconj2(), and pcplxflipconj().

◆ p16uc_COMPLEX64_CONJ_XOR2

const Packet16uc p16uc_COMPLEX64_CONJ_XOR2
Initial value:
= {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}

Referenced by pconjinv(), and pcplxconjflip().

◆ p16uc_COMPLEX64_NEGATE

const Packet16uc p16uc_COMPLEX64_NEGATE
Initial value:
= {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}

Referenced by pcplxflipnegate(), and pnegate2().

◆ p16uc_COMPLEX64_XORFLIP

const Packet16uc p16uc_COMPLEX64_XORFLIP
Initial value:
= {0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}

Referenced by pcplxconjflip(), pcplxflip2(), pcplxflipconj(), and pcplxflipnegate().

◆ p16uc_MERGE16_32_V1

Packet16uc p16uc_MERGE16_32_V1 = {0, 1, 16, 17, 0, 1, 16, 17, 0, 1, 16, 17, 0, 1, 16, 17}
static

Referenced by vecColLoopVSX().

◆ p16uc_MERGE16_32_V2

Packet16uc p16uc_MERGE16_32_V2 = {2, 3, 18, 19, 2, 3, 18, 19, 2, 3, 18, 19, 2, 3, 18, 19}
static

Referenced by vecColLoopVSX().

◆ p16uc_MERGEE

const Packet16uc p16uc_MERGEE
Initial value:
= {0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B}

Referenced by pload_realimag_row().

◆ p16uc_MERGEO

const Packet16uc p16uc_MERGEO
Initial value:
= {0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17,
0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F}

Referenced by pload_realimag_row().
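
The flip and merge masks above (p16uc_COMPLEX*_XORFLIP, p16uc_MERGEE/MERGEO, and friends) are operands for the AltiVec vec_perm byte permute: each mask byte selects one byte from the 32-byte concatenation of the two source vectors, 0x00-0x0f from the first and 0x10-0x1f from the second, in big-endian element order (the CONJ_XOR and NEGATE masks are instead XOR tables for pxor/vec_permxor). A scalar model of that selection (perm_bytes is an illustrative name):

#include <cstdint>

inline void perm_bytes(const uint8_t a[16], const uint8_t b[16], const uint8_t mask[16], uint8_t out[16]) {
  for (int k = 0; k < 16; ++k) {
    const uint8_t sel = mask[k] & 0x1f;            // only the low 5 bits of each mask byte matter
    out[k] = (sel < 16) ? a[sel] : b[sel - 16];    // pick from the first or second source vector
  }
}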