2 #ifdef EIGEN_POWER_USE_PREFETCH
3 #define EIGEN_POWER_PREFETCH(p) prefetch(p)
5 #define EIGEN_POWER_PREFETCH(p)
8 #if defined(_ARCH_PWR9) || defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
9 #define USE_PARTIAL_PACKETS
13 #include "../../InternalHeaderCheck.h"
19 template <
typename Scalar,
typename Packet,
typename DataMapper, const Index accRows, const Index accCols>
24 template <
typename Scalar,
typename Packet,
typename DataMapper, const Index accCols>
30 template <
typename Packet>
33 template <
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
const Index accRows,
34 const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
40 template <
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
const Index accCols,
41 bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
48 template <
typename DataMapper>
51 template <const Index size,
bool non_unit_stride, Index delta>
54 template <
bool non_unit_stride = false>
58 template <
bool rhsExtraCols,
bool lhsExtraRows>
62 template <Index num_acc,
bool extraRows, Index size = 4>
66 template <Index num_acc, Index size = 4>
69 template <
typename RhsMapper,
bool linear>
72 template <
typename Packet>
75 template <
typename DataMapper,
typename Packet,
const Index accCols,
int StorageOrder,
bool Complex,
int N,
80 template <
typename DataMapper,
typename Packet,
int N>
83 #ifdef USE_PARTIAL_PACKETS
84 template <
typename DataMapper,
typename Packet, const Index accCols,
bool Complex, Index N,
bool full = true>
88 template <
typename DataMapper,
typename Packet, Index N>
92 template <
typename Packet,
int N>
95 template <
typename Packet,
int N,
bool mask>
99 template <
typename Packet,
int N,
bool mask>
104 template <
typename Packet,
typename Packetc,
int N,
bool full>
109 #define MICRO_NORMAL(iter) (accCols == accCols2) || (unroll_factor != (iter + 1))
111 #define MICRO_UNROLL_ITER1(func, N) \
112 switch (remaining_rows) { \
118 if (sizeof(Scalar) == sizeof(float)) { \
123 if (sizeof(Scalar) == sizeof(float)) { \
129 #ifdef USE_PARTIAL_PACKETS
130 #define MICRO_UNROLL_ITER(func, N) \
131 if (remaining_rows) { \
137 #define MICRO_NORMAL_PARTIAL(iter) full || (unroll_factor != (iter + 1))
139 #define MICRO_UNROLL_ITER(func, N) MICRO_UNROLL_ITER1(func, N)
142 #define MICRO_COMPLEX_UNROLL_ITER(func, N) MICRO_UNROLL_ITER1(func, N)
144 #define MICRO_NORMAL_COLS(iter, a, b) ((MICRO_NORMAL(iter)) ? a : b)
146 #define MICRO_LOAD1(lhs_ptr, iter) \
147 if (unroll_factor > iter) { \
148 lhsV##iter = ploadLhs<Packet>(lhs_ptr##iter); \
149 lhs_ptr##iter += MICRO_NORMAL_COLS(iter, accCols, accCols2); \
151 EIGEN_UNUSED_VARIABLE(lhsV##iter); \
154 #define MICRO_LOAD_ONE(iter) MICRO_LOAD1(lhs_ptr, iter)
156 #define MICRO_COMPLEX_LOAD_ONE(iter) \
157 if (!LhsIsReal && (unroll_factor > iter)) { \
158 lhsVi##iter = ploadLhs<Packet>(lhs_ptr_real##iter + MICRO_NORMAL_COLS(iter, imag_delta, imag_delta2)); \
160 EIGEN_UNUSED_VARIABLE(lhsVi##iter); \
162 MICRO_LOAD1(lhs_ptr_real, iter)
164 #define MICRO_SRC_PTR1(lhs_ptr, advRows, iter) \
165 if (unroll_factor > iter) { \
166 lhs_ptr##iter = lhs_base + (row + (iter * accCols)) * strideA * advRows - \
167 MICRO_NORMAL_COLS(iter, 0, (accCols - accCols2) * offsetA); \
169 EIGEN_UNUSED_VARIABLE(lhs_ptr##iter); \
172 #define MICRO_SRC_PTR_ONE(iter) MICRO_SRC_PTR1(lhs_ptr, 1, iter)
174 #define MICRO_COMPLEX_SRC_PTR_ONE(iter) MICRO_SRC_PTR1(lhs_ptr_real, advanceRows, iter)
176 #define MICRO_PREFETCH1(lhs_ptr, iter) \
177 if (unroll_factor > iter) { \
178 EIGEN_POWER_PREFETCH(lhs_ptr##iter); \
181 #define MICRO_PREFETCH_ONE(iter) MICRO_PREFETCH1(lhs_ptr, iter)
183 #define MICRO_COMPLEX_PREFETCH_ONE(iter) MICRO_PREFETCH1(lhs_ptr_real, iter)
185 #ifdef USE_PARTIAL_PACKETS
186 #define MICRO_UPDATE_MASK
188 #define MICRO_UPDATE_MASK EIGEN_UNUSED_VARIABLE(pMask);
191 #define MICRO_UPDATE \
192 if (accCols == accCols2) { \
194 EIGEN_UNUSED_VARIABLE(offsetA); \
195 row += unroll_factor * accCols; \
198 #define MICRO_COMPLEX_UPDATE \
200 if (LhsIsReal || (accCols == accCols2)) { \
201 EIGEN_UNUSED_VARIABLE(imag_delta2); \
#define EIGEN_ALWAYS_INLINE
Definition: Macros.h:845
int data[]
Definition: Map_placement_new.cpp:1
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition: PartialRedux_count.cpp:3
int rows
Definition: Tutorial_commainit_02.cpp:1
int cols
Definition: Tutorial_commainit_02.cpp:1
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
SCALAR Scalar
Definition: bench_gemm.cpp:45
internal::packet_traits< Scalar >::type Packet
Definition: benchmark-blocking-sizes.cpp:54
std::complex< RealScalar > Complex
Definition: common.h:71
@ N
Definition: constructor.cpp:22
EIGEN_ALWAYS_INLINE void storeResults(Packet4f(&acc)[4], Index rows, const Packet4f pAlpha, float *result, Index extra_cols, Index extra_rows)
Definition: MatrixProduct.h:2649
EIGEN_ALWAYS_INLINE Packet bmask(const Index remaining_rows)
Definition: MatrixProduct.h:1660
EIGEN_ALWAYS_INLINE void gemm_complex_extra_row(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index strideB, Index row, Index rows, Index remaining_rows, const Packet &pAlphaReal, const Packet &pAlphaImag, const Packet &pMask)
Definition: MatrixProduct.h:2404
EIGEN_ALWAYS_INLINE void storeBF16fromResult(bfloat16 *dst, Packet8bf data, Index resInc, Index extra=0)
EIGEN_ALWAYS_INLINE void outputVecColResults(Packet4f(&acc)[num_acc][size], float *result, Packet4f pAlpha, Index extra_rows)
EIGEN_ALWAYS_INLINE void convertArrayPointerBF16toF32(float *result, Index cols, Index rows, bfloat16 *src, Index resInc)
Definition: MatrixProduct.h:2813
EIGEN_ALWAYS_INLINE void bstore(PacketBlock< Packet, N > &acc, const DataMapper &res, Index row)
Definition: MatrixProduct.h:1621
EIGEN_ALWAYS_INLINE Packet8bf loadColData(RhsMapper &rhs, Index j)
EIGEN_ALWAYS_INLINE void bscalec(PacketBlock< Packet, N > &aReal, PacketBlock< Packet, N > &aImag, const Packet &bReal, const Packet &bImag, PacketBlock< Packet, N > &cReal, PacketBlock< Packet, N > &cImag, const Packet &pMask)
Definition: MatrixProduct.h:1574
EIGEN_ALWAYS_INLINE void gemm_extra_row(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index strideB, Index row, Index rows, Index remaining_rows, const Packet &pAlpha, const Packet &pMask)
Definition: MatrixProduct.h:1982
EIGEN_ALWAYS_INLINE void gemm_complex_extra_cols(const DataMapper &res, const Scalar *blockA, const Scalar *blockB, Index depth, Index strideA, Index offsetA, Index strideB, Index offsetB, Index col, Index rows, Index cols, Index remaining_rows, const Packet &pAlphaReal, const Packet &pAlphaImag, const Packet &pMask)
Definition: MatrixProduct.h:2579
EIGEN_ALWAYS_INLINE void outputVecResults(Packet4f(&acc)[num_acc][size], float *result, Packet4f pAlpha)
EIGEN_ALWAYS_INLINE void bscale(PacketBlock< Packet, N > &acc, PacketBlock< Packet, N > &accZ, const Packet &pAlpha)
Definition: MatrixProduct.h:1688
EIGEN_ALWAYS_INLINE void bload(PacketBlock< Packet, N *(Complex ? 2 :1)> &acc, const DataMapper &res, Index row, Index col)
Definition: MatrixProduct.h:1597
EIGEN_ALWAYS_INLINE Packet ploadLhs(const __UNPACK_TYPE__(Packet) *lhs)
Definition: MatrixProduct.h:1545
EIGEN_STRONG_INLINE __UNPACK_TYPE__(Packet) pfirst_common(const Packet &a)
Definition: AltiVec/PacketMath.h:1876
EIGEN_ALWAYS_INLINE void bcouple(PacketBlock< Packet, N > &taccReal, PacketBlock< Packet, N > &taccImag, PacketBlock< Packetc, N *2 > &tRes, PacketBlock< Packetc, N > &acc1, PacketBlock< Packetc, N > &acc2)
Definition: MatrixProduct.h:1773
__vector float Packet4f
Definition: AltiVec/PacketMath.h:33
EIGEN_ALWAYS_INLINE void gemm_extra_cols(const DataMapper &res, const Scalar *blockA, const Scalar *blockB, Index depth, Index strideA, Index offsetA, Index strideB, Index offsetB, Index col, Index rows, Index cols, Index remaining_rows, const Packet &pAlpha, const Packet &pMask)
Definition: MatrixProduct.h:2178
EIGEN_ALWAYS_INLINE void convertArrayBF16toF32(float *result, Index cols, Index rows, const DataMapper &src)
Definition: MatrixProduct.h:3053
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
Definition: Eigen_Colamd.h:49
Definition: BFloat16.h:101
Definition: GenericPacketMath.h:1407
Definition: GenericPacketMath.h:225
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2
Definition: ZVector/PacketMath.h:50