MatrixProductCommon.h
// #define EIGEN_POWER_USE_PREFETCH  // Use prefetching in gemm routines
#ifdef EIGEN_POWER_USE_PREFETCH
#define EIGEN_POWER_PREFETCH(p) prefetch(p)
#else
#define EIGEN_POWER_PREFETCH(p)
#endif

#if defined(_ARCH_PWR9) || defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
#define USE_PARTIAL_PACKETS
#endif
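// Partial-packet loads and stores (see bload_partial/bstore_partial below) are only enabled
// on Power9 builds or when dynamic MMA dispatch is requested.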
// IWYU pragma: private
#include "../../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
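// Forward declarations of helpers shared by the Power/AltiVec matrix product kernels.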
template <typename Scalar, typename Packet, typename DataMapper, const Index accRows, const Index accCols>
EIGEN_ALWAYS_INLINE void gemm_extra_row(const DataMapper& res, const Scalar* lhs_base, const Scalar* rhs_base,
                                        Index depth, Index strideA, Index offsetA, Index strideB, Index row, Index rows,
                                        Index remaining_rows, const Packet& pAlpha, const Packet& pMask);

template <typename Scalar, typename Packet, typename DataMapper, const Index accCols>
EIGEN_ALWAYS_INLINE void gemm_extra_cols(const DataMapper& res, const Scalar* blockA, const Scalar* blockB, Index depth,
                                         Index strideA, Index offsetA, Index strideB, Index offsetB, Index col,
                                         Index rows, Index cols, Index remaining_rows, const Packet& pAlpha,
                                         const Packet& pMask);

template <typename Packet>
EIGEN_ALWAYS_INLINE Packet bmask(const Index remaining_rows);

template <typename Scalar, typename Packet, typename Packetc, typename DataMapper, const Index accRows,
          const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
EIGEN_ALWAYS_INLINE void gemm_complex_extra_row(const DataMapper& res, const Scalar* lhs_base, const Scalar* rhs_base,
                                                Index depth, Index strideA, Index offsetA, Index strideB, Index row,
                                                Index rows, Index remaining_rows, const Packet& pAlphaReal,
                                                const Packet& pAlphaImag, const Packet& pMask);

template <typename Scalar, typename Packet, typename Packetc, typename DataMapper, const Index accCols,
          bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
EIGEN_ALWAYS_INLINE void gemm_complex_extra_cols(const DataMapper& res, const Scalar* blockA, const Scalar* blockB,
                                                 Index depth, Index strideA, Index offsetA, Index strideB,
                                                 Index offsetB, Index col, Index rows, Index cols, Index remaining_rows,
                                                 const Packet& pAlphaReal, const Packet& pAlphaImag,
                                                 const Packet& pMask);
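// Conversion and store helpers for moving bfloat16 data to and from float result buffers.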
template <typename DataMapper>
EIGEN_ALWAYS_INLINE void convertArrayBF16toF32(float* result, Index cols, Index rows, const DataMapper& src);

template <const Index size, bool non_unit_stride, Index delta>
EIGEN_ALWAYS_INLINE void storeBF16fromResult(bfloat16* dst, Packet8bf data, Index resInc, Index extra = 0);

template <bool non_unit_stride = false>
EIGEN_ALWAYS_INLINE void convertArrayPointerBF16toF32(float* result, Index cols, Index rows, bfloat16* src,
                                                      Index resInc = 1);

template <bool rhsExtraCols, bool lhsExtraRows>
EIGEN_ALWAYS_INLINE void storeResults(Packet4f (&acc)[4], Index rows, const Packet4f pAlpha, float* result,
                                      Index extra_cols, Index extra_rows);

template <Index num_acc, bool extraRows, Index size = 4>
EIGEN_ALWAYS_INLINE void outputVecColResults(Packet4f (&acc)[num_acc][size], float* result, Packet4f pAlpha,
                                             Index extra_rows);

template <Index num_acc, Index size = 4>
EIGEN_ALWAYS_INLINE void outputVecResults(Packet4f (&acc)[num_acc][size], float* result, Packet4f pAlpha);

template <typename RhsMapper, bool linear>
EIGEN_ALWAYS_INLINE Packet8bf loadColData(RhsMapper& rhs, Index j);

template <typename Packet>
EIGEN_ALWAYS_INLINE Packet ploadLhs(const __UNPACK_TYPE__(Packet)* lhs);

template <typename DataMapper, typename Packet, const Index accCols, int StorageOrder, bool Complex, int N,
          bool full = true>
EIGEN_ALWAYS_INLINE void bload(PacketBlock<Packet, N*(Complex ? 2 : 1)>& acc, const DataMapper& res, Index row,
                               Index col);

template <typename DataMapper, typename Packet, int N>
EIGEN_ALWAYS_INLINE void bstore(PacketBlock<Packet, N>& acc, const DataMapper& res, Index row);
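// Partial-packet variants of bload/bstore, compiled only when USE_PARTIAL_PACKETS is defined.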
#ifdef USE_PARTIAL_PACKETS
template <typename DataMapper, typename Packet, const Index accCols, bool Complex, Index N, bool full = true>
EIGEN_ALWAYS_INLINE void bload_partial(PacketBlock<Packet, N*(Complex ? 2 : 1)>& acc, const DataMapper& res, Index row,
                                       Index elements);

template <typename DataMapper, typename Packet, Index N>
EIGEN_ALWAYS_INLINE void bstore_partial(PacketBlock<Packet, N>& acc, const DataMapper& res, Index row, Index elements);
#endif

template <typename Packet, int N>
EIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet, N>& acc, PacketBlock<Packet, N>& accZ, const Packet& pAlpha);

template <typename Packet, int N, bool mask>
EIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet, N>& acc, PacketBlock<Packet, N>& accZ, const Packet& pAlpha,
                                const Packet& pMask);

template <typename Packet, int N, bool mask>
EIGEN_ALWAYS_INLINE void bscalec(PacketBlock<Packet, N>& aReal, PacketBlock<Packet, N>& aImag, const Packet& bReal,
                                 const Packet& bImag, PacketBlock<Packet, N>& cReal, PacketBlock<Packet, N>& cImag,
                                 const Packet& pMask);

template <typename Packet, typename Packetc, int N, bool full>
EIGEN_ALWAYS_INLINE void bcouple(PacketBlock<Packet, N>& taccReal, PacketBlock<Packet, N>& taccImag,
                                 PacketBlock<Packetc, N * 2>& tRes, PacketBlock<Packetc, N>& acc1,
                                 PacketBlock<Packetc, N>& acc2);

#define MICRO_NORMAL(iter) (accCols == accCols2) || (unroll_factor != (iter + 1))
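// Dispatch 'func' on the number of leftover rows; the 2- and 3-row cases are only generated
// for float-sized scalars.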
#define MICRO_UNROLL_ITER1(func, N)            \
  switch (remaining_rows) {                    \
    default:                                   \
      func(N, 0) break;                        \
    case 1:                                    \
      func(N, 1) break;                        \
    case 2:                                    \
      if (sizeof(Scalar) == sizeof(float)) {   \
        func(N, 2)                             \
      }                                        \
      break;                                   \
    case 3:                                    \
      if (sizeof(Scalar) == sizeof(float)) {   \
        func(N, 3)                             \
      }                                        \
      break;                                   \
  }

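// With partial packets available, the extra-row path only needs to know whether any rows
// remain rather than the exact count.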
#ifdef USE_PARTIAL_PACKETS
#define MICRO_UNROLL_ITER(func, N) \
  if (remaining_rows) {            \
    func(N, true);                 \
  } else {                         \
    func(N, false);                \
  }

#define MICRO_NORMAL_PARTIAL(iter) full || (unroll_factor != (iter + 1))
#else
#define MICRO_UNROLL_ITER(func, N) MICRO_UNROLL_ITER1(func, N)
#endif

#define MICRO_COMPLEX_UNROLL_ITER(func, N) MICRO_UNROLL_ITER1(func, N)

#define MICRO_NORMAL_COLS(iter, a, b) ((MICRO_NORMAL(iter)) ? a : b)
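// MICRO_LOAD1 pastes the iteration index onto the lhs pointer and packet names
// (lhs_ptr0/lhsV0, lhs_ptr1/lhsV1, ...); iterations beyond unroll_factor only mark their
// packet as unused.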
#define MICRO_LOAD1(lhs_ptr, iter)                               \
  if (unroll_factor > iter) {                                    \
    lhsV##iter = ploadLhs<Packet>(lhs_ptr##iter);                \
    lhs_ptr##iter += MICRO_NORMAL_COLS(iter, accCols, accCols2); \
  } else {                                                       \
    EIGEN_UNUSED_VARIABLE(lhsV##iter);                           \
  }

#define MICRO_LOAD_ONE(iter) MICRO_LOAD1(lhs_ptr, iter)

#define MICRO_COMPLEX_LOAD_ONE(iter)                                                                       \
  if (!LhsIsReal && (unroll_factor > iter)) {                                                              \
    lhsVi##iter = ploadLhs<Packet>(lhs_ptr_real##iter + MICRO_NORMAL_COLS(iter, imag_delta, imag_delta2)); \
  } else {                                                                                                 \
    EIGEN_UNUSED_VARIABLE(lhsVi##iter);                                                                    \
  }                                                                                                        \
  MICRO_LOAD1(lhs_ptr_real, iter)

#define MICRO_SRC_PTR1(lhs_ptr, advRows, iter)                                  \
  if (unroll_factor > iter) {                                                   \
    lhs_ptr##iter = lhs_base + (row + (iter * accCols)) * strideA * advRows -   \
                    MICRO_NORMAL_COLS(iter, 0, (accCols - accCols2) * offsetA); \
  } else {                                                                      \
    EIGEN_UNUSED_VARIABLE(lhs_ptr##iter);                                       \
  }

#define MICRO_SRC_PTR_ONE(iter) MICRO_SRC_PTR1(lhs_ptr, 1, iter)

#define MICRO_COMPLEX_SRC_PTR_ONE(iter) MICRO_SRC_PTR1(lhs_ptr_real, advanceRows, iter)

#define MICRO_PREFETCH1(lhs_ptr, iter)   \
  if (unroll_factor > iter) {            \
    EIGEN_POWER_PREFETCH(lhs_ptr##iter); \
  }

#define MICRO_PREFETCH_ONE(iter) MICRO_PREFETCH1(lhs_ptr, iter)

#define MICRO_COMPLEX_PREFETCH_ONE(iter) MICRO_PREFETCH1(lhs_ptr_real, iter)

#ifdef USE_PARTIAL_PACKETS
#define MICRO_UPDATE_MASK
#else
#define MICRO_UPDATE_MASK EIGEN_UNUSED_VARIABLE(pMask);
#endif
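// After a full-width strip (accCols == accCols2) the offset and mask arguments are unused and
// 'row' advances past the rows just processed.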
#define MICRO_UPDATE                \
  if (accCols == accCols2) {        \
    MICRO_UPDATE_MASK               \
    EIGEN_UNUSED_VARIABLE(offsetA); \
    row += unroll_factor * accCols; \
  }

#define MICRO_COMPLEX_UPDATE                \
  MICRO_UPDATE                              \
  if (LhsIsReal || (accCols == accCols2)) { \
    EIGEN_UNUSED_VARIABLE(imag_delta2);     \
  }

}  // end namespace internal
}  // end namespace Eigen