![]() |
|
#include <GemmKernel.h>
Public Member Functions | |
template<int max_a_unroll, int max_b_unroll> | |
EIGEN_ALWAYS_INLINE void | compute_kern () |
gemm_class (Index m_, Index n_, Index k_, Index ldc_, Index inc_, const Scalar *alpha_, const Scalar *a_, const Scalar *b_, Scalar *c_, bool is_alpha1_, bool is_beta0_, Index a_stride_, Index b_stride_, Index a_off_, Index b_off_) | |
Private Types | |
using | vec = typename packet_traits< Scalar >::type |
using | vec_ymm = typename unpacket_traits< vec >::half |
using | vec_xmm = typename unpacket_traits< vec_ymm >::half |
using | umask_t = typename unpacket_traits< vec >::mask_t |
Private Member Functions | |
EIGEN_ALWAYS_INLINE void | prefetch_a (const Scalar *a_addr) |
EIGEN_ALWAYS_INLINE void | prefetch_b (const Scalar *b_addr) |
EIGEN_ALWAYS_INLINE void | prefetch_x (const Scalar *x_addr) |
EIGEN_ALWAYS_INLINE void | prefetch_c (const Scalar *c_addr) |
template<int nelems> | |
EIGEN_ALWAYS_INLINE void | a_load (vec &a_reg, const Scalar *a_addr) |
EIGEN_ALWAYS_INLINE void | b_load (vec &b_reg, const Scalar *b_addr) |
template<int nelems> | |
EIGEN_ALWAYS_INLINE void | c_store (Scalar *mem, vec &src) |
template<int nelems> | |
EIGEN_ALWAYS_INLINE void | vaddm (vec &dst, const Scalar *mem, vec &src, vec &reg) |
EIGEN_STRONG_INLINE void | vfmadd (vec &dst, const vec &src1, const vec &src2) |
template<int nelems> | |
EIGEN_ALWAYS_INLINE void | vfmaddm (vec &dst, const Scalar *mem, vec &src, vec &scale, vec &reg) |
template<int j, int endX, int i, int endY, int nelems> | |
EIGEN_ALWAYS_INLINE std::enable_if_t<(j > endX)||(i > endY)> | a_loads (const Scalar *ao) |
template<int j, int endX, int i, int endY, int nelems> | |
EIGEN_ALWAYS_INLINE std::enable_if_t<(j<=endX) &&(i<=endY)> | a_loads (const Scalar *ao) |
template<int un, int max_b_unroll, int i, int um_vecs, int a_unroll, int b_unroll> | |
EIGEN_ALWAYS_INLINE std::enable_if_t<(un > max_b_unroll)||(i > um_vecs)> | prefetch_cs (const Scalar *co1, const Scalar *co2) |
template<int un, int max_b_unroll, int i, int um_vecs, int a_unroll, int b_unroll> | |
EIGEN_ALWAYS_INLINE std::enable_if_t<(un<=max_b_unroll) &&(i<=um_vecs)> | prefetch_cs (Scalar *&co1, Scalar *&co2) |
template<int i, int um_vecs, int idx, int nelems> | |
EIGEN_ALWAYS_INLINE std::enable_if_t<(i > um_vecs)> | scale_load_c (const Scalar *cox, vec &alpha_reg) |
template<int i, int um_vecs, int idx, int nelems> | |
EIGEN_ALWAYS_INLINE std::enable_if_t<(i<=um_vecs)> | scale_load_c (const Scalar *cox, vec &alpha_reg) |
template<int i, int um_vecs, int idx, int nelems> | |
EIGEN_ALWAYS_INLINE std::enable_if_t<(i > um_vecs)> | write_c (Scalar *cox) |
template<int i, int um_vecs, int idx, int nelems> | |
EIGEN_ALWAYS_INLINE std::enable_if_t<(i<=um_vecs)> | write_c (Scalar *cox) |
template<int pow, int a_unroll, int idx> | |
EIGEN_ALWAYS_INLINE void | c_update_1count (Scalar *&cox) |
template<int pow, int a_unroll> | |
EIGEN_ALWAYS_INLINE void | c_update_1pow (Scalar *&co1, Scalar *&co2) |
template<int max_b_unroll, int a_unroll, int b_unroll> | |
EIGEN_ALWAYS_INLINE void | c_update (Scalar *&co1, Scalar *&co2) |
template<int um, int um_vecs, int idx, int uk, bool fetch_x, bool ktail> | |
EIGEN_ALWAYS_INLINE std::enable_if_t<(um > um_vecs)> | compute (const Scalar *ao, const Scalar *bo, int &fetchA_idx, int &fetchB_idx, vec &b_reg) |
template<int um, int um_vecs, int idx, int uk, bool fetch_x, bool ktail> | |
EIGEN_ALWAYS_INLINE std::enable_if_t<(um<=um_vecs)> | compute (const Scalar *ao, const Scalar *bo, int &fetchA_idx, int &fetchB_idx, vec &b_reg) |
template<int um, int um_vecs, int uk, int nelems, bool ktail> | |
EIGEN_ALWAYS_INLINE std::enable_if_t<(um > um_vecs)> | load_a (const Scalar *ao) |
template<int um, int um_vecs, int uk, int nelems, bool ktail> | |
EIGEN_ALWAYS_INLINE std::enable_if_t<(um<=um_vecs)> | load_a (const Scalar *ao) |
template<int uk, int pow, int count, int um_vecs, int b_unroll, bool ktail, bool fetch_x, bool c_fetch> | |
EIGEN_ALWAYS_INLINE std::enable_if_t<(count > (pow+1)/2)> | innerkernel_1pow (const Scalar *&aa, const Scalar *const &ao, const Scalar *const &bo, Scalar *&co2, int &fetchA_idx, int &fetchB_idx) |
template<int uk, int pow, int count, int um_vecs, int b_unroll, bool ktail, bool fetch_x, bool c_fetch> | |
EIGEN_ALWAYS_INLINE std::enable_if_t<(count<=(pow+1)/2)> | innerkernel_1pow (const Scalar *&aa, const Scalar *const &ao, const Scalar *const &bo, Scalar *&co2, int &fetchA_idx, int &fetchB_idx) |
template<int uk, int max_b_unroll, int a_unroll, int b_unroll, bool ktail, bool fetch_x, bool c_fetch, bool no_a_preload = false> | |
EIGEN_ALWAYS_INLINE void | innerkernel_1uk (const Scalar *&aa, const Scalar *const &ao, const Scalar *const &bo, Scalar *&co2, int &fetchA_idx, int &fetchB_idx) |
template<int a_unroll, int b_unroll, int k_factor, int max_b_unroll, int max_k_factor, bool c_fetch, bool no_a_preload = false> | |
EIGEN_ALWAYS_INLINE void | innerkernel (const Scalar *&aa, const Scalar *&ao, const Scalar *&bo, Scalar *&co2) |
template<int a_unroll, int b_unroll, int max_b_unroll> | |
EIGEN_ALWAYS_INLINE void | kloop (const Scalar *&aa, const Scalar *&ao, const Scalar *&bo, Scalar *&co1, Scalar *&co2) |
template<int a_unroll, int b_unroll, int max_b_unroll> | |
EIGEN_ALWAYS_INLINE void | nloop (const Scalar *&aa, const Scalar *&ao, const Scalar *&bo, Scalar *&co1, Scalar *&co2) |
template<int a_unroll, int max_a_unroll, int max_b_unroll> | |
EIGEN_ALWAYS_INLINE void | mloop (const Scalar *&ao, const Scalar *&bo, Scalar *&co1, Scalar *&co2) |
Private Attributes | |
vec | zmm [32] |
umask_t | mask |
Index | m |
const Index | n |
const Index | k |
const Index | ldc |
const Index | inc |
const Scalar * | alpha |
const Scalar * | a |
const Scalar * | b |
Scalar * | c |
const bool | is_alpha1 |
const bool | is_beta0 |
const Index | a_stride |
const Index | b_stride |
const Index | a_off |
const Index | b_off |
Static Private Attributes | |
static constexpr bool | is_f32 = sizeof(Scalar) == sizeof(float) |
static constexpr bool | is_f64 = sizeof(Scalar) == sizeof(double) |
static constexpr bool | use_less_a_regs = !is_unit_inc |
static constexpr bool | use_less_b_regs = !is_unit_inc |
static constexpr int | a_regs [] = {0, 1, 2, use_less_a_regs ? 0 : 3, use_less_a_regs ? 1 : 4, use_less_a_regs ? 2 : 5} |
static constexpr int | b_regs [] = {6, use_less_b_regs ? 6 : 7} |
static constexpr int | c_regs [] |
static constexpr int | alpha_load_reg = 0 |
static constexpr int | c_load_regs [] = {1, 2, 6} |
static constexpr int | a_shift = 128 |
static constexpr int | b_shift = 128 |
static constexpr int | nelems_in_cache_line = is_f32 ? 16 : 8 |
static constexpr int | a_prefetch_size = nelems_in_cache_line * 2 |
static constexpr int | b_prefetch_size = nelems_in_cache_line * 8 |
|
private |
|
private |
|
private |
|
private |
|
inline |
References Eigen::internal::pzero(), and Eigen::internal::gemm_class< Scalar, is_unit_inc >::zmm.
|
inlineprivate |
References Eigen::internal::pload1< Packet8d >(), Eigen::internal::ploadu< Packet4d >(), and Eigen::internal::ploadu< Packet4f >().
|
inlineprivate |
References EIGEN_UNUSED_VARIABLE.
|
inlineprivate |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::a_regs, Eigen::internal::gemm_class< Scalar, is_unit_inc >::a_shift, i, j, Eigen::internal::gemm_class< Scalar, is_unit_inc >::nelems_in_cache_line, and Eigen::internal::gemm_class< Scalar, is_unit_inc >::zmm.
|
inlineprivate |
|
inlineprivate |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::inc, Eigen::internal::gemm_class< Scalar, is_unit_inc >::mask, Eigen::internal::pscatter(), Eigen::internal::pstorel(), Eigen::internal::pstores(), and Eigen::internal::pstoreu().
|
inlineprivate |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::alpha, Eigen::internal::gemm_class< Scalar, is_unit_inc >::alpha_load_reg, Eigen::internal::gemm_class< Scalar, is_unit_inc >::is_alpha1, Eigen::internal::gemm_class< Scalar, is_unit_inc >::ldc, Eigen::internal::gemm_class< Scalar, is_unit_inc >::mask, Eigen::internal::gemm_class< Scalar, is_unit_inc >::nelems_in_cache_line, and Eigen::internal::gemm_class< Scalar, is_unit_inc >::zmm.
|
inlineprivate |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::alpha_load_reg, Eigen::numext::div_ceil(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::ldc, Eigen::internal::gemm_class< Scalar, is_unit_inc >::nelems_in_cache_line, Eigen::bfloat16_impl::pow(), and Eigen::internal::gemm_class< Scalar, is_unit_inc >::zmm.
|
inlineprivate |
References Eigen::bfloat16_impl::pow().
|
inlineprivate |
References plotDoE::bo, and EIGEN_UNUSED_VARIABLE.
|
inlineprivate |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::a_regs, plotDoE::bo, Eigen::internal::gemm_class< Scalar, is_unit_inc >::c_regs, Eigen::internal::gemm_class< Scalar, is_unit_inc >::is_f64, Eigen::internal::gemm_class< Scalar, is_unit_inc >::nelems_in_cache_line, Eigen::internal::gemm_class< Scalar, is_unit_inc >::prefetch_a(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::prefetch_b(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::vfmadd(), and Eigen::internal::gemm_class< Scalar, is_unit_inc >::zmm.
|
inline |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::a, Eigen::internal::gemm_class< Scalar, is_unit_inc >::a_shift, Eigen::internal::gemm_class< Scalar, is_unit_inc >::b, Eigen::internal::gemm_class< Scalar, is_unit_inc >::b_shift, plotDoE::bo, Eigen::internal::gemm_class< Scalar, is_unit_inc >::is_f32, Eigen::internal::gemm_class< Scalar, is_unit_inc >::is_f64, and Eigen::internal::gemm_class< Scalar, is_unit_inc >::m.
|
inlineprivate |
References plotDoE::bo.
|
inlineprivate |
References plotDoE::bo, and EIGEN_UNUSED_VARIABLE.
|
inlineprivate |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::b_load(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::b_regs, Eigen::internal::gemm_class< Scalar, is_unit_inc >::b_shift, plotDoE::bo, Eigen::internal::gemm_class< Scalar, is_unit_inc >::ldc, Eigen::internal::gemm_class< Scalar, is_unit_inc >::nelems_in_cache_line, Eigen::bfloat16_impl::pow(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::prefetch_c(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::prefetch_x(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::use_less_b_regs, and Eigen::internal::gemm_class< Scalar, is_unit_inc >::zmm.
|
inlineprivate |
References plotDoE::bo, Eigen::numext::div_ceil(), and Eigen::internal::gemm_class< Scalar, is_unit_inc >::nelems_in_cache_line.
|
inlineprivate |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::b_load(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::b_regs, Eigen::internal::gemm_class< Scalar, is_unit_inc >::b_shift, plotDoE::bo, Eigen::numext::div_ceil(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::k, Eigen::internal::gemm_class< Scalar, is_unit_inc >::nelems_in_cache_line, SECOND_FETCH, Eigen::internal::gemm_class< Scalar, is_unit_inc >::use_less_a_regs, Eigen::internal::gemm_class< Scalar, is_unit_inc >::use_less_b_regs, and Eigen::internal::gemm_class< Scalar, is_unit_inc >::zmm.
|
inlineprivate |
References EIGEN_UNUSED_VARIABLE.
|
inlineprivate |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::a_regs, Eigen::internal::gemm_class< Scalar, is_unit_inc >::a_shift, Eigen::internal::gemm_class< Scalar, is_unit_inc >::nelems_in_cache_line, Eigen::internal::gemm_class< Scalar, is_unit_inc >::use_less_a_regs, and Eigen::internal::gemm_class< Scalar, is_unit_inc >::zmm.
|
inlineprivate |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::a, Eigen::internal::gemm_class< Scalar, is_unit_inc >::a_off, Eigen::internal::gemm_class< Scalar, is_unit_inc >::a_stride, Eigen::internal::gemm_class< Scalar, is_unit_inc >::b, plotDoE::bo, Eigen::internal::gemm_class< Scalar, is_unit_inc >::c, i, Eigen::internal::gemm_class< Scalar, is_unit_inc >::inc, Eigen::internal::gemm_class< Scalar, is_unit_inc >::k, Eigen::internal::gemm_class< Scalar, is_unit_inc >::ldc, and Eigen::internal::gemm_class< Scalar, is_unit_inc >::n.
|
inlineprivate |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::a, Eigen::internal::gemm_class< Scalar, is_unit_inc >::a_off, Eigen::internal::gemm_class< Scalar, is_unit_inc >::b_off, Eigen::internal::gemm_class< Scalar, is_unit_inc >::b_stride, plotDoE::bo, and Eigen::internal::gemm_class< Scalar, is_unit_inc >::k.
|
inlineprivate |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::a_prefetch_size, and Eigen::internal::gemm_class< Scalar, is_unit_inc >::a_shift.
Referenced by Eigen::internal::gemm_class< Scalar, is_unit_inc >::compute().
|
inlineprivate |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::b_prefetch_size, and Eigen::internal::gemm_class< Scalar, is_unit_inc >::b_shift.
Referenced by Eigen::internal::gemm_class< Scalar, is_unit_inc >::compute().
|
inlineprivate |
|
inlineprivate |
References EIGEN_UNUSED_VARIABLE.
|
inlineprivate |
|
inlineprivate |
|
inlineprivate |
References EIGEN_UNUSED_VARIABLE.
|
inlineprivate |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::c_load_regs, Eigen::internal::gemm_class< Scalar, is_unit_inc >::c_regs, i, Eigen::internal::gemm_class< Scalar, is_unit_inc >::inc, Eigen::internal::gemm_class< Scalar, is_unit_inc >::is_alpha1, Eigen::internal::gemm_class< Scalar, is_unit_inc >::is_beta0, Eigen::internal::gemm_class< Scalar, is_unit_inc >::nelems_in_cache_line, Eigen::internal::pmul(), and Eigen::internal::gemm_class< Scalar, is_unit_inc >::zmm.
|
inlineprivate |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::inc, Eigen::internal::gemm_class< Scalar, is_unit_inc >::is_f32, Eigen::internal::gemm_class< Scalar, is_unit_inc >::mask, Eigen::internal::padd(), Eigen::internal::padds(), Eigen::internal::pgather(), and Eigen::internal::pzero().
|
inlineprivate |
References Eigen::internal::pmadd().
Referenced by Eigen::internal::gemm_class< Scalar, is_unit_inc >::compute().
|
inlineprivate |
References Eigen::internal::gemm_class< Scalar, is_unit_inc >::inc, Eigen::internal::gemm_class< Scalar, is_unit_inc >::is_f32, Eigen::internal::gemm_class< Scalar, is_unit_inc >::mask, Eigen::internal::pgather(), Eigen::internal::pmadd(), and Eigen::internal::pzero().
|
inlineprivate |
References EIGEN_UNUSED_VARIABLE.
|
inlineprivate |
|
private |
|
private |
|
staticconstexprprivate |
|
staticconstexprprivate |
|
staticconstexprprivate |
Referenced by Eigen::internal::gemm_class< Scalar, is_unit_inc >::a_loads(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::compute_kern(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::load_a(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::prefetch_a(), and Eigen::internal::gemm_class< Scalar, is_unit_inc >::prefetch_x().
|
private |
Referenced by Eigen::internal::gemm_class< Scalar, is_unit_inc >::mloop().
|
private |
|
staticconstexprprivate |
|
private |
|
private |
Referenced by Eigen::internal::gemm_class< Scalar, is_unit_inc >::nloop().
|
staticconstexprprivate |
|
staticconstexprprivate |
|
staticconstexprprivate |
|
private |
Referenced by Eigen::internal::gemm_class< Scalar, is_unit_inc >::nloop().
|
private |
Referenced by Eigen::internal::gemm_class< Scalar, is_unit_inc >::mloop().
|
staticconstexprprivate |
|
staticconstexprprivate |
Referenced by Eigen::internal::gemm_class< Scalar, is_unit_inc >::compute(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::scale_load_c(), and Eigen::internal::gemm_class< Scalar, is_unit_inc >::write_c().
|
private |
Referenced by Eigen::internal::gemm_class< Scalar, is_unit_inc >::c_store(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::mloop(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::scale_load_c(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::vaddm(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::vfmaddm(), and Eigen::internal::gemm_class< Scalar, is_unit_inc >::write_c().
|
private |
|
private |
|
staticconstexprprivate |
|
staticconstexprprivate |
|
private |
|
private |
Referenced by Eigen::internal::gemm_class< Scalar, is_unit_inc >::c_update(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::c_update_1count(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::innerkernel_1pow(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::mloop(), and Eigen::internal::gemm_class< Scalar, is_unit_inc >::prefetch_cs().
|
private |
|
private |
|
private |
Referenced by Eigen::internal::gemm_class< Scalar, is_unit_inc >::mloop().
|
staticconstexprprivate |
Referenced by Eigen::internal::gemm_class< Scalar, is_unit_inc >::a_loads(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::c_update(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::c_update_1count(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::compute(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::innerkernel_1pow(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::innerkernel_1uk(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::kloop(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::load_a(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::prefetch_cs(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::scale_load_c(), and Eigen::internal::gemm_class< Scalar, is_unit_inc >::write_c().
|
staticconstexprprivate |
|
staticconstexprprivate |
|
private |
Referenced by Eigen::internal::gemm_class< Scalar, is_unit_inc >::a_loads(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::c_update(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::c_update_1count(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::compute(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::gemm_class(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::innerkernel_1pow(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::kloop(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::load_a(), Eigen::internal::gemm_class< Scalar, is_unit_inc >::scale_load_c(), and Eigen::internal::gemm_class< Scalar, is_unit_inc >::write_c().