Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp > Class Template Reference

TensorContractionKernel is a template class that provides Tensor -Tensor contraction operation. More...

#include <TensorContractionSycl.h>

Classes

struct  MemHolder
 MemHolder this is a place holder struct for creating memory hierarchy in SYCL. Inside SYCL kernel it is not allowed to have dynamic memory allocation. While the local memory is created outside of the kernel and passed to the kernel as an accessor, the private memory can only allowed to be allocated statically. Since we are abstracting the TiledMemory for both local and private memory, the MemHolder structs is used as a helper to abstract out different type of memory needed when local/no_local memory computation is called. More...
 
struct  MemHolder< contraction_type::no_local, MemSize >
 specialization of memHolder class when no local memory kernel is used. More...
 
struct  TiledMemory
 TiledMemory: contains required memory pointer for loading each tile of the TensorContraction panel from global memory to local/private memory when local/no_local algorithm used. More...
 

Public Types

typedef Eigen::TensorSycl::internal::Vectorise< OutScalar, Eigen::SyclDevice, Vectorizable >::PacketReturnType PacketReturnType
 
typedef BlockProperties< is_lhs_transposed, false, input_mapper_properties::is_lhs_matrix &&Vectorizable, PacketReturnTypeLHSBlockProperties
 
typedef BlockProperties< is_rhs_transposed, true, input_mapper_properties::is_rhs_matrix &&Vectorizable, PacketReturnTypeRHSBlockProperties
 
typedef cl::sycl::accessor< OutScalar, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local > Scratch
 
typedef cl::sycl::multi_ptr< OutScalar, cl::sycl::access::address_space::local_space > local_ptr
 
typedef OutScalar * private_ptr
 
typedef std::conditional_t< contraction_tp==contraction_type::local, local_ptr, private_ptrtile_ptr
 

Public Member Functions

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionKernel (Scratch scratch_, const LhsMapper lhs_, const RhsMapper rhs_, OutAccessor out_res_, const StorageIndex groupSizeM_, const StorageIndex groupSizeN_, const StorageIndex numTiles_, const TripleDim triple_dim_)
 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionKernel (Scratch scratch_, const LhsMapper lhs_, const RhsMapper rhs_, OutAccessor out_res_, const StorageIndex groupSizeM_, const StorageIndex numTiles_, const TripleDim triple_dim_)
 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator() (cl::sycl::nd_item< 1 > itemID) const
 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_block_per_tile (OutScalar *lhs_block_ptr, OutScalar *rhs_block_ptr, PacketReturnType *privateRes) const
 
template<bool is_internal_block, StorageIndex PrivateNStride, typename OutPtr >
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void store (OutPtr *out_ptr, PacketReturnType *privateRes, StorageIndex mGlobalOffset, StorageIndex nGlobalOffset) const
 
template<typename InputBlockProperties , bool is_internal_block, typename Input , typename PrivateReg , contraction_type contract_tp = contraction_tp>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t< contract_tp==contraction_type::no_localextract_block (const Input &inpt, PrivateReg private_ptr, const std::pair< StorageIndex, StorageIndex > &, const StorageIndex &ncOffset, const StorageIndex cOffset) const
 
template<bool is_internal_block>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_tile_per_panel (const cl::sycl::nd_item< 1 > &itemID, ThreadProperties< StorageIndex > &thread_properties, TiledMemory &tiled_input_block, PacketReturnType *privateRes, bool &db_offset) const
 
template<bool is_internal_block, typename OutPtr >
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_panel (const cl::sycl::nd_item< 1 > &itemID, ThreadProperties< StorageIndex > &thread_properties, OutPtr out_ptr) const
 
template<typename InputBlockProperties , bool is_internal_block, typename Input , typename Local , contraction_type contract_tp = contraction_tp>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t< contract_tp==contraction_type::localextract_block (const Input &inpt, Local local_ptr, const std::pair< StorageIndex, StorageIndex > &local_index, const StorageIndex &ncOffset, const StorageIndex cOffset) const
 

Static Public Member Functions

template<typename InputBlockProperties , StorageIndex TileSizeDimNC>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::pair< StorageIndex, StorageIndex > local_id_extract (const StorageIndex &linearLocalThreadId)
 
template<bool db = Properties::DoubleBuffer, contraction_type ctp = contraction_tp>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t< db &&ctp==contraction_type::localsync_mem (const cl::sycl::nd_item< 1 > &, bool &db_offset) noexcept
 
template<bool db = Properties::DoubleBuffer, contraction_type ctp = contraction_tp>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<!db &&ctp==contraction_type::localsync_mem (const cl::sycl::nd_item< 1 > &itemID, bool &) noexcept
 
template<contraction_type ctp = contraction_tp>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t< ctp==contraction_type::no_localsync_mem (const cl::sycl::nd_item< 1 > &, bool &) noexcept
 
template<bool need_sync, contraction_type ctp = contraction_tp>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t< need_sync &&ctp==contraction_type::no_localsync_thread (const cl::sycl::nd_item< 1 > &) noexcept
 
template<bool need_sync, contraction_type ctp = contraction_tp>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t< need_sync &&ctp==contraction_type::localsync_thread (const cl::sycl::nd_item< 1 > &itemID)
 
template<bool need_sync>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<!need_sync > sync_thread (const cl::sycl::nd_item< 1 > &)
 

Public Attributes

Scratch scratch
 
const LhsMapper lhs
 
const RhsMapper rhs
 
OutAccessor out_res
 
const StorageIndex groupSizeM
 
const StorageIndex groupSizeN
 
const StorageIndex numTiles
 
const TripleDim triple_dim
 

Static Public Attributes

static EIGEN_CONSTEXPR int PacketSize
 
static EIGEN_CONSTEXPR bool is_lhs_transposed
 
static EIGEN_CONSTEXPR bool is_rhs_transposed
 
static EIGEN_CONSTEXPR StorageIndex NStride
 
static EIGEN_CONSTEXPR StorageIndex LSDL
 
static EIGEN_CONSTEXPR StorageIndex LSDR
 
static EIGEN_CONSTEXPR StorageIndex LocalOffset = Properties::LocalThreadSizeM * Properties::LocalThreadSizeN
 

Detailed Description

template<typename OutScalar, typename LhsScalar, typename RhsScalar, typename OutAccessor, typename LhsMapper, typename RhsMapper, typename StorageIndex, typename Properties, typename TripleDim, bool Vectorizable, typename input_mapper_properties, bool IsFinal, contraction_type contraction_tp>
class Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >

TensorContractionKernel is a template class that provides Tensor -Tensor contraction operation.

Template Parameters
OutScalardetermines the output scalar type
LhsScalardetermines the left-hand-side scalar type
RhsScalardetermines the right-hand-side scalar type
OutAccessordetermines the sycl accessor type for out put (please see the sycl-1.2.1 specification (https://www.khronos.org/registry/SYCL/specs/sycl-1.2.1.pdf) for accessor definition)
LhsMapperdetermines the tensor contraction mapper type for left-hand-side matrix
RhsMapperdetermines the tensor contraction mapper type for right-hand-side matrix
StorageIndexdetermines the StorageIndex Type
Propertiesdetermines the Contraction Panel properties
TripleDimdetermines the M, K, N dimensions for the flatten tensors in order to treat them as a matrix
Vectorizabledetermines whether or not the vectorization is enabled for the Eigen expression.
input_mapper_properties: determine if the input tensors are matrix. If they are matrix, special memory access is used to guarantee that always the memory access are coalesced.

\tptaram IsFinal : determine if this is the final kernel. If so, the result will be written in a final output. Otherwise, the result of contraction will be written iin a temporary buffer. This is the case when Tall/Skinny contraction is used. So in this case, a final reduction step is required to compute final output.

Template Parameters
contraction_tpit is an enum value representing whether the local memory/no local memory implementation of the algorithm to be used
Parameters
scratchlocal memory containing tiles of LHS and RHS tensors for each work-group
lhsdetermines the left-hand-side flattened tensor (tensor mapper)
rhsdetermines the right-hand-side flattened tensor (tensor mapper)
out_resdetermines the output tensor containing the contraction result
groupSizeMa logical number determining the number of work-group for m dimension
groupSizeNa logical number determining the number of work-group for n dimension
numTilesdetermines total number of tiles on the k dimension
TripleDimdetermines the M, K, N dimensions for the flatten tensors in order to treat them as a matrix

Member Typedef Documentation

◆ LHSBlockProperties

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
typedef BlockProperties<is_lhs_transposed, false, input_mapper_properties::is_lhs_matrix && Vectorizable, PacketReturnType> Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::LHSBlockProperties

◆ local_ptr

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
typedef cl::sycl::multi_ptr<OutScalar, cl::sycl::access::address_space::local_space> Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::local_ptr

◆ PacketReturnType

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
typedef Eigen::TensorSycl::internal::Vectorise<OutScalar, Eigen::SyclDevice, Vectorizable>::PacketReturnType Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::PacketReturnType

◆ private_ptr

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
typedef OutScalar* Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::private_ptr

◆ RHSBlockProperties

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
typedef BlockProperties<is_rhs_transposed, true, input_mapper_properties::is_rhs_matrix && Vectorizable, PacketReturnType> Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::RHSBlockProperties

◆ Scratch

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
typedef cl::sycl::accessor<OutScalar, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local> Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::Scratch

◆ tile_ptr

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
typedef std::conditional_t<contraction_tp == contraction_type::local, local_ptr, private_ptr> Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::tile_ptr

Constructor & Destructor Documentation

◆ TensorContractionKernel() [1/2]

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::TensorContractionKernel ( Scratch  scratch_,
const LhsMapper  lhs_,
const RhsMapper  rhs_,
OutAccessor  out_res_,
const StorageIndex  groupSizeM_,
const StorageIndex  groupSizeN_,
const StorageIndex  numTiles_,
const TripleDim  triple_dim_ 
)
inline
584  : scratch(scratch_),
585  lhs(lhs_),
586  rhs(rhs_),
587  out_res(out_res_),
588  groupSizeM(groupSizeM_),
589  groupSizeN(groupSizeN_),
590  numTiles(numTiles_),
591  triple_dim(triple_dim_) {}
const StorageIndex groupSizeM
Definition: TensorContractionSycl.h:573
const TripleDim triple_dim
Definition: TensorContractionSycl.h:576
const RhsMapper rhs
Definition: TensorContractionSycl.h:571
const StorageIndex numTiles
Definition: TensorContractionSycl.h:575
Scratch scratch
Definition: TensorContractionSycl.h:569
const LhsMapper lhs
Definition: TensorContractionSycl.h:570
OutAccessor out_res
Definition: TensorContractionSycl.h:572
const StorageIndex groupSizeN
Definition: TensorContractionSycl.h:574

◆ TensorContractionKernel() [2/2]

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::TensorContractionKernel ( Scratch  scratch_,
const LhsMapper  lhs_,
const RhsMapper  rhs_,
OutAccessor  out_res_,
const StorageIndex  groupSizeM_,
const StorageIndex  numTiles_,
const TripleDim  triple_dim_ 
)
inline
598  : TensorContractionKernel(scratch_, lhs_, rhs_, out_res_, groupSizeM_, 1, numTiles_, triple_dim_) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionKernel(Scratch scratch_, const LhsMapper lhs_, const RhsMapper rhs_, OutAccessor out_res_, const StorageIndex groupSizeM_, const StorageIndex groupSizeN_, const StorageIndex numTiles_, const TripleDim triple_dim_)
Definition: TensorContractionSycl.h:578

Member Function Documentation

◆ compute_block_per_tile()

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::compute_block_per_tile ( OutScalar *  lhs_block_ptr,
OutScalar *  rhs_block_ptr,
PacketReturnType privateRes 
) const
inline
639  {
640  StorageIndex idx = 0;
641  EIGEN_CONSTEXPR StorageIndex lhs_stride =
642  contraction_tp == contraction_type::local ? (PacketSize * Properties::LocalThreadSizeM) : 1;
644  for (StorageIndex wLPTN = 0; wLPTN < Properties::WorkLoadPerThreadN; wLPTN++) {
645  auto rhsPacket = PacketReturnType{*(rhs_block_ptr + wLPTN)};
646  StorageIndex lhs_index = 0;
648  for (StorageIndex wLPTM = 0; wLPTM < Properties::WorkLoadPerThreadM / PacketSize; wLPTM++) {
649  PacketReturnType lhsPack{};
651  lhs_block_ptr + lhs_index);
652  privateRes[idx] = ::Eigen::internal::pmadd(lhsPack, rhsPacket, privateRes[idx]);
653 
654  lhs_index += lhs_stride;
655  idx++;
656  }
657  }
658  }
#define EIGEN_UNROLL_LOOP
Definition: Macros.h:1298
#define EIGEN_CONSTEXPR
Definition: Macros.h:758
Eigen::TensorSycl::internal::Vectorise< OutScalar, Eigen::SyclDevice, Vectorizable >::PacketReturnType PacketReturnType
Definition: TensorContractionSycl.h:460
static EIGEN_CONSTEXPR int PacketSize
Definition: TensorContractionSycl.h:461
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: AltiVec/PacketMath.h:1218
static EIGEN_DEVICE_FUNC void set_packet(PacketReturnType, Scalar *)
Definition: InteropHeaders.h:145

References EIGEN_CONSTEXPR, EIGEN_UNROLL_LOOP, Eigen::TensorSycl::internal::local, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::PacketSize, Eigen::internal::pmadd(), and Eigen::TensorSycl::internal::PacketWrapper< PacketReturnType, PacketSize >::set_packet().

Referenced by Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::compute_tile_per_panel().

◆ compute_panel()

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
template<bool is_internal_block, typename OutPtr >
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::compute_panel ( const cl::sycl::nd_item< 1 > &  itemID,
ThreadProperties< StorageIndex > &  thread_properties,
OutPtr  out_ptr 
) const
inline
868  {
869  auto tiled_input_block = TiledMemory{thread_properties, scratch.get_pointer()};
870  // Allocate register space
871  PacketReturnType privateRes[Properties::WorkLoadPerThreadM * Properties::WorkLoadPerThreadN / PacketSize] = {
872  PacketReturnType{0}};
873  bool db_offset = 0;
874 
875  while (thread_properties.kSize >= Properties::TileSizeDimK) {
876  compute_tile_per_panel<is_internal_block>(itemID, thread_properties, tiled_input_block, privateRes, db_offset);
877  }
878  if (thread_properties.kSize > 0) {
879  compute_tile_per_panel<false>(itemID, thread_properties, tiled_input_block, privateRes, db_offset);
880  }
881 
882  // Storing the final results in the output
883  store<is_internal_block,
884  contraction_tp == contraction_type::local ? static_cast<StorageIndex>(1) : RHSBlockProperties::nc_stride>(
885  out_ptr + thread_properties.nGlobalOffset * triple_dim.M, privateRes, thread_properties.mGlobalOffset,
886  thread_properties.nGlobalOffset);
887  }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void store(OutPtr *out_ptr, PacketReturnType *privateRes, StorageIndex mGlobalOffset, StorageIndex nGlobalOffset) const
Definition: TensorContractionSycl.h:663
static EIGEN_CONSTEXPR int nc_stride
Definition: TensorContractionSycl.h:329

References Eigen::TensorSycl::internal::ThreadProperties< StorageIndex >::kSize, Eigen::TensorSycl::internal::local, Eigen::TensorSycl::internal::ThreadProperties< StorageIndex >::mGlobalOffset, Eigen::TensorSycl::internal::BlockProperties< is_transposed, is_rhs_, packet_load_, PacketType >::nc_stride, Eigen::TensorSycl::internal::ThreadProperties< StorageIndex >::nGlobalOffset, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::PacketSize, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::scratch, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::store(), and Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::triple_dim.

◆ compute_tile_per_panel()

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
template<bool is_internal_block>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::compute_tile_per_panel ( const cl::sycl::nd_item< 1 > &  itemID,
ThreadProperties< StorageIndex > &  thread_properties,
TiledMemory tiled_input_block,
PacketReturnType privateRes,
bool db_offset 
) const
inline
830  {
831  // Tiling the Rhs block from global to local memory
832  extract_block<RHSBlockProperties, is_internal_block>(
833  rhs, tiled_input_block.rhs_scratch_extract.ptr + (db_offset * Properties::TileSizeDimK * LSDR),
834  tiled_input_block.rhs_extract_index,
835  contraction_tp == contraction_type::local ? thread_properties.nGroupOffset : thread_properties.nGlobalOffset,
836  thread_properties.kGroupOffset - thread_properties.kSize);
837 
838  sync_thread<contraction_tp == contraction_type::no_local>(itemID);
839 
840  // Tiling the Lhs block from global to local memory
841  extract_block<LHSBlockProperties, is_internal_block>(
842  lhs, tiled_input_block.lhs_scratch_extract.ptr + (db_offset * LSDL * Properties::TileSizeDimK),
843  tiled_input_block.lhs_extract_index,
844  contraction_tp == contraction_type::local ? thread_properties.mGroupOffset : thread_properties.mGlobalOffset,
845  thread_properties.kGroupOffset - thread_properties.kSize);
846 
847  // itemID.barrier(cl::sycl::access::fence_space::local_space);
848  sync_thread<contraction_tp == contraction_type::local>(itemID);
849  // switch to compute mede
850  StorageIndex lhs_offset = (db_offset * LSDL * Properties::TileSizeDimK);
851  StorageIndex rhs_offset = (db_offset * Properties::TileSizeDimK * LSDR);
852  // Loop over the values of a single tile
853  for (StorageIndex k = 0; k < Properties::TileSizeDimK; k++) {
854  compute_block_per_tile(tiled_input_block.lhs_scratch_ptr_compute + lhs_offset,
855  tiled_input_block.rhs_scratch_ptr_compute + rhs_offset, privateRes);
856  lhs_offset += LSDL;
857  rhs_offset += LSDR;
858  }
859  // computing the K index for the next tile
860  thread_properties.kSize -= Properties::TileSizeDimK;
861  sync_mem(itemID, db_offset);
862  }
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t< db &&ctp==contraction_type::local > sync_mem(const cl::sycl::nd_item< 1 > &, bool &db_offset) noexcept
Definition: TensorContractionSycl.h:785
static EIGEN_CONSTEXPR StorageIndex LSDR
Definition: TensorContractionSycl.h:486
static EIGEN_CONSTEXPR StorageIndex LSDL
Definition: TensorContractionSycl.h:483
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_block_per_tile(OutScalar *lhs_block_ptr, OutScalar *rhs_block_ptr, PacketReturnType *privateRes) const
Definition: TensorContractionSycl.h:638
char char char int int * k
Definition: level2_impl.h:374

References Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::compute_block_per_tile(), k, Eigen::TensorSycl::internal::ThreadProperties< StorageIndex >::kGroupOffset, Eigen::TensorSycl::internal::ThreadProperties< StorageIndex >::kSize, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::lhs, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::TiledMemory::lhs_extract_index, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::TiledMemory::lhs_scratch_extract, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::TiledMemory::lhs_scratch_ptr_compute, Eigen::TensorSycl::internal::local, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::LSDL, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::LSDR, Eigen::TensorSycl::internal::ThreadProperties< StorageIndex >::mGlobalOffset, Eigen::TensorSycl::internal::ThreadProperties< StorageIndex >::mGroupOffset, Eigen::TensorSycl::internal::ThreadProperties< StorageIndex >::nGlobalOffset, Eigen::TensorSycl::internal::ThreadProperties< StorageIndex >::nGroupOffset, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::MemHolder< contraction_type, StorageIndex >::ptr, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::rhs, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::TiledMemory::rhs_extract_index, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::TiledMemory::rhs_scratch_extract, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::TiledMemory::rhs_scratch_ptr_compute, and Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::sync_mem().

◆ extract_block() [1/2]

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
template<typename InputBlockProperties , bool is_internal_block, typename Input , typename Local , contraction_type contract_tp = contraction_tp>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<contract_tp == contraction_type::local> Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::extract_block ( const Input &  inpt,
Local  local_ptr,
const std::pair< StorageIndex, StorageIndex > &  local_index,
const StorageIndex &  ncOffset,
const StorageIndex  cOffset 
) const
inline
893  {
894  EIGEN_CONSTEXPR StorageIndex TileSizeDimNC =
895  InputBlockProperties::is_rhs ? Properties::TileSizeDimN : Properties::TileSizeDimM;
896  EIGEN_CONSTEXPR StorageIndex LoadPerThread =
897  InputBlockProperties::is_rhs ? Properties::LoadPerThreadRhs : Properties::LoadPerThreadLhs;
898  EIGEN_CONSTEXPR StorageIndex LSD = InputBlockProperties::is_rhs ? LSDR : LSDL;
899  static_assert(((LocalOffset % (TileSizeDimNC / InputBlockProperties::nc_stride) == 0) &&
900  (LocalOffset % (Properties::TileSizeDimK / InputBlockProperties::c_stride) == 0)),
901  " LocalOffset must be divisible by stride");
902  const StorageIndex &NC = InputBlockProperties::is_rhs ? triple_dim.N : triple_dim.M;
903  StorageIndex localThreadNC = local_index.first;
904  StorageIndex localThreadC = local_index.second;
905  auto chk_bound = [&](const StorageIndex &CIndex, const StorageIndex &NCIndex) EIGEN_DEVICE_FUNC {
906  return ((CIndex + InputBlockProperties::c_stride - 1 < triple_dim.K) &&
907  (NCIndex + InputBlockProperties::nc_stride - 1 < NC));
908  };
910  for (StorageIndex lPT = 0; lPT < LoadPerThread / InputBlockProperties::elements_per_access; lPT++) {
911  const StorageIndex CIndex = cOffset + (InputBlockProperties::c_stride * localThreadC);
912  const StorageIndex NCIndex = ncOffset + (InputBlockProperties::nc_stride * localThreadNC);
913  const StorageIndex ld = InputBlockProperties::is_coalesced_layout ? NC : triple_dim.K;
914  if (check_boundary<is_internal_block>(chk_bound(CIndex, NCIndex))) {
915  auto val =
916  read<InputBlockProperties::packet_load, InputBlockProperties::is_coalesced_layout,
917  InputBlockProperties::is_rhs, typename InputBlockProperties::OutType>(inpt, NCIndex, CIndex, ld);
918  write<StorageIndex, (InputBlockProperties::is_coalesced_layout ? 1 : LSD), data_source::local_mem>(
919  val, local_ptr + (InputBlockProperties::nc_stride * localThreadNC) +
920  (InputBlockProperties::c_stride * localThreadC * LSD));
921  } else {
923  for (StorageIndex i = 0; i < InputBlockProperties::elements_per_access; i++) {
924  const StorageIndex nCInd = NCIndex + (InputBlockProperties::is_coalesced_layout ? i : 0);
925  const StorageIndex cInd = CIndex + (InputBlockProperties::is_coalesced_layout ? 0 : i);
926  OutScalar val =
927  (nCInd < NC && cInd < triple_dim.K)
928  ? read<false, InputBlockProperties::is_coalesced_layout, InputBlockProperties::is_rhs, OutScalar>(
929  inpt, nCInd, cInd, ld)
930  : OutScalar(0);
931 
932  write<StorageIndex, (InputBlockProperties::is_coalesced_layout ? 1 : LSD), data_source::local_mem>(
933  val, local_ptr + (InputBlockProperties::nc_stride * localThreadNC) +
934  (InputBlockProperties::is_coalesced_layout ? i : 0) +
935  ((InputBlockProperties::c_stride * localThreadC +
936  (InputBlockProperties::is_coalesced_layout ? 0 : i)) *
937  LSD));
938  }
939  }
940  localThreadNC += (InputBlockProperties::is_coalesced_layout)
941  ? LocalOffset % (TileSizeDimNC / InputBlockProperties::nc_stride)
942  : LocalOffset / (Properties::TileSizeDimK / InputBlockProperties::c_stride);
943  localThreadC += (InputBlockProperties::is_coalesced_layout)
944  ? LocalOffset / (TileSizeDimNC / InputBlockProperties::nc_stride)
945  : LocalOffset % (Properties::TileSizeDimK / InputBlockProperties::c_stride);
946  }
947  }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
static EIGEN_CONSTEXPR StorageIndex LocalOffset
Definition: TensorContractionSycl.h:489
cl::sycl::multi_ptr< OutScalar, cl::sycl::access::address_space::local_space > local_ptr
Definition: TensorContractionSycl.h:480
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t< PacketLoad, PacketType > read(const TensorMapper &tensorMapper, const StorageIndex &NCIndex, const StorageIndex &CIndex, const StorageIndex &ld)
read, a template function used for loading the data from global memory. This function is used to guar...
Definition: TensorContractionSycl.h:162
val
Definition: calibrate.py:119

References EIGEN_CONSTEXPR, EIGEN_DEVICE_FUNC, EIGEN_UNROLL_LOOP, i, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::LocalOffset, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::LSDL, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::LSDR, Eigen::TensorSycl::internal::read(), Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::triple_dim, and calibrate::val.

◆ extract_block() [2/2]

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
template<typename InputBlockProperties , bool is_internal_block, typename Input , typename PrivateReg , contraction_type contract_tp = contraction_tp>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<contract_tp == contraction_type::no_local> Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::extract_block ( const Input &  inpt,
PrivateReg  private_ptr,
const std::pair< StorageIndex, StorageIndex > &  ,
const StorageIndex &  ncOffset,
const StorageIndex  cOffset 
) const
inline
715  {
716  EIGEN_CONSTEXPR StorageIndex LocalThreadSizeNC =
717  InputBlockProperties::is_rhs ? Properties::LocalThreadSizeN : Properties::LocalThreadSizeM;
718  EIGEN_CONSTEXPR StorageIndex WorkLoadPerThreadNC =
719  InputBlockProperties::is_rhs ? Properties::WorkLoadPerThreadN : Properties::WorkLoadPerThreadM;
720  const StorageIndex &NC = InputBlockProperties::is_rhs ? triple_dim.N : triple_dim.M;
721 
722  auto chk_bound = [&](const StorageIndex &CIndex, const StorageIndex &NCIndex) EIGEN_DEVICE_FUNC {
723  return ((CIndex + InputBlockProperties::c_stride - 1 < triple_dim.K) &&
724  (NCIndex + InputBlockProperties::nc_stride - 1 < NC));
725  };
726  const StorageIndex ld = InputBlockProperties::is_coalesced_layout ? NC : triple_dim.K;
727  StorageIndex cIndex = cOffset;
728 
730  for (StorageIndex cId = 0; cId < Properties::TileSizeDimK / InputBlockProperties::c_stride; cId++) {
731  StorageIndex ncIndex = ncOffset;
733  for (StorageIndex ncId = 0; ncId < WorkLoadPerThreadNC / InputBlockProperties::nc_stride; ncId++) {
734  if (check_boundary<is_internal_block>(chk_bound(cIndex, ncIndex))) {
735  auto val =
736  read<InputBlockProperties::packet_load, InputBlockProperties::is_coalesced_layout,
737  InputBlockProperties::is_rhs, typename InputBlockProperties::OutType>(inpt, ncIndex, cIndex, ld);
738 
739  write<StorageIndex, (InputBlockProperties::is_coalesced_layout ? 1 : WorkLoadPerThreadNC),
741  } else {
743  for (StorageIndex i = 0; i < InputBlockProperties::elements_per_access; i++) {
744  const StorageIndex ncInd = ncIndex + (InputBlockProperties::is_coalesced_layout ? i : 0);
745  const StorageIndex cInd = cIndex + (InputBlockProperties::is_coalesced_layout ? 0 : i);
746  OutScalar val =
747  (ncInd < NC && cInd < triple_dim.K)
748  ? read<false, InputBlockProperties::is_coalesced_layout, InputBlockProperties::is_rhs, OutScalar>(
749  inpt, ncInd, cInd, ld)
750  : OutScalar(0);
751  write<StorageIndex, (InputBlockProperties::is_coalesced_layout ? 1 : WorkLoadPerThreadNC),
753  val, private_ptr + (InputBlockProperties::is_coalesced_layout ? i : 0) +
754  ((InputBlockProperties::is_coalesced_layout ? 0 : i) * WorkLoadPerThreadNC));
755  }
756  }
757 
758  // if it is lhs we have to load it packetised when the packet size is > 1, because the output is coalesced. So
759  // even if M is not accessed in a coalesced mode, we have to load packet_size number of m per thread.
760  ncIndex = (!InputBlockProperties::is_rhs && InputBlockProperties::nc_stride == 1 && PacketSize != 1)
761  ? ncOffset + (ncId + 1) % PacketSize + ((ncId + 1) / PacketSize) * LocalThreadSizeNC
762  : (ncIndex + InputBlockProperties::nc_stride * LocalThreadSizeNC);
763  private_ptr += InputBlockProperties::nc_stride;
764  }
765  // the previous for loop ( private_ptr += (ncId * nc_stride)) has already moved ptr with one WorkLoadPerThreadNC
766  private_ptr += (InputBlockProperties::c_stride - 1) * WorkLoadPerThreadNC;
767  cIndex += InputBlockProperties::c_stride;
768  }
769  }
OutScalar * private_ptr
Definition: TensorContractionSycl.h:481
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t< dt !=data_source::global_mem, void > write(PacketType &packet_data, DataScalar ptr)
write, a template function used for storing the data to local memory. This function is used to guaran...
Definition: TensorContractionSycl.h:221

References EIGEN_CONSTEXPR, EIGEN_DEVICE_FUNC, EIGEN_UNROLL_LOOP, i, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::PacketSize, Eigen::TensorSycl::internal::private_mem, Eigen::TensorSycl::internal::read(), Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::triple_dim, calibrate::val, and Eigen::TensorSycl::internal::write().

◆ local_id_extract()

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
template<typename InputBlockProperties , StorageIndex TileSizeDimNC>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::pair<StorageIndex, StorageIndex> Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::local_id_extract ( const StorageIndex &  linearLocalThreadId)
inlinestatic
772  {
773  const StorageIndex localThreadNC =
774  (InputBlockProperties::is_coalesced_layout)
775  ? linearLocalThreadId % (TileSizeDimNC / InputBlockProperties::nc_stride)
776  : linearLocalThreadId / (Properties::TileSizeDimK / InputBlockProperties::c_stride);
777  const StorageIndex localThreadC =
778  (InputBlockProperties::is_coalesced_layout)
779  ? linearLocalThreadId / (TileSizeDimNC / InputBlockProperties::nc_stride)
780  : linearLocalThreadId % (Properties::TileSizeDimK / InputBlockProperties::c_stride);
781  return std::pair<StorageIndex, StorageIndex>(localThreadNC, localThreadC);
782  }

◆ operator()()

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::operator() ( cl::sycl::nd_item< 1 >  itemID) const
inline
600  {
601  const StorageIndex linearLocalThreadId = itemID.get_local_id(0);
602  const StorageIndex nLocalThreadId = linearLocalThreadId / Properties::LocalThreadSizeM;
603  const StorageIndex mLocalThreadId = linearLocalThreadId % Properties::LocalThreadSizeM;
604  const StorageIndex mGroupId = itemID.get_group(0) % groupSizeM;
605  const StorageIndex tmp = itemID.get_group(0) / groupSizeM;
606  const StorageIndex nGroupId = IsFinal ? tmp : tmp % groupSizeN;
607  const StorageIndex kGroupId = IsFinal ? 0 : tmp / groupSizeN;
608  const StorageIndex mGroupOffset = mGroupId * Properties::TileSizeDimM;
609  const StorageIndex nGroupOffset = nGroupId * Properties::TileSizeDimN;
610  const StorageIndex mLocalOffset = PacketSize * mLocalThreadId;
611  const StorageIndex nLocalOffset = NStride * nLocalThreadId;
612  const StorageIndex mGlobalOffset = mGroupOffset + mLocalOffset;
613  const StorageIndex nGlobalOffset = nGroupOffset + nLocalOffset;
614 
615  const StorageIndex kSizePerWG = IsFinal ? triple_dim.K : numTiles * Properties::TileSizeDimK;
616  StorageIndex kGroupOffset = kGroupId * kSizePerWG;
617  const bool is_internal = triple_dim.M - mGroupOffset >= Properties::TileSizeDimM &&
618  triple_dim.N - nGroupOffset >= Properties::TileSizeDimN &&
619  triple_dim.K - kGroupOffset >= kSizePerWG;
620  // this is used to adjust the last block
621  StorageIndex kSize = IsFinal ? triple_dim.K : std::min(kSizePerWG, triple_dim.K - kGroupOffset);
622  // This is used to find out the lats K offset so that kGroupOffset -kSize can compute the coffset for loading to
623  // tile
624  kGroupOffset += kSize;
625 
626  auto thread_properties =
627  ThreadProperties<StorageIndex>(linearLocalThreadId, kGroupId, mGroupOffset, nGroupOffset, kGroupOffset,
628  mLocalOffset, nLocalOffset, mGlobalOffset, nGlobalOffset, kSize, is_internal);
629 
630  auto out_ptr = out_res + (IsFinal ? 0 : thread_properties.kGroupId * triple_dim.M * triple_dim.N);
631 
632  (thread_properties.is_internal) ? compute_panel<true>(itemID, thread_properties, out_ptr)
633  : compute_panel<false>(itemID, thread_properties, out_ptr);
634  }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_panel(const cl::sycl::nd_item< 1 > &itemID, ThreadProperties< StorageIndex > &thread_properties, OutPtr out_ptr) const
Definition: TensorContractionSycl.h:866
static EIGEN_CONSTEXPR StorageIndex NStride
Definition: TensorContractionSycl.h:476
#define min(a, b)
Definition: datatypes.h:22
Eigen::Matrix< Scalar, Dynamic, Dynamic, ColMajor > tmp
Definition: level3_impl.h:365

References Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::groupSizeM, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::groupSizeN, min, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::NStride, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::numTiles, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::out_res, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::PacketSize, tmp, and Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::triple_dim.

◆ store()

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
template<bool is_internal_block, StorageIndex PrivateNStride, typename OutPtr >
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::store ( OutPtr *  out_ptr,
PacketReturnType privateRes,
StorageIndex  mGlobalOffset,
StorageIndex  nGlobalOffset 
) const
inline
664  {
665  auto chk_bound = [&](const StorageIndex &mIndex, const StorageIndex &nIndex) EIGEN_DEVICE_FUNC {
666  return (mIndex + PacketSize - 1 < triple_dim.M && nGlobalOffset + nIndex < triple_dim.N);
667  };
668  // when local memory is not used M and N are both accessed in a coalesced way. However, when local memory is
669  // available the k*N is transposed in the local to N*K therefore, each blocks operates on blockId*
670  // WorkLoadPerThreadN slice of N
671  EIGEN_CONSTEXPR StorageIndex GlobalNStride =
672  contraction_tp == contraction_type::local ? 1 : Properties::LocalThreadSizeN;
674  for (StorageIndex wLPTN = 0; wLPTN < Properties::WorkLoadPerThreadN / PrivateNStride; wLPTN++) {
675  // output leading dimension
676  StorageIndex outputLD = 0;
677  // When local memory is used the PrivateNstride is always 1 because the coalesced access on N is loaded into Local
678  // memory and extracting from local to global is the same as no transposed version. However, when local memory is
679  // not used and RHS is transposed we packetize the load for RHS.
681  for (StorageIndex nId = 0; nId < PrivateNStride; nId++) {
682  StorageIndex globalRow = mGlobalOffset;
684  for (StorageIndex wLPTM = 0; wLPTM < Properties::WorkLoadPerThreadM / PacketSize; wLPTM++) {
685  PacketReturnType privetOut = privateRes[wLPTM];
686  if (check_boundary<is_internal_block>(chk_bound(globalRow, nId))) {
687  // Store the final results in C. The C matrix has always M as a first StorageIndex and N as a second
688  // StorageIndex Therefore it is always coalesced layout
689  write<data_source::global_mem>(privetOut, out_ptr + outputLD + globalRow);
690  } else {
692  for (StorageIndex mId = 0; mId < PacketSize; mId++) {
693  StorageIndex mOffset = globalRow + mId;
694  if (mOffset < triple_dim.M && (nGlobalOffset + nId < triple_dim.N)) {
695  out_ptr[mOffset + outputLD] =
697  }
698  }
699  }
700  globalRow += (PacketSize * Properties::LocalThreadSizeM);
701  }
702  outputLD += triple_dim.M;
703  privateRes += Properties::WorkLoadPerThreadM / PacketSize;
704  }
705  out_ptr += (GlobalNStride * outputLD);
706 
707  nGlobalOffset += (PrivateNStride * GlobalNStride);
708  }
709  }
static EIGEN_DEVICE_FUNC Scalar scalarize(Index, PacketReturnType &)
Definition: InteropHeaders.h:138

References EIGEN_CONSTEXPR, EIGEN_DEVICE_FUNC, EIGEN_UNROLL_LOOP, Eigen::TensorSycl::internal::local, Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::PacketSize, Eigen::TensorSycl::internal::PacketWrapper< PacketReturnType, PacketSize >::scalarize(), and Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::triple_dim.

Referenced by Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::compute_panel().

◆ sync_mem() [1/3]

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
template<contraction_type ctp = contraction_tp>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<ctp == contraction_type::no_local> Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::sync_mem ( const cl::sycl::nd_item< 1 > &  ,
bool  
)
inlinestaticnoexcept
798  {
799  return;
800  }

◆ sync_mem() [2/3]

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
template<bool db = Properties::DoubleBuffer, contraction_type ctp = contraction_tp>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<db && ctp == contraction_type::local> Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::sync_mem ( const cl::sycl::nd_item< 1 > &  ,
bool db_offset 
)
inlinestaticnoexcept

◆ sync_mem() [3/3]

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
template<bool db = Properties::DoubleBuffer, contraction_type ctp = contraction_tp>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<!db && ctp == contraction_type::local> Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::sync_mem ( const cl::sycl::nd_item< 1 > &  itemID,
bool  
)
inlinestaticnoexcept
792  {
793  itemID.barrier(cl::sycl::access::fence_space::local_space);
794  }

◆ sync_thread() [1/3]

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
template<bool need_sync>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<!need_sync> Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::sync_thread ( const cl::sycl::nd_item< 1 > &  )
inlinestatic
821  {
822  return;
823  }

◆ sync_thread() [2/3]

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
template<bool need_sync, contraction_type ctp = contraction_tp>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<need_sync && ctp == contraction_type::no_local> Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::sync_thread ( const cl::sycl::nd_item< 1 > &  )
inlinestaticnoexcept
808  {
809 #ifdef EIGEN_SYCL_ARM_GPU_CACHE_OPTIMISATION
810  itemID.barrier(cl::sycl::access::fence_spacce::local_space);
811 #else
812  return;
813 #endif
814  }

◆ sync_thread() [3/3]

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
template<bool need_sync, contraction_type ctp = contraction_tp>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<need_sync && ctp == contraction_type::local> Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::sync_thread ( const cl::sycl::nd_item< 1 > &  itemID)
inlinestatic
817  {
818  itemID.barrier(cl::sycl::access::fence_space::local_space);
819  }

Member Data Documentation

◆ groupSizeM

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
const StorageIndex Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::groupSizeM

◆ groupSizeN

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
const StorageIndex Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::groupSizeN

◆ is_lhs_transposed

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
EIGEN_CONSTEXPR bool Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::is_lhs_transposed
static

◆ is_rhs_transposed

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
EIGEN_CONSTEXPR bool Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::is_rhs_transposed
static

◆ lhs

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
const LhsMapper Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::lhs

◆ LocalOffset

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
EIGEN_CONSTEXPR StorageIndex Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::LocalOffset = Properties::LocalThreadSizeM * Properties::LocalThreadSizeN
static

◆ LSDL

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
EIGEN_CONSTEXPR StorageIndex Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::LSDL
static

◆ LSDR

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
EIGEN_CONSTEXPR StorageIndex Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::LSDR
static

◆ NStride

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
EIGEN_CONSTEXPR StorageIndex Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::NStride
static

◆ numTiles

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
const StorageIndex Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::numTiles

◆ out_res

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
OutAccessor Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::out_res

◆ PacketSize

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
EIGEN_CONSTEXPR int Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::PacketSize
static

◆ rhs

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
const RhsMapper Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::rhs

◆ scratch

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
Scratch Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::scratch

◆ triple_dim

template<typename OutScalar , typename LhsScalar , typename RhsScalar , typename OutAccessor , typename LhsMapper , typename RhsMapper , typename StorageIndex , typename Properties , typename TripleDim , bool Vectorizable, typename input_mapper_properties , bool IsFinal, contraction_type contraction_tp>
const TripleDim Eigen::TensorSycl::internal::TensorContractionKernel< OutScalar, LhsScalar, RhsScalar, OutAccessor, LhsMapper, RhsMapper, StorageIndex, Properties, TripleDim, Vectorizable, input_mapper_properties, IsFinal, contraction_tp >::triple_dim

The documentation for this class was generated from the following file: