Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt > Struct Template Reference

#include <TensorReductionSycl.h>

Public Types

typedef Evaluator::CoeffReturnType CoeffReturnType
 
typedef Evaluator::EvaluatorPointerType EvaluatorPointerType
 
typedef Evaluator::Index Index
 
typedef OpDefiner< OpType, CoeffReturnType, Index, false > OpDef
 
typedef OpDef::type Op
 
typedef cl::sycl::accessor< CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local > ScratchAcc
 

Public Member Functions

 PartialReductionKernel (ScratchAcc scratch_, Evaluator evaluator_, EvaluatorPointerType output_accessor_, OpType op_, const Index preserve_elements_num_groups_, const Index reduce_elements_num_groups_, const Index num_coeffs_to_preserve_, const Index num_coeffs_to_reduce_)
 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void element_wise_reduce (Index globalRId, Index globalPId, CoeffReturnType &accumulator) const
 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator() (cl::sycl::nd_item< 1 > itemID) const
 

Public Attributes

ScratchAcc scratch
 
Evaluator evaluator
 
EvaluatorPointerType output_accessor
 
Op op
 
const Index preserve_elements_num_groups
 
const Index reduce_elements_num_groups
 
const Index num_coeffs_to_preserve
 
const Index num_coeffs_to_reduce
 

Member Typedef Documentation

◆ CoeffReturnType

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
typedef Evaluator::CoeffReturnType Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::CoeffReturnType

◆ EvaluatorPointerType

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
typedef Evaluator::EvaluatorPointerType Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::EvaluatorPointerType

◆ Index

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
typedef Evaluator::Index Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::Index

◆ Op

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
typedef OpDef::type Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::Op

◆ OpDef

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
typedef OpDefiner<OpType, CoeffReturnType, Index, false> Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::OpDef

◆ ScratchAcc

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local> Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::ScratchAcc

Constructor & Destructor Documentation

◆ PartialReductionKernel()

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::PartialReductionKernel ( ScratchAcc  scratch_,
Evaluator  evaluator_,
EvaluatorPointerType  output_accessor_,
OpType  op_,
const Index  preserve_elements_num_groups_,
const Index  reduce_elements_num_groups_,
const Index  num_coeffs_to_preserve_,
const Index  num_coeffs_to_reduce_ 
)
inline
278  : scratch(scratch_),
279  evaluator(evaluator_),
280  output_accessor(output_accessor_),
281  op(OpDef::get_op(op_)),
282  preserve_elements_num_groups(preserve_elements_num_groups_),
283  reduce_elements_num_groups(reduce_elements_num_groups_),
284  num_coeffs_to_preserve(num_coeffs_to_preserve_),
285  num_coeffs_to_reduce(num_coeffs_to_reduce_) {}
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Op &op)
Definition: TensorReductionSycl.h:41
const Index preserve_elements_num_groups
Definition: TensorReductionSycl.h:270
const Index num_coeffs_to_reduce
Definition: TensorReductionSycl.h:273
ScratchAcc scratch
Definition: TensorReductionSycl.h:266
const Index num_coeffs_to_preserve
Definition: TensorReductionSycl.h:272
EvaluatorPointerType output_accessor
Definition: TensorReductionSycl.h:268
const Index reduce_elements_num_groups
Definition: TensorReductionSycl.h:271
Op op
Definition: TensorReductionSycl.h:269
Evaluator evaluator
Definition: TensorReductionSycl.h:267

Member Function Documentation

◆ element_wise_reduce()

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::element_wise_reduce ( Index  globalRId,
Index  globalPId,
CoeffReturnType &  accumulator 
) const
inline
288  {
289  if (globalPId >= num_coeffs_to_preserve) {
290  return;
291  }
292  Index global_offset = rt == reduction_dim::outer_most ? globalPId + (globalRId * num_coeffs_to_preserve)
293  : globalRId + (globalPId * num_coeffs_to_reduce);
294  Index localOffset = globalRId;
295 
296  const Index per_thread_local_stride = PannelParameters::LocalThreadSizeR * reduce_elements_num_groups;
297  const Index per_thread_global_stride =
298  rt == reduction_dim::outer_most ? num_coeffs_to_preserve * per_thread_local_stride : per_thread_local_stride;
299  for (Index i = globalRId; i < num_coeffs_to_reduce; i += per_thread_local_stride) {
300  op.reduce(evaluator.impl().coeff(global_offset), &accumulator);
301  localOffset += per_thread_local_stride;
302  global_offset += per_thread_global_stride;
303  }
304  }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
Evaluator::Index Index
Definition: TensorReductionSycl.h:261

References Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::evaluator, i, Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::num_coeffs_to_preserve, Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::num_coeffs_to_reduce, Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::op, Eigen::TensorSycl::internal::outer_most, and Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::reduce_elements_num_groups.

Referenced by Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::operator()().

◆ operator()()

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::operator() ( cl::sycl::nd_item< 1 >  itemID) const
inline
305  {
306  const Index linearLocalThreadId = itemID.get_local_id(0);
307  Index pLocalThreadId = rt == reduction_dim::outer_most ? linearLocalThreadId % PannelParameters::LocalThreadSizeP
308  : linearLocalThreadId / PannelParameters::LocalThreadSizeR;
309  Index rLocalThreadId = rt == reduction_dim::outer_most ? linearLocalThreadId / PannelParameters::LocalThreadSizeP
310  : linearLocalThreadId % PannelParameters::LocalThreadSizeR;
311  const Index pGroupId = rt == reduction_dim::outer_most ? itemID.get_group(0) % preserve_elements_num_groups
312  : itemID.get_group(0) / reduce_elements_num_groups;
313  const Index rGroupId = rt == reduction_dim::outer_most ? itemID.get_group(0) / preserve_elements_num_groups
314  : itemID.get_group(0) % reduce_elements_num_groups;
315 
316  Index globalPId = pGroupId * PannelParameters::LocalThreadSizeP + pLocalThreadId;
317  const Index globalRId = rGroupId * PannelParameters::LocalThreadSizeR + rLocalThreadId;
318  CoeffReturnType *scratchPtr = scratch.get_pointer();
319  auto outPtr = output_accessor + (reduce_elements_num_groups > 1 ? rGroupId * num_coeffs_to_preserve : 0);
320  CoeffReturnType accumulator = op.initialize();
321 
322  element_wise_reduce(globalRId, globalPId, accumulator);
323 
324  accumulator = OpDef::finalise_op(op.finalize(accumulator), num_coeffs_to_reduce);
325  scratchPtr[pLocalThreadId + rLocalThreadId * (PannelParameters::LocalThreadSizeP + PannelParameters::BC)] =
326  accumulator;
327  if (rt == reduction_dim::inner_most) {
328  pLocalThreadId = linearLocalThreadId % PannelParameters::LocalThreadSizeP;
329  rLocalThreadId = linearLocalThreadId / PannelParameters::LocalThreadSizeP;
330  globalPId = pGroupId * PannelParameters::LocalThreadSizeP + pLocalThreadId;
331  }
332 
333  /* Apply the reduction operation between the current local
334  * id and the one on the other half of the vector. */
335  auto out_scratch_ptr =
336  scratchPtr + (pLocalThreadId + (rLocalThreadId * (PannelParameters::LocalThreadSizeP + PannelParameters::BC)));
337  itemID.barrier(cl::sycl::access::fence_space::local_space);
338  if (rt == reduction_dim::inner_most) {
339  accumulator = *out_scratch_ptr;
340  }
341  // The Local LocalThreadSizeR is always power of 2
342  EIGEN_UNROLL_LOOP
343  for (Index offset = PannelParameters::LocalThreadSizeR >> 1; offset > 0; offset >>= 1) {
344  if (rLocalThreadId < offset) {
345  op.reduce(out_scratch_ptr[(PannelParameters::LocalThreadSizeP + PannelParameters::BC) * offset], &accumulator);
346  // The result has already been divided for mean reducer in the
347  // previous reduction so no need to divide furthermore
348  *out_scratch_ptr = op.finalize(accumulator);
349  }
350  /* All threads collectively read from global memory into local.
351  * The barrier ensures all threads' IO is resolved before
352  * execution continues (strictly speaking, all threads within
353  * a single work-group - there is no co-ordination between
354  * work-groups, only work-items). */
355  itemID.barrier(cl::sycl::access::fence_space::local_space);
356  }
357 
358  if (rLocalThreadId == 0 && (globalPId < num_coeffs_to_preserve)) {
359  outPtr[globalPId] = op.finalize(accumulator);
360  }
361  }
#define EIGEN_UNROLL_LOOP
Definition: Macros.h:1298
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType finalise_op(const PacketReturnType &accumulator, const Index &)
Definition: TensorReductionSycl.h:43
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void element_wise_reduce(Index globalRId, Index globalPId, CoeffReturnType &accumulator) const
Definition: TensorReductionSycl.h:287
Evaluator::CoeffReturnType CoeffReturnType
Definition: TensorReductionSycl.h:259

References EIGEN_UNROLL_LOOP, Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::element_wise_reduce(), Eigen::TensorSycl::internal::OpDefiner< Op, CoeffReturnType, Index, Vectorizable >::finalise_op(), Eigen::TensorSycl::internal::inner_most, Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::num_coeffs_to_preserve, Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::num_coeffs_to_reduce, Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::op, Eigen::TensorSycl::internal::outer_most, Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::output_accessor, Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::preserve_elements_num_groups, Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::reduce_elements_num_groups, and Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::scratch.

Member Data Documentation

◆ evaluator

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
Evaluator Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::evaluator

◆ num_coeffs_to_preserve

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
const Index Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::num_coeffs_to_preserve

◆ num_coeffs_to_reduce

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
const Index Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::num_coeffs_to_reduce

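◆ op

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
Op Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::op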

◆ output_accessor

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
EvaluatorPointerType Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::output_accessor

◆ preserve_elements_num_groups

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
const Index Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::preserve_elements_num_groups

◆ reduce_elements_num_groups

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
const Index Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::reduce_elements_num_groups

◆ scratch

template<typename Evaluator , typename OpType , typename PannelParameters , reduction_dim rt>
ScratchAcc Eigen::TensorSycl::internal::PartialReductionKernel< Evaluator, OpType, PannelParameters, rt >::scratch

The documentation for this struct was generated from the following file: