Eigen::TensorSycl::internal::SecondStepFullReducer< CoeffReturnType, OpType, InputAccessor, OutputAccessor, Index, local_range > Struct Template Reference

#include <TensorReductionSycl.h>

Public Types

typedef cl::sycl::accessor< CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local > LocalAccessor
 
typedef OpDefiner< OpType, CoeffReturnType, Index, true > OpDef
 
typedef OpDef::type Op
 

Public Member Functions

 SecondStepFullReducer (LocalAccessor scratch_, InputAccessor aI_, OutputAccessor outAcc_, OpType op_)
 
void operator() (cl::sycl::nd_item< 1 > itemID) const
 

Public Attributes

LocalAccessor scratch
 
InputAccessor aI
 
OutputAccessor outAcc
 
Op op
 

Member Typedef Documentation

◆ LocalAccessor

template<typename CoeffReturnType , typename OpType , typename InputAccessor , typename OutputAccessor , typename Index , Index local_range>
typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local> Eigen::TensorSycl::internal::SecondStepFullReducer< CoeffReturnType, OpType, InputAccessor, OutputAccessor, Index, local_range >::LocalAccessor

◆ Op

template<typename CoeffReturnType , typename OpType , typename InputAccessor , typename OutputAccessor , typename Index , Index local_range>
typedef OpDef::type Eigen::TensorSycl::internal::SecondStepFullReducer< CoeffReturnType, OpType, InputAccessor, OutputAccessor, Index, local_range >::Op

◆ OpDef

template<typename CoeffReturnType , typename OpType , typename InputAccessor , typename OutputAccessor , typename Index , Index local_range>
typedef OpDefiner<OpType, CoeffReturnType, Index, true> Eigen::TensorSycl::internal::SecondStepFullReducer< CoeffReturnType, OpType, InputAccessor, OutputAccessor, Index, local_range >::OpDef

Constructor & Destructor Documentation

◆ SecondStepFullReducer()

template<typename CoeffReturnType , typename OpType , typename InputAccessor , typename OutputAccessor , typename Index , Index local_range>
Eigen::TensorSycl::internal::SecondStepFullReducer< CoeffReturnType, OpType, InputAccessor, OutputAccessor, Index, local_range >::SecondStepFullReducer ( LocalAccessor  scratch_,
InputAccessor  aI_,
OutputAccessor  outAcc_,
OpType  op_ 
)
inline
89  : scratch(scratch_), aI(aI_), outAcc(outAcc_), op(OpDef::get_op(op_)) {}
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Op &op)
Definition: TensorReductionSycl.h:41
Op op
Definition: TensorReductionSycl.h:87
OutputAccessor outAcc
Definition: TensorReductionSycl.h:86
LocalAccessor scratch
Definition: TensorReductionSycl.h:84
InputAccessor aI
Definition: TensorReductionSycl.h:85

Member Function Documentation

◆ operator()()

template<typename CoeffReturnType , typename OpType , typename InputAccessor , typename OutputAccessor , typename Index , Index local_range>
void Eigen::TensorSycl::internal::SecondStepFullReducer< CoeffReturnType, OpType, InputAccessor, OutputAccessor, Index, local_range >::operator() ( cl::sycl::nd_item< 1 >  itemID) const
inline
91  {
92  // Our empirical research shows that the best performance is achieved
93  // when each thread has only one element to reduce in the second step;
94  // in that case the second-step reduction time is almost negligible.
95  // Hence, in the second step of the reduction the input size is fixed to
96  // the local size, so each thread reads exactly one element. The
97  // algorithm must be changed if the number of elements reduced per thread
98  // in the second step is greater than 1; otherwise the result will be wrong.
99  const Index localid = itemID.get_local_id(0);
100  auto aInPtr = aI + localid;
101  auto aOutPtr = outAcc;
102  CoeffReturnType *scratchptr = scratch.get_pointer();
103  CoeffReturnType accumulator = *aInPtr;
104 
105  scratchptr[localid] = op.finalize(accumulator);
106  for (Index offset = itemID.get_local_range(0) / 2; offset > 0; offset /= 2) {
107  itemID.barrier(cl::sycl::access::fence_space::local_space);
108  if (localid < offset) {
109  op.reduce(scratchptr[localid + offset], &accumulator);
110  scratchptr[localid] = op.finalize(accumulator);
111  }
112  }
113  if (localid == 0) *aOutPtr = op.finalize(accumulator);
114  }
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83

References Eigen::TensorSycl::internal::SecondStepFullReducer< CoeffReturnType, OpType, InputAccessor, OutputAccessor, Index, local_range >::aI, Eigen::TensorSycl::internal::SecondStepFullReducer< CoeffReturnType, OpType, InputAccessor, OutputAccessor, Index, local_range >::op, Eigen::TensorSycl::internal::SecondStepFullReducer< CoeffReturnType, OpType, InputAccessor, OutputAccessor, Index, local_range >::outAcc, and Eigen::TensorSycl::internal::SecondStepFullReducer< CoeffReturnType, OpType, InputAccessor, OutputAccessor, Index, local_range >::scratch.

Member Data Documentation

◆ aI

template<typename CoeffReturnType , typename OpType , typename InputAccessor , typename OutputAccessor , typename Index , Index local_range>
InputAccessor Eigen::TensorSycl::internal::SecondStepFullReducer< CoeffReturnType, OpType, InputAccessor, OutputAccessor, Index, local_range >::aI

◆ op

template<typename CoeffReturnType , typename OpType , typename InputAccessor , typename OutputAccessor , typename Index , Index local_range>
Op Eigen::TensorSycl::internal::SecondStepFullReducer< CoeffReturnType, OpType, InputAccessor, OutputAccessor, Index, local_range >::op

◆ outAcc

template<typename CoeffReturnType , typename OpType , typename InputAccessor , typename OutputAccessor , typename Index , Index local_range>
OutputAccessor Eigen::TensorSycl::internal::SecondStepFullReducer< CoeffReturnType, OpType, InputAccessor, OutputAccessor, Index, local_range >::outAcc

◆ scratch

template<typename CoeffReturnType , typename OpType , typename InputAccessor , typename OutputAccessor , typename Index , Index local_range>
LocalAccessor Eigen::TensorSycl::internal::SecondStepFullReducer< CoeffReturnType, OpType, InputAccessor, OutputAccessor, Index, local_range >::scratch

The documentation for this struct was generated from the following file: