Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range > Class Template Reference

#include <TensorReductionSycl.h>

Public Types

typedef Evaluator::CoeffReturnType CoeffReturnType
 
typedef Evaluator::Index Index
 
typedef OpDefiner< OpType, typename Evaluator::CoeffReturnType, Index,(Evaluator::ReducerTraits::PacketAccess &Evaluator::InputPacketAccess)> OpDef
 
typedef OpDef::type Op
 
typedef Evaluator::EvaluatorPointerType EvaluatorPointerType
 
typedef Evaluator::PacketReturnType PacketReturnType
 
typedef std::conditional_t<(Evaluator::ReducerTraits::PacketAccess &Evaluator::InputPacketAccess), PacketReturnType, CoeffReturnType > OutType
 
typedef cl::sycl::accessor< OutType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local > LocalAccessor
 

Public Member Functions

 FullReductionKernelFunctor (LocalAccessor scratch_, Evaluator evaluator_, EvaluatorPointerType final_output_, Index rng_, OpType op_)
 
void operator() (cl::sycl::nd_item< 1 > itemID) const
 
template<bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t< Vect > compute_reduction (const cl::sycl::nd_item< 1 > &itemID) const
 
template<bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<!Vect > compute_reduction (const cl::sycl::nd_item< 1 > &itemID) const
 

Public Attributes

LocalAccessor scratch
 
Evaluator evaluator
 
EvaluatorPointerType final_output
 
Index rng
 
Op op
 

Member Typedef Documentation

◆ CoeffReturnType

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
typedef Evaluator::CoeffReturnType Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::CoeffReturnType

◆ EvaluatorPointerType

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
typedef Evaluator::EvaluatorPointerType Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::EvaluatorPointerType

◆ Index

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
typedef Evaluator::Index Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::Index

◆ LocalAccessor

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
typedef cl::sycl::accessor<OutType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local> Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::LocalAccessor

◆ Op

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
typedef OpDef::type Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::Op

◆ OpDef

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
typedef OpDefiner<OpType, typename Evaluator::CoeffReturnType, Index, (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)> Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::OpDef

◆ OutType

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
typedef std::conditional_t<(Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess), PacketReturnType, CoeffReturnType> Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::OutType

◆ PacketReturnType

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
typedef Evaluator::PacketReturnType Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::PacketReturnType

Constructor & Destructor Documentation

◆ FullReductionKernelFunctor()

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::FullReductionKernelFunctor ( LocalAccessor  scratch_,
Evaluator  evaluator_,
EvaluatorPointerType  final_output_,
Index  rng_,
OpType  op_ 
)
inline
144  : scratch(scratch_), evaluator(evaluator_), final_output(final_output_), rng(rng_), op(OpDef::get_op(op_)) {}
EvaluatorPointerType final_output
Definition: TensorReductionSycl.h:138
Evaluator evaluator
Definition: TensorReductionSycl.h:137
Index rng
Definition: TensorReductionSycl.h:139
LocalAccessor scratch
Definition: TensorReductionSycl.h:136
Op op
Definition: TensorReductionSycl.h:140
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Op &op)
Definition: TensorReductionSycl.h:41

Member Function Documentation

◆ compute_reduction() [1/2]

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
template<bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<Vect> Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::compute_reduction ( const cl::sycl::nd_item< 1 > &  itemID) const
inline
150  {
151  auto output_ptr = final_output;
152  Index VectorizedRange = (rng / Evaluator::PacketSize) * Evaluator::PacketSize;
153  Index globalid = itemID.get_global_id(0);
154  Index localid = itemID.get_local_id(0);
155  Index step = Evaluator::PacketSize * itemID.get_global_range(0);
156  Index start = Evaluator::PacketSize * globalid;
157  // vectorizable parts
158  PacketReturnType packetAccumulator = op.template initializePacket<PacketReturnType>();
159  for (Index i = start; i < VectorizedRange; i += step) {
160  op.template reducePacket<PacketReturnType>(evaluator.impl().template packet<Unaligned>(i), &packetAccumulator);
161  }
162  globalid += VectorizedRange;
163  // non vectorizable parts
164  for (Index i = globalid; i < rng; i += itemID.get_global_range(0)) {
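165  op.template reducePacket<PacketReturnType>(
166  ::Eigen::TensorSycl::internal::PacketWrapper<PacketReturnType, Evaluator::PacketSize>::convert_to_packet_type(
167  evaluator.impl().coeff(i), op.initialize()),
168  &packetAccumulator);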
169  }
170  scratch[localid] = packetAccumulator =
171  OpDef::finalise_op(op.template finalizePacket<PacketReturnType>(packetAccumulator), rng);
172  // reduction parts // Local size is always power of 2
173  EIGEN_UNROLL_LOOP
174  for (Index offset = local_range / 2; offset > 0; offset /= 2) {
175  itemID.barrier(cl::sycl::access::fence_space::local_space);
176  if (localid < offset) {
177  op.template reducePacket<PacketReturnType>(scratch[localid + offset], &packetAccumulator);
178  scratch[localid] = op.template finalizePacket<PacketReturnType>(packetAccumulator);
179  }
180  }
181  if (localid == 0) {
182  output_ptr[itemID.get_group(0)] =
183  op.finalizeBoth(op.initialize(), op.template finalizePacket<PacketReturnType>(packetAccumulator));
184  }
185  }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define EIGEN_UNROLL_LOOP
Definition: Macros.h:1298
Evaluator::PacketReturnType PacketReturnType
Definition: TensorReductionSycl.h:130
Evaluator::Index Index
Definition: TensorReductionSycl.h:123
void start(const unsigned &i)
(Re-)start i-th timer
Definition: oomph_utilities.cc:243
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType finalise_op(const PacketReturnType &accumulator, const Index &)
Definition: TensorReductionSycl.h:43
Definition: InteropHeaders.h:135

References EIGEN_UNROLL_LOOP, Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::evaluator, Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::final_output, Eigen::TensorSycl::internal::OpDefiner< Op, CoeffReturnType, Index, Vectorizable >::finalise_op(), i, Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::op, Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::rng, Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::scratch, and oomph::CumulativeTimings::start().

Referenced by Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::operator()().

◆ compute_reduction() [2/2]

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
template<bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<!Vect> Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::compute_reduction ( const cl::sycl::nd_item< 1 > &  itemID) const
inline
189  {
190  auto output_ptr = final_output;
191  Index globalid = itemID.get_global_id(0);
192  Index localid = itemID.get_local_id(0);
193  // vectorizable parts
194  CoeffReturnType accumulator = op.initialize();
195  // non vectorizable parts
196  for (Index i = globalid; i < rng; i += itemID.get_global_range(0)) {
197  op.reduce(evaluator.impl().coeff(i), &accumulator);
198  }
199  scratch[localid] = accumulator = OpDef::finalise_op(op.finalize(accumulator), rng);
200 
201  // reduction parts. the local size is always power of 2
202  EIGEN_UNROLL_LOOP
203  for (Index offset = local_range / 2; offset > 0; offset /= 2) {
204  itemID.barrier(cl::sycl::access::fence_space::local_space);
205  if (localid < offset) {
206  op.reduce(scratch[localid + offset], &accumulator);
207  scratch[localid] = op.finalize(accumulator);
208  }
209  }
210  if (localid == 0) {
211  output_ptr[itemID.get_group(0)] = op.finalize(accumulator);
212  }
213  }
Evaluator::CoeffReturnType CoeffReturnType
Definition: TensorReductionSycl.h:122

References EIGEN_UNROLL_LOOP, Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::evaluator, Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::final_output, Eigen::TensorSycl::internal::OpDefiner< Op, CoeffReturnType, Index, Vectorizable >::finalise_op(), i, Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::op, Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::rng, and Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::scratch.

◆ operator()()

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
void Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::operator() ( cl::sycl::nd_item< 1 >  itemID) const
inline
146 { compute_reduction(itemID); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t< Vect > compute_reduction(const cl::sycl::nd_item< 1 > &itemID) const
Definition: TensorReductionSycl.h:149

References Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::compute_reduction().

Member Data Documentation

◆ evaluator

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
Evaluator Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::evaluator

◆ final_output

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
EvaluatorPointerType Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::final_output

◆ op

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
Op Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::op

◆ rng

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
Index Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::rng

◆ scratch

template<typename Evaluator , typename OpType , typename Evaluator::Index local_range>
LocalAccessor Eigen::TensorSycl::internal::FullReductionKernelFunctor< Evaluator, OpType, local_range >::scratch

The documentation for this class was generated from the following file: