Eigen::TensorSycl::internal::ScanAdjustmentKernelFunctor< CoeffReturnType, InAccessor, OutAccessor, Op, Index > Struct Template Reference

#include <TensorScanSycl.h>

Public Types

typedef cl::sycl::accessor< CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local > LocalAccessor
 

Public Member Functions

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScanAdjustmentKernelFunctor (LocalAccessor, InAccessor in_accessor_, OutAccessor out_accessor_, const ScanParameters< Index > scanParameters_, Op accumulator_)
 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator() (cl::sycl::nd_item< 1 > itemID) const
 

Public Attributes

InAccessor in_ptr
 
OutAccessor out_ptr
 
const ScanParameters< Index > scanParameters
 
Op accumulator
 

Static Public Attributes

static EIGEN_CONSTEXPR int PacketSize = ScanParameters<Index>::ScanPerThread / 2
 

Member Typedef Documentation

◆ LocalAccessor

template<typename CoeffReturnType , typename InAccessor , typename OutAccessor , typename Op , typename Index >
typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local> Eigen::TensorSycl::internal::ScanAdjustmentKernelFunctor< CoeffReturnType, InAccessor, OutAccessor, Op, Index >::LocalAccessor

Constructor & Destructor Documentation

◆ ScanAdjustmentKernelFunctor()

template<typename CoeffReturnType , typename InAccessor , typename OutAccessor , typename Op , typename Index >
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::TensorSycl::internal::ScanAdjustmentKernelFunctor< CoeffReturnType, InAccessor, OutAccessor, Op, Index >::ScanAdjustmentKernelFunctor ( LocalAccessor  ,
InAccessor  in_accessor_,
OutAccessor  out_accessor_,
const ScanParameters< Index > scanParameters_,
Op  accumulator_ 
)
inline
300  : in_ptr(in_accessor_), out_ptr(out_accessor_), scanParameters(scanParameters_), accumulator(accumulator_) {}
const ScanParameters< Index > scanParameters
Definition: TensorScanSycl.h:294
OutAccessor out_ptr
Definition: TensorScanSycl.h:293
InAccessor in_ptr
Definition: TensorScanSycl.h:292
Op accumulator
Definition: TensorScanSycl.h:295

Member Function Documentation

◆ operator()()

template<typename CoeffReturnType , typename InAccessor , typename OutAccessor , typename Op , typename Index >
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::TensorSycl::internal::ScanAdjustmentKernelFunctor< CoeffReturnType, InAccessor, OutAccessor, Op, Index >::operator() ( cl::sycl::nd_item< 1 >  itemID) const
inline
302  {
303  for (Index loop_offset = 0; loop_offset < scanParameters.loop_range; loop_offset++) {
304  Index data_offset = (itemID.get_global_id(0) + (itemID.get_global_range(0) * loop_offset));
305  Index tmp = data_offset % scanParameters.panel_threads;
306  const Index panel_id = data_offset / scanParameters.panel_threads;
307  const Index group_id = tmp / scanParameters.group_threads;
308  tmp = tmp % scanParameters.group_threads;
309  const Index block_id = tmp / scanParameters.block_threads;
310  const Index local_id = tmp % scanParameters.block_threads;
311 
312  // the actual panel size is scan_size * non_scan_size.
313  // elements_per_panel is roundup to power of 2 for binary tree
314  const Index panel_offset = panel_id * scanParameters.scan_size * scanParameters.non_scan_size;
315  const Index group_offset = group_id * scanParameters.non_scan_stride;
316  // This will be effective when the size is bigger than elements_per_block
317  const Index block_offset = block_id * scanParameters.elements_per_block * scanParameters.scan_stride;
318  const Index thread_offset = ScanParameters<Index>::ScanPerThread * local_id * scanParameters.scan_stride;
319 
320  const Index global_offset = panel_offset + group_offset + block_offset + thread_offset;
321  const Index block_size = scanParameters.elements_per_group / scanParameters.elements_per_block;
322  const Index in_id = (panel_id * block_size * scanParameters.non_scan_size) + (group_id * block_size) + block_id;
323  CoeffReturnType adjust_val = in_ptr[in_id];
324 
325  Index next_elements = 0;
326  EIGEN_UNROLL_LOOP
327  for (Index i = 0; i < ScanParameters<Index>::ScanPerThread; i++) {
328  Index global_id = global_offset + next_elements;
329  if ((((block_id * scanParameters.elements_per_block) + (ScanParameters<Index>::ScanPerThread * local_id) + i) <
330  scanParameters.scan_size) &&
331  (global_id < scanParameters.total_size)) {
332  CoeffReturnType accum = adjust_val;
333  accumulator.reduce(out_ptr[global_id], &accum);
334  out_ptr[global_id] = accumulator.finalize(accum);
335  }
336  next_elements += scanParameters.scan_stride;
337  }
338  }
339  }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define EIGEN_UNROLL_LOOP
Definition: Macros.h:1298
Eigen::Matrix< Scalar, Dynamic, Dynamic, ColMajor > tmp
Definition: level3_impl.h:365
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
const index_t non_scan_stride
Definition: TensorScanSycl.h:58
const index_t scan_stride
Definition: TensorScanSycl.h:59
const index_t total_size
Definition: TensorScanSycl.h:55
const index_t non_scan_size
Definition: TensorScanSycl.h:56
const index_t block_threads
Definition: TensorScanSycl.h:62
static EIGEN_CONSTEXPR Index ScanPerThread
Definition: TensorScanSycl.h:54
const index_t group_threads
Definition: TensorScanSycl.h:61
const index_t panel_threads
Definition: TensorScanSycl.h:60
const index_t elements_per_group
Definition: TensorScanSycl.h:63
const index_t elements_per_block
Definition: TensorScanSycl.h:64
const index_t scan_size
Definition: TensorScanSycl.h:57
const index_t loop_range
Definition: TensorScanSycl.h:65

References Eigen::TensorSycl::internal::ScanAdjustmentKernelFunctor< CoeffReturnType, InAccessor, OutAccessor, Op, Index >::accumulator, Eigen::TensorSycl::internal::ScanParameters< index_t >::block_threads, EIGEN_UNROLL_LOOP, Eigen::TensorSycl::internal::ScanParameters< index_t >::elements_per_block, Eigen::TensorSycl::internal::ScanParameters< index_t >::elements_per_group, Eigen::TensorSycl::internal::ScanParameters< index_t >::group_threads, i, Eigen::TensorSycl::internal::ScanAdjustmentKernelFunctor< CoeffReturnType, InAccessor, OutAccessor, Op, Index >::in_ptr, Eigen::TensorSycl::internal::ScanParameters< index_t >::loop_range, Eigen::TensorSycl::internal::ScanParameters< index_t >::non_scan_size, Eigen::TensorSycl::internal::ScanParameters< index_t >::non_scan_stride, Eigen::TensorSycl::internal::ScanAdjustmentKernelFunctor< CoeffReturnType, InAccessor, OutAccessor, Op, Index >::out_ptr, Eigen::TensorSycl::internal::ScanParameters< index_t >::panel_threads, Eigen::TensorSycl::internal::ScanParameters< index_t >::scan_size, Eigen::TensorSycl::internal::ScanParameters< index_t >::scan_stride, Eigen::TensorSycl::internal::ScanAdjustmentKernelFunctor< CoeffReturnType, InAccessor, OutAccessor, Op, Index >::scanParameters, tmp, and Eigen::TensorSycl::internal::ScanParameters< index_t >::total_size.

Member Data Documentation

◆ accumulator

template<typename CoeffReturnType , typename InAccessor , typename OutAccessor , typename Op , typename Index >
Op Eigen::TensorSycl::internal::ScanAdjustmentKernelFunctor< CoeffReturnType, InAccessor, OutAccessor, Op, Index >::accumulator

◆ in_ptr

template<typename CoeffReturnType , typename InAccessor , typename OutAccessor , typename Op , typename Index >
InAccessor Eigen::TensorSycl::internal::ScanAdjustmentKernelFunctor< CoeffReturnType, InAccessor, OutAccessor, Op, Index >::in_ptr

◆ out_ptr

template<typename CoeffReturnType , typename InAccessor , typename OutAccessor , typename Op , typename Index >
OutAccessor Eigen::TensorSycl::internal::ScanAdjustmentKernelFunctor< CoeffReturnType, InAccessor, OutAccessor, Op, Index >::out_ptr

◆ PacketSize

template<typename CoeffReturnType , typename InAccessor , typename OutAccessor , typename Op , typename Index >
EIGEN_CONSTEXPR int Eigen::TensorSycl::internal::ScanAdjustmentKernelFunctor< CoeffReturnType, InAccessor, OutAccessor, Op, Index >::PacketSize = ScanParameters<Index>::ScanPerThread / 2
static

◆ scanParameters

template<typename CoeffReturnType , typename InAccessor , typename OutAccessor , typename Op , typename Index >
const ScanParameters<Index> Eigen::TensorSycl::internal::ScanAdjustmentKernelFunctor< CoeffReturnType, InAccessor, OutAccessor, Op, Index >::scanParameters

The documentation for this struct was generated from the following file: