Eigen::TensorSycl::internal::ScanLauncher_impl< CoeffReturnType, stp > Struct Template Reference

#include <TensorScanSycl.h>

Static Public Member Functions

template<typename Input , typename EvaluatorPointerType , typename Reducer , typename Index >
static EIGEN_STRONG_INLINE void scan_block (Input in_ptr, EvaluatorPointerType out_ptr, Reducer &accumulator, const Index total_size, const Index scan_size, const Index panel_size, const Index non_scan_size, const Index scan_stride, const Index non_scan_stride, const bool inclusive, const Eigen::SyclDevice &dev)
 

Member Function Documentation

◆ scan_block()

template<typename CoeffReturnType , scan_step stp>
template<typename Input , typename EvaluatorPointerType , typename Reducer , typename Index >
static EIGEN_STRONG_INLINE void Eigen::TensorSycl::internal::ScanLauncher_impl< CoeffReturnType, stp >::scan_block ( Input  in_ptr,
EvaluatorPointerType  out_ptr,
Reducer &  accumulator,
const Index  total_size,
const Index  scan_size,
const Index  panel_size,
const Index  non_scan_size,
const Index  scan_stride,
const Index  non_scan_stride,
const bool  inclusive,
const Eigen::SyclDevice &  dev 
)
inline static
430  {
431  auto scan_info =
432  ScanInfo<Index>(total_size, scan_size, panel_size, non_scan_size, scan_stride, non_scan_stride, dev);
433  const Index temp_pointer_size = scan_info.block_size * non_scan_size * panel_size;
434  const Index scratch_size = scan_info.max_elements_per_block / (ScanParameters<Index>::ScanPerThread / 2);
435  CoeffReturnType *temp_pointer =
436  static_cast<CoeffReturnType *>(dev.allocate_temp(temp_pointer_size * sizeof(CoeffReturnType)));
437  EvaluatorPointerType tmp_global_accessor = dev.get(temp_pointer);
438 
439  typedef ScanKernelFunctor<Input, CoeffReturnType, EvaluatorPointerType, Reducer, Index, stp> ScanFunctor;
440  dev.template binary_kernel_launcher<CoeffReturnType, ScanFunctor>(
441  in_ptr, out_ptr, tmp_global_accessor, scan_info.get_thread_range(), scratch_size,
442  scan_info.get_scan_parameter(), accumulator, inclusive)
443  .wait();
444 
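445  if (scan_info.block_size > 1) {
446  ScanLauncher_impl<CoeffReturnType, scan_step::second>::scan_block(
447  tmp_global_accessor, tmp_global_accessor, accumulator, temp_pointer_size, scan_info.block_size, panel_size,
448  non_scan_size, Index(1), scan_info.block_size, false, dev);
449 
450  SYCLAdjustBlockOffset<EvaluatorPointerType, CoeffReturnType, Reducer, Index>::adjust_scan_block_offset(
451  tmp_global_accessor, out_ptr, accumulator, total_size, scan_size, panel_size, non_scan_size, scan_stride,
452  non_scan_stride, dev);
453  }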
454  dev.deallocate_temp(temp_pointer);
455  }
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
static EIGEN_STRONG_INLINE void adjust_scan_block_offset(EvaluatorPointerType in_ptr, EvaluatorPointerType out_ptr, Reducer &accumulator, const Index total_size, const Index scan_size, const Index panel_size, const Index non_scan_size, const Index scan_stride, const Index non_scan_stride, const Eigen::SyclDevice &dev)
Definition: TensorScanSycl.h:406
static EIGEN_STRONG_INLINE void scan_block(Input in_ptr, EvaluatorPointerType out_ptr, Reducer &accumulator, const Index total_size, const Index scan_size, const Index panel_size, const Index non_scan_size, const Index scan_stride, const Index non_scan_stride, const bool inclusive, const Eigen::SyclDevice &dev)
Definition: TensorScanSycl.h:426
static EIGEN_CONSTEXPR Index ScanPerThread
Definition: TensorScanSycl.h:54

References Eigen::TensorSycl::internal::SYCLAdjustBlockOffset< EvaluatorPointerType, CoeffReturnType, Reducer, Index >::adjust_scan_block_offset().

Referenced by Eigen::internal::ScanLauncher< Self, Reducer, Eigen::SyclDevice, vectorize >::operator()().


The documentation for this struct was generated from the following file: