Eigen::TensorSycl::internal::ScanInfo< Index > Struct Template Reference

#include <TensorScanSycl.h>

Public Member Functions

EIGEN_STRONG_INLINE ScanInfo (const Index &total_size_, const Index &scan_size_, const Index &panel_size_, const Index &non_scan_size_, const Index &scan_stride_, const Index &non_scan_stride_, const Eigen::SyclDevice &dev_)
 
ScanParameters< Index > get_scan_parameter ()
 
cl::sycl::nd_range< 1 > get_thread_range ()
 

Public Attributes

const Index & total_size
 
const Index & scan_size
 
const Index & panel_size
 
const Index & non_scan_size
 
const Index & scan_stride
 
const Index & non_scan_stride
 
Index max_elements_per_block
 
Index block_size
 
Index panel_threads
 
Index group_threads
 
Index block_threads
 
Index elements_per_group
 
Index elements_per_block
 
Index loop_range
 
Index global_range
 
Index local_range
 
const Eigen::SyclDevice & dev
 

Constructor & Destructor Documentation

◆ ScanInfo()

template<typename Index >
EIGEN_STRONG_INLINE Eigen::TensorSycl::internal::ScanInfo< Index >::ScanInfo ( const Index &  total_size_,
const Index &  scan_size_,
const Index &  panel_size_,
const Index &  non_scan_size_,
const Index &  scan_stride_,
const Index &  non_scan_stride_,
const Eigen::SyclDevice &  dev_ 
)
inline
365  : total_size(total_size_),
366  scan_size(scan_size_),
367  panel_size(panel_size_),
368  non_scan_size(non_scan_size_),
369  scan_stride(scan_stride_),
370  non_scan_stride(non_scan_stride_),
371  dev(dev_) {
372  // must be power of 2
373  local_range = std::min(Index(dev.getNearestPowerOfTwoWorkGroupSize()),
374  Index(EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1));
375 
377 
379  dev.getPowerOfTwo(Index(roundUp(Index(scan_size), ScanParameters<Index>::ScanPerThread)), true);
380  const Index elements_per_panel = elements_per_group * non_scan_size;
386 #ifdef EIGEN_SYCL_MAX_GLOBAL_RANGE
388 #else
389  const Index max_threads = panel_threads * panel_size;
390 #endif
391  global_range = roundUp(max_threads, local_range);
392  loop_range = Index(
393  std::ceil(double(elements_per_panel * panel_size) / (global_range * ScanParameters<Index>::ScanPerThread)));
394  }
#define EIGEN_SYCL_MAX_GLOBAL_RANGE
Definition: TensorScanSycl.h:48
#define min(a, b)
Definition: datatypes.h:22
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16 &a)
Definition: BFloat16.h:644
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
Index max_elements_per_block
Definition: TensorScanSycl.h:351
const Index & total_size
Definition: TensorScanSycl.h:344
const Index & non_scan_stride
Definition: TensorScanSycl.h:349
Index group_threads
Definition: TensorScanSycl.h:354
const Index & non_scan_size
Definition: TensorScanSycl.h:347
const Index & scan_stride
Definition: TensorScanSycl.h:348
const Eigen::SyclDevice & dev
Definition: TensorScanSycl.h:361
Index local_range
Definition: TensorScanSycl.h:360
Index elements_per_block
Definition: TensorScanSycl.h:357
Index block_threads
Definition: TensorScanSycl.h:355
Index block_size
Definition: TensorScanSycl.h:352
Index panel_threads
Definition: TensorScanSycl.h:353
Index elements_per_group
Definition: TensorScanSycl.h:356
Index loop_range
Definition: TensorScanSycl.h:358
const Index & panel_size
Definition: TensorScanSycl.h:346
const Index & scan_size
Definition: TensorScanSycl.h:345
Index global_range
Definition: TensorScanSycl.h:359
static EIGEN_CONSTEXPR Index ScanPerThread
Definition: TensorScanSycl.h:54

References Eigen::TensorSycl::internal::ScanInfo< Index >::block_size, Eigen::TensorSycl::internal::ScanInfo< Index >::block_threads, Eigen::bfloat16_impl::ceil(), Eigen::TensorSycl::internal::ScanInfo< Index >::dev, EIGEN_SYCL_MAX_GLOBAL_RANGE, Eigen::TensorSycl::internal::ScanInfo< Index >::elements_per_block, Eigen::TensorSycl::internal::ScanInfo< Index >::elements_per_group, Eigen::TensorSycl::internal::ScanInfo< Index >::global_range, Eigen::TensorSycl::internal::ScanInfo< Index >::group_threads, Eigen::TensorSycl::internal::ScanInfo< Index >::local_range, Eigen::TensorSycl::internal::ScanInfo< Index >::loop_range, Eigen::TensorSycl::internal::ScanInfo< Index >::max_elements_per_block, min, Eigen::TensorSycl::internal::ScanInfo< Index >::non_scan_size, Eigen::TensorSycl::internal::ScanInfo< Index >::panel_size, Eigen::TensorSycl::internal::ScanInfo< Index >::panel_threads, and Eigen::TensorSycl::internal::ScanInfo< Index >::scan_size.

Member Function Documentation

◆ get_scan_parameter()

◆ get_thread_range()

template<typename Index >
cl::sycl::nd_range<1> Eigen::TensorSycl::internal::ScanInfo< Index >::get_thread_range ( )
inline
399  {
400  return cl::sycl::nd_range<1>(cl::sycl::range<1>(global_range), cl::sycl::range<1>(local_range));
401  }

References Eigen::TensorSycl::internal::ScanInfo< Index >::global_range, and Eigen::TensorSycl::internal::ScanInfo< Index >::local_range.

Member Data Documentation

◆ block_size

◆ block_threads

◆ dev

template<typename Index >
const Eigen::SyclDevice& Eigen::TensorSycl::internal::ScanInfo< Index >::dev

◆ elements_per_block

◆ elements_per_group

◆ global_range

◆ group_threads

◆ local_range

◆ loop_range

◆ max_elements_per_block

template<typename Index >
Index Eigen::TensorSycl::internal::ScanInfo< Index >::max_elements_per_block

◆ non_scan_size

◆ non_scan_stride

◆ panel_size

◆ panel_threads

◆ scan_size

◆ scan_stride

◆ total_size


The documentation for this struct was generated from the following file: