Eigen::CoreThreadPoolDevice Struct Reference

#include <CoreThreadPoolDevice.h>

Public Types

using Task = std::function< void()>
 

Public Member Functions

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoreThreadPoolDevice (ThreadPool &pool, float threadCostThreshold=3e-5f)
 
template<int PacketSize>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int calculateLevels (Index size, float cost) const
 
template<typename UnaryFunctor , int PacketSize>
EIGEN_DEVICE_FUNC EIGEN_PARALLEL_FOR_INLINE void parallelForImpl (Index begin, Index end, UnaryFunctor &f, Barrier &barrier, int level)
 
template<typename BinaryFunctor , int PacketSize>
EIGEN_DEVICE_FUNC EIGEN_PARALLEL_FOR_INLINE void parallelForImpl (Index outerBegin, Index outerEnd, Index innerBegin, Index innerEnd, BinaryFunctor &f, Barrier &barrier, int level)
 
template<typename UnaryFunctor , int PacketSize>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void parallelFor (Index begin, Index end, UnaryFunctor &f, float cost)
 
template<typename BinaryFunctor , int PacketSize>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void parallelFor (Index outerBegin, Index outerEnd, Index innerBegin, Index innerEnd, BinaryFunctor &f, float cost)
 

Public Attributes

ThreadPool & m_pool
 
float m_costFactor
 

Member Typedef Documentation

◆ Task

using Eigen::CoreThreadPoolDevice::Task = std::function<void()>

Constructor & Destructor Documentation

◆ CoreThreadPoolDevice()

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::CoreThreadPoolDevice::CoreThreadPoolDevice ( ThreadPool &  pool,
float  threadCostThreshold = 3e-5f 
)
inline
45  : m_pool(pool) {
46  eigen_assert(threadCostThreshold >= 0.0f && "threadCostThreshold must be non-negative");
47  m_costFactor = threadCostThreshold;
48  }
#define eigen_assert(x)
Definition: Macros.h:910
ThreadPool & m_pool
Definition: CoreThreadPoolDevice.h:143
float m_costFactor
Definition: CoreThreadPoolDevice.h:146

References eigen_assert, m_costFactor, and m_pool.

Member Function Documentation

◆ calculateLevels()

template<int PacketSize>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int Eigen::CoreThreadPoolDevice::calculateLevels ( Index  size,
float  cost 
) const
inline
51  {
52  eigen_assert(cost >= 0.0f && "cost must be non-negative");
53  Index numOps = size / PacketSize;
54  int actualThreads = numOps < m_pool.NumThreads() ? static_cast<int>(numOps) : m_pool.NumThreads();
55  float totalCost = static_cast<float>(numOps) * cost;
56  float idealThreads = totalCost * m_costFactor;
57  if (idealThreads < static_cast<float>(actualThreads)) {
58  idealThreads = numext::maxi(idealThreads, 1.0f);
59  actualThreads = numext::mini(actualThreads, static_cast<int>(idealThreads));
60  }
61  int maxLevel = internal::log2_ceil(actualThreads);
62  return maxLevel;
63  }
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
int NumThreads() const EIGEN_FINAL
Definition: NonBlockingThreadPool.h:205
int log2_ceil(const BitsType &x)
Definition: MathFunctions.h:758
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T &x, const T &y)
Definition: MathFunctions.h:926
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T &x, const T &y)
Definition: MathFunctions.h:920
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83

References eigen_assert, Eigen::internal::log2_ceil(), m_costFactor, m_pool, Eigen::numext::maxi(), Eigen::numext::mini(), Eigen::ThreadPoolTempl< Environment >::NumThreads(), and size.

◆ parallelFor() [1/2]

template<typename UnaryFunctor , int PacketSize>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::CoreThreadPoolDevice::parallelFor ( Index  begin,
Index  end,
UnaryFunctor &  f,
float  cost 
)
inline
123  {
124  Index size = end - begin;
125  int maxLevel = calculateLevels<PacketSize>(size, cost);
126  Barrier barrier(1 << maxLevel);
127  parallelForImpl<UnaryFunctor, PacketSize>(begin, end, f, barrier, maxLevel);
128  barrier.Wait();
129  }
static int f(const TensorMap< Tensor< int, 3 > > &tensor)
Definition: cxx11_tensor_map.cpp:237
static constexpr lastp1_t end
Definition: IndexedViewHelper.h:79

References Eigen::placeholders::end, f(), size, and Eigen::Barrier::Wait().

◆ parallelFor() [2/2]

template<typename BinaryFunctor , int PacketSize>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::CoreThreadPoolDevice::parallelFor ( Index  outerBegin,
Index  outerEnd,
Index  innerBegin,
Index  innerEnd,
BinaryFunctor &  f,
float  cost 
)
inline
133  {
134  Index outerSize = outerEnd - outerBegin;
135  Index innerSize = innerEnd - innerBegin;
136  Index size = outerSize * innerSize;
137  int maxLevel = calculateLevels<PacketSize>(size, cost);
138  Barrier barrier(1 << maxLevel);
139  parallelForImpl<BinaryFunctor, PacketSize>(outerBegin, outerEnd, innerBegin, innerEnd, f, barrier, maxLevel);
140  barrier.Wait();
141  }

References f(), size, and Eigen::Barrier::Wait().

◆ parallelForImpl() [1/2]

template<typename UnaryFunctor , int PacketSize>
EIGEN_DEVICE_FUNC EIGEN_PARALLEL_FOR_INLINE void Eigen::CoreThreadPoolDevice::parallelForImpl ( Index  begin,
Index  end,
UnaryFunctor &  f,
Barrier &  barrier,
int  level 
)
inline
74  {
75  while (level > 0) {
76  level--;
77  Index size = end - begin;
78  eigen_assert(size % PacketSize == 0 && "this function assumes size is a multiple of PacketSize");
79  Index mid = begin + numext::round_down(size >> 1, PacketSize);
80  Task right = [this, mid, end, &f, &barrier, level]() {
81  parallelForImpl<UnaryFunctor, PacketSize>(mid, end, f, barrier, level);
82  };
83  m_pool.Schedule(std::move(right));
84  end = mid;
85  }
86  for (Index i = begin; i < end; i += PacketSize) f(i);
87  barrier.Notify();
88  }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
void Schedule(std::function< void()> fn) EIGEN_OVERRIDE
Definition: NonBlockingThreadPool.h:120
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_CONSTEXPR T round_down(T a, U b)
Definition: MathFunctions.h:1266
std::function< void()> Task
Definition: CoreThreadPoolDevice.h:43

References eigen_assert, Eigen::placeholders::end, f(), i, m_pool, Eigen::Barrier::Notify(), Eigen::numext::round_down(), Eigen::ThreadPoolTempl< Environment >::Schedule(), and size.

◆ parallelForImpl() [2/2]

template<typename BinaryFunctor , int PacketSize>
EIGEN_DEVICE_FUNC EIGEN_PARALLEL_FOR_INLINE void Eigen::CoreThreadPoolDevice::parallelForImpl ( Index  outerBegin,
Index  outerEnd,
Index  innerBegin,
Index  innerEnd,
BinaryFunctor &  f,
Barrier &  barrier,
int  level 
)
inline
93  {
94  while (level > 0) {
95  level--;
96  Index outerSize = outerEnd - outerBegin;
97  if (outerSize > 1) {
98  Index outerMid = outerBegin + (outerSize >> 1);
99  Task right = [this, &f, &barrier, outerMid, outerEnd, innerBegin, innerEnd, level]() {
100  parallelForImpl<BinaryFunctor, PacketSize>(outerMid, outerEnd, innerBegin, innerEnd, f, barrier, level);
101  };
102  m_pool.Schedule(std::move(right));
103  outerEnd = outerMid;
104  } else {
105  Index innerSize = innerEnd - innerBegin;
106  eigen_assert(innerSize % PacketSize == 0 && "this function assumes innerSize is a multiple of PacketSize");
107  Index innerMid = innerBegin + numext::round_down(innerSize >> 1, PacketSize);
108  Task right = [this, &f, &barrier, outerBegin, outerEnd, innerMid, innerEnd, level]() {
109  parallelForImpl<BinaryFunctor, PacketSize>(outerBegin, outerEnd, innerMid, innerEnd, f, barrier, level);
110  };
111  m_pool.Schedule(std::move(right));
112  innerEnd = innerMid;
113  }
114  }
115  for (Index outer = outerBegin; outer < outerEnd; outer++)
116  for (Index inner = innerBegin; inner < innerEnd; inner += PacketSize) f(outer, inner);
117  barrier.Notify();
118  }

References eigen_assert, f(), m_pool, Eigen::Barrier::Notify(), Eigen::numext::round_down(), and Eigen::ThreadPoolTempl< Environment >::Schedule().

Member Data Documentation

◆ m_costFactor

float Eigen::CoreThreadPoolDevice::m_costFactor

◆ m_pool

ThreadPool& Eigen::CoreThreadPoolDevice::m_pool

Referenced by calculateLevels() and parallelForImpl().


The documentation for this struct was generated from the following file: