Eigen::ForkJoinScheduler Class Reference

#include <ForkJoin.h>

Static Public Member Functions

template<typename DoFnType >
static void ParallelForAsync (int start, int end, int granularity, DoFnType do_func, std::function< void()> done, Eigen::ThreadPool *thread_pool)
 
template<typename DoFnType >
static void ParallelFor (int start, int end, int granularity, DoFnType do_func, Eigen::ThreadPool *thread_pool)
 

Static Private Member Functions

template<typename LeftType , typename RightType >
static void ForkJoin (LeftType &&left_thunk, RightType &&right_thunk, Eigen::ThreadPool *thread_pool)
 
static void RunParallelForAsync (int start, int end, int granularity, std::function< void(int)> &do_func, std::function< void()> &done, Eigen::ThreadPool *thread_pool)
 
static void RunParallelForAsync (int start, int end, int granularity, std::function< void(int, int)> &do_func, std::function< void()> &done, Eigen::ThreadPool *thread_pool)
 

Member Function Documentation

◆ ForkJoin()

template<typename LeftType , typename RightType >
static void Eigen::ForkJoinScheduler::ForkJoin ( LeftType &&  left_thunk,
RightType &&  right_thunk,
Eigen::ThreadPool *  thread_pool 
)
inline static private
92  {
93  std::atomic<bool> right_done(false);
94  auto execute_right = [&right_thunk, &right_done]() {
95  std::forward<RightType>(right_thunk)();
96  right_done.store(true, std::memory_order_release);
97  };
98  thread_pool->Schedule(execute_right);
99  std::forward<LeftType>(left_thunk)();
100  Eigen::ThreadPool::Task task;
101  while (!right_done.load(std::memory_order_acquire)) {
102  thread_pool->MaybeGetTask(&task);
103  if (task.f) task.f();
104  }
105  }
Environment::Task Task
Definition: NonBlockingThreadPool.h:22
void Schedule(std::function< void()> fn) EIGEN_OVERRIDE
Definition: NonBlockingThreadPool.h:120
void MaybeGetTask(Task *t)
Definition: NonBlockingThreadPool.h:157

References Eigen::ThreadPoolTempl< Environment >::MaybeGetTask(), and Eigen::ThreadPoolTempl< Environment >::Schedule().

Referenced by RunParallelForAsync().

◆ ParallelFor()

template<typename DoFnType >
static void Eigen::ForkJoinScheduler::ParallelFor ( int  start,
int  end,
int  granularity,
DoFnType  do_func,
Eigen::ThreadPool *  thread_pool 
)
inline static
78  {
79  if (start >= end) return;
80  auto dummy_done = []() {};
81  Barrier barrier(1);
82  thread_pool->Schedule([start, end, granularity, thread_pool, &do_func, &dummy_done, &barrier]() {
83  ForkJoinScheduler::ParallelForAsync(start, end, granularity, do_func, dummy_done, thread_pool);
84  barrier.Notify();
85  });
86  barrier.Wait();
87  }
static void ParallelForAsync(int start, int end, int granularity, DoFnType do_func, std::function< void()> done, Eigen::ThreadPool *thread_pool)
Definition: ForkJoin.h:67
static constexpr lastp1_t end
Definition: IndexedViewHelper.h:79
void start(const unsigned &i)
(Re-)start i-th timer
Definition: oomph_utilities.cc:243

References Eigen::placeholders::end, Eigen::Barrier::Notify(), ParallelForAsync(), Eigen::ThreadPoolTempl< Environment >::Schedule(), oomph::CumulativeTimings::start(), and Eigen::Barrier::Wait().

◆ ParallelForAsync()

template<typename DoFnType >
static void Eigen::ForkJoinScheduler::ParallelForAsync ( int  start,
int  end,
int  granularity,
DoFnType  do_func,
std::function< void()>  done,
Eigen::ThreadPool *  thread_pool 
)
inline static
68  {
69  if (start >= end) {
70  done();
71  return;
72  }
73  ForkJoinScheduler::RunParallelForAsync(start, end, granularity, do_func, done, thread_pool);
74  }
static void RunParallelForAsync(int start, int end, int granularity, std::function< void(int)> &do_func, std::function< void()> &done, Eigen::ThreadPool *thread_pool)
Definition: ForkJoin.h:109

References Eigen::placeholders::end, RunParallelForAsync(), and oomph::CumulativeTimings::start().

Referenced by ParallelFor().

◆ RunParallelForAsync() [1/2]

static void Eigen::ForkJoinScheduler::RunParallelForAsync ( int  start,
int  end,
int  granularity,
std::function< void(int)> &  do_func,
std::function< void()> &  done,
Eigen::ThreadPool *  thread_pool 
)
inline static private
110  {
111  std::function<void(int, int)> wrapped_do_func = [&do_func](int start, int end) {
112  for (int i = start; i < end; ++i) do_func(i);
113  };
114  ForkJoinScheduler::RunParallelForAsync(start, end, granularity, wrapped_do_func, done, thread_pool);
115  }
int i
Definition: BiCGSTAB_step_by_step.cpp:9

References Eigen::placeholders::end, i, and oomph::CumulativeTimings::start().

Referenced by ParallelForAsync(), and RunParallelForAsync().

◆ RunParallelForAsync() [2/2]

static void Eigen::ForkJoinScheduler::RunParallelForAsync ( int  start,
int  end,
int  granularity,
std::function< void(int, int)> &  do_func,
std::function< void()> &  done,
Eigen::ThreadPool *  thread_pool 
)
inline static private
120  {
121  if ((end - start) <= granularity) {
122  do_func(start, end);
123  for (int j = 0; j < end - start; ++j) done();
124  } else {
125  // Typical workloads choose initial values of `{start, end, granularity}` such that `end - start` and
126  // `granularity` are powers of two. Since modern processors usually implement (2^x)-way
127  // set-associative caches, we minimize the number of cache misses by choosing midpoints that are not
128  // powers of two (to avoid having two addresses in the main memory pointing to the same point in the
129  // cache). More specifically, we restrict the set of candidate midpoints to:
130  //
131  // P := {start, start + granularity, start + 2*granularity, ..., end},
132  //
133  // and choose the entry in `P` at (roughly) the 9/16 mark.
134  const int size = end - start;
135  const int mid = start + Eigen::numext::div_ceil(9 * (size + 1) / 16, granularity) * granularity;
136  ForkJoinScheduler::ForkJoin(
137  [start, mid, granularity, &do_func, &done, thread_pool]() {
138  RunParallelForAsync(start, mid, granularity, do_func, done, thread_pool);
139  },
140  [mid, end, granularity, &do_func, &done, thread_pool]() {
141  RunParallelForAsync(mid, end, granularity, do_func, done, thread_pool);
142  },
143  thread_pool);
144  }
145  }
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
static void ForkJoin(LeftType &&left_thunk, RightType &&right_thunk, Eigen::ThreadPool *thread_pool)
Definition: ForkJoin.h:92
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_CONSTEXPR T div_ceil(T a, T b)
Definition: MathFunctions.h:1251
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2

References Eigen::numext::div_ceil(), Eigen::placeholders::end, ForkJoin(), j, RunParallelForAsync(), size, and oomph::CumulativeTimings::start().


The documentation for this class was generated from the following file: