10 #ifndef EIGEN_PARALLELIZER_H
11 #define EIGEN_PARALLELIZER_H
14 #include "../InternalHeaderCheck.h"
37 #if defined(EIGEN_HAS_OPENMP) && defined(EIGEN_GEMM_THREADPOOL)
38 #error "EIGEN_HAS_OPENMP and EIGEN_GEMM_THREADPOOL may not both be defined."
64 #ifdef EIGEN_GEMM_THREADPOOL
75 if (new_pool !=
nullptr) {
86 inline ThreadPool* getGemmThreadPool() {
return setGemmThreadPool(
nullptr); }
93 #if defined(EIGEN_USE_BLAS) || (!defined(EIGEN_HAS_OPENMP) && !defined(EIGEN_GEMM_THREADPOOL))
105 template <
typename Index>
107 template <
bool Condition,
typename Functor,
typename Index>
// Per-task bookkeeping record for the parallel GEMM driver.
// The parallel code paths later in this file index an array of these records
// by thread index and store each thread's starting row and row count into
// `lhs_start` / `lhs_length`.
// NOTE(review): the declarations of `lhs_start` and `lhs_length` (and the end
// of this struct) are not visible in this excerpt.
115 template <
typename Index>
116 struct GemmParallelTaskInfo {
// Default state: sync = -1, users = 0, lhs_start = 0, lhs_length = 0.
117 GemmParallelTaskInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
// Atomic Index-typed field, initialised to -1.
// NOTE(review): how it is consumed is not shown here - presumably a
// cross-thread synchronisation marker; confirm at the use site.
118 std::atomic<Index> sync;
// Atomic int counter, initialised to 0.
// NOTE(review): its consumers are not visible in this excerpt.
119 std::atomic<int> users;
// Per-thread descriptor built by each worker in the parallel GEMM code paths
// of this file: it records the worker's index, the number of threads in use,
// and a pointer to the GemmParallelTaskInfo records.
124 template <
typename Index>
125 struct GemmParallelInfo {
// Index of this worker; fixed at construction.
126 const int logical_thread_id;
// Number of threads taking part; fixed at construction.
127 const int num_threads;
// Pointer to the task records; the parallel code indexes it by thread index
// (`info.task_info[i]`), so it is used as an array with one entry per thread.
128 GemmParallelTaskInfo<Index>* task_info;
// Stores the three arguments verbatim; does no validation and takes no
// ownership action on `task_info_` (the pointer is simply copied).
130 GemmParallelInfo(
int logical_thread_id_,
int num_threads_, GemmParallelTaskInfo<Index>* task_info_)
131 : logical_thread_id(logical_thread_id_), num_threads(num_threads_), task_info(task_info_) {}
135 static int m_maxThreads = -1;
138 #if defined(EIGEN_HAS_OPENMP)
143 int omp_threads = omp_get_max_threads();
144 m_maxThreads = (*
v == 0 ? omp_threads :
std::min(*
v, omp_threads));
145 #elif defined(EIGEN_GEMM_THREADPOOL)
151 int pool_threads = pool !=
nullptr ? pool->NumThreads() : 1;
152 m_maxThreads = (*
v == 0 ? pool_threads :
numext::mini(pool_threads, *
v));
156 #if defined(EIGEN_HAS_OPENMP)
157 if (m_maxThreads > 0)
160 *
v = omp_get_max_threads();
169 template <
bool Condition,
typename Functor,
typename Index>
181 Index pb_max_threads = std::max<Index>(1,
size / Functor::Traits::nr);
184 double work =
static_cast<double>(
rows) *
static_cast<double>(
cols) *
static_cast<double>(depth);
185 double kMinTaskSize = 50000;
186 pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads,
static_cast<Index>(work / kMinTaskSize)));
189 int threads = std::min<int>(
nbThreads(),
static_cast<int>(pb_max_threads));
193 bool dont_parallelize = (!Condition) || (threads <= 1);
194 #if defined(EIGEN_HAS_OPENMP)
196 dont_parallelize |= omp_get_num_threads() > 1;
197 #elif defined(EIGEN_GEMM_THREADPOOL)
203 dont_parallelize |= (pool ==
nullptr || pool->CurrentThreadId() != -1);
207 func.initParallelSession(threads);
213 #if defined(EIGEN_HAS_OPENMP)
214 #pragma omp parallel num_threads(threads)
216 Index i = omp_get_thread_num();
219 Index actual_threads = omp_get_num_threads();
220 GemmParallelInfo<Index>
info(
i,
static_cast<int>(actual_threads), task_info);
223 Index blockRows = (
rows / actual_threads);
224 blockRows = (blockRows / Functor::Traits::mr) * Functor::Traits::mr;
227 Index actualBlockRows = (
i + 1 == actual_threads) ?
rows - r0 : blockRows;
230 Index actualBlockCols = (
i + 1 == actual_threads) ?
cols - c0 : blockCols;
232 info.task_info[
i].lhs_start = r0;
233 info.task_info[
i].lhs_length = actualBlockRows;
241 #elif defined(EIGEN_GEMM_THREADPOOL)
242 Barrier barrier(threads);
243 auto task = [=, &
func, &barrier, &task_info](
int i) {
244 Index actual_threads = threads;
245 GemmParallelInfo<Index>
info(
i,
static_cast<int>(actual_threads), task_info);
247 Index blockRows = (
rows / actual_threads);
248 blockRows = (blockRows / Functor::Traits::mr) * Functor::Traits::mr;
251 Index actualBlockRows = (
i + 1 == actual_threads) ?
rows - r0 : blockRows;
254 Index actualBlockCols = (
i + 1 == actual_threads) ?
cols - c0 : blockCols;
256 info.task_info[
i].lhs_start = r0;
257 info.task_info[
i].lhs_length = actualBlockRows;
269 for (
int i = 0;
i < threads - 1; ++
i) {
270 pool->Schedule([=, task = std::move(task)] { task(
i); });
Array< int, Dynamic, 1 > v
Definition: Array_initializer_list_vector_cxx11.cpp:1
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define EIGEN_DEPRECATED
Definition: Macros.h:931
#define eigen_internal_assert(x)
Definition: Macros.h:916
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
Definition: Memory.h:806
int rows
Definition: Tutorial_commainit_02.cpp:1
int cols
Definition: Tutorial_commainit_02.cpp:1
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
#define min(a, b)
Definition: datatypes.h:22
Action
Definition: Constants.h:516
@ GetAction
Definition: Constants.h:516
@ SetAction
Definition: Constants.h:516
Eigen::DenseIndex ret
Definition: level1_cplx_impl.h:43
EIGEN_BLAS_FUNC() swap(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
Definition: level1_impl.h:117
int info
Definition: level2_cplx_impl.h:39
func(actual_m, actual_n, a, *lda, actual_b, 1, actual_c, 1, alpha)
EIGEN_STRONG_INLINE void parallelize_gemm(const Functor &func, Index rows, Index cols, Index, bool)
Definition: Parallelizer.h:108
void manage_multi_threading(Action action, int *v)
Definition: Parallelizer.h:95
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T &x, const T &y)
Definition: MathFunctions.h:920
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
int nbThreads()
Definition: Parallelizer.h:54
EIGEN_DEPRECATED void initParallel()
Definition: Parallelizer.h:50
ThreadPoolTempl< StlThreadEnvironment > ThreadPool
Definition: NonBlockingThreadPool.h:580
void setNbThreads(int v)
Definition: Parallelizer.h:62
void transpose()
Definition: skew_symmetric_matrix3.cpp:135
action
Definition: calibrate.py:47
Definition: Eigen_Colamd.h:49
Definition: Parallelizer.h:106
Definition: NonLinearOptimization.cpp:97
Definition: benchGeometry.cpp:21