10#ifndef EIGEN_PARALLELIZER_H
11#define EIGEN_PARALLELIZER_H
// Single entry point for reading or changing the thread-count setting; `action` selects
// the operation and `v` carries the value in or out.
// NOTE(review): this listing is lossy - braces and several statements (including the first
// branch of the if/else chain below) are not visible here.
18inline void manage_multi_threading(Action action,
int* v)
// Function-local static that holds the setting; starts at -1.
20 static int m_maxThreads = -1;
// Suppresses the unused-variable warning for m_maxThreads - presumably for configurations
// in which no compiled branch reads it; confirm against the full file.
21 EIGEN_UNUSED_VARIABLE(m_maxThreads);
// v must be non-null on this path (checked only through eigen_internal_assert).
25 eigen_internal_assert(v!=0);
// Read path: report the current value through *v.
28 else if(action==GetAction)
30 eigen_internal_assert(v!=0);
// With OpenMP available the value reported is whatever omp_get_max_threads() returns.
31 #ifdef EIGEN_HAS_OPENMP
35 *v = omp_get_max_threads();
// Unconditional internal assertion failure - presumably the fall-through for an
// unrecognised action; the enclosing else is not visible here.
42 eigen_internal_assert(
false);
// Body fragment of initParallel(); the signature and the declaration of nbt are not
// visible in this listing.
// Issues one GetAction query for the thread setting and one for the three cache sizes.
// NOTE(review): presumably done only to force the function-local statics behind these
// calls to be initialised - confirm against the full file.
52 internal::manage_multi_threading(GetAction, &nbt);
53 std::ptrdiff_t l1, l2, l3;
54 internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
// Body fragment of nbThreads(): fetches the current thread setting into ret via GetAction.
// The declaration and the return of ret are not visible in this listing.
62 internal::manage_multi_threading(GetAction, &ret);
// Body fragment of setNbThreads(int v): forwards the address of v with SetAction.
70 internal::manage_multi_threading(SetAction, &v);
// Per-thread record used by parallelize_gemm, which allocates one entry per thread and
// fills in lhs_start / lhs_length for each of them.
// NOTE(review): the member declarations and braces are not visible in this listing.
75template<
typename Index>
struct GemmParallelInfo
// Initial state: sync = -1, users = 0, and an empty lhs range (start 0, length 0).
77 GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
// Runs the product functor `func` over a rows x cols problem of inner size `depth`,
// either as one serial call or split across an OpenMP team.
//   Condition - compile-time switch; when false the serial call below is always taken.
//   func      - called as func(r0, nrows, c0, ncols[, info]); also provides
//               initParallelSession() and Traits::mr / Traits::nr.
//   transpose - selects which dimension is used for sizing (rows when true, cols otherwise).
// NOTE(review): this listing is lossy - braces, the declaration of `threads`, and the
// conditions guarding some statements are not visible here.
86template<
bool Condition,
typename Functor,
typename Index>
87void parallelize_gemm(
const Functor& func,
Index rows,
Index cols,
Index depth,
bool transpose)
// Branch compiled when OpenMP is unavailable or EIGEN_USE_BLAS is defined: depth and
// transpose are only marked as unused (the rest of this branch is not visible here).
91#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
96 EIGEN_UNUSED_VARIABLE(depth);
97 EIGEN_UNUSED_VARIABLE(transpose);
// Dimension that bounds the thread count.
109 Index size = transpose ? rows : cols;
// First bound: at most size / Traits::nr threads, and never fewer than one.
110 Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);
// Second bound: estimate the work as rows*cols*depth (in double) and allow one thread per
// kMinTaskSize units of it.
113 double work =
static_cast<double>(rows) *
static_cast<double>(cols) *
114 static_cast<double>(depth);
115 double kMinTaskSize = 50000;
// NOTE(review): work / kMinTaskSize is a double handed to std::min<Index>, so it is
// converted to Index implicitly (truncating); an explicit static_cast<Index> would make
// the narrowing visible and silence conversion warnings.
116 pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
// Serial path: taken when parallelism is disabled at compile time, when only one thread
// is to be used, or when omp_get_num_threads() reports a team larger than one (i.e. the
// caller is already inside a parallel region).
124 if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
125 return func(0,rows, 0,cols);
// Tell the functor how many threads will participate.
128 func.initParallelSession(threads);
// NOTE(review): presumably guarded by `if(transpose)` on a line not visible here - confirm.
131 std::swap(rows,cols);
// One GemmParallelInfo entry per requested thread.
133 ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
135 #pragma omp parallel num_threads(threads)
// i is this thread's index within the team.
137 Index i = omp_get_thread_num();
// The team size is re-queried inside the region and used for all slicing below instead
// of the requested `threads`.
139 Index actual_threads = omp_get_num_threads();
// Per-thread column count, rounded down to a multiple of 4 by clearing the two low bits.
141 Index blockCols = (cols / actual_threads) & ~
Index(0x3);
// Per-thread row count, rounded down to a multiple of Traits::mr.
142 Index blockRows = (rows / actual_threads);
143 blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
// Row slice of thread i; the last thread also takes whatever the rounding left over.
145 Index r0 = i*blockRows;
146 Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
// Column slice of thread i, with the same remainder rule.
148 Index c0 = i*blockCols;
149 Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
// Record this thread's row slice in its info entry.
151 info[i].lhs_start = r0;
152 info[i].lhs_length = actualBlockRows;
// Two call forms follow: the first passes this thread's column slice as the first range
// and the full `rows` extent as the second; the second passes them the other way round.
// NOTE(review): presumably selected by an if(transpose)/else not visible here - confirm.
155 func(c0, actualBlockCols, 0, rows, info);
157 func(0, rows, c0, actualBlockCols, info);
Namespace containing all symbols from the Eigen library.
Definition A05_PortingFrom2To3.dox:1
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:65
void initParallel()
Definition Parallelizer.h:49
int nbThreads()
Definition Parallelizer.h:59
void setNbThreads(int v)
Definition Parallelizer.h:68