1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_PARALLELIZER_H
11 #define EIGEN_PARALLELIZER_H
12 
13 namespace Eigen {
14 
15 namespace internal {
16 
17 /** \internal */
manage_multi_threading(Action action,int * v)18 inline void manage_multi_threading(Action action, int* v)
19 {
20   static int m_maxThreads = -1;
21   EIGEN_UNUSED_VARIABLE(m_maxThreads);
22 
23   if(action==SetAction)
24   {
25     eigen_internal_assert(v!=0);
26     m_maxThreads = *v;
27   }
28   else if(action==GetAction)
29   {
30     eigen_internal_assert(v!=0);
31     #ifdef EIGEN_HAS_OPENMP
32     if(m_maxThreads>0)
33       *v = m_maxThreads;
34     else
35       *v = omp_get_max_threads();
36     #else
37     *v = 1;
38     #endif
39   }
40   else
41   {
42     eigen_internal_assert(false);
43   }
44 }
45 
46 }
47 
48 /** Must be call first when calling Eigen from multiple threads */
initParallel()49 inline void initParallel()
50 {
51   int nbt;
52   internal::manage_multi_threading(GetAction, &nbt);
53   std::ptrdiff_t l1, l2, l3;
54   internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
55 }
56 
57 /** \returns the max number of threads reserved for Eigen
58   * \sa setNbThreads */
nbThreads()59 inline int nbThreads()
60 {
61   int ret;
62   internal::manage_multi_threading(GetAction, &ret);
63   return ret;
64 }
65 
/** Sets the max number of threads reserved for Eigen.
  *
  * Passing v<=0 restores the automatic default: with OpenMP,
  * omp_get_max_threads() is used; without OpenMP, a single thread.
  * \sa nbThreads */
inline void setNbThreads(int v)
{
  internal::manage_multi_threading(SetAction, &v);
}
72 
73 namespace internal {
74 
/** \internal Per-thread bookkeeping for a parallel GEMM session.
  *
  * parallelize_gemm() allocates one instance per worker thread and fills in
  * the lhs slice assigned to that thread before invoking the kernel functor.
  */
template<typename Index> struct GemmParallelInfo
{
  GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}

  // Written by one thread and polled by others, hence volatile.
  // NOTE(review): volatile is not a real inter-thread synchronization
  // primitive; presumably the consuming kernel relies on OpenMP semantics
  // here -- verify against the functor that reads these fields.
  Index volatile sync;
  int volatile users;

  // Row range [lhs_start, lhs_start+lhs_length) of the lhs handled by this
  // thread (assigned in parallelize_gemm).
  Index lhs_start;
  Index lhs_length;
};
85 
/** \internal Runs the product kernel \a func over a rows x cols x depth
  * product, splitting the columns (or rows, if \a transpose) across OpenMP
  * threads when that is enabled and worthwhile; otherwise calls \a func once
  * over the whole range.
  *
  * \tparam Condition compile-time switch: when false, always run sequentially.
  * \param func      kernel functor, invoked as func(row0,nrows, col0,ncols)
  *                  sequentially, or with a trailing GemmParallelInfo array
  *                  in the parallel path.
  * \param rows,cols,depth  dimensions of the product.
  * \param transpose true when the destination is row-major; rows and columns
  *                  swap roles for the split.
  */
template<bool Condition, typename Functor, typename Index>
void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
{
  // TODO when EIGEN_USE_BLAS is defined,
  // we should still enable OMP for other scalar types
#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
  // FIXME the transpose variable is only needed to properly split
  // the matrix product when multithreading is enabled. This is a temporary
  // fix to support row-major destination matrices. This whole
  // parallelizer mechanism has to be redesigned anyway.
  EIGEN_UNUSED_VARIABLE(depth);
  EIGEN_UNUSED_VARIABLE(transpose);
  func(0,rows, 0,cols);
#else

  // Dynamically check whether we should enable or disable OpenMP.
  // The conditions are:
  // - the max number of threads we can create is greater than 1
  // - we are not already in a parallel code
  // - the sizes are large enough

  // compute the maximal number of threads from the size of the product:
  // This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at once.
  Index size = transpose ? rows : cols;
  Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);

  // compute the maximal number of threads from the total amount of work:
  double work = static_cast<double>(rows) * static_cast<double>(cols) *
      static_cast<double>(depth);
  double kMinTaskSize = 50000;  // FIXME improve this heuristic.
  pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));

  // compute the number of threads we are going to use
  Index threads = std::min<Index>(nbThreads(), pb_max_threads);

  // if multi-threading is explicitely disabled, not useful, or if we already are in a parallel session,
  // then abort multi-threading
  // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
  if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
    return func(0,rows, 0,cols);

  Eigen::initParallel();
  func.initParallelSession(threads);

  if(transpose)
    std::swap(rows,cols);

  // One GemmParallelInfo per worker, on an aligned stack-allocated buffer.
  ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);

  #pragma omp parallel num_threads(threads)
  {
    Index i = omp_get_thread_num();
    // Note that the actual number of threads might be lower than the number of request ones.
    Index actual_threads = omp_get_num_threads();

    // Even split, with block columns rounded down to a multiple of 4
    // (presumably for packet-friendly widths -- TODO confirm vs Traits::nr)
    // and block rows rounded down to a multiple of the kernel's mr.
    Index blockCols = (cols / actual_threads) & ~Index(0x3);
    Index blockRows = (rows / actual_threads);
    blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;

    // The last thread absorbs the remainder rows/cols left by the rounding.
    Index r0 = i*blockRows;
    Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;

    Index c0 = i*blockCols;
    Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;

    // Publish this thread's lhs slice so the kernel can coordinate packing.
    info[i].lhs_start = r0;
    info[i].lhs_length = actualBlockRows;

    if(transpose)
      func(c0, actualBlockCols, 0, rows, info);
    else
      func(0, rows, c0, actualBlockCols, info);
  }
#endif
}
161 
162 } // end namespace internal
163 
164 } // end namespace Eigen
165 
166 #endif // EIGEN_PARALLELIZER_H
167