1 
2 #ifndef NTL_BasicThreadPool__H
3 #define NTL_BasicThreadPool__H
4 
5 #include <NTL/tools.h>
6 #include <NTL/vector.h>
7 #include <NTL/SmartPtr.h>
8 #include <NTL/thread.h>
9 
10 
11 NTL_OPEN_NNS
12 
13 
14 inline long AvailableThreads();
15 
16 struct PartitionInfo {
17    long nintervals;  // number of intervals
18    long intervalsz;  // interval size
19    long nsintervals; // number of small intervals
20 
21    explicit
22    PartitionInfo(long sz, long nt = AvailableThreads())
23    // partitions [0..sz) into nintervals intervals,
24    // so that there are nsintervals of size intervalsz-1
25    // and nintervals-nsintervals of size intervalsz
26    {
27       if (sz <= 0) {
28          nintervals = intervalsz = nsintervals = 0;
29          return;
30       }
31 
32       if (nt <= 0) LogicError("PartitionInfo: bad args");
33 
34       // NOTE: this overflow check probably unnecessary
35       if (NTL_OVERFLOW(sz, 1, 0) || NTL_OVERFLOW(nt, 1, 0))
36          ResourceError("PartitionInfo: arg too big");
37 
38       if (sz < nt) {
39          nintervals = sz;
40          intervalsz = 1;
41          nsintervals = 0;
42          return;
43       }
44 
45       nintervals = nt;
46 
47       long q, r;
48       q = sz/nt;
49       r = sz - nt*q;
50 
51       if (r == 0) {
52          intervalsz = q;
53          nsintervals = 0;
54       }
55       else {
56          intervalsz = q+1;
57          nsintervals = nt - r;
58       }
59    }
60 
NumIntervalsPartitionInfo61    long NumIntervals() const { return nintervals; }
62 
intervalPartitionInfo63    void interval(long& first, long& last, long i) const
64    // [first..last) is the ith interval -- no range checking is done
65    {
66 
67 #if 0
68       // this is the logic, naturally expressed
69       if (i < nsintervals) {
70          first = i*(intervalsz-1);
71          last = first + (intervalsz-1);
72       }
73       else {
74          first = nsintervals*(intervalsz-1) + (i-nsintervals)*intervalsz;
75          last = first + intervalsz;
76       }
77 #else
78       // this is the same logic, but branch-free (and portable)
79       // ...probably unnecessary optimization
80 
81       long mask = -long(cast_unsigned(i-nsintervals) >> (NTL_BITS_PER_LONG-1));
82       // mask == -1 if i < nsintervals, 0 o/w
83 
84       long lfirst = i*(intervalsz-1);
85       lfirst += long((~cast_unsigned(mask)) & cast_unsigned(i-nsintervals));
86       // lfirst += max(0, i-nsintervals)
87 
88       long llast = lfirst + intervalsz + mask;
89 
90       first = lfirst;
91       last = llast;
92 #endif
93    }
94 
95 };
96 
97 
98 
99 NTL_CLOSE_NNS
100 
101 
102 
103 #ifdef NTL_THREADS
104 
105 
106 #include <thread>
107 #include <condition_variable>
108 #include <exception>
109 
110 
111 NTL_OPEN_NNS
112 
113 /*************************************************************
114 
115 Some simple thread pooling.
116 
117 You create a thread pool by constructing a BasicThreadPool object.
118 For example:
119 
120    long nthreads = 4;
121    BasicThreadPool pool(nthreads);
122 
123 creates a thread pool of 4 threads.  These threads will exist
124 until the destructor for pool is called.
125 
The simplest way to use a thread pool is as follows.
127 Suppose you have a task that consists of N subtasks,
128 indexed 0..N-1.  Then you can write:
129 
130 
131    pool.exec_range(N,
132       [&](long first, long last) {
133          for (long i = first; i < last; i++) {
134             ... code to process subtask i ...
135          }
136       }
137    );
138 
The second argument to exec_range is a C++11 "lambda".
140 The "[&]" indicates that all local variables in the calling
141 context are captured by reference, so the lambda body can
142 reference all visible local variables directly.
143 
144 A lower-level interface is also provided.
145 One can write:
146 
147    pool.exec_index(n,
148       [&](long index) {
149          ... code to process index i ...
150       }
151    );
152 
153 This will activate n threads with indices 0..n-1, and execute
154 the given code on each index.  The parameter n must be
155 in the range 1..nthreads, otherwise an error is raised.
156 
157 This lower-level interface is useful in some cases,
158 especially when memory is managed in some special way.
159 For convenience, a method is provided to break
160 subtasks up into smaller, almost-equal-sized groups
161 of subtasks:
162 
163    Vec<long> pvec;
164    long n = pool.SplitProblems(N, pvec);
165 
166 can be used for this.  N is the number of subtasks, indexed 0..N-1.
This method will compute n as needed by exec_index, and
168 the range of subtasks to be processed by a given index in the range
169 0..n-1 is pvec[index]..pvec[index+1]-1
Thus, the logic of the above exec_range example can be written
171 using the lower-level exec interface as follows:
172 
173 
174    Vec<long> pvec;
175    long n = pool.SplitProblems(N, pvec);
176    pool.exec_index(n,
177       [&](long index) {
178          long first = pvec[index];
179          long last = pvec[index+1];
180          for (long i = first; i < last; i++) {
181             ... code to process subtask i ...
182          }
183       }
184    );
185 
186 However, with this approach, memory or other resources can be
187 assigned to each index = 0..n-1, and managed externally.
188 
189 
190 
191 
192 *************************************************************/
193 
194 
class BasicThreadPool {
friend struct RecursiveThreadPool;

private:

// lots of nested stuff

   // A one-shot signal.  wait() blocks until another thread calls
   // send() with a nonzero value; the value is then consumed (reset
   // to 0) and returned.  T must be assignable from 0 and usable as
   // a truth value.
   template<class T>
   class SimpleSignal {
   private:
     T val;
     std::mutex m;
     std::condition_variable cv;

     SimpleSignal(const SimpleSignal&); // disabled
     void operator=(const SimpleSignal&); // disabled

   public:
     SimpleSignal() : val(0) { }

     // blocks until val is nonzero; resets it and returns the value
     // that was sent
     T wait()
     {
       std::unique_lock<std::mutex> lock(m);
       cv.wait(lock, [&]() { return val; } );
       T old_val = val;
       val = 0;
       return old_val;
     }

     // stores new_val and wakes the (single) waiter
     void send(T new_val)
     {
       std::lock_guard<std::mutex> lock(m);
       val = new_val;
       cv.notify_one();
     }
   };


   // Like SimpleSignal, but carries an extra payload val1 of type T1
   // alongside the signalled value.
   template<class T, class T1>
   class CompositeSignal {
   private:
     T val;
     T1 val1;
     std::mutex m;
     std::condition_variable cv;

     CompositeSignal(const CompositeSignal&); // disabled
     void operator=(const CompositeSignal&); // disabled

   public:
     CompositeSignal() : val(0) { }

     // blocks until val is nonzero; returns it, stores the payload
     // in _val1, and resets val for the next round
     T wait(T1& _val1)
     {
       std::unique_lock<std::mutex> lock(m);
       cv.wait(lock, [&]() { return val; } );
       T _val = val;
       _val1 = val1;
       val = 0;
       return _val;
     }

     // stores value and payload, then wakes the (single) waiter
     void send(T _val, T1 _val1)
     {
       std::lock_guard<std::mutex> lock(m);
       val = _val;
       val1 = _val1;
       cv.notify_one();
     }
   };



   // Abstract unit of work.  run(index) is executed on some pool
   // thread; getBasicThreadPool() identifies the pool whose counter
   // accounts for this task.
   class ConcurrentTask {
     BasicThreadPool *pool;
   public:
     ConcurrentTask(BasicThreadPool *_pool) : pool(_pool) { }
     BasicThreadPool *getBasicThreadPool() const { return pool; }

     virtual void run(long index) = 0;
   };



   // dummy class, used for signalling termination
   class ConcurrentTaskTerminate : public ConcurrentTask {
   public:
     ConcurrentTaskTerminate() : ConcurrentTask(0) { }
     void run(long index) { }
   };



   // Adapter: run(index) simply invokes fct(index).
   // NOTE: holds fct by reference, so the callable must outlive the
   // task (it does -- tasks live only within an exec_* call).
   template<class Fct>
   class ConcurrentTaskFct : public ConcurrentTask {
   public:
     const Fct& fct;

     ConcurrentTaskFct(BasicThreadPool *_pool, const Fct& _fct) :
       ConcurrentTask(_pool), fct(_fct) { }

     void run(long index) { fct(index); }
   };

   // Adapter: maps index to its interval [first..last) via pinfo
   // and invokes fct(first, last).
   template<class Fct>
   class ConcurrentTaskFct1 : public ConcurrentTask {
   public:
      const Fct& fct;
      const PartitionInfo& pinfo;

      ConcurrentTaskFct1(BasicThreadPool *_pool, const Fct& _fct,
         const PartitionInfo& _pinfo) :
         ConcurrentTask(_pool), fct(_fct), pinfo(_pinfo)  { }

      void run(long index)
      {
         long first, last;
         pinfo.interval(first, last, index);
         fct(first, last);
      }
   };



   // Owns one worker std::thread together with the signal used to
   // feed it tasks.  The destructor sends the termination sentinel
   // (index -1) and joins the thread.
   struct AutomaticThread {
      CompositeSignal< ConcurrentTask *, long > localSignal;
      ConcurrentTaskTerminate term;
      std::thread t;


      AutomaticThread() : t(worker, &localSignal)
      {
         // cerr << "starting thread " << t.get_id() << "\n";
      }

      ~AutomaticThread()
      {
        // cerr << "stopping thread " << t.get_id() << "...";
        localSignal.send(&term, -1);
        t.join();
        // cerr << "\n";
      }
   };



// BasicThreadPool data members

  long nthreads;
  // total number of threads, including the calling thread (index 0)

  bool active_flag;
  // true while an exec_* call is in progress

  std::atomic<long> counter;
  // number of subtasks still running in the current exec_* call
  SimpleSignal<bool> globalSignal;
  // signalled by the last subtask to finish

  Vec< UniquePtr<AutomaticThread> > threadVec;
  // the nthreads-1 worker threads; the caller itself acts as index 0

  std::exception_ptr eptr;
  std::mutex eptr_guard;
  // eptr records the first exception thrown by any subtask;
  // eptr_guard serializes access to it

// BasicThreadPool private member functions

  BasicThreadPool(const BasicThreadPool&); // disabled
  void operator=(const BasicThreadPool&); // disabled

  // hands task to the worker thread that will run it as index
  void launch(ConcurrentTask *task, long index)
  {
    threadVec[index-1]->localSignal.send(task, index);
    // we use threadVec[index-1] to allow for the fact
    // that we want the current thread to have index 0
  }

  // marks the pool active and records the number of subtasks about
  // to be launched
  void begin(long cnt)
  {

    active_flag = true;
    counter = cnt;
  }

  // waits until every subtask has finished, then rethrows the first
  // recorded exception (if any)
  void end()
  {
    globalSignal.wait();

    active_flag = false;

    if (eptr) {
      std::exception_ptr eptr1 = eptr;
      eptr = nullptr;
      std::rethrow_exception(eptr1);
    }
  }

  // runs one subtask: the first exception thrown by any subtask is
  // captured into pool->eptr, and the last subtask to finish wakes
  // the thread waiting in end()
  static void runOneTask(ConcurrentTask *task, long index)
  {
    BasicThreadPool *pool = task->getBasicThreadPool();

    try {
       task->run(index);
    }
    catch (...) {
       std::lock_guard<std::mutex> lock(pool->eptr_guard);
       if (!pool->eptr) pool->eptr = std::current_exception();
    }

    if (--(pool->counter) == 0) pool->globalSignal.send(true);
  }

   // worker-thread main loop: waits for tasks on its local signal
   // and runs them, until it receives the sentinel index -1
   static void worker(CompositeSignal< ConcurrentTask *, long > *localSignal)
   {
     for (;;) {
       long index = -1;
       ConcurrentTask *task = localSignal->wait(index);
       if (index == -1) return;

       runOneTask(task, index);
     }
   }


public:

  // number of threads in the pool (including the calling thread)
  long NumThreads() const { return nthreads; }
  // true while an exec_* call is in progress
  bool active() const { return active_flag; }

  // creates a pool of _nthreads threads: _nthreads-1 worker threads
  // are spawned, and the calling thread itself acts as thread 0
  explicit
  BasicThreadPool(long _nthreads) :
    nthreads(_nthreads), active_flag(false), counter(0)
  {
    if (nthreads <= 0) LogicError("BasicThreadPool::BasicThreadPool: bad args");
    if (nthreads == 1) return;

    if (NTL_OVERFLOW(nthreads, 1, 0))
      ResourceError("BasicThreadPool::BasicThreadPool: arg too big");


    threadVec.SetLength(nthreads-1);

    for (long i = 0; i < nthreads-1; i++) {
      threadVec[i].make();
    }
  }

  // destroying threadVec terminates and joins all worker threads
  ~BasicThreadPool()
  {
    if (active()) TerminalError("BasicThreadPool: destructor called while active");
  }


  // adding, deleting, moving threads

  // adds n worker threads to the pool; may not be called while active
  void add(long n = 1)
  {
    if (active()) LogicError("BasicThreadPool: illegal operation while active");
    if (n <= 0) LogicError("BasicThreadPool::add: bad args");
    if (NTL_OVERFLOW(n, 1, 0))
      ResourceError("BasicThreadPool::add: arg too big");

    Vec< UniquePtr<AutomaticThread> > newThreads;

    // build the new threads first, then splice them in
    newThreads.SetLength(n);
    for (long i = 0; i < n; i++)
      newThreads[i].make();

    threadVec.SetLength(n + nthreads - 1);
    for (long i = 0; i < n; i++)
      threadVec[nthreads-1+i].move(newThreads[i]);

    nthreads += n;
  }


  // removes (terminates and joins) the last n worker threads;
  // n must satisfy 0 < n < nthreads
  void remove(long n = 1)
  {
    if (active()) LogicError("BasicThreadPool: illegal operation while active");
    if (n <= 0 || n >= nthreads) LogicError("BasicThreadPool::remove: bad args");

    // destroying each AutomaticThread stops and joins its thread
    for (long i = nthreads-1-n; i < nthreads-1; i++)
      threadVec[i] = 0;

    threadVec.SetLength(nthreads-1-n);
    nthreads -= n;
  }


  // transfers n worker threads from other to this pool;
  // n must satisfy 0 < n < other.nthreads
  void move(BasicThreadPool& other, long n = 1)
  {
    if (active() || other.active())
      LogicError("BasicThreadPool: illegal operation while active");
    if (n <= 0 || n >= other.nthreads) LogicError("BasicThreadPool::move: bad args");

    if (this == &other) return;

    threadVec.SetLength(n + nthreads - 1);
    for (long i = 0; i < n; i++)
       threadVec[nthreads-1+i].move(other.threadVec[other.nthreads-1-n+i]);

    other.threadVec.SetLength(other.nthreads-1-n);
    other.nthreads -= n;

    nthreads += n;
  }



  // High level interfaces, intended to be used with lambdas

  // In this version, fct takes one argument, which is
  // an index in [0..cnt)

  // runs fct(index) for each index in [0..cnt), one index per
  // thread; cnt must be at most nthreads (cnt <= 0 is a no-op);
  // the calling thread runs index 0
  template<class Fct>
  void exec_index(long cnt, const Fct& fct)
  {
    if (active()) LogicError("BasicThreadPool: illegal operation while active");
    if (cnt <= 0) return;
    if (cnt > nthreads) LogicError("BasicThreadPool::exec_index: bad args");

    ConcurrentTaskFct<Fct> task(this, fct);

    begin(cnt);
    for (long t = 1; t < cnt; t++) launch(&task, t);
    runOneTask(&task, 0);
    end();
  }

  // static convenience wrapper: runs fct(0) directly when cnt == 1,
  // dispatches to pool when one is available and idle, and raises
  // an error otherwise
  template<class Fct>
  static void relaxed_exec_index(BasicThreadPool *pool, long cnt, const Fct& fct)
  {
    if (cnt > 0) {
      if (cnt == 1) {
	fct(0);
      }
      else if (pool && !pool->active()) {
	pool->exec_index(cnt, fct);
      }
      else {
	LogicError("relaxed_exec_index: not enough threads");
      }
    }
  }

  // even higher level version: sz is the number of subproblems,
  // and fct takes two args, first and last, so that subproblems
  // [first..last) are processed.

  template<class Fct>
  void exec_range(long sz, const Fct& fct)
  {
    if (active()) LogicError("BasicThreadPool: illegal operation while active");
    if (sz <= 0) return;

    // split [0..sz) into at most nthreads nearly-equal intervals
    PartitionInfo pinfo(sz, nthreads);

    long cnt = pinfo.NumIntervals();
    ConcurrentTaskFct1<Fct> task(this, fct, pinfo);

    begin(cnt);
    for (long t = 1; t < cnt; t++) launch(&task, t);
    runOneTask(&task, 0);
    end();
  }

  // static convenience wrapper: falls back to a single sequential
  // call fct(0, sz) when no pool is available (or it is busy, or
  // there is just one subproblem)
  template<class Fct>
  static void relaxed_exec_range(BasicThreadPool *pool, long sz, const Fct& fct)
  {
    if (sz <= 0) return;
    if (!pool || pool->active() || sz == 1) {
      fct(0, sz);
    }
    else {
      pool->exec_range(sz, fct);
    }
  }

};
569 
570 
571 // NOTE: BasicThreadPool's are non-relocatable
572 
// A RecursiveThreadPool designates a sub-range [lo..hi) of the thread
// indices of base_pool, for divide-and-conquer execution.  It is
// itself a one-thread BasicThreadPool, so tasks launched on base_pool
// threads via exec_pair are accounted to this object's counter.
struct RecursiveThreadPool : BasicThreadPool {
   BasicThreadPool *base_pool;
   long lo, hi; // range of indices is [lo..hi)

   // the subpool covering the full index range marks the base pool
   // active for the duration of the recursion
   RecursiveThreadPool(BasicThreadPool* _base_pool, long _lo, long _hi) :
      BasicThreadPool(1), base_pool(_base_pool), lo(_lo), hi(_hi)
   {
      if (lo == 0 && hi == base_pool->nthreads)
         base_pool->active_flag = true;
   }

   ~RecursiveThreadPool()
   {
      if (lo == 0 && hi == base_pool->nthreads)
         base_pool->active_flag = false;
   }


   // runs fct0 on the current thread (as index lo) and fct1 on base
   // pool thread mid, and waits until both have finished; a captured
   // exception is rethrown by end()
   template<class Fct0, class Fct1>
   void exec_pair(long mid, const Fct0& fct0, const Fct1& fct1)
   {
     ConcurrentTaskFct<Fct0> task0(this, fct0);
     ConcurrentTaskFct<Fct1> task1(this, fct1);

     begin(2);
     base_pool->launch(&task1, mid);
     runOneTask(&task0, lo);
     end();
   }
};
603 
604 // NOTE: RecursiveThreadPool's are non-relocatable
605 
606 inline
StartRecursion(BasicThreadPool * base_pool)607 SmartPtr<RecursiveThreadPool> StartRecursion(BasicThreadPool *base_pool)
608 {
609    if (!base_pool || base_pool->active()) return 0;
610    long nthreads = base_pool->NumThreads();
611    if (nthreads <= 1) return 0;
612    return MakeSmart<RecursiveThreadPool>(base_pool, 0, nthreads);
613 }
614 
615 // NOTE: returning some kind of smart pointer ensures that
616 // the object itself will stay alive until the end of the
617 // largest enclosing expression, and then be destroyed.
618 // I could have also used a UniquePtr, and relied on the move
619 // constructor to be called.  However, NTL still has a DISABLE_MOVE
620 // option that would break that.  I could also have used
621 // std::unique_ptr; however, I'm generally avoiding those parts
622 // of the standard library. A SmartPtr has some additional
623 // overhead, but this will only be called once at the outermost
624 // recursion, so it should be OK.
625 
626 
627 
628 
629 struct RecursiveThreadPoolHelper {
630    UniquePtr<RecursiveThreadPool> subpool_stg[2];
631    RecursiveThreadPool *subpool_ptr[2];
632    long mid;
633 
concurrentRecursiveThreadPoolHelper634    bool concurrent() { return mid != 0; }
subpoolRecursiveThreadPoolHelper635    RecursiveThreadPool* subpool(long i) { return subpool_ptr[i]; }
636 
RecursiveThreadPoolHelperRecursiveThreadPoolHelper637    RecursiveThreadPoolHelper(RecursiveThreadPool *pool, bool seq, double load0)
638    {
639       mid = 0;
640       subpool_ptr[0] = subpool_ptr[1] = 0;
641 
642       if (seq || !pool) return;
643       long n = pool->hi - pool->lo;
644       if (n <= 1) return;
645 
646       long n0 = long(load0*n + 0.5);
647       if (n0 < 0 || n0 > n) LogicError("RecursiveThreadPoolHelper: bad load0");
648 
649       if (n0 == 0) {
650          subpool_ptr[1] = pool;
651          return;
652       }
653 
654       if (n0 == n) {
655          subpool_ptr[0] = pool;
656          return;
657       }
658 
659       mid = pool->lo + n0;
660 
661       long n1 = n-n0;
662       if (n0 > 1) subpool_stg[0].make(pool->base_pool, pool->lo, mid);
663       if (n1 > 1) subpool_stg[1].make(pool->base_pool, mid, pool->hi);
664 
665       subpool_ptr[0] = subpool_stg[0].get();
666       subpool_ptr[1] = subpool_stg[1].get();
667    }
668 };
669 
670 
671 
672 NTL_CLOSE_NNS
673 
674 
675 #endif
676 
677 
678 
679 #ifdef NTL_THREAD_BOOST
680 
681 #ifndef NTL_THREADS
682 #error "NTL_THREAD_BOOST requires NTL_THREADS"
683 #endif
684 
685 NTL_OPEN_NNS
686 
687 extern
688 NTL_CHEAP_THREAD_LOCAL BasicThreadPool *NTLThreadPool_ptr;
689 
inline
BasicThreadPool *GetThreadPool()
// returns the thread-local global thread pool pointer; may be null
{
   return NTLThreadPool_ptr;
}
695 
696 void ResetThreadPool(BasicThreadPool *pool = 0);
697 BasicThreadPool *ReleaseThreadPool();
698 
SetNumThreads(long n)699 inline void SetNumThreads(long n)
700 {
701    BasicThreadPool *p = (n == 1 ? 0 : MakeRaw<BasicThreadPool>(n));
702    ResetThreadPool(p);
703 }
704 
AvailableThreads()705 inline long AvailableThreads()
706 {
707    BasicThreadPool *pool = GetThreadPool();
708    if (!pool || pool->active())
709       return 1;
710    else
711       return pool->NumThreads();
712 }
713 
714 
715 NTL_CLOSE_NNS
716 
717 
// Brackets a code body to be run with (first, last) ranging over a
// partition of [0..n), possibly concurrently; dispatches through
// relaxed_exec_range on the global pool.
#define NTL_EXEC_RANGE(n, first, last)  \
{  \
   NTL_NNS BasicThreadPool::relaxed_exec_range(NTL_NNS GetThreadPool(), (n), \
     [&](long first, long last) {  \


// closes the lambda and block opened by NTL_EXEC_RANGE
#define NTL_EXEC_RANGE_END  \
   } ); \
}  \


// Like NTL_EXEC_RANGE, but forces sequential execution when seq is true
#define NTL_GEXEC_RANGE(seq, n, first, last)  \
{  \
   NTL_NNS BasicThreadPool::relaxed_exec_range((seq) ? 0 : NTL_NNS GetThreadPool(), (n), \
     [&](long first, long last) {  \


#define NTL_GEXEC_RANGE_END  \
   } ); \
}  \


// Brackets a code body to be run once per thread index in [0..n);
// dispatches through relaxed_exec_index on the global pool.
#define NTL_EXEC_INDEX(n, index)  \
{  \
   NTL_NNS BasicThreadPool::relaxed_exec_index(NTL_NNS GetThreadPool(), (n), \
     [&](long index) {  \


#define NTL_EXEC_INDEX_END  \
   } ); \
}  \



// NOTE: at least with gcc >= 4.9.2, the GEXEC versions will evaluate seq, and
// if it is true, jump directly (more or less) to the body


// declares the single-threaded fallback ("basic_") version of a
// function that also has a thread-boosted implementation
#define NTL_TBDECL(x) static void basic_ ## x
#define NTL_TBDECL_static(x) static void basic_ ## x

// declares a hidden copy of x and a new x initialized from it,
// shadowing the enclosing x (so the lambdas above capture the copy)
#define NTL_IMPORT(x) auto _ntl_hidden_variable_IMPORT__ ## x = x; auto x = _ntl_hidden_variable_IMPORT__ ##x;


// starts a divide-and-conquer region; yields a RecursiveThreadPool*
// (null when recursion is not possible)
#define NTL_INIT_DIVIDE StartRecursion(GetThreadPool()).get()

// runs F0 and F1, either back-to-back or as a concurrent pair on the
// two halves of pool, as decided by RecursiveThreadPoolHelper
#define NTL_EXEC_DIVIDE(seq, pool, helper, load0, F0, F1) \
{ \
  NTL::RecursiveThreadPoolHelper helper(pool, seq, load0); \
  if (!helper.mid) { \
    { F0; } \
    { F1; } \
  } \
  else { \
    pool->exec_pair(helper.mid,  \
      [&](long){ F0; }, \
      [&](long){ F1; } ); \
  } \
}
777 
778 
779 
780 
781 
782 #else
783 
784 NTL_OPEN_NNS
785 
786 
// single-threaded build: there is no thread pool to configure
inline void SetNumThreads(long n) { }

// single-threaded build: exactly one thread is ever available
inline long AvailableThreads() { return 1; }

struct RecursiveThreadPool;

// stand-in for RecursiveThreadPoolHelper in single-threaded builds:
// never concurrent, never yields a subpool
struct RecursiveThreadPoolDummyHelper {
   bool concurrent() { return false; }
   RecursiveThreadPool* subpool(long i) { return 0; }
};
797 
798 
799 NTL_CLOSE_NNS
800 
// Sequential fallback for NTL_EXEC_RANGE: runs the body once with
// first = 0 and last = n (only when n > 0).
#define NTL_EXEC_RANGE(n, first, last)  \
{  \
   long _ntl_par_exec_n = (n);  \
   if (_ntl_par_exec_n > 0) {  \
      long first = 0;  \
      long last = _ntl_par_exec_n;  \
      {  \


// closes the three scopes opened by NTL_EXEC_RANGE
#define NTL_EXEC_RANGE_END  }}}

// Sequential fallback: seq is ignored, body runs once over [0..n)
#define NTL_GEXEC_RANGE(seq, n, first, last)  \
{  \
   long _ntl_par_exec_n = (n);  \
   if (_ntl_par_exec_n > 0) {  \
      long first = 0;  \
      long last = _ntl_par_exec_n;  \
      {  \


#define NTL_GEXEC_RANGE_END  }}}




// Sequential fallback for NTL_EXEC_INDEX: only n <= 1 is allowed,
// and the body runs (at most once) with index = 0
#define NTL_EXEC_INDEX(n, index)  \
{  \
   long _ntl_par_exec_n = (n);  \
   if (_ntl_par_exec_n > 0) {  \
      if (_ntl_par_exec_n > 1) NTL_NNS LogicError("NTL_EXEC_INDEX: not enough threads"); \
      long index = 0;  \
      {  \


#define NTL_EXEC_INDEX_END  }}}



// single-threaded builds: no separate "basic_" fallback is needed
#define NTL_TBDECL(x) void x
#define NTL_TBDECL_static(x) static void x

#define NTL_IMPORT(x)

// no recursion support in single-threaded builds
#define NTL_INIT_DIVIDE ((RecursiveThreadPool*) 0)

// sequential fallback: runs F0 then F1
#define NTL_EXEC_DIVIDE(seq, pool, helper, load0, F0, F1) \
{ \
  NTL::RecursiveThreadPoolDummyHelper helper; \
  { F0; } \
  { F1; } \
}
852 
853 #endif
854 
855 
856 
857 
858 
859 #endif
860 
861