/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * The Original Code is Copyright (C) 2006 Blender Foundation.
 * All rights reserved.
 */

/** \file
 * \ingroup bli
 */

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#include "MEM_guardedalloc.h"

#include "BLI_gsqueue.h"
#include "BLI_listbase.h"
#include "BLI_system.h"
#include "BLI_task.h"
#include "BLI_threads.h"

#include "PIL_time.h"

/* For checking system threads - BLI_system_thread_count. */
#ifdef WIN32
#  include <sys/timeb.h>
#  include <windows.h>
#elif defined(__APPLE__)
#  include <sys/sysctl.h>
#  include <sys/types.h>
#else
#  include <sys/time.h>
#  include <unistd.h>
#endif

#ifdef WITH_TBB
#  include <tbb/spin_mutex.h>
#endif

#include "atomic_ops.h"
#include "numaapi.h"

#if defined(__APPLE__) && defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && \
    !defined(__clang__)
#  define USE_APPLE_OMP_FIX
#endif

#ifdef USE_APPLE_OMP_FIX
/* ************** libgomp (Apple gcc 4.2.1) TLS bug workaround *************** */
extern pthread_key_t gomp_tls_key;
static void *thread_tls_data;
#endif

/**
 * Basic Thread Control API
 * ========================
 *
 * Many thread cases have an X amount of jobs, while only a Y amount of
 * threads are useful (typically the number of CPUs).
 *
 * This code can be used to start a maximum amount of 'thread slots', which
 * then can be filled in a loop with an idle timer.
 *
 * A sample loop can look like this (pseudo-C):
 *
 * \code{.c}
 *
 * ListBase lb;
 * int max_threads = 2;
 * int cont = 1;
 *
 * BLI_threadpool_init(&lb, do_something_func, max_threads);
 *
 * while (cont) {
 *   if (BLI_available_threads(&lb) && !(escape loop event)) {
 *     // Get a new job (data pointer) and tag it as 'processed'.
 *     BLI_threadpool_insert(&lb, job);
 *   }
 *   else {
 *     PIL_sleep_ms(50);
 *   }
 *
 *   // Check if a job is ready; for this, do_something_func() should write into the job somewhere.
 *   cont = 0;
 *   for (go over all jobs) {
 *     if (job is ready) {
 *       if (job was not removed) {
 *         BLI_threadpool_remove(&lb, job);
 *       }
 *     }
 *     else {
 *       cont = 1;
 *     }
 *   }
 *
 *   // Conditions to exit the loop.
 *   if (escape loop event) {
 *     if (BLI_available_threads(&lb) == max_threads) {
 *       break;
 *     }
 *   }
 * }
 *
 * BLI_threadpool_end(&lb);
 *
 * \endcode
 */
static pthread_mutex_t _image_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _image_draw_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _viewer_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _custom1_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _nodes_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _movieclip_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _colormanage_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _fftw_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _view3d_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_t mainid;
static bool is_numa_available = false;
static unsigned int thread_levels = 0; /* Threads can be invoked inside threads. */
static int num_threads_override = 0;

/* Just a max for security reasons. */
#define RE_MAX_THREAD BLENDER_MAX_THREADS

typedef struct ThreadSlot {
  struct ThreadSlot *next, *prev;
  void *(*do_thread)(void *);
  void *callerdata;
  pthread_t pthread;
  int avail;
} ThreadSlot;

void BLI_threadapi_init(void)
{
  mainid = pthread_self();
  if (numaAPI_Initialize() == NUMAAPI_SUCCESS) {
    is_numa_available = true;
  }
}

void BLI_threadapi_exit(void)
{
}

/* tot = 0 only initializes the malloc mutex in a safe way (see sequence.c);
 * the problem otherwise: a scene render would kill off the mutex!
 */

void BLI_threadpool_init(ListBase *threadbase, void *(*do_thread)(void *), int tot)
{
  int a;

  if (threadbase != nullptr && tot > 0) {
    BLI_listbase_clear(threadbase);

    if (tot > RE_MAX_THREAD) {
      tot = RE_MAX_THREAD;
    }
    else if (tot < 1) {
      tot = 1;
    }

    for (a = 0; a < tot; a++) {
      ThreadSlot *tslot = static_cast<ThreadSlot *>(MEM_callocN(sizeof(ThreadSlot), "threadslot"));
      BLI_addtail(threadbase, tslot);
      tslot->do_thread = do_thread;
      tslot->avail = 1;
    }
  }

  unsigned int level = atomic_fetch_and_add_u(&thread_levels, 1);
  if (level == 0) {
#ifdef USE_APPLE_OMP_FIX
    /* Workaround for the Apple gcc 4.2.1 OMP vs background thread bug:
     * we copy the GOMP thread-local storage pointer so we can set it again
     * inside the threads that we start. */
    thread_tls_data = pthread_getspecific(gomp_tls_key);
#endif
  }
}

/* Amount of available threads. */
int BLI_available_threads(ListBase *threadbase)
{
  int counter = 0;

  LISTBASE_FOREACH (ThreadSlot *, tslot, threadbase) {
    if (tslot->avail) {
      counter++;
    }
  }

  return counter;
}

/* Returns thread number, for sample patterns or thread-safe tables. */
int BLI_threadpool_available_thread_index(ListBase *threadbase)
{
  int counter = 0;

  LISTBASE_FOREACH (ThreadSlot *, tslot, threadbase) {
    if (tslot->avail) {
      return counter;
    }
    ++counter;
  }

  return 0;
}

static void *tslot_thread_start(void *tslot_p)
{
  ThreadSlot *tslot = (ThreadSlot *)tslot_p;

#ifdef USE_APPLE_OMP_FIX
  /* Workaround for the Apple gcc 4.2.1 OMP vs background thread bug:
   * set the GOMP thread-local storage pointer which was copied beforehand. */
  pthread_setspecific(gomp_tls_key, thread_tls_data);
#endif

  return tslot->do_thread(tslot->callerdata);
}

int BLI_thread_is_main(void)
{
  return pthread_equal(pthread_self(), mainid);
}

void BLI_threadpool_insert(ListBase *threadbase, void *callerdata)
{
  LISTBASE_FOREACH (ThreadSlot *, tslot, threadbase) {
    if (tslot->avail) {
      tslot->avail = 0;
      tslot->callerdata = callerdata;
      pthread_create(&tslot->pthread, nullptr, tslot_thread_start, tslot);
      return;
    }
  }
  printf("ERROR: could not insert thread slot\n");
}

void BLI_threadpool_remove(ListBase *threadbase, void *callerdata)
{
  LISTBASE_FOREACH (ThreadSlot *, tslot, threadbase) {
    if (tslot->callerdata == callerdata) {
      pthread_join(tslot->pthread, nullptr);
      tslot->callerdata = nullptr;
      tslot->avail = 1;
    }
  }
}

void BLI_threadpool_remove_index(ListBase *threadbase, int index)
{
  int counter = 0;

  LISTBASE_FOREACH (ThreadSlot *, tslot, threadbase) {
    if (counter == index && tslot->avail == 0) {
      pthread_join(tslot->pthread, nullptr);
      tslot->callerdata = nullptr;
      tslot->avail = 1;
      break;
    }
    ++counter;
  }
}

void BLI_threadpool_clear(ListBase *threadbase)
{
  LISTBASE_FOREACH (ThreadSlot *, tslot, threadbase) {
    if (tslot->avail == 0) {
      pthread_join(tslot->pthread, nullptr);
      tslot->callerdata = nullptr;
      tslot->avail = 1;
    }
  }
}

void BLI_threadpool_end(ListBase *threadbase)
{
  /* Only needed if there's actually some stuff to end;
   * this way we don't end up decrementing thread_levels on an empty threadbase. */
  if (threadbase == nullptr || BLI_listbase_is_empty(threadbase)) {
    return;
  }

  LISTBASE_FOREACH (ThreadSlot *, tslot, threadbase) {
    if (tslot->avail == 0) {
      pthread_join(tslot->pthread, nullptr);
    }
  }
  BLI_freelistN(threadbase);
}

/* System Information */

/* How many threads are native on this system? */
int BLI_system_thread_count(void)
{
  static int t = -1;

  if (num_threads_override != 0) {
    return num_threads_override;
  }
  if (LIKELY(t != -1)) {
    return t;
  }

  {
#ifdef WIN32
    SYSTEM_INFO info;
    GetSystemInfo(&info);
    t = (int)info.dwNumberOfProcessors;
#else
#  ifdef __APPLE__
    int mib[2];
    size_t len;

    mib[0] = CTL_HW;
    mib[1] = HW_NCPU;
    len = sizeof(t);
    sysctl(mib, 2, &t, &len, nullptr, 0);
#  else
    t = (int)sysconf(_SC_NPROCESSORS_ONLN);
#  endif
#endif
  }

  CLAMP(t, 1, RE_MAX_THREAD);

  return t;
}

void BLI_system_num_threads_override_set(int num)
{
  num_threads_override = num;
}

int BLI_system_num_threads_override_get(void)
{
  return num_threads_override;
}

/* Global Mutex Locks */

static ThreadMutex *global_mutex_from_type(const int type)
{
  switch (type) {
    case LOCK_IMAGE:
      return &_image_lock;
    case LOCK_DRAW_IMAGE:
      return &_image_draw_lock;
    case LOCK_VIEWER:
      return &_viewer_lock;
    case LOCK_CUSTOM1:
      return &_custom1_lock;
    case LOCK_NODES:
      return &_nodes_lock;
    case LOCK_MOVIECLIP:
      return &_movieclip_lock;
    case LOCK_COLORMANAGE:
      return &_colormanage_lock;
    case LOCK_FFTW:
      return &_fftw_lock;
    case LOCK_VIEW3D:
      return &_view3d_lock;
    default:
      BLI_assert(0);
      return nullptr;
  }
}

void BLI_thread_lock(int type)
{
  pthread_mutex_lock(global_mutex_from_type(type));
}

void BLI_thread_unlock(int type)
{
  pthread_mutex_unlock(global_mutex_from_type(type));
}
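
/* A minimal usage sketch for the global lock API above, guarding some shared
 * image data; the work in the middle is a hypothetical stand-in for caller code.
 *
 * \code{.c}
 * BLI_thread_lock(LOCK_IMAGE);
 * // ... read or modify the shared image data ...
 * BLI_thread_unlock(LOCK_IMAGE);
 * \endcode
 */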

/* Mutex Locks */

void BLI_mutex_init(ThreadMutex *mutex)
{
  pthread_mutex_init(mutex, nullptr);
}

void BLI_mutex_lock(ThreadMutex *mutex)
{
  pthread_mutex_lock(mutex);
}

void BLI_mutex_unlock(ThreadMutex *mutex)
{
  pthread_mutex_unlock(mutex);
}

bool BLI_mutex_trylock(ThreadMutex *mutex)
{
  return (pthread_mutex_trylock(mutex) == 0);
}

void BLI_mutex_end(ThreadMutex *mutex)
{
  pthread_mutex_destroy(mutex);
}

ThreadMutex *BLI_mutex_alloc(void)
{
  ThreadMutex *mutex = static_cast<ThreadMutex *>(MEM_callocN(sizeof(ThreadMutex), "ThreadMutex"));
  BLI_mutex_init(mutex);
  return mutex;
}

void BLI_mutex_free(ThreadMutex *mutex)
{
  BLI_mutex_end(mutex);
  MEM_freeN(mutex);
}
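
/* A minimal usage sketch for the heap-allocated mutex API above: trylock lets
 * a thread skip work that another thread already holds the lock for. The data
 * being protected is a hypothetical stand-in.
 *
 * \code{.c}
 * ThreadMutex *mutex = BLI_mutex_alloc();
 *
 * if (BLI_mutex_trylock(mutex)) {
 *   // ... exclusive access to the shared data ...
 *   BLI_mutex_unlock(mutex);
 * }
 *
 * BLI_mutex_free(mutex);
 * \endcode
 */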

/* Spin Locks */

#ifdef WITH_TBB
static tbb::spin_mutex *tbb_spin_mutex_cast(SpinLock *spin)
{
  static_assert(sizeof(SpinLock) >= sizeof(tbb::spin_mutex),
                "SpinLock must match tbb::spin_mutex");
  static_assert(alignof(SpinLock) % alignof(tbb::spin_mutex) == 0,
                "SpinLock must be aligned same as tbb::spin_mutex");
  return reinterpret_cast<tbb::spin_mutex *>(spin);
}
#endif

void BLI_spin_init(SpinLock *spin)
{
#ifdef WITH_TBB
  tbb::spin_mutex *spin_mutex = tbb_spin_mutex_cast(spin);
  new (spin_mutex) tbb::spin_mutex();
#elif defined(__APPLE__)
  BLI_mutex_init(spin);
#elif defined(_MSC_VER)
  *spin = 0;
#else
  pthread_spin_init(spin, 0);
#endif
}

void BLI_spin_lock(SpinLock *spin)
{
#ifdef WITH_TBB
  tbb::spin_mutex *spin_mutex = tbb_spin_mutex_cast(spin);
  spin_mutex->lock();
#elif defined(__APPLE__)
  BLI_mutex_lock(spin);
#elif defined(_MSC_VER)
  while (InterlockedExchangeAcquire(spin, 1)) {
    while (*spin) {
      /* Spin-lock hint for processors with hyperthreading. */
      YieldProcessor();
    }
  }
#else
  pthread_spin_lock(spin);
#endif
}

void BLI_spin_unlock(SpinLock *spin)
{
#ifdef WITH_TBB
  tbb::spin_mutex *spin_mutex = tbb_spin_mutex_cast(spin);
  spin_mutex->unlock();
#elif defined(__APPLE__)
  BLI_mutex_unlock(spin);
#elif defined(_MSC_VER)
  _ReadWriteBarrier();
  *spin = 0;
#else
  pthread_spin_unlock(spin);
#endif
}

void BLI_spin_end(SpinLock *spin)
{
#ifdef WITH_TBB
  tbb::spin_mutex *spin_mutex = tbb_spin_mutex_cast(spin);
  spin_mutex->~spin_mutex();
#elif defined(__APPLE__)
  BLI_mutex_end(spin);
#elif defined(_MSC_VER)
  /* Nothing to do, spin is a simple integer type. */
#else
  pthread_spin_destroy(spin);
#endif
}
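
/* A minimal usage sketch for the spin lock API above: spin locks suit very
 * short critical sections, such as bumping a shared counter, since waiting
 * threads busy-wait instead of sleeping. `counter` is a hypothetical stand-in.
 *
 * \code{.c}
 * SpinLock spin;
 * BLI_spin_init(&spin);
 *
 * BLI_spin_lock(&spin);
 * counter++;  // Keep the critical section short: other threads spin here.
 * BLI_spin_unlock(&spin);
 *
 * BLI_spin_end(&spin);
 * \endcode
 */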

/* Read/Write Mutex Lock */

void BLI_rw_mutex_init(ThreadRWMutex *mutex)
{
  pthread_rwlock_init(mutex, nullptr);
}

void BLI_rw_mutex_lock(ThreadRWMutex *mutex, int mode)
{
  if (mode == THREAD_LOCK_READ) {
    pthread_rwlock_rdlock(mutex);
  }
  else {
    pthread_rwlock_wrlock(mutex);
  }
}

void BLI_rw_mutex_unlock(ThreadRWMutex *mutex)
{
  pthread_rwlock_unlock(mutex);
}

void BLI_rw_mutex_end(ThreadRWMutex *mutex)
{
  pthread_rwlock_destroy(mutex);
}

ThreadRWMutex *BLI_rw_mutex_alloc(void)
{
  ThreadRWMutex *mutex = static_cast<ThreadRWMutex *>(
      MEM_callocN(sizeof(ThreadRWMutex), "ThreadRWMutex"));
  BLI_rw_mutex_init(mutex);
  return mutex;
}

void BLI_rw_mutex_free(ThreadRWMutex *mutex)
{
  BLI_rw_mutex_end(mutex);
  MEM_freeN(mutex);
}
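
/* A minimal usage sketch for the read/write mutex above: many readers may
 * hold the lock at once, while a writer gets exclusive access. It assumes
 * THREAD_LOCK_WRITE is the write mode declared alongside THREAD_LOCK_READ
 * in BLI_threads.h (any non-read mode takes the write path above).
 *
 * \code{.c}
 * ThreadRWMutex *rw_lock = BLI_rw_mutex_alloc();
 *
 * BLI_rw_mutex_lock(rw_lock, THREAD_LOCK_READ);
 * // ... multiple readers may run here concurrently ...
 * BLI_rw_mutex_unlock(rw_lock);
 *
 * BLI_rw_mutex_lock(rw_lock, THREAD_LOCK_WRITE);
 * // ... exclusive writer access ...
 * BLI_rw_mutex_unlock(rw_lock);
 *
 * BLI_rw_mutex_free(rw_lock);
 * \endcode
 */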

/* Ticket Mutex Lock */
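
/* A ticket mutex grants the lock in FIFO order: each locker takes the current
 * `queue_tail` as its ticket and waits until `queue_head` reaches that ticket;
 * unlocking advances `queue_head` and wakes all waiters so the next ticket
 * holder can proceed. A minimal usage sketch:
 *
 * \code{.c}
 * TicketMutex *ticket = BLI_ticket_mutex_alloc();
 *
 * BLI_ticket_mutex_lock(ticket);
 * // ... critical section, entered in the order the threads arrived ...
 * BLI_ticket_mutex_unlock(ticket);
 *
 * BLI_ticket_mutex_free(ticket);
 * \endcode
 */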

struct TicketMutex {
  pthread_cond_t cond;
  pthread_mutex_t mutex;
  unsigned int queue_head, queue_tail;
};

TicketMutex *BLI_ticket_mutex_alloc(void)
{
  TicketMutex *ticket = static_cast<TicketMutex *>(
      MEM_callocN(sizeof(TicketMutex), "TicketMutex"));

  pthread_cond_init(&ticket->cond, nullptr);
  pthread_mutex_init(&ticket->mutex, nullptr);

  return ticket;
}

void BLI_ticket_mutex_free(TicketMutex *ticket)
{
  pthread_mutex_destroy(&ticket->mutex);
  pthread_cond_destroy(&ticket->cond);
  MEM_freeN(ticket);
}

void BLI_ticket_mutex_lock(TicketMutex *ticket)
{
  unsigned int queue_me;

  pthread_mutex_lock(&ticket->mutex);
  queue_me = ticket->queue_tail++;

  while (queue_me != ticket->queue_head) {
    pthread_cond_wait(&ticket->cond, &ticket->mutex);
  }

  pthread_mutex_unlock(&ticket->mutex);
}

void BLI_ticket_mutex_unlock(TicketMutex *ticket)
{
  pthread_mutex_lock(&ticket->mutex);
  ticket->queue_head++;
  pthread_cond_broadcast(&ticket->cond);
  pthread_mutex_unlock(&ticket->mutex);
}

/* ************************************************ */

/* Condition */

void BLI_condition_init(ThreadCondition *cond)
{
  pthread_cond_init(cond, nullptr);
}

void BLI_condition_wait(ThreadCondition *cond, ThreadMutex *mutex)
{
  pthread_cond_wait(cond, mutex);
}

void BLI_condition_wait_global_mutex(ThreadCondition *cond, const int type)
{
  pthread_cond_wait(cond, global_mutex_from_type(type));
}

void BLI_condition_notify_one(ThreadCondition *cond)
{
  pthread_cond_signal(cond);
}

void BLI_condition_notify_all(ThreadCondition *cond)
{
  pthread_cond_broadcast(cond);
}

void BLI_condition_end(ThreadCondition *cond)
{
  pthread_cond_destroy(cond);
}
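
/* A minimal usage sketch for the condition API above: the waiter must hold
 * the mutex and re-check its predicate in a loop to guard against spurious
 * wake-ups. `ready` is a hypothetical shared flag guarded by `mutex`.
 *
 * \code{.c}
 * ThreadMutex mutex;
 * ThreadCondition cond;
 * BLI_mutex_init(&mutex);
 * BLI_condition_init(&cond);
 *
 * // Waiting thread:
 * BLI_mutex_lock(&mutex);
 * while (!ready) {
 *   BLI_condition_wait(&cond, &mutex);
 * }
 * BLI_mutex_unlock(&mutex);
 *
 * // Signaling thread:
 * BLI_mutex_lock(&mutex);
 * ready = true;
 * BLI_condition_notify_one(&cond);
 * BLI_mutex_unlock(&mutex);
 * \endcode
 */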

/* ************************************************ */

struct ThreadQueue {
  GSQueue *queue;
  pthread_mutex_t mutex;
  pthread_cond_t push_cond;
  pthread_cond_t finish_cond;
  volatile int nowait;
  volatile int canceled;
};

ThreadQueue *BLI_thread_queue_init(void)
{
  ThreadQueue *queue;

  queue = static_cast<ThreadQueue *>(MEM_callocN(sizeof(ThreadQueue), "ThreadQueue"));
  queue->queue = BLI_gsqueue_new(sizeof(void *));

  pthread_mutex_init(&queue->mutex, nullptr);
  pthread_cond_init(&queue->push_cond, nullptr);
  pthread_cond_init(&queue->finish_cond, nullptr);

  return queue;
}

void BLI_thread_queue_free(ThreadQueue *queue)
{
  /* Destroy everything, assumes no one is using the queue anymore. */
  pthread_cond_destroy(&queue->finish_cond);
  pthread_cond_destroy(&queue->push_cond);
  pthread_mutex_destroy(&queue->mutex);

  BLI_gsqueue_free(queue->queue);

  MEM_freeN(queue);
}

void BLI_thread_queue_push(ThreadQueue *queue, void *work)
{
  pthread_mutex_lock(&queue->mutex);

  BLI_gsqueue_push(queue->queue, &work);

  /* Signal threads waiting to pop. */
  pthread_cond_signal(&queue->push_cond);
  pthread_mutex_unlock(&queue->mutex);
}

void *BLI_thread_queue_pop(ThreadQueue *queue)
{
  void *work = nullptr;

  /* Wait until there is work. */
  pthread_mutex_lock(&queue->mutex);
  while (BLI_gsqueue_is_empty(queue->queue) && !queue->nowait) {
    pthread_cond_wait(&queue->push_cond, &queue->mutex);
  }

  /* If we have something, pop it. */
  if (!BLI_gsqueue_is_empty(queue->queue)) {
    BLI_gsqueue_pop(queue->queue, &work);

    if (BLI_gsqueue_is_empty(queue->queue)) {
      pthread_cond_broadcast(&queue->finish_cond);
    }
  }

  pthread_mutex_unlock(&queue->mutex);

  return work;
}

static void wait_timeout(struct timespec *timeout, int ms)
{
  ldiv_t div_result;
  long sec, usec, x;

#ifdef WIN32
  {
    struct _timeb now;
    _ftime(&now);
    sec = now.time;
    usec = now.millitm * 1000; /* Microsecond precision would be better. */
  }
#else
  {
    struct timeval now;
    gettimeofday(&now, nullptr);
    sec = now.tv_sec;
    usec = now.tv_usec;
  }
#endif

  /* Add current time + millisecond offset. */
  div_result = ldiv(ms, 1000);
  timeout->tv_sec = sec + div_result.quot;

  x = usec + (div_result.rem * 1000);

  if (x >= 1000000) {
    timeout->tv_sec++;
    x -= 1000000;
  }

  timeout->tv_nsec = x * 1000;
}
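
/* A worked example of the conversion above: for ms = 2500 with a current time
 * of sec = 10 and usec = 600000, ldiv() gives quot = 2 and rem = 500, so
 * tv_sec = 12 and x = 600000 + 500000 = 1100000; since x >= 1000000 the
 * microseconds overflow into a second, leaving tv_sec = 13 and
 * tv_nsec = 100000 * 1000, i.e. an absolute deadline 2.5 s from now. */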

void *BLI_thread_queue_pop_timeout(ThreadQueue *queue, int ms)
{
  double t;
  void *work = nullptr;
  struct timespec timeout;

  t = PIL_check_seconds_timer();
  wait_timeout(&timeout, ms);

  /* Wait until there is work. */
  pthread_mutex_lock(&queue->mutex);
  while (BLI_gsqueue_is_empty(queue->queue) && !queue->nowait) {
    if (pthread_cond_timedwait(&queue->push_cond, &queue->mutex, &timeout) == ETIMEDOUT) {
      break;
    }
    if (PIL_check_seconds_timer() - t >= ms * 0.001) {
      break;
    }
  }

  /* If we have something, pop it. */
  if (!BLI_gsqueue_is_empty(queue->queue)) {
    BLI_gsqueue_pop(queue->queue, &work);

    if (BLI_gsqueue_is_empty(queue->queue)) {
      pthread_cond_broadcast(&queue->finish_cond);
    }
  }

  pthread_mutex_unlock(&queue->mutex);

  return work;
}

int BLI_thread_queue_len(ThreadQueue *queue)
{
  int size;

  pthread_mutex_lock(&queue->mutex);
  size = BLI_gsqueue_len(queue->queue);
  pthread_mutex_unlock(&queue->mutex);

  return size;
}

bool BLI_thread_queue_is_empty(ThreadQueue *queue)
{
  bool is_empty;

  pthread_mutex_lock(&queue->mutex);
  is_empty = BLI_gsqueue_is_empty(queue->queue);
  pthread_mutex_unlock(&queue->mutex);

  return is_empty;
}

void BLI_thread_queue_nowait(ThreadQueue *queue)
{
  pthread_mutex_lock(&queue->mutex);

  queue->nowait = 1;

  /* Signal threads waiting to pop. */
  pthread_cond_broadcast(&queue->push_cond);
  pthread_mutex_unlock(&queue->mutex);
}

void BLI_thread_queue_wait_finish(ThreadQueue *queue)
{
  /* Wait for finish condition. */
  pthread_mutex_lock(&queue->mutex);

  while (!BLI_gsqueue_is_empty(queue->queue)) {
    pthread_cond_wait(&queue->finish_cond, &queue->mutex);
  }

  pthread_mutex_unlock(&queue->mutex);
}
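
/* A minimal producer/consumer sketch for the queue API above; `job` and the
 * worker loop are hypothetical stand-ins for caller code.
 *
 * \code{.c}
 * ThreadQueue *queue = BLI_thread_queue_init();
 *
 * // Producer: push work; workers block inside BLI_thread_queue_pop().
 * BLI_thread_queue_push(queue, job);
 *
 * // Worker: pop returns nullptr once the queue is empty and no-wait is set.
 * while ((job = BLI_thread_queue_pop(queue))) {
 *   // ... process job ...
 * }
 *
 * // Producer: no more work will be pushed; wait for the queue to drain.
 * BLI_thread_queue_nowait(queue);
 * BLI_thread_queue_wait_finish(queue);
 * BLI_thread_queue_free(queue);
 * \endcode
 */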

/* **** Special functions to help performance on crazy NUMA setups. **** */

#if 0 /* UNUSED */
static bool check_is_threadripper2_alike_topology(void)
{
  /* NOTE: We hope the operating system does not support CPU hot-swap to
   * a different brand, and that SMP of different types is also not
   * encouraged by the system. */
  static bool is_initialized = false;
  static bool is_threadripper2 = false;
  if (is_initialized) {
    return is_threadripper2;
  }
  is_initialized = true;
  char *cpu_brand = BLI_cpu_brand_string();
  if (cpu_brand == nullptr) {
    return false;
  }
  if (strstr(cpu_brand, "Threadripper")) {
    /* NOTE: We consider all Thread-rippers to have a topology similar to
     * the second generation. This is because we are trying to utilize NUMA
     * node 0 as much as possible. This node does exist on earlier versions
     * of Thread-ripper, and setting affinity to it should not have a
     * negative effect.
     * This allows us to avoid per-model checks, making the code more
     * reliable for the CPUs which are not yet released.
     */
    if (strstr(cpu_brand, "2990WX") || strstr(cpu_brand, "2950X")) {
      is_threadripper2 = true;
    }
  }
  /* NOTE: While all dies of EPYC have a memory controller, only two of them
   * have access to the lower-indexed DDR slots. Those dies are the same as
   * on Threadripper2 with the memory controller.
   * Now, it is rather likely that a reasonable amount of users don't max
   * out their DDR slots, making it only two dies connected to a DDR slot
   * with actual memory in it. */
  if (strstr(cpu_brand, "EPYC")) {
    /* NOTE: Similarly to Thread-ripper, we do not do a model check. */
    is_threadripper2 = true;
  }
  MEM_freeN(cpu_brand);
  return is_threadripper2;
}

static void threadripper_put_process_on_fast_node(void)
{
  if (!is_numa_available) {
    return;
  }
  /* NOTE: Technically, we can use NUMA nodes 0 and 2, and using both of
   * them in the affinity mask will allow the OS to schedule threads more
   * flexibly, possibly increasing overall performance when multiple apps
   * are crunching numbers.
   *
   * However, if the scene fits into memory adjacent to a single die, we
   * don't want the OS to re-schedule the process to another die, since that
   * will move it further away from the memory allocated for the .blend file. */
  /* NOTE: Even if NUMA is available in the API but is disabled in the BIOS on
   * this workstation, we still process here. If NUMA is disabled it will be a
   * single node, so our action has no visible changes, but it allows us to
   * keep things simple and unified. */
  numaAPI_RunProcessOnNode(0);
}

static void threadripper_put_thread_on_fast_node(void)
{
  if (!is_numa_available) {
    return;
  }
  /* NOTE: This is where things become more interesting. On the one hand,
   * we can use nodes 0 and 2 and allow the operating system to do balancing
   * of processes/threads for the maximum performance when multiple apps
   * are running.
   * On the other hand, however, we probably want to use the same node as the
   * main thread, since that's where the memory of the .blend file is likely
   * to be allocated.
   * Since the main thread is currently on node 0, we also put the thread on
   * the same node. */
  /* See the additional note about NUMA disabled in BIOS above. */
  numaAPI_RunThreadOnNode(0);
}
#endif /* UNUSED */

void BLI_thread_put_process_on_fast_node(void)
{
  /* Disabled for now since this causes only 16 threads to be used on a
   * thread-ripper for computations like sculpting and fluid sim. The problem
   * is that all threads created as children from this thread will inherit
   * the NUMA node and so will end up on the same node. This can be fixed
   * case-by-case by assigning the NUMA node for every child thread, however
   * this is difficult for external libraries and OpenMP, and out of our
   * control for plugins like external renderers. */
#if 0
  if (check_is_threadripper2_alike_topology()) {
    threadripper_put_process_on_fast_node();
  }
#endif
}

void BLI_thread_put_thread_on_fast_node(void)
{
  /* Disabled for now, see comment above. */
#if 0
  if (check_is_threadripper2_alike_topology()) {
    threadripper_put_thread_on_fast_node();
  }
#endif
}