#include "HalideRuntime.h"
#include "printer.h"
#include "scoped_spin_lock.h"

/* This provides an implementation of pthreads-like mutex and
 * condition variables with fast default case performance. The code
 * is based on the "parking lot" design, and specifically on Amanieu
 * d'Antras' Rust implementation:
 *    https://github.com/Amanieu/parking_lot
 * and the one in WTF:
 *    https://webkit.org/blog/6161/locking-in-webkit/
 *
 * Neither of the above implementations is used directly, largely for
 * reasons of dependency management. This implementation lacks a few
 * features relative to those two. Specifically, timeouts are not
 * supported, nor are optional fairness or deadlock detection.
 * This implementation should provide a fairly standalone "one file"
 * fast synchronization layer on top of readily available system primitives.
 *
 * TODO: Implement a pthread_once equivalent.
 * TODO: Add a read/write lock and move SharedExclusiveSpinLock from tracing.cpp
 *       to this mechanism.
 * TODO: Add timeouts and optional fairness if needed.
 * TODO: Relying on condition variables has issues for old versions of Windows
 *       and likely has portability issues for some very bare bones embedded OSes.
 *       An implementation using only semaphores or event counters should be doable.
 */
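
// A note on the overall structure (a summary of the code below, not a spec):
// each mutex and condition variable is a single word. Blocked threads wait in
// a global hash table of queues keyed by the address of the synchronization
// object, so uncontended operations touch only that one word. Illustrative
// fast-path use via the public API at the bottom of this file (hypothetical
// driver code, not part of this module):
//
//     halide_mutex mutex = {0};     // zero-initialized == unlocked
//     halide_mutex_lock(&mutex);    // uncontended: a single CAS, 0 -> lock_bit
//     /* ... critical section ... */
//     halide_mutex_unlock(&mutex);  // uncontended: a single CAS, lock_bit -> 0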

// Copied from tsan_interface.h
#ifndef TSAN_ANNOTATIONS
#define TSAN_ANNOTATIONS 0
#endif

#if TSAN_ANNOTATIONS
extern "C" {
const unsigned __tsan_mutex_linker_init = 1 << 0;
void __tsan_mutex_pre_lock(void *addr, unsigned flags);
void __tsan_mutex_post_lock(void *addr, unsigned flags, int recursion);
int __tsan_mutex_pre_unlock(void *addr, unsigned flags);
void __tsan_mutex_post_unlock(void *addr, unsigned flags);
void __tsan_mutex_pre_signal(void *addr, unsigned flags);
void __tsan_mutex_post_signal(void *addr, unsigned flags);
}
#endif

namespace Halide {
namespace Runtime {
namespace Internal {

namespace Synchronization {

namespace {

#if TSAN_ANNOTATIONS
ALWAYS_INLINE void if_tsan_pre_lock(void *mutex) {
    __tsan_mutex_pre_lock(mutex, __tsan_mutex_linker_init);
}
// TODO(zalman|dvyukov): Is 1 the right value for a non-recursive lock? Pretty sure the value is ignored.
ALWAYS_INLINE void if_tsan_post_lock(void *mutex) {
    __tsan_mutex_post_lock(mutex, __tsan_mutex_linker_init, 1);
}
// TODO(zalman|dvyukov): Is it safe to ignore the return value here if locks are not recursive?
ALWAYS_INLINE void if_tsan_pre_unlock(void *mutex) {
    (void)__tsan_mutex_pre_unlock(mutex, __tsan_mutex_linker_init);
}
ALWAYS_INLINE void if_tsan_post_unlock(void *mutex) {
    __tsan_mutex_post_unlock(mutex, __tsan_mutex_linker_init);
}
ALWAYS_INLINE void if_tsan_pre_signal(void *cond) {
    __tsan_mutex_pre_signal(cond, 0);
}
ALWAYS_INLINE void if_tsan_post_signal(void *cond) {
    __tsan_mutex_post_signal(cond, 0);
}
#else
ALWAYS_INLINE void if_tsan_pre_lock(void *) {
}
ALWAYS_INLINE void if_tsan_post_lock(void *) {
}
ALWAYS_INLINE void if_tsan_pre_unlock(void *) {
}
ALWAYS_INLINE void if_tsan_post_unlock(void *) {
}
ALWAYS_INLINE void if_tsan_pre_signal(void *) {
}
ALWAYS_INLINE void if_tsan_post_signal(void *) {
}
#endif

#ifdef BITS_32
// On 32-bit targets we fall back to the gcc-style __sync builtins, which are
// full barriers, so every operation here is at least as strong as its name
// advertises.
ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
    return __sync_and_and_fetch(addr, val);
}

template<typename T>
ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
    return __sync_fetch_and_add(addr, val);
}

template<typename T>
ALWAYS_INLINE bool cas_strong_sequentially_consistent_helper(T *addr, T *expected, T *desired) {
    T oldval = *expected;
    T gotval = __sync_val_compare_and_swap(addr, oldval, *desired);
    *expected = gotval;
    return oldval == gotval;
}

ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

template<typename T>
ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

ALWAYS_INLINE uintptr_t atomic_fetch_and_release(uintptr_t *addr, uintptr_t val) {
    return __sync_fetch_and_and(addr, val);
}

template<typename T>
ALWAYS_INLINE void atomic_load_relaxed(T *addr, T *val) {
    *val = *addr;
}

template<typename T>
ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) {
    *val = *addr;
    // Fence after the load: later accesses may not be reordered above it (acquire).
    __sync_synchronize();
}

ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
    return __sync_or_and_fetch(addr, val);
}

ALWAYS_INLINE void atomic_store_relaxed(uintptr_t *addr, uintptr_t *val) {
    *addr = *val;
}

template<typename T>
ALWAYS_INLINE void atomic_store_release(T *addr, T *val) {
    // Fence before the store: earlier accesses may not be reordered below it (release).
    __sync_synchronize();
    *addr = *val;
}

ALWAYS_INLINE void atomic_thread_fence_acquire() {
    __sync_synchronize();
}

#else

ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
    return __atomic_and_fetch(addr, val, __ATOMIC_RELEASE);
}

template<typename T>
ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
    return __atomic_fetch_add(addr, val, __ATOMIC_ACQ_REL);
}

ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}

template<typename T>
ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
}

ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}

ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
}

ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}

ALWAYS_INLINE uintptr_t atomic_fetch_and_release(uintptr_t *addr, uintptr_t val) {
    return __atomic_fetch_and(addr, val, __ATOMIC_RELEASE);
}

template<typename T>
ALWAYS_INLINE void atomic_load_relaxed(T *addr, T *val) {
    __atomic_load(addr, val, __ATOMIC_RELAXED);
}

template<typename T>
ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) {
    __atomic_load(addr, val, __ATOMIC_ACQUIRE);
}

ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
    return __atomic_or_fetch(addr, val, __ATOMIC_RELAXED);
}

ALWAYS_INLINE void atomic_store_relaxed(uintptr_t *addr, uintptr_t *val) {
    __atomic_store(addr, val, __ATOMIC_RELAXED);
}

template<typename T>
ALWAYS_INLINE void atomic_store_release(T *addr, T *val) {
    __atomic_store(addr, val, __ATOMIC_RELEASE);
}

ALWAYS_INLINE void atomic_thread_fence_acquire() {
    __atomic_thread_fence(__ATOMIC_ACQUIRE);
}

#endif
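
// The naming convention above is operation_order, or, for compare-and-swap,
// operation_successorder_failorder. For example, atomic_cas_weak_acquire_relaxed
// corresponds to this C++11 equivalent (an illustrative mapping; std::atomic is
// not used by this runtime):
//
//     std::atomic<uintptr_t> a;
//     a.compare_exchange_weak(*expected, *desired,
//                             std::memory_order_acquire,
//                             std::memory_order_relaxed);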

} // namespace

class spin_control {
    int spin_count;

public:
    // Everyone says this should be 40. Have not measured it.
    ALWAYS_INLINE spin_control()
        : spin_count(40) {
    }

    ALWAYS_INLINE bool should_spin() {
        if (spin_count > 0) {
            spin_count--;
        }
        return spin_count > 0;
    }

    ALWAYS_INLINE void reset() {
        spin_count = 40;
    }
};

#if __cplusplus >= 201103L
// The low-order two bits of a lock word are used for locking state.
static constexpr uint8_t lock_bit = 0x01;
static constexpr uint8_t queue_lock_bit = 0x02;
static constexpr uint8_t parked_bit = 0x02;
#else
#define lock_bit 0x01
#define queue_lock_bit 0x02
#define parked_bit 0x02
#endif
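
// Note that queue_lock_bit and parked_bit share the value 0x02 but are never
// used on the same word: word_lock below uses lock_bit and queue_lock_bit
// (the remaining bits of its state hold a pointer to the head of its waiter
// queue), while fast_mutex uses lock_bit and parked_bit (its waiters live in
// the global hash table of parked threads).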

struct word_lock_queue_data {
    thread_parker parker;  // TODO: member or pointer?

    // This design is from the Rust parking lot implementation by Amanieu d'Antras.
    // Comment from original:
    //
    // Linked list of threads in the queue. The queue is split into two parts:
    // the processed part and the unprocessed part. When new nodes are added to
    // the list, they only have the next pointer set, and queue_tail is null.
    //
    // Nodes are processed with the queue lock held, which consists of setting
    // the prev pointer for each node and setting the queue_tail pointer on the
    // first processed node of the list.
    //
    // This setup allows nodes to be added to the queue without a lock, while
    // still allowing O(1) removal of nodes from the processed part of the list.
    // The only cost is the O(n) processing, but this only needs to be done
    // once for each node, and therefore isn't too expensive.

    word_lock_queue_data *next;
    word_lock_queue_data *prev;

    word_lock_queue_data *tail;

    ALWAYS_INLINE word_lock_queue_data()
        : next(NULL), prev(NULL), tail(NULL) {
    }

    // Inlined, empty dtor needed to avoid confusing MachO builds.
    ALWAYS_INLINE ~word_lock_queue_data() {
    }
};

class word_lock {
    uintptr_t state;

    void lock_full();
    void unlock_full();

public:
    ALWAYS_INLINE word_lock()
        : state(0) {
    }

    ALWAYS_INLINE void lock() {
        if_tsan_pre_lock(this);

        uintptr_t expected = 0;
        uintptr_t desired = lock_bit;
        // Try for a fast grab of the lock bit. If this does not work, call the full adaptive looping code.
        if (!atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
            lock_full();
        }

        if_tsan_post_lock(this);
    }

    ALWAYS_INLINE void unlock() {
        if_tsan_pre_unlock(this);

        uintptr_t val = atomic_fetch_and_release(&state, ~(uintptr_t)lock_bit);
        // If another thread is currently queueing, that thread will ensure
        // it acquires the lock or wakes a waiting thread.
        bool no_thread_queuing = (val & queue_lock_bit) == 0;
        // Only need to do a wakeup if there are threads waiting.
        bool some_queued = (val & ~(uintptr_t)(queue_lock_bit | lock_bit)) != 0;
        if (no_thread_queuing && some_queued) {
            unlock_full();
        }

        if_tsan_post_unlock(this);
    }
};
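
// The word_lock state is a tagged pointer: the bits above the low two hold
// the address of the most recently queued word_lock_queue_data (the nodes
// live on the waiting threads' stacks; see lock_full below), and the low two
// bits are lock_bit and queue_lock_bit. This is why masking with
// ~(queue_lock_bit | lock_bit) recovers the queue head, and it requires
// word_lock_queue_data to be aligned to at least four bytes.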

WEAK void word_lock::lock_full() {
    spin_control spinner;
    uintptr_t expected;
    atomic_load_relaxed(&state, &expected);

    while (true) {
        if (!(expected & lock_bit)) {
            uintptr_t desired = expected | lock_bit;

            if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
                return;
            }
            continue;
        }

        if (((expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) != 0) && spinner.should_spin()) {
            halide_thread_yield();
            atomic_load_relaxed(&state, &expected);
            continue;
        }

        word_lock_queue_data node;

        node.parker.prepare_park();
        // TODO: set up prelinkage parking state.

        word_lock_queue_data *head = (word_lock_queue_data *)(expected & ~(uintptr_t)(queue_lock_bit | lock_bit));
        if (head == NULL) {
            node.tail = &node;
            // constructor set node.prev = NULL;
        } else {
            // Leave the tail as NULL. The unlock routine will walk the list and wake up
            // the thread at the end.
            // constructor set node.tail = NULL;
            // constructor set node.prev = NULL;
            node.next = head;
        }

        uintptr_t desired = ((uintptr_t)&node) | (expected & (queue_lock_bit | lock_bit));
        if (atomic_cas_weak_release_relaxed(&state, &expected, &desired)) {
            node.parker.park();
            spinner.reset();
            atomic_load_relaxed(&state, &expected);
        }
    }
}

WEAK void word_lock::unlock_full() {
    uintptr_t expected;
    atomic_load_relaxed(&state, &expected);

    while (true) {
        // If another thread is currently queueing, that thread will ensure
        // it acquires the lock or wakes a waiting thread.
        bool thread_queuing = (expected & queue_lock_bit);
        // Only need to do a wakeup if there are threads waiting.
        bool none_queued = (expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) == 0;
        if (thread_queuing || none_queued) {
            return;
        }

        uintptr_t desired = expected | queue_lock_bit;
        if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
            break;
        }
    }

    while (true) {
        word_lock_queue_data *head = (word_lock_queue_data *)(expected & ~(uintptr_t)(queue_lock_bit | lock_bit));
        word_lock_queue_data *current = head;
        word_lock_queue_data *tail = current->tail;
        int times_through = 0;
        while (tail == NULL) {
            word_lock_queue_data *next = current->next;
            halide_assert(NULL, next != NULL);
            next->prev = current;
            current = next;
            tail = current->tail;
            times_through++;
        }
        head->tail = tail;

        // If the lock is now locked, unlock the queue and have the thread
        // that currently holds the lock do the wakeup.
        if (expected & lock_bit) {
            uintptr_t desired = expected & ~(uintptr_t)queue_lock_bit;
            if (atomic_cas_weak_relacq_relaxed(&state, &expected, &desired)) {
                return;
            }
            atomic_thread_fence_acquire();
            continue;
        }

        word_lock_queue_data *new_tail = tail->prev;
        if (new_tail == NULL) {
            bool continue_outer = false;
            while (!continue_outer) {
                uintptr_t desired = expected & lock_bit;
                if (atomic_cas_weak_relacq_relaxed(&state, &expected, &desired)) {
                    break;
                }
                if ((expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) == 0) {
                    continue;
                } else {
                    atomic_thread_fence_acquire();
                    continue_outer = true;
                }
            }

            if (continue_outer) {
                continue;
            }
        } else {
            head->tail = new_tail;
            atomic_and_fetch_release(&state, ~(uintptr_t)queue_lock_bit);
        }

        // TODO: The reason there are three calls here is that other things can happen
        // between them. Also it is not clear whether unpark_start has to return the
        // mutex/flag used by unpark and unpark_finish due to memory lifetime reasons.
        tail->parker.unpark_start();
        tail->parker.unpark();
        tail->parker.unpark_finish();
        break;
    }
}

struct queue_data {
    thread_parker parker;  // TODO: member or pointer?

    uintptr_t sleep_address;

    queue_data *next;

    uintptr_t unpark_info;

    ALWAYS_INLINE queue_data()
        : sleep_address(0), next(NULL), unpark_info(0) {
    }
    // Inlined, empty dtor needed to avoid confusing MachO builds.
    ALWAYS_INLINE ~queue_data() {
    }
};

// Must be a power of two.
#define LOAD_FACTOR 4

struct hash_bucket {
    word_lock mutex;

    queue_data *head;  // Is this queue_data or thread_data?
    queue_data *tail;  // Is this queue_data or thread_data?
};

// The use of a #define here, and of raw table_storage, is because if
// a class with a constructor were used, clang would generate a COMDAT,
// which cannot be lowered for Mac OS due to MachO. A better
// solution is desired, of course.
#define HASH_TABLE_BITS 10
struct hash_table {
    hash_bucket buckets[MAX_THREADS * LOAD_FACTOR];
};
WEAK char table_storage[sizeof(hash_table)];
#define table (*(hash_table *)table_storage)

ALWAYS_INLINE void check_hash(uintptr_t hash) {
    halide_assert(NULL, hash < sizeof(table.buckets) / sizeof(table.buckets[0]));
}

#if 0
WEAK void dump_hash() {
    int i = 0;
    for (auto &bucket : table.buckets) {
        queue_data *head = bucket.head;
        while (head != NULL) {
            print(NULL) << "Bucket index " << i << " addr " << (void *)head->sleep_address << "\n";
            head = head->next;
        }
        i++;
    }
}
#endif

static ALWAYS_INLINE uintptr_t addr_hash(uintptr_t addr, uint32_t bits) {
    // Fibonacci hashing. The golden ratio is 1.9E3779B97F4A7C15F39...
    // in hexadecimal.
    if (sizeof(uintptr_t) >= 8) {
        return (addr * (uintptr_t)0x9E3779B97F4A7C15) >> (64 - bits);
    } else {
        return (addr * (uintptr_t)0x9E3779B9) >> (32 - bits);
    }
}
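
// Worked example of the hash, for the 64-bit branch with bits == HASH_TABLE_BITS
// == 10: the multiplier is 2^64 divided by the golden ratio, so multiplying
// scrambles the high bits of the product, and shifting right by 64 - 10 = 54
// keeps the top ten bits, giving a bucket index in [0, 1024). check_hash()
// then asserts the index fits in table.buckets, which requires
// MAX_THREADS * LOAD_FACTOR >= 1 << HASH_TABLE_BITS.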

WEAK hash_bucket &lock_bucket(uintptr_t addr) {
    uintptr_t hash = addr_hash(addr, HASH_TABLE_BITS);

    check_hash(hash);

    // TODO: if resizing is implemented, loop, etc.
    hash_bucket &bucket = table.buckets[hash];

    bucket.mutex.lock();

    return bucket;
}

struct bucket_pair {
    hash_bucket &from;
    hash_bucket &to;

    ALWAYS_INLINE bucket_pair(hash_bucket &from, hash_bucket &to)
        : from(from), to(to) {
    }
};

WEAK bucket_pair lock_bucket_pair(uintptr_t addr_from, uintptr_t addr_to) {
    // TODO: if resizing is implemented, loop, etc.
    uintptr_t hash_from = addr_hash(addr_from, HASH_TABLE_BITS);
    uintptr_t hash_to = addr_hash(addr_to, HASH_TABLE_BITS);

    check_hash(hash_from);
    check_hash(hash_to);

    // Lock the bucket with the smaller hash first in order
    // to prevent deadlock.
    if (hash_from == hash_to) {
        hash_bucket &first = table.buckets[hash_from];
        first.mutex.lock();
        return bucket_pair(first, first);
    } else if (hash_from < hash_to) {
        hash_bucket &first = table.buckets[hash_from];
        hash_bucket &second = table.buckets[hash_to];
        first.mutex.lock();
        second.mutex.lock();
        return bucket_pair(first, second);
    } else {
        hash_bucket &first = table.buckets[hash_to];
        hash_bucket &second = table.buckets[hash_from];
        first.mutex.lock();
        second.mutex.lock();
        return bucket_pair(second, first);
    }
}

WEAK void unlock_bucket_pair(bucket_pair &buckets) {
    // In the lock routine, the buckets are locked smaller hash index first.
    // Here we reverse this ordering by comparing the pointers. This works
    // since the pointers are obtained by indexing an array with the hash
    // values.
    if (&buckets.from == &buckets.to) {
        buckets.from.mutex.unlock();
    } else if (&buckets.from > &buckets.to) {
        buckets.from.mutex.unlock();
        buckets.to.mutex.unlock();
    } else {
        buckets.to.mutex.unlock();
        buckets.from.mutex.unlock();
    }
}
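
// The park/unpark machinery below is parameterized by a parking_control: a
// small struct of callbacks invoked by the generic routines. validate()
// decides, with the bucket lock held, whether the caller should actually
// sleep; before_sleep() runs after the bucket lock is dropped but before
// blocking; unpark() updates protocol state as a waiter is chosen; and
// requeue_callback() runs after waiters have been moved during a requeue.
// The defaults below are all no-ops.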

struct validate_action {
    bool unpark_one;
    uintptr_t invalid_unpark_info;

    ALWAYS_INLINE validate_action()
        : unpark_one(false), invalid_unpark_info(0) {
    }
};

WEAK bool parking_control_validate(void *control, validate_action &action) {
    return true;
}
WEAK void parking_control_before_sleep(void *control) {
}
WEAK uintptr_t parking_control_unpark(void *control, int unparked, bool more_waiters) {
    return 0;
}
WEAK void parking_control_requeue_callback(void *control, const validate_action &action, bool one_to_wake, bool some_requeued) {
}

struct parking_control {
    bool (*validate)(void *control, validate_action &action);
    void (*before_sleep)(void *control);
    uintptr_t (*unpark)(void *control, int unparked, bool more_waiters);
    void (*requeue_callback)(void *control, const validate_action &action, bool one_to_wake, bool some_requeued);

    ALWAYS_INLINE parking_control()
        : validate(parking_control_validate), before_sleep(parking_control_before_sleep),
          unpark(parking_control_unpark), requeue_callback(parking_control_requeue_callback) {
    }
};

// TODO: Do we need a park_result thing here?
WEAK uintptr_t park(uintptr_t addr, parking_control &control) {
    queue_data queue_data;

    hash_bucket &bucket = lock_bucket(addr);

    validate_action action;
    if (!control.validate(&control, action)) {
        bucket.mutex.unlock();
        return action.invalid_unpark_info;
    }

    queue_data.next = NULL;
    queue_data.sleep_address = addr;
    queue_data.parker.prepare_park();
    if (bucket.head != NULL) {
        bucket.tail->next = &queue_data;
    } else {
        bucket.head = &queue_data;
    }
    bucket.tail = &queue_data;
    bucket.mutex.unlock();

    control.before_sleep(&control);

    queue_data.parker.park();

    return queue_data.unpark_info;

    // TODO: handling timeout.
}
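
// Note that the queue_data node above lives on the parked thread's stack.
// Handing its address to other threads is safe only because a waiter does
// not return from park() until an unparker is done touching the node; the
// three-phase unpark_start/unpark/unpark_finish protocol (implemented by
// thread_parker elsewhere) exists to provide that guarantee -- see the
// memory-lifetime TODO in word_lock::unlock_full above.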

WEAK uintptr_t unpark_one(uintptr_t addr, parking_control &control) {
    hash_bucket &bucket = lock_bucket(addr);

    queue_data **data_location = &bucket.head;
    queue_data *prev = NULL;
    queue_data *data = *data_location;
    while (data != NULL) {
        uintptr_t cur_addr;
        atomic_load_relaxed(&data->sleep_address, &cur_addr);
        if (cur_addr == addr) {
            queue_data *next = data->next;
            *data_location = next;

            bool more_waiters = false;

            if (bucket.tail == data) {
                bucket.tail = prev;
            } else {
                queue_data *data2 = next;
                while (data2 != NULL && !more_waiters) {
                    uintptr_t cur_addr2;
                    atomic_load_relaxed(&data2->sleep_address, &cur_addr2);
                    more_waiters = (cur_addr2 == addr);
                    data2 = data2->next;
                }
            }

            data->unpark_info = control.unpark(&control, 1, more_waiters);

            data->parker.unpark_start();
            bucket.mutex.unlock();
            data->parker.unpark();
            data->parker.unpark_finish();

            // TODO: Figure out the return type.
            return more_waiters ? 1 : 0;
        } else {
            data_location = &data->next;
            prev = data;
            data = data->next;
        }
    }

    control.unpark(&control, 0, false);

    bucket.mutex.unlock();

    // TODO: decide if this is the right return value.
    return 0;
}

WEAK uintptr_t unpark_all(uintptr_t addr, uintptr_t unpark_info) {
    hash_bucket &bucket = lock_bucket(addr);

    queue_data **data_location = &bucket.head;
    queue_data *prev = NULL;
    queue_data *data = *data_location;
    size_t waiters = 0;
    queue_data *temp_list_storage[16];
    queue_data **temp_list = &temp_list_storage[0];
    size_t max_waiters = sizeof(temp_list_storage) / sizeof(temp_list_storage[0]);

    while (data != NULL) {
        uintptr_t cur_addr;
        atomic_load_relaxed(&data->sleep_address, &cur_addr);

        queue_data *next = data->next;
        if (cur_addr == addr) {
            *data_location = next;

            if (bucket.tail == data) {
                bucket.tail = prev;
            }

            if (waiters == max_waiters) {
                queue_data **temp = temp_list;
                temp_list = (queue_data **)malloc(sizeof(queue_data *) * max_waiters * 2);
                for (size_t i = 0; i < max_waiters; i++) {
                    temp_list[i] = temp[i];
                }
                max_waiters *= 2;
                if (temp != &temp_list_storage[0]) {
                    free(temp);
                }
            }

            data->unpark_info = unpark_info;

            temp_list[waiters++] = data;
            data->parker.unpark_start();

            data = next;
        } else {
            // A waiter parked on a different address: advance the link
            // pointer rather than rewriting it, so the node stays in the
            // bucket's list (same as in unpark_one above).
            data_location = &data->next;
            prev = data;
            data = next;
        }
    }

    bucket.mutex.unlock();

    for (size_t i = 0; i < waiters; i++) {
        temp_list[i]->parker.unpark();
    }

    // TODO: decide if this really needs to be two loops.
    for (size_t i = 0; i < waiters; i++) {
        temp_list[i]->parker.unpark_finish();
    }

    if (temp_list != &temp_list_storage[0]) {
        free(temp_list);
    }

    return waiters;
}
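
// In both unpark_one and unpark_all, unpark_start() is called on each chosen
// waiter while the bucket lock is still held, and unpark()/unpark_finish()
// only after it is released. This keeps the actual (potentially slow) wakeups
// out of the bucket's critical section while still choosing waiters under the
// lock.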

WEAK int unpark_requeue(uintptr_t addr_from, uintptr_t addr_to, parking_control &control, uintptr_t unpark_info) {
    bucket_pair buckets = lock_bucket_pair(addr_from, addr_to);

    validate_action action;
    if (!control.validate(&control, action)) {
        unlock_bucket_pair(buckets);
        return 0;
    }

    queue_data **data_location = &buckets.from.head;
    queue_data *prev = NULL;
    queue_data *data = *data_location;
    queue_data *requeue = NULL;
    queue_data *requeue_tail = NULL;
    queue_data *wakeup = NULL;

    while (data != NULL) {
        uintptr_t cur_addr;
        atomic_load_relaxed(&data->sleep_address, &cur_addr);

        queue_data *next = data->next;
        if (cur_addr == addr_from) {
            *data_location = next;

            if (buckets.from.tail == data) {
                buckets.from.tail = prev;
            }

            if (action.unpark_one && wakeup == NULL) {
                wakeup = data;
            } else {
                if (requeue == NULL) {
                    requeue = data;
                } else {
                    requeue_tail->next = data;
                }

                requeue_tail = data;
                atomic_store_relaxed(&data->sleep_address, &addr_to);
            }
            data = next;
            // TODO: prev ptr?
        } else {
            data_location = &data->next;
            prev = data;
            data = next;
        }
    }

    if (requeue != NULL) {
        requeue_tail->next = NULL;
        if (buckets.to.head == NULL) {
            buckets.to.head = requeue;
        } else {
            buckets.to.tail->next = requeue;
        }
        buckets.to.tail = requeue_tail;
    }

    control.requeue_callback(&control, action, wakeup != NULL, requeue != NULL);

    if (wakeup != NULL) {
        wakeup->unpark_info = unpark_info;
        wakeup->parker.unpark_start();
        unlock_bucket_pair(buckets);
        wakeup->parker.unpark();
        wakeup->parker.unpark_finish();
    } else {
        unlock_bucket_pair(buckets);
    }

    return wakeup != NULL && action.unpark_one;
}

WEAK bool mutex_parking_control_validate(void *control, validate_action &action);
WEAK uintptr_t mutex_parking_control_unpark(void *control, int unparked, bool more_waiters);
struct mutex_parking_control : parking_control {
    uintptr_t *lock_state;

    ALWAYS_INLINE mutex_parking_control(uintptr_t *lock_state)
        : lock_state(lock_state) {
        validate = mutex_parking_control_validate;
        unpark = mutex_parking_control_unpark;
    }
};

// Only used in parking -- lock_full.
WEAK bool mutex_parking_control_validate(void *control, validate_action &action) {
    mutex_parking_control *mutex_control = (mutex_parking_control *)control;

    uintptr_t result;
    atomic_load_relaxed(mutex_control->lock_state, &result);
    return result == (lock_bit | parked_bit);
}

// Only used in unparking -- unlock_full.
WEAK uintptr_t mutex_parking_control_unpark(void *control, int unparked, bool more_waiters) {
    mutex_parking_control *mutex_control = (mutex_parking_control *)control;

    // TODO: consider handling fairness.
    uintptr_t return_state = more_waiters ? parked_bit : 0;
    atomic_store_release(mutex_control->lock_state, &return_state);

    return 0;
}

class fast_mutex {
    uintptr_t state;

    ALWAYS_INLINE void lock_full() {
        // Everyone says this should be 40. Have not measured it.
        spin_control spinner;
        uintptr_t expected;
        atomic_load_relaxed(&state, &expected);

        while (true) {
            if (!(expected & lock_bit)) {
                uintptr_t desired = expected | lock_bit;
                if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
                    return;
                }
                continue;
            }

            // If no one is parked, spin with spin count.
            if ((expected & parked_bit) == 0 && spinner.should_spin()) {
                halide_thread_yield();
                atomic_load_relaxed(&state, &expected);
                continue;
            }

            // Mark the mutex as having parked threads if not already done.
            if ((expected & parked_bit) == 0) {
                uintptr_t desired = expected | parked_bit;
                if (!atomic_cas_weak_relaxed_relaxed(&state, &expected, &desired)) {
                    continue;
                }
            }

            // TODO: consider handling fairness, timeout.
            mutex_parking_control control(&state);
            uintptr_t result = park((uintptr_t)this, control);
            if (result == (uintptr_t)this) {
                return;
            }

            spinner.reset();
            atomic_load_relaxed(&state, &expected);
        }
    }

    ALWAYS_INLINE void unlock_full() {
        uintptr_t expected = lock_bit;
        uintptr_t desired = 0;
        // Try for a fast release of the lock. Redundant with code in unlock, but done
        // to make unlock_full a standalone unlock that can be called directly.
        if (atomic_cas_strong_release_relaxed(&state, &expected, &desired)) {
            return;
        }

        mutex_parking_control control(&state);
        unpark_one((uintptr_t)this, control);
    }

public:
    ALWAYS_INLINE void lock() {
        uintptr_t expected = 0;
        uintptr_t desired = lock_bit;
        // Try for a fast grab of the lock bit. If this does not work, call the full adaptive looping code.
        if (!atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
            lock_full();
        }
    }

    ALWAYS_INLINE void unlock() {
        uintptr_t expected = lock_bit;
        uintptr_t desired = 0;
        // Try for a fast release of the lock bit. If this does not work, call the full adaptive looping code.
        if (!atomic_cas_weak_release_relaxed(&state, &expected, &desired)) {
            unlock_full();
        }
    }

    ALWAYS_INLINE bool make_parked_if_locked() {
        uintptr_t val;
        atomic_load_relaxed(&state, &val);
        while (true) {
            if (!(val & lock_bit)) {
                return false;
            }

            uintptr_t desired = val | parked_bit;
            if (atomic_cas_weak_relaxed_relaxed(&state, &val, &desired)) {
                return true;
            }
        }
    }

    ALWAYS_INLINE void make_parked() {
        atomic_or_fetch_relaxed(&state, parked_bit);
    }
};
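
// Note that mutex_parking_control_unpark above always returns 0, so the
// result == (uintptr_t)this check in lock_full() can never succeed as
// written. It appears to be a hook for a direct lock handoff to the woken
// thread, which the fairness TODOs mention but do not implement.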

WEAK uintptr_t signal_parking_control_unpark(void *control, int unparked, bool more_waiters);
struct signal_parking_control : parking_control {
    uintptr_t *cond_state;
    fast_mutex *mutex;

    ALWAYS_INLINE signal_parking_control(uintptr_t *cond_state, fast_mutex *mutex)
        : cond_state(cond_state), mutex(mutex) {
        unpark = signal_parking_control_unpark;
    }
};

WEAK uintptr_t signal_parking_control_unpark(void *control, int unparked, bool more_waiters) {
    signal_parking_control *signal_control = (signal_parking_control *)control;

    if (!more_waiters) {
        uintptr_t val = 0;
        atomic_store_relaxed(signal_control->cond_state, &val);
    }

#if 0  // TODO: figure out why this was here.
    return (uintptr_t)signal_control->mutex;
#else
    return 0;
#endif
}

WEAK bool broadcast_parking_control_validate(void *control, validate_action &action);
WEAK void broadcast_parking_control_requeue_callback(void *control, const validate_action &action,
                                                     bool one_to_wake, bool some_requeued);
struct broadcast_parking_control : parking_control {
    uintptr_t *cond_state;
    fast_mutex *mutex;

    ALWAYS_INLINE broadcast_parking_control(uintptr_t *cond_state, fast_mutex *mutex)
        : cond_state(cond_state), mutex(mutex) {
        validate = broadcast_parking_control_validate;
        requeue_callback = broadcast_parking_control_requeue_callback;
    }
};

WEAK bool broadcast_parking_control_validate(void *control, validate_action &action) {
    broadcast_parking_control *broadcast_control = (broadcast_parking_control *)control;

    uintptr_t val;
    atomic_load_relaxed(broadcast_control->cond_state, &val);
    // If, by the time this broadcast locked everything and was processed, the cond
    // has progressed to a new mutex, do nothing, since any waiting threads have
    // to be waiting on what is effectively a different condition.
    if (val != (uintptr_t)broadcast_control->mutex) {
        return false;
    }
    // Clear the cond's connection to the mutex, as all waiting threads are going to requeue onto the mutex.
    val = 0;
    atomic_store_relaxed(broadcast_control->cond_state, &val);

    action.unpark_one = !broadcast_control->mutex->make_parked_if_locked();

    return true;
}

WEAK void broadcast_parking_control_requeue_callback(void *control, const validate_action &action, bool one_to_wake, bool some_requeued) {
    broadcast_parking_control *broadcast_control = (broadcast_parking_control *)control;

    if (action.unpark_one && some_requeued) {
        broadcast_control->mutex->make_parked();
    }
}

WEAK bool wait_parking_control_validate(void *control, validate_action &action);
WEAK void wait_parking_control_before_sleep(void *control);
WEAK uintptr_t wait_parking_control_unpark(void *control, int unparked, bool more_waiters);
struct wait_parking_control : parking_control {
    uintptr_t *cond_state;
    fast_mutex *mutex;

    ALWAYS_INLINE wait_parking_control(uintptr_t *cond_state, fast_mutex *mutex)
        : cond_state(cond_state), mutex(mutex) {
        validate = wait_parking_control_validate;
        before_sleep = wait_parking_control_before_sleep;
        unpark = wait_parking_control_unpark;
    }
};

WEAK bool wait_parking_control_validate(void *control, validate_action &action) {
    wait_parking_control *wait_control = (wait_parking_control *)control;

    uintptr_t val;
    atomic_load_relaxed(wait_control->cond_state, &val);

    if (val == 0) {
        val = (uintptr_t)wait_control->mutex;
        atomic_store_relaxed(wait_control->cond_state, &val);
    } else if (val != (uintptr_t)wait_control->mutex) {
        // TODO: signal an error: the cond is already associated with a different mutex.
        action.invalid_unpark_info = (uintptr_t)wait_control->mutex;
        return false;
    }

    return true;
}

WEAK void wait_parking_control_before_sleep(void *control) {
    wait_parking_control *wait_control = (wait_parking_control *)control;

    wait_control->mutex->unlock();
}

WEAK uintptr_t wait_parking_control_unpark(void *control, int unparked, bool more_waiters) {
    wait_parking_control *wait_control = (wait_parking_control *)control;

    if (!more_waiters) {
        uintptr_t val = 0;
        atomic_store_relaxed(wait_control->cond_state, &val);
    }
    return 0;
}

class fast_cond {
    uintptr_t state;

public:
    ALWAYS_INLINE fast_cond()
        : state(0) {
    }

    ALWAYS_INLINE void signal() {
        if_tsan_pre_signal(this);

        uintptr_t val;
        atomic_load_relaxed(&state, &val);
        if (val == 0) {
            if_tsan_post_signal(this);
            return;
        }
        signal_parking_control control(&state, (fast_mutex *)val);
        unpark_one((uintptr_t)this, control);
        if_tsan_post_signal(this);
    }

    ALWAYS_INLINE void broadcast() {
        if_tsan_pre_signal(this);
        uintptr_t val;
        atomic_load_relaxed(&state, &val);
        if (val == 0) {
            if_tsan_post_signal(this);
            return;
        }
        broadcast_parking_control control(&state, (fast_mutex *)val);
        unpark_requeue((uintptr_t)this, val, control, 0);
        if_tsan_post_signal(this);
    }

    ALWAYS_INLINE void wait(fast_mutex *mutex) {
        wait_parking_control control(&state, mutex);
        uintptr_t result = park((uintptr_t)this, control);
        if (result != (uintptr_t)mutex) {
            mutex->lock();
        } else {
            if_tsan_pre_lock(mutex);

            // TODO: this is debug only.
            uintptr_t val;
            atomic_load_relaxed((uintptr_t *)mutex, &val);
            halide_assert(NULL, val & 0x1);

            if_tsan_post_lock(mutex);
        }
    }
};
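
// The fast_cond state word is either 0 (no waiters) or a pointer to the
// fast_mutex its current waiters hold. Typical (illustrative) use through the
// public API follows the standard condition variable pattern -- hold the
// mutex and re-check the predicate in a loop:
//
//     halide_mutex_lock(&m);
//     while (!ready) {
//         halide_cond_wait(&cond, &m);
//     }
//     halide_mutex_unlock(&m);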

}  // namespace Synchronization

}  // namespace Internal
}  // namespace Runtime
}  // namespace Halide

extern "C" {

WEAK void halide_mutex_lock(halide_mutex *mutex) {
    Halide::Runtime::Internal::Synchronization::fast_mutex *fast_mutex =
        (Halide::Runtime::Internal::Synchronization::fast_mutex *)mutex;
    fast_mutex->lock();
}

WEAK void halide_mutex_unlock(halide_mutex *mutex) {
    Halide::Runtime::Internal::Synchronization::fast_mutex *fast_mutex =
        (Halide::Runtime::Internal::Synchronization::fast_mutex *)mutex;
    fast_mutex->unlock();
}

WEAK void halide_cond_broadcast(struct halide_cond *cond) {
    Halide::Runtime::Internal::Synchronization::fast_cond *fast_cond =
        (Halide::Runtime::Internal::Synchronization::fast_cond *)cond;
    fast_cond->broadcast();
}

WEAK void halide_cond_signal(struct halide_cond *cond) {
    Halide::Runtime::Internal::Synchronization::fast_cond *fast_cond =
        (Halide::Runtime::Internal::Synchronization::fast_cond *)cond;
    fast_cond->signal();
}

WEAK void halide_cond_wait(struct halide_cond *cond, struct halide_mutex *mutex) {
    Halide::Runtime::Internal::Synchronization::fast_cond *fast_cond =
        (Halide::Runtime::Internal::Synchronization::fast_cond *)cond;
    Halide::Runtime::Internal::Synchronization::fast_mutex *fast_mutex =
        (Halide::Runtime::Internal::Synchronization::fast_mutex *)mutex;
    fast_cond->wait(fast_mutex);
}

// Actual definition of the mutex array.
struct halide_mutex_array {
    struct halide_mutex *array;
};

WEAK halide_mutex_array *halide_mutex_array_create(int sz) {
    // TODO: If sz is huge, we should probably hash it down to something smaller
    // in the accessors below. Check for deadlocks before doing so.
    halide_mutex_array *array = (halide_mutex_array *)halide_malloc(
        NULL, sizeof(halide_mutex_array));
    if (array == NULL) {
        // Will result in a failed assertion and a call to halide_error.
        return NULL;
    }
    array->array = (halide_mutex *)halide_malloc(
        NULL, sz * sizeof(halide_mutex));
    if (array->array == NULL) {
        halide_free(NULL, array);
        // Will result in a failed assertion and a call to halide_error.
        return NULL;
    }
    memset(array->array, 0, sz * sizeof(halide_mutex));
    return array;
}

WEAK void halide_mutex_array_destroy(void *user_context, void *array) {
    struct halide_mutex_array *arr_ptr = (struct halide_mutex_array *)array;
    halide_free(user_context, arr_ptr->array);
    halide_free(user_context, arr_ptr);
}

WEAK int halide_mutex_array_lock(struct halide_mutex_array *array, int entry) {
    halide_mutex_lock(&array->array[entry]);
    return 0;
}

WEAK int halide_mutex_array_unlock(struct halide_mutex_array *array, int entry) {
    halide_mutex_unlock(&array->array[entry]);
    return 0;
}
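
// Illustrative use of the mutex array API (hypothetical driver code; entry
// indices must be in [0, sz), and no bounds checking is done here):
//
//     halide_mutex_array *locks = halide_mutex_array_create(16);
//     halide_mutex_array_lock(locks, 3);
//     /* ... critical section for entry 3 ... */
//     halide_mutex_array_unlock(locks, 3);
//     halide_mutex_array_destroy(NULL, locks);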
}