1 /*
2 * Copyright 2013-2016 Formal Methods and Tools, University of Twente
3 * Copyright 2016-2017 Tom van Dijk, Johannes Kepler University Linz
4 * Copyright 2019-2021 Tom van Dijk, Formal Methods and Tools, University of Twente
5 *
6 * Licensed under the Apache License, Version 2.0 (the License);
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an AS IS BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 #include <unistd.h>
20 #include <stdint.h>
21 #include <stdio.h>
22 #include <pthread.h> /* for pthread_t */
23
24 #ifndef __LACE_H__
25 #define __LACE_H__
26
27 #define LACE_PIE_TIMES 0
28 #define LACE_COUNT_TASKS 0
29 #define LACE_COUNT_STEALS 0
30 #define LACE_COUNT_SPLITS 0
31 #define LACE_USE_HWLOC 0
32 #define LACE_USE_MMAP 0
33
34 #ifdef __cplusplus
35 extern "C" {
36 #endif /* __cplusplus */
37
38 /**
39 * Using Lace.
40 *
41 * Optionally set the verbosity level with lace_set_verbosity.
42 * Optionally set the default program stack size of each worker thread with lace_set_stacksize.
43 *
44 * Then call lace_start to start Lace workers.
45 * - lace_start(n_workers, deque_size);
46 * set both parameters to 0 for reasonable defaults, using all available cores.
47 *
48 * After this, you can run tasks using the RUN(...)
49 *
50 * Use lace_suspend and lace_resume to temporarily stop running, or lace_stop to completely stop Lace.
51 */
52
53 /**
54 * Type definitions used in the functions below.
55 * - WorkerP contains the (private) Worker data
56 * - Task contains a single Task
57 */
58 typedef struct _WorkerP WorkerP;
59 typedef struct _Task Task;
60
61 /**
62 * The macro LACE_TYPEDEF_CB(typedefname, taskname, parametertypes) defines
63 * a Task for use as a callback function.
64 */
65 #define LACE_TYPEDEF_CB(t, f, ...) typedef t (*f)(WorkerP *, Task *, ##__VA_ARGS__);
66
67 /**
68 * Set verbosity level (0 = no startup messages, 1 = startup messages)
69 * Default level: 0
70 */
71 void lace_set_verbosity(int level);
72
73 /**
74 * Set the program stack size of Lace worker threads. (Not really needed, default is OK.)
75 */
76 void lace_set_stacksize(size_t stacksize);
77
78 /**
79 * Get the program stack size of Lace worker threads.
80 * If this returns 0, it uses the default.
81 */
82 size_t lace_get_stacksize(void);
83
84 /**
85 * Get the number of available PUs (hardware threads)
86 */
87 unsigned int lace_get_pu_count(void);
88
89 /**
90 * Start Lace with <n_workers> workers and a a task deque size of <dqsize> per worker.
91 * If <n_workers> is set to 0, automatically detects available cores.
92 * If <dqsize> is est to 0, uses a reasonable default value.
93 */
94 void lace_start(unsigned int n_workers, size_t dqsize);
95
96 /**
97 * Suspend all workers.
98 * Call this method from outside Lace threads.
99 */
100 void lace_suspend(void);
101
102 /**
103 * Resume all workers.
104 * Call this method from outside Lace threads.
105 */
106 void lace_resume(void);
107
108 /**
109 * Stop Lace.
110 * Call this method from outside Lace threads.
111 */
112 void lace_stop(void);
113
114 /**
115 * Steal a random task.
116 * Only use this from inside a Lace task.
117 */
118 #define lace_steal_random() CALL(lace_steal_random)
119 void lace_steal_random_CALL(WorkerP*, Task*);
120
121 /**
122 * Enter the Lace barrier. (all active workers must enter it before we can continue)
123 * Only run this from inside a Lace task.
124 */
125 void lace_barrier(void);
126
127 /**
128 * Retrieve the number of Lace workers
129 */
130 unsigned int lace_workers(void);
131
132 /**
133 * Retrieve the current worker data.
134 * Only run this from inside a Lace task.
135 * (Used by LACE_VARS)
136 */
137 WorkerP *lace_get_worker(void);
138
139 /**
140 * Retrieve the current head of the deque of the worker.
141 * (Used by LACE_VARS)
142 */
143 Task *lace_get_head(WorkerP *);
144
145 /**
146 * Helper function to call from outside Lace threads.
147 * This helper function is used by the _RUN methods for the RUN() macro.
148 */
149 void lace_run_task(Task *task);
150
151 /**
152 * Helper function to start a new task execution (task frame) on a given task.
153 * This helper function is used by the _NEWFRAME methods for the NEWFRAME() macro
154 * Only when the task is done, do workers continue with the previous task frame.
155 */
156 void lace_run_newframe(Task *task);
157
158 /**
159 * Helper function to make all run a given task together.
160 * This helper function is used by the _TOGETHER methods for the TOGETHER() macro
161 * They all start the task in a lace_barrier and complete it with a lace barrier.
162 * Meaning they all start together, and all end together.
163 */
164 void lace_run_together(Task *task);
165
166 /**
167 * Create a pointer to a Tasks main function.
168 */
169 #define TASK(f) ( f##_CALL )
170
171 /**
172 * Call a Tasks implementation (adds Lace variables to call)
173 */
174 #define WRAP(f, ...) ( f((WorkerP *)__lace_worker, (Task *)__lace_dq_head, ##__VA_ARGS__) )
175
176 /**
177 * Sync a task.
178 */
179 #define SYNC(f) ( __lace_dq_head--, WRAP(f##_SYNC) )
180
181 /**
182 * Sync a task, but if the task is not stolen, then do not execute it.
183 */
184 #define DROP() ( __lace_dq_head--, WRAP(lace_drop) )
185
186 /**
187 * Spawn a task.
188 */
189 #define SPAWN(f, ...) ( WRAP(f##_SPAWN, ##__VA_ARGS__), __lace_dq_head++ )
190
191 /**
192 * Directly execute a task from inside a Lace thread.
193 */
194 #define CALL(f, ...) ( WRAP(f##_CALL, ##__VA_ARGS__) )
195
196 /**
197 * Directly execute a task from outside Lace threads.
198 */
199 #define RUN(f, ...) ( f##_RUN ( __VA_ARGS__ ) )
200
201 /**
202 * Signal all workers to interrupt their current tasks and instead perform (a personal copy of) the given task.
203 */
204 #define TOGETHER(f, ...) ( f##_TOGETHER ( __VA_ARGS__) )
205
206 /**
207 * Signal all workers to interrupt their current tasks and help the current thread with the given task.
208 */
209 #define NEWFRAME(f, ...) ( f##_NEWFRAME ( __VA_ARGS__) )
210
211 /**
212 * (Try to) steal a task from a random worker.
213 */
214 #define STEAL_RANDOM() ( CALL(lace_steal_random) )
215
216 /**
217 * Get the current worker id.
218 */
219 #define LACE_WORKER_ID ( __lace_worker->worker )
220
221 /**
222 * Get the core where the current worker is pinned.
223 */
224 #define LACE_WORKER_PU ( __lace_worker->pu )
225
226 /**
227 * Initialize local variables __lace_worker and __lace_dq_head which are required for most Lace functionality.
228 * This only works inside a Lace thread.
229 */
230 #define LACE_VARS WorkerP * __attribute__((unused)) __lace_worker = lace_get_worker(); Task * __attribute__((unused)) __lace_dq_head = lace_get_head(__lace_worker);
231
232 /**
233 * Check if current tasks must be interrupted, and if so, interrupt.
234 */
235 void lace_yield(WorkerP *__lace_worker, Task *__lace_dq_head);
236 #define YIELD_NEWFRAME() { if (unlikely((*(Task* volatile *)&lace_newframe.t) != NULL)) lace_yield(__lace_worker, __lace_dq_head); }
237
238 /**
239 * True if the given task is stolen, False otherwise.
240 */
241 #define TASK_IS_STOLEN(t) ((size_t)t->thief > 1)
242
243 /**
244 * True if the given task is completed, False otherwise.
245 */
246 #define TASK_IS_COMPLETED(t) ((size_t)t->thief == 2)
247
248 /**
249 * Retrieves a pointer to the result of the given task.
250 */
251 #define TASK_RESULT(t) (&t->d[0])
252
253 /**
254 * Compute a random number, thread-local (so scalable)
255 */
256 #define LACE_TRNG (__lace_worker->rng = 2862933555777941757ULL * __lace_worker->rng + 3037000493ULL)
257
258 /* Some flags that influence Lace behavior */
259
260 #ifndef LACE_LEAP_RANDOM /* Use random leaping when leapfrogging fails */
261 #define LACE_LEAP_RANDOM 1
262 #endif
263
264 #ifndef LACE_COUNT_EVENTS
265 #define LACE_COUNT_EVENTS (LACE_PIE_TIMES || LACE_COUNT_TASKS || LACE_COUNT_STEALS || LACE_COUNT_SPLITS)
266 #endif
267
268 /**
269 * Now follows the implementation of Lace
270 */
271
272 /* Typical cacheline size of system architectures */
273 #ifndef LINE_SIZE
274 #define LINE_SIZE 64
275 #endif
276
277 /* The size of a pointer, 8 bytes on a 64-bit architecture */
278 #define P_SZ (sizeof(void *))
279
280 #define PAD(x,b) ( ( (b) - ((x)%(b)) ) & ((b)-1) ) /* b must be power of 2 */
281 #define ROUND(x,b) ( (x) + PAD( (x), (b) ) )
282
283 /* The size is in bytes. Note that this is without the extra overhead from Lace.
284 The value must be greater than or equal to the maximum size of your tasks.
285 The task size is the maximum of the size of the result or of the sum of the parameter sizes. */
286 #ifndef LACE_TASKSIZE
287 #define LACE_TASKSIZE (6)*P_SZ
288 #endif
289
290 /* Some fences */
291 #ifndef compiler_barrier
292 #define compiler_barrier() { asm volatile("" ::: "memory"); }
293 #endif
294
295 #ifndef mfence
296 #if defined(__amd64__) || defined(__i386__)
297 #define mfence() { asm volatile("mfence" ::: "memory"); }
298 #elif defined(__powerpc__)
299 #define mfence() { asm volatile("sync" ::: "memory"); }
300 #endif
301 #endif
302
303 /* Compiler specific branch prediction optimization */
304 #ifndef likely
305 #define likely(x) __builtin_expect((x),1)
306 #endif
307
308 #ifndef unlikely
309 #define unlikely(x) __builtin_expect((x),0)
310 #endif
311
312 #if LACE_PIE_TIMES
313 /* High resolution timer */
gethrtime()314 static inline uint64_t gethrtime()
315 {
316 uint32_t hi, lo;
317 asm volatile ("rdtsc" : "=a"(lo), "=d"(hi) :: "memory");
318 return (uint64_t)hi<<32 | lo;
319 }
320 #endif
321
322 #if LACE_COUNT_EVENTS
323 void lace_count_reset();
324 void lace_count_report_file(FILE *file);
325 #endif
326
327 #if LACE_COUNT_TASKS
328 #define PR_COUNTTASK(s) PR_INC(s,CTR_tasks)
329 #else
330 #define PR_COUNTTASK(s) /* Empty */
331 #endif
332
333 #if LACE_COUNT_STEALS
334 #define PR_COUNTSTEALS(s,i) PR_INC(s,i)
335 #else
336 #define PR_COUNTSTEALS(s,i) /* Empty */
337 #endif
338
339 #if LACE_COUNT_SPLITS
340 #define PR_COUNTSPLITS(s,i) PR_INC(s,i)
341 #else
342 #define PR_COUNTSPLITS(s,i) /* Empty */
343 #endif
344
345 #if LACE_COUNT_EVENTS
346 #define PR_ADD(s,i,k) ( ((s)->ctr[i])+=k )
347 #else
348 #define PR_ADD(s,i,k) /* Empty */
349 #endif
350 #define PR_INC(s,i) PR_ADD(s,i,1)
351
352 typedef enum {
353 #ifdef LACE_COUNT_TASKS
354 CTR_tasks, /* Number of tasks spawned */
355 #endif
356 #ifdef LACE_COUNT_STEALS
357 CTR_steal_tries, /* Number of steal attempts */
358 CTR_leap_tries, /* Number of leap attempts */
359 CTR_steals, /* Number of succesful steals */
360 CTR_leaps, /* Number of succesful leaps */
361 CTR_steal_busy, /* Number of steal busies */
362 CTR_leap_busy, /* Number of leap busies */
363 #endif
364 #ifdef LACE_COUNT_SPLITS
365 CTR_split_grow, /* Number of split right */
366 CTR_split_shrink,/* Number of split left */
367 CTR_split_req, /* Number of split requests */
368 #endif
369 CTR_fast_sync, /* Number of fast syncs */
370 CTR_slow_sync, /* Number of slow syncs */
371 #ifdef LACE_PIE_TIMES
372 CTR_init, /* Timer for initialization */
373 CTR_close, /* Timer for shutdown */
374 CTR_wapp, /* Timer for application code (steal) */
375 CTR_lapp, /* Timer for application code (leap) */
376 CTR_wsteal, /* Timer for steal code (steal) */
377 CTR_lsteal, /* Timer for steal code (leap) */
378 CTR_wstealsucc, /* Timer for succesful steal code (steal) */
379 CTR_lstealsucc, /* Timer for succesful steal code (leap) */
380 CTR_wsignal, /* Timer for signal after work (steal) */
381 CTR_lsignal, /* Timer for signal after work (leap) */
382 #endif
383 CTR_MAX
384 } CTR_index;
385
386 #define THIEF_EMPTY ((struct _Worker*)0x0)
387 #define THIEF_TASK ((struct _Worker*)0x1)
388 #define THIEF_COMPLETED ((struct _Worker*)0x2)
389
390 #define TASK_COMMON_FIELDS(type) \
391 void (*f)(struct _WorkerP *, struct _Task *, struct type *); \
392 struct _Worker * volatile thief;
393
394 struct __lace_common_fields_only { TASK_COMMON_FIELDS(_Task) };
395 #define LACE_COMMON_FIELD_SIZE sizeof(struct __lace_common_fields_only)
396
397 typedef struct _Task {
398 TASK_COMMON_FIELDS(_Task);
399 char p1[PAD(LACE_COMMON_FIELD_SIZE, P_SZ)];
400 char d[LACE_TASKSIZE];
401 char p2[PAD(ROUND(LACE_COMMON_FIELD_SIZE, P_SZ) + LACE_TASKSIZE, LINE_SIZE)];
402 } Task;
403
404 typedef union __attribute__((packed)) {
405 struct {
406 uint32_t tail;
407 uint32_t split;
408 } ts;
409 uint64_t v;
410 } TailSplit;
411
412 typedef struct _Worker {
413 Task *dq;
414 TailSplit ts;
415 uint8_t allstolen;
416
417 char pad1[PAD(P_SZ+sizeof(TailSplit)+1, LINE_SIZE)];
418
419 uint8_t movesplit;
420 } Worker;
421
422 typedef struct _WorkerP {
423 Task *dq; // same as dq
424 Task *split; // same as dq+ts.ts.split
425 Task *end; // dq+dq_size
426 Worker *_public; // pointer to public Worker struct
427 uint64_t rng; // my random seed (for lace_trng)
428 uint32_t seed; // my random seed (for lace_steal_random)
429 uint16_t worker; // what is my worker id?
430 uint8_t allstolen; // my allstolen
431 volatile int8_t enabled; // if this worker is enabled
432
433 #if LACE_COUNT_EVENTS
434 uint64_t ctr[CTR_MAX]; // counters
435 volatile uint64_t time;
436 volatile int level;
437 #endif
438
439 int16_t pu; // my pu (for HWLOC)
440 } WorkerP;
441
442 #define LACE_STOLEN ((Worker*)0)
443 #define LACE_BUSY ((Worker*)1)
444 #define LACE_NOWORK ((Worker*)2)
445
446 void lace_abort_stack_overflow(void) __attribute__((noreturn));
447
448 typedef struct
449 {
450 Task *t;
451 char pad[LINE_SIZE-sizeof(Task *)];
452 } lace_newframe_t;
453
454 extern lace_newframe_t lace_newframe;
455
456 /**
457 * Make all tasks of the current worker shared.
458 */
459 #define LACE_MAKE_ALL_SHARED() lace_make_all_shared(__lace_worker, __lace_dq_head)
460 static inline void __attribute__((unused))
lace_make_all_shared(WorkerP * w,Task * __lace_dq_head)461 lace_make_all_shared( WorkerP *w, Task *__lace_dq_head)
462 {
463 if (w->split != __lace_dq_head) {
464 w->split = __lace_dq_head;
465 w->_public->ts.ts.split = __lace_dq_head - w->dq;
466 }
467 }
468
469 #if LACE_PIE_TIMES
lace_time_event(WorkerP * w,int event)470 static void lace_time_event( WorkerP *w, int event )
471 {
472 uint64_t now = gethrtime(),
473 prev = w->time;
474
475 switch( event ) {
476
477 // Enter application code
478 case 1 :
479 if( w->level /* level */ == 0 ) {
480 PR_ADD( w, CTR_init, now - prev );
481 w->level = 1;
482 } else if( w->level /* level */ == 1 ) {
483 PR_ADD( w, CTR_wsteal, now - prev );
484 PR_ADD( w, CTR_wstealsucc, now - prev );
485 } else {
486 PR_ADD( w, CTR_lsteal, now - prev );
487 PR_ADD( w, CTR_lstealsucc, now - prev );
488 }
489 break;
490
491 // Exit application code
492 case 2 :
493 if( w->level /* level */ == 1 ) {
494 PR_ADD( w, CTR_wapp, now - prev );
495 } else {
496 PR_ADD( w, CTR_lapp, now - prev );
497 }
498 break;
499
500 // Enter sync on stolen
501 case 3 :
502 if( w->level /* level */ == 1 ) {
503 PR_ADD( w, CTR_wapp, now - prev );
504 } else {
505 PR_ADD( w, CTR_lapp, now - prev );
506 }
507 w->level++;
508 break;
509
510 // Exit sync on stolen
511 case 4 :
512 if( w->level /* level */ == 1 ) {
513 fprintf( stderr, "This should not happen, level = %d\n", w->level );
514 } else {
515 PR_ADD( w, CTR_lsteal, now - prev );
516 }
517 w->level--;
518 break;
519
520 // Return from failed steal
521 case 7 :
522 if( w->level /* level */ == 0 ) {
523 PR_ADD( w, CTR_init, now - prev );
524 } else if( w->level /* level */ == 1 ) {
525 PR_ADD( w, CTR_wsteal, now - prev );
526 } else {
527 PR_ADD( w, CTR_lsteal, now - prev );
528 }
529 break;
530
531 // Signalling time
532 case 8 :
533 if( w->level /* level */ == 1 ) {
534 PR_ADD( w, CTR_wsignal, now - prev );
535 PR_ADD( w, CTR_wsteal, now - prev );
536 } else {
537 PR_ADD( w, CTR_lsignal, now - prev );
538 PR_ADD( w, CTR_lsteal, now - prev );
539 }
540 break;
541
542 // Done
543 case 9 :
544 if( w->level /* level */ == 0 ) {
545 PR_ADD( w, CTR_init, now - prev );
546 } else {
547 PR_ADD( w, CTR_close, now - prev );
548 }
549 break;
550
551 default: return;
552 }
553
554 w->time = now;
555 }
556 #else
557 #define lace_time_event( w, e ) /* Empty */
558 #endif
559
560 static Worker* __attribute__((noinline))
lace_steal(WorkerP * self,Task * __dq_head,Worker * victim)561 lace_steal(WorkerP *self, Task *__dq_head, Worker *victim)
562 {
563 if (victim != NULL && !victim->allstolen) {
564 /* Must be a volatile. In GCC 4.8, if it is not declared volatile, the
565 compiler will optimize extra memory accesses to victim->ts instead
566 of comparing the local values ts.ts.tail and ts.ts.split, causing
567 thieves to steal non existent tasks! */
568 TailSplit ts;
569 ts.v = *(volatile uint64_t *)&victim->ts.v;
570 if (ts.ts.tail < ts.ts.split) {
571 TailSplit ts_new;
572 ts_new.v = ts.v;
573 ts_new.ts.tail++;
574 if (__sync_bool_compare_and_swap(&victim->ts.v, ts.v, ts_new.v)) {
575 // Stolen
576 Task *t = &victim->dq[ts.ts.tail];
577 t->thief = self->_public;
578 lace_time_event(self, 1);
579 t->f(self, __dq_head, t);
580 lace_time_event(self, 2);
581 t->thief = THIEF_COMPLETED;
582 lace_time_event(self, 8);
583 return LACE_STOLEN;
584 }
585
586 lace_time_event(self, 7);
587 return LACE_BUSY;
588 }
589
590 if (victim->movesplit == 0) {
591 victim->movesplit = 1;
592 PR_COUNTSPLITS(self, CTR_split_req);
593 }
594 }
595
596 lace_time_event(self, 7);
597 return LACE_NOWORK;
598 }
599
600 static int
lace_shrink_shared(WorkerP * w)601 lace_shrink_shared(WorkerP *w)
602 {
603 Worker *wt = w->_public;
604 TailSplit ts;
605 ts.v = wt->ts.v; /* Force in 1 memory read */
606 uint32_t tail = ts.ts.tail;
607 uint32_t split = ts.ts.split;
608
609 if (tail != split) {
610 uint32_t newsplit = (tail + split)/2;
611 wt->ts.ts.split = newsplit;
612 mfence();
613 tail = *(volatile uint32_t *)&(wt->ts.ts.tail);
614 if (tail != split) {
615 if (unlikely(tail > newsplit)) {
616 newsplit = (tail + split) / 2;
617 wt->ts.ts.split = newsplit;
618 }
619 w->split = w->dq + newsplit;
620 PR_COUNTSPLITS(w, CTR_split_shrink);
621 return 0;
622 }
623 }
624
625 wt->allstolen = 1;
626 w->allstolen = 1;
627 return 1;
628 }
629
630 static inline void
lace_leapfrog(WorkerP * __lace_worker,Task * __lace_dq_head)631 lace_leapfrog(WorkerP *__lace_worker, Task *__lace_dq_head)
632 {
633 lace_time_event(__lace_worker, 3);
634 Task *t = __lace_dq_head;
635 Worker *thief = t->thief;
636 if (thief != THIEF_COMPLETED) {
637 while ((size_t)thief <= 1) thief = t->thief;
638
639 /* PRE-LEAP: increase head again */
640 __lace_dq_head += 1;
641
642 /* Now leapfrog */
643 int attempts = 32;
644 while (thief != THIEF_COMPLETED) {
645 PR_COUNTSTEALS(__lace_worker, CTR_leap_tries);
646 Worker *res = lace_steal(__lace_worker, __lace_dq_head, thief);
647 if (res == LACE_NOWORK) {
648 YIELD_NEWFRAME();
649 if ((LACE_LEAP_RANDOM) && (--attempts == 0)) { lace_steal_random(); attempts = 32; }
650 } else if (res == LACE_STOLEN) {
651 PR_COUNTSTEALS(__lace_worker, CTR_leaps);
652 } else if (res == LACE_BUSY) {
653 PR_COUNTSTEALS(__lace_worker, CTR_leap_busy);
654 }
655 compiler_barrier();
656 thief = t->thief;
657 }
658
659 /* POST-LEAP: really pop the finished task */
660 /* no need to decrease __lace_dq_head, since it is a local variable */
661 compiler_barrier();
662 if (__lace_worker->allstolen == 0) {
663 /* Assume: tail = split = head (pre-pop) */
664 /* Now we do a real pop ergo either decrease tail,split,head or declare allstolen */
665 Worker *wt = __lace_worker->_public;
666 wt->allstolen = 1;
667 __lace_worker->allstolen = 1;
668 }
669 }
670
671 compiler_barrier();
672 t->thief = THIEF_EMPTY;
673 lace_time_event(__lace_worker, 4);
674 }
675
676 static __attribute__((noinline))
lace_drop_slow(WorkerP * w,Task * __dq_head)677 void lace_drop_slow(WorkerP *w, Task *__dq_head)
678 {
679 if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) lace_leapfrog(w, __dq_head);
680 }
681
682 static inline __attribute__((unused))
lace_drop(WorkerP * w,Task * __dq_head)683 void lace_drop(WorkerP *w, Task *__dq_head)
684 {
685 if (likely(0 == w->_public->movesplit)) {
686 if (likely(w->split <= __dq_head)) {
687 return;
688 }
689 }
690 lace_drop_slow(w, __dq_head);
691 }
692
693
694
695 // Task macros for tasks of arity 0
696
697 #define TASK_DECL_0(RTYPE, NAME) \
698 \
699 typedef struct _TD_##NAME { \
700 TASK_COMMON_FIELDS(_TD_##NAME) \
701 union { RTYPE res; } d; \
702 } TD_##NAME; \
703 \
704 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
705 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
706 \
707 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
708 RTYPE NAME##_CALL(WorkerP *, Task * ); \
709 static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
710 static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
711 \
712 static inline __attribute__((unused)) \
713 void NAME##_SPAWN(WorkerP *w, Task *__dq_head ) \
714 { \
715 PR_COUNTTASK(w); \
716 \
717 TD_##NAME *t; \
718 TailSplit ts; \
719 uint32_t head, split, newsplit; \
720 \
721 if (__dq_head == w->end) lace_abort_stack_overflow(); \
722 \
723 t = (TD_##NAME *)__dq_head; \
724 t->f = &NAME##_WRAP; \
725 t->thief = THIEF_TASK; \
726 \
727 compiler_barrier(); \
728 \
729 Worker *wt = w->_public; \
730 if (unlikely(w->allstolen)) { \
731 if (wt->movesplit) wt->movesplit = 0; \
732 head = __dq_head - w->dq; \
733 ts = (TailSplit){{head,head+1}}; \
734 wt->ts.v = ts.v; \
735 compiler_barrier(); \
736 wt->allstolen = 0; \
737 w->split = __dq_head+1; \
738 w->allstolen = 0; \
739 } else if (unlikely(wt->movesplit)) { \
740 head = __dq_head - w->dq; \
741 split = w->split - w->dq; \
742 newsplit = (split + head + 2)/2; \
743 wt->ts.ts.split = newsplit; \
744 w->split = w->dq + newsplit; \
745 compiler_barrier(); \
746 wt->movesplit = 0; \
747 PR_COUNTSPLITS(w, CTR_split_grow); \
748 } \
749 } \
750 \
751 static inline __attribute__((unused)) \
752 RTYPE NAME##_NEWFRAME() \
753 { \
754 Task _t; \
755 TD_##NAME *t = (TD_##NAME *)&_t; \
756 t->f = &NAME##_WRAP; \
757 t->thief = THIEF_TASK; \
758 \
759 lace_run_newframe(&_t); \
760 return ((TD_##NAME *)t)->d.res; \
761 } \
762 \
763 static inline __attribute__((unused)) \
764 void NAME##_TOGETHER() \
765 { \
766 Task _t; \
767 TD_##NAME *t = (TD_##NAME *)&_t; \
768 t->f = &NAME##_WRAP; \
769 t->thief = THIEF_TASK; \
770 \
771 lace_run_together(&_t); \
772 } \
773 \
774 static inline __attribute__((unused)) \
775 RTYPE NAME##_RUN() \
776 { \
777 Task _t; \
778 TD_##NAME *t = (TD_##NAME *)&_t; \
779 t->f = &NAME##_WRAP; \
780 t->thief = THIEF_TASK; \
781 \
782 lace_run_task(&_t); \
783 return ((TD_##NAME *)t)->d.res; \
784 } \
785 \
786 static __attribute__((noinline)) \
787 RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
788 { \
789 TD_##NAME *t; \
790 \
791 if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
792 lace_leapfrog(w, __dq_head); \
793 t = (TD_##NAME *)__dq_head; \
794 return ((TD_##NAME *)t)->d.res; \
795 } \
796 \
797 compiler_barrier(); \
798 \
799 Worker *wt = w->_public; \
800 if (wt->movesplit) { \
801 Task *t = w->split; \
802 size_t diff = __dq_head - t; \
803 diff = (diff + 1) / 2; \
804 w->split = t + diff; \
805 wt->ts.ts.split += diff; \
806 compiler_barrier(); \
807 wt->movesplit = 0; \
808 PR_COUNTSPLITS(w, CTR_split_grow); \
809 } \
810 \
811 compiler_barrier(); \
812 \
813 t = (TD_##NAME *)__dq_head; \
814 t->thief = THIEF_EMPTY; \
815 return NAME##_CALL(w, __dq_head ); \
816 } \
817 \
818 static inline __attribute__((unused)) \
819 RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
820 { \
821 /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
822 \
823 if (likely(0 == w->_public->movesplit)) { \
824 if (likely(w->split <= __dq_head)) { \
825 TD_##NAME *t = (TD_##NAME *)__dq_head; \
826 t->thief = THIEF_EMPTY; \
827 return NAME##_CALL(w, __dq_head ); \
828 } \
829 } \
830 \
831 return NAME##_SYNC_SLOW(w, __dq_head); \
832 } \
833 \
834 \
835
836 #define TASK_IMPL_0(RTYPE, NAME) \
837 void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
838 { \
839 t->d.res = NAME##_CALL(w, __dq_head ); \
840 } \
841 \
842 static inline __attribute__((always_inline)) \
843 RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head ); \
844 \
845 /* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
846 RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head ) \
847 { \
848 return NAME##_WORK(w, __dq_head ); \
849 } \
850 \
851 static inline __attribute__((always_inline)) \
852 RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) )\
853
854 #define TASK_0(RTYPE, NAME) TASK_DECL_0(RTYPE, NAME) TASK_IMPL_0(RTYPE, NAME)
855
856 #define VOID_TASK_DECL_0(NAME) \
857 \
858 typedef struct _TD_##NAME { \
859 TASK_COMMON_FIELDS(_TD_##NAME) \
860 \
861 } TD_##NAME; \
862 \
863 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
864 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
865 \
866 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
867 void NAME##_CALL(WorkerP *, Task * ); \
868 static inline void NAME##_SYNC(WorkerP *, Task *); \
869 static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
870 \
871 static inline __attribute__((unused)) \
872 void NAME##_SPAWN(WorkerP *w, Task *__dq_head ) \
873 { \
874 PR_COUNTTASK(w); \
875 \
876 TD_##NAME *t; \
877 TailSplit ts; \
878 uint32_t head, split, newsplit; \
879 \
880 if (__dq_head == w->end) lace_abort_stack_overflow(); \
881 \
882 t = (TD_##NAME *)__dq_head; \
883 t->f = &NAME##_WRAP; \
884 t->thief = THIEF_TASK; \
885 \
886 compiler_barrier(); \
887 \
888 Worker *wt = w->_public; \
889 if (unlikely(w->allstolen)) { \
890 if (wt->movesplit) wt->movesplit = 0; \
891 head = __dq_head - w->dq; \
892 ts = (TailSplit){{head,head+1}}; \
893 wt->ts.v = ts.v; \
894 compiler_barrier(); \
895 wt->allstolen = 0; \
896 w->split = __dq_head+1; \
897 w->allstolen = 0; \
898 } else if (unlikely(wt->movesplit)) { \
899 head = __dq_head - w->dq; \
900 split = w->split - w->dq; \
901 newsplit = (split + head + 2)/2; \
902 wt->ts.ts.split = newsplit; \
903 w->split = w->dq + newsplit; \
904 compiler_barrier(); \
905 wt->movesplit = 0; \
906 PR_COUNTSPLITS(w, CTR_split_grow); \
907 } \
908 } \
909 \
910 static inline __attribute__((unused)) \
911 void NAME##_NEWFRAME() \
912 { \
913 Task _t; \
914 TD_##NAME *t = (TD_##NAME *)&_t; \
915 t->f = &NAME##_WRAP; \
916 t->thief = THIEF_TASK; \
917 \
918 lace_run_newframe(&_t); \
919 return ; \
920 } \
921 \
922 static inline __attribute__((unused)) \
923 void NAME##_TOGETHER() \
924 { \
925 Task _t; \
926 TD_##NAME *t = (TD_##NAME *)&_t; \
927 t->f = &NAME##_WRAP; \
928 t->thief = THIEF_TASK; \
929 \
930 lace_run_together(&_t); \
931 } \
932 \
933 static inline __attribute__((unused)) \
934 void NAME##_RUN() \
935 { \
936 Task _t; \
937 TD_##NAME *t = (TD_##NAME *)&_t; \
938 t->f = &NAME##_WRAP; \
939 t->thief = THIEF_TASK; \
940 \
941 lace_run_task(&_t); \
942 return ; \
943 } \
944 \
945 static __attribute__((noinline)) \
946 void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
947 { \
948 TD_##NAME *t; \
949 \
950 if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
951 lace_leapfrog(w, __dq_head); \
952 t = (TD_##NAME *)__dq_head; \
953 return ; \
954 } \
955 \
956 compiler_barrier(); \
957 \
958 Worker *wt = w->_public; \
959 if (wt->movesplit) { \
960 Task *t = w->split; \
961 size_t diff = __dq_head - t; \
962 diff = (diff + 1) / 2; \
963 w->split = t + diff; \
964 wt->ts.ts.split += diff; \
965 compiler_barrier(); \
966 wt->movesplit = 0; \
967 PR_COUNTSPLITS(w, CTR_split_grow); \
968 } \
969 \
970 compiler_barrier(); \
971 \
972 t = (TD_##NAME *)__dq_head; \
973 t->thief = THIEF_EMPTY; \
974 return NAME##_CALL(w, __dq_head ); \
975 } \
976 \
977 static inline __attribute__((unused)) \
978 void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
979 { \
980 /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
981 \
982 if (likely(0 == w->_public->movesplit)) { \
983 if (likely(w->split <= __dq_head)) { \
984 TD_##NAME *t = (TD_##NAME *)__dq_head; \
985 t->thief = THIEF_EMPTY; \
986 return NAME##_CALL(w, __dq_head ); \
987 } \
988 } \
989 \
990 return NAME##_SYNC_SLOW(w, __dq_head); \
991 } \
992 \
993 \
994
995 #define VOID_TASK_IMPL_0(NAME) \
996 void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
997 { \
998 NAME##_CALL(w, __dq_head ); \
999 } \
1000 \
1001 static inline __attribute__((always_inline)) \
1002 void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head ); \
1003 \
1004 /* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
1005 void NAME##_CALL(WorkerP *w, Task *__dq_head ) \
1006 { \
1007 return NAME##_WORK(w, __dq_head ); \
1008 } \
1009 \
1010 static inline __attribute__((always_inline)) \
1011 void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) )\
1012
1013 #define VOID_TASK_0(NAME) VOID_TASK_DECL_0(NAME) VOID_TASK_IMPL_0(NAME)
1014
1015
1016 // Task macros for tasks of arity 1
1017
1018 #define TASK_DECL_1(RTYPE, NAME, ATYPE_1) \
1019 \
1020 typedef struct _TD_##NAME { \
1021 TASK_COMMON_FIELDS(_TD_##NAME) \
1022 union { struct { ATYPE_1 arg_1; } args; RTYPE res; } d; \
1023 } TD_##NAME; \
1024 \
1025 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
1026 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
1027 \
1028 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
1029 RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1); \
1030 static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
1031 static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
1032 \
1033 static inline __attribute__((unused)) \
1034 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \
1035 { \
1036 PR_COUNTTASK(w); \
1037 \
1038 TD_##NAME *t; \
1039 TailSplit ts; \
1040 uint32_t head, split, newsplit; \
1041 \
1042 if (__dq_head == w->end) lace_abort_stack_overflow(); \
1043 \
1044 t = (TD_##NAME *)__dq_head; \
1045 t->f = &NAME##_WRAP; \
1046 t->thief = THIEF_TASK; \
1047 t->d.args.arg_1 = arg_1; \
1048 compiler_barrier(); \
1049 \
1050 Worker *wt = w->_public; \
1051 if (unlikely(w->allstolen)) { \
1052 if (wt->movesplit) wt->movesplit = 0; \
1053 head = __dq_head - w->dq; \
1054 ts = (TailSplit){{head,head+1}}; \
1055 wt->ts.v = ts.v; \
1056 compiler_barrier(); \
1057 wt->allstolen = 0; \
1058 w->split = __dq_head+1; \
1059 w->allstolen = 0; \
1060 } else if (unlikely(wt->movesplit)) { \
1061 head = __dq_head - w->dq; \
1062 split = w->split - w->dq; \
1063 newsplit = (split + head + 2)/2; \
1064 wt->ts.ts.split = newsplit; \
1065 w->split = w->dq + newsplit; \
1066 compiler_barrier(); \
1067 wt->movesplit = 0; \
1068 PR_COUNTSPLITS(w, CTR_split_grow); \
1069 } \
1070 } \
1071 \
1072 static inline __attribute__((unused)) \
1073 RTYPE NAME##_NEWFRAME(ATYPE_1 arg_1) \
1074 { \
1075 Task _t; \
1076 TD_##NAME *t = (TD_##NAME *)&_t; \
1077 t->f = &NAME##_WRAP; \
1078 t->thief = THIEF_TASK; \
1079 t->d.args.arg_1 = arg_1; \
1080 lace_run_newframe(&_t); \
1081 return ((TD_##NAME *)t)->d.res; \
1082 } \
1083 \
1084 static inline __attribute__((unused)) \
1085 void NAME##_TOGETHER(ATYPE_1 arg_1) \
1086 { \
1087 Task _t; \
1088 TD_##NAME *t = (TD_##NAME *)&_t; \
1089 t->f = &NAME##_WRAP; \
1090 t->thief = THIEF_TASK; \
1091 t->d.args.arg_1 = arg_1; \
1092 lace_run_together(&_t); \
1093 } \
1094 \
1095 static inline __attribute__((unused)) \
1096 RTYPE NAME##_RUN(ATYPE_1 arg_1) \
1097 { \
1098 Task _t; \
1099 TD_##NAME *t = (TD_##NAME *)&_t; \
1100 t->f = &NAME##_WRAP; \
1101 t->thief = THIEF_TASK; \
1102 t->d.args.arg_1 = arg_1; \
1103 lace_run_task(&_t); \
1104 return ((TD_##NAME *)t)->d.res; \
1105 } \
1106 \
1107 static __attribute__((noinline)) \
1108 RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
1109 { \
1110 TD_##NAME *t; \
1111 \
1112 if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
1113 lace_leapfrog(w, __dq_head); \
1114 t = (TD_##NAME *)__dq_head; \
1115 return ((TD_##NAME *)t)->d.res; \
1116 } \
1117 \
1118 compiler_barrier(); \
1119 \
1120 Worker *wt = w->_public; \
1121 if (wt->movesplit) { \
1122 Task *t = w->split; \
1123 size_t diff = __dq_head - t; \
1124 diff = (diff + 1) / 2; \
1125 w->split = t + diff; \
1126 wt->ts.ts.split += diff; \
1127 compiler_barrier(); \
1128 wt->movesplit = 0; \
1129 PR_COUNTSPLITS(w, CTR_split_grow); \
1130 } \
1131 \
1132 compiler_barrier(); \
1133 \
1134 t = (TD_##NAME *)__dq_head; \
1135 t->thief = THIEF_EMPTY; \
1136 return NAME##_CALL(w, __dq_head , t->d.args.arg_1); \
1137 } \
1138 \
1139 static inline __attribute__((unused)) \
1140 RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
1141 { \
1142 /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
1143 \
1144 if (likely(0 == w->_public->movesplit)) { \
1145 if (likely(w->split <= __dq_head)) { \
1146 TD_##NAME *t = (TD_##NAME *)__dq_head; \
1147 t->thief = THIEF_EMPTY; \
1148 return NAME##_CALL(w, __dq_head , t->d.args.arg_1); \
1149 } \
1150 } \
1151 \
1152 return NAME##_SYNC_SLOW(w, __dq_head); \
1153 } \
1154 \
1155 \
1156
1157 #define TASK_IMPL_1(RTYPE, NAME, ATYPE_1, ARG_1) \
1158 void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
1159 { \
1160 t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1); \
1161 } \
1162 \
1163 static inline __attribute__((always_inline)) \
1164 RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1); \
1165 \
1166 /* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
1167 RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \
1168 { \
1169 return NAME##_WORK(w, __dq_head , arg_1); \
1170 } \
1171 \
1172 static inline __attribute__((always_inline)) \
1173 RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1)\
1174
1175 #define TASK_1(RTYPE, NAME, ATYPE_1, ARG_1) TASK_DECL_1(RTYPE, NAME, ATYPE_1) TASK_IMPL_1(RTYPE, NAME, ATYPE_1, ARG_1)
1176
1177 #define VOID_TASK_DECL_1(NAME, ATYPE_1) \
1178 \
1179 typedef struct _TD_##NAME { \
1180 TASK_COMMON_FIELDS(_TD_##NAME) \
1181 union { struct { ATYPE_1 arg_1; } args; } d; \
1182 } TD_##NAME; \
1183 \
1184 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
1185 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
1186 \
1187 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
1188 void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1); \
1189 static inline void NAME##_SYNC(WorkerP *, Task *); \
1190 static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
1191 \
1192 static inline __attribute__((unused)) \
1193 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \
1194 { \
1195 PR_COUNTTASK(w); \
1196 \
1197 TD_##NAME *t; \
1198 TailSplit ts; \
1199 uint32_t head, split, newsplit; \
1200 \
1201 if (__dq_head == w->end) lace_abort_stack_overflow(); \
1202 \
1203 t = (TD_##NAME *)__dq_head; \
1204 t->f = &NAME##_WRAP; \
1205 t->thief = THIEF_TASK; \
1206 t->d.args.arg_1 = arg_1; \
1207 compiler_barrier(); \
1208 \
1209 Worker *wt = w->_public; \
1210 if (unlikely(w->allstolen)) { \
1211 if (wt->movesplit) wt->movesplit = 0; \
1212 head = __dq_head - w->dq; \
1213 ts = (TailSplit){{head,head+1}}; \
1214 wt->ts.v = ts.v; \
1215 compiler_barrier(); \
1216 wt->allstolen = 0; \
1217 w->split = __dq_head+1; \
1218 w->allstolen = 0; \
1219 } else if (unlikely(wt->movesplit)) { \
1220 head = __dq_head - w->dq; \
1221 split = w->split - w->dq; \
1222 newsplit = (split + head + 2)/2; \
1223 wt->ts.ts.split = newsplit; \
1224 w->split = w->dq + newsplit; \
1225 compiler_barrier(); \
1226 wt->movesplit = 0; \
1227 PR_COUNTSPLITS(w, CTR_split_grow); \
1228 } \
1229 } \
1230 \
1231 static inline __attribute__((unused)) \
1232 void NAME##_NEWFRAME(ATYPE_1 arg_1) \
1233 { \
1234 Task _t; \
1235 TD_##NAME *t = (TD_##NAME *)&_t; \
1236 t->f = &NAME##_WRAP; \
1237 t->thief = THIEF_TASK; \
1238 t->d.args.arg_1 = arg_1; \
1239 lace_run_newframe(&_t); \
1240 return ; \
1241 } \
1242 \
1243 static inline __attribute__((unused)) \
1244 void NAME##_TOGETHER(ATYPE_1 arg_1) \
1245 { \
1246 Task _t; \
1247 TD_##NAME *t = (TD_##NAME *)&_t; \
1248 t->f = &NAME##_WRAP; \
1249 t->thief = THIEF_TASK; \
1250 t->d.args.arg_1 = arg_1; \
1251 lace_run_together(&_t); \
1252 } \
1253 \
1254 static inline __attribute__((unused)) \
1255 void NAME##_RUN(ATYPE_1 arg_1) \
1256 { \
1257 Task _t; \
1258 TD_##NAME *t = (TD_##NAME *)&_t; \
1259 t->f = &NAME##_WRAP; \
1260 t->thief = THIEF_TASK; \
1261 t->d.args.arg_1 = arg_1; \
1262 lace_run_task(&_t); \
1263 return ; \
1264 } \
1265 \
1266 static __attribute__((noinline)) \
1267 void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
1268 { \
1269 TD_##NAME *t; \
1270 \
1271 if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
1272 lace_leapfrog(w, __dq_head); \
1273 t = (TD_##NAME *)__dq_head; \
1274 return ; \
1275 } \
1276 \
1277 compiler_barrier(); \
1278 \
1279 Worker *wt = w->_public; \
1280 if (wt->movesplit) { \
1281 Task *t = w->split; \
1282 size_t diff = __dq_head - t; \
1283 diff = (diff + 1) / 2; \
1284 w->split = t + diff; \
1285 wt->ts.ts.split += diff; \
1286 compiler_barrier(); \
1287 wt->movesplit = 0; \
1288 PR_COUNTSPLITS(w, CTR_split_grow); \
1289 } \
1290 \
1291 compiler_barrier(); \
1292 \
1293 t = (TD_##NAME *)__dq_head; \
1294 t->thief = THIEF_EMPTY; \
1295 return NAME##_CALL(w, __dq_head , t->d.args.arg_1); \
1296 } \
1297 \
1298 static inline __attribute__((unused)) \
1299 void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
1300 { \
1301 /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
1302 \
1303 if (likely(0 == w->_public->movesplit)) { \
1304 if (likely(w->split <= __dq_head)) { \
1305 TD_##NAME *t = (TD_##NAME *)__dq_head; \
1306 t->thief = THIEF_EMPTY; \
1307 return NAME##_CALL(w, __dq_head , t->d.args.arg_1); \
1308 } \
1309 } \
1310 \
1311 return NAME##_SYNC_SLOW(w, __dq_head); \
1312 } \
1313 \
1314 \
1315
1316 #define VOID_TASK_IMPL_1(NAME, ATYPE_1, ARG_1) \
1317 void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
1318 { \
1319 NAME##_CALL(w, __dq_head , t->d.args.arg_1); \
1320 } \
1321 \
1322 static inline __attribute__((always_inline)) \
1323 void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1); \
1324 \
1325 /* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
1326 void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \
1327 { \
1328 return NAME##_WORK(w, __dq_head , arg_1); \
1329 } \
1330 \
1331 static inline __attribute__((always_inline)) \
1332 void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1)\
1333
1334 #define VOID_TASK_1(NAME, ATYPE_1, ARG_1) VOID_TASK_DECL_1(NAME, ATYPE_1) VOID_TASK_IMPL_1(NAME, ATYPE_1, ARG_1)
1335
1336
1337 // Task macros for tasks of arity 2
1338
1339 #define TASK_DECL_2(RTYPE, NAME, ATYPE_1, ATYPE_2) \
1340 \
1341 typedef struct _TD_##NAME { \
1342 TASK_COMMON_FIELDS(_TD_##NAME) \
1343 union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; } args; RTYPE res; } d; \
1344 } TD_##NAME; \
1345 \
1346 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
1347 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
1348 \
1349 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
1350 RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2); \
1351 static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
1352 static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
1353 \
1354 static inline __attribute__((unused)) \
1355 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \
1356 { \
1357 PR_COUNTTASK(w); \
1358 \
1359 TD_##NAME *t; \
1360 TailSplit ts; \
1361 uint32_t head, split, newsplit; \
1362 \
1363 if (__dq_head == w->end) lace_abort_stack_overflow(); \
1364 \
1365 t = (TD_##NAME *)__dq_head; \
1366 t->f = &NAME##_WRAP; \
1367 t->thief = THIEF_TASK; \
1368 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \
1369 compiler_barrier(); \
1370 \
1371 Worker *wt = w->_public; \
1372 if (unlikely(w->allstolen)) { \
1373 if (wt->movesplit) wt->movesplit = 0; \
1374 head = __dq_head - w->dq; \
1375 ts = (TailSplit){{head,head+1}}; \
1376 wt->ts.v = ts.v; \
1377 compiler_barrier(); \
1378 wt->allstolen = 0; \
1379 w->split = __dq_head+1; \
1380 w->allstolen = 0; \
1381 } else if (unlikely(wt->movesplit)) { \
1382 head = __dq_head - w->dq; \
1383 split = w->split - w->dq; \
1384 newsplit = (split + head + 2)/2; \
1385 wt->ts.ts.split = newsplit; \
1386 w->split = w->dq + newsplit; \
1387 compiler_barrier(); \
1388 wt->movesplit = 0; \
1389 PR_COUNTSPLITS(w, CTR_split_grow); \
1390 } \
1391 } \
1392 \
1393 static inline __attribute__((unused)) \
1394 RTYPE NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2) \
1395 { \
1396 Task _t; \
1397 TD_##NAME *t = (TD_##NAME *)&_t; \
1398 t->f = &NAME##_WRAP; \
1399 t->thief = THIEF_TASK; \
1400 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \
1401 lace_run_newframe(&_t); \
1402 return ((TD_##NAME *)t)->d.res; \
1403 } \
1404 \
1405 static inline __attribute__((unused)) \
1406 void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2) \
1407 { \
1408 Task _t; \
1409 TD_##NAME *t = (TD_##NAME *)&_t; \
1410 t->f = &NAME##_WRAP; \
1411 t->thief = THIEF_TASK; \
1412 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \
1413 lace_run_together(&_t); \
1414 } \
1415 \
1416 static inline __attribute__((unused)) \
1417 RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2) \
1418 { \
1419 Task _t; \
1420 TD_##NAME *t = (TD_##NAME *)&_t; \
1421 t->f = &NAME##_WRAP; \
1422 t->thief = THIEF_TASK; \
1423 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \
1424 lace_run_task(&_t); \
1425 return ((TD_##NAME *)t)->d.res; \
1426 } \
1427 \
1428 static __attribute__((noinline)) \
1429 RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
1430 { \
1431 TD_##NAME *t; \
1432 \
1433 if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
1434 lace_leapfrog(w, __dq_head); \
1435 t = (TD_##NAME *)__dq_head; \
1436 return ((TD_##NAME *)t)->d.res; \
1437 } \
1438 \
1439 compiler_barrier(); \
1440 \
1441 Worker *wt = w->_public; \
1442 if (wt->movesplit) { \
1443 Task *t = w->split; \
1444 size_t diff = __dq_head - t; \
1445 diff = (diff + 1) / 2; \
1446 w->split = t + diff; \
1447 wt->ts.ts.split += diff; \
1448 compiler_barrier(); \
1449 wt->movesplit = 0; \
1450 PR_COUNTSPLITS(w, CTR_split_grow); \
1451 } \
1452 \
1453 compiler_barrier(); \
1454 \
1455 t = (TD_##NAME *)__dq_head; \
1456 t->thief = THIEF_EMPTY; \
1457 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \
1458 } \
1459 \
1460 static inline __attribute__((unused)) \
1461 RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
1462 { \
1463 /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
1464 \
1465 if (likely(0 == w->_public->movesplit)) { \
1466 if (likely(w->split <= __dq_head)) { \
1467 TD_##NAME *t = (TD_##NAME *)__dq_head; \
1468 t->thief = THIEF_EMPTY; \
1469 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \
1470 } \
1471 } \
1472 \
1473 return NAME##_SYNC_SLOW(w, __dq_head); \
1474 } \
1475 \
1476 \
1477
1478 #define TASK_IMPL_2(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) \
1479 void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
1480 { \
1481 t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \
1482 } \
1483 \
1484 static inline __attribute__((always_inline)) \
1485 RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2); \
1486 \
1487 /* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
1488 RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \
1489 { \
1490 return NAME##_WORK(w, __dq_head , arg_1, arg_2); \
1491 } \
1492 \
1493 static inline __attribute__((always_inline)) \
1494 RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2)\
1495
1496 #define TASK_2(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) TASK_DECL_2(RTYPE, NAME, ATYPE_1, ATYPE_2) TASK_IMPL_2(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2)
1497
1498 #define VOID_TASK_DECL_2(NAME, ATYPE_1, ATYPE_2) \
1499 \
1500 typedef struct _TD_##NAME { \
1501 TASK_COMMON_FIELDS(_TD_##NAME) \
1502 union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; } args; } d; \
1503 } TD_##NAME; \
1504 \
1505 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
1506 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
1507 \
1508 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
1509 void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2); \
1510 static inline void NAME##_SYNC(WorkerP *, Task *); \
1511 static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
1512 \
1513 static inline __attribute__((unused)) \
1514 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \
1515 { \
1516 PR_COUNTTASK(w); \
1517 \
1518 TD_##NAME *t; \
1519 TailSplit ts; \
1520 uint32_t head, split, newsplit; \
1521 \
1522 if (__dq_head == w->end) lace_abort_stack_overflow(); \
1523 \
1524 t = (TD_##NAME *)__dq_head; \
1525 t->f = &NAME##_WRAP; \
1526 t->thief = THIEF_TASK; \
1527 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \
1528 compiler_barrier(); \
1529 \
1530 Worker *wt = w->_public; \
1531 if (unlikely(w->allstolen)) { \
1532 if (wt->movesplit) wt->movesplit = 0; \
1533 head = __dq_head - w->dq; \
1534 ts = (TailSplit){{head,head+1}}; \
1535 wt->ts.v = ts.v; \
1536 compiler_barrier(); \
1537 wt->allstolen = 0; \
1538 w->split = __dq_head+1; \
1539 w->allstolen = 0; \
1540 } else if (unlikely(wt->movesplit)) { \
1541 head = __dq_head - w->dq; \
1542 split = w->split - w->dq; \
1543 newsplit = (split + head + 2)/2; \
1544 wt->ts.ts.split = newsplit; \
1545 w->split = w->dq + newsplit; \
1546 compiler_barrier(); \
1547 wt->movesplit = 0; \
1548 PR_COUNTSPLITS(w, CTR_split_grow); \
1549 } \
1550 } \
1551 \
1552 static inline __attribute__((unused)) \
1553 void NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2) \
1554 { \
1555 Task _t; \
1556 TD_##NAME *t = (TD_##NAME *)&_t; \
1557 t->f = &NAME##_WRAP; \
1558 t->thief = THIEF_TASK; \
1559 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \
1560 lace_run_newframe(&_t); \
1561 return ; \
1562 } \
1563 \
1564 static inline __attribute__((unused)) \
1565 void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2) \
1566 { \
1567 Task _t; \
1568 TD_##NAME *t = (TD_##NAME *)&_t; \
1569 t->f = &NAME##_WRAP; \
1570 t->thief = THIEF_TASK; \
1571 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \
1572 lace_run_together(&_t); \
1573 } \
1574 \
1575 static inline __attribute__((unused)) \
1576 void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2) \
1577 { \
1578 Task _t; \
1579 TD_##NAME *t = (TD_##NAME *)&_t; \
1580 t->f = &NAME##_WRAP; \
1581 t->thief = THIEF_TASK; \
1582 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \
1583 lace_run_task(&_t); \
1584 return ; \
1585 } \
1586 \
1587 static __attribute__((noinline)) \
1588 void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
1589 { \
1590 TD_##NAME *t; \
1591 \
1592 if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
1593 lace_leapfrog(w, __dq_head); \
1594 t = (TD_##NAME *)__dq_head; \
1595 return ; \
1596 } \
1597 \
1598 compiler_barrier(); \
1599 \
1600 Worker *wt = w->_public; \
1601 if (wt->movesplit) { \
1602 Task *t = w->split; \
1603 size_t diff = __dq_head - t; \
1604 diff = (diff + 1) / 2; \
1605 w->split = t + diff; \
1606 wt->ts.ts.split += diff; \
1607 compiler_barrier(); \
1608 wt->movesplit = 0; \
1609 PR_COUNTSPLITS(w, CTR_split_grow); \
1610 } \
1611 \
1612 compiler_barrier(); \
1613 \
1614 t = (TD_##NAME *)__dq_head; \
1615 t->thief = THIEF_EMPTY; \
1616 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \
1617 } \
1618 \
1619 static inline __attribute__((unused)) \
1620 void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
1621 { \
1622 /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
1623 \
1624 if (likely(0 == w->_public->movesplit)) { \
1625 if (likely(w->split <= __dq_head)) { \
1626 TD_##NAME *t = (TD_##NAME *)__dq_head; \
1627 t->thief = THIEF_EMPTY; \
1628 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \
1629 } \
1630 } \
1631 \
1632 return NAME##_SYNC_SLOW(w, __dq_head); \
1633 } \
1634 \
1635 \
1636
1637 #define VOID_TASK_IMPL_2(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) \
1638 void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
1639 { \
1640 NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \
1641 } \
1642 \
1643 static inline __attribute__((always_inline)) \
1644 void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2); \
1645 \
1646 /* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
1647 void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \
1648 { \
1649 return NAME##_WORK(w, __dq_head , arg_1, arg_2); \
1650 } \
1651 \
1652 static inline __attribute__((always_inline)) \
1653 void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2)\
1654
1655 #define VOID_TASK_2(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) VOID_TASK_DECL_2(NAME, ATYPE_1, ATYPE_2) VOID_TASK_IMPL_2(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2)
1656
1657
1658 // Task macros for tasks of arity 3
1659
1660 #define TASK_DECL_3(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3) \
1661 \
1662 typedef struct _TD_##NAME { \
1663 TASK_COMMON_FIELDS(_TD_##NAME) \
1664 union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; } args; RTYPE res; } d;\
1665 } TD_##NAME; \
1666 \
1667 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
1668 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
1669 \
1670 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
1671 RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3); \
1672 static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
1673 static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
1674 \
1675 static inline __attribute__((unused)) \
1676 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\
1677 { \
1678 PR_COUNTTASK(w); \
1679 \
1680 TD_##NAME *t; \
1681 TailSplit ts; \
1682 uint32_t head, split, newsplit; \
1683 \
1684 if (__dq_head == w->end) lace_abort_stack_overflow(); \
1685 \
1686 t = (TD_##NAME *)__dq_head; \
1687 t->f = &NAME##_WRAP; \
1688 t->thief = THIEF_TASK; \
1689 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \
1690 compiler_barrier(); \
1691 \
1692 Worker *wt = w->_public; \
1693 if (unlikely(w->allstolen)) { \
1694 if (wt->movesplit) wt->movesplit = 0; \
1695 head = __dq_head - w->dq; \
1696 ts = (TailSplit){{head,head+1}}; \
1697 wt->ts.v = ts.v; \
1698 compiler_barrier(); \
1699 wt->allstolen = 0; \
1700 w->split = __dq_head+1; \
1701 w->allstolen = 0; \
1702 } else if (unlikely(wt->movesplit)) { \
1703 head = __dq_head - w->dq; \
1704 split = w->split - w->dq; \
1705 newsplit = (split + head + 2)/2; \
1706 wt->ts.ts.split = newsplit; \
1707 w->split = w->dq + newsplit; \
1708 compiler_barrier(); \
1709 wt->movesplit = 0; \
1710 PR_COUNTSPLITS(w, CTR_split_grow); \
1711 } \
1712 } \
1713 \
1714 static inline __attribute__((unused)) \
1715 RTYPE NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \
1716 { \
1717 Task _t; \
1718 TD_##NAME *t = (TD_##NAME *)&_t; \
1719 t->f = &NAME##_WRAP; \
1720 t->thief = THIEF_TASK; \
1721 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \
1722 lace_run_newframe(&_t); \
1723 return ((TD_##NAME *)t)->d.res; \
1724 } \
1725 \
1726 static inline __attribute__((unused)) \
1727 void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \
1728 { \
1729 Task _t; \
1730 TD_##NAME *t = (TD_##NAME *)&_t; \
1731 t->f = &NAME##_WRAP; \
1732 t->thief = THIEF_TASK; \
1733 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \
1734 lace_run_together(&_t); \
1735 } \
1736 \
1737 static inline __attribute__((unused)) \
1738 RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \
1739 { \
1740 Task _t; \
1741 TD_##NAME *t = (TD_##NAME *)&_t; \
1742 t->f = &NAME##_WRAP; \
1743 t->thief = THIEF_TASK; \
1744 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \
1745 lace_run_task(&_t); \
1746 return ((TD_##NAME *)t)->d.res; \
1747 } \
1748 \
1749 static __attribute__((noinline)) \
1750 RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
1751 { \
1752 TD_##NAME *t; \
1753 \
1754 if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
1755 lace_leapfrog(w, __dq_head); \
1756 t = (TD_##NAME *)__dq_head; \
1757 return ((TD_##NAME *)t)->d.res; \
1758 } \
1759 \
1760 compiler_barrier(); \
1761 \
1762 Worker *wt = w->_public; \
1763 if (wt->movesplit) { \
1764 Task *t = w->split; \
1765 size_t diff = __dq_head - t; \
1766 diff = (diff + 1) / 2; \
1767 w->split = t + diff; \
1768 wt->ts.ts.split += diff; \
1769 compiler_barrier(); \
1770 wt->movesplit = 0; \
1771 PR_COUNTSPLITS(w, CTR_split_grow); \
1772 } \
1773 \
1774 compiler_barrier(); \
1775 \
1776 t = (TD_##NAME *)__dq_head; \
1777 t->thief = THIEF_EMPTY; \
1778 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\
1779 } \
1780 \
1781 static inline __attribute__((unused)) \
1782 RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
1783 { \
1784 /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
1785 \
1786 if (likely(0 == w->_public->movesplit)) { \
1787 if (likely(w->split <= __dq_head)) { \
1788 TD_##NAME *t = (TD_##NAME *)__dq_head; \
1789 t->thief = THIEF_EMPTY; \
1790 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\
1791 } \
1792 } \
1793 \
1794 return NAME##_SYNC_SLOW(w, __dq_head); \
1795 } \
1796 \
1797 \
1798
1799 #define TASK_IMPL_3(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) \
1800 void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
1801 { \
1802 t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\
1803 } \
1804 \
1805 static inline __attribute__((always_inline)) \
1806 RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3);\
1807 \
1808 /* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
1809 RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\
1810 { \
1811 return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3); \
1812 } \
1813 \
1814 static inline __attribute__((always_inline)) \
1815 RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3)\
1816
1817 #define TASK_3(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) TASK_DECL_3(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3) TASK_IMPL_3(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3)
1818
1819 #define VOID_TASK_DECL_3(NAME, ATYPE_1, ATYPE_2, ATYPE_3) \
1820 \
1821 typedef struct _TD_##NAME { \
1822 TASK_COMMON_FIELDS(_TD_##NAME) \
1823 union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; } args; } d; \
1824 } TD_##NAME; \
1825 \
1826 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
1827 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
1828 \
1829 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
1830 void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3); \
1831 static inline void NAME##_SYNC(WorkerP *, Task *); \
1832 static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
1833 \
1834 static inline __attribute__((unused)) \
1835 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\
1836 { \
1837 PR_COUNTTASK(w); \
1838 \
1839 TD_##NAME *t; \
1840 TailSplit ts; \
1841 uint32_t head, split, newsplit; \
1842 \
1843 if (__dq_head == w->end) lace_abort_stack_overflow(); \
1844 \
1845 t = (TD_##NAME *)__dq_head; \
1846 t->f = &NAME##_WRAP; \
1847 t->thief = THIEF_TASK; \
1848 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \
1849 compiler_barrier(); \
1850 \
1851 Worker *wt = w->_public; \
1852 if (unlikely(w->allstolen)) { \
1853 if (wt->movesplit) wt->movesplit = 0; \
1854 head = __dq_head - w->dq; \
1855 ts = (TailSplit){{head,head+1}}; \
1856 wt->ts.v = ts.v; \
1857 compiler_barrier(); \
1858 wt->allstolen = 0; \
1859 w->split = __dq_head+1; \
1860 w->allstolen = 0; \
1861 } else if (unlikely(wt->movesplit)) { \
1862 head = __dq_head - w->dq; \
1863 split = w->split - w->dq; \
1864 newsplit = (split + head + 2)/2; \
1865 wt->ts.ts.split = newsplit; \
1866 w->split = w->dq + newsplit; \
1867 compiler_barrier(); \
1868 wt->movesplit = 0; \
1869 PR_COUNTSPLITS(w, CTR_split_grow); \
1870 } \
1871 } \
1872 \
1873 static inline __attribute__((unused)) \
1874 void NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \
1875 { \
1876 Task _t; \
1877 TD_##NAME *t = (TD_##NAME *)&_t; \
1878 t->f = &NAME##_WRAP; \
1879 t->thief = THIEF_TASK; \
1880 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \
1881 lace_run_newframe(&_t); \
1882 return ; \
1883 } \
1884 \
1885 static inline __attribute__((unused)) \
1886 void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \
1887 { \
1888 Task _t; \
1889 TD_##NAME *t = (TD_##NAME *)&_t; \
1890 t->f = &NAME##_WRAP; \
1891 t->thief = THIEF_TASK; \
1892 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \
1893 lace_run_together(&_t); \
1894 } \
1895 \
1896 static inline __attribute__((unused)) \
1897 void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \
1898 { \
1899 Task _t; \
1900 TD_##NAME *t = (TD_##NAME *)&_t; \
1901 t->f = &NAME##_WRAP; \
1902 t->thief = THIEF_TASK; \
1903 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \
1904 lace_run_task(&_t); \
1905 return ; \
1906 } \
1907 \
1908 static __attribute__((noinline)) \
1909 void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
1910 { \
1911 TD_##NAME *t; \
1912 \
1913 if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
1914 lace_leapfrog(w, __dq_head); \
1915 t = (TD_##NAME *)__dq_head; \
1916 return ; \
1917 } \
1918 \
1919 compiler_barrier(); \
1920 \
1921 Worker *wt = w->_public; \
1922 if (wt->movesplit) { \
1923 Task *t = w->split; \
1924 size_t diff = __dq_head - t; \
1925 diff = (diff + 1) / 2; \
1926 w->split = t + diff; \
1927 wt->ts.ts.split += diff; \
1928 compiler_barrier(); \
1929 wt->movesplit = 0; \
1930 PR_COUNTSPLITS(w, CTR_split_grow); \
1931 } \
1932 \
1933 compiler_barrier(); \
1934 \
1935 t = (TD_##NAME *)__dq_head; \
1936 t->thief = THIEF_EMPTY; \
1937 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\
1938 } \
1939 \
1940 static inline __attribute__((unused)) \
1941 void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
1942 { \
1943 /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
1944 \
1945 if (likely(0 == w->_public->movesplit)) { \
1946 if (likely(w->split <= __dq_head)) { \
1947 TD_##NAME *t = (TD_##NAME *)__dq_head; \
1948 t->thief = THIEF_EMPTY; \
1949 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\
1950 } \
1951 } \
1952 \
1953 return NAME##_SYNC_SLOW(w, __dq_head); \
1954 } \
1955 \
1956 \
1957
1958 #define VOID_TASK_IMPL_3(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) \
1959 void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
1960 { \
1961 NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \
1962 } \
1963 \
1964 static inline __attribute__((always_inline)) \
1965 void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3);\
1966 \
1967 /* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
1968 void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\
1969 { \
1970 return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3); \
1971 } \
1972 \
1973 static inline __attribute__((always_inline)) \
1974 void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3)\
1975
1976 #define VOID_TASK_3(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) VOID_TASK_DECL_3(NAME, ATYPE_1, ATYPE_2, ATYPE_3) VOID_TASK_IMPL_3(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3)
1977
1978
1979 // Task macros for tasks of arity 4
1980
1981 #define TASK_DECL_4(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4) \
1982 \
1983 typedef struct _TD_##NAME { \
1984 TASK_COMMON_FIELDS(_TD_##NAME) \
1985 union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; } args; RTYPE res; } d;\
1986 } TD_##NAME; \
1987 \
1988 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
1989 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
1990 \
1991 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
1992 RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4);\
1993 static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
1994 static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
1995 \
1996 static inline __attribute__((unused)) \
1997 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
1998 { \
1999 PR_COUNTTASK(w); \
2000 \
2001 TD_##NAME *t; \
2002 TailSplit ts; \
2003 uint32_t head, split, newsplit; \
2004 \
2005 if (__dq_head == w->end) lace_abort_stack_overflow(); \
2006 \
2007 t = (TD_##NAME *)__dq_head; \
2008 t->f = &NAME##_WRAP; \
2009 t->thief = THIEF_TASK; \
2010 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
2011 compiler_barrier(); \
2012 \
2013 Worker *wt = w->_public; \
2014 if (unlikely(w->allstolen)) { \
2015 if (wt->movesplit) wt->movesplit = 0; \
2016 head = __dq_head - w->dq; \
2017 ts = (TailSplit){{head,head+1}}; \
2018 wt->ts.v = ts.v; \
2019 compiler_barrier(); \
2020 wt->allstolen = 0; \
2021 w->split = __dq_head+1; \
2022 w->allstolen = 0; \
2023 } else if (unlikely(wt->movesplit)) { \
2024 head = __dq_head - w->dq; \
2025 split = w->split - w->dq; \
2026 newsplit = (split + head + 2)/2; \
2027 wt->ts.ts.split = newsplit; \
2028 w->split = w->dq + newsplit; \
2029 compiler_barrier(); \
2030 wt->movesplit = 0; \
2031 PR_COUNTSPLITS(w, CTR_split_grow); \
2032 } \
2033 } \
2034 \
2035 static inline __attribute__((unused)) \
2036 RTYPE NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) \
2037 { \
2038 Task _t; \
2039 TD_##NAME *t = (TD_##NAME *)&_t; \
2040 t->f = &NAME##_WRAP; \
2041 t->thief = THIEF_TASK; \
2042 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
2043 lace_run_newframe(&_t); \
2044 return ((TD_##NAME *)t)->d.res; \
2045 } \
2046 \
2047 static inline __attribute__((unused)) \
2048 void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) \
2049 { \
2050 Task _t; \
2051 TD_##NAME *t = (TD_##NAME *)&_t; \
2052 t->f = &NAME##_WRAP; \
2053 t->thief = THIEF_TASK; \
2054 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
2055 lace_run_together(&_t); \
2056 } \
2057 \
2058 static inline __attribute__((unused)) \
2059 RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) \
2060 { \
2061 Task _t; \
2062 TD_##NAME *t = (TD_##NAME *)&_t; \
2063 t->f = &NAME##_WRAP; \
2064 t->thief = THIEF_TASK; \
2065 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
2066 lace_run_task(&_t); \
2067 return ((TD_##NAME *)t)->d.res; \
2068 } \
2069 \
2070 static __attribute__((noinline)) \
2071 RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
2072 { \
2073 TD_##NAME *t; \
2074 \
2075 if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
2076 lace_leapfrog(w, __dq_head); \
2077 t = (TD_##NAME *)__dq_head; \
2078 return ((TD_##NAME *)t)->d.res; \
2079 } \
2080 \
2081 compiler_barrier(); \
2082 \
2083 Worker *wt = w->_public; \
2084 if (wt->movesplit) { \
2085 Task *t = w->split; \
2086 size_t diff = __dq_head - t; \
2087 diff = (diff + 1) / 2; \
2088 w->split = t + diff; \
2089 wt->ts.ts.split += diff; \
2090 compiler_barrier(); \
2091 wt->movesplit = 0; \
2092 PR_COUNTSPLITS(w, CTR_split_grow); \
2093 } \
2094 \
2095 compiler_barrier(); \
2096 \
2097 t = (TD_##NAME *)__dq_head; \
2098 t->thief = THIEF_EMPTY; \
2099 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
2100 } \
2101 \
2102 static inline __attribute__((unused)) \
2103 RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
2104 { \
2105 /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
2106 \
2107 if (likely(0 == w->_public->movesplit)) { \
2108 if (likely(w->split <= __dq_head)) { \
2109 TD_##NAME *t = (TD_##NAME *)__dq_head; \
2110 t->thief = THIEF_EMPTY; \
2111 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
2112 } \
2113 } \
2114 \
2115 return NAME##_SYNC_SLOW(w, __dq_head); \
2116 } \
2117 \
2118 \
2119
2120 #define TASK_IMPL_4(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4)\
2121 void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
2122 { \
2123 t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
2124 } \
2125 \
2126 static inline __attribute__((always_inline)) \
2127 RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4);\
2128 \
2129 /* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
2130 RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
2131 { \
2132 return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4); \
2133 } \
2134 \
2135 static inline __attribute__((always_inline)) \
2136 RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4)\
2137
2138 #define TASK_4(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4) TASK_DECL_4(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4) TASK_IMPL_4(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4)
2139
2140 #define VOID_TASK_DECL_4(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4) \
2141 \
2142 typedef struct _TD_##NAME { \
2143 TASK_COMMON_FIELDS(_TD_##NAME) \
2144 union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; } args; } d;\
2145 } TD_##NAME; \
2146 \
2147 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
2148 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
2149 \
2150 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
2151 void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4);\
2152 static inline void NAME##_SYNC(WorkerP *, Task *); \
2153 static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
2154 \
2155 static inline __attribute__((unused)) \
2156 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
2157 { \
2158 PR_COUNTTASK(w); \
2159 \
2160 TD_##NAME *t; \
2161 TailSplit ts; \
2162 uint32_t head, split, newsplit; \
2163 \
2164 if (__dq_head == w->end) lace_abort_stack_overflow(); \
2165 \
2166 t = (TD_##NAME *)__dq_head; \
2167 t->f = &NAME##_WRAP; \
2168 t->thief = THIEF_TASK; \
2169 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
2170 compiler_barrier(); \
2171 \
2172 Worker *wt = w->_public; \
2173 if (unlikely(w->allstolen)) { \
2174 if (wt->movesplit) wt->movesplit = 0; \
2175 head = __dq_head - w->dq; \
2176 ts = (TailSplit){{head,head+1}}; \
2177 wt->ts.v = ts.v; \
2178 compiler_barrier(); \
2179 wt->allstolen = 0; \
2180 w->split = __dq_head+1; \
2181 w->allstolen = 0; \
2182 } else if (unlikely(wt->movesplit)) { \
2183 head = __dq_head - w->dq; \
2184 split = w->split - w->dq; \
2185 newsplit = (split + head + 2)/2; \
2186 wt->ts.ts.split = newsplit; \
2187 w->split = w->dq + newsplit; \
2188 compiler_barrier(); \
2189 wt->movesplit = 0; \
2190 PR_COUNTSPLITS(w, CTR_split_grow); \
2191 } \
2192 } \
2193 \
2194 static inline __attribute__((unused)) \
2195 void NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) \
2196 { \
2197 Task _t; \
2198 TD_##NAME *t = (TD_##NAME *)&_t; \
2199 t->f = &NAME##_WRAP; \
2200 t->thief = THIEF_TASK; \
2201 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
2202 lace_run_newframe(&_t); \
2203 return ; \
2204 } \
2205 \
2206 static inline __attribute__((unused)) \
2207 void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) \
2208 { \
2209 Task _t; \
2210 TD_##NAME *t = (TD_##NAME *)&_t; \
2211 t->f = &NAME##_WRAP; \
2212 t->thief = THIEF_TASK; \
2213 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
2214 lace_run_together(&_t); \
2215 } \
2216 \
2217 static inline __attribute__((unused)) \
2218 void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4) \
2219 { \
2220 Task _t; \
2221 TD_##NAME *t = (TD_##NAME *)&_t; \
2222 t->f = &NAME##_WRAP; \
2223 t->thief = THIEF_TASK; \
2224 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
2225 lace_run_task(&_t); \
2226 return ; \
2227 } \
2228 \
2229 static __attribute__((noinline)) \
2230 void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
2231 { \
2232 TD_##NAME *t; \
2233 \
2234 if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
2235 lace_leapfrog(w, __dq_head); \
2236 t = (TD_##NAME *)__dq_head; \
2237 return ; \
2238 } \
2239 \
2240 compiler_barrier(); \
2241 \
2242 Worker *wt = w->_public; \
2243 if (wt->movesplit) { \
2244 Task *t = w->split; \
2245 size_t diff = __dq_head - t; \
2246 diff = (diff + 1) / 2; \
2247 w->split = t + diff; \
2248 wt->ts.ts.split += diff; \
2249 compiler_barrier(); \
2250 wt->movesplit = 0; \
2251 PR_COUNTSPLITS(w, CTR_split_grow); \
2252 } \
2253 \
2254 compiler_barrier(); \
2255 \
2256 t = (TD_##NAME *)__dq_head; \
2257 t->thief = THIEF_EMPTY; \
2258 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
2259 } \
2260 \
2261 static inline __attribute__((unused)) \
2262 void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
2263 { \
2264 /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
2265 \
2266 if (likely(0 == w->_public->movesplit)) { \
2267 if (likely(w->split <= __dq_head)) { \
2268 TD_##NAME *t = (TD_##NAME *)__dq_head; \
2269 t->thief = THIEF_EMPTY; \
2270 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
2271 } \
2272 } \
2273 \
2274 return NAME##_SYNC_SLOW(w, __dq_head); \
2275 } \
2276 \
2277 \
2278
2279 #define VOID_TASK_IMPL_4(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4)\
2280 void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
2281 { \
2282 NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
2283 } \
2284 \
2285 static inline __attribute__((always_inline)) \
2286 void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4);\
2287 \
2288 /* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
2289 void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
2290 { \
2291 return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4); \
2292 } \
2293 \
2294 static inline __attribute__((always_inline)) \
2295 void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4)\
2296
2297 #define VOID_TASK_4(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4) VOID_TASK_DECL_4(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4) VOID_TASK_IMPL_4(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4)
2298
2299
2300 // Task macros for tasks of arity 5
2301
2302 #define TASK_DECL_5(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5) \
2303 \
2304 typedef struct _TD_##NAME { \
2305 TASK_COMMON_FIELDS(_TD_##NAME) \
2306 union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; ATYPE_5 arg_5; } args; RTYPE res; } d;\
2307 } TD_##NAME; \
2308 \
2309 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
2310 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
2311 \
2312 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
2313 RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5);\
2314 static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
2315 static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
2316 \
2317 static inline __attribute__((unused)) \
2318 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
2319 { \
2320 PR_COUNTTASK(w); \
2321 \
2322 TD_##NAME *t; \
2323 TailSplit ts; \
2324 uint32_t head, split, newsplit; \
2325 \
2326 if (__dq_head == w->end) lace_abort_stack_overflow(); \
2327 \
2328 t = (TD_##NAME *)__dq_head; \
2329 t->f = &NAME##_WRAP; \
2330 t->thief = THIEF_TASK; \
2331 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
2332 compiler_barrier(); \
2333 \
2334 Worker *wt = w->_public; \
2335 if (unlikely(w->allstolen)) { \
2336 if (wt->movesplit) wt->movesplit = 0; \
2337 head = __dq_head - w->dq; \
2338 ts = (TailSplit){{head,head+1}}; \
2339 wt->ts.v = ts.v; \
2340 compiler_barrier(); \
2341 wt->allstolen = 0; \
2342 w->split = __dq_head+1; \
2343 w->allstolen = 0; \
2344 } else if (unlikely(wt->movesplit)) { \
2345 head = __dq_head - w->dq; \
2346 split = w->split - w->dq; \
2347 newsplit = (split + head + 2)/2; \
2348 wt->ts.ts.split = newsplit; \
2349 w->split = w->dq + newsplit; \
2350 compiler_barrier(); \
2351 wt->movesplit = 0; \
2352 PR_COUNTSPLITS(w, CTR_split_grow); \
2353 } \
2354 } \
2355 \
2356 static inline __attribute__((unused)) \
2357 RTYPE NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
2358 { \
2359 Task _t; \
2360 TD_##NAME *t = (TD_##NAME *)&_t; \
2361 t->f = &NAME##_WRAP; \
2362 t->thief = THIEF_TASK; \
2363 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
2364 lace_run_newframe(&_t); \
2365 return ((TD_##NAME *)t)->d.res; \
2366 } \
2367 \
2368 static inline __attribute__((unused)) \
2369 void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
2370 { \
2371 Task _t; \
2372 TD_##NAME *t = (TD_##NAME *)&_t; \
2373 t->f = &NAME##_WRAP; \
2374 t->thief = THIEF_TASK; \
2375 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
2376 lace_run_together(&_t); \
2377 } \
2378 \
2379 static inline __attribute__((unused)) \
2380 RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
2381 { \
2382 Task _t; \
2383 TD_##NAME *t = (TD_##NAME *)&_t; \
2384 t->f = &NAME##_WRAP; \
2385 t->thief = THIEF_TASK; \
2386 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
2387 lace_run_task(&_t); \
2388 return ((TD_##NAME *)t)->d.res; \
2389 } \
2390 \
2391 static __attribute__((noinline)) \
2392 RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
2393 { \
2394 TD_##NAME *t; \
2395 \
2396 if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
2397 lace_leapfrog(w, __dq_head); \
2398 t = (TD_##NAME *)__dq_head; \
2399 return ((TD_##NAME *)t)->d.res; \
2400 } \
2401 \
2402 compiler_barrier(); \
2403 \
2404 Worker *wt = w->_public; \
2405 if (wt->movesplit) { \
2406 Task *t = w->split; \
2407 size_t diff = __dq_head - t; \
2408 diff = (diff + 1) / 2; \
2409 w->split = t + diff; \
2410 wt->ts.ts.split += diff; \
2411 compiler_barrier(); \
2412 wt->movesplit = 0; \
2413 PR_COUNTSPLITS(w, CTR_split_grow); \
2414 } \
2415 \
2416 compiler_barrier(); \
2417 \
2418 t = (TD_##NAME *)__dq_head; \
2419 t->thief = THIEF_EMPTY; \
2420 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
2421 } \
2422 \
2423 static inline __attribute__((unused)) \
2424 RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
2425 { \
2426 /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
2427 \
2428 if (likely(0 == w->_public->movesplit)) { \
2429 if (likely(w->split <= __dq_head)) { \
2430 TD_##NAME *t = (TD_##NAME *)__dq_head; \
2431 t->thief = THIEF_EMPTY; \
2432 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
2433 } \
2434 } \
2435 \
2436 return NAME##_SYNC_SLOW(w, __dq_head); \
2437 } \
2438 \
2439 \
2440
2441 #define TASK_IMPL_5(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5)\
2442 void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
2443 { \
2444 t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
2445 } \
2446 \
2447 static inline __attribute__((always_inline)) \
2448 RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5);\
2449 \
2450 /* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
2451 RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
2452 { \
2453 return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4, arg_5); \
2454 } \
2455 \
2456 static inline __attribute__((always_inline)) \
2457 RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5)\
2458
2459 #define TASK_5(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5) TASK_DECL_5(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5) TASK_IMPL_5(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5)
2460
2461 #define VOID_TASK_DECL_5(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5) \
2462 \
2463 typedef struct _TD_##NAME { \
2464 TASK_COMMON_FIELDS(_TD_##NAME) \
2465 union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; ATYPE_5 arg_5; } args; } d;\
2466 } TD_##NAME; \
2467 \
2468 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
2469 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
2470 \
2471 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
2472 void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5);\
2473 static inline void NAME##_SYNC(WorkerP *, Task *); \
2474 static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
2475 \
2476 static inline __attribute__((unused)) \
2477 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
2478 { \
2479 PR_COUNTTASK(w); \
2480 \
2481 TD_##NAME *t; \
2482 TailSplit ts; \
2483 uint32_t head, split, newsplit; \
2484 \
2485 if (__dq_head == w->end) lace_abort_stack_overflow(); \
2486 \
2487 t = (TD_##NAME *)__dq_head; \
2488 t->f = &NAME##_WRAP; \
2489 t->thief = THIEF_TASK; \
2490 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
2491 compiler_barrier(); \
2492 \
2493 Worker *wt = w->_public; \
2494 if (unlikely(w->allstolen)) { \
2495 if (wt->movesplit) wt->movesplit = 0; \
2496 head = __dq_head - w->dq; \
2497 ts = (TailSplit){{head,head+1}}; \
2498 wt->ts.v = ts.v; \
2499 compiler_barrier(); \
2500 wt->allstolen = 0; \
2501 w->split = __dq_head+1; \
2502 w->allstolen = 0; \
2503 } else if (unlikely(wt->movesplit)) { \
2504 head = __dq_head - w->dq; \
2505 split = w->split - w->dq; \
2506 newsplit = (split + head + 2)/2; \
2507 wt->ts.ts.split = newsplit; \
2508 w->split = w->dq + newsplit; \
2509 compiler_barrier(); \
2510 wt->movesplit = 0; \
2511 PR_COUNTSPLITS(w, CTR_split_grow); \
2512 } \
2513 } \
2514 \
2515 static inline __attribute__((unused)) \
2516 void NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
2517 { \
2518 Task _t; \
2519 TD_##NAME *t = (TD_##NAME *)&_t; \
2520 t->f = &NAME##_WRAP; \
2521 t->thief = THIEF_TASK; \
2522 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
2523 lace_run_newframe(&_t); \
2524 return ; \
2525 } \
2526 \
2527 static inline __attribute__((unused)) \
2528 void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
2529 { \
2530 Task _t; \
2531 TD_##NAME *t = (TD_##NAME *)&_t; \
2532 t->f = &NAME##_WRAP; \
2533 t->thief = THIEF_TASK; \
2534 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
2535 lace_run_together(&_t); \
2536 } \
2537 \
2538 static inline __attribute__((unused)) \
2539 void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
2540 { \
2541 Task _t; \
2542 TD_##NAME *t = (TD_##NAME *)&_t; \
2543 t->f = &NAME##_WRAP; \
2544 t->thief = THIEF_TASK; \
2545 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
2546 lace_run_task(&_t); \
2547 return ; \
2548 } \
2549 \
2550 static __attribute__((noinline)) \
2551 void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
2552 { \
2553 TD_##NAME *t; \
2554 \
2555 if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
2556 lace_leapfrog(w, __dq_head); \
2557 t = (TD_##NAME *)__dq_head; \
2558 return ; \
2559 } \
2560 \
2561 compiler_barrier(); \
2562 \
2563 Worker *wt = w->_public; \
2564 if (wt->movesplit) { \
2565 Task *t = w->split; \
2566 size_t diff = __dq_head - t; \
2567 diff = (diff + 1) / 2; \
2568 w->split = t + diff; \
2569 wt->ts.ts.split += diff; \
2570 compiler_barrier(); \
2571 wt->movesplit = 0; \
2572 PR_COUNTSPLITS(w, CTR_split_grow); \
2573 } \
2574 \
2575 compiler_barrier(); \
2576 \
2577 t = (TD_##NAME *)__dq_head; \
2578 t->thief = THIEF_EMPTY; \
2579 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
2580 } \
2581 \
2582 static inline __attribute__((unused)) \
2583 void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
2584 { \
2585 /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
2586 \
2587 if (likely(0 == w->_public->movesplit)) { \
2588 if (likely(w->split <= __dq_head)) { \
2589 TD_##NAME *t = (TD_##NAME *)__dq_head; \
2590 t->thief = THIEF_EMPTY; \
2591 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
2592 } \
2593 } \
2594 \
2595 return NAME##_SYNC_SLOW(w, __dq_head); \
2596 } \
2597 \
2598 \
2599
2600 #define VOID_TASK_IMPL_5(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5)\
2601 void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
2602 { \
2603 NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
2604 } \
2605 \
2606 static inline __attribute__((always_inline)) \
2607 void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5);\
2608 \
2609 /* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
2610 void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
2611 { \
2612 return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4, arg_5); \
2613 } \
2614 \
2615 static inline __attribute__((always_inline)) \
2616 void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5)\
2617
2618 #define VOID_TASK_5(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5) VOID_TASK_DECL_5(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5) VOID_TASK_IMPL_5(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5)
2619
2620
2621 // Task macros for tasks of arity 6
2622
2623 #define TASK_DECL_6(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6)\
2624 \
2625 typedef struct _TD_##NAME { \
2626 TASK_COMMON_FIELDS(_TD_##NAME) \
2627 union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; ATYPE_5 arg_5; ATYPE_6 arg_6; } args; RTYPE res; } d;\
2628 } TD_##NAME; \
2629 \
2630 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
2631 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
2632 \
2633 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
2634 RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6);\
2635 static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
2636 static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
2637 \
2638 static inline __attribute__((unused)) \
2639 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
2640 { \
2641 PR_COUNTTASK(w); \
2642 \
2643 TD_##NAME *t; \
2644 TailSplit ts; \
2645 uint32_t head, split, newsplit; \
2646 \
2647 if (__dq_head == w->end) lace_abort_stack_overflow(); \
2648 \
2649 t = (TD_##NAME *)__dq_head; \
2650 t->f = &NAME##_WRAP; \
2651 t->thief = THIEF_TASK; \
2652 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
2653 compiler_barrier(); \
2654 \
2655 Worker *wt = w->_public; \
2656 if (unlikely(w->allstolen)) { \
2657 if (wt->movesplit) wt->movesplit = 0; \
2658 head = __dq_head - w->dq; \
2659 ts = (TailSplit){{head,head+1}}; \
2660 wt->ts.v = ts.v; \
2661 compiler_barrier(); \
2662 wt->allstolen = 0; \
2663 w->split = __dq_head+1; \
2664 w->allstolen = 0; \
2665 } else if (unlikely(wt->movesplit)) { \
2666 head = __dq_head - w->dq; \
2667 split = w->split - w->dq; \
2668 newsplit = (split + head + 2)/2; \
2669 wt->ts.ts.split = newsplit; \
2670 w->split = w->dq + newsplit; \
2671 compiler_barrier(); \
2672 wt->movesplit = 0; \
2673 PR_COUNTSPLITS(w, CTR_split_grow); \
2674 } \
2675 } \
2676 \
2677 static inline __attribute__((unused)) \
2678 RTYPE NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
2679 { \
2680 Task _t; \
2681 TD_##NAME *t = (TD_##NAME *)&_t; \
2682 t->f = &NAME##_WRAP; \
2683 t->thief = THIEF_TASK; \
2684 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
2685 lace_run_newframe(&_t); \
2686 return ((TD_##NAME *)t)->d.res; \
2687 } \
2688 \
2689 static inline __attribute__((unused)) \
2690 void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
2691 { \
2692 Task _t; \
2693 TD_##NAME *t = (TD_##NAME *)&_t; \
2694 t->f = &NAME##_WRAP; \
2695 t->thief = THIEF_TASK; \
2696 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
2697 lace_run_together(&_t); \
2698 } \
2699 \
2700 static inline __attribute__((unused)) \
2701 RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
2702 { \
2703 Task _t; \
2704 TD_##NAME *t = (TD_##NAME *)&_t; \
2705 t->f = &NAME##_WRAP; \
2706 t->thief = THIEF_TASK; \
2707 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
2708 lace_run_task(&_t); \
2709 return ((TD_##NAME *)t)->d.res; \
2710 } \
2711 \
2712 static __attribute__((noinline)) \
2713 RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
2714 { \
2715 TD_##NAME *t; \
2716 \
2717 if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
2718 lace_leapfrog(w, __dq_head); \
2719 t = (TD_##NAME *)__dq_head; \
2720 return ((TD_##NAME *)t)->d.res; \
2721 } \
2722 \
2723 compiler_barrier(); \
2724 \
2725 Worker *wt = w->_public; \
2726 if (wt->movesplit) { \
2727 Task *t = w->split; \
2728 size_t diff = __dq_head - t; \
2729 diff = (diff + 1) / 2; \
2730 w->split = t + diff; \
2731 wt->ts.ts.split += diff; \
2732 compiler_barrier(); \
2733 wt->movesplit = 0; \
2734 PR_COUNTSPLITS(w, CTR_split_grow); \
2735 } \
2736 \
2737 compiler_barrier(); \
2738 \
2739 t = (TD_##NAME *)__dq_head; \
2740 t->thief = THIEF_EMPTY; \
2741 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
2742 } \
2743 \
2744 static inline __attribute__((unused)) \
2745 RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
2746 { \
2747 /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
2748 \
2749 if (likely(0 == w->_public->movesplit)) { \
2750 if (likely(w->split <= __dq_head)) { \
2751 TD_##NAME *t = (TD_##NAME *)__dq_head; \
2752 t->thief = THIEF_EMPTY; \
2753 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
2754 } \
2755 } \
2756 \
2757 return NAME##_SYNC_SLOW(w, __dq_head); \
2758 } \
2759 \
2760 \
2761
2762 #define TASK_IMPL_6(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6)\
2763 void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
2764 { \
2765 t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
2766 } \
2767 \
2768 static inline __attribute__((always_inline)) \
2769 RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6);\
2770 \
2771 /* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
2772 RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
2773 { \
2774 return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4, arg_5, arg_6); \
2775 } \
2776 \
2777 static inline __attribute__((always_inline)) \
2778 RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5, ATYPE_6 ARG_6)\
2779
2780 #define TASK_6(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6) TASK_DECL_6(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6) TASK_IMPL_6(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6)
2781
2782 #define VOID_TASK_DECL_6(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6) \
2783 \
2784 typedef struct _TD_##NAME { \
2785 TASK_COMMON_FIELDS(_TD_##NAME) \
2786 union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; ATYPE_5 arg_5; ATYPE_6 arg_6; } args; } d;\
2787 } TD_##NAME; \
2788 \
2789 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
2790 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
2791 \
2792 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
2793 void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6);\
2794 static inline void NAME##_SYNC(WorkerP *, Task *); \
2795 static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
2796 \
2797 static inline __attribute__((unused)) \
2798 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
2799 { \
2800 PR_COUNTTASK(w); \
2801 \
2802 TD_##NAME *t; \
2803 TailSplit ts; \
2804 uint32_t head, split, newsplit; \
2805 \
2806 if (__dq_head == w->end) lace_abort_stack_overflow(); \
2807 \
2808 t = (TD_##NAME *)__dq_head; \
2809 t->f = &NAME##_WRAP; \
2810 t->thief = THIEF_TASK; \
2811 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
2812 compiler_barrier(); \
2813 \
2814 Worker *wt = w->_public; \
2815 if (unlikely(w->allstolen)) { \
2816 if (wt->movesplit) wt->movesplit = 0; \
2817 head = __dq_head - w->dq; \
2818 ts = (TailSplit){{head,head+1}}; \
2819 wt->ts.v = ts.v; \
2820 compiler_barrier(); \
2821 wt->allstolen = 0; \
2822 w->split = __dq_head+1; \
2823 w->allstolen = 0; \
2824 } else if (unlikely(wt->movesplit)) { \
2825 head = __dq_head - w->dq; \
2826 split = w->split - w->dq; \
2827 newsplit = (split + head + 2)/2; \
2828 wt->ts.ts.split = newsplit; \
2829 w->split = w->dq + newsplit; \
2830 compiler_barrier(); \
2831 wt->movesplit = 0; \
2832 PR_COUNTSPLITS(w, CTR_split_grow); \
2833 } \
2834 } \
2835 \
2836 static inline __attribute__((unused)) \
2837 void NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
2838 { \
2839 Task _t; \
2840 TD_##NAME *t = (TD_##NAME *)&_t; \
2841 t->f = &NAME##_WRAP; \
2842 t->thief = THIEF_TASK; \
2843 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
2844 lace_run_newframe(&_t); \
2845 return ; \
2846 } \
2847 \
2848 static inline __attribute__((unused)) \
2849 void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
2850 { \
2851 Task _t; \
2852 TD_##NAME *t = (TD_##NAME *)&_t; \
2853 t->f = &NAME##_WRAP; \
2854 t->thief = THIEF_TASK; \
2855 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
2856 lace_run_together(&_t); \
2857 } \
2858 \
2859 static inline __attribute__((unused)) \
2860 void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
2861 { \
2862 Task _t; \
2863 TD_##NAME *t = (TD_##NAME *)&_t; \
2864 t->f = &NAME##_WRAP; \
2865 t->thief = THIEF_TASK; \
2866 t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
2867 lace_run_task(&_t); \
2868 return ; \
2869 } \
2870 \
2871 static __attribute__((noinline)) \
2872 void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
2873 { \
2874 TD_##NAME *t; \
2875 \
2876 if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
2877 lace_leapfrog(w, __dq_head); \
2878 t = (TD_##NAME *)__dq_head; \
2879 return ; \
2880 } \
2881 \
2882 compiler_barrier(); \
2883 \
2884 Worker *wt = w->_public; \
2885 if (wt->movesplit) { \
2886 Task *t = w->split; \
2887 size_t diff = __dq_head - t; \
2888 diff = (diff + 1) / 2; \
2889 w->split = t + diff; \
2890 wt->ts.ts.split += diff; \
2891 compiler_barrier(); \
2892 wt->movesplit = 0; \
2893 PR_COUNTSPLITS(w, CTR_split_grow); \
2894 } \
2895 \
2896 compiler_barrier(); \
2897 \
2898 t = (TD_##NAME *)__dq_head; \
2899 t->thief = THIEF_EMPTY; \
2900 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
2901 } \
2902 \
2903 static inline __attribute__((unused)) \
2904 void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
2905 { \
2906 /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
2907 \
2908 if (likely(0 == w->_public->movesplit)) { \
2909 if (likely(w->split <= __dq_head)) { \
2910 TD_##NAME *t = (TD_##NAME *)__dq_head; \
2911 t->thief = THIEF_EMPTY; \
2912 return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
2913 } \
2914 } \
2915 \
2916 return NAME##_SYNC_SLOW(w, __dq_head); \
2917 } \
2918 \
2919 \
2920
2921 #define VOID_TASK_IMPL_6(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6)\
2922 void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
2923 { \
2924 NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
2925 } \
2926 \
2927 static inline __attribute__((always_inline)) \
2928 void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6);\
2929 \
2930 /* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
2931 void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
2932 { \
2933 return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4, arg_5, arg_6); \
2934 } \
2935 \
2936 static inline __attribute__((always_inline)) \
2937 void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5, ATYPE_6 ARG_6)\
2938
2939 #define VOID_TASK_6(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6) VOID_TASK_DECL_6(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6) VOID_TASK_IMPL_6(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6)
2940
2941
2942
2943 #ifdef __cplusplus
2944 }
2945 #endif /* __cplusplus */
2946
2947 #endif
2948