1 /*
2  * Copyright 2013-2016 Formal Methods and Tools, University of Twente
3  * Copyright 2016-2017 Tom van Dijk, Johannes Kepler University Linz
4  * Copyright 2019-2021 Tom van Dijk, Formal Methods and Tools, University of Twente
5  *
6  * Licensed under the Apache License, Version 2.0 (the License);
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an AS IS BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 #include <unistd.h>
20 #include <stdint.h>
21 #include <stdio.h>
22 #include <pthread.h> /* for pthread_t */
23 
24 #ifndef __LACE_H__
25 #define __LACE_H__
26 
27 #define LACE_PIE_TIMES 0
28 #define LACE_COUNT_TASKS 0
29 #define LACE_COUNT_STEALS 0
30 #define LACE_COUNT_SPLITS 0
31 #define LACE_USE_HWLOC 0
32 #define LACE_USE_MMAP 0
33 
34 #ifdef __cplusplus
35 extern "C" {
36 #endif /* __cplusplus */
37 
38 /**
39  * Using Lace.
40  *
41  * Optionally set the verbosity level with lace_set_verbosity.
42  * Optionally set the default program stack size of each worker thread with lace_set_stacksize.
43  *
44  * Then call lace_start to start Lace workers.
45  * - lace_start(n_workers, deque_size);
46  *   set both parameters to 0 for reasonable defaults, using all available cores.
47  *
 * After this, you can run tasks using the RUN(...) macro.
49  *
50  * Use lace_suspend and lace_resume to temporarily stop running, or lace_stop to completely stop Lace.
51  */
52 
53 /**
54  * Type definitions used in the functions below.
55  * - WorkerP contains the (private) Worker data
56  * - Task contains a single Task
57  */
58 typedef struct _WorkerP WorkerP;
59 typedef struct _Task Task;
60 
61 /**
 * The macro LACE_TYPEDEF_CB(returntype, typedefname, parametertypes) defines
 * a function-pointer type <typedefname> for a Task used as a callback function.
64  */
65 #define LACE_TYPEDEF_CB(t, f, ...) typedef t (*f)(WorkerP *, Task *, ##__VA_ARGS__);
66 
67 /**
68  * Set verbosity level (0 = no startup messages, 1 = startup messages)
69  * Default level: 0
70  */
71 void lace_set_verbosity(int level);
72 
73 /**
74  * Set the program stack size of Lace worker threads. (Not really needed, default is OK.)
75  */
76 void lace_set_stacksize(size_t stacksize);
77 
78 /**
79  * Get the program stack size of Lace worker threads.
80  * If this returns 0, it uses the default.
81  */
82 size_t lace_get_stacksize(void);
83 
84 /**
85  * Get the number of available PUs (hardware threads)
86  */
87 unsigned int lace_get_pu_count(void);
88 
89 /**
 * Start Lace with <n_workers> workers and a task deque size of <dqsize> per worker.
 * If <n_workers> is set to 0, automatically detects available cores.
 * If <dqsize> is set to 0, uses a reasonable default value.
93  */
94 void lace_start(unsigned int n_workers, size_t dqsize);
95 
96 /**
97  * Suspend all workers.
98  * Call this method from outside Lace threads.
99  */
100 void lace_suspend(void);
101 
102 /**
103  * Resume all workers.
104  * Call this method from outside Lace threads.
105  */
106 void lace_resume(void);
107 
108 /**
109  * Stop Lace.
110  * Call this method from outside Lace threads.
111  */
112 void lace_stop(void);
113 
114 /**
115  * Steal a random task.
116  * Only use this from inside a Lace task.
117  */
118 #define lace_steal_random() CALL(lace_steal_random)
119 void lace_steal_random_CALL(WorkerP*, Task*);
120 
121 /**
122  * Enter the Lace barrier. (all active workers must enter it before we can continue)
123  * Only run this from inside a Lace task.
124  */
125 void lace_barrier(void);
126 
127 /**
128  * Retrieve the number of Lace workers
129  */
130 unsigned int lace_workers(void);
131 
132 /**
133  * Retrieve the current worker data.
134  * Only run this from inside a Lace task.
135  * (Used by LACE_VARS)
136  */
137 WorkerP *lace_get_worker(void);
138 
139 /**
140  * Retrieve the current head of the deque of the worker.
141  * (Used by LACE_VARS)
142  */
143 Task *lace_get_head(WorkerP *);
144 
145 /**
146  * Helper function to call from outside Lace threads.
147  * This helper function is used by the _RUN methods for the RUN() macro.
148  */
149 void lace_run_task(Task *task);
150 
151 /**
152  * Helper function to start a new task execution (task frame) on a given task.
153  * This helper function is used by the _NEWFRAME methods for the NEWFRAME() macro
154  * Only when the task is done, do workers continue with the previous task frame.
155  */
156 void lace_run_newframe(Task *task);
157 
158 /**
 * Helper function to make all workers run a given task together.
160  * This helper function is used by the _TOGETHER methods for the TOGETHER() macro
161  * They all start the task in a lace_barrier and complete it with a lace barrier.
162  * Meaning they all start together, and all end together.
163  */
164 void lace_run_together(Task *task);
165 
166 /**
 * Create a pointer to a Task's main function.
168  */
169 #define TASK(f)           ( f##_CALL )
170 
171 /**
 * Call a Task's implementation (adds Lace variables to call)
173  */
174 #define WRAP(f, ...)      ( f((WorkerP *)__lace_worker, (Task *)__lace_dq_head, ##__VA_ARGS__) )
175 
176 /**
177  * Sync a task.
178  */
179 #define SYNC(f)           ( __lace_dq_head--, WRAP(f##_SYNC) )
180 
181 /**
182  * Sync a task, but if the task is not stolen, then do not execute it.
183  */
184 #define DROP()            ( __lace_dq_head--, WRAP(lace_drop) )
185 
186 /**
187  * Spawn a task.
188  */
189 #define SPAWN(f, ...)     ( WRAP(f##_SPAWN, ##__VA_ARGS__), __lace_dq_head++ )
190 
191 /**
192  * Directly execute a task from inside a Lace thread.
193  */
194 #define CALL(f, ...)      ( WRAP(f##_CALL, ##__VA_ARGS__) )
195 
196 /**
197  * Directly execute a task from outside Lace threads.
198  */
199 #define RUN(f, ...)    ( f##_RUN ( __VA_ARGS__ ) )
200 
201 /**
202  * Signal all workers to interrupt their current tasks and instead perform (a personal copy of) the given task.
203  */
204 #define TOGETHER(f, ...)  ( f##_TOGETHER ( __VA_ARGS__) )
205 
206 /**
207  * Signal all workers to interrupt their current tasks and help the current thread with the given task.
208  */
209 #define NEWFRAME(f, ...)  ( f##_NEWFRAME ( __VA_ARGS__) )
210 
211 /**
212  * (Try to) steal a task from a random worker.
213  */
214 #define STEAL_RANDOM()    ( CALL(lace_steal_random) )
215 
216 /**
217  * Get the current worker id.
218  */
219 #define LACE_WORKER_ID    ( __lace_worker->worker )
220 
221 /**
222  * Get the core where the current worker is pinned.
223  */
224 #define LACE_WORKER_PU    ( __lace_worker->pu )
225 
226 /**
227  * Initialize local variables __lace_worker and __lace_dq_head which are required for most Lace functionality.
228  * This only works inside a Lace thread.
229  */
230 #define LACE_VARS WorkerP * __attribute__((unused)) __lace_worker = lace_get_worker(); Task * __attribute__((unused)) __lace_dq_head = lace_get_head(__lace_worker);
231 
232 /**
233  * Check if current tasks must be interrupted, and if so, interrupt.
234  */
235 void lace_yield(WorkerP *__lace_worker, Task *__lace_dq_head);
236 #define YIELD_NEWFRAME() { if (unlikely((*(Task* volatile *)&lace_newframe.t) != NULL)) lace_yield(__lace_worker, __lace_dq_head); }
237 
/**
 * True if the given task is stolen, False otherwise.
 * A task counts as stolen when its thief field holds a value greater than 1,
 * i.e. anything other than THIEF_EMPTY (0) or THIEF_TASK (1).
 * The argument is parenthesized so that pointer expressions such as
 * (base + i) can be passed safely.
 */
#define TASK_IS_STOLEN(t) ((size_t)(t)->thief > 1)
242 
/**
 * True if the given task is completed, False otherwise.
 * A task is completed when its thief field equals 2 (THIEF_COMPLETED).
 * The argument is parenthesized so that pointer expressions such as
 * (base + i) can be passed safely.
 */
#define TASK_IS_COMPLETED(t) ((size_t)(t)->thief == 2)
247 
/**
 * Retrieves a pointer to the result of the given task,
 * i.e. the address of the first byte of the task's data area d.
 * The argument is parenthesized so that pointer expressions such as
 * (base + i) can be passed safely.
 */
#define TASK_RESULT(t) (&(t)->d[0])
252 
/**
 * Advance the worker-local random state and yield the new value.
 * Each use performs one multiply-and-add step on __lace_worker->rng
 * (64-bit unsigned arithmetic, wrapping on overflow) and stores the result
 * back, so no state is shared between workers.
 */
#define LACE_TRNG (__lace_worker->rng = 3037000493ULL + __lace_worker->rng * 2862933555777941757ULL)
257 
258 /* Some flags that influence Lace behavior */
259 
260 #ifndef LACE_LEAP_RANDOM /* Use random leaping when leapfrogging fails */
261 #define LACE_LEAP_RANDOM 1
262 #endif
263 
264 #ifndef LACE_COUNT_EVENTS
265 #define LACE_COUNT_EVENTS (LACE_PIE_TIMES || LACE_COUNT_TASKS || LACE_COUNT_STEALS || LACE_COUNT_SPLITS)
266 #endif
267 
268 /**
269  * Now follows the implementation of Lace
270  */
271 
272 /* Typical cacheline size of system architectures */
273 #ifndef LINE_SIZE
274 #define LINE_SIZE 64
275 #endif
276 
277 /* The size of a pointer, 8 bytes on a 64-bit architecture */
278 #define P_SZ (sizeof(void *))
279 
/* PAD(x,b):   bytes to add to x to reach the next multiple of b (0 if already aligned).
   ROUND(x,b): x rounded up to the next multiple of b.
   In both macros b must be a power of 2. */
#define PAD(x,b) ( ((b) - 1) & ((b) - ((x) % (b))) )
#define ROUND(x,b) ( PAD((x), (b)) + (x) )
282 
/* The size is in bytes. Note that this is without the extra overhead from Lace.
   The value must be greater than or equal to the maximum size of your tasks.
   The task size is the maximum of the size of the result or of the sum of the parameter sizes.
   The default is six pointer-sized words; the expansion is fully parenthesized
   so the macro can be used inside larger expressions (e.g. x / LACE_TASKSIZE). */
#ifndef LACE_TASKSIZE
#define LACE_TASKSIZE ((6)*P_SZ)
#endif
289 
290 /* Some fences */
#ifndef compiler_barrier
/* Compiler-only barrier: an empty asm statement with a "memory" clobber, so the
   compiler may not move memory accesses across it. No instruction is emitted.
   Uses __asm__/__volatile__ so this header also compiles in strict ISO modes
   (-std=c99/-std=c11), where the plain `asm` keyword is not available. */
#define compiler_barrier() { __asm__ __volatile__("" ::: "memory"); }
#endif
294 
#ifndef mfence
/* Full memory fence. Uses __asm__/__volatile__ so this header also compiles in
   strict ISO modes (-std=c99/-std=c11), where the plain `asm` keyword is not
   available. */
#if defined(__amd64__) || defined(__i386__)
#define mfence() { __asm__ __volatile__("mfence" ::: "memory"); }
#elif defined(__powerpc__)
#define mfence() { __asm__ __volatile__("sync" ::: "memory"); }
#else
/* Other architectures: fall back to the compiler's full-barrier builtin so that
   mfence() is always defined. */
#define mfence() { __sync_synchronize(); }
#endif
#endif
302 
/* Compiler specific branch prediction optimization.
   The condition is normalized with !! so that
   - any non-zero value (not just 1) is treated as "true" by the hint, and
   - pointer-valued conditions can be passed (__builtin_expect takes a long).
   Both macros therefore evaluate to 0 or 1. */
#ifndef likely
#define likely(x)       __builtin_expect(!!(x),1)
#endif

#ifndef unlikely
#define unlikely(x)     __builtin_expect(!!(x),0)
#endif
311 
312 #if LACE_PIE_TIMES
313 /* High resolution timer */
/*
 * Read the x86 time-stamp counter with the rdtsc instruction and return it
 * as a single 64-bit value (EDX holds the high half, EAX the low half).
 * The "memory" clobber keeps the compiler from moving memory accesses
 * across the read.
 * Declared with a (void) prototype and __asm__/__volatile__ so the header
 * also compiles in strict ISO modes (-std=c99/-std=c11).
 */
static inline uint64_t gethrtime(void)
{
    uint32_t hi, lo;
    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi) :: "memory");
    return (uint64_t)hi<<32 | lo;
}
320 #endif
321 
322 #if LACE_COUNT_EVENTS
/* Declared with an explicit (void) parameter list so this is a real prototype,
   matching the other declarations in this header.
   Presumably resets the event counters; the definition is not in this header. */
void lace_count_reset(void);
324 void lace_count_report_file(FILE *file);
325 #endif
326 
327 #if LACE_COUNT_TASKS
328 #define PR_COUNTTASK(s) PR_INC(s,CTR_tasks)
329 #else
330 #define PR_COUNTTASK(s) /* Empty */
331 #endif
332 
333 #if LACE_COUNT_STEALS
334 #define PR_COUNTSTEALS(s,i) PR_INC(s,i)
335 #else
336 #define PR_COUNTSTEALS(s,i) /* Empty */
337 #endif
338 
339 #if LACE_COUNT_SPLITS
340 #define PR_COUNTSPLITS(s,i) PR_INC(s,i)
341 #else
342 #define PR_COUNTSPLITS(s,i) /* Empty */
343 #endif
344 
345 #if LACE_COUNT_EVENTS
346 #define PR_ADD(s,i,k) ( ((s)->ctr[i])+=k )
347 #else
348 #define PR_ADD(s,i,k) /* Empty */
349 #endif
350 #define PR_INC(s,i) PR_ADD(s,i,1)
351 
/*
 * Indices into the per-worker counter array (WorkerP.ctr).
 * CTR_MAX is not a counter itself; it is the number of counters and is used
 * as the length of that array.
 * NOTE(review): LACE_COUNT_TASKS, LACE_COUNT_STEALS, LACE_COUNT_SPLITS and
 * LACE_PIE_TIMES are defined (to 0) near the top of this header, so the
 * #ifdef tests below always succeed and every enumerator is always present.
 * Confirm whether #if was intended before changing it, since that would
 * renumber the enumerators.
 */
typedef enum {
#ifdef LACE_COUNT_TASKS
    CTR_tasks,       /* Number of tasks spawned */
#endif
#ifdef LACE_COUNT_STEALS
    CTR_steal_tries, /* Number of steal attempts */
    CTR_leap_tries,  /* Number of leap attempts */
    CTR_steals,      /* Number of successful steals */
    CTR_leaps,       /* Number of successful leaps */
    CTR_steal_busy,  /* Number of steal busies */
    CTR_leap_busy,   /* Number of leap busies */
#endif
#ifdef LACE_COUNT_SPLITS
    CTR_split_grow,  /* Number of split right */
    CTR_split_shrink,/* Number of split left */
    CTR_split_req,   /* Number of split requests */
#endif
    CTR_fast_sync,   /* Number of fast syncs */
    CTR_slow_sync,   /* Number of slow syncs */
#ifdef LACE_PIE_TIMES
    CTR_init,        /* Timer for initialization */
    CTR_close,       /* Timer for shutdown */
    CTR_wapp,        /* Timer for application code (steal) */
    CTR_lapp,        /* Timer for application code (leap) */
    CTR_wsteal,      /* Timer for steal code (steal) */
    CTR_lsteal,      /* Timer for steal code (leap) */
    CTR_wstealsucc,  /* Timer for successful steal code (steal) */
    CTR_lstealsucc,  /* Timer for successful steal code (leap) */
    CTR_wsignal,     /* Timer for signal after work (steal) */
    CTR_lsignal,     /* Timer for signal after work (leap) */
#endif
    CTR_MAX          /* Number of counters (array length), keep last */
} CTR_index;
385 
386 #define THIEF_EMPTY     ((struct _Worker*)0x0)
387 #define THIEF_TASK      ((struct _Worker*)0x1)
388 #define THIEF_COMPLETED ((struct _Worker*)0x2)
389 
390 #define TASK_COMMON_FIELDS(type)                               \
391     void (*f)(struct _WorkerP *, struct _Task *, struct type *);  \
392     struct _Worker * volatile thief;
393 
394 struct __lace_common_fields_only { TASK_COMMON_FIELDS(_Task) };
395 #define LACE_COMMON_FIELD_SIZE sizeof(struct __lace_common_fields_only)
396 
397 typedef struct _Task {
398     TASK_COMMON_FIELDS(_Task);
399     char p1[PAD(LACE_COMMON_FIELD_SIZE, P_SZ)];
400     char d[LACE_TASKSIZE];
401     char p2[PAD(ROUND(LACE_COMMON_FIELD_SIZE, P_SZ) + LACE_TASKSIZE, LINE_SIZE)];
402 } Task;
403 
/*
 * The tail and split indices of a worker's deque, overlaid with a single
 * 64-bit word v. The overlay lets both indices be read in one memory access
 * and updated together with one compare-and-swap (see lace_steal).
 */
typedef union __attribute__((packed)) {
    struct {
        uint32_t tail;   /* index of the next task a thief would take */
        uint32_t split;  /* tasks at indices in [tail, split) can be stolen */
    } ts;
    uint64_t v;          /* tail and split viewed as one 64-bit value */
} TailSplit;
411 
/*
 * The public part of a worker: the fields that other workers (thieves)
 * read and write when stealing.
 */
typedef struct _Worker {
    Task *dq;            /* the worker's task deque */
    TailSplit ts;        /* tail and split indices into dq */
    uint8_t allstolen;   /* non-zero: nothing left to steal; thieves skip this worker */

    /* Pads the fields above (pointer + TailSplit + 1 byte) up to a multiple of
       LINE_SIZE, so that movesplit starts on the next line-sized boundary. */
    char pad1[PAD(P_SZ+sizeof(TailSplit)+1, LINE_SIZE)];

    uint8_t movesplit;   /* set to 1 by a thief that found no stealable task, as a
                            request to move the split point; cleared by the owner */
} Worker;
421 
/*
 * The private part of a worker: data used by the owning worker itself.
 */
typedef struct _WorkerP {
    Task *dq;                   // start of this worker's deque (presumably the same pointer as _public->dq -- TODO confirm)
    Task *split;                // private copy of the split point, as a pointer: dq + _public->ts.ts.split
    Task *end;                  // dq+dq_size; spawning when the head reaches this aborts with a stack overflow
    Worker *_public;            // pointer to public Worker struct
    uint64_t rng;               // my random state (advanced by LACE_TRNG)
    uint32_t seed;              // my random seed (for lace_steal_random)
    uint16_t worker;            // what is my worker id? (see LACE_WORKER_ID)
    uint8_t allstolen;          // my private copy of _public->allstolen
    volatile int8_t enabled;    // if this worker is enabled

#if LACE_COUNT_EVENTS
    uint64_t ctr[CTR_MAX];      // counters, indexed by CTR_index
    volatile uint64_t time;     // timestamp of the last timed event (see lace_time_event)
    volatile int level;         // current timing level (see lace_time_event)
#endif

    int16_t pu;                 // my pu (for HWLOC), see LACE_WORKER_PU
} WorkerP;
441 
442 #define LACE_STOLEN   ((Worker*)0)
443 #define LACE_BUSY     ((Worker*)1)
444 #define LACE_NOWORK   ((Worker*)2)
445 
446 void lace_abort_stack_overflow(void) __attribute__((noreturn));
447 
/*
 * Holder for the global "new frame" task pointer, padded to LINE_SIZE bytes.
 * YIELD_NEWFRAME() polls t and calls lace_yield() when it is not NULL.
 */
typedef struct
{
    Task *t;                              /* NULL when no new frame is pending */
    char pad[LINE_SIZE-sizeof(Task *)];   /* fill the rest of the LINE_SIZE bytes */
} lace_newframe_t;
453 
454 extern lace_newframe_t lace_newframe;
455 
456 /**
457  * Make all tasks of the current worker shared.
458  */
459 #define LACE_MAKE_ALL_SHARED() lace_make_all_shared(__lace_worker, __lace_dq_head)
460 static inline void __attribute__((unused))
lace_make_all_shared(WorkerP * w,Task * __lace_dq_head)461 lace_make_all_shared( WorkerP *w, Task *__lace_dq_head)
462 {
463     if (w->split != __lace_dq_head) {
464         w->split = __lace_dq_head;
465         w->_public->ts.ts.split = __lace_dq_head - w->dq;
466     }
467 }
468 
469 #if LACE_PIE_TIMES
/*
 * Account the time elapsed since the previous timed event of worker w.
 *
 * The elapsed time (gethrtime() minus w->time) is added to one or more timer
 * counters, chosen by the event code and the current w->level:
 *   level 0  - no application code entered yet (time goes to CTR_init),
 *   level 1  - working/stealing at the outermost level ("w" counters),
 *   level >1 - inside a sync on a stolen task, i.e. leaping ("l" counters).
 * Event 1 raises level 0 to 1, event 3 increments the level and event 4
 * decrements it.
 *
 * Event codes: 1 enter application code, 2 exit application code,
 * 3 enter sync on stolen, 4 exit sync on stolen, 7 return from failed steal,
 * 8 signalling time, 9 done. Any other code returns immediately and leaves
 * w->time untouched; for the listed codes w->time is set to the new timestamp.
 */
static void lace_time_event( WorkerP *w, int event )
{
    uint64_t now = gethrtime(),
             prev = w->time;

    switch( event ) {

        // Enter application code
        case 1 :
            if(  w->level /* level */ == 0 ) {
                // First entry: everything so far was initialization
                PR_ADD( w, CTR_init, now - prev );
                w->level = 1;
            } else if( w->level /* level */ == 1 ) {
                // A successful steal: counts as steal time and as successful steal time
                PR_ADD( w, CTR_wsteal, now - prev );
                PR_ADD( w, CTR_wstealsucc, now - prev );
            } else {
                // A successful leap: same accounting on the leap counters
                PR_ADD( w, CTR_lsteal, now - prev );
                PR_ADD( w, CTR_lstealsucc, now - prev );
            }
            break;

            // Exit application code
        case 2 :
            if( w->level /* level */ == 1 ) {
                PR_ADD( w, CTR_wapp, now - prev );
            } else {
                PR_ADD( w, CTR_lapp, now - prev );
            }
            break;

            // Enter sync on stolen
        case 3 :
            if( w->level /* level */ == 1 ) {
                PR_ADD( w, CTR_wapp, now - prev );
            } else {
                PR_ADD( w, CTR_lapp, now - prev );
            }
            // One level deeper while waiting for the stolen task
            w->level++;
            break;

            // Exit sync on stolen
        case 4 :
            if( w->level /* level */ == 1 ) {
                // Exiting a sync without a matching enter (event 3)
                fprintf( stderr, "This should not happen, level = %d\n", w->level );
            } else {
                PR_ADD( w, CTR_lsteal, now - prev );
            }
            w->level--;
            break;

            // Return from failed steal
        case 7 :
            if( w->level /* level */ == 0 ) {
                PR_ADD( w, CTR_init, now - prev );
            } else if( w->level /* level */ == 1 ) {
                PR_ADD( w, CTR_wsteal, now - prev );
            } else {
                PR_ADD( w, CTR_lsteal, now - prev );
            }
            break;

            // Signalling time
        case 8 :
            if( w->level /* level */ == 1 ) {
                PR_ADD( w, CTR_wsignal, now - prev );
                PR_ADD( w, CTR_wsteal, now - prev );
            } else {
                PR_ADD( w, CTR_lsignal, now - prev );
                PR_ADD( w, CTR_lsteal, now - prev );
            }
            break;

            // Done
        case 9 :
            if( w->level /* level */ == 0 ) {
                PR_ADD( w, CTR_init, now - prev );
            } else {
                PR_ADD( w, CTR_close, now - prev );
            }
            break;

        // Unknown event: do not account anything and keep the old timestamp
        default: return;
    }

    // Start of the next measured interval
    w->time = now;
}
556 #else
557 #define lace_time_event( w, e ) /* Empty */
558 #endif
559 
/*
 * Try to steal one task from victim and, if that succeeds, execute it.
 *
 * self      - private data of the stealing worker
 * __dq_head - current head of the stealing worker's own deque; passed on to
 *             the stolen task's function
 * victim    - public data of the worker to steal from (may be NULL)
 *
 * Returns:
 *   LACE_STOLEN - a task was taken and has been run to completion,
 *   LACE_BUSY   - a stealable task was seen but the compare-and-swap on the
 *                 victim's tail/split word failed,
 *   LACE_NOWORK - victim is NULL, is marked allstolen, or has no task between
 *                 tail and split. In the last case victim->movesplit is set
 *                 (if it was 0) to ask the victim to share more tasks.
 */
static Worker* __attribute__((noinline))
lace_steal(WorkerP *self, Task *__dq_head, Worker *victim)
{
    if (victim != NULL && !victim->allstolen) {
        /* Must be a volatile. In GCC 4.8, if it is not declared volatile, the
           compiler will optimize extra memory accesses to victim->ts instead
           of comparing the local values ts.ts.tail and ts.ts.split, causing
           thieves to steal non existent tasks! */
        TailSplit ts;
        ts.v = *(volatile uint64_t *)&victim->ts.v;
        if (ts.ts.tail < ts.ts.split) {
            /* Claim the task at index tail by atomically advancing tail by one;
               the 64-bit CAS fails if tail or split changed in the meantime. */
            TailSplit ts_new;
            ts_new.v = ts.v;
            ts_new.ts.tail++;
            if (__sync_bool_compare_and_swap(&victim->ts.v, ts.v, ts_new.v)) {
                // Stolen
                Task *t = &victim->dq[ts.ts.tail];
                /* Record who is running the task before starting it */
                t->thief = self->_public;
                lace_time_event(self, 1);
                t->f(self, __dq_head, t);
                lace_time_event(self, 2);
                /* Signal completion to the owner of the task */
                t->thief = THIEF_COMPLETED;
                lace_time_event(self, 8);
                return LACE_STOLEN;
            }

            /* Lost the race for this task */
            lace_time_event(self, 7);
            return LACE_BUSY;
        }

        /* Nothing stealable: request that the victim moves its split point */
        if (victim->movesplit == 0) {
            victim->movesplit = 1;
            PR_COUNTSPLITS(self, CTR_split_req);
        }
    }

    lace_time_event(self, 7);
    return LACE_NOWORK;
}
599 
/*
 * Shrink the shared (stealable) part of worker w's deque by moving the split
 * point towards the tail, so that tasks become private again.
 *
 * Returns 0 when the split point was moved; w->split then holds the new
 * private split pointer.
 * Returns 1 when no shared task is left (tail == split), in which case both
 * the public and the private allstolen flags are set.
 */
static int
lace_shrink_shared(WorkerP *w)
{
    Worker *wt = w->_public;
    TailSplit ts;
    ts.v = wt->ts.v; /* Force in 1 memory read */
    uint32_t tail = ts.ts.tail;
    uint32_t split = ts.ts.split;

    if (tail != split) {
        /* Tentatively move split to the midpoint of the shared part */
        uint32_t newsplit = (tail + split)/2;
        wt->ts.ts.split = newsplit;
        /* Make the new split visible before re-reading tail, since thieves
           may have advanced tail concurrently */
        mfence();
        tail = *(volatile uint32_t *)&(wt->ts.ts.tail);
        if (tail != split) {
            if (unlikely(tail > newsplit)) {
                /* A thief already moved tail past the new split:
                   recompute the midpoint from the fresh tail */
                newsplit = (tail + split) / 2;
                wt->ts.ts.split = newsplit;
            }
            w->split = w->dq + newsplit;
            PR_COUNTSPLITS(w, CTR_split_shrink);
            return 0;
        }
    }

    /* Everything up to the old split has been stolen */
    wt->allstolen = 1;
    w->allstolen = 1;
    return 1;
}
629 
/*
 * Wait until the stolen task at __lace_dq_head has been completed by its
 * thief. While waiting, repeatedly try to steal work from that thief; after
 * 32 consecutive LACE_NOWORK results (and if LACE_LEAP_RANDOM is enabled) try
 * a random steal instead. On return the task's thief field has been reset to
 * THIEF_EMPTY.
 */
static inline void
lace_leapfrog(WorkerP *__lace_worker, Task *__lace_dq_head)
{
    lace_time_event(__lace_worker, 3);
    Task *t = __lace_dq_head;
    Worker *thief = t->thief;
    if (thief != THIEF_COMPLETED) {
        /* Spin until the thief field holds something other than
           THIEF_EMPTY (0) or THIEF_TASK (1), i.e. the thief has registered */
        while ((size_t)thief <= 1) thief = t->thief;

        /* PRE-LEAP: increase head again */
        __lace_dq_head += 1;

        /* Now leapfrog */
        int attempts = 32;
        while (thief != THIEF_COMPLETED) {
            PR_COUNTSTEALS(__lace_worker, CTR_leap_tries);
            Worker *res = lace_steal(__lace_worker, __lace_dq_head, thief);
            if (res == LACE_NOWORK) {
                /* Thief has nothing to give: check for a pending new frame and
                   occasionally steal from a random worker instead */
                YIELD_NEWFRAME();
                if ((LACE_LEAP_RANDOM) && (--attempts == 0)) { lace_steal_random(); attempts = 32; }
            } else if (res == LACE_STOLEN) {
                PR_COUNTSTEALS(__lace_worker, CTR_leaps);
            } else if (res == LACE_BUSY) {
                PR_COUNTSTEALS(__lace_worker, CTR_leap_busy);
            }
            /* Re-read the thief field from memory on every iteration */
            compiler_barrier();
            thief = t->thief;
        }

        /* POST-LEAP: really pop the finished task */
        /*            no need to decrease __lace_dq_head, since it is a local variable */
        compiler_barrier();
        if (__lace_worker->allstolen == 0) {
            /* Assume: tail = split = head (pre-pop) */
            /* Now we do a real pop ergo either decrease tail,split,head or declare allstolen */
            Worker *wt = __lace_worker->_public;
            wt->allstolen = 1;
            __lace_worker->allstolen = 1;
        }
    }

    /* Release the task slot only after all of the above is done */
    compiler_barrier();
    t->thief = THIEF_EMPTY;
    lace_time_event(__lace_worker, 4);
}
675 
676 static __attribute__((noinline))
lace_drop_slow(WorkerP * w,Task * __dq_head)677 void lace_drop_slow(WorkerP *w, Task *__dq_head)
678 {
679     if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) lace_leapfrog(w, __dq_head);
680 }
681 
682 static inline __attribute__((unused))
lace_drop(WorkerP * w,Task * __dq_head)683 void lace_drop(WorkerP *w, Task *__dq_head)
684 {
685     if (likely(0 == w->_public->movesplit)) {
686         if (likely(w->split <= __dq_head)) {
687             return;
688         }
689     }
690     lace_drop_slow(w, __dq_head);
691 }
692 
693 
694 
695 // Task macros for tasks of arity 0
696 
697 #define TASK_DECL_0(RTYPE, NAME)                                                      \
698                                                                                       \
699 typedef struct _TD_##NAME {                                                           \
700   TASK_COMMON_FIELDS(_TD_##NAME)                                                      \
701   union {  RTYPE res; } d;                                                            \
702 } TD_##NAME;                                                                          \
703                                                                                       \
704 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
705 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
706                                                                                       \
707 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *);                                     \
708 RTYPE NAME##_CALL(WorkerP *, Task * );                                                \
709 static inline RTYPE NAME##_SYNC(WorkerP *, Task *);                                   \
710 static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *);                                     \
711                                                                                       \
712 static inline __attribute__((unused))                                                 \
713 void NAME##_SPAWN(WorkerP *w, Task *__dq_head )                                       \
714 {                                                                                     \
715     PR_COUNTTASK(w);                                                                  \
716                                                                                       \
717     TD_##NAME *t;                                                                     \
718     TailSplit ts;                                                                     \
719     uint32_t head, split, newsplit;                                                   \
720                                                                                       \
721     if (__dq_head == w->end) lace_abort_stack_overflow();                             \
722                                                                                       \
723     t = (TD_##NAME *)__dq_head;                                                       \
724     t->f = &NAME##_WRAP;                                                              \
725     t->thief = THIEF_TASK;                                                            \
726                                                                                       \
727     compiler_barrier();                                                               \
728                                                                                       \
729     Worker *wt = w->_public;                                                          \
730     if (unlikely(w->allstolen)) {                                                     \
731         if (wt->movesplit) wt->movesplit = 0;                                         \
732         head = __dq_head - w->dq;                                                     \
733         ts = (TailSplit){{head,head+1}};                                              \
734         wt->ts.v = ts.v;                                                              \
735         compiler_barrier();                                                           \
736         wt->allstolen = 0;                                                            \
737         w->split = __dq_head+1;                                                       \
738         w->allstolen = 0;                                                             \
739     } else if (unlikely(wt->movesplit)) {                                             \
740         head = __dq_head - w->dq;                                                     \
741         split = w->split - w->dq;                                                     \
742         newsplit = (split + head + 2)/2;                                              \
743         wt->ts.ts.split = newsplit;                                                   \
744         w->split = w->dq + newsplit;                                                  \
745         compiler_barrier();                                                           \
746         wt->movesplit = 0;                                                            \
747         PR_COUNTSPLITS(w, CTR_split_grow);                                            \
748     }                                                                                 \
749 }                                                                                     \
750                                                                                       \
751 static inline __attribute__((unused))                                                 \
752 RTYPE NAME##_NEWFRAME()                                                               \
753 {                                                                                     \
754     Task _t;                                                                          \
755     TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
756     t->f = &NAME##_WRAP;                                                              \
757     t->thief = THIEF_TASK;                                                            \
758                                                                                       \
759     lace_run_newframe(&_t);                                                           \
760     return ((TD_##NAME *)t)->d.res;                                                   \
761 }                                                                                     \
762                                                                                       \
763 static inline __attribute__((unused))                                                 \
764 void NAME##_TOGETHER()                                                                \
765 {                                                                                     \
766     Task _t;                                                                          \
767     TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
768     t->f = &NAME##_WRAP;                                                              \
769     t->thief = THIEF_TASK;                                                            \
770                                                                                       \
771     lace_run_together(&_t);                                                           \
772 }                                                                                     \
773                                                                                       \
774 static inline __attribute__((unused))                                                 \
775 RTYPE NAME##_RUN()                                                                    \
776 {                                                                                     \
777     Task _t;                                                                          \
778     TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
779     t->f = &NAME##_WRAP;                                                              \
780     t->thief = THIEF_TASK;                                                            \
781                                                                                       \
782     lace_run_task(&_t);                                                               \
783     return ((TD_##NAME *)t)->d.res;                                                   \
784 }                                                                                     \
785                                                                                       \
786 static __attribute__((noinline))                                                      \
787 RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head)                                   \
788 {                                                                                     \
789     TD_##NAME *t;                                                                     \
790                                                                                       \
791     if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) {          \
792         lace_leapfrog(w, __dq_head);                                                  \
793         t = (TD_##NAME *)__dq_head;                                                   \
794         return ((TD_##NAME *)t)->d.res;                                               \
795     }                                                                                 \
796                                                                                       \
797     compiler_barrier();                                                               \
798                                                                                       \
799     Worker *wt = w->_public;                                                          \
800     if (wt->movesplit) {                                                              \
801         Task *t = w->split;                                                           \
802         size_t diff = __dq_head - t;                                                  \
803         diff = (diff + 1) / 2;                                                        \
804         w->split = t + diff;                                                          \
805         wt->ts.ts.split += diff;                                                      \
806         compiler_barrier();                                                           \
807         wt->movesplit = 0;                                                            \
808         PR_COUNTSPLITS(w, CTR_split_grow);                                            \
809     }                                                                                 \
810                                                                                       \
811     compiler_barrier();                                                               \
812                                                                                       \
813     t = (TD_##NAME *)__dq_head;                                                       \
814     t->thief = THIEF_EMPTY;                                                           \
815     return NAME##_CALL(w, __dq_head );                                                \
816 }                                                                                     \
817                                                                                       \
818 static inline __attribute__((unused))                                                 \
819 RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head)                                        \
820 {                                                                                     \
821     /* assert (__dq_head > 0); */  /* Commented out because we assume contract */     \
822                                                                                       \
823     if (likely(0 == w->_public->movesplit)) {                                         \
824         if (likely(w->split <= __dq_head)) {                                          \
825             TD_##NAME *t = (TD_##NAME *)__dq_head;                                    \
826             t->thief = THIEF_EMPTY;                                                   \
827             return NAME##_CALL(w, __dq_head );                                        \
828         }                                                                             \
829     }                                                                                 \
830                                                                                       \
831     return NAME##_SYNC_SLOW(w, __dq_head);                                            \
832 }                                                                                     \
833                                                                                       \
834                                                                                       \
835 
/*
 * TASK_IMPL_0(RTYPE, NAME): definitions for the 0-ary task NAME returning RTYPE.
 *
 * Expands to
 *   - NAME_WRAP: calls NAME_CALL and stores its result in the descriptor's d.res;
 *   - NAME_CALL: forwards its two arguments to NAME_WORK;
 *   - the header of NAME_WORK, so the macro invocation must be followed
 *     directly by the brace-enclosed task body.
 * Inside that body the worker and deque-head parameters are available as
 * __lace_worker and __lace_dq_head.
 */
#define TASK_IMPL_0(RTYPE, NAME) \
void NAME##_WRAP(WorkerP *worker, Task *dq_head, TD_##NAME *td) \
{ \
    td->d.res = NAME##_CALL(worker, dq_head); \
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head); \
\
/* NAME##_WORK is declared always_inline, so its body ends up inside NAME##_CALL */ \
RTYPE NAME##_CALL(WorkerP *worker, Task *dq_head) \
{ \
    return NAME##_WORK(worker, dq_head); \
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)))
853 
/* TASK_0(RTYPE, NAME): declare and define a 0-ary task in one step; the task body must follow. */
#define TASK_0(RTYPE, NAME) \
    TASK_DECL_0(RTYPE, NAME) \
    TASK_IMPL_0(RTYPE, NAME)
855 
/*
 * VOID_TASK_DECL_0(NAME): declarations for the 0-ary task NAME that has no result.
 *
 * Expands to
 *   - the descriptor type TD_NAME (only the common task fields) plus a
 *     compile-time check that it fits inside a Task;
 *   - prototypes of NAME_WRAP and NAME_CALL (defined by VOID_TASK_IMPL_0);
 *   - NAME_SPAWN(w, head): fills the descriptor at slot `head` and, if the
 *     worker is flagged allstolen or a split move was requested, updates the
 *     split information of the worker;
 *   - NAME_NEWFRAME(), NAME_TOGETHER(), NAME_RUN(): build a descriptor on the
 *     stack and pass it to lace_run_newframe, lace_run_together and
 *     lace_run_task respectively;
 *   - NAME_SYNC(w, head): runs the task at slot `head` directly when no split
 *     move is pending and the slot is not below w->split; otherwise defers to
 *     NAME_SYNC_SLOW;
 *   - NAME_SYNC_SLOW(w, head): either waits through lace_leapfrog or, after
 *     optionally moving the split point, runs the task itself.
 *
 * None of the generated functions returns a value.
 */
#define VOID_TASK_DECL_0(NAME) \
\
typedef struct _TD_##NAME { \
  TASK_COMMON_FIELDS(_TD_##NAME) \
\
} TD_##NAME; \
\
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */ \
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1]; \
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
void NAME##_CALL(WorkerP *, Task * ); \
static inline void NAME##_SYNC(WorkerP *, Task *); \
static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head ) \
{ \
    PR_COUNTTASK(w); \
\
    TD_##NAME *t; \
    TailSplit ts; \
    uint32_t head, split, newsplit; \
\
    /* No free slot left in this worker's task array */ \
    if (__dq_head == w->end) lace_abort_stack_overflow(); \
\
    t = (TD_##NAME *)__dq_head; \
    t->f = &NAME##_WRAP; \
    t->thief = THIEF_TASK; \
\
    /* Keep the descriptor writes ahead of the split updates below */ \
    compiler_barrier(); \
\
    Worker *wt = w->_public; \
    if (unlikely(w->allstolen)) { \
        /* Restart the shared range at this slot: tail = head, split = head + 1 */ \
        if (wt->movesplit) wt->movesplit = 0; \
        head = __dq_head - w->dq; \
        ts = (TailSplit){{head,head+1}}; \
        wt->ts.v = ts.v; \
        compiler_barrier(); \
        wt->allstolen = 0; \
        w->split = __dq_head+1; \
        w->allstolen = 0; \
    } else if (unlikely(wt->movesplit)) { \
        /* Split move requested: new split halfway between old split and head + 1 (rounded up) */ \
        head = __dq_head - w->dq; \
        split = w->split - w->dq; \
        newsplit = (split + head + 2)/2; \
        wt->ts.ts.split = newsplit; \
        w->split = w->dq + newsplit; \
        compiler_barrier(); \
        wt->movesplit = 0; \
        PR_COUNTSPLITS(w, CTR_split_grow); \
    } \
} \
\
static inline __attribute__((unused)) \
void NAME##_NEWFRAME(void) \
{ \
    Task _t; \
    TD_##NAME *t = (TD_##NAME *)&_t; \
    t->f = &NAME##_WRAP; \
    t->thief = THIEF_TASK; \
\
    lace_run_newframe(&_t); \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(void) \
{ \
    Task _t; \
    TD_##NAME *t = (TD_##NAME *)&_t; \
    t->f = &NAME##_WRAP; \
    t->thief = THIEF_TASK; \
\
    lace_run_together(&_t); \
} \
\
static inline __attribute__((unused)) \
void NAME##_RUN(void) \
{ \
    Task _t; \
    TD_##NAME *t = (TD_##NAME *)&_t; \
    t->f = &NAME##_WRAP; \
    t->thief = THIEF_TASK; \
\
    lace_run_task(&_t); \
} \
\
static __attribute__((noinline)) \
void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
    /* Slot is not ours to run: let lace_leapfrog handle it; there is no result to fetch */ \
    if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
        lace_leapfrog(w, __dq_head); \
        return; \
    } \
\
    compiler_barrier(); \
\
    Worker *wt = w->_public; \
    if (wt->movesplit) { \
        /* Advance the split by half the distance to this slot, rounded up */ \
        Task *old_split = w->split; \
        size_t diff = __dq_head - old_split; \
        diff = (diff + 1) / 2; \
        w->split = old_split + diff; \
        wt->ts.ts.split += diff; \
        compiler_barrier(); \
        wt->movesplit = 0; \
        PR_COUNTSPLITS(w, CTR_split_grow); \
    } \
\
    compiler_barrier(); \
\
    TD_##NAME *t = (TD_##NAME *)__dq_head; \
    t->thief = THIEF_EMPTY; \
    /* Plain call: ISO C does not allow `return expr;` in a function returning void */ \
    NAME##_CALL(w, __dq_head ); \
} \
\
static inline __attribute__((unused)) \
void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
    /* assert (__dq_head > 0); */  /* Commented out because we assume contract */ \
\
    if (likely(0 == w->_public->movesplit)) { \
        if (likely(w->split <= __dq_head)) { \
            /* Fast path: mark the slot empty and run the task in place */ \
            TD_##NAME *t = (TD_##NAME *)__dq_head; \
            t->thief = THIEF_EMPTY; \
            NAME##_CALL(w, __dq_head ); \
            return; \
        } \
    } \
\
    NAME##_SYNC_SLOW(w, __dq_head); \
}
994 
/*
 * VOID_TASK_IMPL_0(NAME): definitions for the 0-ary task NAME that has no result.
 *
 * Expands to
 *   - NAME_WRAP: calls NAME_CALL; the descriptor argument is not used because
 *     there is neither an argument to read nor a result to store;
 *   - NAME_CALL: forwards its two arguments to NAME_WORK;
 *   - the header of NAME_WORK, so the macro invocation must be followed
 *     directly by the brace-enclosed task body.
 * Inside that body the worker and deque-head parameters are available as
 * __lace_worker and __lace_dq_head.
 */
#define VOID_TASK_IMPL_0(NAME) \
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
    NAME##_CALL(w, __dq_head ); \
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head ); \
\
/* NAME##_WORK is declared always_inline, so its body ends up inside NAME##_CALL */ \
void NAME##_CALL(WorkerP *w, Task *__dq_head ) \
{ \
    /* Plain call: ISO C does not allow `return expr;` in a function returning void */ \
    NAME##_WORK(w, __dq_head ); \
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) )
1012 
/* VOID_TASK_0(NAME): declare and define a 0-ary task without result in one step; the task body must follow. */
#define VOID_TASK_0(NAME) \
    VOID_TASK_DECL_0(NAME) \
    VOID_TASK_IMPL_0(NAME)
1014 
1015 
1016 // Task macros for tasks of arity 1
1017 
/*
 * TASK_DECL_1(RTYPE, NAME, ATYPE_1): declarations for the 1-ary task NAME
 * taking an ATYPE_1 and returning RTYPE.
 *
 * Expands to
 *   - the descriptor type TD_NAME, whose member d is a union of the argument
 *     (d.args.arg_1) and the result (d.res), plus a compile-time check that the
 *     descriptor fits inside a Task;
 *   - prototypes of NAME_WRAP and NAME_CALL (defined by TASK_IMPL_1);
 *   - NAME_SPAWN(w, head, arg_1): fills the descriptor at slot `head` and, if
 *     the worker is flagged allstolen or a split move was requested, updates
 *     the split information of the worker;
 *   - NAME_NEWFRAME(arg_1), NAME_TOGETHER(arg_1), NAME_RUN(arg_1): build a
 *     descriptor on the stack and pass it to lace_run_newframe,
 *     lace_run_together and lace_run_task respectively; NEWFRAME and RUN
 *     return d.res afterwards, TOGETHER returns nothing;
 *   - NAME_SYNC(w, head): runs the task at slot `head` directly when no split
 *     move is pending and the slot is not below w->split; otherwise defers to
 *     NAME_SYNC_SLOW;
 *   - NAME_SYNC_SLOW(w, head): either goes through lace_leapfrog and returns
 *     the stored d.res, or, after optionally moving the split point, runs the
 *     task itself and returns its result.
 */
#define TASK_DECL_1(RTYPE, NAME, ATYPE_1) \
\
typedef struct _TD_##NAME { \
    TASK_COMMON_FIELDS(_TD_##NAME) \
    union { \
        struct { ATYPE_1 arg_1; } args; \
        RTYPE res; \
    } d; \
} TD_##NAME; \
\
/* A compile error on the next line means LACE_TASKSIZE must be set to a higher value */ \
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME) <= sizeof(Task)) ? 0 : -1]; \
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
RTYPE NAME##_CALL(WorkerP *, Task *, ATYPE_1 arg_1); \
static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head, ATYPE_1 arg_1) \
{ \
    PR_COUNTTASK(w); \
\
    /* No free slot left in this worker's task array */ \
    if (__dq_head == w->end) lace_abort_stack_overflow(); \
\
    TD_##NAME *task_desc = (TD_##NAME *)__dq_head; \
    task_desc->f = &NAME##_WRAP; \
    task_desc->thief = THIEF_TASK; \
    task_desc->d.args.arg_1 = arg_1; \
    /* Descriptor writes stay ahead of the split updates below */ \
    compiler_barrier(); \
\
    Worker *pub_worker = w->_public; \
    if (unlikely(w->allstolen)) { \
        /* Restart the shared range at this slot: tail = head, split = head + 1 */ \
        if (pub_worker->movesplit) pub_worker->movesplit = 0; \
        uint32_t head_idx = __dq_head - w->dq; \
        TailSplit fresh_ts = (TailSplit){{head_idx, head_idx + 1}}; \
        pub_worker->ts.v = fresh_ts.v; \
        compiler_barrier(); \
        pub_worker->allstolen = 0; \
        w->split = __dq_head + 1; \
        w->allstolen = 0; \
    } else if (unlikely(pub_worker->movesplit)) { \
        /* Split move requested: new split halfway between old split and head + 1 (rounded up) */ \
        uint32_t head_idx = __dq_head - w->dq; \
        uint32_t split_idx = w->split - w->dq; \
        uint32_t new_split_idx = (split_idx + head_idx + 2) / 2; \
        pub_worker->ts.ts.split = new_split_idx; \
        w->split = w->dq + new_split_idx; \
        compiler_barrier(); \
        pub_worker->movesplit = 0; \
        PR_COUNTSPLITS(w, CTR_split_grow); \
    } \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_NEWFRAME(ATYPE_1 arg_1) \
{ \
    Task local_task; \
    TD_##NAME *task_desc = (TD_##NAME *)&local_task; \
    task_desc->f = &NAME##_WRAP; \
    task_desc->thief = THIEF_TASK; \
    task_desc->d.args.arg_1 = arg_1; \
    lace_run_newframe(&local_task); \
    return task_desc->d.res; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(ATYPE_1 arg_1) \
{ \
    Task local_task; \
    TD_##NAME *task_desc = (TD_##NAME *)&local_task; \
    task_desc->f = &NAME##_WRAP; \
    task_desc->thief = THIEF_TASK; \
    task_desc->d.args.arg_1 = arg_1; \
    lace_run_together(&local_task); \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_RUN(ATYPE_1 arg_1) \
{ \
    Task local_task; \
    TD_##NAME *task_desc = (TD_##NAME *)&local_task; \
    task_desc->f = &NAME##_WRAP; \
    task_desc->thief = THIEF_TASK; \
    task_desc->d.args.arg_1 = arg_1; \
    lace_run_task(&local_task); \
    return task_desc->d.res; \
} \
\
static __attribute__((noinline)) \
RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
    TD_##NAME *task_desc = (TD_##NAME *)__dq_head; \
\
    /* Slot is not ours to run: go through lace_leapfrog, then read the stored result */ \
    if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
        lace_leapfrog(w, __dq_head); \
        return task_desc->d.res; \
    } \
\
    compiler_barrier(); \
\
    Worker *pub_worker = w->_public; \
    if (pub_worker->movesplit) { \
        /* Advance the split by half the distance to this slot, rounded up */ \
        Task *old_split = w->split; \
        size_t step = __dq_head - old_split; \
        step = (step + 1) / 2; \
        w->split = old_split + step; \
        pub_worker->ts.ts.split += step; \
        compiler_barrier(); \
        pub_worker->movesplit = 0; \
        PR_COUNTSPLITS(w, CTR_split_grow); \
    } \
\
    compiler_barrier(); \
\
    task_desc->thief = THIEF_EMPTY; \
    return NAME##_CALL(w, __dq_head, task_desc->d.args.arg_1); \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
    /* __dq_head > 0 is assumed by contract and therefore not asserted */ \
\
    if (likely(0 == w->_public->movesplit) && likely(w->split <= __dq_head)) { \
        /* Fast path: mark the slot empty and run the task in place */ \
        TD_##NAME *task_desc = (TD_##NAME *)__dq_head; \
        task_desc->thief = THIEF_EMPTY; \
        return NAME##_CALL(w, __dq_head, task_desc->d.args.arg_1); \
    } \
\
    return NAME##_SYNC_SLOW(w, __dq_head); \
}
1156 
/**
 * TASK_IMPL_1(RTYPE, NAME, ATYPE_1, ARG_1)
 *
 * Definitions for a one-argument task NAME returning RTYPE; the matching
 * declarations come from TASK_DECL_1.
 *
 * Expands to:
 * - NAME##_CALL: forwards its arguments to NAME##_WORK;
 * - NAME##_WRAP: calls NAME##_CALL with the argument stored in a task
 *   descriptor and writes the result to the descriptor's d.res;
 * - the header of NAME##_WORK, without a body.
 *
 * Because the expansion ends in a function header, the macro invocation must
 * be followed directly by the braced task body. Inside that body the argument
 * is named ARG_1, and __lace_worker and __lace_dq_head are in scope.
 */
#define TASK_IMPL_1(RTYPE, NAME, ATYPE_1, ARG_1)                                      \
static inline __attribute__((always_inline))                                          \
RTYPE NAME##_WORK(WorkerP *, Task *, ATYPE_1);                                        \
                                                                                      \
/* NAME##_WORK is always_inline, so its body ends up inside NAME##_CALL */            \
RTYPE NAME##_CALL(WorkerP *__lace_worker, Task *__lace_dq_head, ATYPE_1 arg_1)        \
{                                                                                     \
    return NAME##_WORK(__lace_worker, __lace_dq_head, arg_1);                         \
}                                                                                     \
                                                                                      \
/* Run from a descriptor: read the argument, write back the result */                 \
void NAME##_WRAP(WorkerP *__lace_worker, Task *__lace_dq_head, TD_##NAME *td)         \
{                                                                                     \
    td->d.res = NAME##_CALL(__lace_worker, __lace_dq_head, td->d.args.arg_1);         \
}                                                                                     \
                                                                                      \
static inline __attribute__((always_inline))                                          \
RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)), ATYPE_1 ARG_1)
1174 
/* Declares and implements the one-argument task NAME in a single step. */
#define TASK_1(RTYPE, NAME, ATYPE_1, ARG_1)                                           \
    TASK_DECL_1(RTYPE, NAME, ATYPE_1)                                                 \
    TASK_IMPL_1(RTYPE, NAME, ATYPE_1, ARG_1)
1176 
/**
 * VOID_TASK_DECL_1(NAME, ATYPE_1)
 *
 * Declarations for a task NAME that takes one argument of type ATYPE_1 and
 * returns nothing. Expands to:
 *
 * - TD_##NAME: the task descriptor (the common task fields plus the stored
 *   argument), with a compile-time check that it is no larger than a Task;
 * - prototypes for NAME##_WRAP and NAME##_CALL (defined by VOID_TASK_IMPL_1);
 * - NAME##_SPAWN(w, __dq_head, arg_1): writes a descriptor at __dq_head, after
 *   calling lace_abort_stack_overflow() if __dq_head == w->end, and then
 *   updates the tail/split bookkeeping when w->allstolen or the public
 *   movesplit flag is set;
 * - NAME##_NEWFRAME / NAME##_TOGETHER / NAME##_RUN(arg_1): build a descriptor
 *   in a local Task and pass it to lace_run_newframe() / lace_run_together() /
 *   lace_run_task() respectively;
 * - NAME##_SYNC(w, __dq_head): when movesplit is clear and
 *   w->split <= __dq_head, marks the descriptor THIEF_EMPTY and calls
 *   NAME##_CALL directly; otherwise defers to NAME##_SYNC_SLOW;
 * - NAME##_SYNC_SLOW(w, __dq_head): calls lace_leapfrog() when w->allstolen is
 *   set, or when w->split > __dq_head and lace_shrink_shared() returns
 *   nonzero; otherwise handles a pending movesplit request and runs the task
 *   inline.
 *
 * NOTE(review): the order of the stores around compiler_barrier() looks
 * deliberate (the public Worker fields are presumably read by other workers);
 * keep it unchanged when editing.
 */
#define VOID_TASK_DECL_1(NAME, ATYPE_1)                                               \
                                                                                      \
typedef struct _TD_##NAME {                                                           \
  TASK_COMMON_FIELDS(_TD_##NAME)                                                      \
  union { struct {  ATYPE_1 arg_1; } args; } d;                                       \
} TD_##NAME;                                                                          \
                                                                                      \
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 1 : -1];\
                                                                                      \
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *);                                     \
void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1);                                  \
static inline void NAME##_SYNC(WorkerP *, Task *);                                    \
static void NAME##_SYNC_SLOW(WorkerP *, Task *);                                      \
                                                                                      \
static inline __attribute__((unused))                                                 \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1)                        \
{                                                                                     \
    PR_COUNTTASK(w);                                                                  \
                                                                                      \
    TD_##NAME *t;                                                                     \
    TailSplit ts;                                                                     \
    uint32_t head, split, newsplit;                                                   \
                                                                                      \
    if (__dq_head == w->end) lace_abort_stack_overflow();                             \
                                                                                      \
    /* Fill in the descriptor; compiler_barrier() precedes the split updates */       \
    t = (TD_##NAME *)__dq_head;                                                       \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
    t->d.args.arg_1 = arg_1;                                                          \
    compiler_barrier();                                                               \
                                                                                      \
    Worker *wt = w->_public;                                                          \
    if (unlikely(w->allstolen)) {                                                     \
        /* allstolen: publish {head, head+1}; the split goes right after this task */ \
        if (wt->movesplit) wt->movesplit = 0;                                         \
        head = __dq_head - w->dq;                                                     \
        ts = (TailSplit){{head,head+1}};                                              \
        wt->ts.v = ts.v;                                                              \
        compiler_barrier();                                                           \
        wt->allstolen = 0;                                                            \
        w->split = __dq_head+1;                                                       \
        w->allstolen = 0;                                                             \
    } else if (unlikely(wt->movesplit)) {                                             \
        head = __dq_head - w->dq;                                                     \
        split = w->split - w->dq;                                                     \
        newsplit = (split + head + 2)/2;                                              \
        wt->ts.ts.split = newsplit;                                                   \
        w->split = w->dq + newsplit;                                                  \
        compiler_barrier();                                                           \
        wt->movesplit = 0;                                                            \
        PR_COUNTSPLITS(w, CTR_split_grow);                                            \
    }                                                                                 \
}                                                                                     \
                                                                                      \
static inline __attribute__((unused))                                                 \
void NAME##_NEWFRAME(ATYPE_1 arg_1)                                                   \
{                                                                                     \
    Task _t;                                                                          \
    TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
    t->d.args.arg_1 = arg_1;                                                          \
    lace_run_newframe(&_t);                                                           \
}                                                                                     \
                                                                                      \
static inline __attribute__((unused))                                                 \
void NAME##_TOGETHER(ATYPE_1 arg_1)                                                   \
{                                                                                     \
    Task _t;                                                                          \
    TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
    t->d.args.arg_1 = arg_1;                                                          \
    lace_run_together(&_t);                                                           \
}                                                                                     \
                                                                                      \
static inline __attribute__((unused))                                                 \
void NAME##_RUN(ATYPE_1 arg_1)                                                        \
{                                                                                     \
    Task _t;                                                                          \
    TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
    t->d.args.arg_1 = arg_1;                                                          \
    lace_run_task(&_t);                                                               \
}                                                                                     \
                                                                                      \
static __attribute__((noinline))                                                      \
void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head)                                    \
{                                                                                     \
    if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) {          \
        /* Hand over to lace_leapfrog(); nothing to return for a void task */         \
        lace_leapfrog(w, __dq_head);                                                  \
        return;                                                                       \
    }                                                                                 \
                                                                                      \
    compiler_barrier();                                                               \
                                                                                      \
    Worker *wt = w->_public;                                                          \
    if (wt->movesplit) {                                                              \
        /* Advance the split by half the distance to __dq_head, rounded up */         \
        Task *s = w->split;                                                           \
        size_t diff = __dq_head - s;                                                  \
        diff = (diff + 1) / 2;                                                        \
        w->split = s + diff;                                                          \
        wt->ts.ts.split += diff;                                                      \
        compiler_barrier();                                                           \
        wt->movesplit = 0;                                                            \
        PR_COUNTSPLITS(w, CTR_split_grow);                                            \
    }                                                                                 \
                                                                                      \
    compiler_barrier();                                                               \
                                                                                      \
    TD_##NAME *t = (TD_##NAME *)__dq_head;                                            \
    t->thief = THIEF_EMPTY;                                                           \
    NAME##_CALL(w, __dq_head, t->d.args.arg_1);                                       \
}                                                                                     \
                                                                                      \
static inline __attribute__((unused))                                                 \
void NAME##_SYNC(WorkerP *w, Task *__dq_head)                                         \
{                                                                                     \
    /* assert (__dq_head > 0); */  /* Commented out because we assume contract */     \
                                                                                      \
    if (likely(0 == w->_public->movesplit)) {                                         \
        if (likely(w->split <= __dq_head)) {                                          \
            TD_##NAME *t = (TD_##NAME *)__dq_head;                                    \
            t->thief = THIEF_EMPTY;                                                   \
            NAME##_CALL(w, __dq_head, t->d.args.arg_1);                               \
            return;                                                                   \
        }                                                                             \
    }                                                                                 \
                                                                                      \
    NAME##_SYNC_SLOW(w, __dq_head);                                                   \
}
1315 
/**
 * VOID_TASK_IMPL_1(NAME, ATYPE_1, ARG_1)
 *
 * Definitions for a one-argument task NAME that returns nothing; the matching
 * declarations come from VOID_TASK_DECL_1.
 *
 * Expands to:
 * - NAME##_WRAP: calls NAME##_CALL with the argument stored in a task
 *   descriptor;
 * - NAME##_CALL: forwards its arguments to NAME##_WORK;
 * - the header of NAME##_WORK, without a body.
 *
 * Because the expansion ends in a function header, the macro invocation must
 * be followed directly by the braced task body. Inside that body the argument
 * is named ARG_1, and __lace_worker and __lace_dq_head are in scope.
 *
 * The void functions call their callee as a plain statement: ISO C does not
 * allow `return expr;` in a function returning void, even for a void expr.
 */
#define VOID_TASK_IMPL_1(NAME, ATYPE_1, ARG_1)                                        \
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t)                           \
{                                                                                     \
    NAME##_CALL(w, __dq_head, t->d.args.arg_1);                                       \
}                                                                                     \
                                                                                      \
static inline __attribute__((always_inline))                                          \
void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1);             \
                                                                                      \
/* NAME##_WORK is always_inline, so its body ends up inside NAME##_CALL */            \
void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1)                         \
{                                                                                     \
    NAME##_WORK(w, __dq_head, arg_1);                                                 \
}                                                                                     \
                                                                                      \
static inline __attribute__((always_inline))                                          \
void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1)
1333 
/* Declares and implements the one-argument void task NAME in a single step. */
#define VOID_TASK_1(NAME, ATYPE_1, ARG_1)                                             \
    VOID_TASK_DECL_1(NAME, ATYPE_1)                                                   \
    VOID_TASK_IMPL_1(NAME, ATYPE_1, ARG_1)
1335 
1336 
1337 // Task macros for tasks of arity 2
1338 
/**
 * TASK_DECL_2(RTYPE, NAME, ATYPE_1, ATYPE_2)
 *
 * Declarations for a task NAME that takes two arguments and returns RTYPE.
 * Expands to:
 *
 * - TD_##NAME: the task descriptor (the common task fields plus a union that
 *   holds either the two stored arguments or the result), with a compile-time
 *   check that it is no larger than a Task;
 * - prototypes for NAME##_WRAP and NAME##_CALL (defined by TASK_IMPL_2);
 * - NAME##_SPAWN(w, __dq_head, arg_1, arg_2): writes a descriptor at
 *   __dq_head, after calling lace_abort_stack_overflow() if
 *   __dq_head == w->end, and then updates the tail/split bookkeeping when
 *   w->allstolen or the public movesplit flag is set;
 * - NAME##_NEWFRAME / NAME##_RUN(arg_1, arg_2): build a descriptor in a local
 *   Task, pass it to lace_run_newframe() / lace_run_task(), and return the
 *   descriptor's d.res afterwards;
 * - NAME##_TOGETHER(arg_1, arg_2): same setup, passed to lace_run_together();
 *   no result is returned;
 * - NAME##_SYNC(w, __dq_head): when movesplit is clear and
 *   w->split <= __dq_head, marks the descriptor THIEF_EMPTY and returns the
 *   value of a direct NAME##_CALL; otherwise defers to NAME##_SYNC_SLOW;
 * - NAME##_SYNC_SLOW(w, __dq_head): calls lace_leapfrog() and returns the
 *   descriptor's d.res when w->allstolen is set, or when
 *   w->split > __dq_head and lace_shrink_shared() returns nonzero; otherwise
 *   handles a pending movesplit request and runs the task inline.
 *
 * NOTE(review): the order of the stores around compiler_barrier() looks
 * deliberate (the public Worker fields are presumably read by other workers);
 * keep it unchanged when editing.
 */
#define TASK_DECL_2(RTYPE, NAME, ATYPE_1, ATYPE_2)                                    \
                                                                                      \
typedef struct _TD_##NAME {                                                           \
  TASK_COMMON_FIELDS(_TD_##NAME)                                                      \
  union { struct {  ATYPE_1 arg_1; ATYPE_2 arg_2; } args; RTYPE res; } d;             \
} TD_##NAME;                                                                          \
                                                                                      \
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 1 : -1];\
                                                                                      \
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *);                                     \
RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2);                  \
static inline RTYPE NAME##_SYNC(WorkerP *, Task *);                                   \
static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *);                                     \
                                                                                      \
static inline __attribute__((unused))                                                 \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2)         \
{                                                                                     \
    PR_COUNTTASK(w);                                                                  \
                                                                                      \
    TD_##NAME *t;                                                                     \
    TailSplit ts;                                                                     \
    uint32_t head, split, newsplit;                                                   \
                                                                                      \
    if (__dq_head == w->end) lace_abort_stack_overflow();                             \
                                                                                      \
    /* Fill in the descriptor; compiler_barrier() precedes the split updates */       \
    t = (TD_##NAME *)__dq_head;                                                       \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
    t->d.args.arg_1 = arg_1;                                                          \
    t->d.args.arg_2 = arg_2;                                                          \
    compiler_barrier();                                                               \
                                                                                      \
    Worker *wt = w->_public;                                                          \
    if (unlikely(w->allstolen)) {                                                     \
        /* allstolen: publish {head, head+1}; the split goes right after this task */ \
        if (wt->movesplit) wt->movesplit = 0;                                         \
        head = __dq_head - w->dq;                                                     \
        ts = (TailSplit){{head,head+1}};                                              \
        wt->ts.v = ts.v;                                                              \
        compiler_barrier();                                                           \
        wt->allstolen = 0;                                                            \
        w->split = __dq_head+1;                                                       \
        w->allstolen = 0;                                                             \
    } else if (unlikely(wt->movesplit)) {                                             \
        head = __dq_head - w->dq;                                                     \
        split = w->split - w->dq;                                                     \
        newsplit = (split + head + 2)/2;                                              \
        wt->ts.ts.split = newsplit;                                                   \
        w->split = w->dq + newsplit;                                                  \
        compiler_barrier();                                                           \
        wt->movesplit = 0;                                                            \
        PR_COUNTSPLITS(w, CTR_split_grow);                                            \
    }                                                                                 \
}                                                                                     \
                                                                                      \
static inline __attribute__((unused))                                                 \
RTYPE NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2)                                   \
{                                                                                     \
    Task _t;                                                                          \
    TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
    t->d.args.arg_1 = arg_1;                                                          \
    t->d.args.arg_2 = arg_2;                                                          \
    lace_run_newframe(&_t);                                                           \
    return t->d.res;                                                                  \
}                                                                                     \
                                                                                      \
static inline __attribute__((unused))                                                 \
void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2)                                    \
{                                                                                     \
    Task _t;                                                                          \
    TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
    t->d.args.arg_1 = arg_1;                                                          \
    t->d.args.arg_2 = arg_2;                                                          \
    lace_run_together(&_t);                                                           \
}                                                                                     \
                                                                                      \
static inline __attribute__((unused))                                                 \
RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2)                                        \
{                                                                                     \
    Task _t;                                                                          \
    TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
    t->d.args.arg_1 = arg_1;                                                          \
    t->d.args.arg_2 = arg_2;                                                          \
    lace_run_task(&_t);                                                               \
    return t->d.res;                                                                  \
}                                                                                     \
                                                                                      \
static __attribute__((noinline))                                                      \
RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head)                                   \
{                                                                                     \
    TD_##NAME *t;                                                                     \
                                                                                      \
    if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) {          \
        /* After lace_leapfrog() returns, the result is read from the descriptor */   \
        lace_leapfrog(w, __dq_head);                                                  \
        t = (TD_##NAME *)__dq_head;                                                   \
        return t->d.res;                                                              \
    }                                                                                 \
                                                                                      \
    compiler_barrier();                                                               \
                                                                                      \
    Worker *wt = w->_public;                                                          \
    if (wt->movesplit) {                                                              \
        /* Advance the split by half the distance to __dq_head, rounded up */         \
        Task *s = w->split;                                                           \
        size_t diff = __dq_head - s;                                                  \
        diff = (diff + 1) / 2;                                                        \
        w->split = s + diff;                                                          \
        wt->ts.ts.split += diff;                                                      \
        compiler_barrier();                                                           \
        wt->movesplit = 0;                                                            \
        PR_COUNTSPLITS(w, CTR_split_grow);                                            \
    }                                                                                 \
                                                                                      \
    compiler_barrier();                                                               \
                                                                                      \
    t = (TD_##NAME *)__dq_head;                                                       \
    t->thief = THIEF_EMPTY;                                                           \
    return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2);              \
}                                                                                     \
                                                                                      \
static inline __attribute__((unused))                                                 \
RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head)                                        \
{                                                                                     \
    /* assert (__dq_head > 0); */  /* Commented out because we assume contract */     \
                                                                                      \
    if (likely(0 == w->_public->movesplit)) {                                         \
        if (likely(w->split <= __dq_head)) {                                          \
            TD_##NAME *t = (TD_##NAME *)__dq_head;                                    \
            t->thief = THIEF_EMPTY;                                                   \
            return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2);      \
        }                                                                             \
    }                                                                                 \
                                                                                      \
    return NAME##_SYNC_SLOW(w, __dq_head);                                            \
}
1477 
/**
 * TASK_IMPL_2(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2)
 *
 * Definitions for a two-argument task NAME returning RTYPE; the matching
 * declarations come from TASK_DECL_2.
 *
 * Expands to:
 * - NAME##_CALL: forwards its arguments to NAME##_WORK;
 * - NAME##_WRAP: calls NAME##_CALL with the arguments stored in a task
 *   descriptor and writes the result to the descriptor's d.res;
 * - the header of NAME##_WORK, without a body.
 *
 * Because the expansion ends in a function header, the macro invocation must
 * be followed directly by the braced task body. Inside that body the arguments
 * are named ARG_1 and ARG_2, and __lace_worker and __lace_dq_head are in scope.
 */
#define TASK_IMPL_2(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2)                      \
static inline __attribute__((always_inline))                                          \
RTYPE NAME##_WORK(WorkerP *, Task *, ATYPE_1, ATYPE_2);                               \
                                                                                      \
/* NAME##_WORK is always_inline, so its body ends up inside NAME##_CALL */            \
RTYPE NAME##_CALL(WorkerP *__lace_worker, Task *__lace_dq_head,                       \
                  ATYPE_1 arg_1, ATYPE_2 arg_2)                                       \
{                                                                                     \
    return NAME##_WORK(__lace_worker, __lace_dq_head, arg_1, arg_2);                  \
}                                                                                     \
                                                                                      \
/* Run from a descriptor: read the arguments, write back the result */                \
void NAME##_WRAP(WorkerP *__lace_worker, Task *__lace_dq_head, TD_##NAME *td)         \
{                                                                                     \
    td->d.res = NAME##_CALL(__lace_worker, __lace_dq_head,                            \
                            td->d.args.arg_1, td->d.args.arg_2);                      \
}                                                                                     \
                                                                                      \
static inline __attribute__((always_inline))                                          \
RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)), ATYPE_1 ARG_1, ATYPE_2 ARG_2)
1495 
/* Declares and implements the two-argument task NAME in a single step. */
#define TASK_2(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2)                           \
    TASK_DECL_2(RTYPE, NAME, ATYPE_1, ATYPE_2)                                        \
    TASK_IMPL_2(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2)
1497 
/**
 * VOID_TASK_DECL_2(NAME, ATYPE_1, ATYPE_2)
 *
 * Declares a two-argument task that yields no result.  The expansion provides:
 *  - TD_NAME: the task descriptor (common task fields plus both arguments),
 *    together with a compile-time check that it is not larger than a Task;
 *  - prototypes for NAME_WRAP and NAME_CALL (VOID_TASK_IMPL_2 defines them);
 *  - NAME_SPAWN: stores a descriptor in the deque slot __dq_head and, when
 *    w->allstolen or w->_public->movesplit is set, updates the split
 *    bookkeeping kept in w and w->_public;
 *  - NAME_NEWFRAME / NAME_TOGETHER / NAME_RUN: build a descriptor in a local
 *    Task and pass it to lace_run_newframe / lace_run_together / lace_run_task;
 *  - NAME_SYNC / NAME_SYNC_SLOW: finish the task stored at __dq_head, either by
 *    calling NAME_CALL on the stored arguments or by calling lace_leapfrog.
 *
 * The void functions below deliberately avoid `return expr;`: ISO C does not
 * permit an expression on a return statement in a function returning void.
 */
#define VOID_TASK_DECL_2(NAME, ATYPE_1, ATYPE_2) \
\
typedef struct _TD_##NAME { \
  TASK_COMMON_FIELDS(_TD_##NAME) \
  union { struct {  ATYPE_1 arg_1; ATYPE_2 arg_2; } args; } d; \
} TD_##NAME; \
\
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */ \
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1]; \
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2); \
static inline void NAME##_SYNC(WorkerP *, Task *); \
static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \
{ \
    PR_COUNTTASK(w); \
\
    TD_##NAME *t; \
    TailSplit ts; \
    uint32_t head, split, newsplit; \
\
    /* No free slot left in this worker's deque. */ \
    if (__dq_head == w->end) lace_abort_stack_overflow(); \
\
    /* Fill in the descriptor before any split bookkeeping is updated. */ \
    t = (TD_##NAME *)__dq_head; \
    t->f = &NAME##_WRAP; \
    t->thief = THIEF_TASK; \
    t->d.args.arg_1 = arg_1; \
    t->d.args.arg_2 = arg_2; \
    compiler_barrier(); \
\
    Worker *wt = w->_public; \
    if (unlikely(w->allstolen)) { \
        /* Restart the shared range at this slot: tail = head, split = head + 1. */ \
        if (wt->movesplit) wt->movesplit = 0; \
        head = __dq_head - w->dq; \
        ts = (TailSplit){{head,head+1}}; \
        wt->ts.v = ts.v; \
        compiler_barrier(); \
        wt->allstolen = 0; \
        w->split = __dq_head+1; \
        w->allstolen = 0; \
    } else if (unlikely(wt->movesplit)) { \
        /* Move the split point to (split + head + 2) / 2, then clear the request flag. */ \
        head = __dq_head - w->dq; \
        split = w->split - w->dq; \
        newsplit = (split + head + 2)/2; \
        wt->ts.ts.split = newsplit; \
        w->split = w->dq + newsplit; \
        compiler_barrier(); \
        wt->movesplit = 0; \
        PR_COUNTSPLITS(w, CTR_split_grow); \
    } \
} \
\
static inline __attribute__((unused)) \
void NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2) \
{ \
    Task _t; \
    TD_##NAME *t = (TD_##NAME *)&_t; \
    t->f = &NAME##_WRAP; \
    t->thief = THIEF_TASK; \
    t->d.args.arg_1 = arg_1; \
    t->d.args.arg_2 = arg_2; \
    lace_run_newframe(&_t); \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2) \
{ \
    Task _t; \
    TD_##NAME *t = (TD_##NAME *)&_t; \
    t->f = &NAME##_WRAP; \
    t->thief = THIEF_TASK; \
    t->d.args.arg_1 = arg_1; \
    t->d.args.arg_2 = arg_2; \
    lace_run_together(&_t); \
} \
\
static inline __attribute__((unused)) \
void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2) \
{ \
    Task _t; \
    TD_##NAME *t = (TD_##NAME *)&_t; \
    t->f = &NAME##_WRAP; \
    t->thief = THIEF_TASK; \
    t->d.args.arg_1 = arg_1; \
    t->d.args.arg_2 = arg_2; \
    lace_run_task(&_t); \
} \
\
static __attribute__((noinline)) \
void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
    /* Either everything is flagged as stolen, or the slot lies below the split */ \
    /* and lace_shrink_shared() returned nonzero: defer to lace_leapfrog(). */ \
    if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
        lace_leapfrog(w, __dq_head); \
        return; \
    } \
\
    compiler_barrier(); \
\
    Worker *wt = w->_public; \
    if (wt->movesplit) { \
        /* Advance the split point by half the distance to the head (rounded up). */ \
        Task *old_split = w->split; \
        size_t diff = __dq_head - old_split; \
        diff = (diff + 1) / 2; \
        w->split = old_split + diff; \
        wt->ts.ts.split += diff; \
        compiler_barrier(); \
        wt->movesplit = 0; \
        PR_COUNTSPLITS(w, CTR_split_grow); \
    } \
\
    compiler_barrier(); \
\
    /* Run the task in place with the arguments stored in its descriptor. */ \
    TD_##NAME *t = (TD_##NAME *)__dq_head; \
    t->thief = THIEF_EMPTY; \
    NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \
} \
\
static inline __attribute__((unused)) \
void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
    /* assert (__dq_head > 0); */  /* Commented out because we assume contract */ \
\
    /* Fast path: no split move requested and the slot is not below the split point. */ \
    if (likely(0 == w->_public->movesplit)) { \
        if (likely(w->split <= __dq_head)) { \
            TD_##NAME *t = (TD_##NAME *)__dq_head; \
            t->thief = THIEF_EMPTY; \
            NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \
            return; \
        } \
    } \
\
    NAME##_SYNC_SLOW(w, __dq_head); \
}
1636 
/**
 * VOID_TASK_IMPL_2(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2)
 *
 * Emits the definitions of a two-argument task without result:
 *  - NAME_WRAP: reads both arguments from a task descriptor and passes them to
 *    NAME_CALL;
 *  - NAME_CALL: ordinary function that forwards its arguments to NAME_WORK;
 *  - the header of NAME_WORK, whose task parameters are named ARG_1 and ARG_2.
 *
 * The expansion ends right after the NAME_WORK header, so the macro invocation
 * has to be followed by the braced body of the task.  Within that body the
 * first two parameters are visible as __lace_worker and __lace_dq_head.
 *
 * TD_NAME and the prototypes of NAME_WRAP / NAME_CALL must already be declared
 * (VOID_TASK_2 obtains them from VOID_TASK_DECL_2).
 */
#define VOID_TASK_IMPL_2(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) \
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
    NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2); \
\
/* NAME##_WORK is marked always_inline, so its code ends up inside NAME##_CALL. */ \
/* Plain call instead of `return expr;`: ISO C forbids the latter in a void function. */ \
void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \
{ \
    NAME##_WORK(w, __dq_head , arg_1, arg_2); \
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2)
1654 
/**
 * VOID_TASK_2: declare and implement a two-argument task without result in one
 * step.  Expands to VOID_TASK_DECL_2 followed by VOID_TASK_IMPL_2; like
 * VOID_TASK_IMPL_2, the invocation must be followed by the braced task body.
 */
#define VOID_TASK_2(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) \
    VOID_TASK_DECL_2(NAME, ATYPE_1, ATYPE_2) \
    VOID_TASK_IMPL_2(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2)
1656 
1657 
1658 // Task macros for tasks of arity 3
1659 
/**
 * TASK_DECL_3(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3)
 *
 * Declares a three-argument task that returns RTYPE.  The expansion provides:
 *  - TD_NAME: the task descriptor (common task fields plus a union that holds
 *    either the three arguments or the result), together with a compile-time
 *    check that it is not larger than a Task;
 *  - prototypes for NAME_WRAP and NAME_CALL (TASK_IMPL_3 defines them);
 *  - NAME_SPAWN: stores a descriptor in the deque slot __dq_head and, when
 *    w->allstolen or w->_public->movesplit is set, updates the split
 *    bookkeeping kept in w and w->_public;
 *  - NAME_NEWFRAME / NAME_RUN: build a descriptor in a local Task, pass it to
 *    lace_run_newframe / lace_run_task and return the descriptor's d.res;
 *  - NAME_TOGETHER: same setup, passed to lace_run_together, no value returned;
 *  - NAME_SYNC / NAME_SYNC_SLOW: obtain the result of the task stored at
 *    __dq_head, either by calling NAME_CALL on the stored arguments or by
 *    calling lace_leapfrog and reading d.res afterwards.
 */
#define TASK_DECL_3(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3) \
\
typedef struct _TD_##NAME { \
  TASK_COMMON_FIELDS(_TD_##NAME) \
  union { \
    struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; } args; \
    RTYPE res; \
  } d; \
} TD_##NAME; \
\
/* A negative array size below means TD_NAME does not fit in a Task: raise LACE_TASKSIZE. */ \
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1]; \
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
RTYPE NAME##_CALL(WorkerP *, Task *, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3); \
static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \
{ \
    PR_COUNTTASK(w); \
\
    /* No free slot left in this worker's deque. */ \
    if (__dq_head == w->end) lace_abort_stack_overflow(); \
\
    /* Fill in the descriptor before any split bookkeeping is updated. */ \
    TD_##NAME *td = (TD_##NAME *)__dq_head; \
    td->f = &NAME##_WRAP; \
    td->thief = THIEF_TASK; \
    td->d.args.arg_1 = arg_1; \
    td->d.args.arg_2 = arg_2; \
    td->d.args.arg_3 = arg_3; \
    compiler_barrier(); \
\
    Worker *pub = w->_public; \
    if (unlikely(w->allstolen)) { \
        /* Restart the shared range at this slot: tail = head, split = head + 1. */ \
        if (pub->movesplit) pub->movesplit = 0; \
        uint32_t head = __dq_head - w->dq; \
        TailSplit fresh = {{head, head + 1}}; \
        pub->ts.v = fresh.v; \
        compiler_barrier(); \
        pub->allstolen = 0; \
        w->split = __dq_head + 1; \
        w->allstolen = 0; \
    } else if (unlikely(pub->movesplit)) { \
        /* Move the split point to (split + head + 2) / 2, then clear the request flag. */ \
        uint32_t head = __dq_head - w->dq; \
        uint32_t split = w->split - w->dq; \
        uint32_t newsplit = (split + head + 2) / 2; \
        pub->ts.ts.split = newsplit; \
        w->split = w->dq + newsplit; \
        compiler_barrier(); \
        pub->movesplit = 0; \
        PR_COUNTSPLITS(w, CTR_split_grow); \
    } \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \
{ \
    Task slot; \
    TD_##NAME *td = (TD_##NAME *)&slot; \
    td->f = &NAME##_WRAP; \
    td->thief = THIEF_TASK; \
    td->d.args.arg_1 = arg_1; \
    td->d.args.arg_2 = arg_2; \
    td->d.args.arg_3 = arg_3; \
    lace_run_newframe(&slot); \
    return td->d.res; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \
{ \
    Task slot; \
    TD_##NAME *td = (TD_##NAME *)&slot; \
    td->f = &NAME##_WRAP; \
    td->thief = THIEF_TASK; \
    td->d.args.arg_1 = arg_1; \
    td->d.args.arg_2 = arg_2; \
    td->d.args.arg_3 = arg_3; \
    lace_run_together(&slot); \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \
{ \
    Task slot; \
    TD_##NAME *td = (TD_##NAME *)&slot; \
    td->f = &NAME##_WRAP; \
    td->thief = THIEF_TASK; \
    td->d.args.arg_1 = arg_1; \
    td->d.args.arg_2 = arg_2; \
    td->d.args.arg_3 = arg_3; \
    lace_run_task(&slot); \
    return td->d.res; \
} \
\
static __attribute__((noinline)) \
RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
    TD_##NAME *td = (TD_##NAME *)__dq_head; \
\
    /* Either everything is flagged as stolen, or the slot lies below the split */ \
    /* and lace_shrink_shared() returned nonzero: defer to lace_leapfrog() and */ \
    /* read the result from the descriptor afterwards. */ \
    if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
        lace_leapfrog(w, __dq_head); \
        return td->d.res; \
    } \
\
    compiler_barrier(); \
\
    Worker *pub = w->_public; \
    if (pub->movesplit) { \
        /* Advance the split point by half the distance to the head (rounded up). */ \
        Task *old_split = w->split; \
        size_t grow = __dq_head - old_split; \
        grow = (grow + 1) / 2; \
        w->split = old_split + grow; \
        pub->ts.ts.split += grow; \
        compiler_barrier(); \
        pub->movesplit = 0; \
        PR_COUNTSPLITS(w, CTR_split_grow); \
    } \
\
    compiler_barrier(); \
\
    /* Run the task in place with the arguments stored in its descriptor. */ \
    td->thief = THIEF_EMPTY; \
    return NAME##_CALL(w, __dq_head, td->d.args.arg_1, td->d.args.arg_2, td->d.args.arg_3); \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
    /* __dq_head is not validated here; callers are assumed to honour the contract. */ \
\
    /* Fast path: no split move requested and the slot is not below the split point. */ \
    if (likely(0 == w->_public->movesplit) && likely(w->split <= __dq_head)) { \
        TD_##NAME *td = (TD_##NAME *)__dq_head; \
        td->thief = THIEF_EMPTY; \
        return NAME##_CALL(w, __dq_head, td->d.args.arg_1, td->d.args.arg_2, td->d.args.arg_3); \
    } \
\
    return NAME##_SYNC_SLOW(w, __dq_head); \
}
1798 
/**
 * TASK_IMPL_3(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3)
 *
 * Emits the definitions of a three-argument task that returns RTYPE:
 *  - NAME_CALL: ordinary function that forwards its arguments to NAME_WORK;
 *  - NAME_WRAP: reads the three arguments from a task descriptor, invokes
 *    NAME_CALL and stores the returned value in the descriptor (d.res);
 *  - the header of NAME_WORK, whose task parameters are named ARG_1..ARG_3.
 *
 * The expansion ends right after the NAME_WORK header, so the macro invocation
 * has to be followed by the braced body of the task.  Within that body the
 * first two parameters are visible as __lace_worker and __lace_dq_head.
 *
 * TD_NAME and the prototypes of NAME_WRAP / NAME_CALL must already be declared
 * (TASK_3 obtains them from TASK_DECL_3).
 */
#define TASK_IMPL_3(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) \
/* Forward declaration; the definition is completed by the task body that follows the macro. */ \
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head, ATYPE_1, ATYPE_2, ATYPE_3); \
\
/* NAME##_WORK is marked always_inline, so its code ends up inside NAME##_CALL. */ \
RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head, ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3) \
{ \
    return NAME##_WORK(w, __dq_head, arg_1, arg_2, arg_3); \
} \
\
/* Descriptor entry point: unpack the stored arguments, run the task, keep the result. */ \
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
    t->d.res = NAME##_CALL(w, __dq_head, t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)), ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3)
1816 
/**
 * TASK_3: declare and implement a three-argument task returning RTYPE in one
 * step.  Expands to TASK_DECL_3 followed by TASK_IMPL_3; like TASK_IMPL_3, the
 * invocation must be followed by the braced task body.
 */
#define TASK_3(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) \
    TASK_DECL_3(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3) \
    TASK_IMPL_3(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3)
1818 
1819 #define VOID_TASK_DECL_3(NAME, ATYPE_1, ATYPE_2, ATYPE_3)                             \
1820                                                                                       \
1821 typedef struct _TD_##NAME {                                                           \
1822   TASK_COMMON_FIELDS(_TD_##NAME)                                                      \
1823   union { struct {  ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; } args; } d;         \
1824 } TD_##NAME;                                                                          \
1825                                                                                       \
1826 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
1827 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
1828                                                                                       \
1829 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *);                                     \
1830 void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3);    \
1831 static inline void NAME##_SYNC(WorkerP *, Task *);                                    \
1832 static void NAME##_SYNC_SLOW(WorkerP *, Task *);                                      \
1833                                                                                       \
1834 static inline __attribute__((unused))                                                 \
1835 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\
1836 {                                                                                     \
1837     PR_COUNTTASK(w);                                                                  \
1838                                                                                       \
1839     TD_##NAME *t;                                                                     \
1840     TailSplit ts;                                                                     \
1841     uint32_t head, split, newsplit;                                                   \
1842                                                                                       \
1843     if (__dq_head == w->end) lace_abort_stack_overflow();                             \
1844                                                                                       \
1845     t = (TD_##NAME *)__dq_head;                                                       \
1846     t->f = &NAME##_WRAP;                                                              \
1847     t->thief = THIEF_TASK;                                                            \
1848      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3;       \
1849     compiler_barrier();                                                               \
1850                                                                                       \
1851     Worker *wt = w->_public;                                                          \
1852     if (unlikely(w->allstolen)) {                                                     \
1853         if (wt->movesplit) wt->movesplit = 0;                                         \
1854         head = __dq_head - w->dq;                                                     \
1855         ts = (TailSplit){{head,head+1}};                                              \
1856         wt->ts.v = ts.v;                                                              \
1857         compiler_barrier();                                                           \
1858         wt->allstolen = 0;                                                            \
1859         w->split = __dq_head+1;                                                       \
1860         w->allstolen = 0;                                                             \
1861     } else if (unlikely(wt->movesplit)) {                                             \
1862         head = __dq_head - w->dq;                                                     \
1863         split = w->split - w->dq;                                                     \
1864         newsplit = (split + head + 2)/2;                                              \
1865         wt->ts.ts.split = newsplit;                                                   \
1866         w->split = w->dq + newsplit;                                                  \
1867         compiler_barrier();                                                           \
1868         wt->movesplit = 0;                                                            \
1869         PR_COUNTSPLITS(w, CTR_split_grow);                                            \
1870     }                                                                                 \
1871 }                                                                                     \
1872                                                                                       \
1873 static inline __attribute__((unused))                                                 \
1874 void NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)                     \
1875 {                                                                                     \
1876     Task _t;                                                                          \
1877     TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
1878     t->f = &NAME##_WRAP;                                                              \
1879     t->thief = THIEF_TASK;                                                            \
1880      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3;       \
1881     lace_run_newframe(&_t);                                                           \
1882     return ;                                                                          \
1883 }                                                                                     \
1884                                                                                       \
1885 static inline __attribute__((unused))                                                 \
1886 void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)                     \
1887 {                                                                                     \
1888     Task _t;                                                                          \
1889     TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
1890     t->f = &NAME##_WRAP;                                                              \
1891     t->thief = THIEF_TASK;                                                            \
1892      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3;       \
1893     lace_run_together(&_t);                                                           \
1894 }                                                                                     \
1895                                                                                       \
1896 static inline __attribute__((unused))                                                 \
1897 void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)                          \
1898 {                                                                                     \
1899     Task _t;                                                                          \
1900     TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
1901     t->f = &NAME##_WRAP;                                                              \
1902     t->thief = THIEF_TASK;                                                            \
1903      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3;       \
1904     lace_run_task(&_t);                                                               \
1905     return ;                                                                          \
1906 }                                                                                     \
1907                                                                                       \
1908 static __attribute__((noinline))                                                      \
1909 void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head)                                    \
1910 {                                                                                     \
1911     TD_##NAME *t;                                                                     \
1912                                                                                       \
1913     if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) {          \
1914         lace_leapfrog(w, __dq_head);                                                  \
1915         t = (TD_##NAME *)__dq_head;                                                   \
1916         return ;                                                                      \
1917     }                                                                                 \
1918                                                                                       \
1919     compiler_barrier();                                                               \
1920                                                                                       \
1921     Worker *wt = w->_public;                                                          \
1922     if (wt->movesplit) {                                                              \
1923         Task *t = w->split;                                                           \
1924         size_t diff = __dq_head - t;                                                  \
1925         diff = (diff + 1) / 2;                                                        \
1926         w->split = t + diff;                                                          \
1927         wt->ts.ts.split += diff;                                                      \
1928         compiler_barrier();                                                           \
1929         wt->movesplit = 0;                                                            \
1930         PR_COUNTSPLITS(w, CTR_split_grow);                                            \
1931     }                                                                                 \
1932                                                                                       \
1933     compiler_barrier();                                                               \
1934                                                                                       \
1935     t = (TD_##NAME *)__dq_head;                                                       \
1936     t->thief = THIEF_EMPTY;                                                           \
1937     return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\
1938 }                                                                                     \
1939                                                                                       \
1940 static inline __attribute__((unused))                                                 \
1941 void NAME##_SYNC(WorkerP *w, Task *__dq_head)                                         \
1942 {                                                                                     \
1943     /* assert (__dq_head > 0); */  /* Commented out because we assume contract */     \
1944                                                                                       \
1945     if (likely(0 == w->_public->movesplit)) {                                         \
1946         if (likely(w->split <= __dq_head)) {                                          \
1947             TD_##NAME *t = (TD_##NAME *)__dq_head;                                    \
1948             t->thief = THIEF_EMPTY;                                                   \
1949             return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\
1950         }                                                                             \
1951     }                                                                                 \
1952                                                                                       \
1953     return NAME##_SYNC_SLOW(w, __dq_head);                                            \
1954 }                                                                                     \
1955                                                                                       \
1956                                                                                       \
1957 
/**
 * VOID_TASK_IMPL_3 - emit the definitions for a void task NAME with three arguments.
 *
 * Expands to:
 *  - NAME_WRAP: the function whose address NAME_SPAWN/NAME_RUN store in the
 *    task descriptor; it reads the three stored arguments from the descriptor
 *    and forwards them to NAME_CALL.
 *  - NAME_CALL: ordinary function that forwards its arguments to NAME_WORK.
 *  - the header of NAME_WORK. The macro invocation must be followed directly
 *    by the function body ({ ... }); inside it the arguments are visible as
 *    ARG_1, ARG_2 and ARG_3, and the worker/deque head as __lace_worker and
 *    __lace_dq_head.
 *
 * The matching VOID_TASK_DECL_3 (which declares TD_NAME, NAME_WRAP and
 * NAME_CALL) must be visible before this macro is used.
 */
#define VOID_TASK_IMPL_3(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3)        \
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused)))   \
{                                                                                     \
     NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);   \
}                                                                                     \
                                                                                      \
static inline __attribute__((always_inline))                                          \
void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3);\
                                                                                      \
/* NAME##_WORK is declared always_inline, so its body is inlined into NAME##_CALL */  \
void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\
{                                                                                     \
    /* plain call: ISO C does not allow "return expr;" in a function returning void */\
    NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3);                                  \
}                                                                                     \
                                                                                      \
static inline __attribute__((always_inline))                                          \
void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3)
/*
 * VOID_TASK_3: one-step form for a void task of arity 3. Expands to the
 * declaration macro followed by the implementation macro, so the task body
 * ({ ... }) must follow the invocation.
 */
#define VOID_TASK_3(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3)             \
    VOID_TASK_DECL_3(NAME, ATYPE_1, ATYPE_2, ATYPE_3)                                 \
    VOID_TASK_IMPL_3(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3)
1977 
1978 
1979 // Task macros for tasks of arity 4
1980 
/**
 * TASK_DECL_4 - declare a task NAME returning RTYPE that takes four arguments.
 *
 * Expands to the task descriptor type TD_NAME (the common task fields followed
 * by a union that holds either the four arguments or the result), a
 * compile-time check that TD_NAME fits inside a Task, prototypes for
 * NAME_WRAP and NAME_CALL (defined by TASK_IMPL_4), and these static helpers:
 *
 *  - NAME_SPAWN(w, head, a1..a4): fill the deque slot at head with a new task
 *    descriptor and, where needed, update the worker's split bookkeeping.
 *  - NAME_NEWFRAME(a1..a4): build a descriptor on the stack, pass it to
 *    lace_run_newframe and return the result stored in the descriptor.
 *  - NAME_TOGETHER(a1..a4): build a descriptor on the stack and pass it to
 *    lace_run_together; no result is returned.
 *  - NAME_RUN(a1..a4): build a descriptor on the stack, pass it to
 *    lace_run_task and return the result stored in the descriptor.
 *  - NAME_SYNC(w, head): produce the result of the task in the slot at head,
 *    either by calling NAME_CALL directly on the stored arguments or by
 *    deferring to NAME_SYNC_SLOW.
 */
#define TASK_DECL_4(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4)                  \
                                                                                      \
typedef struct _TD_##NAME {                                                           \
  TASK_COMMON_FIELDS(_TD_##NAME)                                                      \
  union { struct {  ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; } args; RTYPE res; } d;\
} TD_##NAME;                                                                          \
                                                                                      \
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
/* The passing case uses array size 1, not 0: zero-length arrays are a compiler extension */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 1 : -1];\
                                                                                      \
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *);                                     \
RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4);\
static inline RTYPE NAME##_SYNC(WorkerP *, Task *);                                   \
static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *);                                     \
                                                                                      \
/* SPAWN: write a task descriptor into the slot at __dq_head */                       \
static inline __attribute__((unused))                                                 \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
{                                                                                     \
    PR_COUNTTASK(w);                                                                  \
                                                                                      \
    TD_##NAME *t;                                                                     \
    TailSplit ts;                                                                     \
    uint32_t head, split, newsplit;                                                   \
                                                                                      \
    /* head has reached w->end: there is no slot left to write into */                \
    if (__dq_head == w->end) lace_abort_stack_overflow();                             \
                                                                                      \
    t = (TD_##NAME *)__dq_head;                                                       \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
     t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
    /* descriptor stores stay ahead of the bookkeeping updates below */               \
    compiler_barrier();                                                               \
                                                                                      \
    Worker *wt = w->_public;                                                          \
    if (unlikely(w->allstolen)) {                                                     \
        /* allstolen was set: publish a fresh (head, head+1) pair and clear the flags */\
        if (wt->movesplit) wt->movesplit = 0;                                         \
        head = __dq_head - w->dq;                                                     \
        ts = (TailSplit){{head,head+1}};                                              \
        wt->ts.v = ts.v;                                                              \
        compiler_barrier();                                                           \
        wt->allstolen = 0;                                                            \
        w->split = __dq_head+1;                                                       \
        w->allstolen = 0;                                                             \
    } else if (unlikely(wt->movesplit)) {                                             \
        /* movesplit was set: new split index is (split + head + 2) / 2 */            \
        head = __dq_head - w->dq;                                                     \
        split = w->split - w->dq;                                                     \
        newsplit = (split + head + 2)/2;                                              \
        wt->ts.ts.split = newsplit;                                                   \
        w->split = w->dq + newsplit;                                                  \
        compiler_barrier();                                                           \
        wt->movesplit = 0;                                                            \
        PR_COUNTSPLITS(w, CTR_split_grow);                                            \
    }                                                                                 \
}                                                                                     \
                                                                                      \
/* NEWFRAME: run the task through lace_run_newframe and return its result */          \
static inline __attribute__((unused))                                                 \
RTYPE NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)     \
{                                                                                     \
    Task _t;                                                                          \
    TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
     t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
    lace_run_newframe(&_t);                                                           \
    return t->d.res;                                                                  \
}                                                                                     \
                                                                                      \
/* TOGETHER: hand the task to lace_run_together; the result is not returned */        \
static inline __attribute__((unused))                                                 \
void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)      \
{                                                                                     \
    Task _t;                                                                          \
    TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
     t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
    lace_run_together(&_t);                                                           \
}                                                                                     \
                                                                                      \
/* RUN: run the task through lace_run_task and return its result */                   \
static inline __attribute__((unused))                                                 \
RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)          \
{                                                                                     \
    Task _t;                                                                          \
    TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
     t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
    lace_run_task(&_t);                                                               \
    return t->d.res;                                                                  \
}                                                                                     \
                                                                                      \
/* SYNC_SLOW: out-of-line part of SYNC, used when the fast-path checks fail */        \
static __attribute__((noinline))                                                      \
RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head)                                   \
{                                                                                     \
    TD_##NAME *t;                                                                     \
                                                                                      \
    if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) {          \
        /* after lace_leapfrog returns, the result is read from the descriptor */     \
        lace_leapfrog(w, __dq_head);                                                  \
        t = (TD_##NAME *)__dq_head;                                                   \
        return t->d.res;                                                              \
    }                                                                                 \
                                                                                      \
    compiler_barrier();                                                               \
                                                                                      \
    Worker *wt = w->_public;                                                          \
    if (wt->movesplit) {                                                              \
        /* movesplit was set: advance split by half the distance to head, rounded up */\
        Task *old_split = w->split;                                                   \
        size_t diff = __dq_head - old_split;                                          \
        diff = (diff + 1) / 2;                                                        \
        w->split = old_split + diff;                                                  \
        wt->ts.ts.split += diff;                                                      \
        compiler_barrier();                                                           \
        wt->movesplit = 0;                                                            \
        PR_COUNTSPLITS(w, CTR_split_grow);                                            \
    }                                                                                 \
                                                                                      \
    compiler_barrier();                                                               \
                                                                                      \
    /* mark the slot THIEF_EMPTY and run the task here on its stored arguments */     \
    t = (TD_##NAME *)__dq_head;                                                       \
    t->thief = THIEF_EMPTY;                                                           \
    return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
}                                                                                     \
                                                                                      \
/* SYNC: inline fast path, falling back to SYNC_SLOW */                               \
static inline __attribute__((unused))                                                 \
RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head)                                        \
{                                                                                     \
    /* assert (__dq_head > 0); */  /* Commented out because we assume contract */     \
                                                                                      \
    /* fast path: movesplit is clear and split <= head, so call the task directly */  \
    if (likely(0 == w->_public->movesplit)) {                                         \
        if (likely(w->split <= __dq_head)) {                                          \
            TD_##NAME *t = (TD_##NAME *)__dq_head;                                    \
            t->thief = THIEF_EMPTY;                                                   \
            return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
        }                                                                             \
    }                                                                                 \
                                                                                      \
    return NAME##_SYNC_SLOW(w, __dq_head);                                            \
}
/*
 * TASK_IMPL_4: definitions for a task NAME of arity 4 that returns RTYPE.
 *
 * Generates NAME_CALL (forwards its arguments to NAME_WORK) and NAME_WRAP
 * (calls NAME_CALL with the arguments stored in the task descriptor and
 * stores the return value in the descriptor's d.res). The expansion ends
 * with the signature of NAME_WORK, so the task body ({ ... }) has to follow
 * the macro invocation; inside it the arguments are named ARG_1..ARG_4.
 */
#define TASK_IMPL_4(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4)\
static inline __attribute__((always_inline))                                          \
RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4);\
                                                                                      \
/* NAME##_WORK is always_inline, so NAME##_CALL ends up containing the task body */   \
RTYPE NAME##_CALL(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
{                                                                                     \
    return NAME##_WORK(__lace_worker, __lace_dq_head , arg_1, arg_2, arg_3, arg_4);   \
}                                                                                     \
                                                                                      \
/* Descriptor entry point: unpack the stored arguments, keep the result in d.res */   \
void NAME##_WRAP(WorkerP *__lace_worker, Task *__lace_dq_head, TD_##NAME *td __attribute__((unused)))\
{                                                                                     \
    td->d.res = NAME##_CALL(__lace_worker, __lace_dq_head , td->d.args.arg_1, td->d.args.arg_2, td->d.args.arg_3, td->d.args.arg_4);\
}                                                                                     \
                                                                                      \
static inline __attribute__((always_inline))                                          \
RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4)
/*
 * TASK_4: one-step form for a task of arity 4 returning RTYPE. Expands to the
 * declaration macro followed by the implementation macro, so the task body
 * ({ ... }) must follow the invocation.
 */
#define TASK_4(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4) \
    TASK_DECL_4(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4)                      \
    TASK_IMPL_4(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4)
2139 
2140 #define VOID_TASK_DECL_4(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4)                    \
2141                                                                                       \
2142 typedef struct _TD_##NAME {                                                           \
2143   TASK_COMMON_FIELDS(_TD_##NAME)                                                      \
2144   union { struct {  ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; } args; } d;\
2145 } TD_##NAME;                                                                          \
2146                                                                                       \
2147 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
2148 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
2149                                                                                       \
2150 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *);                                     \
2151 void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4);\
2152 static inline void NAME##_SYNC(WorkerP *, Task *);                                    \
2153 static void NAME##_SYNC_SLOW(WorkerP *, Task *);                                      \
2154                                                                                       \
2155 static inline __attribute__((unused))                                                 \
2156 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
2157 {                                                                                     \
2158     PR_COUNTTASK(w);                                                                  \
2159                                                                                       \
2160     TD_##NAME *t;                                                                     \
2161     TailSplit ts;                                                                     \
2162     uint32_t head, split, newsplit;                                                   \
2163                                                                                       \
2164     if (__dq_head == w->end) lace_abort_stack_overflow();                             \
2165                                                                                       \
2166     t = (TD_##NAME *)__dq_head;                                                       \
2167     t->f = &NAME##_WRAP;                                                              \
2168     t->thief = THIEF_TASK;                                                            \
2169      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
2170     compiler_barrier();                                                               \
2171                                                                                       \
2172     Worker *wt = w->_public;                                                          \
2173     if (unlikely(w->allstolen)) {                                                     \
2174         if (wt->movesplit) wt->movesplit = 0;                                         \
2175         head = __dq_head - w->dq;                                                     \
2176         ts = (TailSplit){{head,head+1}};                                              \
2177         wt->ts.v = ts.v;                                                              \
2178         compiler_barrier();                                                           \
2179         wt->allstolen = 0;                                                            \
2180         w->split = __dq_head+1;                                                       \
2181         w->allstolen = 0;                                                             \
2182     } else if (unlikely(wt->movesplit)) {                                             \
2183         head = __dq_head - w->dq;                                                     \
2184         split = w->split - w->dq;                                                     \
2185         newsplit = (split + head + 2)/2;                                              \
2186         wt->ts.ts.split = newsplit;                                                   \
2187         w->split = w->dq + newsplit;                                                  \
2188         compiler_barrier();                                                           \
2189         wt->movesplit = 0;                                                            \
2190         PR_COUNTSPLITS(w, CTR_split_grow);                                            \
2191     }                                                                                 \
2192 }                                                                                     \
2193                                                                                       \
2194 static inline __attribute__((unused))                                                 \
2195 void NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)      \
2196 {                                                                                     \
2197     Task _t;                                                                          \
2198     TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
2199     t->f = &NAME##_WRAP;                                                              \
2200     t->thief = THIEF_TASK;                                                            \
2201      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
2202     lace_run_newframe(&_t);                                                           \
2203     return ;                                                                          \
2204 }                                                                                     \
2205                                                                                       \
2206 static inline __attribute__((unused))                                                 \
2207 void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)      \
2208 {                                                                                     \
2209     Task _t;                                                                          \
2210     TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
2211     t->f = &NAME##_WRAP;                                                              \
2212     t->thief = THIEF_TASK;                                                            \
2213      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
2214     lace_run_together(&_t);                                                           \
2215 }                                                                                     \
2216                                                                                       \
2217 static inline __attribute__((unused))                                                 \
2218 void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)           \
2219 {                                                                                     \
2220     Task _t;                                                                          \
2221     TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
2222     t->f = &NAME##_WRAP;                                                              \
2223     t->thief = THIEF_TASK;                                                            \
2224      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
2225     lace_run_task(&_t);                                                               \
2226     return ;                                                                          \
2227 }                                                                                     \
2228                                                                                       \
2229 static __attribute__((noinline))                                                      \
2230 void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head)                                    \
2231 {                                                                                     \
2232     TD_##NAME *t;                                                                     \
2233                                                                                       \
2234     if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) {          \
2235         lace_leapfrog(w, __dq_head);                                                  \
2236         t = (TD_##NAME *)__dq_head;                                                   \
2237         return ;                                                                      \
2238     }                                                                                 \
2239                                                                                       \
2240     compiler_barrier();                                                               \
2241                                                                                       \
2242     Worker *wt = w->_public;                                                          \
2243     if (wt->movesplit) {                                                              \
2244         Task *t = w->split;                                                           \
2245         size_t diff = __dq_head - t;                                                  \
2246         diff = (diff + 1) / 2;                                                        \
2247         w->split = t + diff;                                                          \
2248         wt->ts.ts.split += diff;                                                      \
2249         compiler_barrier();                                                           \
2250         wt->movesplit = 0;                                                            \
2251         PR_COUNTSPLITS(w, CTR_split_grow);                                            \
2252     }                                                                                 \
2253                                                                                       \
2254     compiler_barrier();                                                               \
2255                                                                                       \
2256     t = (TD_##NAME *)__dq_head;                                                       \
2257     t->thief = THIEF_EMPTY;                                                           \
2258     return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
2259 }                                                                                     \
2260                                                                                       \
2261 static inline __attribute__((unused))                                                 \
2262 void NAME##_SYNC(WorkerP *w, Task *__dq_head)                                         \
2263 {                                                                                     \
2264     /* assert (__dq_head > 0); */  /* Commented out because we assume contract */     \
2265                                                                                       \
2266     if (likely(0 == w->_public->movesplit)) {                                         \
2267         if (likely(w->split <= __dq_head)) {                                          \
2268             TD_##NAME *t = (TD_##NAME *)__dq_head;                                    \
2269             t->thief = THIEF_EMPTY;                                                   \
2270             return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
2271         }                                                                             \
2272     }                                                                                 \
2273                                                                                       \
2274     return NAME##_SYNC_SLOW(w, __dq_head);                                            \
2275 }                                                                                     \
2276                                                                                       \
2277                                                                                       \
2278 
/**
 * VOID_TASK_IMPL_4(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4)
 *
 * Emits the definitions for a void task NAME taking four arguments:
 *  - NAME_WRAP(w, __dq_head, t): unpacks the four arguments stored in the task
 *    descriptor t and forwards them to NAME_CALL.
 *  - NAME_CALL(w, __dq_head, arg_1..arg_4): plain entry point that forwards to
 *    NAME_WORK.
 *  - the header of NAME_WORK, declared always_inline. The macro expansion stops
 *    right after that header, so the braces following the macro invocation
 *    become the body of NAME_WORK. Inside that body the worker and the deque
 *    pointer are available as __lace_worker and __lace_dq_head, and the task
 *    arguments under the names ARG_1..ARG_4.
 *
 * The type TD_NAME must already be declared where this macro is expanded.
 */
#define VOID_TASK_IMPL_4(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4)\
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused)))   \
{                                                                                     \
    NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
}                                                                                     \
                                                                                      \
static inline __attribute__((always_inline))                                          \
void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4);\
                                                                                      \
/* NAME##_WORK is always_inline, so the task body is compiled into NAME##_CALL */     \
void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
{                                                                                     \
    /* plain call: ISO C forbids "return expr;" in a function returning void */       \
    NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4);                           \
}                                                                                     \
                                                                                      \
static inline __attribute__((always_inline))                                          \
void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4)
2296 
/*
 * VOID_TASK_4: one-step form for a void task of arity 4.
 * Expands to the VOID_TASK_DECL_4 declarations immediately followed by
 * VOID_TASK_IMPL_4. The latter ends in the header of the task's WORK function,
 * so the braces written after the macro invocation become the task body.
 */
#define VOID_TASK_4(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4) \
    VOID_TASK_DECL_4(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4)                            \
    VOID_TASK_IMPL_4(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4)
2298 
2299 
2300 // Task macros for tasks of arity 5
2301 
/**
 * TASK_DECL_5(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5)
 *
 * Declares everything needed to use a task NAME that takes five arguments and
 * produces a result of type RTYPE.
 *
 * Types and prototypes:
 *  - TD_NAME: the task descriptor. It starts with TASK_COMMON_FIELDS and ends
 *    with a union `d` in which the argument struct `args` and the result `res`
 *    share the same storage.
 *  - A typedef of a char array whose size becomes negative (a compile error)
 *    when TD_NAME is larger than Task.
 *  - Prototypes of NAME_WRAP and NAME_CALL (TASK_IMPL_5 provides the
 *    definitions) and of NAME_SYNC / NAME_SYNC_SLOW (defined further down in
 *    this macro).
 *
 * Generated functions:
 *  - NAME_SPAWN(w, __dq_head, arg_1..arg_5)
 *      Calls lace_abort_stack_overflow() when __dq_head equals w->end.
 *      Otherwise writes the descriptor (f = NAME_WRAP, thief = THIEF_TASK and
 *      the five arguments) into the Task slot __dq_head points at. After a
 *      compiler barrier it updates the split bookkeeping:
 *        * if w->allstolen is set: clears a pending movesplit request, stores
 *          TailSplit {head, head+1} into the public worker (presumably
 *          {tail, split} -- confirm against the TailSplit definition), then
 *          clears the public and private allstolen flags and sets w->split to
 *          __dq_head+1;
 *        * else if the public movesplit flag is set: moves the split point to
 *          (split + head + 2) / 2 in both the public and the private worker
 *          and clears movesplit.
 *      SPAWN does not advance __dq_head itself; the caller is presumably
 *      responsible for that.
 *  - NAME_NEWFRAME(arg_1..arg_5)
 *      Builds the descriptor in a local Task, hands it to lace_run_newframe()
 *      and returns the `res` field read back from that local descriptor.
 *  - NAME_TOGETHER(arg_1..arg_5)
 *      Same construction, handed to lace_run_together(); returns nothing.
 *  - NAME_RUN(arg_1..arg_5)
 *      Same construction, handed to lace_run_task(); returns `res`.
 *  - NAME_SYNC_SLOW(w, __dq_head)   (noinline)
 *      If w->allstolen is set, or w->split lies beyond __dq_head and
 *      lace_shrink_shared(w) returns non-zero, calls lace_leapfrog() and
 *      returns the `res` stored in the descriptor at __dq_head (the task was
 *      presumably executed by a thief). Otherwise it serves a pending
 *      movesplit request by advancing the split point by half (rounded up) of
 *      the distance between w->split and __dq_head, marks the descriptor
 *      THIEF_EMPTY and runs the task directly via NAME_CALL with the stored
 *      arguments. Note that the `Task *t` declared inside the movesplit branch
 *      shadows the outer `TD_NAME *t`.
 *  - NAME_SYNC(w, __dq_head)
 *      Fast path: when no movesplit request is pending and w->split is not
 *      beyond __dq_head, marks the descriptor THIEF_EMPTY and runs the task
 *      directly via NAME_CALL. Every other case is delegated to
 *      NAME_SYNC_SLOW.
 */
#define TASK_DECL_5(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5)         \
                                                                                      \
typedef struct _TD_##NAME {                                                           \
  TASK_COMMON_FIELDS(_TD_##NAME)                                                      \
  union { struct {  ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; ATYPE_5 arg_5; } args; RTYPE res; } d;\
} TD_##NAME;                                                                          \
                                                                                      \
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
                                                                                      \
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *);                                     \
RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5);\
static inline RTYPE NAME##_SYNC(WorkerP *, Task *);                                   \
static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *);                                     \
                                                                                      \
static inline __attribute__((unused))                                                 \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
{                                                                                     \
    PR_COUNTTASK(w);                                                                  \
                                                                                      \
    TD_##NAME *t;                                                                     \
    TailSplit ts;                                                                     \
    uint32_t head, split, newsplit;                                                   \
                                                                                      \
    if (__dq_head == w->end) lace_abort_stack_overflow();                             \
                                                                                      \
    t = (TD_##NAME *)__dq_head;                                                       \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
     t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
    compiler_barrier();                                                               \
                                                                                      \
    Worker *wt = w->_public;                                                          \
    if (unlikely(w->allstolen)) {                                                     \
        if (wt->movesplit) wt->movesplit = 0;                                         \
        head = __dq_head - w->dq;                                                     \
        ts = (TailSplit){{head,head+1}};                                              \
        wt->ts.v = ts.v;                                                              \
        compiler_barrier();                                                           \
        wt->allstolen = 0;                                                            \
        w->split = __dq_head+1;                                                       \
        w->allstolen = 0;                                                             \
    } else if (unlikely(wt->movesplit)) {                                             \
        head = __dq_head - w->dq;                                                     \
        split = w->split - w->dq;                                                     \
        newsplit = (split + head + 2)/2;                                              \
        wt->ts.ts.split = newsplit;                                                   \
        w->split = w->dq + newsplit;                                                  \
        compiler_barrier();                                                           \
        wt->movesplit = 0;                                                            \
        PR_COUNTSPLITS(w, CTR_split_grow);                                            \
    }                                                                                 \
}                                                                                     \
                                                                                      \
static inline __attribute__((unused))                                                 \
RTYPE NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
{                                                                                     \
    Task _t;                                                                          \
    TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
     t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
    lace_run_newframe(&_t);                                                           \
    return ((TD_##NAME *)t)->d.res;                                                   \
}                                                                                     \
                                                                                      \
static inline __attribute__((unused))                                                 \
void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
{                                                                                     \
    Task _t;                                                                          \
    TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
     t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
    lace_run_together(&_t);                                                           \
}                                                                                     \
                                                                                      \
static inline __attribute__((unused))                                                 \
RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
{                                                                                     \
    Task _t;                                                                          \
    TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
     t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
    lace_run_task(&_t);                                                               \
    return ((TD_##NAME *)t)->d.res;                                                   \
}                                                                                     \
                                                                                      \
static __attribute__((noinline))                                                      \
RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head)                                   \
{                                                                                     \
    TD_##NAME *t;                                                                     \
                                                                                      \
    if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) {          \
        lace_leapfrog(w, __dq_head);                                                  \
        t = (TD_##NAME *)__dq_head;                                                   \
        return ((TD_##NAME *)t)->d.res;                                               \
    }                                                                                 \
                                                                                      \
    compiler_barrier();                                                               \
                                                                                      \
    Worker *wt = w->_public;                                                          \
    if (wt->movesplit) {                                                              \
        Task *t = w->split;                                                           \
        size_t diff = __dq_head - t;                                                  \
        diff = (diff + 1) / 2;                                                        \
        w->split = t + diff;                                                          \
        wt->ts.ts.split += diff;                                                      \
        compiler_barrier();                                                           \
        wt->movesplit = 0;                                                            \
        PR_COUNTSPLITS(w, CTR_split_grow);                                            \
    }                                                                                 \
                                                                                      \
    compiler_barrier();                                                               \
                                                                                      \
    t = (TD_##NAME *)__dq_head;                                                       \
    t->thief = THIEF_EMPTY;                                                           \
    return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
}                                                                                     \
                                                                                      \
static inline __attribute__((unused))                                                 \
RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head)                                        \
{                                                                                     \
    /* assert (__dq_head > 0); */  /* Commented out because we assume contract */     \
                                                                                      \
    if (likely(0 == w->_public->movesplit)) {                                         \
        if (likely(w->split <= __dq_head)) {                                          \
            TD_##NAME *t = (TD_##NAME *)__dq_head;                                    \
            t->thief = THIEF_EMPTY;                                                   \
            return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
        }                                                                             \
    }                                                                                 \
                                                                                      \
    return NAME##_SYNC_SLOW(w, __dq_head);                                            \
}                                                                                     \
                                                                                      \
                                                                                      \
2440 
/**
 * TASK_IMPL_5(RTYPE, NAME, ATYPE_1, ARG_1, ..., ATYPE_5, ARG_5)
 *
 * Emits the definitions for task NAME (five arguments, result type RTYPE):
 *  - NAME_WRAP: reads the five arguments out of a task descriptor, runs
 *    NAME_CALL with them and stores the result in the descriptor's `res` field.
 *  - NAME_CALL: plain entry point that forwards to NAME_WORK.
 *  - the header of NAME_WORK (always_inline). The expansion stops right after
 *    that header, so the braces following the macro invocation become the
 *    task body. The body sees the worker and the deque pointer as
 *    __lace_worker / __lace_dq_head and the arguments as ARG_1..ARG_5.
 *
 * The type TD_NAME must already be declared where this macro is expanded.
 */
#define TASK_IMPL_5(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5) \
void NAME##_WRAP(WorkerP *worker, Task *head, TD_##NAME *td __attribute__((unused)))  \
{                                                                                     \
    td->d.res = NAME##_CALL(worker, head,                                             \
                            td->d.args.arg_1, td->d.args.arg_2, td->d.args.arg_3,     \
                            td->d.args.arg_4, td->d.args.arg_5);                      \
}                                                                                     \
                                                                                      \
/* forward declaration of the task body, needed by NAME##_CALL below */               \
static inline __attribute__((always_inline))                                          \
RTYPE NAME##_WORK(WorkerP *, Task *, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5);    \
                                                                                      \
/* NAME##_WORK is always_inline, so the task body is compiled into NAME##_CALL */     \
RTYPE NAME##_CALL(WorkerP *worker, Task *head,                                        \
                  ATYPE_1 a1, ATYPE_2 a2, ATYPE_3 a3, ATYPE_4 a4, ATYPE_5 a5)         \
{                                                                                     \
    return NAME##_WORK(worker, head, a1, a2, a3, a4, a5);                             \
}                                                                                     \
                                                                                      \
/* header of the task body; the user-written block follows the macro invocation */    \
static inline __attribute__((always_inline))                                          \
RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)),                     \
                  Task *__lace_dq_head __attribute__((unused)),                       \
                  ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5)
2458 
/*
 * TASK_5: one-step form for a task of arity 5 with result type RTYPE.
 * Expands to the TASK_DECL_5 declarations immediately followed by TASK_IMPL_5.
 * The latter ends in the header of the task's WORK function, so the braces
 * written after the macro invocation become the task body.
 */
#define TASK_5(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5) \
    TASK_DECL_5(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5)                                   \
    TASK_IMPL_5(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5)
2460 
2461 #define VOID_TASK_DECL_5(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5)           \
2462                                                                                       \
2463 typedef struct _TD_##NAME {                                                           \
2464   TASK_COMMON_FIELDS(_TD_##NAME)                                                      \
2465   union { struct {  ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; ATYPE_5 arg_5; } args; } d;\
2466 } TD_##NAME;                                                                          \
2467                                                                                       \
2468 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
2469 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
2470                                                                                       \
2471 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *);                                     \
2472 void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5);\
2473 static inline void NAME##_SYNC(WorkerP *, Task *);                                    \
2474 static void NAME##_SYNC_SLOW(WorkerP *, Task *);                                      \
2475                                                                                       \
2476 static inline __attribute__((unused))                                                 \
2477 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
2478 {                                                                                     \
2479     PR_COUNTTASK(w);                                                                  \
2480                                                                                       \
2481     TD_##NAME *t;                                                                     \
2482     TailSplit ts;                                                                     \
2483     uint32_t head, split, newsplit;                                                   \
2484                                                                                       \
2485     if (__dq_head == w->end) lace_abort_stack_overflow();                             \
2486                                                                                       \
2487     t = (TD_##NAME *)__dq_head;                                                       \
2488     t->f = &NAME##_WRAP;                                                              \
2489     t->thief = THIEF_TASK;                                                            \
2490      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
2491     compiler_barrier();                                                               \
2492                                                                                       \
2493     Worker *wt = w->_public;                                                          \
2494     if (unlikely(w->allstolen)) {                                                     \
2495         if (wt->movesplit) wt->movesplit = 0;                                         \
2496         head = __dq_head - w->dq;                                                     \
2497         ts = (TailSplit){{head,head+1}};                                              \
2498         wt->ts.v = ts.v;                                                              \
2499         compiler_barrier();                                                           \
2500         wt->allstolen = 0;                                                            \
2501         w->split = __dq_head+1;                                                       \
2502         w->allstolen = 0;                                                             \
2503     } else if (unlikely(wt->movesplit)) {                                             \
2504         head = __dq_head - w->dq;                                                     \
2505         split = w->split - w->dq;                                                     \
2506         newsplit = (split + head + 2)/2;                                              \
2507         wt->ts.ts.split = newsplit;                                                   \
2508         w->split = w->dq + newsplit;                                                  \
2509         compiler_barrier();                                                           \
2510         wt->movesplit = 0;                                                            \
2511         PR_COUNTSPLITS(w, CTR_split_grow);                                            \
2512     }                                                                                 \
2513 }                                                                                     \
2514                                                                                       \
2515 static inline __attribute__((unused))                                                 \
2516 void NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
2517 {                                                                                     \
2518     Task _t;                                                                          \
2519     TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
2520     t->f = &NAME##_WRAP;                                                              \
2521     t->thief = THIEF_TASK;                                                            \
2522      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
2523     lace_run_newframe(&_t);                                                           \
2524     return ;                                                                          \
2525 }                                                                                     \
2526                                                                                       \
2527 static inline __attribute__((unused))                                                 \
2528 void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
2529 {                                                                                     \
2530     Task _t;                                                                          \
2531     TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
2532     t->f = &NAME##_WRAP;                                                              \
2533     t->thief = THIEF_TASK;                                                            \
2534      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
2535     lace_run_together(&_t);                                                           \
2536 }                                                                                     \
2537                                                                                       \
2538 static inline __attribute__((unused))                                                 \
2539 void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
2540 {                                                                                     \
2541     Task _t;                                                                          \
2542     TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
2543     t->f = &NAME##_WRAP;                                                              \
2544     t->thief = THIEF_TASK;                                                            \
2545      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
2546     lace_run_task(&_t);                                                               \
2547     return ;                                                                          \
2548 }                                                                                     \
2549                                                                                       \
2550 static __attribute__((noinline))                                                      \
2551 void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head)                                    \
2552 {                                                                                     \
2553     TD_##NAME *t;                                                                     \
2554                                                                                       \
2555     if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) {          \
2556         lace_leapfrog(w, __dq_head);                                                  \
2557         t = (TD_##NAME *)__dq_head;                                                   \
2558         return ;                                                                      \
2559     }                                                                                 \
2560                                                                                       \
2561     compiler_barrier();                                                               \
2562                                                                                       \
2563     Worker *wt = w->_public;                                                          \
2564     if (wt->movesplit) {                                                              \
2565         Task *t = w->split;                                                           \
2566         size_t diff = __dq_head - t;                                                  \
2567         diff = (diff + 1) / 2;                                                        \
2568         w->split = t + diff;                                                          \
2569         wt->ts.ts.split += diff;                                                      \
2570         compiler_barrier();                                                           \
2571         wt->movesplit = 0;                                                            \
2572         PR_COUNTSPLITS(w, CTR_split_grow);                                            \
2573     }                                                                                 \
2574                                                                                       \
2575     compiler_barrier();                                                               \
2576                                                                                       \
2577     t = (TD_##NAME *)__dq_head;                                                       \
2578     t->thief = THIEF_EMPTY;                                                           \
2579     return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
2580 }                                                                                     \
2581                                                                                       \
2582 static inline __attribute__((unused))                                                 \
2583 void NAME##_SYNC(WorkerP *w, Task *__dq_head)                                         \
2584 {                                                                                     \
2585     /* assert (__dq_head > 0); */  /* Commented out because we assume contract */     \
2586                                                                                       \
2587     if (likely(0 == w->_public->movesplit)) {                                         \
2588         if (likely(w->split <= __dq_head)) {                                          \
2589             TD_##NAME *t = (TD_##NAME *)__dq_head;                                    \
2590             t->thief = THIEF_EMPTY;                                                   \
2591             return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
2592         }                                                                             \
2593     }                                                                                 \
2594                                                                                       \
2595     return NAME##_SYNC_SLOW(w, __dq_head);                                            \
2596 }                                                                                     \
2597                                                                                       \
2598                                                                                       \
2599 
/**
 * VOID_TASK_IMPL_5 - emit the out-of-line definitions of a void task with five arguments.
 *
 * NAME is the task name; each (ATYPE_i, ARG_i) pair gives the type and the
 * parameter name of argument i as seen inside the task body.
 *
 * The expansion consists of:
 *  - NAME_WRAP, which reads the five arguments out of the task descriptor t
 *    and passes them to NAME_CALL;
 *  - a forward declaration of NAME_WORK;
 *  - NAME_CALL, which passes its arguments straight on to NAME_WORK;
 *  - the function header of NAME_WORK, left without a body.
 *
 * Because the expansion stops after the NAME_WORK header, the macro
 * invocation is expected to be followed by the braces of the task body.
 * TD_NAME has to be declared already; VOID_TASK_5 arranges this by
 * expanding VOID_TASK_DECL_5 first.
 */
#define VOID_TASK_IMPL_5(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5)\
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused)))   \
{                                                                                     \
    NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
}                                                                                     \
                                                                                      \
static inline __attribute__((always_inline))                                          \
void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5);\
                                                                                      \
/* NAME##_WORK is declared always_inline, so its body is inlined into NAME##_CALL. */ \
void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
{                                                                                     \
    /* Plain call: ISO C does not allow `return expr;` in a function returning void. */\
    NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4, arg_5);                    \
}                                                                                     \
                                                                                      \
static inline __attribute__((always_inline))                                          \
void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5)
2617 
/* VOID_TASK_5: shorthand that expands VOID_TASK_DECL_5 followed by VOID_TASK_IMPL_5 for the same task. */
#define VOID_TASK_5(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5) \
    VOID_TASK_DECL_5(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5) \
    VOID_TASK_IMPL_5(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5)
2619 
2620 
2621 // Task macros for tasks of arity 6
2622 
/**
 * TASK_DECL_6 - declare a task NAME that takes six arguments and yields RTYPE.
 *
 * The expansion provides:
 *  - TD_NAME, the task descriptor: the common task fields followed by a union
 *    holding either the six arguments or the result, so storing the result
 *    reuses the argument storage;
 *  - a compile-time check that TD_NAME is no larger than a generic Task;
 *  - prototypes for NAME_WRAP and NAME_CALL (TASK_IMPL_6 defines them);
 *  - NAME_SPAWN: fill the deque slot at __dq_head with a new task;
 *  - NAME_NEWFRAME / NAME_TOGETHER / NAME_RUN: build a descriptor on the stack
 *    and hand it to lace_run_newframe / lace_run_together / lace_run_task;
 *  - NAME_SYNC and NAME_SYNC_SLOW: obtain the result of the task stored at
 *    __dq_head, either by calling NAME_CALL directly or via lace_leapfrog.
 */
#define TASK_DECL_6(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6)\
                                                                                      \
typedef struct _TD_##NAME {                                                           \
  TASK_COMMON_FIELDS(_TD_##NAME)                                                      \
  union {                                                                             \
    struct {                                                                          \
      ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3;                                    \
      ATYPE_4 arg_4; ATYPE_5 arg_5; ATYPE_6 arg_6;                                    \
    } args;                                                                           \
    RTYPE res;                                                                        \
  } d;                                                                                \
} TD_##NAME;                                                                          \
                                                                                      \
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
/* The passing case has size 1, not 0: a zero-length array is not valid ISO C. */     \
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 1 : -1];\
                                                                                      \
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *);                                     \
RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6);\
static inline RTYPE NAME##_SYNC(WorkerP *, Task *);                                   \
static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *);                                     \
                                                                                      \
/* Store a new task in the slot at __dq_head; __dq_head itself is not advanced here. */\
static inline __attribute__((unused))                                                 \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
{                                                                                     \
    PR_COUNTTASK(w);                                                                  \
                                                                                      \
    TD_##NAME *t;                                                                     \
    TailSplit ts;                                                                     \
    uint32_t head, split, newsplit;                                                   \
                                                                                      \
    /* No free slot left in this worker's deque. */                                   \
    if (__dq_head == w->end) lace_abort_stack_overflow();                             \
                                                                                      \
    t = (TD_##NAME *)__dq_head;                                                       \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
    t->d.args.arg_1 = arg_1;                                                          \
    t->d.args.arg_2 = arg_2;                                                          \
    t->d.args.arg_3 = arg_3;                                                          \
    t->d.args.arg_4 = arg_4;                                                          \
    t->d.args.arg_5 = arg_5;                                                          \
    t->d.args.arg_6 = arg_6;                                                          \
    /* The descriptor stores above stay ahead of the bookkeeping below. */            \
    compiler_barrier();                                                               \
                                                                                      \
    Worker *wt = w->_public;                                                          \
    if (unlikely(w->allstolen)) {                                                     \
        /* allstolen is set: reset the public pair to {head, head+1} */               \
        /* (presumably {tail, split} - confirm against TailSplit) and */              \
        /* clear both the public and the private allstolen flag. */                   \
        if (wt->movesplit) wt->movesplit = 0;                                         \
        head = __dq_head - w->dq;                                                     \
        ts = (TailSplit){{head,head+1}};                                              \
        wt->ts.v = ts.v;                                                              \
        compiler_barrier();                                                           \
        wt->allstolen = 0;                                                            \
        w->split = __dq_head+1;                                                       \
        w->allstolen = 0;                                                             \
    } else if (unlikely(wt->movesplit)) {                                             \
        /* movesplit is set: move the split to (split + head + 2) / 2, */             \
        /* then clear the flag once both copies of the split are updated. */          \
        head = __dq_head - w->dq;                                                     \
        split = w->split - w->dq;                                                     \
        newsplit = (split + head + 2)/2;                                              \
        wt->ts.ts.split = newsplit;                                                   \
        w->split = w->dq + newsplit;                                                  \
        compiler_barrier();                                                           \
        wt->movesplit = 0;                                                            \
        PR_COUNTSPLITS(w, CTR_split_grow);                                            \
    }                                                                                 \
}                                                                                     \
                                                                                      \
/* Build the task on the stack, pass it to lace_run_newframe and return its result. */\
static inline __attribute__((unused))                                                 \
RTYPE NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
{                                                                                     \
    Task _t;                                                                          \
    TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
    t->d.args.arg_1 = arg_1;                                                          \
    t->d.args.arg_2 = arg_2;                                                          \
    t->d.args.arg_3 = arg_3;                                                          \
    t->d.args.arg_4 = arg_4;                                                          \
    t->d.args.arg_5 = arg_5;                                                          \
    t->d.args.arg_6 = arg_6;                                                          \
    lace_run_newframe(&_t);                                                           \
    return t->d.res;                                                                  \
}                                                                                     \
                                                                                      \
/* Build the task on the stack and pass it to lace_run_together; no result is read back. */\
static inline __attribute__((unused))                                                 \
void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
{                                                                                     \
    Task _t;                                                                          \
    TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
    t->d.args.arg_1 = arg_1;                                                          \
    t->d.args.arg_2 = arg_2;                                                          \
    t->d.args.arg_3 = arg_3;                                                          \
    t->d.args.arg_4 = arg_4;                                                          \
    t->d.args.arg_5 = arg_5;                                                          \
    t->d.args.arg_6 = arg_6;                                                          \
    lace_run_together(&_t);                                                           \
}                                                                                     \
                                                                                      \
/* Build the task on the stack, pass it to lace_run_task and return its result. */    \
static inline __attribute__((unused))                                                 \
RTYPE NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
{                                                                                     \
    Task _t;                                                                          \
    TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
    t->f = &NAME##_WRAP;                                                              \
    t->thief = THIEF_TASK;                                                            \
    t->d.args.arg_1 = arg_1;                                                          \
    t->d.args.arg_2 = arg_2;                                                          \
    t->d.args.arg_3 = arg_3;                                                          \
    t->d.args.arg_4 = arg_4;                                                          \
    t->d.args.arg_5 = arg_5;                                                          \
    t->d.args.arg_6 = arg_6;                                                          \
    lace_run_task(&_t);                                                               \
    return t->d.res;                                                                  \
}                                                                                     \
                                                                                      \
/* Slow path of NAME##_SYNC, kept out of line (noinline). */                          \
static __attribute__((noinline))                                                      \
RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head)                                   \
{                                                                                     \
    TD_##NAME *t;                                                                     \
                                                                                      \
    /* allstolen is set, or the split lies above __dq_head and */                     \
    /* lace_shrink_shared() returns non-zero: let lace_leapfrog() handle */           \
    /* the slot, then read the result stored in the descriptor. */                    \
    if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) {          \
        lace_leapfrog(w, __dq_head);                                                  \
        t = (TD_##NAME *)__dq_head;                                                   \
        return t->d.res;                                                              \
    }                                                                                 \
                                                                                      \
    compiler_barrier();                                                               \
                                                                                      \
    Worker *wt = w->_public;                                                          \
    if (wt->movesplit) {                                                              \
        /* Advance the split by half (rounded up) of its distance to __dq_head. */    \
        Task *old_split = w->split;                                                   \
        size_t diff = __dq_head - old_split;                                          \
        diff = (diff + 1) / 2;                                                        \
        w->split = old_split + diff;                                                  \
        wt->ts.ts.split += diff;                                                      \
        compiler_barrier();                                                           \
        wt->movesplit = 0;                                                            \
        PR_COUNTSPLITS(w, CTR_split_grow);                                            \
    }                                                                                 \
                                                                                      \
    compiler_barrier();                                                               \
                                                                                      \
    /* Mark the slot THIEF_EMPTY and run the task with a direct call. */              \
    t = (TD_##NAME *)__dq_head;                                                       \
    t->thief = THIEF_EMPTY;                                                           \
    return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
}                                                                                     \
                                                                                      \
/* Obtain the result of the task stored at __dq_head. */                              \
static inline __attribute__((unused))                                                 \
RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head)                                        \
{                                                                                     \
    /* assert (__dq_head > 0); */  /* Commented out because we assume contract */     \
                                                                                      \
    /* Fast path: movesplit is clear and w->split <= __dq_head, so mark */            \
    /* the slot THIEF_EMPTY and call NAME##_CALL directly. */                         \
    if (likely(0 == w->_public->movesplit)) {                                         \
        if (likely(w->split <= __dq_head)) {                                          \
            TD_##NAME *t = (TD_##NAME *)__dq_head;                                    \
            t->thief = THIEF_EMPTY;                                                   \
            return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
        }                                                                             \
    }                                                                                 \
                                                                                      \
    return NAME##_SYNC_SLOW(w, __dq_head);                                            \
}
2761 
/**
 * TASK_IMPL_6 - emit the out-of-line definitions of a task with six arguments and result type RTYPE.
 *
 * NAME is the task name; each (ATYPE_i, ARG_i) pair gives the type and the
 * parameter name of argument i as seen inside the task body.
 *
 * The expansion consists of a forward declaration of NAME_WORK, the
 * definition of NAME_WRAP (reads the arguments from the descriptor, calls
 * NAME_CALL and stores the returned value in t->d.res), the definition of
 * NAME_CALL (forwards to NAME_WORK) and finally the header of NAME_WORK
 * without a body, so the invocation is expected to be followed by the
 * braces of the task body.
 */
#define TASK_IMPL_6(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6)\
static inline __attribute__((always_inline))                                          \
RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6);\
                                                                                      \
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused)))   \
{                                                                                     \
    t->d.res = NAME##_CALL(w, __dq_head ,                                             \
                           t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3,         \
                           t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);        \
}                                                                                     \
                                                                                      \
/* NAME##_WORK is declared always_inline, so its body is inlined into NAME##_CALL. */ \
RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
{                                                                                     \
    return NAME##_WORK(w, __dq_head ,                                                 \
                       arg_1, arg_2, arg_3,                                           \
                       arg_4, arg_5, arg_6);                                          \
}                                                                                     \
                                                                                      \
static inline __attribute__((always_inline))                                          \
RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5, ATYPE_6 ARG_6)
2779 
/* TASK_6: shorthand that expands TASK_DECL_6 followed by TASK_IMPL_6 for the same task. */
#define TASK_6(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6) \
    TASK_DECL_6(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6) \
    TASK_IMPL_6(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6)
2781 
2782 #define VOID_TASK_DECL_6(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6)  \
2783                                                                                       \
2784 typedef struct _TD_##NAME {                                                           \
2785   TASK_COMMON_FIELDS(_TD_##NAME)                                                      \
2786   union { struct {  ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; ATYPE_5 arg_5; ATYPE_6 arg_6; } args; } d;\
2787 } TD_##NAME;                                                                          \
2788                                                                                       \
2789 /* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
2790 typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
2791                                                                                       \
2792 void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *);                                     \
2793 void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6);\
2794 static inline void NAME##_SYNC(WorkerP *, Task *);                                    \
2795 static void NAME##_SYNC_SLOW(WorkerP *, Task *);                                      \
2796                                                                                       \
2797 static inline __attribute__((unused))                                                 \
2798 void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
2799 {                                                                                     \
2800     PR_COUNTTASK(w);                                                                  \
2801                                                                                       \
2802     TD_##NAME *t;                                                                     \
2803     TailSplit ts;                                                                     \
2804     uint32_t head, split, newsplit;                                                   \
2805                                                                                       \
2806     if (__dq_head == w->end) lace_abort_stack_overflow();                             \
2807                                                                                       \
2808     t = (TD_##NAME *)__dq_head;                                                       \
2809     t->f = &NAME##_WRAP;                                                              \
2810     t->thief = THIEF_TASK;                                                            \
2811      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
2812     compiler_barrier();                                                               \
2813                                                                                       \
2814     Worker *wt = w->_public;                                                          \
2815     if (unlikely(w->allstolen)) {                                                     \
2816         if (wt->movesplit) wt->movesplit = 0;                                         \
2817         head = __dq_head - w->dq;                                                     \
2818         ts = (TailSplit){{head,head+1}};                                              \
2819         wt->ts.v = ts.v;                                                              \
2820         compiler_barrier();                                                           \
2821         wt->allstolen = 0;                                                            \
2822         w->split = __dq_head+1;                                                       \
2823         w->allstolen = 0;                                                             \
2824     } else if (unlikely(wt->movesplit)) {                                             \
2825         head = __dq_head - w->dq;                                                     \
2826         split = w->split - w->dq;                                                     \
2827         newsplit = (split + head + 2)/2;                                              \
2828         wt->ts.ts.split = newsplit;                                                   \
2829         w->split = w->dq + newsplit;                                                  \
2830         compiler_barrier();                                                           \
2831         wt->movesplit = 0;                                                            \
2832         PR_COUNTSPLITS(w, CTR_split_grow);                                            \
2833     }                                                                                 \
2834 }                                                                                     \
2835                                                                                       \
2836 static inline __attribute__((unused))                                                 \
2837 void NAME##_NEWFRAME(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
2838 {                                                                                     \
2839     Task _t;                                                                          \
2840     TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
2841     t->f = &NAME##_WRAP;                                                              \
2842     t->thief = THIEF_TASK;                                                            \
2843      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
2844     lace_run_newframe(&_t);                                                           \
2845     return ;                                                                          \
2846 }                                                                                     \
2847                                                                                       \
2848 static inline __attribute__((unused))                                                 \
2849 void NAME##_TOGETHER(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
2850 {                                                                                     \
2851     Task _t;                                                                          \
2852     TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
2853     t->f = &NAME##_WRAP;                                                              \
2854     t->thief = THIEF_TASK;                                                            \
2855      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
2856     lace_run_together(&_t);                                                           \
2857 }                                                                                     \
2858                                                                                       \
2859 static inline __attribute__((unused))                                                 \
2860 void NAME##_RUN(ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
2861 {                                                                                     \
2862     Task _t;                                                                          \
2863     TD_##NAME *t = (TD_##NAME *)&_t;                                                  \
2864     t->f = &NAME##_WRAP;                                                              \
2865     t->thief = THIEF_TASK;                                                            \
2866      t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
2867     lace_run_task(&_t);                                                               \
2868     return ;                                                                          \
2869 }                                                                                     \
2870                                                                                       \
2871 static __attribute__((noinline))                                                      \
2872 void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head)                                    \
2873 {                                                                                     \
2874     TD_##NAME *t;                                                                     \
2875                                                                                       \
2876     if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) {          \
2877         lace_leapfrog(w, __dq_head);                                                  \
2878         t = (TD_##NAME *)__dq_head;                                                   \
2879         return ;                                                                      \
2880     }                                                                                 \
2881                                                                                       \
2882     compiler_barrier();                                                               \
2883                                                                                       \
2884     Worker *wt = w->_public;                                                          \
2885     if (wt->movesplit) {                                                              \
2886         Task *t = w->split;                                                           \
2887         size_t diff = __dq_head - t;                                                  \
2888         diff = (diff + 1) / 2;                                                        \
2889         w->split = t + diff;                                                          \
2890         wt->ts.ts.split += diff;                                                      \
2891         compiler_barrier();                                                           \
2892         wt->movesplit = 0;                                                            \
2893         PR_COUNTSPLITS(w, CTR_split_grow);                                            \
2894     }                                                                                 \
2895                                                                                       \
2896     compiler_barrier();                                                               \
2897                                                                                       \
2898     t = (TD_##NAME *)__dq_head;                                                       \
2899     t->thief = THIEF_EMPTY;                                                           \
2900     return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
2901 }                                                                                     \
2902                                                                                       \
2903 static inline __attribute__((unused))                                                 \
2904 void NAME##_SYNC(WorkerP *w, Task *__dq_head)                                         \
2905 {                                                                                     \
2906     /* assert (__dq_head > 0); */  /* Commented out because we assume contract */     \
2907                                                                                       \
2908     if (likely(0 == w->_public->movesplit)) {                                         \
2909         if (likely(w->split <= __dq_head)) {                                          \
2910             TD_##NAME *t = (TD_##NAME *)__dq_head;                                    \
2911             t->thief = THIEF_EMPTY;                                                   \
2912             return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
2913         }                                                                             \
2914     }                                                                                 \
2915                                                                                       \
2916     return NAME##_SYNC_SLOW(w, __dq_head);                                            \
2917 }                                                                                     \
2918                                                                                       \
2919                                                                                       \
2920 
/**
 * Implementation part of a void task with six arguments.
 *
 * Expands to the definitions of NAME##_WRAP and NAME##_CALL, a forward
 * declaration of NAME##_WORK, and finally the signature of NAME##_WORK
 * without a body. The macro invocation must therefore be followed directly
 * by the task body in braces.
 *
 * NAME       base name used to form the generated identifiers
 * ATYPE_n    type of the n-th task argument
 * ARG_n      name under which the n-th argument is visible in the task body
 *
 * Inside the task body the worker and deque head are available as
 * __lace_worker and __lace_dq_head (both marked as possibly unused).
 */
#define VOID_TASK_IMPL_6(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6)\
/* Forwards the six arguments stored in the task descriptor t to NAME##_CALL. */      \
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t)                           \
{                                                                                     \
    NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
}                                                                                     \
                                                                                      \
static inline __attribute__((always_inline))                                          \
void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6);\
                                                                                      \
/* NAME##_WORK is declared always_inline; NAME##_CALL is the ordinary function */     \
/* that calls it with the worker, the deque head and the six arguments.        */     \
void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
{                                                                                     \
    NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4, arg_5, arg_6);             \
}                                                                                     \
                                                                                      \
static inline __attribute__((always_inline))                                          \
void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5, ATYPE_6 ARG_6)
2938 
/**
 * Declare and implement a void task with six arguments in one step.
 *
 * Expands to VOID_TASK_DECL_6 (given the name and the argument types)
 * followed by VOID_TASK_IMPL_6 (given the name and the type/name pairs).
 */
#define VOID_TASK_6(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6) \
    VOID_TASK_DECL_6(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6) \
    VOID_TASK_IMPL_6(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6)
2940 
2941 
2942 
2943 #ifdef __cplusplus
2944 }
2945 #endif /* __cplusplus */
2946 
2947 #endif
2948