/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#ifndef __BLI_TASK_H__
#define __BLI_TASK_H__

#include <string.h> /* for memset() */

struct ListBase;

/** \file
 * \ingroup bli
 */

#include "BLI_threads.h"
#include "BLI_utildefines.h"

#ifdef __cplusplus
extern "C" {
#endif

struct BLI_mempool;
/* Task Scheduler
 *
 * Central scheduler that holds running threads ready to execute tasks.
 * A single queue holds the tasks from all pools.
 *
 * Init/exit must be called before/after any task pools are created/freed, and
 * must be called from the main thread. All other scheduler and pool functions
 * are thread-safe. */

void BLI_task_scheduler_init(void);
void BLI_task_scheduler_exit(void);
int BLI_task_scheduler_num_threads(void);
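
/* A minimal lifetime sketch (illustrative only): the scheduler is initialized
 * once from the main thread before any pool exists, and torn down after the
 * last pool has been freed.
 *
 *    BLI_task_scheduler_init();
 *
 *    TaskPool *pool = BLI_task_pool_create(NULL, TASK_PRIORITY_HIGH);
 *    ... push tasks, work and wait, ...
 *    BLI_task_pool_free(pool);
 *
 *    BLI_task_scheduler_exit();
 */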

/* Task Pool
 *
 * Pool of tasks that will be executed by the central task scheduler. For each
 * pool, we can wait for all tasks to be done, or cancel them before they are
 * done.
 *
 * Running tasks may spawn new tasks.
 *
 * Pools may be nested, i.e. a thread running a task can create another task
 * pool with smaller tasks. While other threads are busy they will continue
 * working on their own tasks; otherwise they will join in. No new threads
 * will be launched.
 */

typedef enum TaskPriority {
  TASK_PRIORITY_LOW,
  TASK_PRIORITY_HIGH,
} TaskPriority;

typedef struct TaskPool TaskPool;
typedef void (*TaskRunFunction)(TaskPool *__restrict pool, void *taskdata);
typedef void (*TaskFreeFunction)(TaskPool *__restrict pool, void *taskdata);

/* Regular task pool that immediately starts executing tasks as soon as they
 * are pushed, either on the current or another thread. */
TaskPool *BLI_task_pool_create(void *userdata, TaskPriority priority);

/* Background: always run tasks in a background thread, never immediately
 * execute them. For running background jobs. */
TaskPool *BLI_task_pool_create_background(void *userdata, TaskPriority priority);

/* Background Serial: run tasks one after the other in the background,
 * without parallelization between the tasks. */
TaskPool *BLI_task_pool_create_background_serial(void *userdata, TaskPriority priority);

/* Suspended: don't execute tasks until work_and_wait is called. This is slower
 * as threads can't immediately start working. But it can be used if the data
 * structures the threads operate on are not fully initialized until all tasks
 * are created. */
TaskPool *BLI_task_pool_create_suspended(void *userdata, TaskPriority priority);

/* No threads: immediately executes tasks on the same thread. For debugging. */
TaskPool *BLI_task_pool_create_no_threads(void *userdata);

void BLI_task_pool_free(TaskPool *pool);

void BLI_task_pool_push(TaskPool *pool,
                        TaskRunFunction run,
                        void *taskdata,
                        bool free_taskdata,
                        TaskFreeFunction freedata);

/* Work and wait until all tasks are done. */
void BLI_task_pool_work_and_wait(TaskPool *pool);
/* Cancel all tasks, keep worker threads running. */
void BLI_task_pool_cancel(TaskPool *pool);

/* For worker threads, test if the current task pool is canceled. This function
 * may only be called from worker threads, and pool must be the task pool that
 * the thread is currently executing a task from. */
bool BLI_task_pool_current_canceled(TaskPool *pool);

/* Optional userdata pointer to pass along to the run function. */
void *BLI_task_pool_user_data(TaskPool *pool);

/* Optional mutex to use from the run function. */
ThreadMutex *BLI_task_pool_user_mutex(TaskPool *pool);
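
/* A minimal usage sketch (illustrative only; `compute_task`, `task_data` and
 * `num_tasks` are hypothetical). When `free_taskdata` is true the pool takes
 * ownership of `taskdata` and frees it, using `freedata` when one is given.
 *
 *    static void compute_task(TaskPool *__restrict pool, void *taskdata)
 *    {
 *      ... do the work for this task, reading/writing taskdata ...
 *    }
 *
 *    TaskPool *pool = BLI_task_pool_create(NULL, TASK_PRIORITY_HIGH);
 *    for (int i = 0; i < num_tasks; i++) {
 *      BLI_task_pool_push(pool, compute_task, &task_data[i], false, NULL);
 *    }
 *    BLI_task_pool_work_and_wait(pool);
 *    BLI_task_pool_free(pool);
 */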

/* Parallel for routines */

/* Per-thread specific data passed to the callback. */
typedef struct TaskParallelTLS {
  /* Copy of the user-specified chunk, which is copied from the original chunk
   * to all worker threads. This is similar to OpenMP's firstprivate.
   */
  void *userdata_chunk;
} TaskParallelTLS;

typedef void (*TaskParallelRangeFunc)(void *__restrict userdata,
                                      const int iter,
                                      const TaskParallelTLS *__restrict tls);
typedef void (*TaskParallelReduceFunc)(const void *__restrict userdata,
                                       void *__restrict chunk_join,
                                       void *__restrict chunk);

typedef void (*TaskParallelFreeFunc)(const void *__restrict userdata, void *__restrict chunk);

typedef struct TaskParallelSettings {
  /* Whether the caller allows threading of the particular range.
   * Usually set by some heuristic which forces threading off when the
   * threading overhead would outweigh the speed benefit.
   * When threading is enabled, BLI_task_parallel_range() will always use
   * threading when the range is larger than the chunk size.
   */
  bool use_threading;
  /* Each worker thread will get a copy of this chunk
   * (similar to OpenMP's firstprivate).
   */
  void *userdata_chunk;       /* Pointer to actual data. */
  size_t userdata_chunk_size; /* Size of that data. */
  /* Function called to join a user data chunk into another, to reduce
   * the result into the original userdata_chunk memory.
   * The reduce functions should have no side effects, so that they
   * can be run on any thread. */
  TaskParallelReduceFunc func_reduce;
  /* Function called to free data created by TaskParallelRangeFunc. */
  TaskParallelFreeFunc func_free;
  /* Minimum allowed number of range iterations to be handled by a single
   * thread. This allows us to achieve the following:
   * - Reduce the amount of threading overhead.
   * - Partially occupy the thread pool with ranges which are computationally
   *   expensive, but which have fewer iterations than there are available
   *   threads. For example, a [0 .. 64] range can be multi-threaded over
   *   4 threads doing 16 iterations each.
   * This is the preferred way to tell the scheduler when to use threading,
   * rather than a global use_threading switch based on just the range size.
   */
  int min_iter_per_thread;
} TaskParallelSettings;

BLI_INLINE void BLI_parallel_range_settings_defaults(TaskParallelSettings *settings);

void BLI_task_parallel_range(const int start,
                             const int stop,
                             void *userdata,
                             TaskParallelRangeFunc func,
                             const TaskParallelSettings *settings);
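
/* A minimal reduction sketch (illustrative only; `values`, `num_values` and
 * the chunk struct are hypothetical). Each worker accumulates into its own
 * copy of the chunk, and func_reduce joins the per-thread results back into
 * `total`.
 *
 *    typedef struct SumChunk {
 *      int sum;
 *    } SumChunk;
 *
 *    static void sum_range_func(void *__restrict userdata,
 *                               const int i,
 *                               const TaskParallelTLS *__restrict tls)
 *    {
 *      const int *values = (const int *)userdata;
 *      SumChunk *chunk = (SumChunk *)tls->userdata_chunk;
 *      chunk->sum += values[i];
 *    }
 *
 *    static void sum_reduce_func(const void *__restrict userdata,
 *                                void *__restrict chunk_join,
 *                                void *__restrict chunk)
 *    {
 *      ((SumChunk *)chunk_join)->sum += ((const SumChunk *)chunk)->sum;
 *    }
 *
 *    TaskParallelSettings settings;
 *    BLI_parallel_range_settings_defaults(&settings);
 *    SumChunk total = {0};
 *    settings.userdata_chunk = &total;
 *    settings.userdata_chunk_size = sizeof(total);
 *    settings.func_reduce = sum_reduce_func;
 *    BLI_task_parallel_range(0, num_values, values, sum_range_func, &settings);
 *
 * After the call, total.sum holds the sum over the whole range.
 */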

/* This data is shared between all tasks; access to it must be protected by a
 * thread lock or similar. */
typedef struct TaskParallelIteratorStateShared {
  /* Maximum number of items to acquire at once. */
  int chunk_size;
  /* Next item to be acquired. */
  void *next_item;
  /* Index of the next item to be acquired. */
  int next_index;
  /* Indicates that the end of the iteration has been reached. */
  bool is_finished;
  /* Helper lock to protect access to this data in the iterator getter
   * callback. It can be ignored (e.g. if the callback implements its own
   * protection system, using atomics).
   * Will be NULL when the iterator is actually processed in a single thread. */
  SpinLock *spin_lock;
} TaskParallelIteratorStateShared;

typedef void (*TaskParallelIteratorIterFunc)(void *__restrict userdata,
                                             const TaskParallelTLS *__restrict tls,
                                             void **r_next_item,
                                             int *r_next_index,
                                             bool *r_do_abort);

typedef void (*TaskParallelIteratorFunc)(void *__restrict userdata,
                                         void *item,
                                         int index,
                                         const TaskParallelTLS *__restrict tls);

void BLI_task_parallel_iterator(void *userdata,
                                TaskParallelIteratorIterFunc iter_func,
                                void *init_item,
                                const int init_index,
                                const int tot_items,
                                TaskParallelIteratorFunc func,
                                const TaskParallelSettings *settings);
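
/* A sketch of a generator for a singly linked list (illustrative only; `Link`
 * stands for any node type with a `next` pointer, and `process_item_func` is a
 * hypothetical TaskParallelIteratorFunc). The iter function receives the
 * current item in *r_next_item and must advance it to the next one, updating
 * the index and setting *r_do_abort when iteration should stop.
 *
 *    static void next_link_func(void *__restrict userdata,
 *                               const TaskParallelTLS *__restrict tls,
 *                               void **r_next_item,
 *                               int *r_next_index,
 *                               bool *r_do_abort)
 *    {
 *      Link *link = (Link *)*r_next_item;
 *      if (link->next == NULL) {
 *        *r_do_abort = true;
 *      }
 *      *r_next_item = link->next;
 *      (*r_next_index)++;
 *    }
 *
 *    BLI_task_parallel_iterator(
 *        NULL, next_link_func, list_first, 0, list_len, process_item_func, &settings);
 */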

void BLI_task_parallel_listbase(struct ListBase *listbase,
                                void *userdata,
                                TaskParallelIteratorFunc func,
                                const TaskParallelSettings *settings);
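
/* A minimal sketch (illustrative only; `process_link_func` is hypothetical
 * and must have the TaskParallelIteratorFunc signature, receiving each list
 * link as `item`):
 *
 *    TaskParallelSettings settings;
 *    BLI_parallel_range_settings_defaults(&settings);
 *    BLI_task_parallel_listbase(&my_list, NULL, process_link_func, &settings);
 */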

typedef struct MempoolIterData MempoolIterData;
typedef void (*TaskParallelMempoolFunc)(void *userdata, MempoolIterData *iter);
void BLI_task_parallel_mempool(struct BLI_mempool *mempool,
                               void *userdata,
                               TaskParallelMempoolFunc func,
                               const bool use_threading);
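
/* A minimal sketch (illustrative only; `MyElem` stands for the element type
 * actually stored in the mempool, and `my_pool` is hypothetical):
 *
 *    static void process_elem_func(void *userdata, MempoolIterData *iter)
 *    {
 *      MyElem *elem = (MyElem *)iter;
 *      ... process elem ...
 *    }
 *
 *    BLI_task_parallel_mempool(my_pool, NULL, process_elem_func, true);
 */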

/* TODO(sergey): Think of a better place for this. */
BLI_INLINE void BLI_parallel_range_settings_defaults(TaskParallelSettings *settings)
{
  memset(settings, 0, sizeof(*settings));
  settings->use_threading = true;
  /* Use default heuristic to define actual chunk size. */
  settings->min_iter_per_thread = 0;
}

/* Don't use this; store any thread-specific data in tls->userdata_chunk
 * instead. Only here for code that is to be removed. */
int BLI_task_parallel_thread_id(const TaskParallelTLS *tls);

/* Task Graph Scheduling */
/* Task graphs can be used to create a forest of directed trees and schedule
 * work to any tree. The nodes in the graph can be run in separate threads.
 *
 *     +---- [root] ----+
 *     |                |
 *     v                v
 * [node_1]    +---- [node_2] ----+
 *             |                  |
 *             v                  v
 *          [node_3]           [node_4]
 *
 *    TaskGraph *task_graph = BLI_task_graph_create();
 *    TaskNode *root = BLI_task_graph_node_create(task_graph, root_exec, NULL, NULL);
 *    TaskNode *node_1 = BLI_task_graph_node_create(task_graph, node_exec, NULL, NULL);
 *    TaskNode *node_2 = BLI_task_graph_node_create(task_graph, node_exec, NULL, NULL);
 *    TaskNode *node_3 = BLI_task_graph_node_create(task_graph, node_exec, NULL, NULL);
 *    TaskNode *node_4 = BLI_task_graph_node_create(task_graph, node_exec, NULL, NULL);
 *
 *    BLI_task_graph_edge_create(root, node_1);
 *    BLI_task_graph_edge_create(root, node_2);
 *    BLI_task_graph_edge_create(node_2, node_3);
 *    BLI_task_graph_edge_create(node_2, node_4);
 *
 * Any node can be triggered to start a chain of tasks. Normally you would
 * trigger a root node, but starting the chain of tasks anywhere in the forest
 * or tree is supported. When a node completes, the execution flow is forwarded
 * via the created edges. When a child node has multiple parents, the child
 * node will be triggered once for each parent.
 *
 *    BLI_task_graph_node_push_work(root);
 *
 * In this example, after `root` is finished, `node_1` and `node_2` will be
 * started. Only after `node_2` is finished will `node_3` and `node_4` be
 * started.
 *
 * After scheduling work we need to wait until all the tasks have finished.
 *
 *    BLI_task_graph_work_and_wait(task_graph);
 *
 * When finished, you can clean up all the resources by freeing the task_graph.
 * Nodes are owned by the graph and are freed with it; task_data will only be
 * freed if a free_func was given.
 *
 *    BLI_task_graph_free(task_graph);
 *
 * Work can enter a tree on any node; normally this would be the root node.
 * A `task_graph` can be reused, but the caller needs to make sure the
 * task_data is reset.
 *
 * ** Task-Data **
 *
 * Typically you want to give a task data to work on.
 * Task data can be shared with other nodes, but be careful not to free the
 * data multiple times. Task data is freed when calling `BLI_task_graph_free`.
 *
 *    MyData *task_data = MEM_callocN(sizeof(MyData), __func__);
 *    TaskNode *root = BLI_task_graph_node_create(task_graph, root_exec, task_data, MEM_freeN);
 *    TaskNode *node_1 = BLI_task_graph_node_create(task_graph, node_exec, task_data, NULL);
 *    TaskNode *node_2 = BLI_task_graph_node_create(task_graph, node_exec, task_data, NULL);
 *    TaskNode *node_3 = BLI_task_graph_node_create(task_graph, node_exec, task_data, NULL);
 *    TaskNode *node_4 = BLI_task_graph_node_create(task_graph, node_exec, task_data, NULL);
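 *
 * A run function receives the task_data pointer; a minimal sketch
 * (`root_exec` and `MyData` here are hypothetical):
 *
 *    static void root_exec(void *__restrict task_data)
 *    {
 *      MyData *data = (MyData *)task_data;
 *      ... fill in data for the child nodes ...
 *    }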
 *
 */
struct TaskGraph;
struct TaskNode;

typedef void (*TaskGraphNodeRunFunction)(void *__restrict task_data);
typedef void (*TaskGraphNodeFreeFunction)(void *task_data);

struct TaskGraph *BLI_task_graph_create(void);
void BLI_task_graph_work_and_wait(struct TaskGraph *task_graph);
void BLI_task_graph_free(struct TaskGraph *task_graph);
struct TaskNode *BLI_task_graph_node_create(struct TaskGraph *task_graph,
                                            TaskGraphNodeRunFunction run,
                                            void *user_data,
                                            TaskGraphNodeFreeFunction free_func);
bool BLI_task_graph_node_push_work(struct TaskNode *task_node);
void BLI_task_graph_edge_create(struct TaskNode *from_node, struct TaskNode *to_node);

#ifdef __cplusplus
}
#endif

#endif