/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#ifndef __BLI_TASK_H__
#define __BLI_TASK_H__

#include <string.h> /* for memset() */

struct ListBase;

/** \file
 * \ingroup bli
 */

#include "BLI_threads.h"
#include "BLI_utildefines.h"

#ifdef __cplusplus
extern "C" {
#endif

struct BLI_mempool;

/* Task Scheduler
 *
 * Central scheduler that holds running threads ready to execute tasks. A single
 * queue holds the tasks from all pools.
 *
 * Init/exit must be called before/after any task pools are created/freed, and
 * must be called from the main thread. All other scheduler and pool functions
 * are thread-safe. */

void BLI_task_scheduler_init(void);
void BLI_task_scheduler_exit(void);
int BLI_task_scheduler_num_threads(void);
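
/* A minimal usage sketch (the call sites are illustrative, not prescriptive):
 * initialize once from the main thread at startup, query the thread count as
 * needed, and tear down at exit after all task pools have been freed.
 *
 * BLI_task_scheduler_init();
 * const int num_threads = BLI_task_scheduler_num_threads();
 * ...
 * BLI_task_scheduler_exit();
 */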

/* Task Pool
 *
 * Pool of tasks that will be executed by the central task scheduler. For each
 * pool, we can wait for all tasks to be done, or cancel them before they are
 * done.
 *
 * Running tasks may spawn new tasks.
 *
 * Pools may be nested, i.e. a thread running a task can create another task
 * pool with smaller tasks. When other threads are busy they will continue
 * working on their own tasks; if not, they will join in. No new threads will
 * be launched.
 */

typedef enum TaskPriority {
  TASK_PRIORITY_LOW,
  TASK_PRIORITY_HIGH,
} TaskPriority;

typedef struct TaskPool TaskPool;
typedef void (*TaskRunFunction)(TaskPool *__restrict pool, void *taskdata);
typedef void (*TaskFreeFunction)(TaskPool *__restrict pool, void *taskdata);

/* Regular task pool that immediately starts executing tasks as soon as they
 * are pushed, either on the current or another thread. */
TaskPool *BLI_task_pool_create(void *userdata, TaskPriority priority);

/* Background: always run tasks in a background thread, never immediately
 * execute them. For running background jobs. */
TaskPool *BLI_task_pool_create_background(void *userdata, TaskPriority priority);

/* Background Serial: run tasks one after the other in the background,
 * without parallelization between the tasks. */
TaskPool *BLI_task_pool_create_background_serial(void *userdata, TaskPriority priority);

/* Suspended: don't execute tasks until work_and_wait is called. This is slower
 * as threads can't immediately start working. But it can be used if the data
 * structures the threads operate on are not fully initialized until all tasks
 * are created. */
TaskPool *BLI_task_pool_create_suspended(void *userdata, TaskPriority priority);

/* No threads: immediately executes tasks on the same thread. For debugging. */
TaskPool *BLI_task_pool_create_no_threads(void *userdata);

void BLI_task_pool_free(TaskPool *pool);

void BLI_task_pool_push(TaskPool *pool,
                        TaskRunFunction run,
                        void *taskdata,
                        bool free_taskdata,
                        TaskFreeFunction freedata);

/* Work and wait until all tasks are done. */
void BLI_task_pool_work_and_wait(TaskPool *pool);
/* Cancel all tasks, keep worker threads running. */
void BLI_task_pool_cancel(TaskPool *pool);

/* For worker threads, test if the current task pool has been canceled. This
 * function may only be called from worker threads, and `pool` must be the task
 * pool that the thread is currently executing a task from. */
bool BLI_task_pool_current_canceled(TaskPool *pool);

/* Optional userdata pointer to pass along to the run function. */
void *BLI_task_pool_user_data(TaskPool *pool);

/* Optional mutex to use from the run function. */
ThreadMutex *BLI_task_pool_user_mutex(TaskPool *pool);
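
/* A minimal usage sketch of the pool API (`MyTaskData` and `my_task_run` are
 * hypothetical, for illustration only): push a few tasks, then work and wait.
 *
 * static void my_task_run(TaskPool *__restrict pool, void *taskdata)
 * {
 *   MyTaskData *data = taskdata;
 *   ...do the work, optionally checking BLI_task_pool_current_canceled(pool)...
 * }
 *
 * TaskPool *pool = BLI_task_pool_create(userdata, TASK_PRIORITY_HIGH);
 * for (int i = 0; i < 10; i++) {
 *   MyTaskData *data = MEM_callocN(sizeof(MyTaskData), __func__);
 *   BLI_task_pool_push(pool, my_task_run, data, true, NULL);
 * }
 * BLI_task_pool_work_and_wait(pool);
 * BLI_task_pool_free(pool);
 */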

/* Parallel for routines */

/* Per-thread specific data passed to the callback. */
typedef struct TaskParallelTLS {
  /* Copy of the user-specified chunk, which is copied from the original chunk
   * to all worker threads. This is similar to OpenMP's firstprivate. */
  void *userdata_chunk;
} TaskParallelTLS;

typedef void (*TaskParallelRangeFunc)(void *__restrict userdata,
                                      const int iter,
                                      const TaskParallelTLS *__restrict tls);
typedef void (*TaskParallelReduceFunc)(const void *__restrict userdata,
                                       void *__restrict chunk_join,
                                       void *__restrict chunk);

typedef void (*TaskParallelFreeFunc)(const void *__restrict userdata, void *__restrict chunk);

typedef struct TaskParallelSettings {
  /* Whether the caller allows threading of the particular range.
   * Usually set by some heuristic, which forces threading off when the
   * threading overhead becomes higher than the speed benefit.
   * BLI_task_parallel_range() by itself will always use threading when the
   * range is larger than a chunk size; that is, threading will always be
   * performed.
   */
  bool use_threading;
  /* Each instance of the looping chunks will get a copy of this data
   * (similar to OpenMP's firstprivate).
   */
  void *userdata_chunk;       /* Pointer to actual data. */
  size_t userdata_chunk_size; /* Size of that data. */
  /* Function called to join one user data chunk into another, to reduce
   * the result into the original userdata_chunk memory.
   * The reduce functions should have no side effects, so that they
   * can be run on any thread. */
  TaskParallelReduceFunc func_reduce;
  /* Function called to free data created by TaskParallelRangeFunc. */
  TaskParallelFreeFunc func_free;
  /* Minimum allowed number of range iterations to be handled by a single
   * thread. This allows us to achieve the following:
   * - Reduce the amount of threading overhead.
   * - Partially occupy the thread pool with ranges which are computationally
   *   expensive, but which are smaller than the number of available threads.
   *   For example, it's possible to multi-thread a [0 .. 64] range into 4
   *   threads which will be doing 16 iterations each.
   * This is a preferred way of telling the scheduler when to start threading,
   * rather than having a global use_threading switch based on just range size.
   */
  int min_iter_per_thread;
} TaskParallelSettings;

BLI_INLINE void BLI_parallel_range_settings_defaults(TaskParallelSettings *settings);

void BLI_task_parallel_range(const int start,
                             const int stop,
                             void *userdata,
                             TaskParallelRangeFunc func,
                             const TaskParallelSettings *settings);
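
/* A minimal usage sketch (the data and callbacks below are hypothetical, for
 * illustration only): sum an array in parallel, using a per-thread user data
 * chunk as the partial sum and func_reduce to join the partial results back
 * into the original chunk.
 *
 * typedef struct SumChunk {
 *   double sum;
 * } SumChunk;
 *
 * static void sum_range_fn(void *__restrict userdata,
 *                          const int iter,
 *                          const TaskParallelTLS *__restrict tls)
 * {
 *   const double *values = userdata;
 *   SumChunk *chunk = tls->userdata_chunk;
 *   chunk->sum += values[iter];
 * }
 *
 * static void sum_reduce_fn(const void *__restrict UNUSED(userdata),
 *                           void *__restrict chunk_join,
 *                           void *__restrict chunk)
 * {
 *   ((SumChunk *)chunk_join)->sum += ((SumChunk *)chunk)->sum;
 * }
 *
 * SumChunk total = {0.0};
 * TaskParallelSettings settings;
 * BLI_parallel_range_settings_defaults(&settings);
 * settings.userdata_chunk = &total;
 * settings.userdata_chunk_size = sizeof(SumChunk);
 * settings.func_reduce = sum_reduce_fn;
 * BLI_task_parallel_range(0, num_values, values, sum_range_fn, &settings);
 *
 * After the call, `total.sum` holds the result, with all per-thread chunks
 * joined into it.
 */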

/* This data is shared between all tasks; access to it needs a thread lock or
 * similar protection. */
typedef struct TaskParallelIteratorStateShared {
  /* Maximum number of items to acquire at once. */
  int chunk_size;
  /* Next item to be acquired. */
  void *next_item;
  /* Index of the next item to be acquired. */
  int next_index;
  /* Indicates that the end of iteration has been reached. */
  bool is_finished;
  /* Helper lock to protect access to this data in the iterator getter callback.
   * It can be ignored if the callback implements its own protection system
   * (e.g. using atomics). It will be NULL when the iterator is actually
   * processed in a single thread. */
  SpinLock *spin_lock;
} TaskParallelIteratorStateShared;

typedef void (*TaskParallelIteratorIterFunc)(void *__restrict userdata,
                                             const TaskParallelTLS *__restrict tls,
                                             void **r_next_item,
                                             int *r_next_index,
                                             bool *r_do_abort);

typedef void (*TaskParallelIteratorFunc)(void *__restrict userdata,
                                         void *item,
                                         int index,
                                         const TaskParallelTLS *__restrict tls);

void BLI_task_parallel_iterator(void *userdata,
                                TaskParallelIteratorIterFunc iter_func,
                                void *init_item,
                                const int init_index,
                                const int tot_items,
                                TaskParallelIteratorFunc func,
                                const TaskParallelSettings *settings);

void BLI_task_parallel_listbase(struct ListBase *listbase,
                                void *userdata,
                                TaskParallelIteratorFunc func,
                                const TaskParallelSettings *settings);
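
/* A minimal usage sketch of BLI_task_parallel_listbase (`MyLink` and
 * `my_listbase_fn` are hypothetical, for illustration only): `func` is called
 * once per link, receiving the link pointer as `item` and its position in the
 * list as `index`.
 *
 * static void my_listbase_fn(void *__restrict userdata,
 *                            void *item,
 *                            int index,
 *                            const TaskParallelTLS *__restrict UNUSED(tls))
 * {
 *   MyLink *link = item;
 *   ...process `link`, using `userdata` for read-only shared state...
 * }
 *
 * TaskParallelSettings settings;
 * BLI_parallel_range_settings_defaults(&settings);
 * BLI_task_parallel_listbase(&my_listbase, userdata, my_listbase_fn, &settings);
 */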

typedef struct MempoolIterData MempoolIterData;
typedef void (*TaskParallelMempoolFunc)(void *userdata, MempoolIterData *iter);
void BLI_task_parallel_mempool(struct BLI_mempool *mempool,
                               void *userdata,
                               TaskParallelMempoolFunc func,
                               const bool use_threading);

/* TODO(sergey): Think of a better place for this. */
BLI_INLINE void BLI_parallel_range_settings_defaults(TaskParallelSettings *settings)
{
  memset(settings, 0, sizeof(*settings));
  settings->use_threading = true;
  /* Use default heuristic to define actual chunk size. */
  settings->min_iter_per_thread = 0;
}

/* Don't use this; store any thread-specific data in tls->userdata_chunk instead.
 * Only here for code to be removed. */
int BLI_task_parallel_thread_id(const TaskParallelTLS *tls);

/* Task Graph Scheduling */
/* Task Graphs can be used to create a forest of directed trees and schedule work to any tree.
 * The nodes in the graph can be run in separate threads.
 *
 *     +---- [root] ----+
 *     |                |
 *     v                v
 * [node_1]    +---- [node_2] ----+
 *             |                  |
 *             v                  v
 *          [node_3]          [node_4]
 *
 * TaskGraph *task_graph = BLI_task_graph_create();
 * TaskNode *root = BLI_task_graph_node_create(task_graph, root_exec, NULL, NULL);
 * TaskNode *node_1 = BLI_task_graph_node_create(task_graph, node_exec, NULL, NULL);
 * TaskNode *node_2 = BLI_task_graph_node_create(task_graph, node_exec, NULL, NULL);
 * TaskNode *node_3 = BLI_task_graph_node_create(task_graph, node_exec, NULL, NULL);
 * TaskNode *node_4 = BLI_task_graph_node_create(task_graph, node_exec, NULL, NULL);
 *
 * BLI_task_graph_edge_create(root, node_1);
 * BLI_task_graph_edge_create(root, node_2);
 * BLI_task_graph_edge_create(node_2, node_3);
 * BLI_task_graph_edge_create(node_2, node_4);
 *
 * Any node can be triggered to start a chain of tasks. Normally you would trigger a root node,
 * but it is supported to start the chain of tasks anywhere in the forest or tree. When a node
 * completes, the execution flow is forwarded via the created edges.
 * When a child node has multiple parents, the child node will be triggered once for each parent.
 *
 * BLI_task_graph_node_push_work(root);
 *
 * In this example, after `root` is finished, `node_1` and `node_2` will be started.
 * Only after `node_2` is finished will `node_3` and `node_4` be started.
 *
 * After scheduling work we need to wait until all the tasks have been finished.
 *
 * BLI_task_graph_work_and_wait(task_graph);
 *
 * When finished you can clean up all the resources by freeing the task_graph. Nodes are owned
 * by the graph and are freed; task_data will only be freed if a free_func was given.
 *
 * BLI_task_graph_free(task_graph);
 *
 * Work can enter a tree on any node. Normally this would be the root_node.
 * A `task_graph` can be reused, but the caller needs to make sure the task_data is reset.
 *
 * ** Task-Data **
 *
 * Typically you want to give a task some data to work on.
 * Task data can be shared with other nodes, but be careful not to free the data multiple times.
 * Task data is freed when calling `BLI_task_graph_free`.
 *
 * MyData *task_data = MEM_callocN(sizeof(MyData), __func__);
 * TaskNode *root = BLI_task_graph_node_create(task_graph, root_exec, task_data, MEM_freeN);
 * TaskNode *node_1 = BLI_task_graph_node_create(task_graph, node_exec, task_data, NULL);
 * TaskNode *node_2 = BLI_task_graph_node_create(task_graph, node_exec, task_data, NULL);
 * TaskNode *node_3 = BLI_task_graph_node_create(task_graph, node_exec, task_data, NULL);
 * TaskNode *node_4 = BLI_task_graph_node_create(task_graph, node_exec, task_data, NULL);
 *
 */
struct TaskGraph;
struct TaskNode;

typedef void (*TaskGraphNodeRunFunction)(void *__restrict task_data);
typedef void (*TaskGraphNodeFreeFunction)(void *task_data);

struct TaskGraph *BLI_task_graph_create(void);
void BLI_task_graph_work_and_wait(struct TaskGraph *task_graph);
void BLI_task_graph_free(struct TaskGraph *task_graph);
struct TaskNode *BLI_task_graph_node_create(struct TaskGraph *task_graph,
                                            TaskGraphNodeRunFunction run,
                                            void *user_data,
                                            TaskGraphNodeFreeFunction free_func);
bool BLI_task_graph_node_push_work(struct TaskNode *task_node);
void BLI_task_graph_edge_create(struct TaskNode *from_node, struct TaskNode *to_node);

#ifdef __cplusplus
}
#endif

#endif