1 #ifndef PTHREADPOOL_H_
2 #define PTHREADPOOL_H_
3 
4 #include <stddef.h>
5 #include <stdint.h>
6 
/* Opaque handle to a thread pool; the struct is only forward-declared here. */
7 typedef struct pthreadpool* pthreadpool_t;
8 
/*
 * Task callbacks invoked by the pthreadpool_parallelize_* functions. The first
 * argument is the caller-supplied context pointer. For the non-tiled variants,
 * the remaining arguments are the item indices along each grid dimension. For
 * the tile_1d/tile_2d variants, the indices are followed by the size(s) of the
 * tile along the tiled (last one or two) dimension(s).
 *
 * NOTE(review): the 5D and 6D parallelization functions are not declared in
 * this part of the header; their callbacks presumably follow the same pattern.
 */
9 typedef void (*pthreadpool_task_1d_t)(void*, size_t);
10 typedef void (*pthreadpool_task_1d_tile_1d_t)(void*, size_t, size_t);
11 typedef void (*pthreadpool_task_2d_t)(void*, size_t, size_t);
12 typedef void (*pthreadpool_task_2d_tile_1d_t)(void*, size_t, size_t, size_t);
13 typedef void (*pthreadpool_task_2d_tile_2d_t)(void*, size_t, size_t, size_t, size_t);
14 typedef void (*pthreadpool_task_3d_t)(void*, size_t, size_t, size_t);
15 typedef void (*pthreadpool_task_3d_tile_1d_t)(void*, size_t, size_t, size_t, size_t);
16 typedef void (*pthreadpool_task_3d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t);
17 typedef void (*pthreadpool_task_4d_t)(void*, size_t, size_t, size_t, size_t);
18 typedef void (*pthreadpool_task_4d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t);
19 typedef void (*pthreadpool_task_4d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t);
20 typedef void (*pthreadpool_task_5d_t)(void*, size_t, size_t, size_t, size_t, size_t);
21 typedef void (*pthreadpool_task_5d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t);
22 typedef void (*pthreadpool_task_5d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t);
23 typedef void (*pthreadpool_task_6d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t);
24 typedef void (*pthreadpool_task_6d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t);
25 typedef void (*pthreadpool_task_6d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t);
26 
/*
 * Microarchitecture-aware task callbacks used by the *_with_uarch functions.
 * The second argument is the microarchitecture index; the remaining arguments
 * are as for the corresponding callbacks above.
 */
27 typedef void (*pthreadpool_task_1d_with_id_t)(void*, uint32_t, size_t);
28 typedef void (*pthreadpool_task_2d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t);
29 typedef void (*pthreadpool_task_3d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t);
30 typedef void (*pthreadpool_task_4d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t, size_t);
31 
32 
33 /**
34  * Disable support for denormalized numbers to the maximum extent possible for
35  * the duration of the computation.
36  *
37  * Handling denormalized floating-point numbers is often implemented in
38  * microcode, and incurs significant performance degradation. This hint
39  * instructs the thread pool to disable support for denormalized numbers before
40  * running the computation by manipulating architecture-specific control
41  * registers, and restore the initial value of control registers after the
42  * computation is complete. The thread pool temporarily disables denormalized
43  * numbers on all threads involved in the computation (i.e. the caller threads,
44  * and potentially worker threads).
45  *
46  * Disabling denormalized numbers may have a small negative effect on results'
47  * accuracy. As various architectures differ in capabilities to control
48  * processing of denormalized numbers, using this flag may also hurt results'
49  * reproducibility across different instruction set architectures.
50  */
51 #define PTHREADPOOL_FLAG_DISABLE_DENORMALS 0x00000001
52 
53 /**
54  * Yield worker threads to the system scheduler after the operation is finished.
55  *
56  * Force workers to use kernel wait (instead of the default active spin-wait)
57  * for new commands after this command is processed. This flag affects only the
58  * immediate next operation on this thread pool. To make the thread pool always
59  * use kernel wait, pass this flag to all parallelization functions.
60  */
61 #define PTHREADPOOL_FLAG_YIELD_WORKERS 0x00000002
62 
63 #ifdef __cplusplus
64 extern "C" {
65 #endif
66 
67 /**
68  * Create a thread pool with the specified number of threads.
69  *
70  * @param  threads_count  the number of threads in the thread pool.
71  *    A value of 0 has a special interpretation: it creates a thread pool with
72  *    as many threads as there are logical processors in the system.
73  *
74  * @returns  A pointer to an opaque thread pool object if the call is
75  *    successful, or a NULL pointer if the call failed.
76  */
77 pthreadpool_t pthreadpool_create(size_t threads_count);
78 
79 /**
80  * Query the number of threads in a thread pool.
81  *
82  * @param  threadpool  the thread pool to query.
83  *
84  * @returns  The number of threads in the thread pool.
85  */
86 size_t pthreadpool_get_threads_count(pthreadpool_t threadpool);
87 
88 /**
89  * Process items on a 1D grid.
90  *
91  * The function implements a parallel version of the following snippet:
92  *
93  *   for (size_t i = 0; i < range; i++)
94  *     function(context, i);
95  *
96  * When the function returns, all items have been processed and the thread pool
97  * is ready for a new task.
98  *
99  * @note If multiple threads call this function with the same thread pool, the
100  *    calls are serialized.
101  *
102  * @param threadpool  the thread pool to use for parallelisation. If threadpool
103  *    is NULL, all items are processed serially on the calling thread.
104  * @param function    the function to call for each item.
105  * @param context     the first argument passed to the specified function.
106  * @param range       the number of items on the 1D grid to process. The
107  *    specified function will be called once for each item.
108  * @param flags       a bitwise combination of zero or more optional flags
109  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
110  */
111 void pthreadpool_parallelize_1d(
112 	pthreadpool_t threadpool,
113 	pthreadpool_task_1d_t function,
114 	void* context,
115 	size_t range,
116 	uint32_t flags);
117 
118 /**
119  * Process items on a 1D grid using a microarchitecture-aware task function.
120  *
121  * The function implements a parallel version of the following snippet:
122  *
123  *   uint32_t uarch_index = cpuinfo_initialize() ?
124  *       cpuinfo_get_current_uarch_index() : default_uarch_index;
125  *   if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
126  *   for (size_t i = 0; i < range; i++)
127  *     function(context, uarch_index, i);
128  *
129  * When the function returns, all items have been processed and the thread pool
130  * is ready for a new task.
131  *
132  * @note If multiple threads call this function with the same thread pool, the
133  *    calls are serialized.
134  *
135  * @param threadpool           the thread pool to use for parallelisation. If
136  *    threadpool is NULL, all items are processed serially on the calling
137  *    thread.
138  * @param function             the function to call for each item.
139  * @param context              the first argument passed to the specified
140  *    function.
141  * @param default_uarch_index  the microarchitecture index to use when
142  *    pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
143  *    or index returned by cpuinfo_get_current_uarch_index() exceeds the
144  *    max_uarch_index value.
145  * @param max_uarch_index      the maximum microarchitecture index expected by
146  *    the specified function. If the index returned by
147  *    cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
148  *    will be used instead. default_uarch_index can exceed max_uarch_index.
149  * @param range                the number of items on the 1D grid to process.
150  *    The specified function will be called once for each item.
151  * @param flags                a bitwise combination of zero or more optional
152  *    flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
153  *    PTHREADPOOL_FLAG_YIELD_WORKERS)
154  */
155 void pthreadpool_parallelize_1d_with_uarch(
156 	pthreadpool_t threadpool,
157 	pthreadpool_task_1d_with_id_t function,
158 	void* context,
159 	uint32_t default_uarch_index,
160 	uint32_t max_uarch_index,
161 	size_t range,
162 	uint32_t flags);
163 
164 /**
165  * Process items on a 1D grid with specified maximum tile size.
166  *
167  * The function implements a parallel version of the following snippet:
168  *
169  *   for (size_t i = 0; i < range; i += tile)
170  *     function(context, i, min(range - i, tile));
171  *
172  * When the function returns, all items have been processed and the thread pool
173  * is ready for a new task.
174  *
175  * @note If multiple threads call this function with the same thread pool,
176  *    the calls are serialized.
177  *
178  * @param threadpool  the thread pool to use for parallelisation. If threadpool
179  *    is NULL, all items are processed serially on the calling thread.
180  * @param function    the function to call for each tile.
181  * @param context     the first argument passed to the specified function.
182  * @param range       the number of items on the 1D grid to process.
183  * @param tile        the maximum number of items on the 1D grid to process in
184  *    one function call.
185  * @param flags       a bitwise combination of zero or more optional flags
186  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
187  */
188 void pthreadpool_parallelize_1d_tile_1d(
189 	pthreadpool_t threadpool,
190 	pthreadpool_task_1d_tile_1d_t function,
191 	void* context,
192 	size_t range,
193 	size_t tile,
194 	uint32_t flags);
195 
196 /**
197  * Process items on a 2D grid.
198  *
199  * The function implements a parallel version of the following snippet:
200  *
201  *   for (size_t i = 0; i < range_i; i++)
202  *     for (size_t j = 0; j < range_j; j++)
203  *       function(context, i, j);
204  *
205  * When the function returns, all items have been processed and the thread pool
206  * is ready for a new task.
207  *
208  * @note If multiple threads call this function with the same thread pool, the
209  *    calls are serialized.
210  *
211  * @param threadpool  the thread pool to use for parallelisation. If threadpool
212  *    is NULL, all items are processed serially on the calling thread.
213  * @param function    the function to call for each item.
214  * @param context     the first argument passed to the specified function.
215  * @param range_i     the number of items to process along the first dimension
216  *    of the 2D grid.
217  * @param range_j     the number of items to process along the second dimension
218  *    of the 2D grid.
219  * @param flags       a bitwise combination of zero or more optional flags
220  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
221  */
222 void pthreadpool_parallelize_2d(
223 	pthreadpool_t threadpool,
224 	pthreadpool_task_2d_t function,
225 	void* context,
226 	size_t range_i,
227 	size_t range_j,
228 	uint32_t flags);
229 
230 /**
231  * Process items on a 2D grid with the specified maximum tile size along the
232  * last grid dimension.
233  *
234  * The function implements a parallel version of the following snippet:
235  *
236  *   for (size_t i = 0; i < range_i; i++)
237  *     for (size_t j = 0; j < range_j; j += tile_j)
238  *       function(context, i, j, min(range_j - j, tile_j));
239  *
240  * When the function returns, all items have been processed and the thread pool
241  * is ready for a new task.
242  *
243  * @note If multiple threads call this function with the same thread pool, the
244  *    calls are serialized.
245  *
246  * @param threadpool  the thread pool to use for parallelisation. If threadpool
247  *    is NULL, all items are processed serially on the calling thread.
248  * @param function    the function to call for each tile.
249  * @param context     the first argument passed to the specified function.
250  * @param range_i     the number of items to process along the first dimension
251  *    of the 2D grid.
252  * @param range_j     the number of items to process along the second dimension
253  *    of the 2D grid.
254  * @param tile_j      the maximum number of items along the second dimension of
255  *    the 2D grid to process in one function call.
256  * @param flags       a bitwise combination of zero or more optional flags
257  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
258  */
259 void pthreadpool_parallelize_2d_tile_1d(
260 	pthreadpool_t threadpool,
261 	pthreadpool_task_2d_tile_1d_t function,
262 	void* context,
263 	size_t range_i,
264 	size_t range_j,
265 	size_t tile_j,
266 	uint32_t flags);
267 
268 /**
269  * Process items on a 2D grid with the specified maximum tile size along each
270  * grid dimension.
271  *
272  * The function implements a parallel version of the following snippet:
273  *
274  *   for (size_t i = 0; i < range_i; i += tile_i)
275  *     for (size_t j = 0; j < range_j; j += tile_j)
276  *       function(context, i, j,
277  *         min(range_i - i, tile_i), min(range_j - j, tile_j));
278  *
279  * When the function returns, all items have been processed and the thread pool
280  * is ready for a new task.
281  *
282  * @note If multiple threads call this function with the same thread pool, the
283  *    calls are serialized.
284  *
285  * @param threadpool  the thread pool to use for parallelisation. If threadpool
286  *    is NULL, all items are processed serially on the calling thread.
287  * @param function    the function to call for each tile.
288  * @param context     the first argument passed to the specified function.
289  * @param range_i     the number of items to process along the first dimension
290  *    of the 2D grid.
291  * @param range_j     the number of items to process along the second dimension
292  *    of the 2D grid.
293  * @param tile_i      the maximum number of items along the first dimension of
294  *    the 2D grid to process in one function call.
295  * @param tile_j      the maximum number of items along the second dimension of
296  *    the 2D grid to process in one function call.
297  * @param flags       a bitwise combination of zero or more optional flags
298  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
299  */
300 void pthreadpool_parallelize_2d_tile_2d(
301 	pthreadpool_t threadpool,
302 	pthreadpool_task_2d_tile_2d_t function,
303 	void* context,
304 	size_t range_i,
305 	size_t range_j,
306 	size_t tile_i,
307 	size_t tile_j,
308 	uint32_t flags);
309 
310 /**
311  * Process items on a 2D grid with the specified maximum tile size along each
312  * grid dimension using a microarchitecture-aware task function.
313  *
314  * The function implements a parallel version of the following snippet:
315  *
316  *   uint32_t uarch_index = cpuinfo_initialize() ?
317  *       cpuinfo_get_current_uarch_index() : default_uarch_index;
318  *   if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
319  *   for (size_t i = 0; i < range_i; i += tile_i)
320  *     for (size_t j = 0; j < range_j; j += tile_j)
321  *       function(context, uarch_index, i, j,
322  *         min(range_i - i, tile_i), min(range_j - j, tile_j));
323  *
324  * When the function returns, all items have been processed and the thread pool
325  * is ready for a new task.
326  *
327  * @note If multiple threads call this function with the same thread pool, the
328  *    calls are serialized.
329  *
330  * @param threadpool           the thread pool to use for parallelisation. If
331  *    threadpool is NULL, all items are processed serially on the calling
332  *    thread.
333  * @param function             the function to call for each tile.
334  * @param context              the first argument passed to the specified
335  *    function.
336  * @param default_uarch_index  the microarchitecture index to use when
337  *                             pthreadpool is configured without cpuinfo,
338  *                             cpuinfo initialization failed, or index returned
339  *                             by cpuinfo_get_current_uarch_index() exceeds
340  *                             the max_uarch_index value.
341  * @param max_uarch_index      the maximum microarchitecture index expected
342  *                             by the specified function. If the index returned
343  *                             by cpuinfo_get_current_uarch_index() exceeds this
344  *                             value, default_uarch_index will be used instead.
345  *                             default_uarch_index can exceed max_uarch_index.
346  * @param range_i              the number of items to process along the first
347  *    dimension of the 2D grid.
348  * @param range_j              the number of items to process along the second
349  *    dimension of the 2D grid.
350  * @param tile_i               the maximum number of items along the first
351  *    dimension of the 2D grid to process in one function call.
352  * @param tile_j               the maximum number of items along the second
353  *    dimension of the 2D grid to process in one function call.
354  * @param flags                a bitwise combination of zero or more optional
355  *    flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
356  *    PTHREADPOOL_FLAG_YIELD_WORKERS)
357  */
358 void pthreadpool_parallelize_2d_tile_2d_with_uarch(
359 	pthreadpool_t threadpool,
360 	pthreadpool_task_2d_tile_2d_with_id_t function,
361 	void* context,
362 	uint32_t default_uarch_index,
363 	uint32_t max_uarch_index,
364 	size_t range_i,
365 	size_t range_j,
366 	size_t tile_i,
367 	size_t tile_j,
368 	uint32_t flags);
369 
370 /**
371  * Process items on a 3D grid.
372  *
373  * The function implements a parallel version of the following snippet:
374  *
375  *   for (size_t i = 0; i < range_i; i++)
376  *     for (size_t j = 0; j < range_j; j++)
377  *       for (size_t k = 0; k < range_k; k++)
378  *         function(context, i, j, k);
379  *
380  * When the function returns, all items have been processed and the thread pool
381  * is ready for a new task.
382  *
383  * @note If multiple threads call this function with the same thread pool, the
384  *    calls are serialized.
385  *
386  * @param threadpool  the thread pool to use for parallelisation. If threadpool
387  *    is NULL, all items are processed serially on the calling thread.
388  * @param function    the function to call for each item.
389  * @param context     the first argument passed to the specified function.
390  * @param range_i     the number of items to process along the first dimension
391  *    of the 3D grid.
392  * @param range_j     the number of items to process along the second dimension
393  *    of the 3D grid.
394  * @param range_k     the number of items to process along the third dimension
395  *    of the 3D grid.
396  * @param flags       a bitwise combination of zero or more optional flags
397  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
398  */
399 void pthreadpool_parallelize_3d(
400 	pthreadpool_t threadpool,
401 	pthreadpool_task_3d_t function,
402 	void* context,
403 	size_t range_i,
404 	size_t range_j,
405 	size_t range_k,
406 	uint32_t flags);
407 
408 /**
409  * Process items on a 3D grid with the specified maximum tile size along the
410  * last grid dimension.
411  *
412  * The function implements a parallel version of the following snippet:
413  *
414  *   for (size_t i = 0; i < range_i; i++)
415  *     for (size_t j = 0; j < range_j; j++)
416  *       for (size_t k = 0; k < range_k; k += tile_k)
417  *         function(context, i, j, k, min(range_k - k, tile_k));
418  *
419  * When the function returns, all items have been processed and the thread pool
420  * is ready for a new task.
421  *
422  * @note If multiple threads call this function with the same thread pool, the
423  *    calls are serialized.
424  *
425  * @param threadpool  the thread pool to use for parallelisation. If threadpool
426  *    is NULL, all items are processed serially on the calling thread.
427  * @param function    the function to call for each tile.
428  * @param context     the first argument passed to the specified function.
429  * @param range_i     the number of items to process along the first dimension
430  *    of the 3D grid.
431  * @param range_j     the number of items to process along the second dimension
432  *    of the 3D grid.
433  * @param range_k     the number of items to process along the third dimension
434  *    of the 3D grid.
435  * @param tile_k      the maximum number of items along the third dimension of
436  *    the 3D grid to process in one function call.
437  * @param flags       a bitwise combination of zero or more optional flags
438  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
439  */
440 void pthreadpool_parallelize_3d_tile_1d(
441 	pthreadpool_t threadpool,
442 	pthreadpool_task_3d_tile_1d_t function,
443 	void* context,
444 	size_t range_i,
445 	size_t range_j,
446 	size_t range_k,
447 	size_t tile_k,
448 	uint32_t flags);
449 
450 /**
451  * Process items on a 3D grid with the specified maximum tile size along the
452  * last two grid dimensions.
453  *
454  * The function implements a parallel version of the following snippet:
455  *
456  *   for (size_t i = 0; i < range_i; i++)
457  *     for (size_t j = 0; j < range_j; j += tile_j)
458  *       for (size_t k = 0; k < range_k; k += tile_k)
459  *         function(context, i, j, k,
460  *           min(range_j - j, tile_j), min(range_k - k, tile_k));
461  *
462  * When the function returns, all items have been processed and the thread pool
463  * is ready for a new task.
464  *
465  * @note If multiple threads call this function with the same thread pool, the
466  *    calls are serialized.
467  *
468  * @param threadpool  the thread pool to use for parallelisation. If threadpool
469  *    is NULL, all items are processed serially on the calling thread.
470  * @param function    the function to call for each tile.
471  * @param context     the first argument passed to the specified function.
472  * @param range_i     the number of items to process along the first dimension
473  *    of the 3D grid.
474  * @param range_j     the number of items to process along the second dimension
475  *    of the 3D grid.
476  * @param range_k     the number of items to process along the third dimension
477  *    of the 3D grid.
478  * @param tile_j      the maximum number of items along the second dimension of
479  *    the 3D grid to process in one function call.
480  * @param tile_k      the maximum number of items along the third dimension of
481  *    the 3D grid to process in one function call.
482  * @param flags       a bitwise combination of zero or more optional flags
483  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
484  */
485 void pthreadpool_parallelize_3d_tile_2d(
486 	pthreadpool_t threadpool,
487 	pthreadpool_task_3d_tile_2d_t function,
488 	void* context,
489 	size_t range_i,
490 	size_t range_j,
491 	size_t range_k,
492 	size_t tile_j,
493 	size_t tile_k,
494 	uint32_t flags);
495 
496 /**
497  * Process items on a 3D grid with the specified maximum tile size along the
498  * last two grid dimensions using a microarchitecture-aware task function.
499  *
500  * The function implements a parallel version of the following snippet:
501  *
502  *   uint32_t uarch_index = cpuinfo_initialize() ?
503  *       cpuinfo_get_current_uarch_index() : default_uarch_index;
504  *   if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
505  *   for (size_t i = 0; i < range_i; i++)
506  *     for (size_t j = 0; j < range_j; j += tile_j)
507  *       for (size_t k = 0; k < range_k; k += tile_k)
508  *         function(context, uarch_index, i, j, k,
509  *           min(range_j - j, tile_j), min(range_k - k, tile_k));
510  *
511  * When the function returns, all items have been processed and the thread pool
512  * is ready for a new task.
513  *
514  * @note If multiple threads call this function with the same thread pool, the
515  *    calls are serialized.
516  *
517  * @param threadpool           the thread pool to use for parallelisation. If
518  *    threadpool is NULL, all items are processed serially on the calling
519  *    thread.
520  * @param function             the function to call for each tile.
521  * @param context              the first argument passed to the specified
522  *    function.
523  * @param default_uarch_index  the microarchitecture index to use when
524  *    pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
525  *    or index returned by cpuinfo_get_current_uarch_index() exceeds the
526  *    max_uarch_index value.
527  * @param max_uarch_index      the maximum microarchitecture index expected by
528  *    the specified function. If the index returned by
529  *    cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
530  *    will be used instead. default_uarch_index can exceed max_uarch_index.
531  * @param range_i              the number of items to process along the first
532  *    dimension of the 3D grid.
533  * @param range_j              the number of items to process along the second
534  *    dimension of the 3D grid.
535  * @param range_k              the number of items to process along the third
536  *    dimension of the 3D grid.
537  * @param tile_j               the maximum number of items along the second
538  *    dimension of the 3D grid to process in one function call.
539  * @param tile_k               the maximum number of items along the third
540  *    dimension of the 3D grid to process in one function call.
541  * @param flags                a bitwise combination of zero or more optional
542  *    flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
543  *    PTHREADPOOL_FLAG_YIELD_WORKERS)
544  */
545 void pthreadpool_parallelize_3d_tile_2d_with_uarch(
546 	pthreadpool_t threadpool,
547 	pthreadpool_task_3d_tile_2d_with_id_t function,
548 	void* context,
549 	uint32_t default_uarch_index,
550 	uint32_t max_uarch_index,
551 	size_t range_i,
552 	size_t range_j,
553 	size_t range_k,
554 	size_t tile_j,
555 	size_t tile_k,
556 	uint32_t flags);
557 
558 /**
559  * Process items on a 4D grid.
560  *
561  * The function implements a parallel version of the following snippet:
562  *
563  *   for (size_t i = 0; i < range_i; i++)
564  *     for (size_t j = 0; j < range_j; j++)
565  *       for (size_t k = 0; k < range_k; k++)
566  *         for (size_t l = 0; l < range_l; l++)
567  *           function(context, i, j, k, l);
568  *
569  * When the function returns, all items have been processed and the thread pool
570  * is ready for a new task.
571  *
572  * @note If multiple threads call this function with the same thread pool, the
573  *    calls are serialized.
574  *
575  * @param threadpool  the thread pool to use for parallelisation. If threadpool
576  *    is NULL, all items are processed serially on the calling thread.
577  * @param function    the function to call for each item.
578  * @param context     the first argument passed to the specified function.
579  * @param range_i     the number of items to process along the first dimension
580  *    of the 4D grid.
581  * @param range_j     the number of items to process along the second dimension
582  *    of the 4D grid.
583  * @param range_k     the number of items to process along the third dimension
584  *    of the 4D grid.
585  * @param range_l     the number of items to process along the fourth dimension
586  *    of the 4D grid.
587  * @param flags       a bitwise combination of zero or more optional flags
588  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
589  */
590 void pthreadpool_parallelize_4d(
591 	pthreadpool_t threadpool,
592 	pthreadpool_task_4d_t function,
593 	void* context,
594 	size_t range_i,
595 	size_t range_j,
596 	size_t range_k,
597 	size_t range_l,
598 	uint32_t flags);
599 
600 /**
601  * Process items on a 4D grid with the specified maximum tile size along the
602  * last grid dimension.
603  *
604  * The function implements a parallel version of the following snippet:
605  *
606  *   for (size_t i = 0; i < range_i; i++)
607  *     for (size_t j = 0; j < range_j; j++)
608  *       for (size_t k = 0; k < range_k; k++)
609  *         for (size_t l = 0; l < range_l; l += tile_l)
610  *           function(context, i, j, k, l, min(range_l - l, tile_l));
611  *
612  * When the function returns, all items have been processed and the thread pool
613  * is ready for a new task.
614  *
615  * @note If multiple threads call this function with the same thread pool, the
616  *    calls are serialized.
617  *
618  * @param threadpool  the thread pool to use for parallelisation. If threadpool
619  *    is NULL, all items are processed serially on the calling thread.
620  * @param function    the function to call for each tile.
621  * @param context     the first argument passed to the specified function.
622  * @param range_i     the number of items to process along the first dimension
623  *    of the 4D grid.
624  * @param range_j     the number of items to process along the second dimension
625  *    of the 4D grid.
626  * @param range_k     the number of items to process along the third dimension
627  *    of the 4D grid.
628  * @param range_l     the number of items to process along the fourth dimension
629  *    of the 4D grid.
630  * @param tile_l      the maximum number of items along the fourth dimension of
631  *    the 4D grid to process in one function call.
632  * @param flags       a bitwise combination of zero or more optional flags
633  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
634  */
635 void pthreadpool_parallelize_4d_tile_1d(
636 	pthreadpool_t threadpool,
637 	pthreadpool_task_4d_tile_1d_t function,
638 	void* context,
639 	size_t range_i,
640 	size_t range_j,
641 	size_t range_k,
642 	size_t range_l,
643 	size_t tile_l,
644 	uint32_t flags);
645 
646 /**
647  * Process items on a 4D grid with the specified maximum tile size along the
648  * last two grid dimensions.
649  *
650  * The function implements a parallel version of the following snippet:
651  *
652  *   for (size_t i = 0; i < range_i; i++)
653  *     for (size_t j = 0; j < range_j; j++)
654  *       for (size_t k = 0; k < range_k; k += tile_k)
655  *         for (size_t l = 0; l < range_l; l += tile_l)
656  *           function(context, i, j, k, l,
657  *             min(range_k - k, tile_k), min(range_l - l, tile_l));
658  *
659  * When the function returns, all items have been processed and the thread pool
660  * is ready for a new task.
661  *
662  * @note If multiple threads call this function with the same thread pool, the
663  *    calls are serialized.
664  *
665  * @param threadpool  the thread pool to use for parallelisation. If threadpool
666  *    is NULL, all items are processed serially on the calling thread.
667  * @param function    the function to call for each tile.
668  * @param context     the first argument passed to the specified function.
669  * @param range_i     the number of items to process along the first dimension
670  *    of the 4D grid.
671  * @param range_j     the number of items to process along the second dimension
672  *    of the 4D grid.
673  * @param range_k     the number of items to process along the third dimension
674  *    of the 4D grid.
675  * @param range_l     the number of items to process along the fourth dimension
676  *    of the 4D grid.
677  * @param tile_k      the maximum number of items along the third dimension of
678  *    the 4D grid to process in one function call.
679  * @param tile_l      the maximum number of items along the fourth dimension of
680  *    the 4D grid to process in one function call.
681  * @param flags       a bitwise combination of zero or more optional flags
682  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
683  */
684 void pthreadpool_parallelize_4d_tile_2d(
685 	pthreadpool_t threadpool,
686 	pthreadpool_task_4d_tile_2d_t function,
687 	void* context,
688 	size_t range_i,
689 	size_t range_j,
690 	size_t range_k,
691 	size_t range_l,
692 	size_t tile_k,
693 	size_t tile_l,
694 	uint32_t flags);
695 
696 /**
697  * Process items on a 4D grid with the specified maximum tile size along the
698  * last two grid dimensions using a microarchitecture-aware task function.
699  *
700  * The function implements a parallel version of the following snippet:
701  *
702  *   uint32_t uarch_index = cpuinfo_initialize() ?
703  *       cpuinfo_get_current_uarch_index() : default_uarch_index;
704  *   if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
705  *   for (size_t i = 0; i < range_i; i++)
706  *     for (size_t j = 0; j < range_j; j++)
707  *       for (size_t k = 0; k < range_k; k += tile_k)
708  *         for (size_t l = 0; l < range_l; l += tile_l)
709  *           function(context, uarch_index, i, j, k, l,
710  *             min(range_k - k, tile_k), min(range_l - l, tile_l));
711  *
712  * When the function returns, all items have been processed and the thread pool
713  * is ready for a new task.
714  *
715  * @note If multiple threads call this function with the same thread pool, the
716  *    calls are serialized.
717  *
718  * @param threadpool           the thread pool to use for parallelisation. If
719  *    threadpool is NULL, all items are processed serially on the calling
720  *    thread.
721  * @param function             the function to call for each tile.
722  * @param context              the first argument passed to the specified
723  *    function.
724  * @param default_uarch_index  the microarchitecture index to use when
725  *    pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
726  *    or the index returned by cpuinfo_get_current_uarch_index() exceeds the
727  *    max_uarch_index value.
728  * @param max_uarch_index      the maximum microarchitecture index expected by
729  *    the specified function. If the index returned by
730  *    cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
731  *    will be used instead. default_uarch_index can exceed max_uarch_index.
732  * @param range_i              the number of items to process along the first
733  *    dimension of the 4D grid.
734  * @param range_j              the number of items to process along the second
735  *    dimension of the 4D grid.
736  * @param range_k              the number of items to process along the third
737  *    dimension of the 4D grid.
738  * @param range_l              the number of items to process along the fourth
739  *    dimension of the 4D grid.
740  * @param tile_k               the maximum number of items along the third
741  *    dimension of the 4D grid to process in one function call.
742  * @param tile_l               the maximum number of items along the fourth
743  *    dimension of the 4D grid to process in one function call.
744  * @param flags                a bitwise combination of zero or more optional
745  *    flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
746  *    PTHREADPOOL_FLAG_YIELD_WORKERS)
747  */
748 void pthreadpool_parallelize_4d_tile_2d_with_uarch(
749 	pthreadpool_t threadpool,
750 	pthreadpool_task_4d_tile_2d_with_id_t function,
751 	void* context,
752 	uint32_t default_uarch_index,
753 	uint32_t max_uarch_index,
754 	size_t range_i,
755 	size_t range_j,
756 	size_t range_k,
757 	size_t range_l,
758 	size_t tile_k,
759 	size_t tile_l,
760 	uint32_t flags);
761 
762 /**
763  * Process items on a 5D grid.
764  *
765  * The function implements a parallel version of the following snippet:
766  *
767  *   for (size_t i = 0; i < range_i; i++)
768  *     for (size_t j = 0; j < range_j; j++)
769  *       for (size_t k = 0; k < range_k; k++)
770  *         for (size_t l = 0; l < range_l; l++)
771  *           for (size_t m = 0; m < range_m; m++)
772  *             function(context, i, j, k, l, m);
773  *
774  * When the function returns, all items have been processed and the thread pool
775  * is ready for a new task.
776  *
777  * @note If multiple threads call this function with the same thread pool, the
778  *    calls are serialized.
779  *
780  * @param threadpool  the thread pool to use for parallelisation. If threadpool
781  *    is NULL, all items are processed serially on the calling thread.
782  * @param function    the function to call for each item.
783  * @param context     the first argument passed to the specified function.
784  * @param range_i     the number of items to process along the first dimension
785  *    of the 5D grid.
786  * @param range_j     the number of items to process along the second dimension
787  *    of the 5D grid.
788  * @param range_k     the number of items to process along the third dimension
789  *    of the 5D grid.
790  * @param range_l     the number of items to process along the fourth dimension
791  *    of the 5D grid.
792  * @param range_m     the number of items to process along the fifth dimension
793  *    of the 5D grid.
794  * @param flags       a bitwise combination of zero or more optional flags
795  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
796  */
797 void pthreadpool_parallelize_5d(
798 	pthreadpool_t threadpool,
799 	pthreadpool_task_5d_t function,
800 	void* context,
801 	size_t range_i,
802 	size_t range_j,
803 	size_t range_k,
804 	size_t range_l,
805 	size_t range_m,
806 	uint32_t flags);
807 
808 /**
809  * Process items on a 5D grid with the specified maximum tile size along the
810  * last grid dimension.
811  *
812  * The function implements a parallel version of the following snippet:
813  *
814  *   for (size_t i = 0; i < range_i; i++)
815  *     for (size_t j = 0; j < range_j; j++)
816  *       for (size_t k = 0; k < range_k; k++)
817  *         for (size_t l = 0; l < range_l; l++)
818  *           for (size_t m = 0; m < range_m; m += tile_m)
819  *             function(context, i, j, k, l, m, min(range_m - m, tile_m));
820  *
821  * When the function returns, all items have been processed and the thread pool
822  * is ready for a new task.
823  *
824  * @note If multiple threads call this function with the same thread pool, the
825  *    calls are serialized.
826  *
827  * @param threadpool  the thread pool to use for parallelisation. If threadpool
828  *    is NULL, all items are processed serially on the calling thread.
829  * @param function    the function to call for each tile.
830  * @param context     the first argument passed to the specified function.
831  * @param range_i     the number of items to process along the first dimension
832  *    of the 5D grid.
833  * @param range_j     the number of items to process along the second dimension
834  *    of the 5D grid.
835  * @param range_k     the number of items to process along the third dimension
836  *    of the 5D grid.
837  * @param range_l     the number of items to process along the fourth dimension
838  *    of the 5D grid.
839  * @param range_m     the number of items to process along the fifth dimension
840  *    of the 5D grid.
841  * @param tile_m      the maximum number of items along the fifth dimension of
842  *    the 5D grid to process in one function call; the last tile can be smaller.
843  * @param flags       a bitwise combination of zero or more optional flags
844  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
845  */
846 void pthreadpool_parallelize_5d_tile_1d(
847 	pthreadpool_t threadpool,
848 	pthreadpool_task_5d_tile_1d_t function,
849 	void* context,
850 	size_t range_i,
851 	size_t range_j,
852 	size_t range_k,
853 	size_t range_l,
854 	size_t range_m,
855 	size_t tile_m,
856 	uint32_t flags);
857 
858 /**
859  * Process items on a 5D grid with the specified maximum tile size along the
860  * last two grid dimensions.
861  *
862  * The function implements a parallel version of the following snippet:
863  *
864  *   for (size_t i = 0; i < range_i; i++)
865  *     for (size_t j = 0; j < range_j; j++)
866  *       for (size_t k = 0; k < range_k; k++)
867  *         for (size_t l = 0; l < range_l; l += tile_l)
868  *           for (size_t m = 0; m < range_m; m += tile_m)
869  *             function(context, i, j, k, l, m,
870  *               min(range_l - l, tile_l), min(range_m - m, tile_m));
871  *
872  * When the function returns, all items have been processed and the thread pool
873  * is ready for a new task.
874  *
875  * @note If multiple threads call this function with the same thread pool, the
876  *    calls are serialized.
877  *
878  * @param threadpool  the thread pool to use for parallelisation. If threadpool
879  *    is NULL, all items are processed serially on the calling thread.
880  * @param function    the function to call for each tile.
881  * @param context     the first argument passed to the specified function.
882  * @param range_i     the number of items to process along the first dimension
883  *    of the 5D grid.
884  * @param range_j     the number of items to process along the second dimension
885  *    of the 5D grid.
886  * @param range_k     the number of items to process along the third dimension
887  *    of the 5D grid.
888  * @param range_l     the number of items to process along the fourth dimension
889  *    of the 5D grid.
890  * @param range_m     the number of items to process along the fifth dimension
891  *    of the 5D grid.
892  * @param tile_l      the maximum number of items along the fourth dimension of
893  *    the 5D grid to process in one function call; the last tile can be smaller.
894  * @param tile_m      the maximum number of items along the fifth dimension of
895  *    the 5D grid to process in one function call; the last tile can be smaller.
896  * @param flags       a bitwise combination of zero or more optional flags
897  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
898  */
899 void pthreadpool_parallelize_5d_tile_2d(
900 	pthreadpool_t threadpool,
901 	pthreadpool_task_5d_tile_2d_t function,
902 	void* context,
903 	size_t range_i,
904 	size_t range_j,
905 	size_t range_k,
906 	size_t range_l,
907 	size_t range_m,
908 	size_t tile_l,
909 	size_t tile_m,
910 	uint32_t flags);
911 
912 /**
913  * Process items on a 6D grid.
914  *
915  * The function implements a parallel version of the following snippet:
916  *
917  *   for (size_t i = 0; i < range_i; i++)
918  *     for (size_t j = 0; j < range_j; j++)
919  *       for (size_t k = 0; k < range_k; k++)
920  *         for (size_t l = 0; l < range_l; l++)
921  *           for (size_t m = 0; m < range_m; m++)
922  *             for (size_t n = 0; n < range_n; n++)
923  *               function(context, i, j, k, l, m, n);
924  *
925  * When the function returns, all items have been processed and the thread pool
926  * is ready for a new task.
927  *
928  * @note If multiple threads call this function with the same thread pool, the
929  *    calls are serialized.
930  *
931  * @param threadpool  the thread pool to use for parallelisation. If threadpool
932  *    is NULL, all items are processed serially on the calling thread.
933  * @param function    the function to call for each item.
934  * @param context     the first argument passed to the specified function.
935  * @param range_i     the number of items to process along the first dimension
936  *    of the 6D grid.
937  * @param range_j     the number of items to process along the second dimension
938  *    of the 6D grid.
939  * @param range_k     the number of items to process along the third dimension
940  *    of the 6D grid.
941  * @param range_l     the number of items to process along the fourth dimension
942  *    of the 6D grid.
943  * @param range_m     the number of items to process along the fifth dimension
944  *    of the 6D grid.
945  * @param range_n     the number of items to process along the sixth dimension
946  *    of the 6D grid.
949  * @param flags       a bitwise combination of zero or more optional flags
950  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
951  */
952 void pthreadpool_parallelize_6d(
953   pthreadpool_t threadpool,
954   pthreadpool_task_6d_t function,
955   void* context,
956   size_t range_i,
957   size_t range_j,
958   size_t range_k,
959   size_t range_l,
960   size_t range_m,
961   size_t range_n,
962   uint32_t flags);
963 
964 /**
965  * Process items on a 6D grid with the specified maximum tile size along the
966  * last grid dimension.
967  *
968  * The function implements a parallel version of the following snippet:
969  *
970  *   for (size_t i = 0; i < range_i; i++)
971  *     for (size_t j = 0; j < range_j; j++)
972  *       for (size_t k = 0; k < range_k; k++)
973  *         for (size_t l = 0; l < range_l; l++)
974  *           for (size_t m = 0; m < range_m; m++)
975  *             for (size_t n = 0; n < range_n; n += tile_n)
976  *               function(context, i, j, k, l, m, n, min(range_n - n, tile_n));
977  *
978  * When the function returns, all items have been processed and the thread pool
979  * is ready for a new task.
980  *
981  * @note If multiple threads call this function with the same thread pool, the
982  *    calls are serialized.
983  *
984  * @param threadpool  the thread pool to use for parallelisation. If threadpool
985  *    is NULL, all items are processed serially on the calling thread.
986  * @param function    the function to call for each tile.
987  * @param context     the first argument passed to the specified function.
988  * @param range_i     the number of items to process along the first dimension
989  *    of the 6D grid.
990  * @param range_j     the number of items to process along the second dimension
991  *    of the 6D grid.
992  * @param range_k     the number of items to process along the third dimension
993  *    of the 6D grid.
994  * @param range_l     the number of items to process along the fourth dimension
995  *    of the 6D grid.
996  * @param range_m     the number of items to process along the fifth dimension
997  *    of the 6D grid.
998  * @param range_n     the number of items to process along the sixth dimension
999  *    of the 6D grid.
1000  * @param tile_n      the maximum number of items along the sixth dimension of
1001  *    the 6D grid to process in one function call; the last tile can be smaller.
1002  * @param flags       a bitwise combination of zero or more optional flags
1003  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1004  */
1005 void pthreadpool_parallelize_6d_tile_1d(
1006   pthreadpool_t threadpool,
1007   pthreadpool_task_6d_tile_1d_t function,
1008   void* context,
1009   size_t range_i,
1010   size_t range_j,
1011   size_t range_k,
1012   size_t range_l,
1013   size_t range_m,
1014   size_t range_n,
1015   size_t tile_n,
1016   uint32_t flags);
1017 
1018 /**
1019  * Process items on a 6D grid with the specified maximum tile size along the
1020  * last two grid dimensions.
1021  *
1022  * The function implements a parallel version of the following snippet:
1023  *
1024  *   for (size_t i = 0; i < range_i; i++)
1025  *     for (size_t j = 0; j < range_j; j++)
1026  *       for (size_t k = 0; k < range_k; k++)
1027  *         for (size_t l = 0; l < range_l; l++)
1028  *           for (size_t m = 0; m < range_m; m += tile_m)
1029  *             for (size_t n = 0; n < range_n; n += tile_n)
1030  *               function(context, i, j, k, l, m, n,
1031  *                 min(range_m - m, tile_m), min(range_n - n, tile_n));
1032  *
1033  * When the function returns, all items have been processed and the thread pool
1034  * is ready for a new task.
1035  *
1036  * @note If multiple threads call this function with the same thread pool, the
1037  *    calls are serialized.
1038  *
1039  * @param threadpool  the thread pool to use for parallelisation. If threadpool
1040  *    is NULL, all items are processed serially on the calling thread.
1041  * @param function    the function to call for each tile.
1042  * @param context     the first argument passed to the specified function.
1043  * @param range_i     the number of items to process along the first dimension
1044  *    of the 6D grid.
1045  * @param range_j     the number of items to process along the second dimension
1046  *    of the 6D grid.
1047  * @param range_k     the number of items to process along the third dimension
1048  *    of the 6D grid.
1049  * @param range_l     the number of items to process along the fourth dimension
1050  *    of the 6D grid.
1051  * @param range_m     the number of items to process along the fifth dimension
1052  *    of the 6D grid.
1053  * @param range_n     the number of items to process along the sixth dimension
1054  *    of the 6D grid.
1055  * @param tile_m      the maximum number of items along the fifth dimension of
1056  *    the 6D grid to process in one function call; the last tile can be smaller.
1057  * @param tile_n      the maximum number of items along the sixth dimension of
1058  *    the 6D grid to process in one function call; the last tile can be smaller.
1059  * @param flags       a bitwise combination of zero or more optional flags
1060  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1061  */
1062 void pthreadpool_parallelize_6d_tile_2d(
1063 	pthreadpool_t threadpool,
1064 	pthreadpool_task_6d_tile_2d_t function,
1065 	void* context,
1066 	size_t range_i,
1067 	size_t range_j,
1068 	size_t range_k,
1069 	size_t range_l,
1070 	size_t range_m,
1071 	size_t range_n,
1072 	size_t tile_m,
1073 	size_t tile_n,
1074 	uint32_t flags);
1075 
1076 /**
1077  * Terminates threads in the thread pool and releases associated resources.
1078  *
1079  * @warning  Accessing the thread pool after a call to this function constitutes
1080  *    undefined behaviour and may cause data corruption.
1081  *
 * @note  NOTE(review): whether a NULL threadpool is accepted is not stated
 *    here; confirm against the implementation before relying on it.
 *
1082  * @param[in,out]  threadpool  The thread pool to destroy.
1083  */
1084 void pthreadpool_destroy(pthreadpool_t threadpool);
1085 
1086 
1087 #ifndef PTHREADPOOL_NO_DEPRECATED_API
1088 
1089 /* Legacy API for compatibility with pre-existing users (e.g. NNPACK).
 * Everything in this section is compiled out when
 * PTHREADPOOL_NO_DEPRECATED_API is defined before this header is included. */

/* PTHREADPOOL_DEPRECATED marks a declaration as deprecated: it expands to the
 * GNU __deprecated__ attribute when __GNUC__ is defined and to nothing
 * otherwise. */
1090 #if defined(__GNUC__)
1091 	#define PTHREADPOOL_DEPRECATED __attribute__((__deprecated__))
1092 #else
1093 	#define PTHREADPOOL_DEPRECATED
1094 #endif
1095 
/* Deprecated task-function pointer types taken by the pthreadpool_compute_*
 * functions declared below. */
1096 typedef void (*pthreadpool_function_1d_t)(void*, size_t) PTHREADPOOL_DEPRECATED;
1097 typedef void (*pthreadpool_function_1d_tiled_t)(void*, size_t, size_t) PTHREADPOOL_DEPRECATED;
1098 typedef void (*pthreadpool_function_2d_t)(void*, size_t, size_t) PTHREADPOOL_DEPRECATED;
1099 typedef void (*pthreadpool_function_2d_tiled_t)(void*, size_t, size_t, size_t, size_t) PTHREADPOOL_DEPRECATED;
1100 typedef void (*pthreadpool_function_3d_tiled_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t) PTHREADPOOL_DEPRECATED;
1101 typedef void (*pthreadpool_function_4d_tiled_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t) PTHREADPOOL_DEPRECATED;
1102 
/* Deprecated entry points. None of them takes a flags argument.
 * NOTE(review): their exact semantics are not documented in this section;
 * presumably they are superseded by the pthreadpool_parallelize_* family -
 * confirm against the implementation. */
1103 void pthreadpool_compute_1d(
1104 	pthreadpool_t threadpool,
1105 	pthreadpool_function_1d_t function,
1106 	void* argument,
1107 	size_t range) PTHREADPOOL_DEPRECATED;
1108 
1109 void pthreadpool_compute_1d_tiled(
1110 	pthreadpool_t threadpool,
1111 	pthreadpool_function_1d_tiled_t function,
1112 	void* argument,
1113 	size_t range,
1114 	size_t tile) PTHREADPOOL_DEPRECATED;
1115 
1116 void pthreadpool_compute_2d(
1117 	pthreadpool_t threadpool,
1118 	pthreadpool_function_2d_t function,
1119 	void* argument,
1120 	size_t range_i,
1121 	size_t range_j) PTHREADPOOL_DEPRECATED;
1122 
1123 void pthreadpool_compute_2d_tiled(
1124 	pthreadpool_t threadpool,
1125 	pthreadpool_function_2d_tiled_t function,
1126 	void* argument,
1127 	size_t range_i,
1128 	size_t range_j,
1129 	size_t tile_i,
1130 	size_t tile_j) PTHREADPOOL_DEPRECATED;
1131 
1132 void pthreadpool_compute_3d_tiled(
1133 	pthreadpool_t threadpool,
1134 	pthreadpool_function_3d_tiled_t function,
1135 	void* argument,
1136 	size_t range_i,
1137 	size_t range_j,
1138 	size_t range_k,
1139 	size_t tile_i,
1140 	size_t tile_j,
1141 	size_t tile_k) PTHREADPOOL_DEPRECATED;
1142 
1143 void pthreadpool_compute_4d_tiled(
1144 	pthreadpool_t threadpool,
1145 	pthreadpool_function_4d_tiled_t function,
1146 	void* argument,
1147 	size_t range_i,
1148 	size_t range_j,
1149 	size_t range_k,
1150 	size_t range_l,
1151 	size_t tile_i,
1152 	size_t tile_j,
1153 	size_t tile_k,
1154 	size_t tile_l) PTHREADPOOL_DEPRECATED;
1155 
1156 #endif /* PTHREADPOOL_NO_DEPRECATED_API */
1157 
1158 #ifdef __cplusplus
1159 } /* extern "C" */
1160 #endif
1161 
1162 #endif /* PTHREADPOOL_H_ */
1163