1 #ifndef _AOMP_INCLUDED__
2 #define _AOMP_INCLUDED__
3
/* Fallback definition of INLINE: if nothing defined it before this point,
   it expands to nothing.  The static helper functions in this file are
   declared with it. */
#ifndef INLINE
# define INLINE /*nada*/
#endif
7
8 /*------------- Macros to be used in OpenMP enabled AFNI code ----------------*/
9
10 /* to replace memcpy and memset, which cause trouble inside a parallel region */
11
12 #ifdef USE_OMP
/* Hand-rolled stand-in for memcpy(): copies nnn bytes from iii to ooo,
   one byte at a time, lowest address first.
     ooo = destination buffer (at least nnn bytes)
     iii = source buffer (only read, hence const -- this matches what
           memcpy() accepts when USE_OMP is not defined)
     nnn = number of bytes to copy
   Does nothing if either pointer is NULL or nnn is 0.
   Unlike memcpy(), no value is returned.                                  */
static INLINE void AAmemcpy( void *ooo , const void *iii , size_t nnn )
{  register size_t jj ; register char *oar ; register const char *iar ;
   if( ooo == NULL || iii == NULL || nnn == 0 ) return ;
   oar = (char *)ooo ; iar = (const char *)iii ;
   for( jj=0 ; jj < nnn ; jj++ ) *oar++ = *iar++ ;
}
/* Hand-rolled stand-in for memset(): stores the byte value of c into each
   of the first nnn bytes at ooo.
     ooo = buffer to fill (at least nnn bytes)
     c   = fill value; converted to unsigned char, as memset() does, so the
           conversion is well defined for any int
     nnn = number of bytes to fill
   Does nothing if ooo is NULL or nnn is 0.
   Unlike memset(), no value is returned.                                  */
static INLINE void AAmemset( void *ooo , int c , size_t nnn )
{  register size_t jj ; register unsigned char cc , *oar ;
   if( ooo == NULL || nnn == 0 ) return ;
   oar = (unsigned char *)ooo ; cc = (unsigned char)c ;
   for( jj=0 ; jj < nnn ; jj++ ) *oar++ = cc ;
}
25 #else
/* Without USE_OMP the AA* names are plain aliases for the C library
   functions (whose declarations must come from elsewhere) */
# define AAmemcpy memcpy
# define AAmemset memset
28 #endif
/* Alternative spellings; each one forwards to AAmemcpy or AAmemset,
   whichever way those are defined above */
#define AA_memcpy AAmemcpy
#define AA_memset AAmemset
#define AO_memcpy AAmemcpy
#define AO_memset AAmemset
33
34 /* to disable ENTRY/RETURN macros (which use static variables) */
35
/* When both USE_OMP and USE_TRACING are defined, AFNI_OMP_START expands to
   an increment and AFNI_OMP_END to a decrement of DBG_stoff (declared
   elsewhere; presumably a nonzero value switches the tracing macros off --
   confirm against the tracing header).  Otherwise both expand to nothing.
   Neither expansion carries a trailing semicolon.                          */
#if defined(USE_OMP) && defined(USE_TRACING)
# define AFNI_OMP_START DBG_stoff++
# define AFNI_OMP_END DBG_stoff--
#else
# define AFNI_OMP_START /*nada*/
# define AFNI_OMP_END /*nada*/
#endif
43
44 /* Set max number of threads to be at most thn */
45
46 #ifdef USE_OMP
/* AFNI_SETUP_OMP(thn): limit the OpenMP thread count.
     thn = requested upper limit; a value below 1 is replaced by 15.
   The macro reads omp_get_max_threads() and AFNI_numenv("OMP_NUM_THREADS").
   Only when the latter, truncated to int, is <= 0 does it call
   omp_set_num_threads() with the smaller of the current maximum and the
   limit; otherwise the thread count is left alone.
   thn is evaluated exactly once, and the internal variable names carry an
   aso_ prefix so that a caller's own 'nn', 'mm' or 'ee' can be passed in. */
# define AFNI_SETUP_OMP(thn)                                         \
  do{ int aso_nmax_ = omp_get_max_threads() ;                        \
      int aso_ncap_ = (thn) ;                                        \
      int aso_nenv_ = (int)AFNI_numenv("OMP_NUM_THREADS") ;          \
      if( aso_nenv_ <= 0 ){                                          \
        if( aso_ncap_ < 1         ) aso_ncap_ = 15 ;                 \
        if( aso_nmax_ > aso_ncap_ ) aso_nmax_ = aso_ncap_ ;          \
        omp_set_num_threads(aso_nmax_) ;                             \
      }                                                              \
  } while(0)
55 #else
/* USE_OMP not defined: the macro expands to nothing; thn is not evaluated */
# define AFNI_SETUP_OMP(thn) /*nada*/
57 #endif
58
59 /* Macro to use in -help output */
60
61 #ifdef USE_OMP
/* PRINT_AFNI_OMP_USAGE(pnam,extra): printf() the OpenMP section of a
   program's -help output (USE_OMP build).
     pnam  = program name, substituted for the first %s
     extra = additional text inserted just before the closing rule, or NULL
             for none
   The two %d fields are filled from omp_get_num_procs() and
   omp_get_max_threads().  Both macro arguments are parenthesized in the
   expansion so that an expression such as (cond ? "text" : NULL) can be
   passed as extra; note that extra is evaluated twice.
   The expansion has no trailing semicolon.                                 */
# define PRINT_AFNI_OMP_USAGE(pnam,extra) \
  printf( \
    "\n" \
    " =========================================================================\n" \
    "* This binary version of %s is compiled using OpenMP, a semi-\n" \
    " automatic parallelizer software toolkit, which splits the work across\n" \
    " multiple CPUs/cores on the same shared memory computer.\n" \
    "* OpenMP is NOT like MPI -- it does not work with CPUs connected only\n" \
    " by a network (e.g., OpenMP doesn't work across cluster nodes).\n" \
    "* For some implementation and compilation details, please see\n" \
    " https://afni.nimh.nih.gov/pub/dist/doc/misc/OpenMP.html\n" \
    "* The number of CPU threads used will default to the maximum number on\n" \
    " your system. You can control this value by setting environment variable\n" \
    " OMP_NUM_THREADS to some smaller value (including 1).\n" \
    "* Un-setting OMP_NUM_THREADS resets OpenMP back to its default state of\n" \
    " using all CPUs available.\n" \
    " ++ However, on some systems, it seems to be necessary to set variable\n" \
    " OMP_NUM_THREADS explicitly, or you only get one CPU.\n" \
    " ++ On other systems with many CPUS, you probably want to limit the CPU\n" \
    " count, since using more than (say) 16 threads is probably useless.\n" \
    "* You must set OMP_NUM_THREADS in the shell BEFORE running the program,\n" \
    " since OpenMP queries this variable BEFORE the program actually starts.\n" \
    " ++ You can't usefully set this variable in your ~/.afnirc file or on the\n" \
    " command line with the '-D' option.\n" \
    "* How many threads are useful? That varies with the program, and how well\n" \
    " it was coded. You'll have to experiment on your own systems!\n" \
    "* The number of CPUs on this particular computer system is ...... %d.\n" \
    "* The maximum number of CPUs that will be used is now set to .... %d.\n" \
    "%s" \
    " =========================================================================\n" \
    , (pnam) , omp_get_num_procs() , omp_get_max_threads() , \
    ((extra)==NULL) ? "\0" : (extra) \
  )
95 #else
/* PRINT_AFNI_OMP_USAGE(pnam,extra), build without USE_OMP: printf() a fixed
   notice saying the program was not compiled with OpenMP.
     pnam  = program name, substituted for the single %s
     extra = accepted for call compatibility; it does not appear in this
             expansion, so it is neither evaluated nor printed
   The expansion has no trailing semicolon.                                 */
# define PRINT_AFNI_OMP_USAGE(pnam,extra) \
printf( \
"\n" \
" =========================================================================\n" \
"* This binary version of %s is NOT compiled using OpenMP, a\n" \
" semi-automatic parallelizer software toolkit, which splits the work\n" \
" across multiple CPUs/cores on the same shared memory computer.\n" \
"* However, the source code is compatible with OpenMP, and can be compiled\n" \
" with an OpenMP-capable compiler, such as gcc 8.x+, Intel's icc, and\n" \
" Oracle Developer Studio.\n" \
"* If you wish to compile this program with OpenMP, see the man page for\n" \
" your C compiler, and (if needed) consult the AFNI message board, and\n" \
" https://afni.nimh.nih.gov/pub/dist/doc/misc/OpenMP.html\n" \
"* However, it would probably be simplest to download a pre-compiled AFNI\n" \
" binary set that uses OpenMP!\n" \
" https://afni.nimh.nih.gov/pub/dist/doc/htmldoc/index.html\n" \
, (pnam) \
)
114 #endif
115
116 /*----------------------------------------------------------------------------*/
/* Some macros for allocating static workspace arrays, per thread.
118 These are to aid in OpenMP-izing existing codes that use static variables.
119 The macros also have non-OpenMP versions, which don't create thread-wise
120 arrays.
121
122 The 'DEFINE' macros declare the static arrays (one element per thread),
123 then the AO_VALUE macro is used to get the particular component for
124 the current thread.
125 *//*--------------------------------------------------------------------------*/
126
127 #ifdef USE_OMP
128
/* AOth   = index of the calling thread, as reported by omp_get_thread_num()
   AO_nth = number of threads reported by omp_get_num_threads()
   Both are function calls, evaluated anew at every use.                    */
#define AOth omp_get_thread_num()
#define AO_nth omp_get_num_threads()

/* Number of slots in every per-thread static array declared by the
   AO_DEFINE_* macros.
   NOTE(review): nothing in this file checks that AOth < AO_NTH_MAX, so
   running with more threads than this would index past the arrays.         */
#define AO_NTH_MAX 99
133
134 /*............................................................................*/
135 /* The macro below defines a scalar with name 'nam' of type 'typ'.
136 For example
137
138 AO_DEFINE_SCALAR(int,ncall) ; // can be inside a function or global
139 ...
140 if( AO_VALUE(ncall) == 0 ){
141 // do something special
142 AO_VALUE(ncall)++ ;
143 }
144
The above would replace the non-thread-safe code
146
147 static int ncall ;
148 ...
149 if( ncall == 0 ){
150 // something
151 ncall++ ;
152 }
153
154 Similar 'DEFINE' macros are available for defining and manipulating
155 per-thread arrays. The AO_VALUE macro can be used to extract the
156 pointer to the array data.
157 *//*..........................................................................*/
158
/* Declares a static array AO<nam> holding AO_NTH_MAX values of type typ
   (one slot per thread index).  No trailing semicolon. */
#define AO_DEFINE_SCALAR(typ,nam) static typ AO##nam[AO_NTH_MAX]

/* Declares two static arrays with AO_NTH_MAX slots each: AO<nam> (a typ
   pointer per slot) and AOL<nam> (an int length per slot).  Being static,
   they start out as NULL / 0.  No trailing semicolon. */
#define AO_DEFINE_ARRAY(typ,nam) static typ *AO##nam[AO_NTH_MAX] ; \
static int AOL##nam[AO_NTH_MAX]

/* Declares three static arrays with AO_NTH_MAX slots each: AO<nam> (a typ**
   per slot), AOL1<nam> and AOL2<nam> (the two int lengths per slot).
   No trailing semicolon. */
#define AO_DEFINE_2DARRAY(typ,nam) static typ **AO##nam[AO_NTH_MAX] ; \
static int AOL1##nam[AO_NTH_MAX] ; \
static int AOL2##nam[AO_NTH_MAX]
167
/* Accessors for the calling thread's slot of the objects declared by the
   AO_DEFINE_* macros.  Each expands to an lvalue indexed by AOth, so it
   can be read, assigned or incremented.                                    */

/* the scalar value, or the array pointer, named 'nam' */
#define AO_VALUE(nam) AO##nam[AOth]

/* recorded length of the 1D array 'nam' */
#define AO_ARRAY_LEN(nam) AOL##nam[AOth]

/* recorded first and second lengths of the 2D array 'nam' */
#define AO_2DARRAY_LEN1(nam) AOL1##nam[AOth]
#define AO_2DARRAY_LEN2(nam) AOL2##nam[AOth]

/* original (misspelled) name of AO_2DARRAY_LEN2, kept for existing callers */
#define AO_2DARRAY_LNE2(nam) AOL2##nam[AOth]
174
/* AO_RESIZE_ARRAY(typ,nam,len): make the calling thread's copy of the 1D
   array 'nam' at least len elements of type typ long.
   If the recorded length is already >= len nothing happens (the array is
   never shrunk); otherwise the array is realloc()-ed to exactly len
   elements and the recorded length becomes len.
   len is evaluated once and its value is used as a whole, so an
   expression such as n+1 gives the intended byte count.
   The realloc() result is not checked: on failure the stored pointer
   becomes NULL while the recorded length is still set to len.             */
#define AO_RESIZE_ARRAY(typ,nam,len)                                         \
 do{ const int ao_tid_ = AOth , ao_want_ = (len) ;                           \
     if( AOL##nam[ao_tid_] < ao_want_ ){                                     \
       AO##nam[ao_tid_] = (typ *)realloc( AO##nam[ao_tid_] ,                 \
                                          sizeof(typ)*(size_t)ao_want_ ) ;   \
       AOL##nam[ao_tid_] = ao_want_ ;                                        \
 } } while(0)
181
/* AO_FREE_ARRAY(nam): release the calling thread's copy of the 1D array
   'nam' and leave its slot empty (pointer NULL, recorded length 0).
   The length is reset even if the pointer was already NULL, so a stale
   length cannot make a later AO_RESIZE_ARRAY skip the allocation.
   free(NULL) is a no-op, so no NULL test is needed.                        */
#define AO_FREE_ARRAY(nam)                                                   \
 do{ const int ao_tid_ = AOth ;                                              \
     free( AO##nam[ao_tid_] ) ;                                              \
     AO##nam[ao_tid_]  = NULL ;                                              \
     AOL##nam[ao_tid_] = 0 ;                                                 \
 } while(0)
189
/* AO_RESIZE_2DARRAY(typ,nam,len1,len2): set the calling thread's copy of
   the 2D array 'nam' to len1 rows of len2 elements of type typ each.
     - If len1 exceeds the recorded row count, the array of row pointers is
       realloc()-ed to len1 entries and the new entries start out NULL.
     - If len1 is below the recorded row count, the surplus rows are freed
       and their pointers set to NULL (the original macro dropped them from
       the count without freeing them); the row-pointer array itself keeps
       its size.
     - If either length differs from the recorded one, every row 0..len1-1
       is realloc()-ed to len2 elements and both lengths are recorded.
   len1 and len2 are each evaluated once and used as whole values.
   realloc() results are not checked.                                       */
#define AO_RESIZE_2DARRAY(typ,nam,len1,len2)                                 \
 do{ const int ao_tid_ = AOth , ao_n1_ = (len1) , ao_n2_ = (len2) ;          \
     const int ao_o1_ = AOL1##nam[ao_tid_] , ao_o2_ = AOL2##nam[ao_tid_] ;   \
     int ao_pp_ ;                                                            \
     if( ao_o1_ < ao_n1_ ){                                                  \
       AO##nam[ao_tid_] = (typ **)realloc( AO##nam[ao_tid_] ,                \
                                           sizeof(typ *)*(size_t)ao_n1_ ) ;  \
       for( ao_pp_=ao_o1_ ; ao_pp_ < ao_n1_ ; ao_pp_++ )                     \
         AO##nam[ao_tid_][ao_pp_] = NULL ;                                   \
     }                                                                       \
     for( ao_pp_=(ao_n1_ > 0 ? ao_n1_ : 0) ; ao_pp_ < ao_o1_ ; ao_pp_++ ){   \
       free( AO##nam[ao_tid_][ao_pp_] ) ;                                    \
       AO##nam[ao_tid_][ao_pp_] = NULL ;                                     \
     }                                                                       \
     if( ao_o1_ != ao_n1_ || ao_o2_ != ao_n2_ ){                             \
       for( ao_pp_=0 ; ao_pp_ < ao_n1_ ; ao_pp_++ )                          \
         AO##nam[ao_tid_][ao_pp_] =                                          \
            (typ *)realloc( AO##nam[ao_tid_][ao_pp_] ,                       \
                            sizeof(typ)*(size_t)ao_n2_ ) ;                   \
       AOL1##nam[ao_tid_] = ao_n1_ ; AOL2##nam[ao_tid_] = ao_n2_ ;           \
     }                                                                       \
 } while(0)
202
/* AO_FREE_2DARRAY(nam): release the calling thread's copy of the 2D array
   'nam': every row up to the recorded row count, then the array of row
   pointers.  Afterwards the pointer is NULL and both recorded lengths are
   0; the lengths are reset even if the pointer was already NULL.
   Rows are passed to free() without a NULL test, since free(NULL) is a
   no-op.                                                                   */
#define AO_FREE_2DARRAY(nam)                                                 \
 do{ const int ao_tid_ = AOth ; int ao_pp_ ;                                 \
     if( AO##nam[ao_tid_] != NULL ){                                         \
       for( ao_pp_=0 ; ao_pp_ < AOL1##nam[ao_tid_] ; ao_pp_++ )              \
         free( AO##nam[ao_tid_][ao_pp_] ) ;                                  \
       free( AO##nam[ao_tid_] ) ;                                            \
       AO##nam[ao_tid_] = NULL ;                                             \
     }                                                                       \
     AOL1##nam[ao_tid_] = AOL2##nam[ao_tid_] = 0 ;                           \
 } while(0)
212
213 /*----------------------------------------------------------------------------*/
214 /* Same macros for allocating workspaces, but just one copy of each */
215
216 #else /* not USE_OMP */
217
/* Without USE_OMP these are constants: thread index 0, one thread, and
   one slot per object */
#define AOth 0
#define AO_nth 1
#define AO_NTH_MAX 1
221
/* Declares a single static variable AO<nam> of type typ.
   No trailing semicolon. */
#define AO_DEFINE_SCALAR(typ,nam) static typ AO##nam

/* Declares a static typ pointer AO<nam> and a static int length AOL<nam>.
   Being static, they start out as NULL / 0.  No trailing semicolon. */
#define AO_DEFINE_ARRAY(typ,nam) static typ *AO##nam; static int AOL##nam

/* Declares a static typ** AO<nam> plus the two static int lengths
   AOL1<nam> and AOL2<nam>.  No trailing semicolon. */
#define AO_DEFINE_2DARRAY(typ,nam) static typ **AO##nam ; \
static int AOL1##nam , AOL2##nam
228
/* Accessors for the objects declared by the AO_DEFINE_* macros.  Each
   expands to the bare variable name, so it is an lvalue that can be read,
   assigned or incremented.                                                 */

/* the scalar value, or the array pointer, named 'nam' */
#define AO_VALUE(nam) AO##nam

/* recorded length of the 1D array 'nam' */
#define AO_ARRAY_LEN(nam) AOL##nam

/* recorded first and second lengths of the 2D array 'nam' */
#define AO_2DARRAY_LEN1(nam) AOL1##nam
#define AO_2DARRAY_LEN2(nam) AOL2##nam

/* original (misspelled) name of AO_2DARRAY_LEN2, kept for existing callers */
#define AO_2DARRAY_LNE2(nam) AOL2##nam
234
/* AO_RESIZE_ARRAY(typ,nam,len): make the 1D array 'nam' at least len
   elements of type typ long.
   If the recorded length is already >= len nothing happens (the array is
   never shrunk); otherwise the array is realloc()-ed to exactly len
   elements and the recorded length becomes len.
   len is evaluated once and its value is used as a whole, so an
   expression such as n+1 gives the intended byte count.
   The realloc() result is not checked: on failure the stored pointer
   becomes NULL while the recorded length is still set to len.             */
#define AO_RESIZE_ARRAY(typ,nam,len)                                         \
 do{ const int ao_want_ = (len) ;                                            \
     if( AOL##nam < ao_want_ ){                                              \
       AO##nam  = (typ *)realloc( AO##nam ,                                  \
                                  sizeof(typ)*(size_t)ao_want_ ) ;           \
       AOL##nam = ao_want_ ;                                                 \
 } } while(0)
240
/* AO_FREE_ARRAY(nam): release the 1D array 'nam' and leave it empty
   (pointer NULL, recorded length 0).
   The length is reset even if the pointer was already NULL, so a stale
   length cannot make a later AO_RESIZE_ARRAY skip the allocation.
   free(NULL) is a no-op, so no NULL test is needed.                        */
#define AO_FREE_ARRAY(nam)                                                   \
 do{ free( AO##nam ) ;                                                       \
     AO##nam  = NULL ;                                                       \
     AOL##nam = 0 ;                                                          \
 } while(0)
245
246
/* AO_RESIZE_2DARRAY(typ,nam,len1,len2): set the 2D array 'nam' to len1
   rows of len2 elements of type typ each.
     - If len1 exceeds the recorded row count, the array of row pointers is
       realloc()-ed to len1 entries and the new entries start out NULL.
     - If len1 is below the recorded row count, the surplus rows are freed
       and their pointers set to NULL (the original macro dropped them from
       the count without freeing them); the row-pointer array itself keeps
       its size.
     - If either length differs from the recorded one, every row 0..len1-1
       is realloc()-ed to len2 elements and both lengths are recorded.
   len1 and len2 are each evaluated once and used as whole values.
   realloc() results are not checked.                                       */
#define AO_RESIZE_2DARRAY(typ,nam,len1,len2)                                 \
 do{ const int ao_n1_ = (len1) , ao_n2_ = (len2) ;                           \
     const int ao_o1_ = AOL1##nam , ao_o2_ = AOL2##nam ;                     \
     int ao_pp_ ;                                                            \
     if( ao_o1_ < ao_n1_ ){                                                  \
       AO##nam = (typ **)realloc( AO##nam ,                                  \
                                  sizeof(typ *)*(size_t)ao_n1_ ) ;           \
       for( ao_pp_=ao_o1_ ; ao_pp_ < ao_n1_ ; ao_pp_++ )                     \
         AO##nam[ao_pp_] = NULL ;                                            \
     }                                                                       \
     for( ao_pp_=(ao_n1_ > 0 ? ao_n1_ : 0) ; ao_pp_ < ao_o1_ ; ao_pp_++ ){   \
       free( AO##nam[ao_pp_] ) ;                                             \
       AO##nam[ao_pp_] = NULL ;                                              \
     }                                                                       \
     if( ao_o1_ != ao_n1_ || ao_o2_ != ao_n2_ ){                             \
       for( ao_pp_=0 ; ao_pp_ < ao_n1_ ; ao_pp_++ )                          \
         AO##nam[ao_pp_] = (typ *)realloc( AO##nam[ao_pp_] ,                 \
                                           sizeof(typ)*(size_t)ao_n2_ ) ;    \
       AOL1##nam = ao_n1_ ; AOL2##nam = ao_n2_ ;                             \
     }                                                                       \
 } while(0)
259
/* AO_FREE_2DARRAY(nam): release the 2D array 'nam': every row up to the
   recorded row count, then the array of row pointers.  Afterwards the
   pointer is NULL and both recorded lengths are 0; the lengths are reset
   even if the pointer was already NULL.
   Rows are passed to free() without a NULL test, since free(NULL) is a
   no-op.                                                                   */
#define AO_FREE_2DARRAY(nam)                                                 \
 do{ int ao_pp_ ;                                                            \
     if( AO##nam != NULL ){                                                  \
       for( ao_pp_=0 ; ao_pp_ < AOL1##nam ; ao_pp_++ )                       \
         free( AO##nam[ao_pp_] ) ;                                           \
       free( AO##nam ) ;                                                     \
       AO##nam = NULL ;                                                      \
     }                                                                       \
     AOL1##nam = AOL2##nam = 0 ;                                             \
 } while(0)
269
270 #endif /* USE_OMP */
271
272 /*----------------------------------------------------------------------------*/
273
274 #endif /* _AOMP_INCLUDED__ */
275