1 #ifndef _AOMP_INCLUDED__
2 #define _AOMP_INCLUDED__
3 
4 #ifndef INLINE
5 # define INLINE /*nada*/
6 #endif
7 
8 /*------------- Macros to be used in OpenMP enabled AFNI code ----------------*/
9 
10 /* to replace memcpy and memset, which cause trouble inside a parallel region */
11 
12 #ifdef USE_OMP
/* Byte-by-byte stand-in for memcpy(), used when USE_OMP is defined.
     ooo = destination, iii = source, nnn = number of bytes to copy.
   Nothing is done if either pointer is NULL or nnn is 0.
   Bytes are copied in ascending address order; unlike memcpy(), nothing
   is returned. The source is const-qualified, as it is for memcpy(), so
   code that compiles in a serial build also compiles cleanly with OpenMP. */

static INLINE void AAmemcpy( void *ooo , const void *iii , size_t nnn )
{
  char       *oar = (char *)ooo ;
  const char *iar = (const char *)iii ;
  size_t jj ;

  if( oar == NULL || iar == NULL || nnn == 0 ) return ;
  for( jj=0 ; jj < nnn ; jj++ ) oar[jj] = iar[jj] ;
}
/* Byte-by-byte stand-in for memset(), used when USE_OMP is defined.
     ooo = destination, c = fill value, nnn = number of bytes to set.
   Nothing is done if ooo is NULL or nnn is 0; nothing is returned.
   As with memset(), the fill value is converted to unsigned char, which
   is well defined for every int (conversion to plain char is not).      */

static INLINE void AAmemset( void *ooo , int c , size_t nnn )
{
  unsigned char      *oar = (unsigned char *)ooo ;
  const unsigned char cc  = (unsigned char)c ;
  size_t jj ;

  if( oar == NULL || nnn == 0 ) return ;
  for( jj=0 ; jj < nnn ; jj++ ) oar[jj] = cc ;
}
25 #else
/* not USE_OMP: the AA names map straight onto the standard library calls */
# define AAmemcpy memcpy
# define AAmemset memset
#endif
/* alternate spellings of the same two routines (defined in both builds) */
#define AA_memcpy AAmemcpy
#define AA_memset AAmemset
#define AO_memcpy AAmemcpy
#define AO_memset AAmemset
33 
34 /* to disable ENTRY/RETURN macros (which use static variables) */
35 
/* With both OpenMP and tracing compiled in, AFNI_OMP_START increments
   DBG_stoff and AFNI_OMP_END decrements it (DBG_stoff is declared
   elsewhere); in every other build both macros expand to nothing.   */
#if defined(USE_OMP) && defined(USE_TRACING)
# define AFNI_OMP_START DBG_stoff++
# define AFNI_OMP_END   DBG_stoff--
#else
# define AFNI_OMP_START   /*nada*/
# define AFNI_OMP_END     /*nada*/
#endif
43 
44 /* Set max number of threads to be at most thn */
45 
#ifdef USE_OMP
/* AFNI_SETUP_OMP(thn):
     If AFNI_numenv("OMP_NUM_THREADS") does not give a positive value,
     call omp_set_num_threads() with the smaller of omp_get_max_threads()
     and thn (a value of thn below 1 is replaced by 15). Otherwise the
     OpenMP settings are left alone.
   The argument is parenthesized and evaluated once, and the temporaries
   carry an aomp_ prefix so they cannot capture names used in 'thn'.     */
# define AFNI_SETUP_OMP(thn)                                          \
  do{ int aomp_mm = omp_get_max_threads() ;                           \
      int aomp_nn = (thn) ;                                           \
      int aomp_ee = (int)AFNI_numenv("OMP_NUM_THREADS") ;             \
      if( aomp_ee <= 0 ){                                             \
        if( aomp_nn < 1       ){ aomp_nn = 15 ;      }                \
        if( aomp_mm > aomp_nn ){ aomp_mm = aomp_nn ; }                \
        omp_set_num_threads(aomp_mm) ;                                \
      }                                                               \
  } while(0)
#else
/* serial build: nothing to set up */
# define AFNI_SETUP_OMP(thn) /*nada*/
#endif
58 
59 /* Macro to use in -help output */
60 
61 #ifdef USE_OMP
/* PRINT_AFNI_OMP_USAGE(pnam,extra), OpenMP build:
     Prints the OpenMP section of a program's -help output to stdout.
       pnam  = program name, inserted via %s
       extra = additional text printed just before the closing rule;
               NULL means print nothing extra
     The values of omp_get_num_procs() and omp_get_max_threads() are
     reported in the message. 'extra' is parenthesized so that an
     expression argument (e.g., a conditional) is handled correctly.  */
# define PRINT_AFNI_OMP_USAGE(pnam,extra)                                          \
  printf(                                                                          \
    "\n"                                                                           \
    " =========================================================================\n" \
    "* This binary version of %s is compiled using OpenMP, a semi-\n"              \
    "   automatic parallelizer software toolkit, which splits the work across\n"   \
    "   multiple CPUs/cores on the same shared memory computer.\n"                 \
    "* OpenMP is NOT like MPI -- it does not work with CPUs connected only\n"      \
    "   by a network (e.g., OpenMP doesn't work across cluster nodes).\n"          \
    "* For some implementation and compilation details, please see\n"              \
    "   https://afni.nimh.nih.gov/pub/dist/doc/misc/OpenMP.html\n"                 \
    "* The number of CPU threads used will default to the maximum number on\n"     \
    "   your system. You can control this value by setting environment variable\n" \
    "   OMP_NUM_THREADS to some smaller value (including 1).\n"                    \
    "* Un-setting OMP_NUM_THREADS resets OpenMP back to its default state of\n"    \
    "   using all CPUs available.\n"                                               \
    "   ++ However, on some systems, it seems to be necessary to set variable\n"   \
    "      OMP_NUM_THREADS explicitly, or you only get one CPU.\n"                 \
    "   ++ On other systems with many CPUS, you probably want to limit the CPU\n"  \
    "      count, since using more than (say) 16 threads is probably useless.\n"   \
    "* You must set OMP_NUM_THREADS in the shell BEFORE running the program,\n"    \
    "   since OpenMP queries this variable BEFORE the program actually starts.\n"  \
    "   ++ You can't usefully set this variable in your ~/.afnirc file or on the\n"\
    "      command line with the '-D' option.\n"                                   \
    "* How many threads are useful? That varies with the program, and how well\n"  \
    "   it was coded. You'll have to experiment on your own systems!\n"            \
    "* The number of CPUs on this particular computer system is ...... %d.\n"      \
    "* The maximum number of CPUs that will be used is now set to .... %d.\n"      \
    "%s"                                                                           \
    " =========================================================================\n" \
    , (pnam) , omp_get_num_procs() , omp_get_max_threads() ,                       \
      ((extra)==NULL) ? "" : (extra)                                               \
  )
95 #else
/* PRINT_AFNI_OMP_USAGE(pnam,extra), non-OpenMP build:
     Prints a notice to stdout that this binary was NOT compiled with
     OpenMP, with pointers on how to get an OpenMP-enabled version.
       pnam  = program name, inserted via %s
       extra = accepted so that callers compile unchanged, but not used
               (and therefore not evaluated) in this version            */
# define PRINT_AFNI_OMP_USAGE(pnam,extra)                                          \
  printf(                                                                          \
    "\n"                                                                           \
    " =========================================================================\n" \
    "* This binary version of %s is NOT compiled using OpenMP, a\n"                \
    "   semi-automatic parallelizer software toolkit, which splits the work\n"     \
    "   across multiple CPUs/cores on the same shared memory computer.\n"          \
    "* However, the source code is compatible with OpenMP, and can be compiled\n"  \
    "   with an OpenMP-capable compiler, such as gcc 8.x+, Intel's icc, and\n"     \
    "   Oracle Developer Studio.\n"                                                \
    "* If you wish to compile this program with OpenMP, see the man page for\n"    \
    "   your C compiler, and (if needed) consult the AFNI message board, and\n"    \
    "   https://afni.nimh.nih.gov/pub/dist/doc/misc/OpenMP.html\n"                 \
    "* However, it would probably be simplest to download a pre-compiled AFNI\n"   \
    "   binary set that uses OpenMP!\n"                                            \
    "   https://afni.nimh.nih.gov/pub/dist/doc/htmldoc/index.html\n"               \
    , (pnam)                                                                       \
  )
114 #endif
115 
116 /*----------------------------------------------------------------------------*/
/* Some macros for allocating static workspace arrays, per thread.
118    These are to aid in OpenMP-izing existing codes that use static variables.
119    The macros also have non-OpenMP versions, which don't create thread-wise
120    arrays.
121 
122    The 'DEFINE' macros declare the static arrays (one element per thread),
123    then the AO_VALUE macro is used to get the particular component for
124    the current thread.
125 *//*--------------------------------------------------------------------------*/
126 
127 #ifdef USE_OMP
128 
/* AOth   = OpenMP thread number of the caller (used as the array index) */
/* AO_nth = number of threads in the current OpenMP team                 */
#define AOth   omp_get_thread_num()
#define AO_nth omp_get_num_threads()

/* Number of slots in each per-thread static array declared below;
   AOth is used as an index without being checked against this limit. */
#define AO_NTH_MAX 99
133 
134 /*............................................................................*/
135 /* The macro below defines a scalar with name 'nam' of type 'typ'.
136    For example
137 
138       AO_DEFINE_SCALAR(int,ncall) ;  // can be inside a function or global
139       ...
140       if( AO_VALUE(ncall) == 0 ){
141         // do something special
142         AO_VALUE(ncall)++ ;
143       }
144 
145    The above would replace the non-thread safe code
146 
147       static int ncall ;
148       ...
149       if( ncall == 0 ){
150         // something
151        ncall++ ;
152       }
153 
154    Similar 'DEFINE' macros are available for defining and manipulating
155    per-thread arrays. The AO_VALUE macro can be used to extract the
156    pointer to the array data.
157 *//*..........................................................................*/
158 
/* Declarations: each creates static storage with AO_NTH_MAX slots, one
   per thread. Being static, every slot starts out as 0 / NULL.        */

/* one scalar of type 'typ' per thread */
#define AO_DEFINE_SCALAR(typ,nam)   static typ  AO##nam[AO_NTH_MAX]

/* one 'typ *' array pointer per thread, plus its current length */
#define AO_DEFINE_ARRAY(typ,nam)    static typ *AO##nam[AO_NTH_MAX] ;  \
                                    static int AOL##nam[AO_NTH_MAX]

/* one 'typ **' table of rows per thread, plus its two current lengths */
#define AO_DEFINE_2DARRAY(typ,nam)  static typ **AO##nam[AO_NTH_MAX] ; \
                                    static int AOL1##nam[AO_NTH_MAX] ; \
                                    static int AOL2##nam[AO_NTH_MAX]

/* Accessors: each expands to an lvalue for the calling thread's slot */

/* the scalar, array pointer, or row table itself */
#define AO_VALUE(nam)               AO##nam[AOth]

/* current length of a 1D array */
#define AO_ARRAY_LEN(nam)           AOL##nam[AOth]

/* current first and second dimensions of a 2D array */
#define AO_2DARRAY_LEN1(nam)        AOL1##nam[AOth]
#define AO_2DARRAY_LEN2(nam)        AOL2##nam[AOth]

/* original (misspelled) name for AO_2DARRAY_LEN2, kept for existing code */
#define AO_2DARRAY_LNE2(nam)        AOL2##nam[AOth]
174 
/* AO_RESIZE_ARRAY(typ,nam,len):
     Make the calling thread's array 'nam' hold at least 'len' elements of
     type 'typ'. A request no larger than the current length does nothing
     (the array never shrinks). New elements are not initialized.
     'len' may be any integer expression: it is parenthesized and evaluated
     exactly once. If realloc() fails, the old buffer is freed and the
     array is left empty (pointer NULL, length 0) so the caller can test
     AO_VALUE(nam) for NULL.                                              */
#define AO_RESIZE_ARRAY(typ,nam,len)                                         \
  do{ int aomp_hh = AOth ;                                                   \
      int aomp_nn = (int)(len) ;                                             \
      if( AOL##nam[aomp_hh] < aomp_nn ){                                     \
        typ *aomp_tt = (typ *)realloc( AO##nam[aomp_hh] ,                    \
                                       sizeof(typ)*(size_t)aomp_nn ) ;       \
        if( aomp_tt == NULL ){ free(AO##nam[aomp_hh]) ; aomp_nn = 0 ; }      \
        AO##nam[aomp_hh]  = aomp_tt ;                                        \
        AOL##nam[aomp_hh] = aomp_nn ;                                        \
  } } while(0)
181 
/* AO_FREE_ARRAY(nam):
     Release the calling thread's array 'nam' and mark it empty.
     free(NULL) is harmless, so no test is needed; the pointer and the
     length are always reset, so a NULL pointer can never be left paired
     with a stale nonzero length.                                       */
#define AO_FREE_ARRAY(nam)            \
  do{ int aomp_hh = AOth ;            \
      free( AO##nam[aomp_hh] ) ;      \
      AO##nam[aomp_hh]  = NULL ;      \
      AOL##nam[aomp_hh] = 0 ;         \
  } while(0)
189 
/* AO_RESIZE_2DARRAY(typ,nam,len1,len2):
     Make the calling thread's 2D array 'nam' have len1 rows, each holding
     len2 elements of type 'typ'. Nothing is done if both dimensions are
     already as requested. New elements are not initialized.
     'len1' and 'len2' may be any integer expressions: each is parenthesized
     and evaluated exactly once (a negative len1 is treated as 0).
     When len1 is smaller than the current row count, the rows being dropped
     are freed and their table entries set to NULL, so they are not leaked;
     the row table itself is not shrunk.
     NOTE: realloc() results are not checked here.                         */
#define AO_RESIZE_2DARRAY(typ,nam,len1,len2)                                   \
  do{ int aomp_hh = AOth , aomp_pp ;                                           \
      int aomp_o1 = AOL1##nam[aomp_hh] , aomp_o2 = AOL2##nam[aomp_hh] ;        \
      int aomp_n1 = (int)(len1) ;                                              \
      int aomp_n2 = (int)(len2) ;                                              \
      if( aomp_n1 < 0 ) aomp_n1 = 0 ;                                          \
      for( aomp_pp=aomp_n1 ; aomp_pp < aomp_o1 ; aomp_pp++ ){ /* drop rows */  \
        free( AO##nam[aomp_hh][aomp_pp] ) ;                                    \
        AO##nam[aomp_hh][aomp_pp] = NULL ;                                     \
      }                                                                        \
      if( aomp_o1 < aomp_n1 ){                          /* longer row table */ \
        AO##nam[aomp_hh] = (typ **)realloc( AO##nam[aomp_hh] ,                 \
                                        sizeof(typ *)*(size_t)aomp_n1 ) ;      \
        for( aomp_pp=aomp_o1 ; aomp_pp < aomp_n1 ; aomp_pp++ )                 \
          AO##nam[aomp_hh][aomp_pp] = NULL ;                                   \
      }                                                                        \
      if( aomp_o1 != aomp_n1 || aomp_o2 != aomp_n2 ){   /* (re)size each row */\
        for( aomp_pp=0 ; aomp_pp < aomp_n1 ; aomp_pp++ )                       \
          AO##nam[aomp_hh][aomp_pp] =                                          \
             (typ *)realloc( AO##nam[aomp_hh][aomp_pp] ,                       \
                             sizeof(typ)*(size_t)aomp_n2 ) ;                   \
        AOL1##nam[aomp_hh] = aomp_n1 ; AOL2##nam[aomp_hh] = aomp_n2 ;          \
      }                                                                        \
  } while(0)
202 
/* AO_FREE_2DARRAY(nam):
     Release every row of the calling thread's 2D array 'nam', then the row
     table, and mark the array empty. Rows that are NULL are handled by
     free() itself. Both lengths are always reset to 0, even when the table
     pointer was already NULL, so stale lengths cannot survive.            */
#define AO_FREE_2DARRAY(nam)                                          \
  do{ int aomp_hh = AOth , aomp_pp ;                                  \
      if( AO##nam[aomp_hh] != NULL ){                                 \
        for( aomp_pp=0 ; aomp_pp < AOL1##nam[aomp_hh] ; aomp_pp++ )   \
          free( AO##nam[aomp_hh][aomp_pp] ) ;                         \
        free( AO##nam[aomp_hh] ) ;                                    \
        AO##nam[aomp_hh] = NULL ;                                     \
      }                                                               \
      AOL1##nam[aomp_hh] = AOL2##nam[aomp_hh] = 0 ;                   \
  } while(0)
212 
213 /*----------------------------------------------------------------------------*/
214 /* Same macros for allocating workspaces, but just one copy of each */
215 
216 #else  /* not USE_OMP */
217 
/* serial build: a single "thread" numbered 0, and a single slot */
#define AOth       0
#define AO_nth     1
#define AO_NTH_MAX 1

/* Declarations: plain static variables (so they start out as 0 / NULL) */

/* one scalar of type 'typ' */
#define AO_DEFINE_SCALAR(typ,nam)   static typ AO##nam

/* one 'typ *' array pointer, plus its current length */
#define AO_DEFINE_ARRAY(typ,nam)    static typ *AO##nam; static int AOL##nam

/* one 'typ **' table of rows, plus its two current lengths */
#define AO_DEFINE_2DARRAY(typ,nam)  static typ **AO##nam ;             \
                                    static int AOL1##nam , AOL2##nam

/* Accessors: each expands to the corresponding variable (an lvalue) */

/* the scalar, array pointer, or row table itself */
#define AO_VALUE(nam)               AO##nam

/* current length of a 1D array; first and second dimensions of a 2D array */
#define AO_ARRAY_LEN(nam)           AOL##nam
#define AO_2DARRAY_LEN1(nam)        AOL1##nam
#define AO_2DARRAY_LEN2(nam)        AOL2##nam

/* original (misspelled) name for AO_2DARRAY_LEN2, kept for existing code */
#define AO_2DARRAY_LNE2(nam)        AOL2##nam
234 
/* AO_RESIZE_ARRAY(typ,nam,len):
     Make array 'nam' hold at least 'len' elements of type 'typ'. A request
     no larger than the current length does nothing (the array never
     shrinks). New elements are not initialized.
     'len' may be any integer expression: it is parenthesized and evaluated
     exactly once. If realloc() fails, the old buffer is freed and the
     array is left empty (pointer NULL, length 0) so the caller can test
     AO_VALUE(nam) for NULL.                                              */
#define AO_RESIZE_ARRAY(typ,nam,len)                                   \
  do{ int aomp_nn = (int)(len) ;                                       \
      if( AOL##nam < aomp_nn ){                                        \
        typ *aomp_tt = (typ *)realloc( AO##nam ,                       \
                                       sizeof(typ)*(size_t)aomp_nn ) ; \
        if( aomp_tt == NULL ){ free(AO##nam) ; aomp_nn = 0 ; }         \
        AO##nam  = aomp_tt ;                                           \
        AOL##nam = aomp_nn ;                                           \
  } } while(0)
240 
/* AO_FREE_ARRAY(nam):
     Release array 'nam' and mark it empty. free(NULL) is harmless, so no
     test is needed; the pointer and the length are always reset, so a
     NULL pointer can never be left paired with a stale nonzero length.  */
#define AO_FREE_ARRAY(nam)     \
  do{ free( AO##nam ) ;        \
      AO##nam  = NULL ;        \
      AOL##nam = 0 ;           \
  } while(0)
245 
246 
/* AO_RESIZE_2DARRAY(typ,nam,len1,len2):
     Make 2D array 'nam' have len1 rows, each holding len2 elements of type
     'typ'. Nothing is done if both dimensions are already as requested.
     New elements are not initialized.
     'len1' and 'len2' may be any integer expressions: each is parenthesized
     and evaluated exactly once (a negative len1 is treated as 0).
     When len1 is smaller than the current row count, the rows being dropped
     are freed and their table entries set to NULL, so they are not leaked;
     the row table itself is not shrunk.
     NOTE: realloc() results are not checked here.                         */
#define AO_RESIZE_2DARRAY(typ,nam,len1,len2)                                 \
  do{ int aomp_pp , aomp_o1 = AOL1##nam , aomp_o2 = AOL2##nam ;              \
      int aomp_n1 = (int)(len1) ;                                            \
      int aomp_n2 = (int)(len2) ;                                            \
      if( aomp_n1 < 0 ) aomp_n1 = 0 ;                                        \
      for( aomp_pp=aomp_n1 ; aomp_pp < aomp_o1 ; aomp_pp++ ){ /* drop rows */\
        free( AO##nam[aomp_pp] ) ;                                           \
        AO##nam[aomp_pp] = NULL ;                                            \
      }                                                                      \
      if( aomp_o1 < aomp_n1 ){                        /* longer row table */ \
        AO##nam = (typ **)realloc( AO##nam ,                                 \
                                   sizeof(typ *)*(size_t)aomp_n1 ) ;         \
        for( aomp_pp=aomp_o1 ; aomp_pp < aomp_n1 ; aomp_pp++ )               \
          AO##nam[aomp_pp] = NULL ;                                          \
      }                                                                      \
      if( aomp_o1 != aomp_n1 || aomp_o2 != aomp_n2 ){ /* (re)size each row */\
        for( aomp_pp=0 ; aomp_pp < aomp_n1 ; aomp_pp++ )                     \
          AO##nam[aomp_pp] = (typ *)realloc( AO##nam[aomp_pp] ,              \
                                         sizeof(typ)*(size_t)aomp_n2 ) ;     \
        AOL1##nam = aomp_n1 ; AOL2##nam = aomp_n2 ;                          \
      }                                                                      \
  } while(0)
259 
/* AO_FREE_2DARRAY(nam):
     Release every row of 2D array 'nam', then the row table, and mark the
     array empty. Rows that are NULL are handled by free() itself. Both
     lengths are always reset to 0, even when the table pointer was already
     NULL, so stale lengths cannot survive.                               */
#define AO_FREE_2DARRAY(nam)                                   \
  do{ int aomp_pp ;                                            \
      if( AO##nam != NULL ){                                   \
        for( aomp_pp=0 ; aomp_pp < AOL1##nam ; aomp_pp++ )     \
          free( AO##nam[aomp_pp] ) ;                           \
        free( AO##nam ) ;                                      \
        AO##nam = NULL ;                                       \
      }                                                        \
      AOL1##nam = AOL2##nam = 0 ;                              \
  } while(0)
269 
270 #endif  /* USE_OMP */
271 
272 /*----------------------------------------------------------------------------*/
273 
274 #endif /* _AOMP_INCLUDED__ */
275