1 /*@ BSdepend.h - This file defines all the message-passing macros
2               for the BlockSolve95 package.
3 
4 	System Description:
    All the macros call MPI routines.  Note that there is a blocking
    version and a nonblocking version based on whether NO_BLOCKING_SEND
    is defined.
8 
9  @*/
10 
11 #ifndef __BSdependh
12 #define __BSdependh
13 
14 /* include MPI */
15 #include "mpi.h"
16 #define MPI_Aint int
17 
18 #include <stdio.h>
19 
20 #if defined(PARCH_freebsd)
21 #include <stdlib.h>
22 #endif
23 
24 #if defined(PARCH_sun4) && !defined(__cplusplus) && defined(_Gnu_)
25 extern int fprintf(FILE*,const char*,...);
26 extern void sscanf(char *,...);
27 extern int printf(const char *,...);
28 extern void *malloc(long unsigned int );
29 extern void free(void *);
30 extern int srand(unsigned int);
31 extern int rand();
32 #endif
33 #if defined(__cplusplus)
34 extern "C" {
35 extern void *malloc(long unsigned int );
36 extern void free(void *);
37 extern int  abs(int);
38 extern double fabs(double);
39 extern void srand(int);
40 extern int rand();
41 extern double drand48();
42 extern void srand48(long);
43 extern int exit(int);
44 }
45 #endif
46 
47 #define DEBUG_ALL 1
48 
49 #ifdef DEBUG_ALL
50 #define DEBUG_ACT printf("ERROR: Code %d occured at Line %d in File %s\n",__BSERROR_STATUS,__LINE__,__FILE__);
51 
52 #define DEBUG_ACTC(msg) printf("ERROR: Code %d occured at Line %d in File %s: %s\n",__BSERROR_STATUS,__LINE__,__FILE__,msg);
53 
54 #define SETERR(code_num) {__BSERROR_STATUS=code_num;DEBUG_ACT;}
55 #define SETERRC(code_num,str) {__BSERROR_STATUS=code_num;DEBUG_ACTC(str);}
56 
57 #define GETERR (__BSERROR_STATUS)
58 #define CHKERR(code_num) {if (GETERR) {DEBUG_ACT;return;}}
59 #define CHKERRN(code_num) {if (GETERR) {DEBUG_ACT;return 0;}}
60 
61 #else
62 #define SETERR(code_num)
63 #define SETERRC(code_num,str)
64 #define CHKERR(a)
65 #define CHKERRN(a)
66 #endif
67 
68 #ifdef TRMALLOC
69 #define MALLOC(c) TrMalloc(c,__LINE__,__FILE__)
70 #define FREE(c) TrFree(c,__LINE__,__FILE__)
71 #define CHECK_MEMORY() \
72 { \
73 	int ierr99; \
74 	ierr99 = TrDump(stderr); \
75 }
76 #else
77 #define MALLOC(c) malloc(c)
78 #define FREE(c) free(c)
79 #define CHECK_MEMORY()
80 #endif
81 
82 #define RECVSYNCNOMEM(type,buf,length,data_type,p_info,mpistat) \
83 	MPI_Recv(buf,length,data_type,MPI_ANY_SOURCE,type,p_info->procset,&mpistat)
84 
85 #define RECVSYNCUNSZ(type,buf,length,data_type,p_info,mpistat) \
86 { \
87 	MPI_Aint size99; \
88 	MPI_Status mpistat99; \
89 	MPI_Probe(MPI_ANY_SOURCE,type,p_info->procset,&mpistat99); \
90 	MPI_Get_count(&mpistat99,data_type,&length); \
91 	MPI_Type_size(data_type,&size99); \
92 	MY_MALLOC(buf,(void *),size99*length,1); \
93 	MPI_Recv(buf,length,data_type,mpistat99.MPI_SOURCE, \
94 		mpistat99.MPI_TAG,p_info->procset,&mpistat); \
95 }
96 
97 #define RECVSYNCUNSZN(type,buf,length,data_type,p_info,mpistat) \
98 { \
99 	MPI_Aint size99; \
100 	MPI_Status mpistat99; \
101 	MPI_Probe(MPI_ANY_SOURCE,type,p_info->procset,&mpistat99); \
102 	MPI_Get_count(&mpistat99,data_type,&length); \
103 	MPI_Type_size(data_type,&size99); \
104 	MY_MALLOCN(buf,(void *),size99*length,1); \
105 	MPI_Recv(buf,length,data_type,mpistat99.MPI_SOURCE, \
106 		mpistat99.MPI_TAG,p_info->procset,&mpistat); \
107 }
108 
109 #define MSGFREERECV(msg) MY_FREE(msg)
110 
111 #define RECVASYNCNOMEMFORCE(type,buf,length,data_type,msg_id,p_info) \
112 	MPI_Irecv(buf,length,data_type,MPI_ANY_SOURCE,type,p_info->procset,&(msg_id))
113 
114 #define SENDASYNCNOMEMFORCE(type,buf,size,to_proc,data_type,msg_id,p_info) \
115 	MPI_Irsend(buf,size,data_type,to_proc,type,p_info->procset,&(msg_id))
116 
117 #define SENDASYNCNOMEM(type,buf,size,to_proc,data_type,msg_id,p_info) \
118 	MPI_Isend((void *)buf,size,data_type,to_proc,type,p_info->procset,(MPI_Request *)(&(msg_id)))
119 
120 #define SENDSYNCNOMEM(type,buf,size,to_proc,data_type,p_info) \
121 	MPI_Send((void *)buf,size,data_type,to_proc,type,(p_info->procset))
122 
123 #define SENDWAITNOMEM(type,buf,size,to_proc,data_type,msg_id) \
124 { \
125 	MPI_Status	stat99; \
126 	MPI_Wait(&(msg_id),&stat99); \
127 }
128 
129 #define GISUM(sum_vec,vec_len,work_vec,procset) \
130 { \
131 	int	i99, *iptr1 = (int *) sum_vec, *iptr2 = (int *) work_vec; \
132 	for (i99=0;i99<(vec_len);i99++) { \
133 		(iptr2)[i99] = (iptr1)[i99]; \
134 	} \
135 	MPI_Allreduce(iptr2,iptr1,vec_len,MPI_INT,MPI_SUM,procset); \
136 }
137 
138 #define GIOR(sum_vec,vec_len,work_vec,procset) \
139 { \
140 	int	i99, *iptr1 = (int *) sum_vec, *iptr2 = (int *) work_vec; \
141 	for (i99=0;i99<(vec_len);i99++) { \
142 		(iptr2)[i99] = (iptr1)[i99]; \
143 	} \
144 	MPI_Allreduce(iptr2,iptr1,vec_len,MPI_INT,MPI_LOR,procset); \
145 }
146 
147 #define GIMIN(sum_vec,vec_len,work_vec,procset) \
148 { \
149 	int	i99, *iptr1 = (int *) sum_vec, *iptr2 = (int *) work_vec; \
150 	for (i99=0;i99<(vec_len);i99++) { \
151 		(iptr2)[i99] = (iptr1)[i99]; \
152 	} \
153 	MPI_Allreduce(iptr2,iptr1,vec_len,MPI_INT,MPI_MIN,procset); \
154 }
155 
156 #define GIMAX(sum_vec,vec_len,work_vec,procset) \
157 { \
158 	int	i99, *iptr1 = (int *) sum_vec, *iptr2 = (int *) work_vec; \
159 	for (i99=0;i99<(vec_len);i99++) { \
160 		(iptr2)[i99] = (iptr1)[i99]; \
161 	} \
162 	MPI_Allreduce(iptr2,iptr1,vec_len,MPI_INT,MPI_MAX,procset); \
163 }
164 
165 #define GDMAX(sum_vec,vec_len,work_vec,procset) \
166 { \
167 	int	i99; \
168 	double	*dptr1 = (double *) sum_vec, *dptr2 = (double *) work_vec; \
169 	for (i99=0;i99<(vec_len);i99++) { \
170 		(dptr2)[i99] = (dptr1)[i99]; \
171 	} \
172 	MPI_Allreduce(dptr2,dptr1,vec_len,MPI_DOUBLE,MPI_MAX,procset); \
173 }
174 
175 #define GDMIN(sum_vec,vec_len,work_vec,procset) \
176 { \
177 	int	i99; \
178 	double	*dptr1 = (double *) sum_vec, *dptr2 = (double *) work_vec; \
179 	for (i99=0;i99<(vec_len);i99++) { \
180 		(dptr2)[i99] = (dptr1)[i99]; \
181 	} \
182 	MPI_Allreduce(dptr2,dptr1,vec_len,MPI_DOUBLE,MPI_MIN,procset); \
183 }
184 
185 #define GDSUM(sum_vec,vec_len,work_vec,procset) \
186 { \
187 	int	i99; \
188 	double	*dptr1 = (double *) sum_vec, *dptr2 = (double *) work_vec; \
189 	for (i99=0;i99<(vec_len);i99++) { \
190 		(dptr2)[i99] = (dptr1)[i99]; \
191 	} \
192 	MPI_Allreduce(dptr2,dptr1,vec_len,MPI_DOUBLE,MPI_SUM,procset); \
193 }
194 
195 #define GFSUM(sum_vec,vec_len,work_vec,procset) \
196 { \
197 	int	i99; \
198 	float	*fptr1 = (float *) sum_vec, *fptr2 = (float *) work_vec; \
199 	for (i99=0;i99<(vec_len);i99++) { \
200 		(fptr2)[i99] = (fptr1)[i99]; \
201 	} \
202 	MPI_Allreduce(fptr2,fptr1,vec_len,MPI_FLOAT,MPI_SUM,procset); \
203 }
204 
205 #define GSYNC(procset) MPI_Barrier(procset)
206 
207 #define PSISROOT(procinfo) ((procinfo->my_id == 0) ? 1 : 0)
208 
209 typedef MPI_Comm ProcSet;
210 
211 /* the function PSNbrTree returns the parent, left child, or right child */
212 /* processor number is the processors are organized conceptually as a tree */
213 /* op_code is one of the following */
214 #define PS_PARENT 0
215 #define PS_LCHILD 1
216 #define PS_RCHILD 2
217 #define PSNbrTree(op_code,nbr_id,procset) \
218 { \
219 	int	my_id99, np99; \
220 	MPI_Comm_rank(procset,&my_id99); \
221 	MPI_Comm_size(procset,&np99); \
222 	switch(op_code) { \
223 		case PS_PARENT: \
224 			if (my_id99 == 0) { \
225 				nbr_id = -1; \
226 			} else { \
227 				nbr_id = (my_id99-1) / 2; \
228 			} \
229 			break; \
230 		case PS_LCHILD: \
231 			nbr_id = (2*my_id99) + 1; \
232 			break; \
233 		case PS_RCHILD: \
234 			nbr_id = (2*my_id99) + 2; \
235 			break; \
236 	} \
237 	if (nbr_id >= np99) nbr_id = -1; \
238 }
239 
240 /* PICall(procedure,argc,argv) gets things started */
241 #define PICall(procedure,argc,argv) \
242 { \
243 	MPI_Init(&argc,&argv); \
244 	procedure(argc,argv); \
245 	MPI_Finalize(); \
246 }
247 
248 /* set up the right "Fortran naming" definitions for use in BSsparse.h */
249 /* somehow the name of your architecture must be identified */
250 /* the following setup works for the *vast* majority of systems */
251 /* this affects only the linking of the blas and lapack routines called */
252 /*
253        FORTRANCAPS:       Names are uppercase, no trailing underscore
254        FORTRANUNDERSCORE: Names are lowercase, trailing underscore
255 */
256 #if defined(PARCH_cray) || defined(PARCH_NCUBE) || defined(PARCH_t3d)
257 #define FORTRANCAPS
258 #elif !defined(PARCH_rs6000) && !defined(PARCH_NeXT) && !defined(PARCH_hpux)
259 #define FORTRANUNDERSCORE
260 #endif
261 
262 /* ********************************************************************** */
263 /* this is a set of macros that retrofit the code for machines on which
264    blocking sends are a bad idea.  These macros make all sends asynchronous
265    by allocating buffers for the synchronous sends and cleaning up
266    those buffers as we go */
267 
268 /* To turn this code "on" simply define NO_BLOCKING_SEND */
269 /* For now, the only architecture that we know needs this is the rs6000 */
270 /* and, perhaps, the HPs */
271 #if defined(PARCH_rs6000) || defined(PARCH_hpux) || defined(PARCH_t3d) || defined(PARCH_IRIX)
272 #define NO_BLOCKING_SEND 1
273 #endif
274 
275 typedef	struct __BSmsg_list {
276 	int	msg_type;
277 	char *msg_buf;
278 	int	msg_len;
279 	int	msg_to;
280 	MPI_Datatype msg_data_type;
281 	MPI_Request	msg_id;
282 	struct	__BSmsg_list	*next;
283 } BSmsg_list;
284 
285 #ifdef NO_BLOCKING_SEND
286 #define	MY_SEND_SYNC(Mmsg_list,Mmsg_type,Mmsg,Mmsg_len,Mmsg_to,Mmsg_data_type,Mp_info) \
287 { \
288 	BSmsg_list *node_99; \
289 	int i99; \
290 	MPI_Aint size99; \
291 	char *tmsg_ptr99; \
292 	MY_MALLOC(node_99,(BSmsg_list *),sizeof(BSmsg_list),1); \
293 	node_99->next = Mmsg_list; \
294 	Mmsg_list = node_99; \
295 	MPI_Type_size(Mmsg_data_type,&size99); \
296 	if (Mmsg_len == 0) { \
297 		MY_MALLOC(node_99->msg_buf,(char *),size99,2); \
298 	} else { \
299 		MY_MALLOC(node_99->msg_buf,(char *),Mmsg_len*size99,3); \
300 	} \
301 	node_99->msg_type = Mmsg_type; \
302 	node_99->msg_len = Mmsg_len; \
303 	node_99->msg_to = Mmsg_to; \
304 	node_99->msg_data_type = Mmsg_data_type; \
305 	tmsg_ptr99 = (char *) Mmsg; \
306 	for (i99=0;i99<Mmsg_len*size99;i99++) { \
307 		node_99->msg_buf[i99] = tmsg_ptr99[i99]; \
308 	} \
309 	SENDASYNCNOMEM(Mmsg_type,node_99->msg_buf,Mmsg_len,Mmsg_to,Mmsg_data_type,\
310 		node_99->msg_id,Mp_info); \
311 	CHECK_SEND_LIST(Mmsg_list); \
312 }
313 #else
314 #define	MY_SEND_SYNC(Mmsg_list,Mmsg_type,Mmsg,Mmsg_len,Mmsg_to,Mmsg_data_type,Mp_info) \
315 { \
316 	SENDSYNCNOMEM(Mmsg_type,Mmsg,Mmsg_len,Mmsg_to,Mmsg_data_type,Mp_info); \
317 }
318 #endif
319 
320 #define	MCHECK_SEND_LIST(Mmsg_list) \
321 { \
322 	BSmsg_list	*node_ptr_99, *prev_node_ptr_99, *tnode_ptr_99; \
323 	int	fin99; \
324 	MPI_Status	stat99; \
325 	node_ptr_99 = Mmsg_list; \
326 	prev_node_ptr_99 = Mmsg_list; \
327 	while (node_ptr_99 != NULL) { \
328 		MPI_Test(&(node_ptr_99->msg_id),&fin99,&stat99); \
329 		if (fin99) { \
330 			tnode_ptr_99 = node_ptr_99; \
331 			if (node_ptr_99 == Mmsg_list) { \
332 				Mmsg_list = Mmsg_list->next; \
333 				prev_node_ptr_99 = Mmsg_list; \
334 				node_ptr_99 = Mmsg_list; \
335 			} else { \
336 				prev_node_ptr_99->next = node_ptr_99->next; \
337 				node_ptr_99 = node_ptr_99->next; \
338 			} \
339 			MY_FREE(tnode_ptr_99->msg_buf); \
340 			MY_FREE(tnode_ptr_99); \
341 		} else { \
342 			prev_node_ptr_99 = node_ptr_99; \
343 			node_ptr_99 = node_ptr_99->next; \
344 		} \
345 	} \
346 }
347 #ifdef NO_BLOCKING_SEND
348 #define	CHECK_SEND_LIST(Mmsg_list) MCHECK_SEND_LIST(Mmsg_list)
349 #else
350 #define	CHECK_SEND_LIST(Mmsg_list)
351 #endif
352 
353 #define MFINISH_SEND_LIST(Mmsg_list) \
354 { \
355 	BSmsg_list	*tnode99; \
356 	while(Mmsg_list != NULL) { \
357 		tnode99 = Mmsg_list; \
358 		Mmsg_list = Mmsg_list->next; \
359 		SENDWAITNOMEM(tnode99->msg_type,tnode99->msg_buf,tnode99->msg_len, \
360 			tnode99->msg_to,tnode99->msg_data_type,tnode99->msg_id); \
361 		MY_FREE(tnode99->msg_buf); \
362 		MY_FREE(tnode99); \
363 	} \
364 }
365 #ifdef NO_BLOCKING_SEND
366 #define FINISH_SEND_LIST(Mmsg_list) MFINISH_SEND_LIST(Mmsg_list)
367 #else
368 #define FINISH_SEND_LIST(Mmsg_list)
369 #endif
370 
371 #endif
372