1/*@ BSdepend.h - This file defines all the message-passing macros
2              for the BlockSolve95 package.
3
4	System Description:
    All the macros call MPI routines.  Note that there are a blocking
    version and a nonblocking version, selected by whether NO_BLOCKING_SEND
    is defined.
8
9 @*/
10
11#ifndef __BSdependh
12#define __BSdependh
13
/* include MPI */
#include "mpi.h"
/* NOTE(review): forces MPI_Aint to be plain int.  This textually
   overrides any MPI_Aint the mpi.h above provides, so the size
   variables passed to MPI_Type_size below are really int* -- presumably
   a retrofit for pre-standard MPI implementations; confirm it is still
   wanted with the target MPI library before removing. */
#define MPI_Aint int
17
#include <stdio.h>
/* Prototypes for pre-ANSI SunOS 4 C libraries when compiling with gcc.
   NOTE(review): several signatures differ from ANSI C (sscanf returns
   int, srand returns void in standard C) -- presumably they match the
   SunOS 4 libc of the time; confirm before "correcting" them. */
#if defined(PARCH_sun4) && !defined(__cplusplus) && defined(_Gnu_)
extern int fprintf(FILE*,const char*,...);
extern void sscanf(char *,...);
extern int printf(const char *,...);
extern void *malloc(long unsigned int );
extern void free(void *);
extern int srand(unsigned int);
extern int rand();
#endif
/* C-linkage prototypes for early C++ compilers whose library headers
   were incomplete.
   NOTE(review): these clash with modern <cstdlib>/<cmath> declarations
   (exit returns void, srand takes unsigned in standard C/C++); they are
   harmless only on the old compilers this guard was written for --
   verify if building with a current C++ compiler. */
#if defined(__cplusplus)
extern "C" {
extern void *malloc(long unsigned int );
extern void free(void *);
extern int  abs(int);
extern double fabs(double);
extern void srand(int);
extern int rand();
extern double drand48();
extern void srand48(long);
extern int exit(int);
}
#endif
41
/* Error-reporting macros.  DEBUG_ALL is defined unconditionally here,
   so the reporting versions below are always active; remove the define
   to compile them away (SETERR/SETERRC then discard their arguments and
   CHKERR/CHKERRN become no-ops).
   __BSERROR_STATUS is a global error-status variable declared elsewhere
   in the package. */
#define DEBUG_ALL 1

#ifdef DEBUG_ALL
/* Print the current error code with its source location.
   (Fix: "occured" -> "occurred" in the message text.) */
#define DEBUG_ACT printf("ERROR: Code %d occurred at Line %d in File %s\n",__BSERROR_STATUS,__LINE__,__FILE__);

/* Same as DEBUG_ACT but appends a caller-supplied message string. */
#define DEBUG_ACTC(msg) printf("ERROR: Code %d occurred at Line %d in File %s: %s\n",__BSERROR_STATUS,__LINE__,__FILE__,msg);

/* Record error code_num in the global status and report it. */
#define SETERR(code_num) {__BSERROR_STATUS=code_num;DEBUG_ACT;}
#define SETERRC(code_num,str) {__BSERROR_STATUS=code_num;DEBUG_ACTC(str);}

/* Current error status (nonzero means an error has been recorded). */
#define GETERR (__BSERROR_STATUS)
/* If an error is pending, report it and return from the enclosing
   function (CHKERR for void functions, CHKERRN for functions returning
   a value).  code_num is unused -- kept for call-site documentation.
   NOTE: brace-block macros; do not use as the sole body of an if/else
   without braces. */
#define CHKERR(code_num) {if (GETERR) {DEBUG_ACT;return;}}
#define CHKERRN(code_num) {if (GETERR) {DEBUG_ACT;return 0;}}

#else
#define SETERR(code_num)
#define SETERRC(code_num,str)
/* Fix: GETERR was previously left undefined in the non-debug
   configuration, breaking any caller that reads the status. */
#define GETERR (__BSERROR_STATUS)
#define CHKERR(a)
#define CHKERRN(a)
#endif
62
/* Memory-allocation wrappers.  With TRMALLOC defined they route through
   the TrMalloc tracing allocator (recording the call site) and
   CHECK_MEMORY dumps the outstanding-allocation table to stderr;
   otherwise they fall straight through to malloc/free and CHECK_MEMORY
   is a no-op. */
#ifdef TRMALLOC
#define MALLOC(nbytes) TrMalloc(nbytes,__LINE__,__FILE__)
#define FREE(addr) TrFree(addr,__LINE__,__FILE__)
#define CHECK_MEMORY() \
{ \
	int trdump_err99; \
	trdump_err99 = TrDump(stderr); \
}
#else
#define MALLOC(nbytes) malloc(nbytes)
#define FREE(addr) free(addr)
#define CHECK_MEMORY()
#endif
76
/* RECVSYNCNOMEM - blocking receive into a caller-supplied buffer buf of
   `length` items of data_type, matching tag `type` from any source on
   communicator p_info->procset; the MPI status lands in mpistat. */
#define RECVSYNCNOMEM(type,buf,length,data_type,p_info,mpistat) \
	MPI_Recv(buf,length,data_type,MPI_ANY_SOURCE,type,p_info->procset,&mpistat)
79
/* RECVSYNCUNSZ - blocking receive of a message of unknown size.
   Probes for any message with tag `type`, overwrites `length` (must be
   an int lvalue) with the probed item count, allocates buf to hold it,
   and then receives from exactly the probed source/tag so the allocated
   buffer and the received message cannot mismatch.  Free the buffer
   later with MSGFREERECV.
   NOTE(review): MY_MALLOC is not defined in this file -- presumably a
   checked-allocation macro from BSsparse.h; verify.  size99 is
   MPI_Aint, which is really int here (see the #define at the top), as
   MPI-1's MPI_Type_size expects. */
#define RECVSYNCUNSZ(type,buf,length,data_type,p_info,mpistat) \
{ \
	MPI_Aint size99; \
	MPI_Status mpistat99; \
	MPI_Probe(MPI_ANY_SOURCE,type,p_info->procset,&mpistat99); \
	MPI_Get_count(&mpistat99,data_type,&length); \
	MPI_Type_size(data_type,&size99); \
	MY_MALLOC(buf,(void *),size99*length,1); \
	MPI_Recv(buf,length,data_type,mpistat99.MPI_SOURCE, \
		mpistat99.MPI_TAG,p_info->procset,&mpistat); \
}
91
/* RECVSYNCUNSZN - identical to RECVSYNCUNSZ except the allocation goes
   through MY_MALLOCN -- presumably the variant for use inside functions
   that return a value (matching the CHKERR/CHKERRN naming pattern);
   verify against BSsparse.h. */
#define RECVSYNCUNSZN(type,buf,length,data_type,p_info,mpistat) \
{ \
	MPI_Aint size99; \
	MPI_Status mpistat99; \
	MPI_Probe(MPI_ANY_SOURCE,type,p_info->procset,&mpistat99); \
	MPI_Get_count(&mpistat99,data_type,&length); \
	MPI_Type_size(data_type,&size99); \
	MY_MALLOCN(buf,(void *),size99*length,1); \
	MPI_Recv(buf,length,data_type,mpistat99.MPI_SOURCE, \
		mpistat99.MPI_TAG,p_info->procset,&mpistat); \
}
103
/* MSGFREERECV - release a buffer allocated by RECVSYNCUNSZ(N).
   NOTE(review): MY_FREE is defined elsewhere (BSsparse.h?). */
#define MSGFREERECV(msg) MY_FREE(msg)

/* RECVASYNCNOMEMFORCE - post a non-blocking receive into a
   caller-supplied buffer; msg_id receives the MPI_Request to wait on. */
#define RECVASYNCNOMEMFORCE(type,buf,length,data_type,msg_id,p_info) \
	MPI_Irecv(buf,length,data_type,MPI_ANY_SOURCE,type,p_info->procset,&(msg_id))

/* SENDASYNCNOMEMFORCE - non-blocking READY-mode send (MPI_Irsend):
   the matching receive must already be posted -- hence "FORCE". */
#define SENDASYNCNOMEMFORCE(type,buf,size,to_proc,data_type,msg_id,p_info) \
	MPI_Irsend(buf,size,data_type,to_proc,type,p_info->procset,&(msg_id))

/* SENDASYNCNOMEM - ordinary non-blocking standard-mode send. */
#define SENDASYNCNOMEM(type,buf,size,to_proc,data_type,msg_id,p_info) \
	MPI_Isend((void *)buf,size,data_type,to_proc,type,p_info->procset,(MPI_Request *)(&(msg_id)))

/* SENDSYNCNOMEM - blocking standard-mode send. */
#define SENDSYNCNOMEM(type,buf,size,to_proc,data_type,p_info) \
	MPI_Send((void *)buf,size,data_type,to_proc,type,(p_info->procset))

/* SENDWAITNOMEM - block until the request msg_id completes; the status
   is discarded.  The other parameters are unused and kept only for
   call-site symmetry with the send macros. */
#define SENDWAITNOMEM(type,buf,size,to_proc,data_type,msg_id) \
{ \
	MPI_Status	stat99; \
	MPI_Wait(&(msg_id),&stat99); \
}
123
/* GISUM - global element-wise integer sum.
   Copies the vec_len ints of sum_vec into the caller-supplied scratch
   array work_vec (length >= vec_len), then reduces the copy back into
   sum_vec with MPI_SUM over communicator procset.  On return sum_vec
   holds the global sums; work_vec holds this process's original values.
   Fix: macro arguments are parenthesized so expression arguments
   (e.g. "vec+off") bind correctly under the casts. */
#define GISUM(sum_vec,vec_len,work_vec,procset) \
{ \
	int	i99, *iptr1 = (int *) (sum_vec), *iptr2 = (int *) (work_vec); \
	for (i99=0;i99<(vec_len);i99++) { \
		iptr2[i99] = iptr1[i99]; \
	} \
	MPI_Allreduce(iptr2,iptr1,(vec_len),MPI_INT,MPI_SUM,(procset)); \
}
132
/* GIOR - global element-wise logical OR of ints (MPI_LOR); same
   copy-to-scratch-then-Allreduce pattern as GISUM.
   Fix: macro arguments parenthesized for expression safety. */
#define GIOR(sum_vec,vec_len,work_vec,procset) \
{ \
	int	i99, *iptr1 = (int *) (sum_vec), *iptr2 = (int *) (work_vec); \
	for (i99=0;i99<(vec_len);i99++) { \
		iptr2[i99] = iptr1[i99]; \
	} \
	MPI_Allreduce(iptr2,iptr1,(vec_len),MPI_INT,MPI_LOR,(procset)); \
}
141
/* GIMIN - global element-wise integer minimum; same pattern as GISUM.
   Fix: macro arguments parenthesized for expression safety. */
#define GIMIN(sum_vec,vec_len,work_vec,procset) \
{ \
	int	i99, *iptr1 = (int *) (sum_vec), *iptr2 = (int *) (work_vec); \
	for (i99=0;i99<(vec_len);i99++) { \
		iptr2[i99] = iptr1[i99]; \
	} \
	MPI_Allreduce(iptr2,iptr1,(vec_len),MPI_INT,MPI_MIN,(procset)); \
}
150
/* GIMAX - global element-wise integer maximum; same pattern as GISUM.
   Fix: macro arguments parenthesized for expression safety. */
#define GIMAX(sum_vec,vec_len,work_vec,procset) \
{ \
	int	i99, *iptr1 = (int *) (sum_vec), *iptr2 = (int *) (work_vec); \
	for (i99=0;i99<(vec_len);i99++) { \
		iptr2[i99] = iptr1[i99]; \
	} \
	MPI_Allreduce(iptr2,iptr1,(vec_len),MPI_INT,MPI_MAX,(procset)); \
}
159
/* GDMAX - global element-wise double maximum; copies sum_vec into the
   scratch work_vec, then Allreduces back into sum_vec with MPI_MAX.
   Fix: macro arguments parenthesized for expression safety. */
#define GDMAX(sum_vec,vec_len,work_vec,procset) \
{ \
	int	i99; \
	double	*dptr1 = (double *) (sum_vec), *dptr2 = (double *) (work_vec); \
	for (i99=0;i99<(vec_len);i99++) { \
		dptr2[i99] = dptr1[i99]; \
	} \
	MPI_Allreduce(dptr2,dptr1,(vec_len),MPI_DOUBLE,MPI_MAX,(procset)); \
}
169
/* GDMIN - global element-wise double minimum; same pattern as GDMAX.
   Fix: macro arguments parenthesized for expression safety. */
#define GDMIN(sum_vec,vec_len,work_vec,procset) \
{ \
	int	i99; \
	double	*dptr1 = (double *) (sum_vec), *dptr2 = (double *) (work_vec); \
	for (i99=0;i99<(vec_len);i99++) { \
		dptr2[i99] = dptr1[i99]; \
	} \
	MPI_Allreduce(dptr2,dptr1,(vec_len),MPI_DOUBLE,MPI_MIN,(procset)); \
}
179
/* GDSUM - global element-wise double sum; same pattern as GDMAX.
   Fix: macro arguments parenthesized for expression safety. */
#define GDSUM(sum_vec,vec_len,work_vec,procset) \
{ \
	int	i99; \
	double	*dptr1 = (double *) (sum_vec), *dptr2 = (double *) (work_vec); \
	for (i99=0;i99<(vec_len);i99++) { \
		dptr2[i99] = dptr1[i99]; \
	} \
	MPI_Allreduce(dptr2,dptr1,(vec_len),MPI_DOUBLE,MPI_SUM,(procset)); \
}
189
/* GFSUM - global element-wise float sum; same pattern as GDSUM.
   Fix: macro arguments parenthesized for expression safety. */
#define GFSUM(sum_vec,vec_len,work_vec,procset) \
{ \
	int	i99; \
	float	*fptr1 = (float *) (sum_vec), *fptr2 = (float *) (work_vec); \
	for (i99=0;i99<(vec_len);i99++) { \
		fptr2[i99] = fptr1[i99]; \
	} \
	MPI_Allreduce(fptr2,fptr1,(vec_len),MPI_FLOAT,MPI_SUM,(procset)); \
}
199
/* GSYNC - barrier: block until every process in procset has entered. */
#define GSYNC(procset) MPI_Barrier(procset)
201
/* PSISROOT - 1 if this process is the root (my_id == 0), else 0.
   procinfo must point to a structure with an int my_id field.
   Fix: procinfo is parenthesized so expression arguments such as
   "&info" or "infos+k" bind correctly before "->". */
#define PSISROOT(procinfo) (((procinfo)->my_id == 0) ? 1 : 0)
203
/* PSNbrTree sets nbr_id (an int lvalue) to the parent, left-child, or
   right-child processor rank if the processors are organized
   conceptually as a binary heap: parent of r is (r-1)/2, children are
   2r+1 and 2r+2.  nbr_id is set to -1 when the requested neighbor does
   not exist (parent of the root, or a child rank >= the communicator
   size).  op_code is one of the following: */
#define PS_PARENT 0
#define PS_LCHILD 1
#define PS_RCHILD 2
#define PSNbrTree(op_code,nbr_id,procset) \
{ \
	int	my_id99, np99; \
	MPI_Comm_rank(procset,&my_id99); \
	MPI_Comm_size(procset,&np99); \
	switch(op_code) { \
		case PS_PARENT: \
			if (my_id99 == 0) { \
				nbr_id = -1; \
			} else { \
				nbr_id = (my_id99-1) / 2; \
			} \
			break; \
		case PS_LCHILD: \
			nbr_id = (2*my_id99) + 1; \
			break; \
		case PS_RCHILD: \
			nbr_id = (2*my_id99) + 2; \
			break; \
		default: \
			/* Fix: invalid op_code previously left nbr_id unset */ \
			nbr_id = -1; \
			break; \
	} \
	if (nbr_id >= np99) nbr_id = -1; \
}
232
/* PICall(procedure,argc,argv) gets things started: initializes MPI with
   the command line, runs procedure(argc,argv), then finalizes MPI.
   NOTE(review): MPI_Init may strip MPI-specific arguments, so
   procedure sees the adjusted argc/argv. */
#define PICall(procedure,argc,argv) \
{ \
	MPI_Init(&argc,&argv); \
	procedure(argc,argv); \
	MPI_Finalize(); \
}
240
/* set up the right "Fortran naming" definitions for use in BSsparse.h */
/* somehow the name of your architecture must be identified */
/* the following setup works for the *vast* majority of systems */
/* this affects only the linking of the blas and lapack routines called */
/*
       FORTRANCAPS:       Names are uppercase, no trailing underscore
       FORTRANUNDERSCORE: Names are lowercase, trailing underscore
*/
/* rs6000, NeXT and hpux deliberately get NEITHER macro: their Fortran
   symbols are lowercase with no trailing underscore. */
#if defined(PARCH_cray) || defined(PARCH_NCUBE) || defined(PARCH_t3d)
#define FORTRANCAPS
#elif !defined(PARCH_rs6000) && !defined(PARCH_NeXT) && !defined(PARCH_hpux)
#define FORTRANUNDERSCORE
#endif
254
255/* ********************************************************************** */
256/* this is a set of macros that retrofit the code for machines on which
257   blocking sends are a bad idea.  These macros make all sends asynchronous
258   by allocating buffers for the synchronous sends and cleaning up
259   those buffers as we go */
260
261/* To turn this code "on" simply define NO_BLOCKING_SEND */
262/* For now, the only architecture that we know needs this is the rs6000 */
263/* and, perhaps, the HPs */
264#if defined(PARCH_rs6000) || defined(PARCH_hpux) || defined(PARCH_t3d)
265#define NO_BLOCKING_SEND 1
266#endif
267
/* Node in the singly linked list of pending buffered sends used by the
   NO_BLOCKING_SEND retrofit macros below.
   NOTE(review): the tag __BSmsg_list begins with a double underscore,
   which is reserved to the implementation -- kept as-is since callers
   reference it. */
typedef	struct __BSmsg_list {
	int	msg_type;	/* MPI message tag */
	char *msg_buf;	/* private copy of the outgoing data */
	int	msg_len;	/* number of msg_data_type items */
	int	msg_to;	/* destination rank */
	MPI_Datatype msg_data_type;	/* element datatype */
	MPI_Request	msg_id;	/* request returned by the Isend */
	struct	__BSmsg_list	*next;	/* next pending send, or NULL */
} BSmsg_list;
277
/* MY_SEND_SYNC - logically synchronous send of Mmsg_len items of
   Mmsg_data_type from Mmsg to rank Mmsg_to with tag Mmsg_type.

   NO_BLOCKING_SEND version: copies the message into a freshly allocated
   buffer, posts a non-blocking send (SENDASYNCNOMEM), and pushes a new
   node onto Mmsg_list (a BSmsg_list* lvalue) so the buffer can be
   reclaimed later; finishes with a CHECK_SEND_LIST sweep to reclaim any
   already-completed sends.  A zero-length message still allocates one
   element's worth of space so msg_buf is never a zero-byte allocation.

   Plain version: just a blocking MPI_Send (SENDSYNCNOMEM); Mmsg_list is
   untouched.

   NOTE(review): MY_MALLOC is defined elsewhere (BSsparse.h?); size99 is
   MPI_Aint, which is int here (see top of file), matching MPI-1's
   MPI_Type_size.  CHECK_SEND_LIST is defined further down in this file,
   which is fine since macro expansion happens at the call site. */
#ifdef NO_BLOCKING_SEND
#define	MY_SEND_SYNC(Mmsg_list,Mmsg_type,Mmsg,Mmsg_len,Mmsg_to,Mmsg_data_type,Mp_info) \
{ \
	BSmsg_list *node_99; \
	int i99; \
	MPI_Aint size99; \
	char *tmsg_ptr99; \
	MY_MALLOC(node_99,(BSmsg_list *),sizeof(BSmsg_list),1); \
	node_99->next = Mmsg_list; \
	Mmsg_list = node_99; \
	MPI_Type_size(Mmsg_data_type,&size99); \
	if (Mmsg_len == 0) { \
		MY_MALLOC(node_99->msg_buf,(char *),size99,2); \
	} else { \
		MY_MALLOC(node_99->msg_buf,(char *),Mmsg_len*size99,3); \
	} \
	node_99->msg_type = Mmsg_type; \
	node_99->msg_len = Mmsg_len; \
	node_99->msg_to = Mmsg_to; \
	node_99->msg_data_type = Mmsg_data_type; \
	tmsg_ptr99 = (char *) Mmsg; \
	for (i99=0;i99<Mmsg_len*size99;i99++) { \
		node_99->msg_buf[i99] = tmsg_ptr99[i99]; \
	} \
	SENDASYNCNOMEM(Mmsg_type,node_99->msg_buf,Mmsg_len,Mmsg_to,Mmsg_data_type,\
		node_99->msg_id,Mp_info); \
	CHECK_SEND_LIST(Mmsg_list); \
}
#else
#define	MY_SEND_SYNC(Mmsg_list,Mmsg_type,Mmsg,Mmsg_len,Mmsg_to,Mmsg_data_type,Mp_info) \
{ \
	SENDSYNCNOMEM(Mmsg_type,Mmsg,Mmsg_len,Mmsg_to,Mmsg_data_type,Mp_info); \
}
#endif
312
/* MCHECK_SEND_LIST - one non-blocking sweep over the pending-send list:
   MPI_Test each queued send; for every completed one, unlink its node
   and free both the staged buffer and the node itself.  Mmsg_list (an
   lvalue) is advanced when head nodes are reclaimed; removing a
   non-head node splices prev->next past it.  Incomplete sends are left
   queued for a later sweep.  The MPI_Status from MPI_Test is
   discarded. */
#define	MCHECK_SEND_LIST(Mmsg_list) \
{ \
	BSmsg_list	*node_ptr_99, *prev_node_ptr_99, *tnode_ptr_99; \
	int	fin99; \
	MPI_Status	stat99; \
	node_ptr_99 = Mmsg_list; \
	prev_node_ptr_99 = Mmsg_list; \
	while (node_ptr_99 != NULL) { \
		MPI_Test(&(node_ptr_99->msg_id),&fin99,&stat99); \
		if (fin99) { \
			tnode_ptr_99 = node_ptr_99; \
			if (node_ptr_99 == Mmsg_list) { \
				Mmsg_list = Mmsg_list->next; \
				prev_node_ptr_99 = Mmsg_list; \
				node_ptr_99 = Mmsg_list; \
			} else { \
				prev_node_ptr_99->next = node_ptr_99->next; \
				node_ptr_99 = node_ptr_99->next; \
			} \
			MY_FREE(tnode_ptr_99->msg_buf); \
			MY_FREE(tnode_ptr_99); \
		} else { \
			prev_node_ptr_99 = node_ptr_99; \
			node_ptr_99 = node_ptr_99->next; \
		} \
	} \
}
/* CHECK_SEND_LIST - the sweep above when the no-blocking-send retrofit
   is active; otherwise a no-op. */
#ifdef NO_BLOCKING_SEND
#define	CHECK_SEND_LIST(Mmsg_list) MCHECK_SEND_LIST(Mmsg_list)
#else
#define	CHECK_SEND_LIST(Mmsg_list)
#endif
345
/* MFINISH_SEND_LIST - drain the pending-send list completely: for each
   node, block in MPI_Wait (via SENDWAITNOMEM) until that send finishes,
   then free its buffer and node.  On return Mmsg_list (an lvalue) is
   NULL.  Call this before the buffers' communicator is torn down. */
#define MFINISH_SEND_LIST(Mmsg_list) \
{ \
	BSmsg_list	*tnode99; \
	while(Mmsg_list != NULL) { \
		tnode99 = Mmsg_list; \
		Mmsg_list = Mmsg_list->next; \
		SENDWAITNOMEM(tnode99->msg_type,tnode99->msg_buf,tnode99->msg_len, \
			tnode99->msg_to,tnode99->msg_data_type,tnode99->msg_id); \
		MY_FREE(tnode99->msg_buf); \
		MY_FREE(tnode99); \
	} \
}
/* FINISH_SEND_LIST - the drain above when the no-blocking-send retrofit
   is active; otherwise a no-op. */
#ifdef NO_BLOCKING_SEND
#define FINISH_SEND_LIST(Mmsg_list) MFINISH_SEND_LIST(Mmsg_list)
#else
#define FINISH_SEND_LIST(Mmsg_list)
#endif
363
364#endif
365