/*@ BSdepend.h - This file defines all the message-passing macros
    for the BlockSolve95 package.

    System Description:
    All the macros call MPI routines.  Note that there is a blocking
    version and a nonblocking version of the send macros, selected by
    whether NO_BLOCKING_SEND is defined.
 @*/

#ifndef __BSdependh
#define __BSdependh

/* include MPI */
#include "mpi.h"
/* NOTE(review): this forcibly redefines MPI's address-sized integer type
   as plain int -- presumably so that `MPI_Aint size99` can be passed to
   MPI_Type_size(), which takes an int* in MPI-1.  On a modern MPI where
   MPI_Aint is a typedef in mpi.h this #define may conflict; verify
   against the target MPI implementation. */
#define MPI_Aint int

#include <stdio.h>

#if defined(PARCH_freebsd)
#include <stdlib.h>
#endif

/* Hand-rolled libc prototypes for old SunOS 4 + gcc toolchains that
   shipped without complete headers. */
#if defined(PARCH_sun4) && !defined(__cplusplus) && defined(_Gnu_)
extern int fprintf(FILE*,const char*,...);
extern void sscanf(char *,...);
extern int printf(const char *,...);
extern void *malloc(long unsigned int );
extern void free(void *);
extern int srand(unsigned int);
extern int rand();
#endif
/* C-linkage libc prototypes when this header is pulled into C++ code. */
#if defined(__cplusplus)
extern "C" {
extern void *malloc(long unsigned int );
extern void free(void *);
extern int abs(int);
extern double fabs(double);
extern void srand(int);
extern int rand();
extern double drand48();
extern void srand48(long);
extern int exit(int);
}
#endif

/* NOTE(review): DEBUG_ALL is unconditionally defined here, so the #else
   (no-op) branch of the error macros below is dead code unless a caller
   edits this header. */
#define DEBUG_ALL 1

#ifdef DEBUG_ALL
/* Print the current error code with the location of the failure.
   __BSERROR_STATUS is a global defined elsewhere in the package.
   NOTE(review): "occured" is misspelled in these runtime messages;
   left untouched here to avoid changing program output. */
#define DEBUG_ACT printf("ERROR: Code %d occured at Line %d in File %s\n",__BSERROR_STATUS,__LINE__,__FILE__);

/* Same as DEBUG_ACT, with an additional caller-supplied message string. */
#define DEBUG_ACTC(msg) printf("ERROR: Code %d occured at Line %d in File %s: %s\n",__BSERROR_STATUS,__LINE__,__FILE__,msg);

/* Record an error code in the global status and report it. */
#define SETERR(code_num) {__BSERROR_STATUS=code_num;DEBUG_ACT;}
#define SETERRC(code_num,str) {__BSERROR_STATUS=code_num;DEBUG_ACTC(str);}

/* GETERR reads the sticky global error status. */
#define GETERR (__BSERROR_STATUS)
/* Bail out of the current function if an error is pending.
   NOTE(review): the code_num argument is ignored -- CHKERR/CHKERRN test
   the global status only.  Also note these macros expand to bare { }
   blocks (not do{}while(0)), so `if (x) CHKERR(e); else ...` will not
   compile; callers must use them as stand-alone statements. */
#define CHKERR(code_num) {if (GETERR) {DEBUG_ACT;return;}}
#define CHKERRN(code_num) {if (GETERR) {DEBUG_ACT;return 0;}}

#else
#define SETERR(code_num)
#define SETERRC(code_num,str)
#define CHKERR(a)
#define CHKERRN(a)
#endif

/* Memory allocation: route through the TrMalloc tracing allocator
   (declared elsewhere in the package) when TRMALLOC is defined,
   otherwise fall straight through to libc malloc/free.
   CHECK_MEMORY dumps the outstanding-allocation table to stderr. */
#ifdef TRMALLOC
#define MALLOC(c) TrMalloc(c,__LINE__,__FILE__)
#define FREE(c) TrFree(c,__LINE__,__FILE__)
#define CHECK_MEMORY() \
{ \
	int ierr99; \
	ierr99 = TrDump(stderr); \
}
#else
#define MALLOC(c) malloc(c)
#define FREE(c) free(c)
#define CHECK_MEMORY()
#endif

/* Blocking receive of `length` items of `data_type` from any source with
   tag `type` into a caller-provided buffer. */
#define RECVSYNCNOMEM(type,buf,length,data_type,p_info,mpistat) \
	MPI_Recv(buf,length,data_type,MPI_ANY_SOURCE,type,p_info->procset,&mpistat)

/* Blocking receive of a message of unknown size: probe for the next
   message with tag `type`, discover its length, allocate a buffer via
   MY_MALLOC (defined elsewhere), then receive from the exact
   source/tag found by the probe.  `buf` and `length` must be lvalues;
   both are written.  Caller frees with MSGFREERECV. */
#define RECVSYNCUNSZ(type,buf,length,data_type,p_info,mpistat) \
{ \
	MPI_Aint size99; \
	MPI_Status mpistat99; \
	MPI_Probe(MPI_ANY_SOURCE,type,p_info->procset,&mpistat99); \
	MPI_Get_count(&mpistat99,data_type,&length); \
	MPI_Type_size(data_type,&size99); \
	MY_MALLOC(buf,(void *),size99*length,1); \
	MPI_Recv(buf,length,data_type,mpistat99.MPI_SOURCE, \
		mpistat99.MPI_TAG,p_info->procset,&mpistat); \
}

/* Same as RECVSYNCUNSZ but uses MY_MALLOCN (the "return 0 on failure"
   allocator variant) for use inside value-returning functions. */
#define RECVSYNCUNSZN(type,buf,length,data_type,p_info,mpistat) \
{ \
	MPI_Aint size99; \
	MPI_Status mpistat99; \
	MPI_Probe(MPI_ANY_SOURCE,type,p_info->procset,&mpistat99); \
	MPI_Get_count(&mpistat99,data_type,&length); \
	MPI_Type_size(data_type,&size99); \
	MY_MALLOCN(buf,(void *),size99*length,1); \
	MPI_Recv(buf,length,data_type,mpistat99.MPI_SOURCE, \
		mpistat99.MPI_TAG,p_info->procset,&mpistat); \
}

/* Release a buffer allocated by the unknown-size receive macros. */
#define MSGFREERECV(msg) MY_FREE(msg)

/* Post a nonblocking receive; completion is via the msg_id request. */
#define RECVASYNCNOMEMFORCE(type,buf,length,data_type,msg_id,p_info) \
	MPI_Irecv(buf,length,data_type,MPI_ANY_SOURCE,type,p_info->procset,&(msg_id))

/* Nonblocking *ready-mode* send: the matching receive must already be
   posted at the destination (hence "FORCE"). */
#define SENDASYNCNOMEMFORCE(type,buf,size,to_proc,data_type,msg_id,p_info) \
	MPI_Irsend(buf,size,data_type,to_proc,type,p_info->procset,&(msg_id))

/* Nonblocking standard-mode send. */
#define SENDASYNCNOMEM(type,buf,size,to_proc,data_type,msg_id,p_info) \
	MPI_Isend((void *)buf,size,data_type,to_proc,type,p_info->procset,(MPI_Request *)(&(msg_id)))

/* Blocking standard-mode send. */
#define SENDSYNCNOMEM(type,buf,size,to_proc,data_type,p_info) \
	MPI_Send((void *)buf,size,data_type,to_proc,type,(p_info->procset))

/* Wait for completion of a previously posted nonblocking send.
   Only msg_id is actually used; the other arguments are kept for
   interface symmetry with the send macros. */
#define SENDWAITNOMEM(type,buf,size,to_proc,data_type,msg_id) \
{ \
	MPI_Status stat99; \
	MPI_Wait(&(msg_id),&stat99); \
}

/* Global reductions over the communicator `procset`.  Each macro copies
   sum_vec into the caller-supplied scratch array work_vec (MPI forbids
   aliased send/recv buffers in MPI_Allreduce) and reduces back into
   sum_vec in place.  work_vec must hold at least vec_len elements. */

/* Element-wise global integer sum. */
#define GISUM(sum_vec,vec_len,work_vec,procset) \
{ \
	int i99, *iptr1 = (int *) sum_vec, *iptr2 = (int *) work_vec; \
	for (i99=0;i99<(vec_len);i99++) { \
		(iptr2)[i99] = (iptr1)[i99]; \
	} \
	MPI_Allreduce(iptr2,iptr1,vec_len,MPI_INT,MPI_SUM,procset); \
}

/* Element-wise global logical OR on integers. */
#define GIOR(sum_vec,vec_len,work_vec,procset) \
{ \
	int i99, *iptr1 = (int *) sum_vec, *iptr2 = (int *) work_vec; \
	for (i99=0;i99<(vec_len);i99++) { \
		(iptr2)[i99] = (iptr1)[i99]; \
	} \
	MPI_Allreduce(iptr2,iptr1,vec_len,MPI_INT,MPI_LOR,procset); \
}

/* Element-wise global integer minimum. */
#define GIMIN(sum_vec,vec_len,work_vec,procset) \
{ \
	int i99, *iptr1 = (int *) sum_vec, *iptr2 = (int *) work_vec; \
	for (i99=0;i99<(vec_len);i99++) { \
		(iptr2)[i99] = (iptr1)[i99]; \
	} \
	MPI_Allreduce(iptr2,iptr1,vec_len,MPI_INT,MPI_MIN,procset); \
}

/* Element-wise global integer maximum. */
#define GIMAX(sum_vec,vec_len,work_vec,procset) \
{ \
	int i99, *iptr1 = (int *) sum_vec, *iptr2 = (int *) work_vec; \
	for (i99=0;i99<(vec_len);i99++) { \
		(iptr2)[i99] = (iptr1)[i99]; \
	} \
	MPI_Allreduce(iptr2,iptr1,vec_len,MPI_INT,MPI_MAX,procset); \
}

/* Element-wise global double maximum. */
#define GDMAX(sum_vec,vec_len,work_vec,procset) \
{ \
	int i99; \
	double *dptr1 = (double *) sum_vec, *dptr2 = (double *) work_vec; \
	for (i99=0;i99<(vec_len);i99++) { \
		(dptr2)[i99] = (dptr1)[i99]; \
	} \
	MPI_Allreduce(dptr2,dptr1,vec_len,MPI_DOUBLE,MPI_MAX,procset); \
}

/* Element-wise global double minimum. */
#define GDMIN(sum_vec,vec_len,work_vec,procset) \
{ \
	int i99; \
	double *dptr1 = (double *) sum_vec, *dptr2 = (double *) work_vec; \
	for (i99=0;i99<(vec_len);i99++) { \
		(dptr2)[i99] = (dptr1)[i99]; \
	} \
	MPI_Allreduce(dptr2,dptr1,vec_len,MPI_DOUBLE,MPI_MIN,procset); \
}

/* Element-wise global double sum. */
#define GDSUM(sum_vec,vec_len,work_vec,procset) \
{ \
	int i99; \
	double *dptr1 = (double *) sum_vec, *dptr2 = (double *) work_vec; \
	for (i99=0;i99<(vec_len);i99++) { \
		(dptr2)[i99] = (dptr1)[i99]; \
	} \
	MPI_Allreduce(dptr2,dptr1,vec_len,MPI_DOUBLE,MPI_SUM,procset); \
}

/* Element-wise global float sum. */
#define GFSUM(sum_vec,vec_len,work_vec,procset) \
{ \
	int i99; \
	float *fptr1 = (float *) sum_vec, *fptr2 = (float *) work_vec; \
	for (i99=0;i99<(vec_len);i99++) { \
		(fptr2)[i99] = (fptr1)[i99]; \
	} \
	MPI_Allreduce(fptr2,fptr1,vec_len,MPI_FLOAT,MPI_SUM,procset); \
}

/* Barrier across the communicator. */
#define GSYNC(procset) MPI_Barrier(procset)

/* True (1) on the root process (rank 0), 0 elsewhere.  `procinfo` must
   expose a my_id field. */
#define PSISROOT(procinfo) ((procinfo->my_id == 0) ? 1 : 0)

/* Process sets are plain MPI communicators. */
typedef MPI_Comm ProcSet;

/* the function PSNbrTree returns the parent, left child, or right child */
/* processor number if the processors are organized conceptually as a */
/* complete binary tree rooted at rank 0 */
/* op_code is one of the following */
#define PS_PARENT 0
#define PS_LCHILD 1
#define PS_RCHILD 2
/* Compute the requested tree neighbor of the calling rank into nbr_id
   (an lvalue).  nbr_id is set to -1 when the neighbor does not exist
   (parent of the root, or a child index beyond the communicator size). */
#define PSNbrTree(op_code,nbr_id,procset) \
{ \
	int my_id99, np99; \
	MPI_Comm_rank(procset,&my_id99); \
	MPI_Comm_size(procset,&np99); \
	switch(op_code) { \
		case PS_PARENT: \
			if (my_id99 == 0) { \
				nbr_id = -1; \
			} else { \
				nbr_id = (my_id99-1) / 2; \
			} \
			break; \
		case PS_LCHILD: \
			nbr_id = (2*my_id99) + 1; \
			break; \
		case PS_RCHILD: \
			nbr_id = (2*my_id99) + 2; \
			break; \
	} \
	if (nbr_id >= np99) nbr_id = -1; \
}

/* PICall(procedure,argc,argv) gets things started: it brackets the
   user's top-level procedure with MPI_Init/MPI_Finalize. */
#define PICall(procedure,argc,argv) \
{ \
	MPI_Init(&argc,&argv); \
	procedure(argc,argv); \
	MPI_Finalize(); \
}

/* set up the right "Fortran naming" definitions for use in BSsparse.h */
/* somehow the name of your architecture must be identified */
/* the following setup works for the *vast* majority of systems */
/* this affects only the linking of the blas and lapack routines called */
/*
   FORTRANCAPS:       Names are uppercase, no trailing underscore
   FORTRANUNDERSCORE: Names are lowercase, trailing underscore
*/
#if defined(PARCH_cray) || defined(PARCH_NCUBE) || defined(PARCH_t3d)
#define FORTRANCAPS
#elif !defined(PARCH_rs6000) && !defined(PARCH_NeXT) && !defined(PARCH_hpux)
#define FORTRANUNDERSCORE
#endif

/* ********************************************************************** */
/* this is a set of macros that retrofit the code for machines on which
   blocking sends are a bad idea.  These macros make all sends asynchronous
   by allocating buffers for the synchronous sends and cleaning up
   those buffers as we go */

/* To turn this code "on" simply define NO_BLOCKING_SEND */
/* For now, the only architecture that we know needs this is the rs6000 */
/* and, perhaps, the HPs */
#if defined(PARCH_rs6000) || defined(PARCH_hpux) || defined(PARCH_t3d) || defined(PARCH_IRIX)
#define NO_BLOCKING_SEND 1
#endif

/* One node per in-flight buffered send; singly linked, newest first. */
typedef struct __BSmsg_list {
	int msg_type;                 /* MPI tag */
	char *msg_buf;                /* private copy of the message payload */
	int msg_len;                  /* element count */
	int msg_to;                   /* destination rank */
	MPI_Datatype msg_data_type;   /* element datatype */
	MPI_Request msg_id;           /* request for the pending MPI_Isend */
	struct __BSmsg_list *next;
} BSmsg_list;

#ifdef NO_BLOCKING_SEND
/* "Synchronous" send emulated with a buffered nonblocking send: copy the
   payload into a freshly allocated list node, prepend the node to
   Mmsg_list (an lvalue), post MPI_Isend on the copy, then reap any
   already-completed sends.  The copy lets the caller reuse/free Mmsg
   immediately.  A zero-length message still gets a one-element buffer
   so msg_buf is never NULL. */
#define MY_SEND_SYNC(Mmsg_list,Mmsg_type,Mmsg,Mmsg_len,Mmsg_to,Mmsg_data_type,Mp_info) \
{ \
	BSmsg_list *node_99; \
	int i99; \
	MPI_Aint size99; \
	char *tmsg_ptr99; \
	MY_MALLOC(node_99,(BSmsg_list *),sizeof(BSmsg_list),1); \
	node_99->next = Mmsg_list; \
	Mmsg_list = node_99; \
	MPI_Type_size(Mmsg_data_type,&size99); \
	if (Mmsg_len == 0) { \
		MY_MALLOC(node_99->msg_buf,(char *),size99,2); \
	} else { \
		MY_MALLOC(node_99->msg_buf,(char *),Mmsg_len*size99,3); \
	} \
	node_99->msg_type = Mmsg_type; \
	node_99->msg_len = Mmsg_len; \
	node_99->msg_to = Mmsg_to; \
	node_99->msg_data_type = Mmsg_data_type; \
	tmsg_ptr99 = (char *) Mmsg; \
	for (i99=0;i99<Mmsg_len*size99;i99++) { \
		node_99->msg_buf[i99] = tmsg_ptr99[i99]; \
	} \
	SENDASYNCNOMEM(Mmsg_type,node_99->msg_buf,Mmsg_len,Mmsg_to,Mmsg_data_type,\
		node_99->msg_id,Mp_info); \
	CHECK_SEND_LIST(Mmsg_list); \
}
#else
/* Blocking platforms: a plain MPI_Send; the message list is unused. */
#define MY_SEND_SYNC(Mmsg_list,Mmsg_type,Mmsg,Mmsg_len,Mmsg_to,Mmsg_data_type,Mp_info) \
{ \
	SENDSYNCNOMEM(Mmsg_type,Mmsg,Mmsg_len,Mmsg_to,Mmsg_data_type,Mp_info); \
}
#endif

/* Sweep the pending-send list: MPI_Test each request and unlink/free the
   nodes whose sends have completed.  Mmsg_list (an lvalue) is updated
   when the head node is reaped. */
#define MCHECK_SEND_LIST(Mmsg_list) \
{ \
	BSmsg_list *node_ptr_99, *prev_node_ptr_99, *tnode_ptr_99; \
	int fin99; \
	MPI_Status stat99; \
	node_ptr_99 = Mmsg_list; \
	prev_node_ptr_99 = Mmsg_list; \
	while (node_ptr_99 != NULL) { \
		MPI_Test(&(node_ptr_99->msg_id),&fin99,&stat99); \
		if (fin99) { \
			tnode_ptr_99 = node_ptr_99; \
			if (node_ptr_99 == Mmsg_list) { \
				Mmsg_list = Mmsg_list->next; \
				prev_node_ptr_99 = Mmsg_list; \
				node_ptr_99 = Mmsg_list; \
			} else { \
				prev_node_ptr_99->next = node_ptr_99->next; \
				node_ptr_99 = node_ptr_99->next; \
			} \
			MY_FREE(tnode_ptr_99->msg_buf); \
			MY_FREE(tnode_ptr_99); \
		} else { \
			prev_node_ptr_99 = node_ptr_99; \
			node_ptr_99 = node_ptr_99->next; \
		} \
	} \
}
#ifdef NO_BLOCKING_SEND
#define CHECK_SEND_LIST(Mmsg_list) MCHECK_SEND_LIST(Mmsg_list)
#else
#define CHECK_SEND_LIST(Mmsg_list)
#endif

/* Drain the pending-send list: wait for every outstanding send to
   complete, freeing each buffer and node.  Leaves Mmsg_list == NULL. */
#define MFINISH_SEND_LIST(Mmsg_list) \
{ \
	BSmsg_list *tnode99; \
	while(Mmsg_list != NULL) { \
		tnode99 = Mmsg_list; \
		Mmsg_list = Mmsg_list->next; \
		SENDWAITNOMEM(tnode99->msg_type,tnode99->msg_buf,tnode99->msg_len, \
			tnode99->msg_to,tnode99->msg_data_type,tnode99->msg_id); \
		MY_FREE(tnode99->msg_buf); \
		MY_FREE(tnode99); \
	} \
}
#ifdef NO_BLOCKING_SEND
#define FINISH_SEND_LIST(Mmsg_list) MFINISH_SEND_LIST(Mmsg_list)
#else
#define FINISH_SEND_LIST(Mmsg_list)
#endif

#endif