1 /*
2  *   ppuzzle.c
3  *
4  *
5  * Part of TREE-PUZZLE 5.2 (July 2004)
6  *
7  * (c) 2003-2004 by Heiko A. Schmidt, Korbinian Strimmer, and Arndt von Haeseler
8  * (c) 1999-2003 by Heiko A. Schmidt, Korbinian Strimmer,
9  *                  M. Vingron, and Arndt von Haeseler
10  * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11  *
12  * All parts of the source except where indicated are distributed under
13  * the GNU public licence.  See http://www.opensource.org for details.
14  *
15  * ($Id$)
16  *
17  */
18 
19 #ifdef HAVE_CONFIG_H
20 #  include <config.h>
21 #endif
22 
23 #define EXTERN extern
24 
25 #include <mpi.h>
26 #include <time.h>
27 #include "ppuzzle.h"
28 
29 
30 int PP_IamMaster;
31 int PP_IamSlave;
32 int PP_Myid;
33 int PP_MyMaster;
34 int PP_NumProcs;
35 MPI_Comm PP_Comm;
36 
37 int *freeslaves;           /* Queue of free slaves */
38 int firstslave,            /* headpointer of queue */
39     lastslave;             /* tailpointer of queue */
40 
41 int *permutsent,
42     *permutrecved,
43     *quartsent,
44     *quartrecved,
45     *doquartsent,
46     *doquartrecved,
47     *splitsent,
48     *splitrecved,
49     *permutsentn,
50     *permutrecvedn,
51     *quartsentn,
52     *quartrecvedn,
53     *doquartsentn,
54     *doquartrecvedn,
55     *splitsentn,
56     *splitrecvedn;
57 
58 double *walltimes,
59        *cputimes;
60 double *fullwalltimes,
61        *fullcputimes;
62 double *altwalltimes,
63        *altcputimes;
64 
65 int PP_permutsent = 0;          /* # of  */
66 int PP_permutrecved = 0;        /* # of  */
67 int PP_quartsent = 0;           /* # of  */
68 int PP_quartrecved = 0;         /* # of  */
69 int PP_doquartsent = 0;         /* # of  */
70 int PP_doquartrecved = 0;       /* # of  */
71 int PP_splitsent = 0;           /* # of  */
72 int PP_splitrecved = 0;         /* # of  */
73 int PP_permutsentn = 0;          /* # of  */
74 int PP_permutrecvedn = 0;        /* # of  */
75 int PP_quartsentn = 0;           /* # of  */
76 int PP_quartrecvedn = 0;         /* # of  */
77 int PP_doquartsentn = 0;         /* # of  */
78 int PP_doquartrecvedn = 0;       /* # of  */
79 int PP_splitsentn = 0;           /* # of  */
80 int PP_splitrecvedn = 0;         /* # of  */
81 
82 double PP_starttime     = 0,
83  PP_stoptime      = 0,
84  PP_inittime      = 0,
85  PP_paramcomptime = 0,
86  PP_paramsendtime = 0,
87  PP_quartcomptime = 0,
88  PP_quartsendtime = 0,
89  PP_puzzletime    = 0,
90  PP_treetime      = 0,
91  PP_lasttime      = 0;
92 
93 int PP_MaxSlave = 0;
94 
95 /*********************************************************************
96 *  debugging                                                         *
97 *********************************************************************/
98 
99 static int deb_num = 0;
fprintfdebug(FILE * fp,char s[],char f[],int l)100 void fprintfdebug(FILE *fp, char s[], char f[], int l)
101 {
102         fprintf(fp, "[%2d] %s (%s:%d)\n", deb_num++, s, f, l);
103 }
104 /*
105 fprintfdebug(stderr, "debug message", __FILE__, __LINE__);
106 */
107 
108 
109 /*********************************************************************
110 *  miscellaneous utilities                                           *
111 *********************************************************************/
112 
dcmp(const void * a,const void * b)113 int dcmp(const void *a, const void *b)
114 {
115 	if (*(double *)a > *(double *)b) return (-1);
116 	else if (*(double *)a < *(double *)b) return 1;
117 	else return 0;
118 } /* dcmp */
119 
120 /******************/
121 
PP_cmpd(int rank,double a,double b)122 void PP_cmpd(int rank, double a, double b)
123 {
124   if (a != b)
125      fprintf(STDOUT, "(%2d) *** %.3f != %.3f\n", rank, a, b);
126 } /* PP_cmpd */
127 
128 /******************/
129 
PP_cmpi(int rank,int a,int b)130 void PP_cmpi(int rank, int a, int b)
131 {
132   if (a != b)
133      fprintf(STDOUT, "(%2d) *** %d != %d\n", rank, a, b);
134 } /* PP_cmpi */
135 
136 /******************/
137 
PP_timer()138 double PP_timer()
139 {
140   double tmptime;
141   if (PP_lasttime == 0) {
142      PP_lasttime = MPI_Wtime();
143      return(0);
144      }
145   else {
146      tmptime = PP_lasttime;
147      PP_lasttime = MPI_Wtime();
148      return(PP_lasttime - tmptime);
149   }
150 } /* PP_timer */
151 
152 /******************/
153 
PP_Printerror(FILE * of,int id,int err)154 void PP_Printerror(FILE *of, int id, int err)
155 {
156         char  errstr[MPI_MAX_ERROR_STRING];
157         int   errstrlen;
158 
159   if ((err > MPI_SUCCESS) && (err <= MPI_ERR_LASTCODE)) {
160     MPI_Error_string(err, errstr, &errstrlen);
161     fprintf(of, "(%2d) MPI ERROR %d : %s\n", id, err, errstr);
162     }
163   else {
164     if (err == MPI_SUCCESS)
165       fprintf(of, "(%2d) MPI ERROR %d : No error\n", id, err);
166     else
167       fprintf(of, "(%2d) MPI ERROR %d : unknown error number\n", id, err);
168   }
169 } /* PP_Printerror */
170 
171 /******************/
172 
PP_Printbiparts(cmatrix biparts)173 void PP_Printbiparts(cmatrix biparts)
174 { int n1, n2;
175     for (n1=0; n1<(Maxspc-3); n1++) {
176        if (n1==0) fprintf(STDOUT, "(%2d) bipartition : ", PP_Myid);
177        else       fprintf(STDOUT, "(%2d)             : ", PP_Myid);
178        for (n2=0; n2<Maxspc; n2++)
179           fprintf(STDOUT, "%c", biparts[n1][n2]);
180     fprintf(STDOUT, "\n");
181     }
182 }  /* PP_Printbiparts */
183 
184 /******************/
185 
186 
187 /*********************************************************************
188 *  queue for storing the ranks of slaves waiting for work            *
189 *********************************************************************/
190 
PP_initslavequeue()191 void PP_initslavequeue()
192 {
193   int n;
194   freeslaves = new_ivector(PP_NumProcs);
195     firstslave = 0;
196     PP_MaxSlave = PP_NumProcs-1;
197     lastslave  = PP_MaxSlave-1;
198     freeslaves[PP_MaxSlave] = PP_MaxSlave;
199     for (n=0; n<PP_MaxSlave; n++){
200       freeslaves[n] = n+1;
201     }
202 } /* PP_initslavequeue */
203 
204 /******************/
205 
PP_fullslave()206 int PP_fullslave()
207 {
208 return (freeslaves[PP_MaxSlave] == PP_MaxSlave);
209 } /* PP_fullslave */
210 
211 /******************/
212 
PP_emptyslave()213 int PP_emptyslave()
214 {
215 return (freeslaves[PP_MaxSlave] == 0);
216 } /* PP_emptyslave */
217 
218 /******************/
219 
PP_putslave(int sl)220 void PP_putslave(int sl)
221 {
222   if (freeslaves[PP_MaxSlave] == PP_MaxSlave) {
223     fprintf(STDOUT, "\n\n\nHALT: PLEASE REPORT ERROR PP1 TO DEVELOPERS\n\n\n");
224     MPI_Finalize();
225     exit(1);
226   }
227   (freeslaves[PP_MaxSlave])++;
228   lastslave = (lastslave + 1) % PP_MaxSlave;
229   freeslaves[lastslave] = sl;
230 } /* PP_putslave */
231 
232 /******************/
233 
PP_getslave()234 int PP_getslave()
235 {
236   int tmp;
237   if (freeslaves[PP_MaxSlave] == 0)
238     return MPI_PROC_NULL;
239   else {
240     tmp = freeslaves[firstslave];
241     firstslave = (firstslave + 1) % PP_MaxSlave;
242     (freeslaves[PP_MaxSlave])--;
243     return tmp;
244   }
245 } /* PP_getslave */
246 
247 /*epe*/
248 /*********************************************************************
249 *  procedures to parallelize parameter estimation                    *
250 *********************************************************************/
251 
PP_Update_Rates()252 void PP_Update_Rates() /*master broadcasts updated values of Rates and Distanmat to all slaves*/
253 {
254 	int          dummy=0, dest;
255 	MPI_Status   stat;
256 
257 	if (PP_IamMaster) for (dest=1; dest<PP_NumProcs; dest++)
258 		MPI_Send(&dummy, 0, MPI_INT, dest, PP_UPDATERATES, PP_Comm);
259 	else	MPI_Recv(&dummy, 0, MPI_INT, PP_MyMaster, PP_UPDATERATES, PP_Comm, &stat);
260 
261 	MPI_Bcast(Rates, numcats, MPI_DOUBLE, PP_MyMaster, PP_Comm);
262 
263 	return;
264 } /* PP_Update_Rates */
265 
266 
267 
PP_Update_fracinv()268 void PP_Update_fracinv() /*master broadcasts updated values of fracinv and Distanmat to all slaves*/
269 {
270 	int          dummy=0, dest;
271 	MPI_Status   stat;
272 
273 	if (PP_IamMaster) for (dest=1; dest<PP_NumProcs; dest++)
274 		MPI_Send(&dummy, 0, MPI_INT, dest, PP_UPDATEFRACINV, PP_Comm);
275 	else	MPI_Recv(&dummy, 0, MPI_INT, PP_MyMaster, PP_UPDATEFRACINV, PP_Comm, &stat);
276 
277 	MPI_Bcast(&fracinv, 1, MPI_DOUBLE, PP_MyMaster, PP_Comm);
278 
279 	return;
280 } /* PP_Update_fracinv */
281 
282 
283 
PP_Update_EEI()284 void PP_Update_EEI() /*master broadcasts updated values of Eval, Evec, Ievc and Distanmat to all
285 slaves*/
286 {
287 	MPI_Datatype Dtypes[3];
288 	int          Dtypelens[3];
289 	MPI_Aint     Dtypeaddr[3];
290 	MPI_Datatype PP_Data;
291 	MPI_Status   stat;
292 	/* int          dummy=0, dest, error, jobs=.5*Maxspc*(Maxspc-1); */
293 	int          dummy=0, dest;
294 
295 	if (PP_IamMaster) for (dest=1; dest<PP_NumProcs; dest++)
296 		MPI_Send(&dummy, 0, MPI_INT, dest, PP_UPDATEEEI, PP_Comm);
297 	else	MPI_Recv(&dummy, 0, MPI_INT, PP_MyMaster, PP_UPDATEEEI, PP_Comm, &stat);
298 
299 	Dtypes[0] = MPI_DOUBLE; Dtypelens[0] = tpmradix;
300 	MPI_Address(&(Eval[0]), &(Dtypeaddr[0]));
301 	Dtypes[1] = MPI_DOUBLE; Dtypelens[1] = tpmradix * tpmradix;
302 	MPI_Address(&(Evec[0][0]), &(Dtypeaddr[1]));
303 	Dtypes[2] = MPI_DOUBLE; Dtypelens[2] = tpmradix * tpmradix;
304 	MPI_Address(&(Ievc[0][0]), &(Dtypeaddr[2]));
305 
306 	MPI_Type_struct(3, Dtypelens, Dtypeaddr, Dtypes, &PP_Data);
307 	MPI_Type_commit(&PP_Data);
308 
309 	MPI_Bcast (MPI_BOTTOM, 1, PP_Data, PP_MyMaster, PP_Comm);
310 
311 	MPI_Type_free(&PP_Data);
312 
313 	return;
314 } /* PP_Update_EEI */
315 
316 
317 
PP_NoUpdate()318 void PP_NoUpdate() /*master broadcasts updated values of Distanmat to all slaves*/
319 {
320 	MPI_Status   stat;
321 	int          dummy=0, dest;
322 
323 	if (PP_IamMaster) for (dest=1; dest<PP_NumProcs; dest++)
324 		MPI_Send(&dummy, 0, MPI_INT, dest, PP_NOUPDATE, PP_Comm);
325 	else	MPI_Recv(&dummy, 0, MPI_INT, PP_MyMaster, PP_NOUPDATE, PP_Comm, &stat);
326 
327 	return;
328 } /* PP_NoUpdate */
329 
330 
331 
PP_Final_Update()332 void PP_Final_Update()
333 {
334 	MPI_Datatype Dtypes[6];
335 	int          Dtypelens[6];
336 	MPI_Aint     Dtypeaddr[6];
337 	MPI_Datatype PP_Data;
338 	MPI_Status   stat;
339 	int          dest, dummy=0;
340 	int          jobs = .5*Maxspc*(Maxspc-1);
341 	double*      DMVector = new_dvector(jobs);
342 
343 	Dtypes[0] = MPI_DOUBLE; Dtypelens[0] = jobs;
344 	MPI_Address(&(DMVector[0]), &(Dtypeaddr[0]));
345 	Dtypes[1] = MPI_DOUBLE; Dtypelens[1] = numcats;
346 	MPI_Address(&(Rates[0]), &(Dtypeaddr[1]));
347 	Dtypes[2] = MPI_DOUBLE; Dtypelens[2] = 1;
348 	MPI_Address(&(fracinv), &(Dtypeaddr[2]));
349 	Dtypes[3] = MPI_DOUBLE; Dtypelens[3] = tpmradix;
350 	MPI_Address(&(Eval[0]), &(Dtypeaddr[3]));
351 	Dtypes[4] = MPI_DOUBLE; Dtypelens[4] = tpmradix * tpmradix;
352 	MPI_Address(&(Evec[0][0]), &(Dtypeaddr[4]));
353 	Dtypes[5] = MPI_DOUBLE; Dtypelens[5] = tpmradix * tpmradix;
354 	MPI_Address(&(Ievc[0][0]), &(Dtypeaddr[5]));
355 
356 	MPI_Type_struct(6, Dtypelens, Dtypeaddr, Dtypes, &PP_Data);
357 	MPI_Type_commit(&PP_Data);
358 
359 	if (PP_IamMaster) {
360 		for (dest=1; dest<PP_NumProcs; dest++)
361 			MPI_Send(&dummy, 0, MPI_INT, dest, PP_FINALUPDATE, PP_Comm);
362 		PP_DMToVector(0, 1, jobs, DMVector);
363 	}
364 	else	MPI_Recv(&dummy, 0, MPI_INT, PP_MyMaster, PP_FINALUPDATE, PP_Comm, &stat);
365 
366 	MPI_Bcast(MPI_BOTTOM, 1, PP_Data, PP_MyMaster, PP_Comm);
367 
368 	MPI_Type_free(&PP_Data);
369 
370 	if (!PP_IamMaster) PP_VectorToDM(0, 1, jobs, DMVector);
371 
372 	return;
373 } /* PP_Final_Update */
374 
375 
376 
PP_Mldistance(int first,int * LastChunk)377 void PP_Mldistance(int first, int* LastChunk)
378 {
379 	MPI_Status stat;
380 	int i, chunk=0, x=0, y=1;
381 	dvector initvalue, result;
382 
383 	if (first)
384 	{
385 		schedtype sched;
386 		initsched(&sched, .5*Maxspc*(Maxspc-1), PP_NumProcs-1, 1);
387 		for (i=1; i<PP_Myid; i++) PP_NextCoord(&x, &y, SCHEDALG_PARAM_EST(&sched));
388 		chunk = SCHEDALG_PARAM_EST(&sched);
389 		*LastChunk = chunk;
390 		initvalue = new_dvector(chunk);
391 		for (i=PP_Myid+1; i<PP_NumProcs; i++) *LastChunk = SCHEDALG_PARAM_EST(&sched);
392 		MPI_Scatterv(NULL, NULL, NULL, MPI_DOUBLE,
393 			initvalue, chunk, MPI_DOUBLE, PP_MyMaster, PP_Comm);
394 	}
395 	else
396 	{
397 		initvalue = new_dvector(*LastChunk+2);
398 		MPI_Recv(initvalue, *LastChunk+2, MPI_DOUBLE, PP_MyMaster, PP_MLDISTANCE, PP_Comm, &stat);
399 		for (i=1; i<=*LastChunk; i++) if (initvalue[i]<0)
400 		{
401 			chunk=i;
402 			x=-initvalue[i];
403 			y=initvalue[i+1];
404 			break;
405 		}
406 		*LastChunk = chunk;
407 	}
408 
409 /* HAS */
410 	if (chunk!=0) {
411 		result = new_dvector(chunk);
412 		for (i=0; i<chunk; i++)
413 			{result[i] = mldistance(x, y, initvalue[i]); PP_NextCoord(&x, &y, 1);}
414 		MPI_Send(result, chunk, MPI_DOUBLE, PP_MyMaster, PP_MLDISTANCE, PP_Comm);
415 
416 	}
417 }
418 
PP_Lslength()419 void PP_Lslength()
420 {
421 	int i, i1, j, j1, j2, k, numbrnch, numpair, initmsg[2], numspc, numibrnch;
422 	unsigned long int isum;
423 	ivector pths, jobsize = new_ivector(PP_NumProcs), atamt_tmp_coord = new_ivector(PP_NumProcs);
424 	dvector my_atamt_tmp, atamt_tmp;
425 	ivector IPaths;
426 	cmatrix atmt;
427 	int numjobs, my_jobsize, my_x=0, my_y=0;
428 	MPI_Status   stat;
429 
430 	MPI_Recv(initmsg, 2, MPI_INT, PP_MyMaster, PP_LSLENGTH, PP_Comm, &stat);
431 	numspc = initmsg[0]; numibrnch = initmsg[1];
432 
433 	initmsg[0] = numspc; initmsg[1] = numibrnch;
434 	IPaths = new_ivector(numibrnch*numspc);
435 	MPI_Bcast(IPaths, numibrnch*numspc, MPI_INT, PP_MyMaster, PP_Comm);
436 
437 	numbrnch = numspc + numibrnch;
438 	numpair = (numspc * (numspc - 1)) / 2;
439 	atmt = new_cmatrix(numbrnch, numpair);
440 
441 	numjobs = (numbrnch * (numbrnch+1))/2;
442 	my_jobsize = numjobs/PP_NumProcs;
443 	if (PP_Myid < numjobs%PP_NumProcs)
444 	{
445 		my_jobsize++;
446 		PP_NextCoord_atamt(&my_x, &my_y, PP_Myid*(numjobs/PP_NumProcs)+PP_Myid, numbrnch);
447 	}
448 	else    PP_NextCoord_atamt(&my_x, &my_y,PP_Myid*(numjobs/PP_NumProcs)+numjobs%PP_NumProcs, numbrnch);
449 
450 /*	fprintf(STDOUT, "[%d] my_x=%d, my_y=%d, numjobs=%d, my_jobsize=%d, numspc=%d, numibrnch=%d\n",
451 		PP_Myid, my_x, my_y, numjobs, my_jobsize, numspc, numibrnch);
452 */
453 #ifdef VT
454 	VT_begin(1000);
455 #endif
456 	for (i = 0; i < numspc; i++) {
457 		for (j1 = 1, j = 0; j1 < numspc; j1++) {
458 			if (j1 == i) {
459 				for (j2 = 0; j2 < j1; j2++, j++) {
460 					atmt[i][j] = 1;
461 				}
462 			} else {
463 				for (j2 = 0; j2 < j1; j2++, j++) {
464 					if (j2 == i)
465 						atmt[i][j] = 1;
466 					else
467 						atmt[i][j] = 0;
468 				}
469 			}
470 		}
471 	}
472 #ifdef VT
473 	VT_end(1000);
474 	VT_begin(1010);
475 #endif
476 	for (i1 = 0, i = numspc; i1 < numibrnch; i1++, i++) {
477 		pths = &IPaths[i1*numspc];
478 		for (j1 = 1, j = 0; j1 < numspc; j1++) {
479 			for (j2 = 0; j2 < j1; j2++, j++) {
480 				if (pths[j1] != pths[j2])
481 					atmt[i][j] = 1;
482 				else
483 					atmt[i][j] = 0;
484 			}
485 		}
486 	}
487 #ifdef VT
488 	VT_end(1010);
489 #endif
490 	my_atamt_tmp = new_dvector(my_jobsize);
491 #ifdef VT
492 	VT_begin(1020);
493 #endif
494 	for (i = 0; i < my_jobsize; i++) {
495 		for (k = 0, isum = 0; k < numpair; k++)
496 			if (atmt[my_x][k] && atmt[my_y][k]) isum ++;
497 		my_atamt_tmp[i] = (double) isum;
498 		PP_NextCoord_atamt(&my_x, &my_y, 1, numbrnch);
499 	}
500 #ifdef VT
501 	VT_end(1020);
502 #endif
503 	atamt_tmp = new_dvector(my_jobsize); /* added for epe (HAS) */
504 	MPI_Gatherv(my_atamt_tmp, my_jobsize, MPI_DOUBLE,
505 		atamt_tmp, jobsize, atamt_tmp_coord, MPI_DOUBLE, PP_MyMaster, PP_Comm);
506 	free_dvector(my_atamt_tmp); /* added for epe (HAS) */
507 	free_dvector(atamt_tmp); /* added for epe (HAS) */
508 	return;
509 }
510 
PP_NextCoord(int * x,int * y,int chunk)511 void PP_NextCoord(int* x, int* y, int chunk)
512 {
513 	int lx = *x, ly = *y;
514 
515 	while ((chunk>0)&&(lx<Maxspc-1))
516 	{
517 		if ((chunk + ly) > (Maxspc - 1))
518 		{
519 			chunk -= Maxspc - ly - 1;
520 			lx ++;
521 			ly = lx;
522 		}
523 		else
524 		{
525 			ly += chunk;
526 			chunk = 0;
527 		}
528 	}
529 
530 	*x = lx; *y = ly;
531 
532 	return;
533 }
534 
PP_NextCoord_atamt(int * x,int * y,int chunk,int dimen)535 void PP_NextCoord_atamt(int* x, int* y, int chunk, int dimen)
536 {
537 	int lx = *x, ly = *y;
538 
539 	while ((chunk>0)&&(lx<dimen))
540 	{
541 		if ((chunk + ly) > lx)
542 		{
543 			chunk -= lx - ly + 1;
544 			lx ++;
545 			ly = 0;
546 		}
547 		else
548 		{
549 			ly += chunk;
550 			chunk = 0;
551 		}
552 	}
553 
554 	*x = lx; *y = ly;
555 
556 	return;
557 }
558 
PP_VectorToDM(int x,int y,int chunk,double * result)559 void PP_VectorToDM(int x, int y, int chunk, double* result)
560 {
561 	int i;
562 
563 	for (i=0; i<chunk; i++)
564 	{
565 		Distanmat[x][y] = result[i];
566 		Distanmat[y][x] = result[i];
567 		PP_NextCoord(&x, &y, 1);
568 	}
569 	return;
570 }
571 
PP_DMToVector(int x,int y,int chunk,double * result)572 void PP_DMToVector(int x, int y, int chunk, double* result)
573 {
574 	int i;
575 
576 	for (i=0; i<chunk; i++)
577 	{
578 		result[i] = Distanmat[x][y];
579 		PP_NextCoord(&x, &y, 1);
580 	}
581 	return;
582 }
583 
584 
585 /*********************************************************************
586 *  procedures to parallelize the puzzling step                       *
587 *********************************************************************/
588 
PP_do_write_quart(int e,int f,int g,int h,double d1,double d2,double d3,uli * numbq,uli * bqarr,int usebestq)589 unsigned char PP_do_write_quart(
590 		int    e,
591 		int    f,
592 		int    g,
593 		int    h,
594 		double d1,
595 		double d2,
596 		double d3,
597 		uli   *numbq,
598 		uli   *bqarr,
599                 int    usebestq)
600 {
601         unsigned char qpbranching;
602         int           badquartet;
603 #if 0
604 	double        lhs[3];
605 	double        tmpqweight[3];
606 	int           tmpqworder[3];
607 	int           tmpsqorder[3];
608 	double        tmpsqdiff[3];
609 	unsigned char treebits[3];
610 	unsigned char discreteweight[3];
611 /* unsigned char discreteweight[3], treebits[3]; */
612 	double        onethird;
613 	double        temp;
614 	double        wlist[6];
615 	double        plist[6];
616 	unsigned char tmpweight;
617 	double templog;
618 	double temp1, temp2, temp3;
619 
620 
621 
622 
623 	onethird = 1.0/3.0;
624 	treebits[0] = (unsigned char) 1;
625 	treebits[1] = (unsigned char) 2;
626 	treebits[2] = (unsigned char) 4;
627 #endif
628 
629 	qpbranching = loglkl2weight(e, f, g, h, d1, d2, d3, usebestq);
630 
631 	badquartet = FALSE;
632 
633 	writequartet(e, f, g, h, qpbranching);
634 
635          /* a bad quartet is a quartet that shows
636             equal weights for all three possible topologies */
637          if (qpbranching == 7) badquartet = TRUE;
638 
639          if (badquartet) {
640                bqarr[(*numbq)++] = quart2num(e, f, g, h);
641 #	       ifdef PVERBOSE3
642 	          fprintf(STDOUT, "(%2d) bad quartet: %d  %d  %d %d -> %ld\n",
643 	                  PP_Myid, e, f, g, h, quart2num(e, f, g, h));
644 #	       endif /* PVERBOSE3 */
645                badqs++;
646                badtaxon[e]++;
647                badtaxon[f]++;
648                badtaxon[g]++;
649                badtaxon[h]++;
650          } /* if badquartet */
651          return(qpbranching);
652 } /* PP_do_write_quart */
653 
654 /*********************************************************************
655 *  sending/receiving the important sizes and parameter  (M->S)       *
656 *********************************************************************/
657 
658 #ifndef USE_WINDOWS
PP_SendSizes(int mspc,int msite,int ncats,int nptrn,int rad,int outgr,double frconst,int rseed,int fixedorder,int consensus)659 void PP_SendSizes(int    mspc,
660                   int    msite,
661                   int    ncats,
662                   int    nptrn,
663                   int    rad,
664                   int    outgr,
665                   double frconst,
666                   int    rseed,
667 		  int    fixedorder,
668 		  int    consensus)
669 #else
670 void PP_SendSizes(int    mspc,
671                   int    msite,
672                   int    alimsite,
673                   int    alistart,
674                   int    aliend,
675                   int    ncats,
676                   int    nptrn,
677                   int    rad,
678                   int    outgr,
679                   double frconst,
680                   int    rseed,
681 		  int    fixedorder,
682 		  int    consensus)
683 #endif
684 {
685 #ifndef USE_WINDOWS
686 #   define NUMINT 9
687 #   define NUMDBL 1
688 #else
689 #   define NUMINT 12
690 #   define NUMDBL 1
691 #endif
692   int    ints[NUMINT];
693   double doubles[NUMDBL];
694   MPI_Datatype Dtypes[2] =    {MPI_INT, MPI_DOUBLE};
695   int          Dtypelens[2] = {NUMINT , NUMDBL};
696   MPI_Aint     Dtypeaddr[2];
697   MPI_Datatype PP_Sizes;
698   int          dest;
699   int          error;
700 
701 # ifdef PVERBOSE2
702     fprintf(STDOUT, "(%2d) Sending: Maxspc=%d  Maxsite=%d  numcats=%d\n", PP_Myid, mspc, msite, ncats);
703     fprintf(STDOUT, "(%2d)          Numprtn=%d  tpmradix=%d  fracconst=%.3f\n", PP_Myid, nptrn, rad, frconst);
704 # endif /* PVERBOSE2 */
705 
706   ints[0] = mspc;
707   ints[1] = msite;
708   ints[2] = ncats;
709   ints[3] = nptrn;
710   ints[4] = rad;
711   ints[5] = outgr;
712   ints[6] = rseed;
713   ints[7] = fixedorder;
714   ints[8] = consensus;
715 #ifdef USE_WINDOWS
716   ints[9] = alimsite;
717   ints[10] = alistart;
718   ints[11] = aliend;
719 #endif
720   doubles[0] = frconst;
721 
722   MPI_Address(ints,     Dtypeaddr);
723   MPI_Address(doubles, (Dtypeaddr+1));
724 
725   MPI_Type_struct(2, Dtypelens, Dtypeaddr, Dtypes, &PP_Sizes);
726   MPI_Type_commit(&PP_Sizes);
727 
728   for (dest=1; dest<PP_NumProcs; dest++) {
729 
730     error = MPI_Ssend(MPI_BOTTOM, 1, PP_Sizes, dest, PP_SIZES, PP_Comm);
731     if (error != MPI_SUCCESS)
732       PP_Printerror(STDOUT, 600+PP_Myid, error);
733 
734 #   ifdef PVERBOSE3
735        fprintf(STDOUT, "(%2d) -> (%2d) Sent Sizes\n", PP_Myid, dest);
736 #   endif /* PVERBOSE3 */
737 
738   } /* for each slave */
739 
740   MPI_Type_free(&PP_Sizes);
741 
742 # ifdef PVERBOSE3
743     fprintf(STDOUT, "(%2d) ... Sent Sizes\n", PP_Myid);
744 # endif /* PVERBOSE3 */
745 
746 # undef NUMINT
747 # undef NUMDBL
748 } /* PP_SendSizes */
749 
750 
751 /******************/
752 
753 #ifndef USE_WINDOWS
PP_RecvSizes(int * mspc,int * msite,int * ncats,int * nptrn,int * rad,int * outgr,double * frconst,int * rseed,int * fixedorder,int * consensus)754 void PP_RecvSizes(int    *mspc,
755                   int    *msite,
756                   int    *ncats,
757                   int    *nptrn,
758                   int    *rad,
759                   int    *outgr,
760                   double *frconst,
761                   int    *rseed,
762 		  int    *fixedorder,
763 		  int    *consensus)
764 #else
765 void PP_RecvSizes(int    *mspc,
766                   int    *msite,
767                   int    *alimsite,
768                   int    *alistart,
769                   int    *aliend,
770                   int    *ncats,
771                   int    *nptrn,
772                   int    *rad,
773                   int    *outgr,
774                   double *frconst,
775                   int    *rseed,
776 		  int    *fixedorder,
777 		  int    *consensus)
778 #endif
779 {
780 #ifndef USE_WINDOWS
781 #  define NUMINT 9
782 #  define NUMDBL 1
783 #else
784 #  define NUMINT 12
785 #  define NUMDBL 1
786 #endif
787   int    ints[NUMINT];
788   double doubles[NUMDBL];
789   MPI_Datatype Dtypes[2] =    {MPI_INT, MPI_DOUBLE};
790   int          Dtypelens[2] = {NUMINT , NUMDBL};
791   MPI_Aint     Dtypeaddr[2];
792   MPI_Datatype PP_Sizes;
793   MPI_Status   stat;
794   int          error;
795 
796 # ifdef PVERBOSE3
797     fprintf(STDOUT, "(%2d) Receiving Sizes ...\n", PP_Myid);
798 # endif /* PVERBOSE3 */
799 
800   MPI_Address(ints,     Dtypeaddr);
801   MPI_Address(doubles, (Dtypeaddr+1));
802 
803   MPI_Type_struct(2, Dtypelens, Dtypeaddr, Dtypes, &PP_Sizes);
804   MPI_Type_commit(&PP_Sizes);
805 
806   error = MPI_Probe(PP_MyMaster, MPI_ANY_TAG, PP_Comm, &stat);
807   if (error != MPI_SUCCESS)
808     PP_Printerror(STDOUT, 700+PP_Myid, error);
809   if (stat.MPI_TAG != PP_SIZES) {
810 	if (stat.MPI_TAG == PP_DONE) {
811 		PP_RecvDone();
812 #		ifdef PVERBOSE1
813 			fprintf(STDOUT, "(%2d) Finishing...\n", PP_Myid);
814 #		endif /* PVERBOSE1 */
815 		MPI_Finalize();
816 		exit(1);
817 	} else {
818 		fprintf(STDOUT, "(%2d) Error: unexpected TAG received...\n", PP_Myid);
819 		MPI_Finalize();
820 		exit(1);
821 	}
822   }
823 
824   error = MPI_Recv(MPI_BOTTOM, 1, PP_Sizes, PP_MyMaster, MPI_ANY_TAG, PP_Comm, &stat);
825   if (error != MPI_SUCCESS)
826     PP_Printerror(STDOUT, 700+PP_Myid, error);
827   if (stat.MPI_TAG != PP_SIZES) {
828 	fprintf(STDOUT, "(%2d) Error: unexpected TAG received...\n", PP_Myid);
829 	MPI_Finalize();
830 	exit(1);
831   }
832 
833   *mspc       = ints[0];
834   *msite      = ints[1];
835   *ncats      = ints[2];
836   *nptrn      = ints[3];
837   *rad        = ints[4];
838   *outgr      = ints[5];
839   *rseed      = ints[6];
840   *fixedorder = ints[7];
841   *consensus  = ints[8];
842 #ifdef USE_WINDOWS
843   *alimsite   = ints[9];
844   *alistart   = ints[10];
845   *aliend     = ints[11];
846 #endif
847   *frconst = doubles[0];
848 
849   MPI_Type_free(&PP_Sizes);
850 
851 # ifdef PVERBOSE2
852     fprintf(STDOUT, "(%2d) <- (%2d) Received: Maxspec=%d  Maxsite=%d  numcats=%d\n", PP_Myid, PP_MyMaster, *mspc, *msite, *ncats);
853     fprintf(STDOUT, "(%2d)                   Numprtn=%d  tpmradix=%d  fracconst=%.3f\n", PP_Myid, *nptrn, *rad, *frconst);
854 # endif /* PVERBOSE2 */
855 
856 # undef NUMINT
857 # undef NUMDBL
858 } /* PP_RecvSizes */
859 
860 
861 
862 /*********************************************************************
863 *  sending/receiving the data matrizes  (M->S)                       *
864 *********************************************************************/
865 
PP_RecvData(cmatrix Seqpat,ivector Alias,ivector Weight,ivector constpat,dvector Rates,dvector Eval,dvector Freqtpm,dmatrix Evec,dmatrix Ievc,dmatrix iexp,dcube ltprobr)866 void PP_RecvData(
867 	cmatrix Seqpat,           /* cmatrix (Maxspc x Numptrn)             */
868 	ivector Alias,            /* ivector (Maxsite)                      */
869 	ivector Weight,           /* ivector (Numptrn)                      */
870 	ivector constpat,
871 	dvector Rates,            /* dvector (numcats)                      */
872 	dvector Eval,             /* dvector (tpmradix)                     */
873 	dvector Freqtpm,
874 	dmatrix Evec,             /* dmatrix (tpmradix x tpmradix)          */
875 	dmatrix Ievc,
876 	dmatrix iexp,
877 /*	dmatrix Distanmat,*/ /*epe*/ /* dmatrix (Maxspc x Maxspc)           */
878 	dcube   ltprobr)          /* dcube (numcats x tpmradix x tpmradix)  */
879 {
880   MPI_Datatype Dtypes[12];
881   int          Dtypelens[12];
882   MPI_Aint     Dtypeaddr[12];
883   MPI_Datatype PP_Data;
884   MPI_Status   stat;
885   int          error;
886 
887 # ifdef PVERBOSE3
888     fprintf(STDOUT, "(%2d) Receiving Sizes ...\n", PP_Myid);
889 # endif /* PVERBOSE2 */
890 
891   Dtypes  [0] = MPI_CHAR; Dtypelens [0] = Maxspc * Numptrn;
892   MPI_Address(&(Seqpat[0][0]), &(Dtypeaddr[0]));
893   Dtypes  [1] = MPI_INT; Dtypelens  [1] = Maxsite ;
894   MPI_Address(&(Alias[0]), &(Dtypeaddr[1]));
895   Dtypes  [2] = MPI_INT; Dtypelens  [2] = Numptrn ;
896   MPI_Address(&(Weight[0]), &(Dtypeaddr[2]));
897   Dtypes  [3] = MPI_INT; Dtypelens  [3] = Numptrn ;
898   MPI_Address(&(constpat[0]), &(Dtypeaddr[3]));
899   Dtypes  [4] = MPI_DOUBLE; Dtypelens  [4] = numcats ;
900   MPI_Address(&(Rates[0]), &(Dtypeaddr[4]));
901   Dtypes  [5] = MPI_DOUBLE; Dtypelens  [5] = tpmradix ;
902   MPI_Address(&(Eval[0]), &(Dtypeaddr[5]));
903   Dtypes  [6] = MPI_DOUBLE; Dtypelens  [6] = tpmradix ;
904   MPI_Address(&(Freqtpm[0]), &(Dtypeaddr[6]));
905   Dtypes  [7] = MPI_DOUBLE; Dtypelens  [7] = tpmradix * tpmradix ;
906   MPI_Address(&(Evec[0][0]), &(Dtypeaddr[7]));
907   Dtypes  [8] = MPI_DOUBLE; Dtypelens  [8] = tpmradix * tpmradix ;
908   MPI_Address(&(Ievc[0][0]), &(Dtypeaddr[8]));
909   Dtypes  [9] = MPI_DOUBLE; Dtypelens [9] = tpmradix * tpmradix ;
910   MPI_Address(&(iexp[0][0]), &(Dtypeaddr[9]));
911   Dtypes [10] = MPI_DOUBLE; Dtypelens [10] = Maxspc * Maxspc ;
912   MPI_Address(&(Distanmat[0][0]), &(Dtypeaddr[10]));
913   Dtypes [11] = MPI_DOUBLE; Dtypelens [11] = numcats * tpmradix * tpmradix ;
914   MPI_Address(&(ltprobr[0][0][0]), &(Dtypeaddr[11]));
915 
916   MPI_Type_struct(12, Dtypelens, Dtypeaddr, Dtypes, &PP_Data);
917   MPI_Type_commit(&PP_Data);
918 
919 
920   error = MPI_Probe(PP_MyMaster, MPI_ANY_TAG, PP_Comm, &stat);
921   if (error != MPI_SUCCESS)
922     PP_Printerror(STDOUT, 700+PP_Myid, error);
923   if (stat.MPI_TAG != PP_DATA) {
924         if (stat.MPI_TAG == PP_DONE) {
925                 PP_RecvDone();
926 #               ifdef PVERBOSE1
927                         fprintf(STDOUT, "(%2d) Finishing...\n", PP_Myid);
928 #               endif /* PVERBOSE1 */
929                 MPI_Finalize();
930                 exit(1);
931         } else {
932                 fprintf(STDOUT, "(%2d) Error: unexpected TAG received...\n", PP_Myid);
933                 MPI_Finalize();
934                 exit(1);
935         }
936   }
937 
938 
939   error = MPI_Recv(MPI_BOTTOM, 1, PP_Data, PP_MyMaster, PP_DATA, PP_Comm, &stat);
940   if (error != MPI_SUCCESS)
941     PP_Printerror(STDOUT, 900+PP_Myid, error);
942 
943 # ifdef PVERBOSE2
944     fprintf(STDOUT, "(%2d) <- (%2d) Received : Alias(0)=%d - Weight(0)=%d - constpat(0)=%d\n", PP_Myid, PP_MyMaster, Alias[0], Weight[0], constpat[0]);
945     fprintf(STDOUT, "(%2d)                    Rates(0)=%.3f - Eval(0)=%.3f - Freqtpm(0)=%.3f\n", PP_Myid, Rates[0], Eval[0], Freqtpm[0]);
946     fprintf(STDOUT, "(%2d)                    Evec(0,0)=%.3f - Ievc(0,0)=%.3f - iexp(0,0)=%.3f - Distanmat(0,1)=%.3f\n", PP_Myid, Evec[0][0], Ievc[0][0], iexp[0][0], Distanmat[0][1]);
947     fprintf(STDOUT, "(%2d)                    Distanmat(0,1)=%.3f\n", PP_Myid, Distanmat[0][1]);
948     fprintf(STDOUT, "(%2d)                    ltprobr(0,0,0)=%.3f\n", PP_Myid, ltprobr[0][0][0]);
949 # endif /* PVERBOSE2 */
950 
951   MPI_Type_free(&PP_Data);
952 
953 } /* PP_RecvData */
954 
955 
956 /******************/
957 
PP_SendData(cmatrix Seqpat,ivector Alias,ivector Weight,ivector constpat,dvector Rates,dvector Eval,dvector Freqtpm,dmatrix Evec,dmatrix Ievc,dmatrix iexp,dcube ltprobr)958 void PP_SendData(
959         cmatrix Seqpat,           /* cmatrix (Maxspc x Numptrn)             */
960         ivector Alias,            /* ivector (Maxsite)                      */
961         ivector Weight,           /* ivector (Numptrn)                      */
962         ivector constpat,
963         dvector Rates,            /* dvector (numcats)                      */
964         dvector Eval,             /* dvector (tpmradix)                     */
965         dvector Freqtpm,
966         dmatrix Evec,             /* dmatrix (tpmradix x tpmradix)          */
967         dmatrix Ievc,
968         dmatrix iexp,
969 /*	dmatrix Distanmat,*/ /*epe*/ /* dmatrix (Maxspc x Maxspc)           */
970         dcube   ltprobr)          /* dcube (numcats x tpmradix x tpmradix)  */
971 {
972   MPI_Datatype Dtypes[12];
973   int          Dtypelens[12];
974   MPI_Aint     Dtypeaddr[12];
975   MPI_Datatype PP_Data;
976   int          dest;
977   int          error;
978 
979 # ifdef PVERBOSE2
980     fprintf(STDOUT, "(%2d) Sending: Alias(0)=%d - Weight(0)=%d - constpat(0)=%d\n", PP_Myid, Alias[0], Weight[0], constpat[0]);
981     fprintf(STDOUT, "(%2d)          Rates(0)=%.3f - Eval(0)=%.3f - Freqtpm(0)=%.3f\n", PP_Myid, Rates[0], Eval[0], Freqtpm[0]);
982     fprintf(STDOUT, "(%2d)          Evec(0,0)=%.3f - Ievc(0,0)=%.3f - iexp(0,0)=%.3f - Distanmat(0,1)=%.3f\n", PP_Myid, Evec[0][0], Ievc[0][0], iexp[0][0], Distanmat[0][1]);
983     fprintf(STDOUT, "(%2d)          ltprobr(0,0,0)=%.3f\n", PP_Myid, ltprobr[0][0][0]);
984 # endif /* PVERBOSE2 */
985 
986   Dtypes  [0] = MPI_CHAR; Dtypelens [0] = Maxspc * Numptrn;
987   MPI_Address(&(Seqpat[0][0]), &(Dtypeaddr[0]));
988   Dtypes  [1] = MPI_INT; Dtypelens  [1] = Maxsite ;
989   MPI_Address(&(Alias[0]), &(Dtypeaddr[1]));
990   Dtypes  [2] = MPI_INT; Dtypelens  [2] = Numptrn ;
991   MPI_Address(&(Weight[0]), &(Dtypeaddr[2]));
992   Dtypes  [3] = MPI_INT; Dtypelens  [3] = Numptrn ;
993   MPI_Address(&(constpat[0]), &(Dtypeaddr[3]));
994   Dtypes  [4] = MPI_DOUBLE; Dtypelens  [4] = numcats ;
995   MPI_Address(&(Rates[0]), &(Dtypeaddr[4]));
996   Dtypes  [5] = MPI_DOUBLE; Dtypelens  [5] = tpmradix ;
997   MPI_Address(&(Eval[0]), &(Dtypeaddr[5]));
998   Dtypes  [6] = MPI_DOUBLE; Dtypelens  [6] = tpmradix ;
999   MPI_Address(&(Freqtpm[0]), &(Dtypeaddr[6]));
1000   Dtypes  [7] = MPI_DOUBLE; Dtypelens  [7] = tpmradix * tpmradix ;
1001   MPI_Address(&(Evec[0][0]), &(Dtypeaddr[7]));
1002   Dtypes  [8] = MPI_DOUBLE; Dtypelens  [8] = tpmradix * tpmradix ;
1003   MPI_Address(&(Ievc[0][0]), &(Dtypeaddr[8]));
1004   Dtypes  [9] = MPI_DOUBLE; Dtypelens  [9] = tpmradix * tpmradix ;
1005   MPI_Address(&(iexp[0][0]), &(Dtypeaddr [9]));
1006   Dtypes [10] = MPI_DOUBLE; Dtypelens [10] = Maxspc * Maxspc ;
1007   MPI_Address(&(Distanmat[0][0]), &(Dtypeaddr[10]));
1008   Dtypes [11] = MPI_DOUBLE; Dtypelens [11] = numcats * tpmradix * tpmradix ;
1009   MPI_Address(&(ltprobr[0][0][0]), &(Dtypeaddr[11]));
1010 
1011   MPI_Type_struct(12, Dtypelens, Dtypeaddr, Dtypes, &PP_Data);
1012   MPI_Type_commit(&PP_Data);
1013 
1014   for (dest=1; dest<PP_NumProcs; dest++) {
1015 
1016     error = MPI_Ssend(MPI_BOTTOM, 1, PP_Data, dest, PP_DATA, PP_Comm);
1017     if (error != MPI_SUCCESS)
1018       PP_Printerror(STDOUT, 1100+PP_Myid, error);
1019 
1020 #     ifdef PVERBOSE3
1021          fprintf(STDOUT, "(%2d) -> (%2d) Sent Data\n", PP_Myid, dest);
1022 #     endif /* PVERBOSE2 */
1023 
1024   } /* for each slave */
1025 
1026   MPI_Type_free(&PP_Data);
1027 
1028 # ifdef PVERBOSE3
1029     fprintf(STDOUT, "(%2d) ... Sent Data\n", PP_Myid);
1030 # endif /* PVERBOSE2 */
1031 
1032 } /* PP_SendData */
1033 
1034 
1035 /**************************************************************************
1036 *  procedures to send the request to compute a single quartet  (M->S)     *
1037 **************************************************************************/
1038 
PP_SendDoQuart(int dest,int a,int b,int c,int d,int usebestq,int approx)1039 void PP_SendDoQuart(int    dest,
1040                     int    a,
1041                     int    b,
1042                     int    c,
1043                     int    d,
1044 		    int    usebestq,
1045                     int    approx)
1046 {
1047 # define NUMINT 6
1048   int    ints[NUMINT];
1049   int    error;
1050 
1051   ints[0] = a;
1052   ints[1] = b;
1053   ints[2] = c;
1054   ints[3] = d;
1055   ints[4] = usebestq;
1056   ints[5] = approx;
1057 
1058   PP_doquartsent++;
1059   PP_doquartsentn++;
1060 
1061 # ifdef PVERBOSE2
1062     fprintf(STDOUT, "(%2d) Sending  -> (%2d): Quart(%d,%d,%d,%d)\n", PP_Myid, dest, a, b, c, d);
1063 # endif /* PVERBOSE2 */
1064 
1065   error = MPI_Ssend(ints, NUMINT, MPI_INT, dest, PP_DOQUART, PP_Comm);
1066   if (error != MPI_SUCCESS)
1067      PP_Printerror(STDOUT, PP_Myid, error);
1068 
1069 # ifdef PVERBOSE3
1070     fprintf(STDOUT, "(%2d) ... Sent \n", PP_Myid);
1071 # endif /* PVERBOSE3 */
1072 # undef NUMINT
1073 
1074 } /* PP_SendDoQuart */
1075 
1076 
1077 
1078 /******************/
1079 
PP_RecvDoQuart(int * a,int * b,int * c,int * d,int * usebestq,int * approx)1080 void PP_RecvDoQuart(int    *a,
1081                     int    *b,
1082                     int    *c,
1083                     int    *d,
1084 		    int    *usebestq,
1085                     int    *approx)
1086 {
1087 # define NUMINT 6
1088   int    ints[NUMINT];
1089   int    error;
1090   MPI_Status   stat;
1091   PP_doquartrecved++;
1092   PP_doquartrecvedn++;
1093 
1094 # ifdef PVERBOSE3
1095     fprintf(STDOUT, "(%2d) Receiving: Quart\n", PP_Myid);
1096 # endif /* PVERBOSE3 */
1097 
1098   error = MPI_Recv(ints, NUMINT, MPI_INT, PP_MyMaster, PP_DOQUART, PP_Comm, &stat);
1099   if (error != MPI_SUCCESS)
1100     PP_Printerror(STDOUT, 200+PP_Myid, error);
1101 
1102   *a        = ints[0];
1103   *b        = ints[1];
1104   *c        = ints[2];
1105   *d        = ints[3];
1106   *usebestq = ints[4];
1107   *approx   = ints[5];
1108 
1109 # ifdef PVERBOSE2
1110     fprintf(STDOUT, "(%2d) Received: Quart(%d,%d,%d,%d,%c)\n", PP_Myid, *a, *b, *c, *d, (approx ? 'A' : 'E'));
1111 # endif /* PVERBOSE2 */
1112 # undef NUMINT
1113 
1114 } /* PP_RecvDoQuart */
1115 
1116 
1117 /**************************************************************************
1118 *  procedures to send the result of a single quartet  (S->M)              *
1119 **************************************************************************/
1120 
PP_SendQuart(int a,int b,int c,int d,double d1,double d2,double d3,int usebestq,int approx)1121 void PP_SendQuart(int    a,
1122                   int    b,
1123                   int    c,
1124                   int    d,
1125                   double d1,
1126                   double d2,
1127                   double d3,
1128 		  int    usebestq,
1129                   int    approx)
1130 {
1131 # define NUMINT 6
1132 # define NUMDBL 3
1133   int    ints[NUMINT];
1134   double doubles[NUMDBL];
1135   MPI_Datatype Dtypes[2] =    {MPI_INT, MPI_DOUBLE};
1136   int          Dtypelens[2] = {NUMINT , NUMDBL};
1137   MPI_Aint     Dtypeaddr[2];
1138   MPI_Datatype PP_Quart;
1139   int          error;
1140 
1141   PP_quartsent++;
1142   PP_quartsentn++;
1143   ints[0] = a;
1144   ints[1] = b;
1145   ints[2] = c;
1146   ints[3] = d;
1147   doubles[0] = d1;
1148   doubles[1] = d2;
1149   doubles[2] = d3;
1150   ints[4] = usebestq;
1151   ints[5] = approx;
1152 
1153   MPI_Address(ints,     Dtypeaddr);
1154   MPI_Address(doubles, (Dtypeaddr+1));
1155 
1156   MPI_Type_struct(2, Dtypelens, Dtypeaddr, Dtypes, &PP_Quart);
1157   MPI_Type_commit(&PP_Quart);
1158 
1159 # ifdef PVERBOSE2
1160     fprintf(STDOUT, "(%2d) Sending: Quart(%d,%d,%d,%d) = (%.3f, %.3f, %.3f)\n", PP_Myid, a, b, c, d, d1, d2, d3);
1161 # endif /* PVERBOSE2 */
1162 
1163   error = MPI_Ssend(MPI_BOTTOM, 1, PP_Quart, PP_MyMaster, PP_QUART, PP_Comm);
1164   if (error != MPI_SUCCESS)
1165     PP_Printerror(STDOUT, 400+PP_Myid, error);
1166 
1167   MPI_Type_free(&PP_Quart);
1168 
1169 # ifdef PVERBOSE3
1170     fprintf(STDOUT, "(%2d) ... Sent \n", PP_Myid);
1171 # endif /* PVERBOSE3 */
1172 # undef NUMINT
1173 # undef NUMDBL
1174 
1175 } /* PP_SendQuart */
1176 
1177 
1178 
1179 /******************/
1180 
PP_RecvQuart(int * a,int * b,int * c,int * d,double * d1,double * d2,double * d3,int * usebestq,int * approx)1181 void PP_RecvQuart(int    *a,
1182                   int    *b,
1183                   int    *c,
1184                   int    *d,
1185                   double *d1,
1186                   double *d2,
1187                   double *d3,
1188 		  int    *usebestq,
1189                   int    *approx)
1190 {
1191 # define NUMINT 6
1192 # define NUMDBL 3
1193   int          ints[NUMINT];
1194   double       doubles[NUMDBL];
1195   MPI_Datatype Dtypes[2] =    {MPI_INT, MPI_DOUBLE};
1196   int          Dtypelens[2] = {NUMINT , NUMDBL};
1197   MPI_Aint     Dtypeaddr[2];
1198   MPI_Datatype PP_Quart;
1199   int          error;
1200   MPI_Status   stat;
1201 
1202   PP_quartrecved++;
1203   PP_quartrecvedn++;
1204   MPI_Address(ints,     Dtypeaddr);
1205   MPI_Address(doubles, (Dtypeaddr+1));
1206 
1207   MPI_Type_struct(2, Dtypelens, Dtypeaddr, Dtypes, &PP_Quart);
1208   MPI_Type_commit(&PP_Quart);
1209 
1210   error = MPI_Recv(MPI_BOTTOM, 1, PP_Quart, MPI_ANY_SOURCE, PP_QUART, PP_Comm, &stat);
1211 
1212   if (error != MPI_SUCCESS)
1213     PP_Printerror(STDOUT, 500+PP_Myid, error);
1214 
1215   PP_putslave(stat.MPI_SOURCE);
1216 
1217   *a        = ints[0];
1218   *b        = ints[1];
1219   *c        = ints[2];
1220   *d        = ints[3];
1221   *d1       = doubles[0];
1222   *d2       = doubles[1];
1223   *d3       = doubles[2];
1224   *usebestq = ints[4];
1225   *approx   = ints[5];
1226 
1227   MPI_Type_free(&PP_Quart);
1228 
1229 # ifdef PVERBOSE2
1230     fprintf(STDOUT, "(%2d) Received <- (%2d): Quart(%d,%d,%d,%d)=(%.3f, %.3f, %.3f)\n", PP_Myid, stat.MPI_SOURCE, *a, *b, *c, *d, *d1, *d2, *d3);
1231 # endif /* PVERBOSE2 */
1232 # undef NUMINT
1233 # undef NUMDBL
1234 
1235 } /* PP_RecvQuart */
1236 
1237 
1238 
1239 /**************************************************************************
1240 *  procedures to send the request to compute a block of quartets  (M->S)  *
1241 **************************************************************************/
1242 
PP_SendDoQuartBlock(int dest,uli firstq,uli amount,int usebestq,int approx)1243 void PP_SendDoQuartBlock(int dest,
1244                          uli firstq,
1245                          uli amount,
1246 		         int usebestq,
1247                          int approx)
1248 {
1249 #	define NUMULI 4
1250 	uli           ulongs[NUMULI];
1251 	int           error;
1252 
1253         PP_doquartsent += amount;
1254         PP_doquartsentn++;
1255 #	ifdef PVERBOSE2
1256 	   fprintf(STDOUT, "(%2d) Sending: DOQuartBlock Signal\n", PP_Myid);
1257 #	endif /* PVERBOSE2 */
1258 
1259 	ulongs[0] = firstq;
1260 	ulongs[1] = amount;
1261 	ulongs[2] = (uli)usebestq;
1262 	ulongs[3] = (uli)approx;
1263 
1264 	error = MPI_Ssend(ulongs, NUMULI, MPI_UNSIGNED_LONG, dest, PP_DOQUARTBLOCK, PP_Comm);
1265 	if (error != MPI_SUCCESS) PP_Printerror(STDOUT, 2100+PP_Myid, error);
1266 
1267 #	ifdef PVERBOSE3
1268 	   fprintf(STDOUT, "(%2d) ... Sent DOQuartBlock Signal (addr:%ld, num:%ld)\n", PP_Myid, firstq, amount);
1269 #	endif /* PVERBOSE3 */
1270 #       undef NUMULI
1271 
1272 } /* PP_SendDoQuartBlock */
1273 
1274 /******************/
1275 
PP_RecvDoQuartBlock(uli * firstq,uli * amount,uli ** bq,int * usebestq,int * approx)1276 void PP_RecvDoQuartBlock(uli  *firstq,
1277                          uli  *amount,
1278                          uli **bq,
1279 		         int  *usebestq,
1280                          int  *approx)
1281 {
1282 #	define NUMULI 4
1283 	uli           ulongs[NUMULI];
1284 	MPI_Status    stat;
1285 	int           error;
1286 
1287 #	ifdef PVERBOSE2
1288 	  fprintf(STDOUT, "(%2d) Receiving: DOQuartBlock Signal\n", PP_Myid);
1289 #	endif /* PVERBOSE2 */
1290 
1291 	error = MPI_Recv(&ulongs, NUMULI, MPI_UNSIGNED_LONG, PP_MyMaster, PP_DOQUARTBLOCK, PP_Comm, &stat);
1292 	if (error != MPI_SUCCESS)
1293 	   PP_Printerror(STDOUT, 2100+PP_Myid, error);
1294 
1295 	*firstq   = ulongs[0];
1296 	*amount   = ulongs[1];
1297 	*usebestq = (int)ulongs[2];
1298 	*approx   = (int)ulongs[3];
1299 
1300 	*bq = calloc((size_t)*amount, sizeof(uli));
1301 
1302         PP_doquartrecved += *amount;
1303         PP_doquartrecvedn++;
1304 
1305 #	ifdef PVERBOSE3
1306 	   fprintf(STDOUT, "(%2d) ... DOQuartBlock (addr:%ld, num:%ld)\n",
1307 	                      PP_Myid, *firstq, *amount);
1308 #	endif /* PVERBOSE3 */
1309 
1310 #	undef NUMULI
1311 } /* PP_RecvDoQuartBlock */
1312 
1313 /*********************************************************************
1314 *  procedures to send the results of a block of quartets  (S->M)     *
1315 *********************************************************************/
1316 
PP_SendQuartBlock(uli startq,uli numofq,unsigned char * quartetinfo,uli fullresqs,uli partresqs,uli unresqs,uli missingqs,uli numofbq,uli * bq,int usebestq,int approx)1317 void PP_SendQuartBlock(uli  startq,
1318                        uli  numofq,
1319                        unsigned char *quartetinfo,
1320 		       uli  fullresqs, /* number of fully resolved quartets */
1321 		       uli  partresqs, /* number of partly resolved quartets */
1322 		       uli  unresqs,   /* number of unresolved quartets */
1323 		       uli  missingqs, /* number of missing quartets */
1324                        uli  numofbq,
1325                        uli *bq,
1326 		       int  usebestq,
1327                        int  approx)
1328 {
1329 # define NUMULI 7
1330 # define NUMINT 2
1331   unsigned char *trueaddr;
1332   uli            truenum;
1333   int            error;
1334   int    ints[NUMINT];
1335   uli    ulis[NUMULI];
1336   MPI_Datatype Dtypes[2] =    {MPI_UNSIGNED_LONG, MPI_INT};
1337   int          Dtypelens[2] = {NUMULI,            NUMINT};
1338   MPI_Aint     Dtypeaddr[2];
1339   MPI_Datatype PP_QBlockSpecs;
1340   MPI_Datatype DtypesRes[2] = {MPI_UNSIGNED_CHAR, MPI_UNSIGNED_LONG};
1341   int          DtypelensRes[2];
1342   MPI_Aint     DtypeaddrRes[2];
1343   MPI_Datatype PP_QBlockRes;
1344 
1345 /*
1346   uli *bq;
1347   uli  numofbq;
1348 */
1349 
1350   PP_quartsent += numofq;
1351   PP_quartsentn++;
1352 
1353   truenum = (uli)((numofq+1)/2);
1354   trueaddr = (unsigned char *)(quartetinfo + (uli)(startq/2));
1355 
1356 # ifdef PVERBOSE2
1357     fprintf(STDOUT, "(%2d) Sending: startq=%lud numofq=%lud\n", PP_Myid, startq, numofq);
1358     fprintf(STDOUT, "(%2d)          approx=%c\n", PP_Myid, (approx ? 'A' : 'E'));
1359 # endif /* PVERBOSE2 */
1360 
1361   ints[0] = usebestq;
1362   ints[1] = approx;
1363   ulis[0] = startq;
1364   ulis[1] = numofq;
1365   ulis[2] = numofbq;
1366   ulis[3] = unresqs;   /* number of unresolved quartets */
1367   ulis[4] = partresqs; /* number of partly resolved quartets */
1368   ulis[5] = fullresqs; /* number of fully resolved quartets */
1369   ulis[6] = missingqs; /* number of missing quartets */
1370 
1371   MPI_Address(ulis,  Dtypeaddr);
1372   MPI_Address(ints, (Dtypeaddr+1));
1373 
1374   MPI_Type_struct(2, Dtypelens, Dtypeaddr, Dtypes, &PP_QBlockSpecs);
1375   MPI_Type_commit(&PP_QBlockSpecs);
1376 
1377 # ifdef PVERBOSE2
1378     fprintf(STDOUT, "(%2d) Sending: xxPP_QuartBlockSpecs(0,%lu)=%d,%d\n", PP_Myid, truenum-1, trueaddr[0], trueaddr[truenum-1]);
1379 # endif /* PVERBOSE2 */
1380 
1381 
1382   error = MPI_Ssend(MPI_BOTTOM, 1, PP_QBlockSpecs, PP_MyMaster, PP_QUARTBLOCKSPECS, PP_Comm);
1383 # ifdef PVERBOSE3
1384     fprintf(STDOUT, "(%2d) ... Sent QuartBlockSpecs (%ld, %ld, %ld, %d)\n", PP_Myid, ulis[0], ulis[1], ulis[2], ints[0]);
1385 # endif /* PVERBOSE3 */
1386 
1387   MPI_Address(trueaddr, DtypeaddrRes);
1388   DtypelensRes[0] = truenum;
1389 
1390   MPI_Address(bq, (DtypeaddrRes + 1));
1391   DtypelensRes[1] = numofbq;
1392   MPI_Type_struct(2, DtypelensRes, DtypeaddrRes, DtypesRes, &PP_QBlockRes);
1393   MPI_Type_commit(&PP_QBlockRes);
1394 
1395   error = MPI_Ssend(MPI_BOTTOM, 1, PP_QBlockRes, PP_MyMaster, PP_QUARTBLOCK, PP_Comm);
1396   if (error != MPI_SUCCESS)
1397     PP_Printerror(STDOUT, PP_Myid, error);
1398 
1399   MPI_Type_free(&PP_QBlockSpecs);
1400   MPI_Type_free(&PP_QBlockRes);
1401 # ifdef PVERBOSE3
1402     fprintf(STDOUT, "(%2d) ... Sent xxPP_QuartBlock(0,%lu)=%d,%d\n", PP_Myid, truenum-1, trueaddr[0], trueaddr[truenum-1]);
1403 # endif /* PVERBOSE3 */
1404 
1405 # undef NUMULI
1406 # undef NUMINT
1407 } /* PP_SendQuartBlock */
1408 
1409 
1410 
1411 /******************/
1412 
PP_RecvQuartBlock(int slave,uli * startq,uli * numofq,unsigned char * quartetinfo,uli * fullresqs,uli * partresqs,uli * unresqs,uli * missingqs,int * usebestq,int * approx)1413 void PP_RecvQuartBlock(int slave,
1414                        uli *startq,
1415                        uli *numofq,
1416                        unsigned char *quartetinfo,
1417 		       uli *fullresqs, /* number of fully resolved quartets */
1418 		       uli *partresqs, /* number of partly resolved quartets */
1419 		       uli *unresqs,   /* number of unresolved quartets */
1420 		       uli *missingqs, /* number of missing quartets */
1421 		       int *usebestq,
1422                        int *approx)
1423 {
1424 #	define NUMULI 7
1425 #	define NUMINT 2
1426 	unsigned char *trueaddr;
1427 	uli            truenum;
1428 	int            error;
1429 	int            dest;
1430 	int    ints[NUMINT];
1431 	uli    ulis[NUMULI];
1432 	MPI_Datatype Dtypes[2] =    {MPI_UNSIGNED_LONG, MPI_INT};
1433 	int          Dtypelens[2] = {NUMULI,             NUMINT};
1434 	MPI_Aint     Dtypeaddr[2];
1435 	MPI_Datatype PP_QBlockSpecs;
1436 	MPI_Datatype DtypesRes[2] =    {MPI_UNSIGNED_CHAR, MPI_UNSIGNED_LONG};
1437 	int          DtypelensRes[2];
1438 	MPI_Aint     DtypeaddrRes[2];
1439 	MPI_Datatype PP_QBlockRes;
1440 	MPI_Status   stat;
1441 	uli          count;
1442 	uli          idx;
1443 	uli          qstart;
1444 	uli          qend;
1445 	int          a, b, c, i;
1446 	unsigned char quartettype;
1447 	uli tmpunresqs   = 0; /* number of unresolved quartets */
1448 	uli tmppartresqs = 0; /* number of partly resolved quartets */
1449 	uli tmpfullresqs = 0; /* number of fully resolved quartets */
1450 	uli tmpmissingqs = 0; /* number of missing quartets */
1451 uli num;
1452 uli *numofbq;
1453 uli *bq;
1454 numofbq=&num;
1455 #	ifdef PVERBOSE3
1456 	    fprintf(STDOUT, "(%2d) Receiving QuartBlock ...\n", PP_Myid);
1457 #	endif /* PVERBOSE3 */
1458 	MPI_Address(ulis,  Dtypeaddr);
1459 	MPI_Address(ints, (Dtypeaddr+1));
1460 
1461 	MPI_Type_struct(2, Dtypelens, Dtypeaddr, Dtypes, &PP_QBlockSpecs);
1462 	MPI_Type_commit(&PP_QBlockSpecs);
1463 
1464 	MPI_Probe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &stat);
1465 	dest = stat.MPI_SOURCE;
1466 	error = MPI_Recv(MPI_BOTTOM, 1, PP_QBlockSpecs, dest, PP_QUARTBLOCKSPECS, PP_Comm, &stat);
1467 	if (error != MPI_SUCCESS)
1468 	      PP_Printerror(STDOUT, PP_Myid, error);
1469 
1470         *usebestq  = ints[0];
1471 	*approx    = ints[1];
1472 	*startq    = ulis[0];
1473 	*numofq    = ulis[1];
1474 	*numofbq   = ulis[2];
1475 	*unresqs   = ulis[3]; /* number of unresolved quartets */
1476 	*partresqs = ulis[4]; /* number of partly resolved quartets */
1477 	*fullresqs = ulis[5]; /* number of fully resolved quartets */
1478 	*missingqs = ulis[6]; /* number of missing quartets */
1479 
1480         PP_quartrecved += *numofq;
1481         PP_quartrecvedn++;
1482 	truenum = (uli)((*numofq+1)/2);
1483 	trueaddr = (unsigned char *)(quartetinfo + (uli)(*startq/2));
1484 #       ifdef PVERBOSE3
1485            fprintf(STDOUT, "(%2d) ... Recv QuartBlockSpecs (%ld, %ld, %ld, %d)\n", PP_Myid, ulis[0], ulis[1], ulis[2], ints[0]);
1486 #       endif /* PVERBOSE3 */
1487 
1488 	DtypelensRes[0] =  truenum;
1489 	MPI_Address(trueaddr,  DtypeaddrRes);
1490 
1491 	bq = calloc((size_t) *numofbq, sizeof(uli));
1492 
1493 	DtypelensRes[1] = *numofbq;
1494 	MPI_Address(bq, (DtypeaddrRes+1));
1495 	MPI_Type_struct(2, DtypelensRes, DtypeaddrRes, DtypesRes, &PP_QBlockRes);
1496 	MPI_Type_commit(&PP_QBlockRes);
1497 
1498 	error = MPI_Recv(MPI_BOTTOM, 1, PP_QBlockRes, dest, PP_QUARTBLOCK, PP_Comm, &stat);
1499 	if (error != MPI_SUCCESS)
1500 	    PP_Printerror(STDOUT, PP_Myid, error);
1501 #       ifdef PVERBOSE3
1502            fprintf(STDOUT, "(%2d) ... Recv QuartBlock \n", PP_Myid);
1503 #       endif /* PVERBOSE3 */
1504 
1505 	/* MPI package check */
1506 	qstart = *startq;
1507 	qend   = qstart + *numofq - 1;
1508 	for (i = 3; i < Maxspc; i++)
1509 	   for (c = 2; c < i; c++)
1510 	      for (b = 1; b < c; b++)
1511 	         for (a = 0; a < b; a++) {
1512 
1513                     /* use a better way, not computing this every time */
1514 	            idx = (uli) a +
1515 	                  (uli) b*(b-1)/2 +
1516 	                  (uli) c*(c-1)*(c-2)/6 +
1517 	                  (uli) i*(i-1)*(i-2)*(i-3)/24;
1518 	            if ((idx >= qstart) && (idx <= qend)) {
1519 	               quartettype = readquartet(a,b,c,i);
1520                        switch(quartettype) {
1521                            case 1: /* 001 */
1522                            case 2: /* 010 */
1523                            case 4: /* 100 */
1524                                {
1525 	                          tmpfullresqs++; /* number of fully resolved quartets */
1526                                   break;
1527                                }
1528                            case 3: /* 011 */
1529                            case 5: /* 101 */
1530                            case 6: /* 110 */
1531                                {
1532 	                          tmppartresqs++; /* number of partly resolved quartets */
1533                                   break;
1534                                }
1535                            case 7: /* 111 */
1536                                {
1537 	                          tmpunresqs++;   /* number of unresolved quartets */
1538 #if 0 /* to make the part below obsolete */
1539                                   badqs++;
1540                                   badtaxon[a]++;
1541                                   badtaxon[b]++;
1542                                   badtaxon[c]++;
1543                                   badtaxon[d]++;
1544                                   if (show_optn) {
1545 	                             fputid10(unresfp, a);
1546 	                             fprintf(unresfp, "  ");
1547 	                             fputid10(unresfp, b);
1548 	                             fprintf(unresfp, "  ");
1549 	                             fputid10(unresfp, c);
1550 	                             fprintf(unresfp, "  ");
1551 	                             fputid(unresfp, d);
1552 	                             fprintf(unresfp, "\n");
1553 	                          }
1554 #endif
1555                                   break;
1556                                }
1557                            case 0: /* 000 */
1558                            default:
1559                                {
1560 	                          tmpmissingqs++; /* number of missing quartets */
1561                                   break;
1562                                }
1563                        } /* end switch quartettype */
1564 	            } /* if idx */
1565 	         } /* for for for for */
1566 	/* end MPI package check */
1567 
1568 	if((tmpfullresqs != *fullresqs) ||
1569 	   (tmppartresqs != *partresqs) ||
1570            (tmpunresqs != *unresqs) ||
1571 	   (tmpmissingqs != *missingqs)) {
1572 		fprintf(stderr, "ERROR: Quartets differ between send and receive!!! (received/sent)\n");
1573 		fprintf(stderr, "fully (%lu/%lu), partly(%lu/%lu), unresolved(%lu/%lu), missing(%lu/%lu)\n",
1574 	   		tmpfullresqs, *fullresqs, tmppartresqs, *partresqs,
1575 			tmpunresqs, *unresqs, tmpmissingqs, *missingqs);
1576 	}
1577 
1578         PP_putslave(dest);
1579 
1580 	for(count = 0; count < *numofbq; count++){
1581 	  int a, b, c, d;
1582 	  num2quart(bq[count], &a, &b, &c, &d);
1583 #	  ifdef PVERBOSE2
1584 	     fprintf(STDOUT, "(%2d) %ld. bad quarted (%d, %d, %d, %d) = %ld\n", PP_Myid, count, a, b, c, d, bq[count]);
1585 #	  endif /* PVERBOSE2 */
1586 
1587 	  badqs++;
1588 	  badtaxon[a]++;
1589 	  badtaxon[b]++;
1590 	  badtaxon[c]++;
1591 	  badtaxon[d]++;
1592 	  if (show_optn) {
1593 	     fputid10(unresfp, a);
1594 	     fprintf(unresfp, "  ");
1595 	     fputid10(unresfp, b);
1596 	     fprintf(unresfp, "  ");
1597 	     fputid10(unresfp, c);
1598 	     fprintf(unresfp, "  ");
1599 	     fputid(unresfp, d);
1600 	     fprintf(unresfp, "\n");
1601 	  }
1602 	}
1603 	free(bq);
1604 	MPI_Type_free(&PP_QBlockSpecs);
1605 	MPI_Type_free(&PP_QBlockRes);
1606 #	ifdef PVERBOSE2
1607            fprintf(STDOUT, "(%2d) <- (%2d) ... Recv xxPP_QuartBlock(0,%lu)=%d,%d\n", PP_Myid, dest, truenum-1, trueaddr[0], trueaddr[truenum-1]);
1608 #	endif /* PVERBOSE2 */
1609 
1610 #	undef NUMULI
1611 #	undef NUMINT
1612 } /* PP_RecvQuartBlock */
1613 
1614 
1615 /*********************************************************************
1616 *  send/receive array with all quartets  (M->S)                      *
1617 *********************************************************************/
1618 
PP_SendAllQuarts(unsigned long Numquartets,unsigned char * quartetinfo)1619 void PP_SendAllQuarts(unsigned long  Numquartets,
1620                       unsigned char *quartetinfo)
1621 {
1622   MPI_Datatype  Dtypes[1] =    {MPI_UNSIGNED_CHAR};
1623   int           Dtypelens[1];
1624   MPI_Aint      Dtypeaddr[1];
1625   MPI_Datatype  PP_AllQuarts;
1626   int           dest;
1627   int           error;
1628 
1629 # ifdef PVERBOSE2
1630     fprintf(STDOUT, "(%2d) Sending: PP_AllQuart(0)=%d\n", PP_Myid, quartetinfo[0]);
1631 # endif /* PVERBOSE2 */
1632 
1633   /* compute number of quartets */
1634   if (Numquartets % 2 == 0) { /* even number */
1635      Dtypelens[0] = (Numquartets)/2;
1636   } else { /* odd number */
1637      Dtypelens[0] = (Numquartets + 1)/2;
1638   }
1639 
1640   MPI_Address(&(quartetinfo[0]), Dtypeaddr);
1641   MPI_Type_struct(1, Dtypelens, Dtypeaddr, Dtypes, &PP_AllQuarts);
1642   MPI_Type_commit(&PP_AllQuarts);
1643 
1644   for (dest=1; dest<PP_NumProcs; dest++) {
1645 
1646     error = MPI_Ssend(MPI_BOTTOM, 1, PP_AllQuarts, dest, PP_ALLQUARTS, PP_Comm);
1647     if (error != MPI_SUCCESS)
1648       PP_Printerror(STDOUT, 1200+PP_Myid, error);
1649 
1650 #   ifdef PVERBOSE3
1651        fprintf(STDOUT, "(%2d) -> (%2d) ... Sent xxAllQuart(0,%d)=%d,%d (%luq -> %db)\n",
1652                            PP_Myid, dest, Dtypelens[0]-1, quartetinfo[0], quartetinfo[Dtypelens[0]-1],
1653                            Numquartets, Dtypelens[0]-1);
1654 #   endif /* PVERBOSE3 */
1655   } /* for each slave */
1656 
1657   MPI_Type_free(&PP_AllQuarts);
1658 
1659 
1660 } /* PP_SendAllQuarts */
1661 
1662 
1663 
1664 /******************/
1665 
PP_RecvAllQuarts(int taxa,unsigned long * Numquartets,unsigned char * quartetinfo)1666 void PP_RecvAllQuarts(int            taxa,
1667                       unsigned long *Numquartets,
1668                       unsigned char *quartetinfo)
1669 {
1670   MPI_Datatype  Dtypes[1] =    {MPI_UNSIGNED_CHAR};
1671   int           Dtypelens[1];
1672   MPI_Aint      Dtypeaddr[1];
1673   MPI_Datatype  PP_AllQuarts;
1674   MPI_Status    stat;
1675   int           error;
1676 
1677 # ifdef PVERBOSE3
1678     fprintf(STDOUT, "(%2d) Receiving AllQuarts ...\n", PP_Myid);
1679 # endif /* PVERBOSE3 */
1680 
1681   /* compute number of quartets */
1682   *Numquartets = (uli) taxa*(taxa-1)*(taxa-2)*(taxa-3)/24;
1683   if (*Numquartets % 2 == 0) { /* even number */
1684     Dtypelens[0] = (*Numquartets)/2;
1685   } else { /* odd number */
1686     Dtypelens[0] = (*Numquartets + 1)/2;
1687   }
1688 
1689   MPI_Address(&(quartetinfo[0]), Dtypeaddr);
1690   MPI_Type_struct(1, Dtypelens, Dtypeaddr, Dtypes, &PP_AllQuarts);
1691   MPI_Type_commit(&PP_AllQuarts);
1692 
1693   error = MPI_Recv(MPI_BOTTOM, 1, PP_AllQuarts, PP_MyMaster, PP_ALLQUARTS, PP_Comm, &stat);
1694   if (error != MPI_SUCCESS)
1695     PP_Printerror(STDOUT, 1300+PP_Myid, error);
1696 
1697   MPI_Type_free(&PP_AllQuarts);
1698 
1699 # ifdef PVERBOSE2
1700      fprintf(STDOUT, "(%2d) <- (%2d) ... Recv xxAllQuart(0,%d)=%d,%d (%luq -> %db)\n",
1701                         PP_Myid, PP_MyMaster, Dtypelens[0]-1, quartetinfo[0], quartetinfo[Dtypelens[0]-1],
1702                         *Numquartets, Dtypelens[0]-1);
1703 # endif /* PVERBOSE2 */
1704 
1705 } /* PP_RecvAllQuarts */
1706 
1707 
1708 
1709 /*********************************************************************
1710 *  procedures to send the splits of a puzzle tree to the master      *
1711 *********************************************************************/
1712 
PP_SendSplitsBlock(int taxa,uli blocksize,cmatrix * biparts,int pstnum,treelistitemtype * pstlist)1713 void PP_SendSplitsBlock(int               taxa,
1714                         uli               blocksize,
1715                         cmatrix          *biparts,
1716                         int               pstnum,
1717                         treelistitemtype *pstlist)
1718 {
1719   MPI_Datatype     *Dtypes;
1720   int              *Dtypelens;
1721   MPI_Aint         *Dtypeaddr;
1722   MPI_Datatype      PP_Biparts;
1723   int               error;
1724   int               n;
1725   int               ints[3];
1726   int              *pstnumarr;
1727   treelistitemtype *pstptr;
1728 
1729   PP_splitsent+=blocksize;
1730   PP_splitsentn++;
1731 
1732   ints[0] = taxa;
1733   ints[1] = (int) blocksize;
1734   ints[2] = pstnum;
1735   error = MPI_Ssend(ints, 3, MPI_INT, PP_MyMaster, PP_PUZZLEBLOCKSPECS, PP_Comm);
1736   if (error != MPI_SUCCESS)
1737     PP_Printerror(STDOUT, 1800+PP_Myid, error);
1738 
1739   Dtypes    = calloc((size_t) (blocksize + pstnum + 1), sizeof(MPI_Datatype));
1740   Dtypelens = calloc((size_t) (blocksize + pstnum + 1), sizeof(int));
1741   Dtypeaddr = calloc((size_t) (blocksize + pstnum + 1), sizeof(MPI_Aint));
1742   pstnumarr = calloc((size_t) pstnum, sizeof(int));
1743 
1744 # ifdef PVERBOSE2
1745     fprintf(STDOUT, "(%2d) Sending: PP_bipartsblock(0..%lu,0,0)8=\"%c\"\n", PP_Myid, blocksize, biparts[0][0][0]);
1746 # endif /* PVERBOSE2 */
1747 
1748   for (n=0; n<(int)blocksize; n++) {
1749     Dtypes[n]    = MPI_CHAR;
1750     Dtypelens[n] = (taxa - 3) * taxa;
1751     MPI_Address(&(biparts[n][0][0]), &(Dtypeaddr[n]));
1752   }
1753   pstptr = pstlist;
1754   for (n=0; n<pstnum; n++) {
1755     Dtypes[(int)blocksize + n]    = MPI_CHAR;
1756     Dtypelens[(int)blocksize + n] = psteptreestrlen;
1757     MPI_Address((*pstptr).tree, &(Dtypeaddr[(int)blocksize + n]));
1758     pstnumarr[n] = (*pstptr).count;
1759 #   ifdef PVERBOSE3
1760        fprintf(STDOUT, "(%2d) Sent tree item ->%d: [%d/%d] #=%d  \"%s\"\n",
1761                PP_Myid, PP_MyMaster, n, pstnum, pstnumarr[n], (*pstptr).tree);
1762 #   endif /* PVERBOSE3 */
1763     pstptr = (*pstptr).succ;
1764   }
1765   Dtypes[((int)blocksize + pstnum)]    = MPI_INT;
1766   Dtypelens[((int)blocksize + pstnum)] = pstnum;
1767   MPI_Address(&(pstnumarr[0]), &(Dtypeaddr[((int)blocksize + pstnum)]));
1768 
1769   MPI_Type_struct(((int)blocksize + pstnum + 1), Dtypelens, Dtypeaddr, Dtypes, &PP_Biparts);
1770   MPI_Type_commit(&PP_Biparts);
1771 
1772   error = MPI_Ssend(MPI_BOTTOM, 1, PP_Biparts, PP_MyMaster, PP_PUZZLEBLOCK, PP_Comm);
1773   if (error != MPI_SUCCESS)
1774     PP_Printerror(STDOUT, 1800+PP_Myid, error);
1775 
1776   MPI_Type_free(&PP_Biparts);
1777   free(Dtypes);
1778   free(Dtypelens);
1779   free(Dtypeaddr);
1780   free(pstnumarr);
1781 
1782 # ifdef PVERBOSE3
1783     fprintf(STDOUT, "(%2d) ... Sent PP_bipartsblock\n", PP_Myid);
1784 # endif /* PVERBOSE3 */
1785 
1786 } /* PP_SendSplitsBlock */
1787 
1788 /******************/
1789 
PP_RecvSplitsBlock(int * taxa,uli * blocksize,cmatrix ** bip,treelistitemtype ** pstlist,int * pstnum,int * pstsum)1790 void PP_RecvSplitsBlock(int               *taxa,
1791                         uli               *blocksize,
1792                         cmatrix          **bip,
1793                         treelistitemtype **pstlist,
1794                         int               *pstnum,
1795                         int               *pstsum)
1796 /* bp -> (*bip) */
1797 {
1798   MPI_Datatype      *Dtypes;
1799   int               *Dtypelens;
1800   MPI_Aint          *Dtypeaddr;
1801   MPI_Datatype       PP_Biparts;
1802   MPI_Status         stat;
1803   int                error;
1804   int                n;
1805   int                dest;
1806   int                ints[3];
1807   int                pstlistnum;
1808   int                tmpnum;
1809   int                tmpsum;
1810   int               *pstnumarr;
1811   char             **pstarr;
1812   treelistitemtype  *treeitem;
1813 
1814   error = MPI_Recv(ints, 3, MPI_INT, MPI_ANY_SOURCE, PP_PUZZLEBLOCKSPECS, PP_Comm, &stat);
1815   if (error != MPI_SUCCESS)
1816     PP_Printerror(STDOUT, 1900+PP_Myid, error);
1817 
1818     dest       =       stat.MPI_SOURCE;
1819    *taxa       =       ints[0];
1820    *blocksize  = (uli) ints[1];
1821     pstlistnum =       ints[2];
1822 
1823 # ifdef PVERBOSE2
1824     fprintf(STDOUT, "(%2d) Received<-%d: PP_bipartsblockspec(t=%d,b=%ld,p=%d)\n", PP_Myid, dest, *taxa, *blocksize, pstlistnum);
1825 # endif /* PVERBOSE2 */
1826 
1827   PP_splitrecved += *blocksize;
1828   PP_splitrecvedn++;
1829 
1830   Dtypes    = calloc((size_t) (*blocksize + pstlistnum + 1), sizeof(MPI_Datatype));
1831   Dtypelens = calloc((size_t) (*blocksize + pstlistnum + 1), sizeof(int));
1832   Dtypeaddr = calloc((size_t) (*blocksize + pstlistnum + 1), sizeof(MPI_Aint));
1833   (*bip)    = (cmatrix *) calloc((size_t) *blocksize, sizeof(void *));
1834   pstnumarr = (int *)     calloc((size_t) pstlistnum, sizeof(int));
1835   pstarr    = (char **)   calloc((size_t) pstlistnum, sizeof(char *));
1836 
1837 /*  pstarr[0] = (char *) calloc((size_t) (psteptreestrlen * pstlistnum), sizeof(char));
1838     for(n=1; n<pstlistnum; n++)
1839     pstarr[n] = &(pstarr[0][n * psteptreestrlen]);
1840 */
1841 
1842   for (n=0; n<(int)*blocksize; n++) {
1843     (*bip)[n]        = new_cmatrix(*taxa - 3, *taxa);
1844     Dtypes[n]    = MPI_CHAR;
1845     Dtypelens[n] = (*taxa - 3) * *taxa;
1846     MPI_Address(&((*bip)[n][0][0]), &(Dtypeaddr[n]));
1847   }
1848   for (n=0; n<pstlistnum; n++) {
1849     pstarr[n]        = (char *)calloc((size_t) psteptreestrlen, sizeof(char));
1850     Dtypes[(int)*blocksize + n]    = MPI_CHAR;
1851     Dtypelens[(int)*blocksize + n] = psteptreestrlen;
1852     MPI_Address(&(pstarr[n][0]), &(Dtypeaddr[(int)*blocksize + n]));
1853   }
1854 
1855   Dtypes[(int)*blocksize + pstlistnum]    = MPI_INT;
1856   Dtypelens[(int)*blocksize + pstlistnum] = pstlistnum;
1857   MPI_Address(&(pstnumarr[0]), &(Dtypeaddr[(int)*blocksize + pstlistnum]));
1858 
1859   MPI_Type_struct(((int)*blocksize + pstlistnum + 1), Dtypelens, Dtypeaddr, Dtypes, &PP_Biparts);
1860   MPI_Type_commit(&PP_Biparts);
1861 
1862   error = MPI_Recv(MPI_BOTTOM, 1, PP_Biparts, dest, PP_PUZZLEBLOCK, PP_Comm, &stat);
1863   if (error != MPI_SUCCESS)
1864     PP_Printerror(STDOUT, 1910+PP_Myid, error);
1865 
1866   tmpsum = *pstsum;
1867   for (n=0; n<pstlistnum; n++) tmpsum += pstnumarr[n];
1868   tmpnum = *pstnum;
1869 
1870   for (n=0; n<pstlistnum; n++) {
1871 #   ifdef PVERBOSE3
1872        fprintf(STDOUT, "(%2d) Received tree item <-%d: [%d/%d] #=%d  \"%s\"\n",
1873                PP_Myid, dest, n, pstlistnum, pstnumarr[n], pstarr[n]);
1874 #   endif /* PVERBOSE3 */
1875 
1876     treeitem = addtree2list(&(pstarr[n]), pstnumarr[n], pstlist, pstnum, pstsum);
1877     if((listqptrees == PSTOUT_LIST)
1878       || (listqptrees == PSTOUT_LISTORDER)) {
1879        /* print: order no/# topol per this id/tree id/sum of unique topologies/sum of trees so far */
1880        fprintf(qptlist, "%d.\t%d\t%d\t%d\t%d\t%d\n",
1881               PP_splitrecvedn, pstnumarr[n], (*treeitem).count, (*treeitem).id, *pstnum, tmpsum);
1882     }
1883 
1884 
1885   }
1886 
1887   MPI_Type_free(&PP_Biparts);
1888   free(Dtypes);
1889   free(Dtypelens);
1890   free(Dtypeaddr);
1891   free(pstnumarr);
1892   free(pstarr);
1893 
1894 # ifdef PVERBOSE2
1895     fprintf(STDOUT, "(%2d) Received<-%d: PP_bipartsblock(0..%lu,0,0):=%c\n", PP_Myid, dest, *blocksize, (*bip)[0][0][0]);
1896 # endif /* PVERBOSE2 */
1897 
1898   PP_putslave(dest);
1899 
1900   /* MPI_Type_free(&PP_Biparts); */
1901 
1902 } /* PP_RecvSplitsBlock */
1903 
1904 
1905 /*********************************************************************
1906 *  procedures to send the request to compute puzzle tree             *
1907 *********************************************************************/
1908 
1909 /* send all <puzzlings> requests and receive results */
PP_SendDoPermutBlock(uli puzzlings)1910 void PP_SendDoPermutBlock(uli puzzlings)
1911 {
1912 	int           dest;               /* destination process */
1913 	int           error;              /* MPI error code      */
1914 	uli           numpuzzlings;       /* puzzlings scheduled for 1 chunk */
1915 	uli           puzzlingssent = 0;  /* puzzlings we are waiting for */
1916 	schedtype     sched;              /* scheduler data */
1917 	uli           bipnum;             /* bipartition counter */
1918 	int           tx;                 /* number of received trees */
1919 	uli           bs;                 /* number of received split sets */
1920 	cmatrix      *bp;                 /* split sets */
1921 
1922 
1923 
1924 	initsched(&sched, puzzlings, PP_NumProcs-1, 4);
1925 	/* numpuzzlings = sc(&sched); */
1926 	numpuzzlings = SCHEDALG_PUZZLING(&sched);
1927 #	ifdef SCHED_DEBUG
1928 		if ((numpuzzlings < 4) && (numpuzzlings != 0))
1929 			fprintf(stderr, "SCHED-ERROR: %d < 4 (puzzling) !!!", numpuzzlings);
1930 #	endif /* SCHED_DEBUG */
1931 
1932 	while (numpuzzlings > 0) {
1933 		if (PP_emptyslave()) {
1934 			PP_RecvSplitsBlock(&tx, &bs, &bp, &psteptreelist,
1935 			                   &psteptreenum, &psteptreesum);
1936 			for (bipnum=0; bipnum<bs; bipnum++) {
1937 	       			makenewsplitentries(bp[bipnum], Maxspc-3,
1938 				                    &splitpatterns, &splitsizes,
1939 				                    &splitfreqs, &numbiparts,
1940 				                    &maxbiparts, Maxspc);
1941 				free_cmatrix(bp[bipnum]);
1942 			}
1943 			free(bp);
1944 			puzzlingssent -= bs;
1945 
1946 			checktime(&time0, &time1, &time2, PP_permutsent-puzzlingssent, puzzlings, &mflag);
1947 
1948 		}
1949 
1950 		dest = PP_getslave();
1951 
1952 #		ifdef PVERBOSE2
1953 			fprintf(STDOUT, "(%2d) Sending: DOPuzzleBlock Signal\n", PP_Myid);
1954 #		endif /* PVERBOSE2 */
1955 
1956 		error = MPI_Ssend(&numpuzzlings, 1, MPI_UNSIGNED_LONG, dest, PP_DOPUZZLEBLOCK, PP_Comm);
1957 		if (error != MPI_SUCCESS)
1958 			PP_Printerror(STDOUT, 2100+PP_Myid, error);
1959 
1960 #		ifdef PVERBOSE3
1961 			fprintf(STDOUT, "(%2d) ... Sent DOPuzzleBlock Signal\n", PP_Myid);
1962 #		endif /* PVERBOSE3 */
1963 
1964 		puzzlingssent += numpuzzlings;
1965 		PP_permutsent += numpuzzlings;
1966 		PP_permutsentn ++;
1967 
1968 		numpuzzlings = SCHEDALG_PUZZLING(&sched);
1969 #		ifdef SCHED_DEBUG
1970 			if ((numpuzzlings < 4) && (numpuzzlings != 0))
1971 				fprintf(stderr, "SCHED-ERROR: %d < 4 (puzzling) !!!", numpuzzlings);
1972 #		endif /* SCHED_DEBUG */
1973 
1974 	} /* while something left to send */
1975 
1976 	/* while waiting for results */
1977 	while (puzzlingssent > 0) {
1978 		/* receive next result */
1979 		PP_RecvSplitsBlock(&tx, &bs, &bp, &psteptreelist, &psteptreenum, &psteptreesum);
1980 		/* file splits */
1981 		for (bipnum=0; bipnum<bs; bipnum++) {
1982 			makenewsplitentries(bp[bipnum],Maxspc-3,
1983 			                    &splitpatterns, &splitsizes,
1984 			                    &splitfreqs, &numbiparts,
1985 			                    &maxbiparts, Maxspc);
1986 			free_cmatrix(bp[bipnum]);
1987 		}
1988 		free(bp);
1989 		puzzlingssent -= bs;
1990 
1991 		checktime(&time0, &time1, &time2, PP_permutsent-puzzlingssent, puzzlings, &mflag);
1992 
1993 	}
1994 
1995 } /* PP_SendDoPermutBlock */
1996 
1997 /******************/
1998 
1999 
PP_RecvDoPermutBlock(uli * taxa)2000 void PP_RecvDoPermutBlock(uli *taxa)
2001 {
2002   MPI_Status    stat;
2003   int           error;
2004 
2005 # ifdef PVERBOSE2
2006     fprintf(STDOUT, "(%2d) Receiving: DOPuzzleBlock Signal\n", PP_Myid);
2007 # endif /* PVERBOSE2 */
2008 
2009   error = MPI_Recv(taxa, 1, MPI_UNSIGNED_LONG, PP_MyMaster, PP_DOPUZZLEBLOCK, PP_Comm, &stat);
2010   if (error != MPI_SUCCESS)
2011     PP_Printerror(STDOUT, 2100+PP_Myid, error);
2012 
2013   PP_permutrecved += *taxa;
2014   PP_permutrecvedn ++;
2015 
2016 # ifdef PVERBOSE3
2017     fprintf(STDOUT, "(%2d) ... DOPuzzleBlock Signal\n", PP_Myid);
2018 # endif /* PVERBOSE3 */
2019 
2020 } /* PP_RecvDoPermutBlock */
2021 
2022 /*********************************************************************
2023 *  procedures to make the slaves to finalize                         *
2024 *********************************************************************/
2025 
PP_SendDone()2026 void PP_SendDone()
2027 {
2028 # define NUMINT 8
2029 # define NUMDBL 6
2030   int           dest;
2031   int           error;
2032   MPI_Status    stat;
2033   int    ints[NUMINT];
2034   double doubles[NUMDBL];
2035   MPI_Datatype Dtypes[2] =    {MPI_INT, MPI_DOUBLE};
2036   int          Dtypelens[2] = {NUMINT , NUMDBL};
2037   MPI_Aint     Dtypeaddr[2];
2038   MPI_Datatype PP_Stats;
2039 
2040 # ifdef PVERBOSE2
2041     fprintf(STDOUT, "(%2d) Sending: DONE Signal\n", PP_Myid);
2042 # endif /* PVERBOSE2 */
2043 
2044   for (dest=1; dest<PP_NumProcs; dest++) {
2045 
2046     error = MPI_Ssend(&dest, 0, MPI_INT, dest, PP_DONE, PP_Comm);
2047     if (error != MPI_SUCCESS)
2048       PP_Printerror(STDOUT, 2100+PP_Myid, error);
2049 
2050   } /* for each slave */
2051 
2052 # ifdef PVERBOSE3
2053     fprintf(STDOUT, "(%2d) ... Sent DONE Signal\n", PP_Myid);
2054 # endif /* PVERBOSE3 */
2055 
2056   MPI_Address(ints,     Dtypeaddr);
2057   MPI_Address(doubles, (Dtypeaddr+1));
2058 
2059   MPI_Type_struct(2, Dtypelens, Dtypeaddr, Dtypes, &PP_Stats);
2060   MPI_Type_commit(&PP_Stats);
2061 
2062   doquartrecved[0]  = 0;
2063   doquartrecvedn[0] = 0;
2064   quartsent[0]      = 0;
2065   quartsentn[0]     = 0;
2066   permutrecved[0]   = 0;
2067   permutrecvedn[0]  = 0;
2068   splitsent[0]      = 0;
2069   splitsentn[0]     = 0;
2070   cputimes[0]       = 0.0;
2071   walltimes[0]      = 0.0;
2072   fullcputimes[0]   = 0.0;
2073   fullwalltimes[0]  = 0.0;
2074   altcputimes[0]    = 0.0;
2075   altwalltimes[0]   = 0.0;
2076 
2077   for (dest=1; dest<PP_NumProcs; dest++) {
2078 
2079      error = MPI_Recv(MPI_BOTTOM, 1, PP_Stats, dest, PP_STATS, PP_Comm, &stat);
2080      if (error != MPI_SUCCESS)
2081        PP_Printerror(STDOUT, 2100+PP_Myid, error);
2082 
2083      doquartrecved[dest]  = ints[0];
2084      doquartrecvedn[dest] = ints[1];
2085      quartsent[dest]      = ints[2];
2086      quartsentn[dest]     = ints[3];
2087      permutrecved[dest]   = ints[4];
2088      permutrecvedn[dest]  = ints[5];
2089      splitsent[dest]      = ints[6];
2090      splitsentn[dest]     = ints[7];
2091      cputimes[dest]       = doubles[0];
2092      walltimes[dest]      = doubles[1];
2093      fullcputimes[dest]   = doubles[2];
2094      fullwalltimes[dest]  = doubles[3];
2095      altcputimes[dest]    = doubles[4];
2096      altwalltimes[dest]   = doubles[5];
2097 
2098 #    ifdef PVERBOSE1
2099         fprintf(STDOUT, "(%2d) ... Stats received (%d/%d, %d/%d, %d/%d, %d/%d, %f, %f, %f, %f, %f, %f)\n",
2100         PP_Myid, doquartrecved[dest], doquartrecvedn[dest], quartsent[dest], quartsentn[dest],
2101                  permutrecved[dest], permutrecvedn[dest], splitsent[dest], splitsentn[dest],
2102                  cputimes[dest], walltimes[dest], fullcputimes[dest], fullwalltimes[dest],
2103                  altcputimes[dest], altwalltimes[dest]);
2104 #    endif /* PVERBOSE1 */
2105   } /* for each slave */
2106 
2107   MPI_Type_free(&PP_Stats);
2108 
2109 # undef NUMINT
2110 # undef NUMDBL
2111 } /* PP_SendDone */
2112 
2113 /******************/
2114 
2115 
2116 
PP_RecvDone()2117 void PP_RecvDone()
2118 {
2119 # define NUMINT 8
2120 # define NUMDBL 6
2121 
2122   int           dummy;
2123   MPI_Status    stat;
2124   int           error;
2125   int    ints[NUMINT];
2126   double doubles[NUMDBL];
2127   MPI_Datatype Dtypes[2] =    {MPI_INT, MPI_DOUBLE};
2128   int          Dtypelens[2] = {NUMINT , NUMDBL};
2129   MPI_Aint     Dtypeaddr[2];
2130   MPI_Datatype PP_Stats;
2131 
2132 # ifdef PVERBOSE2
2133     fprintf(STDOUT, "(%2d) Receiving: DONE Signal\n", PP_Myid);
2134 # endif /* PVERBOSE2 */
2135 
2136   error = MPI_Recv(&dummy, 0, MPI_INT, PP_MyMaster, PP_DONE, PP_Comm, &stat);
2137   if (error != MPI_SUCCESS)
2138     PP_Printerror(STDOUT, 2100+PP_Myid, error);
2139 
2140 # ifdef PVERBOSE3
2141     fprintf(STDOUT, "(%2d) ... DONE Signal\n", PP_Myid);
2142 # endif /* PVERBOSE3 */
2143 
2144   addtimes(OVERALL, &tarr);
2145 # ifdef TIMEDEBUG
2146   printtimearr(&tarr);
2147 # endif /* TIMEDEBUG */
2148 
2149   time(&walltimestop);
2150   walltime = difftime(walltimestop, walltimestart);
2151   cputimestop = clock();
2152   cputime = ((double) (cputimestop - cputimestart) / CLOCKS_PER_SEC);
2153 
2154 # ifdef PVERBOSE1
2155     fprintf(STDOUT, "(%2d) total wallclock time: %0.2f sec (= %0.2f min = %0.2f hours)\n",
2156             PP_Myid, walltime, walltime / 60, walltime / 3600);
2157     fprintf(STDOUT, "(%2d) total CPU time: %0.2f sec (= %0.2f min = %0.2f hours)\n",
2158             PP_Myid, cputime, cputime / 60, cputime / 3600);
2159 # endif /* PVERBOSE1 */
2160 
2161   ints[0] = PP_doquartrecved;
2162   ints[1] = PP_doquartrecvedn;
2163   ints[2] = PP_quartsent;
2164   ints[3] = PP_quartsentn;
2165   ints[4] = PP_permutrecved;
2166   ints[5] = PP_permutrecvedn;
2167   ints[6] = PP_splitsent;
2168   ints[7] = PP_splitsentn;
2169   doubles[0] = cputime;
2170   doubles[1] = walltime;
2171   doubles[2] = tarr.fullcpu;
2172   doubles[3] = tarr.fulltime;
2173   doubles[4] = tarr.cpu;
2174   doubles[5] = tarr.time;
2175 
2176   MPI_Address(ints,     Dtypeaddr);
2177   MPI_Address(doubles, (Dtypeaddr+1));
2178 
2179   MPI_Type_struct(2, Dtypelens, Dtypeaddr, Dtypes, &PP_Stats);
2180   MPI_Type_commit(&PP_Stats);
2181 
2182   error = MPI_Ssend(MPI_BOTTOM, 1, PP_Stats, PP_MyMaster, PP_STATS, PP_Comm);
2183   if (error != MPI_SUCCESS)
2184     PP_Printerror(STDOUT, 2100+PP_Myid, error);
2185 
2186   MPI_Type_free(&PP_Stats);
2187 
2188 # ifdef PVERBOSE3
2189         fprintf(STDOUT, "(%2d) ... Stats sent (%d/%d, %d/%d, %d/%d, %d/%d, %f, %f)\n",
2190         PP_Myid, PP_doquartrecved, PP_doquartrecvedn, PP_quartsent, PP_quartsentn,
2191                  PP_permutrecved, PP_permutrecvedn, PP_splitsent, PP_splitsentn,
2192                  cputime, walltime, fullcputime, fullwalltime, altcputime, altwalltime);
2193 # endif /* PVERBOSE3 */
2194 
2195 # undef NUMINT
2196 # undef NUMDBL
2197 } /* PP_RecvDone */
2198 
2199 /*********************************************************************
2200 *  procedures to initialize and cleanup                              *
2201 *********************************************************************/
2202 
PP_Init(int * argc,char ** argv[])2203 void PP_Init(int *argc, char **argv[])
2204 {
2205   MPI_Init(argc, argv);
2206   PP_Comm = MPI_COMM_WORLD;
2207   MPI_Comm_rank(PP_Comm, &PP_Myid);
2208   MPI_Comm_size(PP_Comm, &PP_NumProcs);
2209 
2210   if (PP_NumProcs <= 1) {
2211 	fprintf(STDOUT, "\nHalt: TREE-PUZZLE needs at least 2 processes for a parallel run!!!\n\n");
2212 	MPI_Finalize();
2213 	exit(1);
2214   }
2215 
2216   PP_MyMaster = 0;
2217 
2218   if (PP_Myid == PP_MyMaster) {
2219     PP_IamMaster=1;
2220     PP_IamSlave =0;
2221     PP_initslavequeue();
2222 
2223     permutsent     = new_ivector(PP_NumProcs);
2224     permutrecved   = new_ivector(PP_NumProcs);
2225     quartsent      = new_ivector(PP_NumProcs);
2226     quartrecved    = new_ivector(PP_NumProcs);
2227     doquartsent    = new_ivector(PP_NumProcs);
2228     doquartrecved  = new_ivector(PP_NumProcs);
2229     splitsent      = new_ivector(PP_NumProcs);
2230     splitrecved    = new_ivector(PP_NumProcs);
2231     permutsentn    = new_ivector(PP_NumProcs);
2232     permutrecvedn  = new_ivector(PP_NumProcs);
2233     quartsentn     = new_ivector(PP_NumProcs);
2234     quartrecvedn   = new_ivector(PP_NumProcs);
2235     doquartsentn   = new_ivector(PP_NumProcs);
2236     doquartrecvedn = new_ivector(PP_NumProcs);
2237     splitsentn     = new_ivector(PP_NumProcs);
2238     splitrecvedn   = new_ivector(PP_NumProcs);
2239 
2240     walltimes      = new_dvector(PP_NumProcs);
2241     cputimes       = new_dvector(PP_NumProcs);
2242     fullwalltimes  = new_dvector(PP_NumProcs);
2243     fullcputimes   = new_dvector(PP_NumProcs);
2244     altwalltimes   = new_dvector(PP_NumProcs);
2245     altcputimes    = new_dvector(PP_NumProcs);
2246     { int n;
2247       for (n = 0; n<PP_NumProcs; n++){
2248         permutsent     [n] = -1;
2249 	permutrecved   [n] = -1;
2250 	quartsent      [n] = -1;
2251 	quartrecved    [n] = -1;
2252 	doquartsent    [n] = -1;
2253 	doquartrecved  [n] = -1;
2254 	splitsent      [n] = -1;
2255 	splitrecved    [n] = -1;
2256 	permutsentn    [n] = -1;
2257 	permutrecvedn  [n] = -1;
2258 	quartsentn     [n] = -1;
2259 	quartrecvedn   [n] = -1;
2260 	doquartsentn   [n] = -1;
2261 	doquartrecvedn [n] = -1;
2262 	splitsentn     [n] = -1;
2263 	splitrecvedn   [n] = -1;
2264 	walltimes      [n] = -1.0;
2265 	cputimes       [n] = -1.0;
2266 	fullwalltimes  [n] = -1.0;
2267 	fullcputimes   [n] = -1.0;
2268 	altwalltimes   [n] = -1.0;
2269 	altcputimes    [n] = -1.0;
2270       }
2271     }
2272   } else {
2273     PP_IamMaster=0;
2274     PP_IamSlave =1;
2275   }
2276 
2277 # ifdef PVERBOSE1
2278     { char ma[7] = "master";
2279       char sl[7] = "slave ";
2280       char *st;
2281       char PP_ProcName[MPI_MAX_PROCESSOR_NAME] = "empty";
2282       int  flag;
2283       int  PP_ProcNameLen = 6;
2284       int  PP_Clocksync = 0;
2285       int  PP_Maxtag    = 0;
2286       int  PP_IO        = 0;
2287       int  PP_Hostexist = 0;
2288     if (PP_IamMaster)
2289       st = ma;
2290     else
2291       st = sl;
2292     fprintf(STDOUT, "(%2d) Init: ID %d, %d processes running \n", PP_Myid, PP_Myid, PP_NumProcs);
2293     fprintf(STDOUT, "(%2d)       I am %s \n", PP_Myid, st);
2294     MPI_Get_processor_name(PP_ProcName, &PP_ProcNameLen);
2295     fprintf(STDOUT, "(%2d)       I am running on \"%s\"\n", PP_Myid, PP_ProcName);
2296     MPI_Attr_get(PP_Comm, MPI_IO, &PP_IO, &flag);
2297     if (flag) fprintf(STDOUT, "(%2d)       next IO Proc=%d - ", PP_Myid, PP_IO);
2298     MPI_Attr_get(PP_Comm, MPI_TAG_UB, &PP_Maxtag, &flag);
2299     if (flag) fprintf(STDOUT, "MaxTag=%d - ", PP_Maxtag);
2300     MPI_Attr_get(PP_Comm, MPI_HOST, &PP_Hostexist, &flag);
2301     if (flag) {
2302       if (PP_Hostexist == MPI_PROC_NULL)
2303         fprintf(STDOUT, "No HostProc - ");
2304       else
2305         fprintf(STDOUT, "HostProc=%d - ", PP_Hostexist);
2306     }
2307     MPI_Attr_get(PP_Comm, MPI_WTIME_IS_GLOBAL, &PP_Clocksync, &flag);
2308     if (PP_Clocksync)
2309       fprintf(STDOUT, "global time");
2310     else
2311       fprintf(STDOUT, "local time");
2312     fprintf(STDOUT, "\n");
2313     }
2314 # endif /* PVERBOSE1 */
2315 } /* PP_Init */
2316 
2317 /******************/
2318 
PP_Finalize()2319 void PP_Finalize()
2320 {
2321   if (PP_IamMaster) {
2322     free_ivector(freeslaves);
2323 
2324     free_ivector(permutsent);
2325     free_ivector(permutrecved);
2326     free_ivector(quartsent);
2327     free_ivector(quartrecved);
2328     free_ivector(doquartsent);
2329     free_ivector(doquartrecved);
2330     free_ivector(splitsent);
2331     free_ivector(splitrecved);
2332     free_ivector(permutsentn);
2333     free_ivector(permutrecvedn);
2334     free_ivector(quartsentn);
2335     free_ivector(quartrecvedn);
2336     free_ivector(doquartsentn);
2337     free_ivector(doquartrecvedn);
2338     free_ivector(splitsentn);
2339     free_ivector(splitrecvedn);
2340 
2341     free_dvector(walltimes);
2342     free_dvector(cputimes);
2343     free_dvector(fullwalltimes);
2344     free_dvector(fullcputimes);
2345     free_dvector(altwalltimes);
2346     free_dvector(altcputimes);
2347   }
2348 
2349 # ifdef PVERBOSE1
2350     fprintf(STDOUT, "(%2d) Finished ...\n", PP_Myid);
2351 
2352     fprintf(STDOUT, "(%2d) doqu(s%6d/%6d,%6d/%6d) qu(s%6d/%6d,%6d/%6d)\n",
2353             PP_Myid, PP_doquartsent, PP_doquartsentn, PP_doquartrecved, PP_doquartrecvedn,
2354                      PP_quartsent, PP_quartsentn, PP_quartrecved, PP_quartrecvedn);
2355     fprintf(STDOUT, "(%2d) perm(s%6d/%6d,%6d/%6d) spli(s%6d/%6d,%6d/%6d)\n",
2356             PP_Myid, PP_permutsent, PP_permutsentn, PP_permutrecved, PP_permutrecvedn,
2357                      PP_splitsent, PP_splitsentn, PP_splitrecved, PP_splitrecvedn);
2358     if (PP_IamMaster) {
2359        fprintf(STDOUT, "(%2d)  Init: %2.4f   Param(Comp: %2.4f  Send: %2.4f)\n", PP_Myid, PP_inittime, PP_paramcomptime, PP_paramsendtime);
2360        fprintf(STDOUT, "(%2d)  Quart(Comp: %2.4f  Send: %2.4f)   Puzzle: %2.4f   Tree: %2.4f\n", PP_Myid, PP_quartcomptime, PP_quartsendtime, PP_puzzletime, PP_treetime);
2361     }
2362 # endif /* PVERBOSE1 */
2363 
2364   MPI_Finalize();
2365 } /* PP_Finalize */
2366 
2367 /***********************************************************
2368 *  main part of the slave process                          *
2369 ***********************************************************/
2370 
slave_main(int argc,char * argv[])2371 int slave_main(int argc, char *argv[])
2372 {
2373 	int i, a, b, c, d, approx, usebestq;
2374 	int PP_LastChunk, dummy; /*epe*/
2375 	double d1, d2, d3;
2376 
2377 	int        notdone;
2378 	MPI_Status stat;
2379 	int        PP_AllQuartsReceived = 0;
2380 	unsigned char quartettype;
2381 
2382 #ifndef USE_WINDOWS
2383 	PP_RecvSizes(&Maxspc, &Maxsite, &numcats, &Numptrn, &tpmradix, &outgroup, &fracconst, &randseed, &fixedorder_optn, &consensus_optn);
2384 #else
2385 	PP_RecvSizes(&Maxspc, &Maxsite, &alimaxsite, &alistart, &aliend, &numcats, &Numptrn, &tpmradix, &outgroup, &fracconst, &randseed, &fixedorder_optn, &consensus_optn);
2386 #endif
2387 
2388 /*epe*/ ltprobr = new_dcube(numcats, tpmradix,tpmradix);
2389 /*epe*/ iexp = new_dmatrix(tpmradix,tpmradix);
2390 	psteptreestrlen = (Maxspc * (int)(1 + log10((double)Maxspc))) +
2391 	                  (Maxspc * 3);
2392 
2393 
2394         Maxbrnch = 2*Maxspc - 3;
2395 
2396 	initrandom(randseed);
2397 
2398 	/* initialise ML (from PP_mlstart) */
2399 	Seqpat    = new_cmatrix(Maxspc, Numptrn);
2400 	Alias     = new_ivector(Maxsite);
2401 	Weight    = new_ivector(Numptrn);
2402 	constpat  = new_ivector(Numptrn);
2403 	Rates     = new_dvector(numcats);
2404 	Eval      = new_dvector(tpmradix);
2405 	Freqtpm   = new_dvector(tpmradix);
2406 	Evec      = new_dmatrix(tpmradix,tpmradix);
2407 	Ievc      = new_dmatrix(tpmradix,tpmradix);
2408 	iexp      = new_dmatrix(tpmradix,tpmradix);
2409 	Distanmat = new_dmatrix(Maxspc, Maxspc);
2410 	ltprobr   = new_dcube(numcats, tpmradix,tpmradix);
2411 	PP_RecvData(Seqpat,                       /* cmatrix */
2412 	            Alias, Weight, constpat,      /* ivector */
2413 	            Rates, Eval, Freqtpm,         /* dvector */
2414 	            Evec, Ievc, iexp,             /* dmatrix */
2415 	            /* Distanmat, */ /*epe*/ /* dmatrix */
2416 	            ltprobr);                     /* dcube   */
2417 
2418 	Ctree = new_quartet(Numptrn, Seqpat);
2419 	Numbrnch = NUMQBRNCH;
2420 	Numibrnch = NUMQIBRNCH;
2421 	Numspc = NUMQSPC;
2422 
2423 	/* allocate and initialize qinfomatr */
2424 	qinfomatr = new_ulimatrix(9,Maxspc);
2425 	for (i = 0; i < Maxspc; i++) {
2426 		qinfomatr[0][i] = 0;
2427 		qinfomatr[1][i] = 0;
2428 		qinfomatr[2][i] = 0;
2429 		qinfomatr[3][i] = 0;
2430 		qinfomatr[4][i] = 0;
2431 		qinfomatr[5][i] = 0;
2432 		qinfomatr[6][i] = 0;
2433 		qinfomatr[7][i] = 0;
2434 		qinfomatr[8][i] = 0;
2435 	}
2436 
2437 	/* allocate and initialize badtaxonvector */
2438 	badtaxon = new_ulivector(Maxspc);
2439 	for (i = 0; i < Maxspc; i++) badtaxon[i] = 0;
2440 
2441 	/* allocate memory for quartets */
2442 	quartetinfo = callocquartets(Maxspc);
2443 
2444         /* prepare for consensus tree analysis */
2445 	initconsensus();
2446 
2447 	MPI_Probe(PP_MyMaster, MPI_ANY_TAG, PP_Comm, &stat);
2448 
2449 	notdone = (stat.MPI_TAG != PP_DONE);
2450         if (!notdone) {
2451 	  PP_RecvDone();
2452 #         ifdef TIMEDEBUG
2453              printtimearr(&tarr);
2454 #         endif /* TIMEDEBUG */
2455 
2456 	}
2457 
2458 	while (notdone) {
2459 	  switch (stat.MPI_TAG) {
2460 /*epe*/	    case PP_NOUPDATE:
2461 	    {
2462 		PP_NoUpdate();
2463 		PP_Mldistance(1, &PP_LastChunk);
2464 		break;
2465 	    }
2466 /*epe*/	    case PP_UPDATERATES:
2467 	    {
2468 		PP_Update_Rates();
2469 		PP_Mldistance(1, &PP_LastChunk);
2470 		break;
2471 	    }
2472 /*epe*/	    case PP_UPDATEFRACINV:
2473 	    {
2474 		PP_Update_fracinv();
2475 		PP_Mldistance(1, &PP_LastChunk);
2476 		break;
2477 	    }
2478 /*epe*/	    case PP_UPDATEEEI:
2479 	    {
2480 		PP_Update_EEI();
2481 		PP_Mldistance(1, &PP_LastChunk);
2482 		break;
2483 	    }
2484 /*epe*/	    case PP_MLDISTANCE:
2485 	    {
2486 		PP_Mldistance(0, &PP_LastChunk);
2487 		break;
2488 	    }
2489 /*epe*/	    case PP_FINALUPDATE:
2490 	    {
2491 		PP_Final_Update();
2492 		break;
2493 	    }
2494 /*epe*/	    case PP_LSLENGTH:
2495 	    {
2496 		PP_Lslength();
2497 		break;
2498 	    }
2499 /*epe*/	    case PP_TEST:
2500 	    {
2501 		MPI_Recv(&dummy, 0, MPI_INT, PP_MyMaster, PP_TEST, PP_Comm, &stat);
2502 		fprintf(STDOUT, "[%d] performing TEST\n", PP_Myid);
2503 		break;
2504 	    }
2505 
2506 	    case PP_ALLQUARTS: {
2507 	  	PP_RecvAllQuarts(Maxspc, &Numquartets, quartetinfo);
2508 		PP_AllQuartsReceived = 1;
2509 		break;
2510 		}
2511 	    case PP_DOQUARTBLOCK:
2512 	    case PP_DOQUARTBLOCKSPECS:
2513                 {
2514 		uli qtodo, qstart, qend, idx, nofbq, *bqarr;
2515 		int a, b, c, i;
2516 		double d1, d2, d3;
2517 		uli unresqs = 0;   /* number of unresolved quartets */
2518 		uli partresqs = 0; /* number of partly resolved quartets */
2519 		uli fullresqs = 0; /* number of fully resolved quartets */
2520 		uli missingqs = 0; /* number of missing quartets */
2521 
2522 
2523 	        nofbq=0;
2524 		PP_RecvDoQuartBlock(&qstart, &qtodo, &bqarr, &usebestq, &approx);
2525 		qend   = (qstart + qtodo) - 1;
2526 		unresqs = 0;   /* number of unresolved quartets */
2527 		partresqs = 0; /* number of partly resolved quartets */
2528 		fullresqs = 0; /* number of fully resolved quartets */
2529 		missingqs = 0; /* number of missing quartets */
2530 #		ifdef PVERBOSE1
2531 			fprintf(STDOUT, "(%2d) Quartets %4ld->%4ld (%dx%ld)\n", PP_Myid, qstart, qend, PP_NumProcs-1, qtodo);
2532 #		endif
2533 
2534 		addtimes(GENERAL, &tarr);
2535 		for (i = 3; i < Maxspc; i++)
2536 		   for (c = 2; c < i; c++)
2537 		      for (b = 1; b < c; b++)
2538 		         for (a = 0; a < b; a++) {
2539 
2540                             /* use a better way, not computing this every time */
2541 		            idx = (uli) a +
2542 		                  (uli) b*(b-1)/2 +
2543 		                  (uli) c*(c-1)*(c-2)/6 +
2544 		                  (uli) i*(i-1)*(i-2)*(i-3)/24;
2545 		            if ((idx >= qstart) && (idx <= qend)) {
2546 #			       ifdef PVERBOSE4
2547 			          fprintf(STDOUT, "(%2d)  %4ld <---> (%d,%d,%d,%d)\n",PP_Myid, idx, a,b,c,i);
2548 #			       endif
2549 			       compute_quartlklhds(a,b,c,i,&d1,&d2,&d3,approx);
2550 		               quartettype = PP_do_write_quart(a,b,c,i,d1,d2,d3,&nofbq,bqarr,usebestq);
2551                                switch(quartettype) {
2552                                    case 1: /* 001 */
2553                                    case 2: /* 010 */
2554                                    case 4: /* 100 */
2555                                        {
2556 		                          fullresqs++; /* number of fully resolved quartets */
2557                                           break;
2558                                        }
2559                                    case 3: /* 011 */
2560                                    case 5: /* 101 */
2561                                    case 6: /* 110 */
2562                                        {
2563 		                          partresqs++; /* number of partly resolved quartets */
2564                                           break;
2565                                        }
2566                                    case 7: /* 111 */
2567                                        {
2568 		                          unresqs++;   /* number of unresolved quartets */
2569                                           break;
2570                                        }
2571                                    case 0: /* 000 */
2572                                    default:
2573                                        {
2574 		                          missingqs++; /* number of missing quartets */
2575                                           break;
2576                                        }
2577                                } /* end switch quartettype */
2578 			       addtimes(QUARTETS, &tarr);
2579 		            } /* if idx */
2580 		         } /* for for for for */
2581 		PP_SendQuartBlock(qstart, qtodo, quartetinfo, fullresqs, partresqs, unresqs, missingqs, nofbq, bqarr, usebestq, approx);
2582 
2583 		free(bqarr); bqarr=NULL;
2584 
2585 		break;
2586 		}
2587 
2588 	    case PP_DOPUZZLEBLOCK: {
2589 		if (PP_AllQuartsReceived){
2590 			uli numtrial;
2591 
2592 			/* receive number of steps to perform */
2593 			PP_RecvDoPermutBlock(&numtrial);
2594 
2595 			allpstep(numtrial, quartetinfo, Maxspc, fixedorder_optn);
2596 
2597 		} else {
2598 			fprintf(STDOUT, "ERROR: Requested Puzzle Tree without receiving Quartets!!!\n");
2599 			notdone = 0;
2600 		}
2601 		break;
2602 		}
2603 	    case PP_DOQUART: {
2604 		PP_RecvDoQuart(&a,&b,&c,&d, &usebestq, &approx);
2605 		addtimes(GENERAL, &tarr);
2606 		compute_quartlklhds(a,b,c,d,&d1,&d2,&d3,approx);
2607 		addtimes(QUARTETS, &tarr);
2608 		PP_SendQuart(a,b,c,d,d1,d2,d3, usebestq, approx);
2609 		break;
2610 		}
2611 	    default: {
2612 		fprintf(STDOUT, "ERROR: Unknown Message Tag received\n");
2613 		MPI_Abort(PP_Comm, 1);
2614 		}
2615 	  } /* switch stat.MPI_TAG */
2616 
2617 	  MPI_Probe(PP_MyMaster, MPI_ANY_TAG, PP_Comm, &stat);
2618 	  notdone = (stat.MPI_TAG != PP_DONE);
2619 	  if (!notdone)
2620 	    PP_RecvDone();
2621 
2622 	} /* while notdone */
2623 
2624         return (0);
2625 
2626 } /* slave_main */
2627 
2628 
2629