1 /*
2  * ml1.c
3  *
4  *
5  * Part of TREE-PUZZLE 5.2 (July 2004)
6  *
7  * (c) 2003-2004 by Heiko A. Schmidt, Korbinian Strimmer, and Arndt von Haeseler
8  * (c) 1999-2003 by Heiko A. Schmidt, Korbinian Strimmer,
9  *                  M. Vingron, and Arndt von Haeseler
10  * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11  *
12  * All parts of the source except where indicated are distributed under
13  * the GNU public licence.  See http://www.opensource.org for details.
14  *
15  * ($Id$)
16  *
17  */
18 
19 #ifdef HAVE_CONFIG_H
20 #  include <config.h>
21 #endif
22 
23 
24 /******************************************************************************/
25 /* definitions and prototypes                                                 */
26 /******************************************************************************/
27 
28 #define EXTERN extern
29 
30 /* prototypes */
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <math.h>
34 #include <ctype.h>
35 #include "util.h"
36 #include "ml.h"
37 /*epe*/
38 #ifdef PARALLEL
39 #	include <mpi.h>
40 #	include "ppuzzle.h"
41 #endif
42 
43 
44 #define STDOUT     stdout
45 #ifndef PARALLEL             /* because printf() runs significantly faster */
46                              /* than fprintf(stdout) on an Apple McIntosh  */
47                              /* (HS) */
48 #       define FPRINTF    printf
49 #       define STDOUTFILE
50 #else
51 #       define FPRINTF    fprintf
52 #       define STDOUTFILE STDOUT,
53 	void PP_Finalize();  /* prototype */
54 #endif
55 
56 EXTERN int printrmatr_optn;  /* print rate matrix to screen */
57 #if 0
58 extern int PP_NumProcs; /*epe*/ /* might be included via ppuzzle.h ? */
59 extern int PP_Myid; /*epe*/     /* might be included via ppuzzle.h ? */
60 #endif
61 
62 /******************************************************************************/
63 /* compacting sequence data information                                       */
64 /******************************************************************************/
65 
66 
67 /***************************** internal functions *****************************/
68 
69 
70 /* make all frequencies a little different */
convfreq(dvector freqemp)71 void convfreq(dvector freqemp)
72 {
73 	int i, j, maxi=0;
74 	double freq, maxfreq, sum;
75 
76 
77 	sum = 0.0;
78 	maxfreq = 0.0;
79 	for (i = 0; i < tpmradix; i++) {
80 		freq = freqemp[i];
81 		if (freq < MINFREQ) freqemp[i] = MINFREQ;
82 		if (freq > maxfreq) {
83 			maxfreq = freq;
84 			maxi = i;
85 		}
86 		sum += freqemp[i];
87 	}
88 	freqemp[maxi] += 1.0 - sum;
89 
90 	for (i = 0; i < tpmradix - 1; i++) {
91 		for (j = i + 1; j < tpmradix; j++) {
92 			if (freqemp[i] == freqemp[j]) {
93 				freqemp[i] += MINFDIFF/2.0;
94 				freqemp[j] -= MINFDIFF/2.0;
95 			}
96 		}
97 	}
98 } /* convfreq */
99 
100 /* sort site patters of original input data */
tp_radixsort(cmatrix seqchar,ivector ali,int maxspc,int maxsite,int * numptrn)101 void tp_radixsort(cmatrix seqchar, ivector ali, int maxspc, int maxsite,
102 	int *numptrn)
103 {
104 	int i, j, k, l, n, pass;
105 	int *awork;
106 	int *count;
107 
108 
109 	awork = new_ivector(maxsite);
110 	count = new_ivector(tpmradix+1);
111 #ifndef USE_WINDOWS
112 	for (i = 0; i < maxsite; i++)                                      /* init alignment column order 0, 1, 2, ... */
113 		ali[i] = i;
114 #else
115 	alimaxsize = aliwinend - aliwinstart + 1;
116 	k = 0;
117 	for (i = aliwinstart; i < aliwinend; i++) {
118 		ali[k++] = i;
119 #endif
120 	for (pass = maxspc - 1; pass >= 0; pass--) {                       /* for every taxon */
121 		for (j = 0; j < tpmradix+1; j++)                           /* reset count array */
122 			count[j] = 0;
123 #ifndef USE_WINDOWS
124 		for (i = 0; i < maxsite; i++)                              /* counting bin size needed */
125 #else
126 		for (i = 0; i < alimaxsite; i++)                           /* counting bin size needed */
127 #endif
128 			count[(int) seqchar[pass][ali[i]]]++;
129 		for (j = 1; j < tpmradix+1; j++)                           /* converting bin sizes to start indices */
130 			count[j] += count[j-1];
131 #ifndef USE_WINDOWS
132 		for (i = maxsite-1; i >= 0; i--)                           /* produce consistently sorted order of alignment columns for current species to awork */
133 #else
134 		for (i = alimaxsite-1; i >= 0; i--)                        /* produce consistently sorted order of alignment columns for current species to awork */
135 #endif
136 			awork[ --count[(int) seqchar[pass][ali[i]]] ] = ali[i];
137 		for (i = 0; i < maxsite; i++)                              /* set intermediate order */
138 			ali[i] = awork[i];
139 	}
140 	free_ivector(awork);
141 	free_ivector(count);
142 
143 	n = 1;
144 #ifndef USE_WINDOWS
145 	for (j = 1; j < maxsite; j++) {                                    /* counting number of site pattern */
146 #else
147 	for (j = 1; j < alimaxsite; j++) {                                 /* counting number of site pattern */
148 #endif
149 		k = ali[j];
150 		l = ali[j-1];
151 		for (i = 0; i < maxspc; i++) {
152 			if (seqchar[i][l] != seqchar[i][k]) {
153 				n++;
154 				break;
155 			}
156 		}
157 	}
158 	*numptrn = n;
159 } /* tp_radixsort */
160 
161 
162 void condenceseq(cmatrix seqchar, ivector ali, cmatrix seqconint,
163 	ivector weight, int maxspc, int maxsite, int numptrn)
164 {
165 	int i, j, k, n;
166 	int agree_flag; /* boolean */
167 
168 
169 	n = 0;
170 	k = ali[n];
171 #ifndef USE_WINDOWS
172 	for (i = 0; i < maxspc; i++) {
173 #else
174 	for (i = aliwinstart - 1; i < aliwinend - 1; i++) {
175 #endif
176 		seqconint[i][n] = seqchar[i][k];
177 	}
178 	weight[n] = 1;
179 	Alias[k] = 0;
180 #ifndef USE_WINDOWS
181 	for (j = 1; j < maxsite; j++) {
182 #else
183 	for (j = 1; j < alimaxsite; j++) {
184 #endif
185 		k = ali[j];
186 		agree_flag = TRUE;
187 		for (i = 0; i < maxspc; i++) {
188 			if (seqconint[i][n] != seqchar[i][k]) {
189 				agree_flag = FALSE;
190 				break;
191 			}
192 		}
193 		if (agree_flag == FALSE) {
194 			n++;
195 			for (i = 0; i < maxspc; i++) {
196 				seqconint[i][n] = seqchar[i][k];
197 			}
198 			weight[n] = 1;
199 			Alias[k] = n;
200 		} else {
201 			weight[n]++;
202 			Alias[k] = n;
203 		}
204 	}
205 	n++;
206 	if (numptrn != n) {
207 		/* Problem in condenceseq */
208 		fprintf(STDOUT, "\n\n\nHALT: PLEASE REPORT ERROR A TO DEVELOPERS\n\n\n");
209 #		if PARALLEL
210 			PP_Finalize();
211 #		endif
212 		exit(1);
213 	}
214 } /* condenceseq */
215 
216 void countconstantsites(cmatrix seqpat, ivector weight, int maxspc, int numptrn,
217 	int *numconst, int *numconstpat)
218 {
219 	int character, s, i, constflag;
220 
221 	*numconst    = 0;
222 	*numconstpat = 0;
223 	for (s = 0; s < numptrn; s++) { /* check all patterns */
224 		constpat[s] = FALSE;
225 		constflag = TRUE;
226 		character = seqpat[0][s];
227 		for (i = 1; i < maxspc; i++) {
228 			if (seqpat[i][s] != character) {
229 				constflag = FALSE;
230 				break;
231 			}
232 		}
233 
234 		/* if constant pattern (patterns with wildcards are not */
235                 /* constant)                                            */
236 		if (character != tpmradix && constflag) {
237 			(*numconst) = (*numconst) + weight[s];
238 			(*numconstpat)++;
239 			constpat[s] = TRUE;
240 		}
241 	}
242 } /* countconstantsites */
243 
244 /***************************** exported functions *****************************/
245 
246 
247 void evaluateseqs()
248 {
249 	ivector ali;
250 
251 	convfreq(Freqtpm); /* make all frequencies slightly different */
252 	ali = new_ivector(Maxsite);
253 	tp_radixsort(Seqchar, ali, Maxspc, Maxsite, &Numptrn);
254 	Seqpat = new_cmatrix(Maxspc, Numptrn);
255 	constpat = new_ivector(Numptrn);
256 	Weight = new_ivector(Numptrn);
257 	condenceseq(Seqchar, ali, Seqpat, Weight, Maxspc, Maxsite, Numptrn);
258 	free_ivector(ali);
259 	countconstantsites(Seqpat, Weight, Maxspc, Numptrn, &Numconst, &Numconstpat);
260 	fracconstpat = (double) Numconstpat / (double) Numptrn;
261 #ifndef USE_WINDOWS
262 	fracconst    = (double) Numconst / (double) Maxsite;
263 #else
264 	fracconst    = (double) Numconst / (double) alimaxsite;
265 #endif
266 } /* evaluateseqs */
267 
268 
269 /******************************************************************************/
270 /* computation of Pij(t)                                                      */
271 /******************************************************************************/
272 
273 
274 /***************************** internal functions *****************************/
275 
276 
277 void elmhes(dmatrix a, ivector ordr, int n)
278 {
279 	int m, j, i;
280 	double y, x;
281 
282 
283 	for (i = 0; i < n; i++)
284 		ordr[i] = 0;
285 	for (m = 2; m < n; m++) {
286 		x = 0.0;
287 		i = m;
288 		for (j = m; j <= n; j++) {
289 			if (fabs(a[j - 1][m - 2]) > fabs(x)) {
290 				x = a[j - 1][m - 2];
291 				i = j;
292 			}
293 		}
294 		ordr[m - 1] = i;      /* vector */
295 		if (i != m) {
296 			for (j = m - 2; j < n; j++) {
297 				y = a[i - 1][j];
298 				a[i - 1][j] = a[m - 1][j];
299 				a[m - 1][j] = y;
300 			}
301 			for (j = 0; j < n; j++) {
302 				y = a[j][i - 1];
303 				a[j][i - 1] = a[j][m - 1];
304 				a[j][m - 1] = y;
305 			}
306 		}
307 		if (x != 0.0) {
308 			for (i = m; i < n; i++) {
309 				y = a[i][m - 2];
310 				if (y != 0.0) {
311 					y /= x;
312 					a[i][m - 2] = y;
313 					for (j = m - 1; j < n; j++)
314 						a[i][j] -= y * a[m - 1][j];
315 					for (j = 0; j < n; j++)
316 						a[j][m - 1] += y * a[j][i];
317 				}
318 			}
319 		}
320 	}
321 } /* elmhes */
322 
323 
324 void eltran(dmatrix a, dmatrix zz, ivector ordr, int n)
325 {
326 	int i, j, m;
327 
328 
329 	for (i = 0; i < n; i++) {
330 		for (j = i + 1; j < n; j++) {
331 			zz[i][j] = 0.0;
332 			zz[j][i] = 0.0;
333 		}
334 		zz[i][i] = 1.0;
335 	}
336 	if (n <= 2)
337 		return;
338 	for (m = n - 1; m >= 2; m--) {
339 		for (i = m; i < n; i++)
340 			zz[i][m - 1] = a[i][m - 2];
341 		i = ordr[m - 1];
342 		if (i != m) {
343 			for (j = m - 1; j < n; j++) {
344 				zz[m - 1][j] = zz[i - 1][j];
345 				zz[i - 1][j] = 0.0;
346 			}
347 			zz[i - 1][m - 1] = 1.0;
348 		}
349 	}
350 } /* eltran */
351 
352 
353 void mcdiv(double ar, double ai, double br, double bi,
354 	double *cr, double *ci)
355 {
356 	double s, ars, ais, brs, bis;
357 
358 
359 	s = fabs(br) + fabs(bi);
360 	ars = ar / s;
361 	ais = ai / s;
362 	brs = br / s;
363 	bis = bi / s;
364 	s = brs * brs + bis * bis;
365 	*cr = (ars * brs + ais * bis) / s;
366 	*ci = (ais * brs - ars * bis) / s;
367 } /* mcdiv */
368 
369 
370 void hqr2(int n, int low, int hgh, dmatrix h,
371 	dmatrix zz, dvector wr, dvector wi)
372 {
373 	int i, j, k, l=0, m, en, na, itn, its;
374 	double p=0, q=0, r=0, s=0, t, w, x=0, y, ra, sa, vi, vr, z=0, norm, tst1, tst2;
375 	int notlas; /* boolean */
376 
377 
378 	norm = 0.0;
379 	k = 1;
380 	/* store isolated roots and compute matrix norm */
381 	for (i = 0; i < n; i++) {
382 		for (j = k - 1; j < n; j++)
383 			norm += fabs(h[i][j]);
384 		k = i + 1;
385 		if (i + 1 < low || i + 1 > hgh) {
386 			wr[i] = h[i][i];
387 			wi[i] = 0.0;
388 		}
389 	}
390 	en = hgh;
391 	t = 0.0;
392 	itn = n * 30;
393 	while (en >= low) {	/* search for next eigenvalues */
394 		its = 0;
395 		na = en - 1;
396 		while (en >= 1) {
397 			/* look for single small sub-diagonal element */
398 			for (l = en; l > low; l--) {
399 				s = fabs(h[l - 2][l - 2]) + fabs(h[l - 1][l - 1]);
400 				if (s == 0.0)
401 					s = norm;
402 				tst1 = s;
403 				tst2 = tst1 + fabs(h[l - 1][l - 2]);
404 				if (tst2 == tst1)
405 					goto L100;
406 			}
407 			l = low;
408 	L100:
409 			x = h[en - 1][en - 1];	/* form shift */
410 			if (l == en || l == na)
411 				break;
412 			if (itn == 0) {
413 				/* all eigenvalues have not converged */
414 				fprintf(STDOUT, "\n\n\nHALT: PLEASE REPORT ERROR B TO DEVELOPERS\n\n\n");
415 #				if PARALLEL
416 					PP_Finalize();
417 #				endif
418 				exit(1);
419 			}
420 			y = h[na - 1][na - 1];
421 			w = h[en - 1][na - 1] * h[na - 1][en - 1];
422 			/* form exceptional shift */
423 			if (its == 10 || its == 20) {
424 				t += x;
425 				for (i = low - 1; i < en; i++)
426 					h[i][i] -= x;
427 				s = fabs(h[en - 1][na - 1]) + fabs(h[na - 1][en - 3]);
428 				x = 0.75 * s;
429 				y = x;
430 				w = -0.4375 * s * s;
431 			}
432 			its++;
433 			itn--;
434 			/* look for two consecutive small sub-diagonal elements */
435 			for (m = en - 2; m >= l; m--) {
436 				z = h[m - 1][m - 1];
437 				r = x - z;
438 				s = y - z;
439 				p = (r * s - w) / h[m][m - 1] + h[m - 1][m];
440 				q = h[m][m] - z - r - s;
441 				r = h[m + 1][m];
442 				s = fabs(p) + fabs(q) + fabs(r);
443 				p /= s;
444 				q /= s;
445 				r /= s;
446 				if (m == l)
447 					break;
448 				tst1 = fabs(p) *
449 						(fabs(h[m - 2][m - 2]) + fabs(z) + fabs(h[m][m]));
450 				tst2 = tst1 + fabs(h[m - 1][m - 2]) * (fabs(q) + fabs(r));
451 				if (tst2 == tst1)
452 					break;
453 			}
454 			for (i = m + 2; i <= en; i++) {
455 				h[i - 1][i - 3] = 0.0;
456 				if (i != m + 2)
457 					h[i - 1][i - 4] = 0.0;
458 			}
459 			for (k = m; k <= na; k++) {
460 				notlas = (k != na);
461 				if (k != m) {
462 					p = h[k - 1][k - 2];
463 					q = h[k][k - 2];
464 					r = 0.0;
465 					if (notlas)
466 						r = h[k + 1][k - 2];
467 					x = fabs(p) + fabs(q) + fabs(r);
468 					if (x != 0.0) {
469 						p /= x;
470 						q /= x;
471 						r /= x;
472 					}
473 				}
474 				if (x != 0.0) {
475 					if (p < 0.0) /* sign */
476 						s = - sqrt(p * p + q * q + r * r);
477 					else
478 						s = sqrt(p * p + q * q + r * r);
479 					if (k != m)
480 						h[k - 1][k - 2] = -s * x;
481 					else {
482 						if (l != m)
483 							h[k - 1][k - 2] = -h[k - 1][k - 2];
484 					}
485 					p += s;
486 					x = p / s;
487 					y = q / s;
488 					z = r / s;
489 					q /= p;
490 					r /= p;
491 					if (!notlas) {
492 						for (j = k - 1; j < n; j++) {	/* row modification */
493 							p = h[k - 1][j] + q * h[k][j];
494 							h[k - 1][j] -= p * x;
495 							h[k][j] -= p * y;
496 						}
497 						j = (en < (k + 3)) ? en : (k + 3); /* min */
498 						for (i = 0; i < j; i++) {	/* column modification */
499 							p = x * h[i][k - 1] + y * h[i][k];
500 							h[i][k - 1] -= p;
501 							h[i][k] -= p * q;
502 						}
503 						/* accumulate transformations */
504 						for (i = low - 1; i < hgh; i++) {
505 							p = x * zz[i][k - 1] + y * zz[i][k];
506 							zz[i][k - 1] -= p;
507 							zz[i][k] -= p * q;
508 						}
509 					} else {
510 						for (j = k - 1; j < n; j++) {	/* row modification */
511 							p = h[k - 1][j] + q * h[k][j] + r * h[k + 1][j];
512 							h[k - 1][j] -= p * x;
513 							h[k][j] -= p * y;
514 							h[k + 1][j] -= p * z;
515 						}
516 						j = (en < (k + 3)) ? en : (k + 3); /* min */
517 						for (i = 0; i < j; i++) {	/* column modification */
518 							p = x * h[i][k - 1] + y * h[i][k] + z * h[i][k + 1];
519 							h[i][k - 1] -= p;
520 							h[i][k] -= p * q;
521 							h[i][k + 1] -= p * r;
522 						}
523 						/* accumulate transformations */
524 						for (i = low - 1; i < hgh; i++) {
525 							p = x * zz[i][k - 1] + y * zz[i][k] +
526 								z * zz[i][k + 1];
527 							zz[i][k - 1] -= p;
528 							zz[i][k] -= p * q;
529 							zz[i][k + 1] -= p * r;
530 						}
531 					}
532 				}
533 			}	       /* for k */
534 		}		       /* while infinite loop */
535 		if (l == en) {	       /* one root found */
536 			h[en - 1][en - 1] = x + t;
537 			wr[en - 1] = h[en - 1][en - 1];
538 			wi[en - 1] = 0.0;
539 			en = na;
540 			continue;
541 		}
542 		y = h[na - 1][na - 1];
543 		w = h[en - 1][na - 1] * h[na - 1][en - 1];
544 		p = (y - x) / 2.0;
545 		q = p * p + w;
546 		z = sqrt(fabs(q));
547 		h[en - 1][en - 1] = x + t;
548 		x = h[en - 1][en - 1];
549 		h[na - 1][na - 1] = y + t;
550 		if (q >= 0.0) {	       /* real pair */
551 			if (p < 0.0) /* sign */
552 				z = p - fabs(z);
553 			else
554 				z = p + fabs(z);
555 			wr[na - 1] = x + z;
556 			wr[en - 1] = wr[na - 1];
557 			if (z != 0.0)
558 				wr[en - 1] = x - w / z;
559 			wi[na - 1] = 0.0;
560 			wi[en - 1] = 0.0;
561 			x = h[en - 1][na - 1];
562 			s = fabs(x) + fabs(z);
563 			p = x / s;
564 			q = z / s;
565 			r = sqrt(p * p + q * q);
566 			p /= r;
567 			q /= r;
568 			for (j = na - 1; j < n; j++) {	/* row modification */
569 				z = h[na - 1][j];
570 				h[na - 1][j] = q * z + p * h[en - 1][j];
571 				h[en - 1][j] = q * h[en - 1][j] - p * z;
572 			}
573 			for (i = 0; i < en; i++) {	/* column modification */
574 				z = h[i][na - 1];
575 				h[i][na - 1] = q * z + p * h[i][en - 1];
576 				h[i][en - 1] = q * h[i][en - 1] - p * z;
577 			}
578 			/* accumulate transformations */
579 			for (i = low - 1; i < hgh; i++) {
580 				z = zz[i][na - 1];
581 				zz[i][na - 1] = q * z + p * zz[i][en - 1];
582 				zz[i][en - 1] = q * zz[i][en - 1] - p * z;
583 			}
584 		} else {	       /* complex pair */
585 			wr[na - 1] = x + p;
586 			wr[en - 1] = x + p;
587 			wi[na - 1] = z;
588 			wi[en - 1] = -z;
589 		}
590 		en -= 2;
591 	}			       /* while en >= low */
592 	/* backsubstitute to find vectors of upper triangular form */
593 	if (norm != 0.0) {
594 		for (en = n; en >= 1; en--) {
595 			p = wr[en - 1];
596 			q = wi[en - 1];
597 			na = en - 1;
598 			if (q == 0.0) {/* real vector */
599 				m = en;
600 				h[en - 1][en - 1] = 1.0;
601 				if (na != 0) {
602 					for (i = en - 2; i >= 0; i--) {
603 						w = h[i][i] - p;
604 						r = 0.0;
605 						for (j = m - 1; j < en; j++)
606 							r += h[i][j] * h[j][en - 1];
607 						if (wi[i] < 0.0) {
608 							z = w;
609 							s = r;
610 						} else {
611 							m = i + 1;
612 							if (wi[i] == 0.0) {
613 								t = w;
614 								if (t == 0.0) {
615 									tst1 = norm;
616 									t = tst1;
617 									do {
618 										t = 0.01 * t;
619 										tst2 = norm + t;
620 									} while (tst2 > tst1);
621 								}
622 								h[i][en - 1] = -(r / t);
623 							} else {	/* solve real equations */
624 								x = h[i][i + 1];
625 								y = h[i + 1][i];
626 								q = (wr[i] - p) * (wr[i] - p) + wi[i] * wi[i];
627 								t = (x * s - z * r) / q;
628 								h[i][en - 1] = t;
629 								if (fabs(x) > fabs(z))
630 									h[i + 1][en - 1] = (-r - w * t) / x;
631 								else
632 									h[i + 1][en - 1] = (-s - y * t) / z;
633 							}
634 							/* overflow control */
635 							t = fabs(h[i][en - 1]);
636 							if (t != 0.0) {
637 								tst1 = t;
638 								tst2 = tst1 + 1.0 / tst1;
639 								if (tst2 <= tst1) {
640 									for (j = i; j < en; j++)
641 										h[j][en - 1] /= t;
642 								}
643 							}
644 						}
645 					}
646 				}
647 			} else if (q > 0.0) {
648 				m = na;
649 				if (fabs(h[en - 1][na - 1]) > fabs(h[na - 1][en - 1])) {
650 					h[na - 1][na - 1] = q / h[en - 1][na - 1];
651 					h[na - 1][en - 1] = (p - h[en - 1][en - 1]) /
652 														h[en - 1][na - 1];
653 				} else
654 					mcdiv(0.0, -h[na - 1][en - 1], h[na - 1][na - 1] - p, q,
655 						&h[na - 1][na - 1], &h[na - 1][en - 1]);
656 				h[en - 1][na - 1] = 0.0;
657 				h[en - 1][en - 1] = 1.0;
658 				if (en != 2) {
659 					for (i = en - 3; i >= 0; i--) {
660 						w = h[i][i] - p;
661 						ra = 0.0;
662 						sa = 0.0;
663 						for (j = m - 1; j < en; j++) {
664 							ra += h[i][j] * h[j][na - 1];
665 							sa += h[i][j] * h[j][en - 1];
666 						}
667 						if (wi[i] < 0.0) {
668 							z = w;
669 							r = ra;
670 							s = sa;
671 						} else {
672 							m = i + 1;
673 							if (wi[i] == 0.0)
674 								mcdiv(-ra, -sa, w, q, &h[i][na - 1],
675 									&h[i][en - 1]);
676 							else {	/* solve complex equations */
677 								x = h[i][i + 1];
678 								y = h[i + 1][i];
679 								vr = (wr[i] - p) * (wr[i] - p);
680 								vr = vr + wi[i] * wi[i] - q * q;
681 								vi = (wr[i] - p) * 2.0 * q;
682 								if (vr == 0.0 && vi == 0.0) {
683 									tst1 = norm * (fabs(w) + fabs(q) + fabs(x) +
684 												   fabs(y) + fabs(z));
685 									vr = tst1;
686 									do {
687 										vr = 0.01 * vr;
688 										tst2 = tst1 + vr;
689 									} while (tst2 > tst1);
690 								}
691 								mcdiv(x * r - z * ra + q * sa,
692 									 x * s - z * sa - q * ra, vr, vi,
693 								     &h[i][na - 1], &h[i][en - 1]);
694 								if (fabs(x) > fabs(z) + fabs(q)) {
695 									h[i + 1]
696 										[na - 1] = (q * h[i][en - 1] -
697 													w * h[i][na - 1] - ra) / x;
698 									h[i + 1][en - 1] = (-sa - w * h[i][en - 1] -
699 														q * h[i][na - 1]) / x;
700 								} else
701 									mcdiv(-r - y * h[i][na - 1],
702 										 -s - y * h[i][en - 1], z, q,
703 									     &h[i + 1][na - 1], &h[i + 1][en - 1]);
704 							}
705 							/* overflow control */
706 							t = (fabs(h[i][na - 1]) > fabs(h[i][en - 1])) ?
707 								 fabs(h[i][na - 1]) : fabs(h[i][en - 1]);
708 							if (t != 0.0) {
709 								tst1 = t;
710 								tst2 = tst1 + 1.0 / tst1;
711 								if (tst2 <= tst1) {
712 									for (j = i; j < en; j++) {
713 										h[j][na - 1] /= t;
714 										h[j][en - 1] /= t;
715 									}
716 								}
717 							}
718 						}
719 					}
720 				}
721 			}
722 		}
723 		/* end back substitution. vectors of isolated roots */
724 		for (i = 0; i < n; i++) {
725 			if (i + 1 < low || i + 1 > hgh) {
726 				for (j = i; j < n; j++)
727 					zz[i][j] = h[i][j];
728 			}
729 		}
730 		/* multiply by transformation matrix to give vectors of
731 		 * original full matrix. */
732 		for (j = n - 1; j >= low - 1; j--) {
733 			m = ((j + 1) < hgh) ? (j + 1) : hgh; /* min */
734 			for (i = low - 1; i < hgh; i++) {
735 				z = 0.0;
736 				for (k = low - 1; k < m; k++)
737 					z += zz[i][k] * h[k][j];
738 				zz[i][j] = z;
739 			}
740 		}
741 	}
742 	return;
743 } /* hqr2 */
744 
745 
746 /* make rate matrix with 0.01 expected substitutions per unit time */
747 void onepamratematrix(dmatrix a)
748 {
749 	int i, j;
750 	double delta, temp, sum;
751 	dvector m;
752 
753 	for (i = 0; i < tpmradix; i++)
754 	{
755 		for (j = 0; j < tpmradix; j++)
756 		{
757 			a[i][j] = Freqtpm[j]*a[i][j];
758 		}
759 	}
760 
761 	m = new_dvector(tpmradix);
762 	for (i = 0, sum = 0.0; i < tpmradix; i++)
763 	{
764 		for (j = 0, temp = 0.0; j < tpmradix; j++)
765 			temp += a[i][j];
766 		m[i] = temp; /* row sum */
767 		sum += temp*Freqtpm[i]; /* exp. rate */
768 	}
769 	delta = 0.01 / sum; /* 0.01 subst. per unit time */
770 	for (i = 0; i < tpmradix; i++) {
771 		for (j = 0; j < tpmradix; j++) {
772 			if (i != j)
773 				a[i][j] = delta * a[i][j];
774 			else
775 				a[i][j] = delta * (-m[i]);
776 		}
777 	}
778 	free_dvector(m);
779 } /* onepamratematrix */
780 
781 
782 /* print rate matrix to fp for testing purposes */
783 void printratematrix(FILE *fp, dmatrix a, int do_multpi)
784 {
785 	int i, j;
786 
787 	double sum = 0.0;
788 	double rowsum;
789 
790 	/* fprintf(fp, "\n"); */
791 	for (i = 0; i < tpmradix; i++)
792 	{
793 		rowsum = 0.0;
794 		fprintf(fp, "%-10s ",int2code(i));
795 		for (j = 0; j < tpmradix; j++)
796 		{
797 			if (! do_multpi) {
798 				fprintf(fp, "%10.5f", a[i][j]);
799 				if (i != j) rowsum += a[i][j];
800 			} else {
801 				fprintf(fp, "%10.5f", a[i][j] * Freqtpm[i]);
802 				if (i != j) rowsum += a[i][j] * Freqtpm[i];
803 				/* if (i != j) rowsum += Freqtpm[i] * a[i][j]; */
804 			}
805 		}
806 		sum += rowsum;
807 		fprintf(fp, " row=%10.5f\n",rowsum);
808 	}
809 	fprintf(fp, "off-diagonal sum = %10.5f\n", sum);
810 } /* printratematrix */
811 
812 
813 void eigensystem(
814 	dvector eval, 	/* vector of Eigen-values */
815 	dmatrix evec)	/* vector of Eigen-vectors*/
816 {
817 	dvector evali, forg;
818 	dmatrix a, b;
819 	ivector ordr;
820 	int i, j, k, error;
821 	double zero;
822 
823 
824 	ordr = new_ivector(tpmradix);
825 	evali = new_dvector(tpmradix);
826 	forg = new_dvector(tpmradix);
827 	a = new_dmatrix(tpmradix,tpmradix);
828 	b = new_dmatrix(tpmradix,tpmradix);
829 
830 	rtfdata(a, forg); /* get relative transition matrix and frequencies */
831 	if (printrmatr_optn) {
832 		fprintf(stdout, "Current rate matrix R :\n");
833 		printratematrix(stdout, a, FALSE); /* print rate matrix */
834 		printratematrix(stdout, a, TRUE); /* print rate matrix */
835 		fprintf(stdout, "\n");
836 	}
837 
838 	onepamratematrix(a); /* make 1 PAM rate matrix */
839 	if (printrmatr_optn) {
840 		fprintf(stdout, "Current transition matrix Q (1 PAM normalized):\n"); /* print rate matrix */
841 		printratematrix(stdout, a, FALSE); /* print rate matrix multiplied with pi */
842 		printratematrix(stdout, a, TRUE); /* print rate matrix multiplied with pi */
843 	}
844 
845 
846 	/* copy a to b */
847 	for (i = 0; i < tpmradix; i++)
848 		for (j = 0; j < tpmradix; j++)
849 			b[i][j] = a[i][j];
850 
851 	elmhes(a, ordr, tpmradix); /* compute eigenvalues and eigenvectors */
852 	eltran(a, evec, ordr, tpmradix);
853 	hqr2(tpmradix, 1, tpmradix, a, evec, eval, evali);
854 
855 	/* check eigenvalue equation */
856 	error = FALSE;
857 	for (j = 0; j < tpmradix; j++) {
858 		for (i = 0, zero = 0.0; i < tpmradix; i++) {
859 			for (k = 0; k < tpmradix; k++) zero += b[i][k] * evec[k][j];
860 			zero -= eval[j] * evec[i][j];
861 			if (fabs(zero) > 1.0e-5)
862 				error = TRUE;
863 		}
864 	}
865 	if (error)
866 		fprintf(STDOUT, "\nWARNING: Eigensystem doesn't satisfy eigenvalue equation!\n");
867 
868 	free_ivector(ordr);
869 	free_dvector(evali);
870 	free_dvector(forg);
871 	free_dmatrix(a);
872 	free_dmatrix(b);
873 } /* eigensystem */
874 
875 
876 void luinverse(dmatrix inmat, dmatrix imtrx, int size)
877 {
878     double eps = 1.0e-20; /* ! */
879 	int i, j, k, l, maxi=0, idx, ix, jx;
880 	double sum, tmp, maxb, aw;
881 	ivector index;
882 	double *wk;
883 	dmatrix omtrx;
884 
885 
886 	index = new_ivector(tpmradix);
887 	omtrx = new_dmatrix(tpmradix,tpmradix);
888 
889 	/* copy inmat to omtrx */
890 	for (i = 0; i < tpmradix; i++)
891 		for (j = 0; j < tpmradix; j++)
892 			omtrx[i][j] = inmat[i][j];
893 
894 	wk = (double *) calloc((size_t)size, sizeof(double));
895 	aw = 1.0;
896 	for (i = 0; i < size; i++) {
897 		maxb = 0.0;
898 		for (j = 0; j < size; j++) {
899 			if (fabs(omtrx[i][j]) > maxb)
900 				maxb = fabs(omtrx[i][j]);
901 		}
902 		if (maxb == 0.0) {
903 			/* Singular matrix */
904 			fprintf(STDOUT, "\n\n\nHALT: PLEASE REPORT ERROR C TO DEVELOPERS\n\n\n");
905 #			if PARALLEL
906 				PP_Finalize();
907 #			endif
908 			exit(1);
909 		}
910 		wk[i] = 1.0 / maxb;
911 	}
912 	for (j = 0; j < size; j++) {
913 		for (i = 0; i < j; i++) {
914 			sum = omtrx[i][j];
915 			for (k = 0; k < i; k++)
916 				sum -= omtrx[i][k] * omtrx[k][j];
917 			omtrx[i][j] = sum;
918 		}
919 		maxb = 0.0;
920 		for (i = j; i < size; i++) {
921 			sum = omtrx[i][j];
922 			for (k = 0; k < j; k++)
923 				sum -= omtrx[i][k] * omtrx[k][j];
924 			omtrx[i][j] = sum;
925 			tmp = wk[i] * fabs(sum);
926 			if (tmp >= maxb) {
927 				maxb = tmp;
928 				maxi = i;
929 			}
930 		}
931 		if (j != maxi) {
932 			for (k = 0; k < size; k++) {
933 				tmp = omtrx[maxi][k];
934 				omtrx[maxi][k] = omtrx[j][k];
935 				omtrx[j][k] = tmp;
936 			}
937 			aw = -aw;
938 			wk[maxi] = wk[j];
939 		}
940 		index[j] = maxi;
941 		if (omtrx[j][j] == 0.0)
942 			omtrx[j][j] = eps;
943 		if (j != size - 1) {
944 			tmp = 1.0 / omtrx[j][j];
945 			for (i = j + 1; i < size; i++)
946 				omtrx[i][j] *= tmp;
947 		}
948 	}
949 	for (jx = 0; jx < size; jx++) {
950 		for (ix = 0; ix < size; ix++)
951 			wk[ix] = 0.0;
952 		wk[jx] = 1.0;
953 		l = -1;
954 		for (i = 0; i < size; i++) {
955 			idx = index[i];
956 			sum = wk[idx];
957 			wk[idx] = wk[i];
958 			if (l != -1) {
959 				for (j = l; j < i; j++)
960 					sum -= omtrx[i][j] * wk[j];
961 			} else if (sum != 0.0)
962 				l = i;
963 			wk[i] = sum;
964 		}
965 		for (i = size - 1; i >= 0; i--) {
966 			sum = wk[i];
967 			for (j = i + 1; j < size; j++)
968 				sum -= omtrx[i][j] * wk[j];
969 			wk[i] = sum / omtrx[i][i];
970 		}
971 		for (ix = 0; ix < size; ix++)
972 			imtrx[ix][jx] = wk[ix];
973 	}
974 	free((char *)wk);
975 	wk = NULL;
976 	free_ivector(index);
977 	free_dmatrix(omtrx);
978 } /* luinverse */
979 
980 
981 void checkevector(dmatrix evec, dmatrix ivec, int nn)
982 {
983 	int i, j, ia, ib, ic, error;
984 	dmatrix matx;
985 	double sum;
986 
987 
988 	matx = new_dmatrix(nn, nn);
989 	/* multiply matrix of eigenvectors and its inverse */
990 	for (ia = 0; ia < nn; ia++) {
991 		for (ic = 0; ic < nn; ic++) {
992 			sum = 0.0;
993 			for (ib = 0; ib < nn; ib++) sum += evec[ia][ib] * ivec[ib][ic];
994 			matx[ia][ic] = sum;
995 		}
996 	}
997 	/* check whether the unitary matrix is obtained */
998 	error = FALSE;
999 	for (i = 0; i < nn; i++) {
1000 		for (j = 0; j < nn; j++) {
1001 			if (i == j) {
1002 				if (fabs(matx[i][j] - 1.0) > 1.0e-5)
1003 					error = TRUE;
1004 			} else {
1005 				if (fabs(matx[i][j]) > 1.0e-5)
1006 					error = TRUE;
1007 			}
1008 		}
1009 	}
1010 	if (error) {
1011 		fprintf(STDOUT, "\nWARNING: Inversion of eigenvector matrix not perfect!\n");
1012 	}
1013 	free_dmatrix(matx);
1014 } /* checkevector */
1015 
1016 
1017 /***************************** exported functions *****************************/
1018 
1019 
1020 /* compute 1 PAM rate matrix, its eigensystem, and the inverse matrix thereof */
1021 void tranprobmat()
1022 {
1023 	eigensystem(Eval, Evec); /* eigensystem of 1 PAM rate matrix */
1024 	luinverse(Evec, Ievc, tpmradix); /* inverse eigenvectors are in Ievc */
1025 	checkevector(Evec, Ievc, tpmradix); /* check whether inversion was OK */
1026 } /* tranprobmat */
1027 
1028 
1029 /* compute P(t) */
1030 void tprobmtrx(double arc, dmatrix tpr)
1031 {
1032 	register int i, j, k;
1033 	register double temp;
1034 
1035 
1036 	for (k = 0; k < tpmradix; k++) {
1037 		temp = exp(arc * Eval[k]);
1038 		for (j = 0; j < tpmradix; j++)
1039 			iexp[k][j] = Ievc[k][j] * temp;
1040 	}
1041 	for (i = 0; i < tpmradix; i++) {
1042 		for (j = 0; j < tpmradix; j++) {
1043 			temp = 0.0;
1044 			for (k = 0; k < tpmradix; k++)
1045 				temp += Evec[i][k] * iexp[k][j];
1046 			tpr[i][j] = fabs(temp);
1047 		}
1048 	}
1049 } /* tprobmtrx */
1050 
1051 
1052 /******************************************************************************/
1053 /* estimation of maximum likelihood distances                                 */
1054 /******************************************************************************/
1055 
1056 /* compute total log-likelihood
1057    input: likelihoods for each site and non-zero rate
1058    output: total log-likelihood (incl. zero rate category) */
1059 double comptotloglkl(dmatrix cdl)
1060 {
1061 	int k, r;
1062 	double loglkl, fv, fv2, sitelkl;
1063 
1064 	loglkl = 0.0;
1065 	fv = 1.0-fracinv;
1066 	fv2 = (1.0-fracinv)/(double) numcats;
1067 
1068 	if (numcats == 1) {
1069 
1070 		for (k = 0; k < Numptrn; k++) {
1071 
1072 			/* compute likelihood for pattern k */
1073 			sitelkl = cdl[0][k]*fv;
1074 			if (constpat[k] == TRUE)
1075 				sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1076 
1077 			/* total log-likelihood */
1078 			loglkl += log(sitelkl)*Weight[k];
1079 
1080 		}
1081 
1082 	} else {
1083 
1084 		for (k = 0; k < Numptrn; k++) {
1085 
1086 			/* this general routine works always but it's better
1087                to run it only when it's really necessary */
1088 
1089 			/* compute likelihood for pattern k */
1090 			sitelkl = 0.0;
1091 			for (r = 0; r < numcats; r++)
1092 				sitelkl += cdl[r][k];
1093 			sitelkl = fv2*sitelkl;
1094 			if (constpat[k] == TRUE)
1095 				sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1096 
1097 			/* total log-likelihood */
1098 			loglkl += log(sitelkl)*Weight[k];
1099 
1100 		}
1101 
1102 	}
1103 
1104 	return loglkl;
1105 } /* comptotloglkl */
1106 
1107 
1108 /* computes the site log-likelihoods
1109    input: likelihoods for each site and non-zero rate
1110    output: log-likelihood for each site */
1111 void allsitelkl(dmatrix cdl, dvector aslkl)
1112 {
1113 	int k, r;
1114 	double fv, fv2, sitelkl;
1115 
1116 	fv = 1.0-fracinv;
1117 	fv2 = (1.0-fracinv)/(double) numcats;
1118 
1119 	if (numcats == 1) {
1120 
1121 		for (k = 0; k < Numptrn; k++) {
1122 
1123 			/* compute likelihood for pattern k */
1124 			sitelkl = cdl[0][k]*fv;
1125 			if (constpat[k] == TRUE)
1126 				sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1127 
1128 			/* site log-likelihood */
1129 			aslkl[k] = log(sitelkl);
1130 		}
1131 
1132 	} else {
1133 
1134 		for (k = 0; k < Numptrn; k++) {
1135 
1136 			/* this general routine works always but it's better
1137                to run it only when it's really necessary */
1138 
1139 			/* compute likelihood for pattern k */
1140 			sitelkl = 0.0;
1141 			for (r = 0; r < numcats; r++)
1142 				sitelkl += cdl[r][k];
1143 			sitelkl = fv2*sitelkl;
1144 			if (constpat[k] == TRUE)
1145 				sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1146 
1147 			/* total log-likelihood */
1148 			aslkl[k] = log(sitelkl);
1149 
1150 		}
1151 	}
1152 } /* allsitelkl */
1153 
1154 
1155 /* YYYY */
1156 /* computes the site log-likelihoods
1157    input: likelihoods for each site and non-zero rate
1158    output: log-likelihood for each site */
1159 void writesitelklvectorbin(FILE *outf, dvector aslkl)
1160 {
1161 	int s;
1162 	int dsize;
1163 
1164 	dsize = sizeof(double);
1165 
1166 	for (s = 0; s < Maxsite; s++) {
1167 		fprintf(stdout, "%4d (%4d) %.5f \n", s, Alias[s], aslkl[Alias[s]]);
1168 		if( 1 != fwrite( &(aslkl[Alias[s]]), dsize, 1, outf) ) {
1169 			fprintf(stderr, "ERROR writing log-likelihood of site %d to file!!!\n", Alias[s]);
1170 #			if PARALLEL
1171 				PP_Finalize();
1172 #			endif
1173 			exit (1);
1174 		}
1175 	}
1176 
1177 
1178 } /* writesitelklvectorbin */
1179 
1180 /* outputs a name and its log-likelihoods to outf */
1181 void writesitelklvectorphy(FILE *outf, char *name, int numsite, dvector aslkl)
1182 {
1183 	int s;
1184 	int dsize;
1185 
1186 	dsize = sizeof(double);
1187 
1188 	fprintf(outf, "%s", name);
1189 	for (s = 0; s < Maxsite; s++) {
1190 		if( 0 == fprintf(outf, " %.5f", aslkl[Alias[s]])) {
1191 			fprintf(stderr, "ERROR writing log-likelihood of site %d for '%s' to file!!!\n", Alias[s], name);
1192 #			if PARALLEL
1193 				PP_Finalize();
1194 #			endif
1195 			exit (1);
1196 		}
1197 	}
1198 	fprintf(outf, "\n");
1199 } /* writesitelklvectorphy */
1200 
1201 
1202 /* outputs a name and its log-likelihoods to outf */
1203 void writesitelklmatrixphy(FILE *outf, int numut, int numsite, dmatrix allslkl, dmatrix allslklc, int clock)
1204 {
1205 	int t;
1206 	char tmpname[20];
1207 
1208 	if (clock)
1209 		fprintf(outf, " %d %d \n", 2 * numut, numsite);
1210 	else
1211 		fprintf(outf, " %d %d \n", numut, numsite);
1212 
1213 	for (t = 0; t < numut; t++) {
1214 		sprintf(tmpname, "tr%-7d  ", t+1);
1215 		writesitelklvectorphy(outf, tmpname, numsite, allslkl[t]);
1216 	}
1217 
1218 	if (clock) {
1219 		for (t = 0; t < numut; t++) {
1220 			sprintf(tmpname, "ctr%-7d ", t+1);
1221 			writesitelklvectorphy(outf, tmpname, numsite, allslklc[t]);
1222 		}
1223 	}
1224 
1225 } /* writesitelklmatrixphy */
1226 
1227 
1228 
1229 
1230 /***************************** internal functions *****************************/
1231 
1232 /* compute negative log-likelihood of distance arc between sequences seqchi/j */
1233 double pairlkl(double arc)
1234 {
1235 	int k, r, ci, cj;
1236 	double loglkl, fv, sitelkl;
1237 
1238 
1239 	/* compute tpms */
1240 	for (r = 0; r < numcats; r++)
1241 		/* compute tpm for rate category r */
1242 		tprobmtrx(arc*Rates[r], ltprobr[r]);
1243 
1244 	loglkl = 0.0;
1245 	fv = 1.0-fracinv;
1246 
1247 	if (numcats == 1) {
1248 
1249 		for (k = 0; k < Numptrn; k++) {
1250 
1251 			/* compute likelihood for site k */
1252 			ci = seqchi[k];
1253 			cj = seqchj[k];
1254 			if (ci != tpmradix && cj != tpmradix)
1255 				sitelkl = ltprobr[0][ci][cj]*fv;
1256 			else
1257 				sitelkl = fv;
1258 			if (ci == cj && ci != tpmradix)
1259 				sitelkl += fracinv*Freqtpm[ci];
1260 
1261 			/* total log-likelihood */
1262 			loglkl += log(sitelkl)*Weight[k];
1263 
1264 		}
1265 
1266 	} else {
1267 
1268 		for (k = 0; k < Numptrn; k++) {
1269 
1270 			/* this general routine works always but it's better
1271                to run it only when it's really necessary */
1272 
1273 			/* compute likelihood for site k */
1274 			ci = seqchi[k];
1275 			cj = seqchj[k];
1276 			if (ci != tpmradix && cj != tpmradix) {
1277 				sitelkl = 0.0;
1278 				for (r = 0; r < numcats; r++)
1279 					sitelkl += ltprobr[r][ci][cj];
1280 				sitelkl = fv*sitelkl/(double) numcats;
1281 			} else
1282 				sitelkl = fv;
1283 			if (ci == cj && ci != tpmradix)
1284 				sitelkl += fracinv*Freqtpm[ci];
1285 
1286 			/* total log-likelihood */
1287 			loglkl += log(sitelkl)*Weight[k];
1288 
1289 		}
1290 
1291 	}
1292 
1293 	/* return negative log-likelihood as we use a minimizing procedure */
1294 	return -loglkl;
1295 } /* pairlkl */
1296 
1297 
1298 /***************************** exported functions *****************************/
1299 
1300 
1301 /* maximum likelihood distance between sequence i and j */
1302 double mldistance(int i, int j, double init)
1303 {
1304 	double dist, fx, f2x;
1305 
1306 	if (i == j) return 0.0;
1307 
1308 	/* use old distance as start value */
1309 /*epe	dist = Distanmat[i][j];*/
1310 	dist = init;
1311 
1312 	if (dist == 0.0) return 0.0;
1313 
1314 	seqchi = Seqpat[i];
1315 	seqchj = Seqpat[j];
1316 
1317 	if (dist <= MINARC) dist = MINARC+1.0;
1318 	if (dist >= MAXARC) dist = MAXARC-1.0;
1319 
1320  	dist = onedimenmin(MINARC, dist, MAXARC, pairlkl, EPSILON_BRANCH, &fx, &f2x);
1321 
1322 	return dist;
1323 } /* mldistance */
1324 
1325 
1326 /* initialize distance matrix */
1327 void initdistan()
1328 {
1329 	int i, j, k, diff, x, y;
1330 	double obs, temp;
1331 
1332 	for (i = 0; i < Maxspc; i++) {
1333 		Distanmat[i][i] = 0.0;
1334 		for (j = i + 1; j < Maxspc; j++) {
1335 			seqchi = Seqpat[i];
1336 			seqchj = Seqpat[j];
1337 
1338 			/* count observed differences */
1339 			diff = 0;
1340 			for (k = 0; k < Numptrn; k++) {
1341 				x = seqchi[k];
1342 				y = seqchj[k];
1343 				if (x != y &&
1344 					x != tpmradix &&
1345 					y != tpmradix)
1346 					diff += Weight[k];
1347 			}
1348 			if (diff == 0)
1349 				Distanmat[i][j] = 0.0;
1350 			else {
1351 				/* use generalized JC correction to get first estimate
1352 				   (for the SH model the observed distance is used) */
1353 				/* observed distance */
1354 #ifndef USE_WINDOWS
1355 				obs = (double) diff / (double) Maxsite;
1356 #else
1357 				obs = (double) diff / (double) alimaxsite;
1358 #endif
1359 				temp = 1.0 - (double) obs*tpmradix/(tpmradix-1.0);
1360 				if (temp > 0.0 && !(data_optn == 0 && SH_optn))
1361 					/* use JC corrected distance */
1362 					Distanmat[i][j] = -100.0*(tpmradix-1.0)/tpmradix * log(temp);
1363 				else
1364 					/* use observed distance */
1365 					Distanmat[i][j] = obs * 100.0;
1366 				if (Distanmat[i][j] < MINARC) Distanmat[i][j] = MINARC;
1367 				if (Distanmat[i][j] > MAXARC) Distanmat[i][j] = MAXARC;
1368 			}
1369 			Distanmat[j][i] = Distanmat[i][j];
1370 		}
1371 	}
1372 } /* initdistan */
1373 
1374 /* compute distance matrix */
1375 /*epe*/
1376 #if PARALLEL
1377 void computedistan()
1378 {
1379 	int i, slaveno, loop;
1380 	int xr, yr, xs=0, ys=1, numstartjobs=0, dummychunk=0;
1381 	dvector startval, dummyval=new_dvector(1);
1382 	ivector startlen = new_ivector(PP_NumProcs), startpoint = new_ivector(PP_NumProcs);
1383 	imatrix coords = new_imatrix(PP_NumProcs, 3);
1384 	schedtype sched;
1385 	MPI_Status stat;
1386 
1387 	loop = .5 * Maxspc * (Maxspc-1);
1388 	initsched(&sched, loop, PP_NumProcs-1, 1);
1389 	if (PP_NumProcs < loop) loop = PP_NumProcs-1;
1390 
1391 	startlen[0] = 0;
1392 	startpoint[0] = 0;
1393 
1394 	/*Send first min(#numprocs, #matrix entries) matrix entries - one to each processor.
1395 	If #numprocs > #matrix entries, send all matrix entries; some processors remain idle.*/
1396 	for (slaveno = 1; slaveno < PP_NumProcs; slaveno++)
1397 	{
1398 
1399 		if (slaveno<=loop)
1400 		{
1401 			unsigned int chunks = SCHEDALG_PARAM_EST(&sched);
1402 
1403 			startlen[slaveno] = chunks;
1404 			startpoint[slaveno] = numstartjobs;
1405 			numstartjobs += chunks;
1406 
1407 			coords[slaveno][0] = xs;
1408 			coords[slaveno][1] = ys;
1409 			coords[slaveno][2] = chunks;
1410 
1411 			PP_NextCoord(&xs, &ys, chunks);
1412 		}
1413 		else startlen[slaveno] = 0;
1414 	}
1415 	startval = new_dvector(numstartjobs);
1416 	PP_DMToVector(0, 1, numstartjobs, startval);
1417 	MPI_Scatterv(startval, startlen, startpoint, MPI_DOUBLE,
1418 		dummyval, dummychunk, MPI_DOUBLE, PP_MyMaster, PP_Comm);
1419 	free_dvector(startval);
1420 
1421 	/*The remaining matrix entries (if there are any) are sent to those processors that are
1422 	returning results.*/
1423 	while (xs<Maxspc-1)
1424 	{
1425 		dvector result, initval;
1426 		unsigned int chunkr, chunks = SCHEDALG_PARAM_EST(&sched);
1427 
1428 		MPI_Probe(MPI_ANY_SOURCE, PP_MLDISTANCE, PP_Comm, &stat);
1429 		slaveno = stat.MPI_SOURCE;
1430 
1431 		xr = coords[slaveno][0];
1432 		yr = coords[slaveno][1];
1433 		chunkr = coords[slaveno][2];
1434 
1435 		result = new_dvector(chunkr);
1436 
1437 		MPI_Recv(result, chunkr, MPI_DOUBLE, slaveno, PP_MLDISTANCE, PP_Comm, &stat);
1438 
1439 		PP_VectorToDM(xr, yr, chunkr, result);
1440 
1441 		free_dvector(result);
1442 
1443 		coords[slaveno][0] = xs;
1444 		coords[slaveno][1] = ys;
1445 		coords[slaveno][2] = chunks;
1446 
1447 		initval = new_dvector(chunks+2);
1448 		PP_DMToVector(xs, ys, chunks, initval);
1449 		initval[chunks] = (double) -xs;
1450 		initval[chunks+1] = (double) ys;
1451 
1452 		MPI_Send(initval, chunks+2, MPI_DOUBLE, slaveno, PP_MLDISTANCE, PP_Comm);
1453 
1454 		PP_NextCoord(&xs, &ys, chunks);
1455 	}
1456 
1457 	/*min(#numprocs, #matrix entries) results have to be received.*/
1458 	for (i = 1; i <= loop; i++)
1459 	{
1460 		double* result;
1461 		unsigned int chunkr;
1462 
1463 		MPI_Probe(MPI_ANY_SOURCE, PP_MLDISTANCE, PP_Comm, &stat);
1464 		slaveno = stat.MPI_SOURCE;
1465 
1466 		xr = coords[slaveno][0];
1467 		yr = coords[slaveno][1];
1468 		chunkr = coords[slaveno][2];
1469 
1470 		result = new_dvector(chunkr);
1471 
1472 		MPI_Recv(result, chunkr, MPI_DOUBLE, slaveno, PP_MLDISTANCE, PP_Comm, &stat);
1473 
1474 		PP_VectorToDM(xr, yr, chunkr, result);
1475 	}
1476 	return;
1477 } /* computedistan - parallel */
1478 
1479 #else /* not PARALLEL */
1480 
1481 void computedistan()
1482 {
1483 	int i, j;
1484 
1485 	for (i = 0; i < Maxspc; i++)
1486 		for (j = i; j < Maxspc; j++) {
1487 			Distanmat[i][j] = mldistance(i, j, Distanmat[i][j]);
1488 			Distanmat[j][i] = Distanmat[i][j];
1489 		}
1490 } /* computedistan */
1491 #endif /* not PARALLEL */
1492 
1493 
1494 /******************************************************************************/
1495 /* computation of maximum likelihood edge lengths for a given tree            */
1496 /******************************************************************************/
1497 
1498 
1499 /***************************** internal functions *****************************/
1500 
1501 
1502 /* multiply partial likelihoods */
1503 void productpartials(Node *op)
1504 {
1505 	Node *cp;
1506 	int i, j, r;
1507 	dcube opc, cpc;
1508 
1509 	cp = op;
1510 	opc = op->partials;
1511 	while (cp->isop->isop != op) {
1512 		cp = cp->isop;
1513 		cpc = cp->partials;
1514 		for (r = 0; r < numcats; r++)
1515 			for (i = 0; i < Numptrn; i++)
1516 				for (j = 0; j < tpmradix; j++)
1517 					opc[r][i][j] *= cpc[r][i][j];
1518 	}
1519 } /* productpartials */
1520 
1521 
1522 /* compute internal partial likelihoods */
1523 void partialsinternal(Node *op)
1524 {
1525 	int i, j, k, r;
1526 	double sum;
1527 	dcube oprob, cprob;
1528 
1529 	if (clockmode == 1) { /* clocklike branch lengths */
1530 		for (r = 0; r < numcats; r++) {
1531 			tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]);
1532 		}
1533 	} else {  /* non-clocklike branch lengths */
1534 		for (r = 0; r < numcats; r++) {
1535 			tprobmtrx((op->length)*Rates[r], ltprobr[r]);
1536 		}
1537 	}
1538 
1539 	oprob = op->partials;
1540 	cprob = op->kinp->isop->partials;
1541 	for (r = 0; r < numcats; r++) {
1542 		for (k = 0; k < Numptrn; k++) {
1543 			for (i = 0; i < tpmradix; i++) {
1544 				sum = 0.0;
1545 				for (j = 0; j < tpmradix; j++)
1546 					sum += ltprobr[r][i][j] * cprob[r][k][j];
1547 				oprob[r][k][i] = sum;
1548 			}
1549 		}
1550 	}
1551 } /* partialsinternal */
1552 
1553 
1554 /* compute external partial likelihoods */
1555 void partialsexternal(Node *op)
1556 {
1557 	int i, j, k, r;
1558 	dcube oprob;
1559 	cvector dseqi;
1560 
1561 	if (clockmode == 1) { /* clocklike branch lengths */
1562 		for (r = 0; r < numcats; r++) {
1563 			tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]);
1564 		}
1565 	} else {  /* nonclocklike branch lengths */
1566 		for (r = 0; r < numcats; r++) {
1567 			tprobmtrx((op->length)*Rates[r], ltprobr[r]);
1568 		}
1569 	}
1570 
1571 	oprob = op->partials;
1572 	dseqi = op->kinp->eprob;
1573 	for (r = 0; r < numcats; r++) {
1574 		for (k = 0; k < Numptrn; k++) {
1575 			if ((j = dseqi[k]) == tpmradix) {
1576 				for (i = 0; i < tpmradix; i++)
1577 					oprob[r][k][i] = 1.0;
1578 			} else {
1579 				for (i = 0; i < tpmradix; i++)
1580 					oprob[r][k][i] = ltprobr[r][i][j];
1581 			}
1582 		}
1583 	}
1584 } /* partialsexternal */
1585 
1586 
1587 /* compute all partial likelihoods */
1588 void initpartials(Tree *tr)
1589 {
1590 	Node *cp, *rp;
1591 
1592 	cp = rp = tr->rootp;
1593 	do {
1594 		cp = cp->isop->kinp;
1595 		if (cp->isop == NULL) { /* external node */
1596 			cp = cp->kinp; /* not descen */
1597 			partialsexternal(cp);
1598 		} else { /* internal node */
1599 			if (!cp->descen) {
1600 				productpartials(cp->kinp->isop);
1601 				partialsinternal(cp);
1602 			}
1603 		}
1604 	} while (cp != rp);
1605 } /* initpartials */
1606 
1607 
1608 /* compute log-likelihood given internal branch with length arc
1609    between partials partiali and partials partialj */
1610 double intlkl(double arc)
1611 {
1612 	double sumlk, slk;
1613 	int r, s, i, j;
1614 	dmatrix cdl;
1615 
1616 	cdl = Ctree->condlkl;
1617 	for (r = 0; r < numcats; r++) {
1618 		tprobmtrx(arc*Rates[r], ltprobr[r]);
1619 	}
1620 	for (r = 0; r < numcats; r++) {
1621 		for (s = 0; s < Numptrn; s++) {
1622 			sumlk = 0.0;
1623 			for (i = 0; i < tpmradix; i++) {
1624 				slk = 0.0;
1625 				for (j = 0; j < tpmradix; j++)
1626 					slk += partialj[r][s][j] * ltprobr[r][i][j];
1627 				sumlk += Freqtpm[i] * partiali[r][s][i] * slk;
1628 			}
1629 			cdl[r][s] = sumlk;
1630 		}
1631 	}
1632 
1633 	/* compute total log-likelihood for current tree */
1634 	Ctree->lklhd = comptotloglkl(cdl);
1635 
1636 	return -(Ctree->lklhd); /* we use a minimizing procedure */
1637 } /* intlkl */
1638 
1639 
1640 /* optimize internal branch */
1641 void optinternalbranch(Node *op)
1642 {
1643 	double arc, fx, f2x;
1644 
1645 	partiali = op->isop->partials;
1646 	partialj = op->kinp->isop->partials;
1647 	arc = op->length; /* nonclocklike branch lengths */
1648 	if (arc <= MINARC) arc = MINARC+1.0;
1649 	if (arc >= MAXARC) arc = MAXARC-1.0;
1650 	arc = onedimenmin(MINARC, arc, MAXARC, intlkl, EPSILON_BRANCH, &fx, &f2x);
1651 	op->kinp->length = arc;
1652 	op->length = arc;
1653 
1654 	/* variance of branch length */
1655 	f2x = fabs(f2x);
1656 	if (1.0/(MAXARC*MAXARC) < f2x)
1657 		op->varlen = 1.0/f2x;
1658 	else
1659 		op->varlen = MAXARC*MAXARC;
1660 } /* optinternalbranch */
1661 
1662 
1663 /* compute log-likelihood given external branch with length arc
1664    between partials partiali and sequence seqchi */
1665 double extlkl(double arc)
1666 {
1667 	double sumlk;
1668 	int r, s, i, j;
1669 	dvector opb;
1670 	dmatrix cdl;
1671 
1672 	cdl = Ctree->condlkl;
1673 	for (r = 0; r < numcats; r++) {
1674 		tprobmtrx(arc*Rates[r], ltprobr[r]);
1675 	}
1676 	for (r = 0; r < numcats; r++) {
1677 		for (s = 0; s < Numptrn; s++) {
1678 			opb = partiali[r][s];
1679 			sumlk = 0.0;
1680 			if ((j = seqchi[s]) != tpmradix) {
1681 				for (i = 0; i < tpmradix; i++)
1682 					sumlk += (Freqtpm[i] * (opb[i] * ltprobr[r][i][j]));
1683 			} else {
1684 				for (i = 0; i < tpmradix; i++)
1685 					sumlk += Freqtpm[i] * opb[i];
1686 			}
1687 			cdl[r][s] = sumlk;
1688 		}
1689 	}
1690 
1691 	/* compute total log-likelihood for current tree */
1692 	Ctree->lklhd = comptotloglkl(cdl);
1693 
1694 	return -(Ctree->lklhd); /* we use a minimizing procedure */
1695 } /* extlkl */
1696 
1697 /* optimize external branch */
1698 void optexternalbranch(Node *op)
1699 {
1700 	double arc, fx, f2x;
1701 
1702 	partiali = op->isop->partials;
1703 	seqchi = op->kinp->eprob;
1704 	arc = op->length; /* nonclocklike branch lengths */
1705 	if (arc <= MINARC) arc = MINARC+1.0;
1706 	if (arc >= MAXARC) arc = MAXARC-1.0;
1707 	arc = onedimenmin(MINARC, arc, MAXARC, extlkl, EPSILON_BRANCH, &fx, &f2x);
1708 	op->kinp->length = arc;
1709 	op->length = arc;
1710 
1711 	 /* variance of branch length */
1712 	f2x = fabs(f2x);
1713 	if (1.0/(MAXARC*MAXARC) < f2x)
1714 		op->varlen = 1.0/f2x;
1715 	else
1716 		op->varlen = MAXARC*MAXARC;
1717 } /* optexternalbranch */
1718 
1719 
1720 /* finish likelihoods for each rate and site */
1721 void finishlkl(Node *op)
1722 {
1723 	int r, k, i, j;
1724 	double arc, sumlk, slk;
1725 	dmatrix cdl;
1726 
1727 	partiali = op->isop->partials;
1728 	partialj = op->kinp->isop->partials;
1729 	cdl = Ctree->condlkl;
1730 	arc = op->length; /* nonclocklike branch lengths */
1731 	for (r = 0; r < numcats; r++) {
1732 		tprobmtrx(arc*Rates[r], ltprobr[r]);
1733 	}
1734 	for (r = 0; r < numcats; r++) {
1735 		for (k = 0; k < Numptrn; k++) {
1736 			sumlk = 0.0;
1737 			for (i = 0; i < tpmradix; i++) {
1738 				slk = 0.0;
1739 				for (j = 0; j < tpmradix; j++)
1740 					slk += partialj[r][k][j] * ltprobr[r][i][j];
1741 				sumlk += Freqtpm[i] * partiali[r][k][i] * slk;
1742 			}
1743 			cdl[r][k] = sumlk;
1744 		}
1745 	}
1746 } /* finishlkl */
1747 
1748 
1749 /***************************** exported functions *****************************/
1750 
1751 
1752 /* optimize branch lengths to get maximum likelihood (nonclocklike branchs) */
1753 double optlkl(Tree *tr)
1754 {
1755 	Node *cp, *rp;
1756 	int nconv;
1757 	double lendiff;
1758 
1759 	clockmode = 0; /* nonclocklike branch lengths */
1760 	nconv = 0;
1761 	Converg = FALSE;
1762 	initpartials(tr);
1763 	for (Numit = 1; (Numit <= MAXIT) && (!Converg); Numit++) {
1764 
1765 		cp = rp = tr->rootp;
1766 		do {
1767 			cp = cp->isop->kinp;
1768 			productpartials(cp->kinp->isop);
1769 			if (cp->isop == NULL) { /* external node */
1770 				cp = cp->kinp; /* not descen */
1771 
1772 				lendiff = cp->length;
1773 				optexternalbranch(cp);
1774 				lendiff = fabs(lendiff - cp->length);
1775 				if (lendiff < EPSILON_BRANCH) nconv++;
1776 				else nconv = 0;
1777 
1778 				partialsexternal(cp);
1779 			} else { /* internal node */
1780 				if (cp->descen) {
1781 					partialsinternal(cp);
1782 				} else {
1783 
1784 					lendiff = cp->length;
1785 					optinternalbranch(cp);
1786 					lendiff = fabs(lendiff - cp->length);
1787 					if (lendiff < EPSILON_BRANCH) nconv++;
1788 					else nconv = 0;
1789 
1790 					/* eventually compute likelihoods for each site */
1791 					if ((cp->number == Numibrnch-1 && lendiff < EPSILON_BRANCH) ||
1792 					Numit == MAXIT-1) finishlkl(cp);
1793 
1794 					partialsinternal(cp);
1795 				}
1796 			}
1797 			if (nconv >= Numbrnch) { /* convergence */
1798 				Converg = TRUE;
1799 				cp = rp; /* get out of here */
1800 			}
1801 		} while (cp != rp);
1802 	}
1803 
1804 	/* compute total log-likelihood for current tree */
1805 	return comptotloglkl(tr->condlkl);
1806 } /* optlkl */
1807 
1808 
1809 /* compute likelihood of tree for given branch lengths */
1810 double treelkl(Tree *tr)
1811 {
1812 	int i, k, r;
1813 	Node *cp;
1814 	dmatrix cdl;
1815 	dcube prob1, prob2;
1816 	double sumlk;
1817 
1818 	/* compute for each site and rate log-likelihoods */
1819 	initpartials(tr);
1820 	cp = tr->rootp;
1821 	productpartials(cp->isop);
1822 	prob1 = cp->partials;
1823 	prob2 = cp->isop->partials;
1824 	cdl = tr->condlkl;
1825 	for (r = 0; r < numcats; r++) {
1826 		for (k = 0; k < Numptrn; k++) {
1827 			sumlk = 0.0;
1828 			for (i = 0; i < tpmradix; i++)
1829 				sumlk += Freqtpm[i] * (prob1[r][k][i] * prob2[r][k][i]);
1830 			cdl[r][k] = sumlk;
1831 		}
1832 	}
1833 
1834 	/* return total log-likelihood for current tree */
1835 	return comptotloglkl(cdl);
1836 } /* treelkl */
1837 
1838 
1839 /******************************************************************************/
1840 /* least-squares estimate of branch lengths                                   */
1841 /******************************************************************************/
1842 
1843 
1844 /***************************** internal functions *****************************/
1845 
1846 
1847 void luequation(dmatrix amat, dvector yvec, int size)
1848 {
1849     double eps = 1.0e-20; /* ! */
1850 	int i, j, k, l, maxi=0, idx;
1851 	double sum, tmp, maxb, aw;
1852 	dvector wk;
1853 	ivector index;
1854 
1855 
1856 	wk = new_dvector(size);
1857 	index = new_ivector(size);
1858 	aw = 1.0;
1859 	for (i = 0; i < size; i++) {
1860 		maxb = 0.0;
1861 		for (j = 0; j < size; j++) {
1862 			if (fabs(amat[i][j]) > maxb)
1863 				maxb = fabs(amat[i][j]);
1864 		}
1865 		if (maxb == 0.0) {
1866 			/* Singular matrix */
1867 			fprintf(STDOUT, "\n\n\nHALT: PLEASE REPORT ERROR D TO DEVELOPERS\n\n\n");
1868 #			if PARALLEL
1869 				PP_Finalize();
1870 #			endif
1871 			exit(1);
1872 		}
1873 		wk[i] = 1.0 / maxb;
1874 	}
1875 	for (j = 0; j < size; j++) {
1876 		for (i = 0; i < j; i++) {
1877 			sum = amat[i][j];
1878 			for (k = 0; k < i; k++)
1879 				sum -= amat[i][k] * amat[k][j];
1880 			amat[i][j] = sum;
1881 		}
1882 		maxb = 0.0;
1883 		for (i = j; i < size; i++) {
1884 			sum = amat[i][j];
1885 			for (k = 0; k < j; k++)
1886 				sum -= amat[i][k] * amat[k][j];
1887 			amat[i][j] = sum;
1888 			tmp = wk[i] * fabs(sum);
1889 			if (tmp >= maxb) {
1890 				maxb = tmp;
1891 				maxi = i;
1892 			}
1893 		}
1894 		if (j != maxi) {
1895 			for (k = 0; k < size; k++) {
1896 				tmp = amat[maxi][k];
1897 				amat[maxi][k] = amat[j][k];
1898 				amat[j][k] = tmp;
1899 			}
1900 			aw = -aw;
1901 			wk[maxi] = wk[j];
1902 		}
1903 		index[j] = maxi;
1904 		if (amat[j][j] == 0.0)
1905 			amat[j][j] = eps;
1906 		if (j != size - 1) {
1907 			tmp = 1.0 / amat[j][j];
1908 			for (i = j + 1; i < size; i++)
1909 				amat[i][j] *= tmp;
1910 		}
1911 	}
1912 	l = -1;
1913 	for (i = 0; i < size; i++) {
1914 		idx = index[i];
1915 		sum = yvec[idx];
1916 		yvec[idx] = yvec[i];
1917 		if (l != -1) {
1918 			for (j = l; j < i; j++)
1919 				sum -= amat[i][j] * yvec[j];
1920 		} else if (sum != 0.0)
1921 			l = i;
1922 		yvec[i] = sum;
1923 	}
1924 	for (i = size - 1; i >= 0; i--) {
1925 		sum = yvec[i];
1926 		for (j = i + 1; j < size; j++)
1927 			sum -= amat[i][j] * yvec[j];
1928 		yvec[i] = sum / amat[i][i];
1929 	}
1930 	free_ivector(index);
1931 	free_dvector(wk);
1932 } /* luequation */
1933 
1934 
1935 /* least square estimation of branch lengths
1936    used for the approximate ML and as starting point
1937    in the calculation of the exact value of the ML */
1938 /*epe*/
1939 #if PARALLEL
1940 void lslength_par(Tree *tr, dvector distanvec, int numspc, int numibrnch, dvector Brnlength)
1941 {
1942 	int i, i1, j, j1, j2, k, numbrnch, numpair, initmsg[2];
1943 	unsigned long int isum;
1944 	double sum, leng, alllen, rss;
1945 	ivector pths, jobsize = new_ivector(PP_NumProcs), atamt_tmp_coord = new_ivector(PP_NumProcs);
1946 	dvector my_atamt_tmp, atamt_tmp;
1947 	ivector IPaths;
1948 	cmatrix atmt;
1949 	dmatrix atamt;
1950 	Node **ebp, **ibp;
1951 	int numjobs, my_jobsize, my_x=0, my_y=0;
1952 
1953 	if (PP_Myid != PP_MyMaster)
1954 	{
1955 		fprintf(STDOUT, "ERROR: Slave [%d] in parallel lslength()!\n", PP_Myid);
1956 		return;
1957 	}
1958 
1959 	numbrnch = numspc + numibrnch;
1960 	numpair = (numspc * (numspc - 1)) / 2;
1961 	atmt = new_cmatrix(numbrnch, numpair);
1962 	atamt = new_dmatrix(numbrnch, numbrnch);
1963 
1964 	initmsg[0] = numspc; initmsg[1] = numibrnch;
1965 	for (i=1; i<PP_NumProcs; i++) MPI_Send(initmsg, 2, MPI_INT, i, PP_LSLENGTH, PP_Comm);
1966 
1967 	ibp = tr->ibrnchp;
1968 	IPaths = new_ivector(numibrnch*numspc);
1969 	for (i=0; i<numibrnch; i++) for (j=0; j<numspc; j++)
1970 		IPaths[i*numspc+j] = ibp[i]->paths[j];
1971 
1972 	MPI_Bcast(IPaths, numibrnch*numspc, MPI_INT, PP_MyMaster, PP_Comm);
1973 	free_ivector(IPaths);
1974 
1975 	numjobs = (numbrnch * (numbrnch+1))/2;
1976 	my_jobsize = numjobs/PP_NumProcs;
1977 	if (numjobs%PP_NumProcs>0) my_jobsize++;
1978 	jobsize[0] = my_jobsize;
1979 	for (i=1; i<PP_NumProcs; i++)
1980 	{
1981 		jobsize[i] = numjobs/PP_NumProcs;
1982 		if (i < numjobs%PP_NumProcs) jobsize[i]++;
1983 		atamt_tmp_coord[i] = atamt_tmp_coord[i-1]+jobsize[i-1];
1984 	}
1985 
1986 	for (i = 0; i < numspc; i++) {
1987 		for (j1 = 1, j = 0; j1 < numspc; j1++) {
1988 			if (j1 == i) {
1989 				for (j2 = 0; j2 < j1; j2++, j++) {
1990 					atmt[i][j] = 1;
1991 				}
1992 			} else {
1993 				for (j2 = 0; j2 < j1; j2++, j++) {
1994 					if (j2 == i)
1995 						atmt[i][j] = 1;
1996 					else
1997 						atmt[i][j] = 0;
1998 				}
1999 			}
2000 		}
2001 	}
2002 	for (i1 = 0, i = numspc; i1 < numibrnch; i1++, i++) {
2003 		pths = ibp[i1]->paths;
2004 		for (j1 = 1, j = 0; j1 < numspc; j1++) {
2005 			for (j2 = 0; j2 < j1; j2++, j++) {
2006 				if (pths[j1] != pths[j2])
2007 					atmt[i][j] = 1;
2008 				else
2009 					atmt[i][j] = 0;
2010 			}
2011 		}
2012 	}
2013 	if (PP_Myid == PP_MyMaster)
2014 	{
2015 		my_atamt_tmp = new_dvector(my_jobsize);
2016 #ifdef VT
2017 		VT_begin(1020);
2018 #endif
2019 		for (i = 0; i < my_jobsize; i++) {
2020 			for (k = 0, isum = 0; k < numpair; k++)
2021 				if (atmt[my_x][k] && atmt[my_y][k]) isum ++;
2022 			my_atamt_tmp[i] = (double) isum;
2023 			PP_NextCoord_atamt(&my_x, &my_y, 1, numbrnch);
2024 		}
2025 #ifdef VT
2026 		VT_end(1020);
2027 #endif
2028 		atamt_tmp = new_dvector(numjobs);
2029 		MPI_Gatherv(my_atamt_tmp, my_jobsize, MPI_DOUBLE,
2030 			atamt_tmp, jobsize, atamt_tmp_coord, MPI_DOUBLE, PP_MyMaster, PP_Comm);
2031 		free_dvector(my_atamt_tmp);
2032 		k=0;
2033 		for (i = 0; i < numbrnch; i++) {
2034 			for (j = 0; j <= i; j++) {
2035 				atamt[i][j] = atamt_tmp[k];
2036 				atamt[j][i] = atamt_tmp[k];
2037 				k++;
2038 			}
2039 		}
2040 		free_dvector(atamt_tmp);
2041 	}
2042 	else
2043 	{
2044 #ifdef VT
2045 		VT_begin(1020);
2046 #endif
2047 		for (i = 0; i < numbrnch; i++) {
2048 			for (j = 0; j <= i; j++) {
2049 				for (k = 0, isum = 0; k < numpair; k++)
2050 					if (atmt[i][k] && atmt[j][k]) isum ++;
2051 				atamt[i][j] = (double) isum;
2052 				atamt[j][i] = (double) isum;
2053 			}
2054 		}
2055 	}
2056 	for (i = 0; i < numbrnch; i++) {
2057 		for (k = 0, sum = 0.0; k < numpair; k++)
2058 			if (atmt[i][k]) sum += distanvec[k];
2059 		Brnlength[i] = sum;
2060 	}
2061 	luequation(atamt, Brnlength, numbrnch);
2062 	for (i = 0, rss = 0.0; i < numpair; i++) {
2063 		sum = distanvec[i];
2064 		for (j = 0; j < numbrnch; j++) {
2065 			if (atmt[j][i] && Brnlength[j] > 0.0)
2066 				sum -= Brnlength[j];
2067 		}
2068 		rss += sum * sum;
2069 	}
2070 	tr->rssleast = sqrt(rss);
2071 	alllen = 0.0;
2072 	ebp = tr->ebrnchp;
2073 	for (i = 0; i < numspc; i++) {
2074 		leng = Brnlength[i];
2075 		alllen += leng;
2076 		if (leng < MINARC) leng = MINARC;
2077 		if (leng > MAXARC) leng = MAXARC;
2078 		if (clockmode) { /* clock */
2079 			ebp[i]->lengthc = leng;
2080 			ebp[i]->kinp->lengthc = leng;
2081 		} else { /* no clock */
2082 			ebp[i]->length = leng;
2083 			ebp[i]->kinp->length = leng;
2084 		}
2085 		Brnlength[i] = leng;
2086 	}
2087 	for (i = 0, j = numspc; i < numibrnch; i++, j++) {
2088 		leng = Brnlength[j];
2089 		alllen += leng;
2090 		if (leng < MINARC) leng = MINARC;
2091 		if (leng > MAXARC) leng = MAXARC;
2092 		if (clockmode) { /* clock */
2093 			ibp[i]->lengthc = leng;
2094 			ibp[i]->kinp->lengthc = leng;
2095 		} else { /* no clock */
2096 			ibp[i]->length = leng;
2097 			ibp[i]->kinp->length = leng;
2098 		}
2099 		Brnlength[j] = leng;
2100 	}
2101 	free_cmatrix(atmt);
2102 	free_dmatrix(atamt);
2103 } /* lslength_par */
2104 
2105 #endif /* PARALLEL */
2106 
2107 /* Ekki's sped-up version of lslength */
2108 void lslength(Tree *tr, dvector distanvec, int numspc, int numibrnch, dvector Brnlength)
2109 {
2110 	int i, i1, j, j1, j2, k, numbrnch, numpair;
2111 	unsigned long int isum;
2112 	double sum, leng, alllen, rss;
2113 	ivector pths;
2114 	cmatrix atmt;
2115 	dmatrix atamt;
2116 	Node **ebp, **ibp;
2117 
2118 	numbrnch = numspc + numibrnch;
2119 	numpair = (numspc * (numspc - 1)) / 2;
2120 	atmt = new_cmatrix(numbrnch, numpair);
2121 	atamt = new_dmatrix(numbrnch, numbrnch);
2122 	ebp = tr->ebrnchp;
2123 	ibp = tr->ibrnchp;
2124 
2125 	for (i = 0; i < numspc; i++) {
2126 		for (j1 = 1, j = 0; j1 < numspc; j1++) {
2127 			if (j1 == i) {
2128 				for (j2 = 0; j2 < j1; j2++, j++) {
2129 					atmt[i][j] = 1;
2130 				}
2131 			} else {
2132 				for (j2 = 0; j2 < j1; j2++, j++) {
2133 					if (j2 == i)
2134 						atmt[i][j] = 1;
2135 					else
2136 						atmt[i][j] = 0;
2137 				}
2138 			}
2139 		}
2140 	}
2141 	for (i1 = 0, i = numspc; i1 < numibrnch; i1++, i++) {
2142 		pths = ibp[i1]->paths;
2143 		for (j1 = 1, j = 0; j1 < numspc; j1++) {
2144 			for (j2 = 0; j2 < j1; j2++, j++) {
2145 				if (pths[j1] != pths[j2])
2146 					atmt[i][j] = 1;
2147 				else
2148 					atmt[i][j] = 0;
2149 			}
2150 		}
2151 	}
2152 	for (i = 0; i < numbrnch; i++) {
2153 		for (j = 0; j <= i; j++) {
2154 			for (k = 0, isum = 0; k < numpair; k++)
2155 				if (atmt[i][k] && atmt[j][k]) isum ++;
2156 			atamt[i][j] = (double) isum;
2157 			atamt[j][i] = (double) isum;
2158 		}
2159 	}
2160 	for (i = 0; i < numbrnch; i++) {
2161 		for (k = 0, sum = 0.0; k < numpair; k++)
2162 			sum += (double) atmt[i][k] * distanvec[k];
2163 		Brnlength[i] = sum;
2164 	}
2165 	luequation(atamt, Brnlength, numbrnch);
2166 	for (i = 0, rss = 0.0; i < numpair; i++) {
2167 		sum = distanvec[i];
2168 		for (j = 0; j < numbrnch; j++) {
2169 			if (atmt[j][i] && Brnlength[j] > 0.0)
2170 				sum -= Brnlength[j];
2171 		}
2172 		rss += sum * sum;
2173 	}
2174 	tr->rssleast = sqrt(rss);
2175 	alllen = 0.0;
2176 	for (i = 0; i < numspc; i++) {
2177 		leng = Brnlength[i];
2178 		alllen += leng;
2179 		if (leng < MINARC) leng = MINARC;
2180 		if (leng > MAXARC) leng = MAXARC;
2181 		if (clockmode) { /* clock */
2182 			ebp[i]->lengthc = leng;
2183 			ebp[i]->kinp->lengthc = leng;
2184 		} else { /* no clock */
2185 			ebp[i]->length = leng;
2186 			ebp[i]->kinp->length = leng;
2187 		}
2188 		Brnlength[i] = leng;
2189 	}
2190 	for (i = 0, j = numspc; i < numibrnch; i++, j++) {
2191 		leng = Brnlength[j];
2192 		alllen += leng;
2193 		if (leng < MINARC) leng = MINARC;
2194 		if (leng > MAXARC) leng = MAXARC;
2195 		if (clockmode) { /* clock */
2196 			ibp[i]->lengthc = leng;
2197 			ibp[i]->kinp->lengthc = leng;
2198 		} else { /* no clock */
2199 			ibp[i]->length = leng;
2200 			ibp[i]->kinp->length = leng;
2201 		}
2202 		Brnlength[j] = leng;
2203 	}
2204 	free_cmatrix(atmt);
2205 	free_dmatrix(atamt);
2206 } /* lslength */
2207 
2208 
2209 #if 0
2210 /* old slow least square fit */
2211 void lslength(Tree *tr, dvector distanvec, int numspc, int numibrnch, dvector Brnlength)
2212 {
2213 	int i, i1, j, j1, j2, k, numbrnch, numpair;
2214 	double sum, leng, alllen, rss;
2215 	ivector pths;
2216 	dmatrix atmt, atamt;
2217 	Node **ebp, **ibp;
2218 
2219 	numbrnch = numspc + numibrnch;
2220 	numpair = (numspc * (numspc - 1)) / 2;
2221 	atmt = new_dmatrix(numbrnch, numpair);
2222 	atamt = new_dmatrix(numbrnch, numbrnch);
2223 	ebp = tr->ebrnchp;
2224 	ibp = tr->ibrnchp;
2225 	for (i = 0; i < numspc; i++) {
2226 		for (j1 = 1, j = 0; j1 < numspc; j1++) {
2227 			if (j1 == i) {
2228 				for (j2 = 0; j2 < j1; j2++, j++) {
2229 					atmt[i][j] = 1.0;
2230 				}
2231 			} else {
2232 				for (j2 = 0; j2 < j1; j2++, j++) {
2233 					if (j2 == i)
2234 						atmt[i][j] = 1.0;
2235 					else
2236 						atmt[i][j] = 0.0;
2237 				}
2238 			}
2239 		}
2240 	}
2241 	for (i1 = 0, i = numspc; i1 < numibrnch; i1++, i++) {
2242 		pths = ibp[i1]->paths;
2243 		for (j1 = 1, j = 0; j1 < numspc; j1++) {
2244 			for (j2 = 0; j2 < j1; j2++, j++) {
2245 				if (pths[j1] != pths[j2])
2246 					atmt[i][j] = 1.0;
2247 				else
2248 					atmt[i][j] = 0.0;
2249 			}
2250 		}
2251 	}
2252 	for (i = 0; i < numbrnch; i++) {
2253 		for (j = 0; j <= i; j++) {
2254 			for (k = 0, sum = 0.0; k < numpair; k++)
2255 				sum += atmt[i][k] * atmt[j][k];
2256 			atamt[i][j] = sum;
2257 			atamt[j][i] = sum;
2258 		}
2259 	}
2260 	for (i = 0; i < numbrnch; i++) {
2261 		for (k = 0, sum = 0.0; k < numpair; k++)
2262 			sum += atmt[i][k] * distanvec[k];
2263 		Brnlength[i] = sum;
2264 	}
2265 	luequation(atamt, Brnlength, numbrnch);
2266 	for (i = 0, rss = 0.0; i < numpair; i++) {
2267 		sum = distanvec[i];
2268 		for (j = 0; j < numbrnch; j++) {
2269 			if (atmt[j][i] == 1.0 && Brnlength[j] > 0.0)
2270 				sum -= Brnlength[j];
2271 		}
2272 		rss += sum * sum;
2273 	}
2274 	tr->rssleast = sqrt(rss);
2275 	alllen = 0.0;
2276 	for (i = 0; i < numspc; i++) {
2277 		leng = Brnlength[i];
2278 		alllen += leng;
2279 		if (leng < MINARC) leng = MINARC;
2280 		if (leng > MAXARC) leng = MAXARC;
2281 		if (clockmode) { /* clock */
2282 			ebp[i]->lengthc = leng;
2283 			ebp[i]->kinp->lengthc = leng;
2284 		} else { /* no clock */
2285 			ebp[i]->length = leng;
2286 			ebp[i]->kinp->length = leng;
2287 		}
2288 		Brnlength[i] = leng;
2289 	}
2290 	for (i = 0, j = numspc; i < numibrnch; i++, j++) {
2291 		leng = Brnlength[j];
2292 		alllen += leng;
2293 		if (leng < MINARC) leng = MINARC;
2294 		if (leng > MAXARC) leng = MAXARC;
2295 		if (clockmode) { /* clock */
2296 			ibp[i]->lengthc = leng;
2297 			ibp[i]->kinp->lengthc = leng;
2298 		} else { /* no clock */
2299 			ibp[i]->length = leng;
2300 			ibp[i]->kinp->length = leng;
2301 		}
2302 		Brnlength[j] = leng;
2303 	}
2304 	free_dmatrix(atmt);
2305 	free_dmatrix(atamt);
2306 } /* lslength */
2307 #endif
2308 
2309 /* set branch lengths to the externally given lengths  (-usebranchlen) */
2310 void setextlength(Tree *tr, int numspc, int numibrnch, dvector Brnlength)
2311 {
2312 	int i, j, numbrnch;
2313 	double leng, alllen;
2314 	Node **ebp, **ibp;
2315 
2316 	numbrnch = numspc + numibrnch;
2317 	ebp = tr->ebrnchp;
2318 	ibp = tr->ibrnchp;
2319 
2320 	alllen = 0.0;
2321 	for (i = 0; i < numspc; i++) {
2322 		if (ebp[i]->lengthset) {
2323 			leng = ebp[i]->lengthext;
2324 		} else {
2325 			if (ebp[i]->kinp->lengthset) {
2326 				leng = ebp[i]->kinp->lengthext;
2327 			} else {
2328 				fprintf(stderr, "WARNING: no branchlength set, defaults to 0.0!!!");
2329 				leng = 0.0;
2330 			}
2331 		}
2332 		alllen += leng;
2333 		if (leng < MINARC) leng = MINARC;
2334 		if (leng > MAXARC) leng = MAXARC;
2335 		if (clockmode) { /* clock */
2336 			ebp[i]->lengthc = leng;
2337 			ebp[i]->kinp->lengthc = leng;
2338 		} else { /* no clock */
2339 			ebp[i]->length = leng;
2340 			ebp[i]->kinp->length = leng;
2341 		}
2342 		Brnlength[i] = leng;
2343 	}
2344 	for (i = 0, j = numspc; i < numibrnch; i++, j++) {
2345 		if (ibp[i]->lengthset) {
2346 			leng = ibp[i]->lengthext;
2347 		} else {
2348 			if (ibp[i]->kinp->lengthset) {
2349 				leng = ibp[i]->kinp->lengthext;
2350 			} else {
2351 				fprintf(stderr, "WARNING: no branchlength set, defaults to 0.0!!!");
2352 				leng = 0.0;
2353 			}
2354 		}
2355 		alllen += leng;
2356 		if (leng < MINARC) leng = MINARC;
2357 		if (leng > MAXARC) leng = MAXARC;
2358 		if (clockmode) { /* clock */
2359 			ibp[i]->lengthc = leng;
2360 			ibp[i]->kinp->lengthc = leng;
2361 		} else { /* no clock */
2362 			ibp[i]->length = leng;
2363 			ibp[i]->kinp->length = leng;
2364 		}
2365 		Brnlength[j] = leng;
2366 	}
2367 } /* setextlength */
2368 
2369 
2370