1 /*
2 PhyML: a program that computes maximum likelihood phylogenies from
3 DNA or AA homologous sequences.
4
5 Copyright (C) Stephane Guindon. Oct 2003 onward.
6
7 All parts of the source except where indicated are distributed under
8 the GNU public licence. See http://www.opensource.org for details.
9
10 */
11
12 #include "utilities.h"
13 #include "tbe.h"
14 #include "assert.h"
15
16 #ifdef BEAGLE
17 #include "beagle_utils.h"
18 #endif
19
20 int CALL;
21 int TIME;
22
23 //////////////////////////////////////////////////////////////
24 //////////////////////////////////////////////////////////////
25
String_To_Dbl(char * string)26 phydbl String_To_Dbl(char *string)
27 {
28 phydbl buff;
29 char *endptr;
30
31 if(!string)
32 {
33 PhyML_Fprintf(stderr,"\n. String object empty.");
34 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
35 }
36
37 errno = !ERANGE;
38 buff = strtod(string,&endptr);
39
40 if(string == endptr || errno == ERANGE)
41 {
42 PhyML_Printf("\n. Error in translating string '%s' to double.",string);
43 PhyML_Printf("\n. %d",errno == ERANGE);
44 PhyML_Printf("\n. buff = %f",buff);
45 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
46 }
47 return buff;
48 }
49
50 //////////////////////////////////////////////////////////////
51 //////////////////////////////////////////////////////////////
52
/* Convert a character string holding a base-10 integer to an int.

   string: NUL-terminated text handed to strtol(); must not be NULL.

   Returns the parsed value. Generic_Exit() is called when 'string' is NULL,
   when strtol() could not convert any character, when it reported a range
   error, or when the parsed value does not fit in an int (it used to be
   silently truncated by the cast). */
int String_To_Int(char *string)
{
  /* Limits of 'int', derived without relying on <limits.h> being in scope. */
  const long int_max = (long)(~0U >> 1);
  const long int_min = -int_max - 1L;
  long parsed;
  char *endptr;
  int range_err;

  if(!string)
    {
      PhyML_Fprintf(stderr,"\n. String object empty.");
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  /* Clear errno so that only a failure of this very call is detected. */
  errno = 0;
  parsed = strtol(string,&endptr,10);
  range_err = (errno == ERANGE);

  if(string == endptr || range_err || parsed > int_max || parsed < int_min)
    {
      PhyML_Fprintf(stderr,"\n. Error in translating string '%s' to integer.",string);
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  return (int)parsed;
}
75
76 //////////////////////////////////////////////////////////////
77 //////////////////////////////////////////////////////////////
78
/* Rewrite the three strings in 'subtrees' so that the root is removed.

   subtrees: array of at least three writable strings. subtrees[0] and
             subtrees[1] are read; all three entries are overwritten.

   The first string is decomposed with Sub_Trees(). When it has at least two
   components, subtrees[1] moves to subtrees[2] and the first two components
   (after Clean_Multifurcation()) become subtrees[0] and subtrees[1].
   Otherwise the same is done with the roles of subtrees[0] and subtrees[1]
   exchanged. */
void Unroot_Tree(char **subtrees)
{
  char **tmp_sub;
  int degree,i,j;

  PhyML_Printf("\n. Removing the root...\n");

  tmp_sub = Sub_Trees(subtrees[0],&degree);
  if(degree >= 2)
    {
      strcpy(subtrees[2],subtrees[1]);
      Clean_Multifurcation(tmp_sub,degree,2);
      for(j=0;j<2;j++) strcpy(subtrees[j],tmp_sub[j]);
    }
  else
    {
      /* Release the first decomposition before its pointer is overwritten;
         it used to be leaked. The release mirrors the one done at the end of
         this function. NOTE(review): the NULL guard assumes Sub_Trees() may
         return NULL for a non-decomposable string -- confirm. */
      if(tmp_sub != NULL)
        {
          for(i=0;i<degree;i++) Free(tmp_sub[i]);
          Free(tmp_sub);
        }

      tmp_sub = Sub_Trees(subtrees[1],&degree);
      strcpy(subtrees[2],subtrees[0]);
      Clean_Multifurcation(tmp_sub,degree,2);
      for(j=0;j<2;j++) strcpy(subtrees[j],tmp_sub[j]);
    }

  for(i=0;i<degree;i++) Free(tmp_sub[i]);
  Free(tmp_sub);
}
104
105 //////////////////////////////////////////////////////////////
106 //////////////////////////////////////////////////////////////
107
/* Fill in the direction indices stored on edge 'b'.

   b:    edge whose l_r, r_l, l_v1, l_v2, r_v1 and r_v2 fields are rewritten.
   a, d: the two nodes at the ends of 'b'. 'a' must not be b->rght and 'd'
         must not be b->left, otherwise Generic_Exit() is called.
   tree: not used here.

   Side effect: a->b[] and d->b[] are pointed at 'b' in the slots where the
   two nodes reference each other through their v[] arrays. */
void Set_Edge_Dirs(t_edge *b, t_node *a, t_node *d, t_tree *tree)
{
  int dir;

  if(b->rght == a)
    {
      PhyML_Fprintf(stderr,"\n. a->num = %d ; d->num = %d",a->num,d->num);
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  if(b->left == d)
    {
      PhyML_Fprintf(stderr,"\n. a->num = %d ; d->num = %d",a->num,d->num);
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  /* -1 means "direction not found" */
  b->l_r = b->r_l = -1;

  for(dir=0;dir<3;dir++)
    {
      /* 'a' is taken to be on the left-hand side of 'b' */
      if(a->v[dir] != NULL && a->v[dir] == d)
        {
          b->l_r    = dir;
          a->b[dir] = b;
        }

      /* 'd' is taken to be on the right-hand side of 'b' */
      if(d->v[dir] != NULL && d->v[dir] == a)
        {
          b->r_l    = dir;
          d->b[dir] = b;
        }
    }

  /* When 'a' is a tip, r_l is forced to 0 and l_r becomes the slot that
     holds 'a' among the neighbours of 'd'. */
  if(a->tax)
    {
      b->r_l = 0;
      for(dir=0;dir<3;dir++)
        {
          if(d->v[dir] == a)
            {
              b->l_r = dir;
              break;
            }
        }
    }

  /* On each side of the edge, record the slots that do not lead back across
     it: the first one found goes to *_v1, any later one to *_v2. */
  b->l_v1 = b->l_v2 = b->r_v1 = b->r_v2 = -1;
  for(dir=0;dir<3;dir++)
    {
      if(b->left->v[dir] != b->rght)
        {
          if(b->l_v1 >= 0) b->l_v2 = dir;
          else             b->l_v1 = dir;
        }

      if(b->rght->v[dir] != b->left)
        {
          if(b->r_v1 >= 0) b->r_v2 = dir;
          else             b->r_v1 = dir;
        }
    }
}
159
160 //////////////////////////////////////////////////////////////
161 //////////////////////////////////////////////////////////////
162
/* Keep only one position out of every three in each sequence.

   data: array of io->n_otu sequences, edited in place.
   io:   io->codpos selects the position to keep; -1 disables the function.
         The first character kept is at index io->codpos-1.
         NOTE(review): io->codpos == 0 would start reading at index -1 --
         confirm that callers only pass -1 or 1..3.

   For every sequence the retained characters are packed at the start of
   'state' and 'len' is set to the number of characters actually kept. The
   previous 'len /= 3' gave a different count when the length was not a
   multiple of three. */
void Restrict_To_Coding_Position(align **data, option *io)
{
  int i,j,curr_pos;

  if(io->codpos == -1) return;

  for(i=0;i<io->n_otu;i++)
    {
      curr_pos = 0;
      for(j=io->codpos-1;j<data[i]->len;j+=3)
        {
          data[i]->state[curr_pos] = data[i]->state[j];
          curr_pos++;
        }

      /* Identical to len/3 whenever len is a multiple of three. */
      data[i]->len = curr_pos;
    }
}
181
182 //////////////////////////////////////////////////////////////
183 //////////////////////////////////////////////////////////////
184
/* Convert the character pointed to by 'ch' to upper case, in place.
   Characters that are already upper case, or have no upper-case form, are
   left untouched. */
void Uppercase(char *ch)
{
  /* <ctype.h> functions require a value representable as unsigned char (or
     EOF); a plain char may be negative, so convert before calling them. */
  unsigned char c = (unsigned char)*ch;

  if(!isupper(c)) *ch = (char)toupper(c);
}
190
191 //////////////////////////////////////////////////////////////
192 //////////////////////////////////////////////////////////////
193
/* Convert the character pointed to by 'ch' to lower case, in place.
   Only characters classified as upper case are modified. */
void Lowercase(char *ch)
{
  /* <ctype.h> functions require a value representable as unsigned char (or
     EOF); a plain char may be negative, so convert before calling them. */
  unsigned char c = (unsigned char)*ch;

  if(isupper(c)) *ch = (char)tolower(c);
}
199
200 //////////////////////////////////////////////////////////////
201 //////////////////////////////////////////////////////////////
202
Compact_Data(align ** data,option * io)203 calign *Compact_Data(align **data, option *io)
204 {
205 calign *cdata_tmp,*cdata;
206 int i,j,k,site;
207 int n_patt,which_patt;
208 char **sp_names;
209 int n_otu;
210 pnode *proot;
211 int compress;
212 int n_ambigu,is_ambigu;
213 scalar_dbl *io_wght;
214 phydbl len,inc,n_invar;
215
216
217 n_otu = io->n_otu;
218 n_patt = 0;
219 which_patt = 0;
220
221 sp_names = (char **)mCalloc(n_otu,sizeof(char *));
222 for(i=0;i<n_otu;i++)
223 {
224 sp_names[i] = (char *)mCalloc(T_MAX_NAME,sizeof(char));
225 strcpy(sp_names[i],data[i]->name);
226 }
227
228 cdata_tmp = Make_Calign(n_otu,data[0]->len,io->state_len,data[0]->len,sp_names,0,NULL);
229 Init_Calign(n_otu,data[0]->len,data[0]->len,cdata_tmp);
230
231 proot = (pnode *)Create_Pnode(T_MAX_ALPHABET);
232
233 for(i=0;i<n_otu;i++) Free(sp_names[i]);
234 Free(sp_names);
235
236 if(data[0]->len%io->state_len)
237 {
238 PhyML_Fprintf(stderr,"\n. Sequence length is not a multiple of %d\n",io->state_len);
239 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
240 }
241
242 // Read in weights given in input file
243 io_wght = NULL;
244 if(io->has_io_weights == YES)
245 {
246 io_wght = Read_Io_Weights(io);
247 if(Scalar_Len(io_wght) - data[0]->len > 0)
248 {
249 PhyML_Fprintf(stderr,"\n. Sequence length (%d) differs from number of weights (%d).\n",
250 data[0]->len,
251 Scalar_Len(io_wght));
252 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
253 }
254 }
255
256 compress = io->colalias;
257 n_ambigu = 0;
258 is_ambigu = NO;
259
260 if(!io->quiet && !compress) PhyML_Printf("\n. WARNING: sequences are not compressed !\n");
261
262 inc = -1.0;
263 len = 0.0;
264 Fors(site,data[0]->len,io->state_len)
265 {
266 if(io->has_io_weights == YES)
267 inc = Scalar_Elem(site,io_wght);
268 else
269 inc = 1.;
270
271 // Sequence length taking into account input weights, if any
272 len += inc;
273
274 if(io->rm_ambigu == YES)
275 {
276 is_ambigu = NO;
277 for(j=0;j<n_otu;j++) if(Is_Ambigu(data[j]->state+site,io->datatype,io->state_len)) break;
278 if(j != n_otu)
279 {
280 is_ambigu = YES;
281 n_ambigu++;
282 }
283 }
284
285 if(!is_ambigu)
286 {
287 if(compress)
288 {
289 which_patt = -1;
290
291 Traverse_Prefix_Tree(site,-1,&which_patt,&n_patt,data,io,proot);
292 if(which_patt == n_patt-1) /* New pattern found */
293 {
294 n_patt--;
295 k = n_patt;
296 }
297 else
298 {
299 k = n_patt-10;
300 }
301 }
302 else
303 {
304 k = n_patt;
305 }
306
307 if(k == n_patt) /* add a new site pattern */
308 {
309 for(j=0;j<n_otu;j++)
310 Copy_One_State(data[j]->state+site,
311 cdata_tmp->c_seq[j]->state+n_patt*io->state_len,
312 io->state_len);
313
314 for(j=0;j<n_otu;j++) cdata_tmp->c_seq[j]->state[n_patt*io->state_len+1] = '\0';
315
316 for(i=0;i<n_otu;i++)
317 {
318 for(j=0;j<n_otu;j++)
319 {
320 if(!(Are_Compatible(cdata_tmp->c_seq[i]->state+n_patt*io->state_len,
321 cdata_tmp->c_seq[j]->state+n_patt*io->state_len,
322 io->state_len,
323 io->datatype))) break;
324 }
325 if(j != n_otu) break;
326 }
327
328 if((j == n_otu) && (i == n_otu)) /* all characters at that site are compatible with one another:
329 the site may be invariant */
330 {
331 for(j=0;j<n_otu;j++)
332 {
333 cdata_tmp->invar[n_patt] = Assign_State(cdata_tmp->c_seq[j]->state+n_patt*io->state_len,
334 io->datatype,
335 io->state_len);
336
337 if(cdata_tmp->invar[n_patt] > -1.) break;
338 }
339 }
340 else cdata_tmp->invar[n_patt] = -1;
341
342 cdata_tmp->sitepatt[site] = n_patt;
343 cdata_tmp->wght[n_patt] += inc;
344 n_patt += 1;
345 }
346 else
347 {
348 cdata_tmp->sitepatt[site] = which_patt;
349 cdata_tmp->wght[which_patt] += inc;
350 }
351 }
352 }
353
354 data[0]->len -= n_ambigu;
355
356 cdata_tmp->init_len = data[0]->len;
357 cdata_tmp->crunch_len = n_patt;
358 for(i=0;i<n_otu;i++) cdata_tmp->c_seq[i]->len = n_patt;
359 for(i=0;i<n_otu;i++) cdata_tmp->c_seq[i]->num = i;
360
361 if(!io->quiet) PhyML_Printf("\n. %d patterns found (out of a total of %d sites). \n",n_patt,data[0]->len);
362
363 if((io->rm_ambigu == YES) && (n_ambigu > 0)) PhyML_Printf("\n. Removed %d columns of the alignment as they contain ambiguous characters (e.g., gaps) \n",n_ambigu);
364
365 n_invar=0.0;
366 for(i=0;i<cdata_tmp->crunch_len;i++) if(cdata_tmp->invar[i] > -1.) n_invar+=cdata_tmp->wght[i];
367
368 if(io->quiet == NO)
369 {
370 if((n_invar - ceil(n_invar)) < 1.E-10)
371 PhyML_Printf("\n. %d sites without polymorphism (%.2f%c).\n",(int)n_invar,100.*(phydbl)n_invar/len,'%');
372 else
373 PhyML_Printf("\n. %f sites without polymorphism (%.2f%c).\n",n_invar,100.*(phydbl)n_invar/len,'%');
374 }
375
376 cdata_tmp->obs_pinvar = (phydbl)n_invar/len;
377
378 cdata_tmp->io = io;
379
380
381 if(io->datatype == NT) Get_Base_Freqs(cdata_tmp);
382 else if(io->datatype == AA) Get_AA_Freqs(cdata_tmp);
383 else {/* Uniform state frequency distribution.*/}
384
385 cdata = Copy_Cseq(cdata_tmp,io);
386
387 Free_Calign(cdata_tmp);
388 Free_Prefix_Tree(proot,T_MAX_ALPHABET);
389
390 Check_Ambiguities(cdata,io->datatype,io->state_len);
391 Set_D_States(cdata,io->datatype,io->state_len);
392
393 if(io_wght != NULL) Free_Scalar_Dbl(io_wght);
394
395 return cdata;
396 }
397
398 //////////////////////////////////////////////////////////////
399 //////////////////////////////////////////////////////////////
400
401
/* Build a new compressed alignment by merging identical patterns of 'data'.

   data: compressed alignment to read; patterns whose weight is not strictly
         positive are ignored.
   io:   options read here: state_len, datatype and mod->ns (size of the
         observed state frequency vector).

   Each retained pattern is compared, over io->state_len characters per
   sequence, with the patterns already stored. A pattern never seen before
   is appended and its 'invar' entry computed; otherwise its weight is added
   to the matching pattern.

   Returns a newly allocated alignment whose init_len is data->crunch_len
   and whose crunch_len is the number of distinct patterns found. */
calign *Compact_Cdata(calign *data, option *io)
{
  calign *cdata;
  int i,j,k,site;
  int n_patt,which_patt;
  int n_otu;

  n_otu = data->n_otu;

  cdata                = (calign *)mCalloc(1,sizeof(calign));
  cdata->n_otu         = n_otu;
  cdata->c_seq         = (align **)mCalloc(n_otu,sizeof(align *));
  cdata->wght          = (phydbl *)mCalloc(data->crunch_len,sizeof(phydbl));
  cdata->obs_state_frq = (phydbl *)mCalloc(io->mod->ns,sizeof(phydbl));
  cdata->ambigu        = (short int *)mCalloc(data->crunch_len,sizeof(short int));
  cdata->invar         = (short int *)mCalloc(data->crunch_len,sizeof(short int));

  cdata->crunch_len = cdata->init_len = -1;
  for(j=0;j<n_otu;j++)
    {
      cdata->c_seq[j]            = (align *)mCalloc(1,sizeof(align));
      cdata->c_seq[j]->name      = (char *)mCalloc(T_MAX_NAME,sizeof(char));
      strcpy(cdata->c_seq[j]->name,data->c_seq[j]->name);
      /* Pattern k occupies state[k*state_len .. (k+1)*state_len-1], so the
         buffer needs crunch_len*state_len characters; it used to hold only
         crunch_len. The extra zeroed byte keeps the buffer NUL-terminated. */
      cdata->c_seq[j]->state     = (char *)mCalloc(data->crunch_len*io->state_len+1,sizeof(char));
      cdata->c_seq[j]->is_ambigu = (short int *)mCalloc(data->crunch_len,sizeof(short int));
      cdata->c_seq[j]->state[0]  = data->c_seq[j]->state[0];
    }

  n_patt = which_patt = 0;

  for(site=0;site<data->crunch_len;site++)
    {
      if(data->wght[site] > 0.0)
        {
          /* Look for an already stored pattern identical to this one */
          for(k=0;k<n_patt;k++)
            {
              for(j=0;j<n_otu;j++)
                {
                  if(strncmp(cdata->c_seq[j]->state+k*io->state_len,
                             data->c_seq[j]->state+site*io->state_len,
                             io->state_len))
                    break;
                }

              if(j == n_otu)
                {
                  which_patt = k;
                  break;
                }
            }

          if(k == n_patt) /* no match: append a new pattern */
            {
              for(j=0;j<n_otu;j++) Copy_One_State(data->c_seq[j]->state+site*io->state_len,
                                                  cdata->c_seq[j]->state+n_patt*io->state_len,
                                                  io->state_len);

              /* Pairwise compatibility of all the states at this pattern */
              for(i=0;i<n_otu;i++)
                {
                  for(j=0;j<n_otu;j++)
                    {
                      if(!(Are_Compatible(cdata->c_seq[i]->state+n_patt*io->state_len,
                                          cdata->c_seq[j]->state+n_patt*io->state_len,
                                          io->state_len,
                                          io->datatype))) break;
                    }
                  if(j != n_otu) break;
                }

              if((j == n_otu) && (i == n_otu))
                {
                  /* Keep the first state for which Assign_State() returns a
                     non-negative value */
                  for(j=0;j<n_otu;j++)
                    {
                      cdata->invar[n_patt] = Assign_State(cdata->c_seq[j]->state+n_patt*io->state_len,
                                                          io->datatype,
                                                          io->state_len);

                      if(cdata->invar[n_patt] > -1.) break;
                    }
                }
              else cdata->invar[n_patt] = -1;

              cdata->wght[n_patt] += data->wght[site];
              n_patt += 1;
            }
          else cdata->wght[which_patt] += data->wght[site];

          /* Print_Site(cdata,k,n_otu,"\n",io->stepsize); */
        }
    }

  cdata->init_len   = data->crunch_len;
  cdata->crunch_len = n_patt;
  for(i=0;i<n_otu;i++) cdata->c_seq[i]->len = n_patt;

  if(io->datatype == NT)      Get_Base_Freqs(cdata);
  else if(io->datatype == AA) Get_AA_Freqs(cdata);
  else {/* Not implemented yet */}

  return cdata;
}
504
505 //////////////////////////////////////////////////////////////
506 //////////////////////////////////////////////////////////////
507
/* Walk down the prefix tree along the column that starts at 'site', one
   level per sequence, creating the missing nodes on the way.

   site:     offset of the column in each data[]->state string.
   seqnum:   index of the sequence that led to node 'n' (-1 at the root).
   patt_num: output; receives the pattern number stored on the final node.
   n_patt:   in/out; incremented when the final node is reached for the
             first time, in which case the node takes the previous value as
             its pattern number.
   data, io: sequences and options (n_otu, datatype, state_len).
   n:        current node of the prefix tree. */
void Traverse_Prefix_Tree(int site, int seqnum, int *patt_num, int *n_patt, align **data, option *io, pnode *n)
{
  if(seqnum == io->n_otu-1)
    {
      /* Every sequence has been consumed: 'n' stands for the whole column */
      n->weight++;
      if(n->weight == 1)
        {
          n->num = *n_patt;
          (*n_patt) += 1;
        }
      (*patt_num) = n->num;
      return;
    }
  else
    {
      int next_state;

      next_state = Assign_State_With_Ambiguity(data[seqnum+1]->state+site,
                                               io->datatype,
                                               io->state_len);

      /* Child tables are created with T_MAX_ALPHABET slots: refuse any
         index that would fall outside of them. */
      assert(next_state >= 0 && next_state < T_MAX_ALPHABET);

      if(!n->next[next_state]) n->next[next_state] = Create_Pnode(T_MAX_ALPHABET);
      Traverse_Prefix_Tree(site,seqnum+1,patt_num,n_patt,data,io,n->next[next_state]);
    }
}
534
535 //////////////////////////////////////////////////////////////
536 //////////////////////////////////////////////////////////////
537
538
Create_Pnode(int size)539 pnode *Create_Pnode(int size)
540 {
541 pnode *n;
542 int i;
543
544 n = (pnode *)mCalloc(1,sizeof(pnode ));
545 n->next = (pnode **)mCalloc(size,sizeof(pnode *));
546 for(i=0;i<size;i++) n->next[i] = NULL;
547 n->weight = 0;
548 n->num = -1;
549 return n;
550 }
551 //////////////////////////////////////////////////////////////
552 //////////////////////////////////////////////////////////////
553
554 //////////////////////////////////////////////////////////////
555 //////////////////////////////////////////////////////////////
556
/* Estimate the nucleotide frequencies of a compressed alignment and store
   them in data->obs_state_frq[0..3], in the order A, C, G, T.

   Counts are weighted by data->wght[]. 'U' is counted as T. An ambiguity
   code spreads its weight over the bases it stands for, in proportion to
   the current frequency estimates; 'N', 'X', '?', 'O' and '-' spread it
   over all four bases. The estimates start at 0.25 each and are refined
   over eight passes. Any other character is ignored.

   When nothing is counted at all, the previous estimates are kept instead
   of dividing by zero; for an empty alignment this yields 0.25 for each
   base. */
void Get_Base_Freqs(calign *data)
{
  int i,j,k;
  phydbl A,C,G,T;
  phydbl fA,fC,fG,fT;
  phydbl w,sum;

  fA = fC = fG = fT = .25;

  for(k=0;k<8;k++)
    {
      A = C = G = T = .0;
      for(i=0;i<data->n_otu;i++)
        {
          for(j=0;j<data->crunch_len;j++)
            {
              w = data->wght[j];
              if(w)
                {
                  switch(data->c_seq[i]->state[j])
                    {
                    case 'A' : A+=w;
                      break;
                    case 'C' : C+=w;
                      break;
                    case 'G' : G+=w;
                      break;
                    case 'T' : T+=w;
                      break;
                    case 'U' : T+=w;
                      break;
                    case 'M' : C+=w*fC/(fC+fA); A+=w*fA/(fA+fC);
                      break;
                    case 'R' : G+=w*fG/(fA+fG); A+=w*fA/(fA+fG);
                      break;
                    case 'W' : T+=w*fT/(fA+fT); A+=w*fA/(fA+fT);
                      break;
                    case 'S' : C+=w*fC/(fC+fG); G+=w*fG/(fC+fG);
                      break;
                    case 'Y' : C+=w*fC/(fC+fT); T+=w*fT/(fT+fC);
                      break;
                    case 'K' : G+=w*fG/(fG+fT); T+=w*fT/(fT+fG);
                      break;
                    case 'B' : C+=w*fC/(fC+fG+fT); G+=w*fG/(fC+fG+fT); T+=w*fT/(fC+fG+fT);
                      break;
                    case 'D' : A+=w*fA/(fA+fG+fT); G+=w*fG/(fA+fG+fT); T+=w*fT/(fA+fG+fT);
                      break;
                    case 'H' : A+=w*fA/(fA+fC+fT); C+=w*fC/(fA+fC+fT); T+=w*fT/(fA+fC+fT);
                      break;
                    case 'V' : A+=w*fA/(fA+fC+fG); C+=w*fC/(fA+fC+fG); G+=w*fG/(fA+fC+fG);
                      break;
                    case 'N' : case 'X' : case '?' : case 'O' : case '-' :
                      A+=w*fA; C+=w*fC; G+=w*fG; T+=w*fT; break;
                    default : break;
                    }
                }
            }
        }

      sum = A+C+G+T;

      /* Nothing was counted: keep the current estimates rather than turning
         them into NaN. */
      if(!(sum > .0)) break;

      fA = A/sum;
      fC = C/sum;
      fG = G/sum;
      fT = T/sum;
    }

  data->obs_state_frq[0] = fA;
  data->obs_state_frq[1] = fC;
  data->obs_state_frq[2] = fG;
  data->obs_state_frq[3] = fT;
}
626
627 //////////////////////////////////////////////////////////////
628 //////////////////////////////////////////////////////////////
629
/* Estimate the amino-acid frequencies of a compressed alignment and store
   them in data->obs_state_frq[0..19], in the order
   A R N D C Q E G H I L K M F P S T W Y V.

   Counts are weighted by data->wght[]. 'Z' is counted as Q. 'X', '?', 'O'
   and '-' spread their weight over the twenty amino acids in proportion to
   the current frequency estimates, which start at 1/20 each and are refined
   over eight passes. Any other character is ignored.

   The weight is read as a phydbl, as in Get_Base_Freqs(): it used to be
   stored in an int, which truncated fractional weights. When nothing is
   counted at all, the previous estimates are kept instead of dividing by
   zero. */
void Get_AA_Freqs(calign *data)
{
  /* Order in which counts are accumulated and summed */
  static const char count_order[] = "ACDEFGHIKLMNPQRSTVWY";
  /* Order of the entries of data->obs_state_frq */
  static const char frq_order[]   = "ARNDCQEGHILKMFPSTWYV";
  phydbl count[20],freq[20];
  phydbl w,sum;
  int i,j,k,s,t;
  char c;

  for(s=0;s<20;s++) freq[s] = 1./20.;

  for(k=0;k<8;k++)
    {
      for(s=0;s<20;s++) count[s] = .0;

      for(i=0;i<data->n_otu;i++)
        {
          for(j=0;j<data->crunch_len;j++)
            {
              w = data->wght[j];
              if(w)
                {
                  c = data->c_seq[i]->state[j];

                  if(c == 'Z') c = 'Q';

                  if(c == 'X' || c == '?' || c == 'O' || c == '-')
                    {
                      /* Unknown residue: share the weight between all states */
                      for(s=0;s<20;s++) count[s] += w*freq[s];
                    }
                  else
                    {
                      /* Characters not found in count_order are ignored */
                      for(s=0;s<20;s++)
                        {
                          if(count_order[s] == c)
                            {
                              count[s] += w;
                              break;
                            }
                        }
                    }
                }
            }
        }

      sum = .0;
      for(s=0;s<20;s++) sum += count[s];

      /* Nothing was counted: keep the current estimates rather than turning
         them into NaN. */
      if(!(sum > .0)) break;

      for(s=0;s<20;s++) freq[s] = count[s]/sum;
    }

  /* Copy the estimates over, from counting order to storage order */
  for(t=0;t<20;t++)
    {
      for(s=0;s<20;s++)
        {
          if(count_order[s] == frq_order[t])
            {
              data->obs_state_frq[t] = freq[s];
              break;
            }
        }
    }
}
717
718 //////////////////////////////////////////////////////////////
719 //////////////////////////////////////////////////////////////
720
721 // Swap the nodes on the left and right of e1 with the nodes
722 // on the left and right of e2 respectively, or on the
723 // right and left of e2 if swap == YES
724
Swap_Nodes_On_Edges(t_edge * e1,t_edge * e2,int swap,t_tree * tree)725 void Swap_Nodes_On_Edges(t_edge *e1, t_edge *e2, int swap, t_tree *tree)
726 {
727 t_node *buff;
728
729 printf("\n. Swap edge %d (%d %d) with %d (%d %d)",e1->num,e1->left->num,e1->rght->num,e2->num,e2->left->num,e2->rght->num);
730
731 if(swap == NO)
732 {
733 buff = e1->left;
734 e1->left = e2->left;
735 e2->left = buff;
736
737 buff = e1->rght;
738 e1->rght = e2->rght;
739 e2->rght = buff;
740
741 }
742 else
743 {
744 buff = e1->left;
745 e1->left = e2->rght;
746 e2->rght = buff;
747
748 buff = e1->rght;
749 e1->rght = e2->left;
750 e2->left = buff;
751 }
752
753 Connect_One_Edge_To_Two_Nodes(e1->left,e1->rght,e1,tree);
754 Connect_One_Edge_To_Two_Nodes(e2->left,e2->rght,e2,tree);
755 }
756
757 /*////////////////////////////////////////////////////////////
758 ////////////////////////////////////////////////////////////*/
759 /* As opposed to Connect_Edges_To_Nodes_Recur, the ordering of
760 edges connected to tips does not depend on the topology.
761 Use this function when you just have a table of edges not
762 not connected to any node and the reciprocal is true.
763 */
Connect_Edges_To_Nodes_Serial(t_tree * tree)764 void Connect_Edges_To_Nodes_Serial(t_tree *tree)
765 {
766 int i,j;
767
768 /* Reset */
769 for(i=0;i<2*tree->n_otu-1;++i) for(j=0;j<3;j++) if(tree->a_nodes[i] != NULL) tree->a_nodes[i]->b[j] = NULL;
770
771 for(i=0;i<tree->n_otu;i++)
772 {
773 assert(tree->a_nodes[i]->tax);
774 assert(tree->a_nodes[i] != tree->a_nodes[i]->v[0]);
775
776 // Required so that p_lk_tip_r corresponds to the sequence at tree->a_nodes[i]
777 if(tree->a_edges[i]->p_lk_tip_r != NULL) assert(tree->a_edges[i]->rght == tree->a_nodes[i]);
778
779 Connect_One_Edge_To_Two_Nodes(tree->a_nodes[i],
780 tree->a_nodes[i]->v[0],
781 tree->a_edges[i],
782 tree);
783 }
784
785
786 tree->num_curr_branch_available = tree->n_otu;
787
788 for(i=tree->n_otu;i<2*tree->n_otu-3;i++)
789 {
790 assert(!tree->a_nodes[i]->tax);
791
792 for(j=0;j<3;j++)
793 if(!tree->a_nodes[i]->b[j])
794 {
795 assert(tree->a_nodes[i] != tree->a_nodes[i]->v[j]);
796
797 Connect_One_Edge_To_Two_Nodes(tree->a_nodes[i],
798 tree->a_nodes[i]->v[j],
799 tree->a_edges[tree->num_curr_branch_available],
800 tree);
801 }
802 }
803
804 if(tree->n_root != NULL)
805 {
806 tree->a_edges[tree->num_curr_branch_available]->left = tree->n_root;
807 tree->a_edges[tree->num_curr_branch_available]->rght = tree->n_root->v[1];
808 tree->n_root->b[1] = tree->a_edges[tree->num_curr_branch_available];
809 tree->a_edges[tree->num_curr_branch_available]->num = tree->num_curr_branch_available;
810 tree->num_curr_branch_available++;
811
812 tree->a_edges[tree->num_curr_branch_available]->left = tree->n_root;
813 tree->a_edges[tree->num_curr_branch_available]->rght = tree->n_root->v[2];
814 tree->n_root->b[2] = tree->a_edges[tree->num_curr_branch_available];
815 tree->a_edges[tree->num_curr_branch_available]->num = tree->num_curr_branch_available;
816 tree->num_curr_branch_available++;
817 }
818 }
819
820 /*////////////////////////////////////////////////////////////
821 ////////////////////////////////////////////////////////////*/
822
/* Connect the next available edge between 'a' and 'd', then do the same,
   recursively, between 'd' and each of its neighbours other than 'a'.
   The recursion stops at tips. */
void Connect_Edges_To_Nodes_Recur(t_node *a, t_node *d, t_tree *tree)
{
  int dir;

  assert(a!=d);
  Connect_One_Edge_To_Two_Nodes(a,d,tree->a_edges[tree->num_curr_branch_available],tree);

  if(d->tax) return;

  for(dir=0;dir<3;dir++)
    {
      /* Don't add d->b[dir] != tree->e_root condition here since tree is not wired yet... */
      if(d->v[dir] == a) continue;
      Connect_Edges_To_Nodes_Recur(d,d->v[dir],tree);
    }
}
836
837 //////////////////////////////////////////////////////////////
838 //////////////////////////////////////////////////////////////
839
/* Attach edge 'b' to nodes 'a' and 'd', which must already list each other
   in their v[] arrays.

   a, d: the nodes to connect; 'a' must not be the root node of the tree.
   b:    the edge to attach; its left, rght and num fields are rewritten and
         its direction fields are refreshed through Set_Edge_Dirs().
   tree: tree->num_curr_branch_available provides the edge number, and is
         incremented, when neither node is a tip. Otherwise the edge takes
         the number of the tip, 'd' having priority over 'a'.

   Any inconsistency is reported and trapped with assert(). */
void Connect_One_Edge_To_Two_Nodes(t_node *a, t_node *d, t_edge *b, t_tree *tree)
{
  int k,dir_a_d,dir_d_a;

  assert(a != tree->n_root);
  assert(b);

  if(!a || !d || a->num == d->num)
    {
      PhyML_Fprintf(stderr,"\n. a: %d d: %d b: %d root: %d",a?a->num:-1,d?d->num:-1,b?b->num:-1,tree->n_root?tree->n_root->num:-1);
      assert(FALSE);
    }

  /* First slot of a->v[] that holds 'd', -1 if there is none */
  dir_a_d = -1;
  for(k=0;k<3 && dir_a_d < 0;k++) if(a->v[k] == d) dir_a_d = k;

  /* First slot of d->v[] that holds 'a', -1 if there is none */
  dir_d_a = -1;
  for(k=0;k<3 && dir_d_a < 0;k++) if(d->v[k] == a) dir_d_a = k;

  if(dir_a_d < 0 || dir_d_a < 0)
    {
      PhyML_Printf("\n. a:%d a->v[0]:%d a->v[1]:%d a->v[2]:%d d:%d d->v[0]:%d d->v[1]:%d d->v[2]:%d root:%d",
                   a->num,
                   a->v[0]?a->v[0]->num:-1,
                   a->v[1]?a->v[1]->num:-1,
                   a->v[2]?a->v[2]->num:-1,
                   d->num,
                   d->v[0]?d->v[0]->num:-1,
                   d->v[1]?d->v[1]->num:-1,
                   d->v[2]?d->v[2]->num:-1,
                   tree->n_root ? tree->n_root->num : -1);
      assert(FALSE);
    }

  a->b[dir_a_d] = b;
  d->b[dir_d_a] = b;

  /* a tip is necessarily on the righthand side of the t_edge */
  if(a->tax)
    {
      b->left = d;
      b->rght = a;
    }
  else
    {
      b->left = a;
      b->rght = d;
    }

  if(a->tax == NO && d->tax == NO)
    {
      b->num = tree->num_curr_branch_available;
      tree->num_curr_branch_available++;
    }
  else if(d->tax) b->num = d->num;
  else if(a->tax) b->num = a->num;
  else assert(FALSE);

  assert(a != d);

  /* Set_Edge_Dirs() expects the left node first */
  if(b->left == a) Set_Edge_Dirs(b,a,d,tree);
  else             Set_Edge_Dirs(b,d,a,tree);

  b->l_old->v = b->l->v;
}
898
899 //////////////////////////////////////////////////////////////
900 //////////////////////////////////////////////////////////////
901
/* For each of the first 2*n_otu-3 edges of the tree, and for each end of
   the edge that is not a tip, order the two side directions so that the
   one stored in *_v1 leads to the neighbour with the larger 'num' whenever
   the two numbers differ in the wrong direction (v1 < v2 triggers a swap). */
void Update_Dirs(t_tree *tree)
{
  int e,tmp;
  t_edge *edge;
  t_node *nd;

  for(e=0;e<2*tree->n_otu-3;++e)
    {
      edge = tree->a_edges[e];

      nd = edge->left;
      if(!nd->tax && nd->v[edge->l_v1]->num < nd->v[edge->l_v2]->num)
        {
          tmp        = edge->l_v1;
          edge->l_v1 = edge->l_v2;
          edge->l_v2 = tmp;
        }

      nd = edge->rght;
      if(!nd->tax && nd->v[edge->r_v1]->num < nd->v[edge->r_v2]->num)
        {
          tmp        = edge->r_v1;
          edge->r_v1 = edge->r_v2;
          edge->r_v2 = tmp;
        }
    }
}
928
929 //////////////////////////////////////////////////////////////
930 //////////////////////////////////////////////////////////////
931
/* Print 'message' on stderr and terminate the program with status 1. */
void Exit(char *message)
{
  /* A NULL argument makes fflush() act on every open output stream, so
     pending output is written before the message. */
  fflush(NULL);

  PhyML_Fprintf(stderr,"%s",message);

  exit(1);
}
938
939 //////////////////////////////////////////////////////////////
940 //////////////////////////////////////////////////////////////
941
/* calloc() wrapper that does not return on failure.

   nb:   number of elements; must not be negative.
   size: size of one element, in bytes.

   Returns zero-initialised memory. Generic_Exit() is called when 'nb' is
   negative (it would otherwise wrap to a huge size_t) or when the
   allocation fails. A request for zero bytes is served with a one-byte
   block, because calloc() may legally return NULL in that case, which would
   be mistaken for a failure. */
void *mCalloc(int nb, size_t size)
{
  void *allocated;

  if(nb < 0)
    {
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }
  else
    {
      allocated = calloc(nb ? (size_t)nb : 1,size ? size : 1);
      if(allocated != NULL) return allocated;

      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  return NULL;
}
954
955 //////////////////////////////////////////////////////////////
956 //////////////////////////////////////////////////////////////
957
/* realloc() wrapper that does not return on failure.

   p:    block to resize (NULL is accepted by realloc()).
   nb:   number of elements; must not be negative.
   size: size of one element, in bytes.

   Returns the resized block. Exit() is called when 'nb' is negative, when
   nb*size does not fit in a size_t, or when the allocation fails. A request
   for zero bytes is served with a one-byte block: realloc(p,0) may free 'p'
   and return NULL, which would be mistaken for a failure. */
void *mRealloc(void *p,int nb, size_t size)
{
  size_t n_bytes;
  void *resized;

  /* (size_t)-1 is the largest value a size_t can hold */
  if(nb < 0 || (size != 0 && (size_t)nb > ((size_t)-1)/size))
    Exit("\n. Err.: invalid size passed to mRealloc\n");

  n_bytes = (size_t)nb*size;
  if(n_bytes == 0) n_bytes = 1;

  /* Keep the result apart from 'p' until success is known */
  resized = realloc(p,n_bytes);
  if(resized != NULL)
    return resized;
  else
    Exit("\n. Err.: low memory\n");

  return NULL;
}
967
968 //////////////////////////////////////////////////////////////
969 //////////////////////////////////////////////////////////////
970
971
972 /* t_tree *Make_Light_Tree_Struct(int n_otu) */
973 /* { */
974 /* t_tree *tree; */
975 /* int i; */
976
977 /* tree = (t_tree *)mCalloc(1,sizeof(t_tree )); */
978 /* tree->a_edges = (t_edge **)mCalloc(2*n_otu-3,sizeof(t_edge *)); */
979 /* tree->a_nodes = (t_node **)mCalloc(2*n_otu-2,sizeof(t_node *)); */
980 /* tree->n_otu = n_otu; */
981
982 /* For(i,2*n_otu-3) */
983 /* tree->a_edges[i] = Make_Edge_Light(NULL,NULL,i); */
984
985 /* For(i,2*n_otu-2) */
986 /* tree->a_nodes[i] = Make_Node_Light(i); */
987
988 /* return tree; */
989 /* } */
990
991 //////////////////////////////////////////////////////////////
992 //////////////////////////////////////////////////////////////
993
994
Sort_Phydbl_Decrease(const void * a,const void * b)995 int Sort_Phydbl_Decrease(const void *a, const void *b)
996 {
997 if((*(phydbl *)(a)) >= (*(phydbl *)(b))) return -1;
998 else return 1;
999 }
1000
1001 //////////////////////////////////////////////////////////////
1002 //////////////////////////////////////////////////////////////
1003
1004
/* Sort A[ilo..ihi] (both bounds included) in ascending order, in place.
   When B is not NULL, its elements are moved exactly like those of A, so
   that B[k] still goes with A[k] after the sort.

   The range is split in three parts around a pivot taken in the middle
   (smaller than, equal to, larger than the pivot); the first and last parts
   are then sorted recursively. Nothing is done when ilo >= ihi.

   The pivot is held in an int, like the elements it is compared with; it
   used to be declared as a floating-point value. */
void Qksort_Int(int *A, int *B, int ilo, int ihi)
{
  int pivot;     // pivot value for partitioning array
  int ulo, uhi;  // indices at ends of unpartitioned region
  int ieq;       // least index of array entry with value equal to pivot
  int tempEntry; // temporary entry used for swapping

  if(ilo >= ihi) return;

  // Select a pivot value.
  pivot = A[(ilo + ihi)/2];

  // Initialize ends of unpartitioned region and least index of entry
  // with value equal to pivot.
  ieq = ulo = ilo;
  uhi = ihi;

  // While the unpartitioned region is not empty, try to reduce its size.
  while(ulo <= uhi)
    {
      if(A[uhi] > pivot)
        {
          // The entry is already on the correct side: shrink the region.
          uhi--;
        }
      else
        {
          // Here, A[uhi] <= pivot, so swap entries at indices ulo and uhi.
          tempEntry = A[ulo];
          A[ulo]    = A[uhi];
          A[uhi]    = tempEntry;

          if(B)
            {
              tempEntry = B[ulo];
              B[ulo]    = B[uhi];
              B[uhi]    = tempEntry;
            }

          // After the swap, A[ulo] <= pivot.
          if(A[ulo] < pivot)
            {
              // Move the entry in front of those equal to the pivot.
              tempEntry = A[ieq];
              A[ieq]    = A[ulo];
              A[ulo]    = tempEntry;

              if(B)
                {
                  tempEntry = B[ieq];
                  B[ieq]    = B[ulo];
                  B[ulo]    = tempEntry;
                }

              // A[ieq] is now smaller than the pivot.
              ieq++;
            }

          // A[ulo] is in place, whether smaller than or equal to the pivot.
          ulo++;
        }
    }

  // Entries ilo..ieq-1 are smaller than the pivot and entries uhi+1..ihi
  // are larger: sort these two regions recursively.
  Qksort_Int(A, B, ilo, ieq - 1);
  Qksort_Int(A, B, uhi + 1, ihi);
}
1077
1078 //////////////////////////////////////////////////////////////
1079 //////////////////////////////////////////////////////////////
1080
1081
1082 /* Sort in ascending order. Elements in B (if provided) are also re-ordered according to the ordering of A */
Qksort(phydbl * A,phydbl * B,int ilo,int ihi)1083 void Qksort(phydbl *A, phydbl *B, int ilo, int ihi)
1084 {
1085 phydbl pivot; // pivot value for partitioning array
1086 int ulo, uhi; // indices at ends of unpartitioned region
1087 int ieq; // least index of array entry with value equal to pivot
1088 phydbl tempEntry; // temporary entry used for swapping
1089
1090 if (ilo >= ihi) {
1091 return;
1092 }
1093 // Select a pivot value.
1094 pivot = A[(ilo + ihi)/2];
1095 // Initialize ends of unpartitioned region and least index of entry
1096 // with value equal to pivot.
1097 ieq = ulo = ilo;
1098 uhi = ihi;
1099 // While the unpartitioned region is not empty, try to reduce its size.
1100 while (ulo <= uhi) {
1101 if (A[uhi] > pivot) {
1102 // Here, we can reduce the size of the unpartitioned region and
1103 // try again.
1104 uhi--;
1105 } else {
1106 // Here, A[uhi] <= pivot, so swap entries at indices ulo and
1107 // uhi.
1108 tempEntry = A[ulo];
1109 A[ulo] = A[uhi];
1110 A[uhi] = tempEntry;
1111
1112 if(B)
1113 {
1114 tempEntry = B[ulo];
1115 B[ulo] = B[uhi];
1116 B[uhi] = tempEntry;
1117 }
1118
1119
1120
1121 // After the swap, A[ulo] <= pivot.
1122 if (A[ulo] < pivot) {
1123 // Swap entries at indices ieq and ulo.
1124 tempEntry = A[ieq];
1125 A[ieq] = A[ulo];
1126 A[ulo] = tempEntry;
1127
1128
1129 if(B)
1130 {
1131 tempEntry = B[ieq];
1132 B[ieq] = B[ulo];
1133 B[ulo] = tempEntry;
1134 }
1135
1136
1137 // After the swap, A[ieq] < pivot, so we need to change
1138 // ieq.
1139 ieq++;
1140 // We also need to change ulo, but we also need to do
1141 // that when A[ulo] = pivot, so we do it after this if
1142 // statement.
1143 }
1144 // Once again, we can reduce the size of the unpartitioned
1145 // region and try again.
1146 ulo++;
1147 }
1148 }
1149 // Now, all entries from index ilo to ieq - 1 are less than the pivot
1150 // and all entries from index uhi to ihi + 1 are greater than the
1151 // pivot. So we have two regions of the array that can be sorted
1152 // recursively to put all of the entries in order.
1153 Qksort(A, B, ilo, ieq - 1);
1154 Qksort(A, B, uhi + 1, ihi);
1155 }
1156
1157 /********************************************************/
1158
/* Sort the row pointers A[ilo..ihi] (both bounds inclusive) in ascending
   order of the value each row holds in column `col`. Only the pointers are
   permuted; the rows themselves are left untouched. Recursive three-way
   quicksort. */
void Qksort_Matrix(phydbl **A, int col, int ilo, int ihi)
{
  phydbl pivot; /* key the range is partitioned around */
  phydbl *row;  /* scratch slot used when exchanging two row pointers */
  int lo,hi;    /* bounds of the region that is still unclassified */
  int eq;       /* lowest index whose key equals the pivot */

  /* Ranges with fewer than two rows are already sorted. */
  if(ilo >= ihi) return;

  pivot = A[(ilo + ihi)/2][col];
  eq    = ilo;
  lo    = ilo;
  hi    = ihi;

  /* Invariant (on keys): rows ilo..eq-1 < pivot, rows eq..lo-1 == pivot,
     rows hi+1..ihi > pivot. */
  while(lo <= hi)
    {
      if(A[hi][col] > pivot)
        {
          /* Already on the correct side: shrink the region from the top. */
          hi--;
          continue;
        }

      /* Key of row hi is <= pivot: bring the row to the bottom of the
         unclassified region. */
      row   = A[lo];
      A[lo] = A[hi];
      A[hi] = row;

      if(A[lo][col] < pivot)
        {
          /* Strictly smaller key: move the row below the pivot-equal run. */
          row   = A[eq];
          A[eq] = A[lo];
          A[lo] = row;
          eq++;
        }

      /* The row at lo is now classified (smaller or equal). */
      lo++;
    }

  /* Rows whose key equals the pivot are in place; sort both sides. */
  Qksort_Matrix(A,col,ilo,eq-1);
  Qksort_Matrix(A,col,hi+1,ihi);
}
1214
1215
1216 //////////////////////////////////////////////////////////////
1217 //////////////////////////////////////////////////////////////
1218
/* Build an extended version of the constraint tree stored in `fp`.
 *
 * The tree string is read from the start of `fp`. It is cut just before the
 * parenthesis that closes the one opened at its first character, then every
 * sequence name of `cdata` that is not the name of one of the first
 * tree->n_otu nodes of the parsed tree is appended as ",<name>", and the
 * string is finally closed with ");".
 *
 * Returns a buffer of T_MAX_LINE characters obtained from mCalloc; the caller
 * owns it. The function exits through Generic_Exit when no tree can be read,
 * when the parentheses are unbalanced, or when the result would not fit in
 * T_MAX_LINE characters (the original code overran the buffer or read past
 * the end of the string in these situations).
 */
char *Add_Taxa_To_Constraint_Tree(FILE *fp, calign *cdata)
{
  char *line,*long_line;
  t_tree *tree;
  int i,j,open;
  size_t len,name_len;

  rewind(fp);

  line = Return_Tree_String_Phylip(fp);
  if(line == NULL)
    {
      PhyML_Fprintf(stderr,"\n. Could not read a tree string from the constraint tree file.");
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  tree = Read_Tree(&line);
  if(tree == NULL)
    {
      PhyML_Fprintf(stderr,"\n. Could not parse the constraint tree.");
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  /* Locate the parenthesis matching the one assumed at line[0]. The scan is
     bounded by the string length so that unbalanced input is reported
     instead of being read past its terminator. */
  len  = strlen(line);
  i    = 1;
  open = 1;
  while(open)
    {
      if((size_t)i >= len)
        {
          PhyML_Fprintf(stderr,"\n. Unbalanced parentheses in the constraint tree string.");
          Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
        }
      if(line[i]=='(') open++;
      if(line[i]==')') open--;
      i++;
    }

  /* Keep everything that precedes the closing parenthesis. Room must remain
     for the final ");" and the terminating NUL. */
  len = (size_t)(i-1);
  if(len + 3 > (size_t)T_MAX_LINE)
    {
      PhyML_Fprintf(stderr,"\n. The constraint tree string is too long.");
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  long_line = (char *)mCalloc(T_MAX_LINE,sizeof(char));
  memcpy(long_line,line,len);

  for(i=0;i<cdata->n_otu;i++)
    {
      for(j=0;j<tree->n_otu;j++)
        {
          if(!strcmp(tree->a_nodes[j]->name,cdata->c_seq[i]->name))
            break;
        }

      if(j == tree->n_otu)
        {
          /* Taxon absent from the constraint tree: append ",<name>". The
             write position is tracked in `len`, which avoids rescanning the
             buffer for every appended name. */
          name_len = strlen(cdata->c_seq[i]->name);

          if(len + 1 + name_len + 3 > (size_t)T_MAX_LINE)
            {
              PhyML_Fprintf(stderr,"\n. Adding taxon '%s' makes the constraint tree string too long.",cdata->c_seq[i]->name);
              Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
            }

          long_line[len++] = ',';
          memcpy(long_line+len,cdata->c_seq[i]->name,name_len);
          len += name_len;
        }
    }

  /* Close the tree; the three bytes copied include the terminating NUL. */
  memcpy(long_line+len,");",3);

  Free_Tree(tree);
  Free(line);

  return long_line;
}
1267
1268 //////////////////////////////////////////////////////////////
1269 //////////////////////////////////////////////////////////////
1270
/* Verify that the name of each of the first tree->n_otu nodes of `tree`
   matches the name of one of the cdata->n_otu sequences in `cdata`. The first
   name without a match is reported on stderr and Warn_And_Exit is called. */
void Check_Constraint_Tree_Taxa_Names(t_tree *tree, calign *cdata)
{
  int tip,seq,found;
  int n_tips,n_seqs;

  n_tips = tree->n_otu;
  n_seqs = cdata->n_otu;

  for(tip=0;tip<n_tips;tip++)
    {
      /* Linear search for the tip name among the sequence names. */
      found = 0;
      for(seq=0;seq<n_seqs && !found;seq++)
        {
          if(!strcmp(tree->a_nodes[tip]->name,cdata->c_seq[seq]->name)) found = 1;
        }

      if(found) continue;

      PhyML_Fprintf(stderr,"\n. '%s' was not found in sequence data set\n",tree->a_nodes[tip]->name);
      Warn_And_Exit("\n. PhyML finished prematurely.");
    }
}
1293
1294 //////////////////////////////////////////////////////////////
1295 //////////////////////////////////////////////////////////////
1296
/* For each of the first tree->n_otu nodes of `tree`, allocate a fresh name
   buffer holding a copy of the name of the sequence with the same index in
   `data`, make ori_name point to that same buffer, flag the node as a taxon
   (tax = 1) and set its number to its index. Any buffer previously referenced
   by `name` is not released here. */
void Copy_Tax_Names_To_Tip_Labels(t_tree *tree, calign *data)
{
  int i;
  t_node *tip;
  char *label;

  for(i=0;i<tree->n_otu;i++)
    {
      tip   = tree->a_nodes[i];
      label = data->c_seq[i]->name;

      tip->name     = (char *)mCalloc((int)strlen(label)+1,sizeof(char));
      tip->ori_name = tip->name;
      strcpy(tip->name,label);

      tip->tax = 1;
      tip->num = i;
    }
}
1310
1311 //////////////////////////////////////////////////////////////
1312 //////////////////////////////////////////////////////////////
1313
/* Make `t_empt` reference the likelihood-related storage of `t_full`.
 *
 * Only pointers/values are copied (nothing is allocated or duplicated):
 *  - tree-level likelihood members;
 *  - Pij_rr, tPij_rr and nni of each of the 2*n_otu-3 edges;
 *  - for every node with index >= n_otu and each of its three edges, the
 *    partial-likelihood members found on that node's side of the edge in
 *    t_full are stored on the same node's side of the matching edge in
 *    t_empt (the node may be `left` in one tree and `rght` in the other);
 *  - for every node with index < n_otu, the right-side members of edge b[0].
 *    Such a node is expected to be the `rght` end of its edge in t_full;
 *    otherwise Generic_Exit is called.
 */
void Share_Lk_Struct(t_tree *t_full, t_tree *t_empt)
{
  int i,j,n_otu;
  int from_left,to_left;
  t_edge *src,*dst;
  t_node *n_e,*n_f;

  n_otu = t_full->n_otu;

  /* Tree-level members. */
  t_empt->c_lnL_sorted         = t_full->c_lnL_sorted;
  t_empt->unscaled_site_lk_cat = t_full->unscaled_site_lk_cat;
  t_empt->cur_site_lk          = t_full->cur_site_lk;
  t_empt->old_site_lk          = t_full->old_site_lk;
  t_empt->log_lks_aLRT         = t_full->log_lks_aLRT;
  t_empt->site_lk_cat          = t_full->site_lk_cat;
  t_empt->fact_sum_scale       = t_full->fact_sum_scale;
  t_empt->eigen_lr_left        = t_full->eigen_lr_left;
  t_empt->eigen_lr_rght        = t_full->eigen_lr_rght;
  t_empt->dot_prod             = t_full->dot_prod;
  t_empt->expl                 = t_full->expl;

  /* Edge-level members that do not depend on edge orientation. */
  For(i,2*n_otu-3)
    {
      src = t_full->a_edges[i];
      dst = t_empt->a_edges[i];

      dst->Pij_rr  = src->Pij_rr;
      dst->tPij_rr = src->tPij_rr;
      dst->nni     = src->nni;
    }

  /* Nodes with index in [n_otu, 2*n_otu-2): three edges each. */
  for(i=n_otu;i<2*n_otu-2;i++)
    {
      n_f = t_full->a_nodes[i];
      n_e = t_empt->a_nodes[i];

      for(j=0;j<3;j++)
        {
          src = n_f->b[j];
          dst = n_e->b[j];

          /* Side of the edge the node sits on, in each tree. */
          from_left = (src->left == n_f);
          to_left   = from_left ? (dst->left == n_e) : (dst->rght != n_e);

          if(from_left && to_left)
            {
              dst->p_lk_left          = src->p_lk_left;
              dst->p_lk_loc_left      = src->p_lk_loc_left;
              dst->patt_id_left       = src->patt_id_left;
              dst->sum_scale_left     = src->sum_scale_left;
              dst->sum_scale_left_cat = src->sum_scale_left_cat;
              dst->p_lk_tip_l         = src->p_lk_tip_l;
            }
          else if(from_left)
            {
              dst->p_lk_rght          = src->p_lk_left;
              dst->p_lk_loc_rght      = src->p_lk_loc_left;
              dst->patt_id_rght       = src->patt_id_left;
              dst->sum_scale_rght     = src->sum_scale_left;
              dst->sum_scale_rght_cat = src->sum_scale_left_cat;
              dst->p_lk_tip_r         = src->p_lk_tip_l;
            }
          else if(to_left)
            {
              dst->p_lk_left          = src->p_lk_rght;
              dst->p_lk_loc_left      = src->p_lk_loc_rght;
              dst->patt_id_left       = src->patt_id_rght;
              dst->sum_scale_left     = src->sum_scale_rght;
              dst->sum_scale_left_cat = src->sum_scale_rght_cat;
              dst->p_lk_tip_l         = src->p_lk_tip_r;
            }
          else
            {
              dst->p_lk_rght          = src->p_lk_rght;
              dst->p_lk_loc_rght      = src->p_lk_loc_rght;
              dst->patt_id_rght       = src->patt_id_rght;
              dst->sum_scale_rght     = src->sum_scale_rght;
              dst->sum_scale_rght_cat = src->sum_scale_rght_cat;
              dst->p_lk_tip_r         = src->p_lk_tip_r;
            }
        }
    }

  /* Nodes with index in [0, n_otu): single edge b[0]. */
  for(i=0;i<n_otu;i++)
    {
      n_f = t_full->a_nodes[i];
      n_e = t_empt->a_nodes[i];

      src = n_f->b[0];

      if(src->rght != n_f)
        {
          Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
          continue;
        }

      dst = n_e->b[0];

      dst->p_lk_rght          = src->p_lk_rght;
      dst->p_lk_loc_rght      = src->p_lk_loc_rght;
      dst->patt_id_rght       = src->patt_id_rght;
      dst->sum_scale_rght     = src->sum_scale_rght;
      dst->sum_scale_rght_cat = src->sum_scale_rght_cat;
      dst->p_lk_tip_r         = src->p_lk_tip_r;
    }
}
1417
1418 //////////////////////////////////////////////////////////////
1419 //////////////////////////////////////////////////////////////
1420
/* Copy the SPR bookkeeping members of `t_full` into `t_empt`: the two SPR
   lists with their sizes, and best_spr. Members are assigned as they are, so
   both trees refer to the same underlying objects afterwards. */
void Share_Spr_Struct(t_tree *t_full, t_tree *t_empt)
{
  /* List attached to a single edge. */
  t_empt->spr_list_one_edge      = t_full->spr_list_one_edge;
  t_empt->size_spr_list_one_edge = t_full->size_spr_list_one_edge;

  /* List spanning all edges. */
  t_empt->spr_list_all_edge      = t_full->spr_list_all_edge;
  t_empt->size_spr_list_all_edge = t_full->size_spr_list_all_edge;

  t_empt->best_spr               = t_full->best_spr;
}
1429
1430 //////////////////////////////////////////////////////////////
1431 //////////////////////////////////////////////////////////////
1432
/* Make `t_empt` reference the parsimony-related storage of `t_full`:
   site_pars and step_mat at the tree level, and the ui_*, pars_* and
   p_pars_* members of each of the 2*n_otu-3 edges (n_otu taken from t_full).
   Members are assigned as they are; nothing is duplicated. */
void Share_Pars_Struct(t_tree *t_full, t_tree *t_empt)
{
  int i;
  t_edge *src,*dst;

  t_empt->site_pars = t_full->site_pars;
  t_empt->step_mat  = t_full->step_mat;

  For(i,2*t_full->n_otu-3)
    {
      src = t_full->a_edges[i];
      dst = t_empt->a_edges[i];

      /* Left-hand side of the edge. */
      dst->ui_l     = src->ui_l;
      dst->pars_l   = src->pars_l;
      dst->p_pars_l = src->p_pars_l;

      /* Right-hand side of the edge. */
      dst->ui_r     = src->ui_r;
      dst->pars_r   = src->pars_r;
      dst->p_pars_r = src->p_pars_r;
    }
}
1452
1453 //////////////////////////////////////////////////////////////
1454 //////////////////////////////////////////////////////////////
1455
/* Sort the `n_elem` edges of `sorted_edges` in place, by increasing
 * nni->score. `tree` is not used. Always returns 1.
 *
 * The sort is an exchange sort: once the inner loop for index i completes,
 * sorted_edges[i] holds the smallest score of sorted_edges[i..n_elem-1], so a
 * single sweep over i leaves the array sorted. The former outer do/while
 * re-ran the whole O(n^2) sweep just to observe that no exchange was left to
 * make; it has been dropped. For scores that compare normally (no NaN) the
 * resulting order is identical.
 */
int Sort_Edges_NNI_Score(t_tree *tree, t_edge **sorted_edges, int n_elem)
{
  int i,j;
  t_edge *buff;

  (void)tree; /* kept in the signature for the callers; unused here */

  for(i=0;i<n_elem-1;i++)
    {
      for(j=i+1;j<n_elem;j++)
        {
          if(sorted_edges[j]->nni->score < sorted_edges[i]->nni->score)
            {
              buff            = sorted_edges[j];
              sorted_edges[j] = sorted_edges[i];
              sorted_edges[i] = buff;
            }
        }
    }

  return 1;
}
1482
1483 //////////////////////////////////////////////////////////////
1484 //////////////////////////////////////////////////////////////
1485
/* Sort the `n_elem` edges of `sorted_edges` in place, by increasing "depth",
   where the depth of an edge is the product of left->bip_size[l_r] and
   rght->bip_size[r_l]. Depths are computed once into a temporary array that
   is permuted along with the edges and released before returning. `tree` is
   not used. Always returns 1. */
int Sort_Edges_Depth(t_tree *tree, t_edge **sorted_edges, int n_elem)
{
  int a,b;
  t_edge *e,*tmp_edge;
  phydbl *depth,tmp_depth;

  depth = (phydbl *)mCalloc(n_elem,sizeof(phydbl));

  for(a=0;a<n_elem;a++)
    {
      e = sorted_edges[a];
      depth[a] = e->left->bip_size[e->l_r] * e->rght->bip_size[e->r_l];
    }

  /* Exchange sort on depth; both arrays are kept in step. */
  for(a=0;a<n_elem-1;a++)
    {
      for(b=a+1;b<n_elem;b++)
        {
          if(!(depth[a] > depth[b])) continue;

          tmp_edge        = sorted_edges[a];
          sorted_edges[a] = sorted_edges[b];
          sorted_edges[b] = tmp_edge;

          tmp_depth = depth[a];
          depth[a]  = depth[b];
          depth[b]  = tmp_depth;
        }
    }

  Free(depth);

  return 1;
}
1521
1522 //////////////////////////////////////////////////////////////
1523 //////////////////////////////////////////////////////////////
1524
/* Evaluate the two nearest-neighbour interchanges around edge `b_fcus` and
 * record the outcome in b_fcus->nni.
 *
 * With v1,v2 the two neighbours of b_fcus->left away from the edge and v3,v4
 * those of b_fcus->rght, three configurations are scored with
 * Update_Lk_At_Given_Edge (optionally followed by a branch-length
 * optimisation, see s_opt->nni_br_len_opt / s_opt->fast_nni):
 *   1: v2 and v3 exchanged   -> lk1
 *   2: v2 and v4 exchanged   -> lk2
 *   0: current topology      -> lk0
 * Each exchange is undone before the next configuration is evaluated.
 *
 * Written to b_fcus->nni: init_lk, init_l, lk0/lk1/lk2, l0/l1/l2, v0/v1/v2,
 * score (lk0 - MAX(lk1,lk2), see below), best_conf, best_l, best_v and the
 * four swap_node_v* members describing the best exchange.
 *
 * do_swap == YES : if lk1 or lk2 is larger than lk0, the better of the two
 *                  exchanges is applied to the tree and tree->n_swap is
 *                  incremented. Nothing is restored on this path.
 * otherwise      : branch lengths are restored, the transition matrices of
 *                  b_fcus are refreshed and tree->c_lnL is reset to its value
 *                  on entry.
 *
 * Exits through Generic_Exit if tree->prev is set, if the neighbours are not
 * ordered as expected (v1->num >= v2->num, v3->num >= v4->num), or when the
 * likelihood checks on the current topology fail.
 */
void NNI(t_tree *tree, t_edge *b_fcus, int do_swap)
{
  t_node *v1,*v2,*v3,*v4;
  phydbl lk0, lk1, lk2;
  phydbl lk0_init, lk1_init, lk2_init;
  scalar_dbl *len0,*len1,*len2;
  scalar_dbl *var0,*var1,*var2;
  phydbl l_infa, l_infb; /* only ever printed in the diagnostic messages below */
  phydbl lk_init;

  if(tree->prev) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);

  /* Remember the likelihood on entry and reset the NNI record. */
  lk_init                = tree->c_lnL;
  b_fcus->nni->init_lk   = tree->c_lnL;;
  b_fcus->nni->best_conf = 0;
  b_fcus->nni->score     = +1.0;
  lk0 = lk1 = lk2        = UNLIKELY;
  v1 = v2 = v3 = v4      = NULL;

  /* Save the current length of the focal edge in nni->init_l. */
  if(b_fcus->nni->init_l != NULL) Copy_Scalar_Dbl(b_fcus->l,b_fcus->nni->init_l);
  else                            b_fcus->nni->init_l = Duplicate_Scalar_Dbl(b_fcus->l);

  v1                     = b_fcus->left->v[b_fcus->l_v1];
  v2                     = b_fcus->left->v[b_fcus->l_v2];
  v3                     = b_fcus->rght->v[b_fcus->r_v1];
  v4                     = b_fcus->rght->v[b_fcus->r_v2];

  /* Snapshot of the branch lengths, brought back by Restore_Br_Len below. */
  Record_Br_Len(tree);

  if(v1->num < v2->num) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
  if(v3->num < v4->num) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);

  /************************************************************/
  /* Configuration 1: exchange v2 and v3. */
  Swap(v2,b_fcus->left,b_fcus->rght,v3,tree);
  Set_Both_Sides(YES,tree);

  MIXT_Set_Alias_Subpatt(YES,tree);
  lk1_init = Update_Lk_At_Given_Edge(b_fcus,tree);
  MIXT_Set_Alias_Subpatt(NO,tree);

  l_infa = 1.;
  l_infb = 1.E-4;
  lk1    = lk1_init;

  if(tree->mod->s_opt->nni_br_len_opt == YES)
    {
      if(tree->mod->s_opt->fast_nni)
        {
          lk1 = Fast_Br_Len(b_fcus,tree,YES);
        }
      else
        {
          lk1 = Br_Len_Opt(&(b_fcus->l->v),b_fcus,tree);
        }
    }

  /* The optimisation should not lower the likelihood; this is only reported,
     execution continues. */
  if(lk1 < lk1_init - tree->mod->s_opt->min_diff_lk_local)
    {
      PhyML_Printf("\n. %f %f %G",l_infa,l_infb,b_fcus->l->v);
      PhyML_Printf("\n. %f -- %f",lk1_init,lk1);
      PhyML_Printf("\n. Err. in NNI (1)");
    }

  /* Keep the edge length and variance obtained for configuration 1, then
     undo the exchange. */
  len1 = Duplicate_Scalar_Dbl(b_fcus->l);
  var1 = Duplicate_Scalar_Dbl(b_fcus->l_var);
  Swap(v3,b_fcus->left,b_fcus->rght,v2,tree);
  /************************************************************/


  /************************************************************/
  /* Configuration 2: exchange v2 and v4. */
  Swap(v2,b_fcus->left,b_fcus->rght,v4,tree);
  Restore_Br_Len(tree);
  Set_Both_Sides(YES,tree);

  MIXT_Set_Alias_Subpatt(YES,tree);
  lk2_init = Update_Lk_At_Given_Edge(b_fcus,tree);
  MIXT_Set_Alias_Subpatt(NO,tree);

  l_infa = 1.;
  l_infb = 1.E-4;

  lk2    = lk2_init;

  if(tree->mod->s_opt->nni_br_len_opt == YES)
    {
      if(tree->mod->s_opt->fast_nni)
        {
          lk2 = Fast_Br_Len(b_fcus,tree,YES);
        }
      else
        {
          lk2 = Br_Len_Opt(&(b_fcus->l->v),b_fcus,tree);
        }
    }

  /* Same non-fatal sanity check as for configuration 1. */
  if(lk2 < lk2_init - tree->mod->s_opt->min_diff_lk_local)
    {
      PhyML_Printf("\n. %f %f %G",l_infa,l_infb,b_fcus->l->v);
      PhyML_Printf("\n. %f -- %f",lk2_init,lk2);
      PhyML_Printf("\n. Err. in NNI (2)");
    }

  /* Keep the edge length and variance obtained for configuration 2, then
     undo the exchange. */
  len2 = Duplicate_Scalar_Dbl(b_fcus->l);
  var2 = Duplicate_Scalar_Dbl(b_fcus->l_var);
  Swap(v4,b_fcus->left,b_fcus->rght,v2,tree);
  /************************************************************/



  /************************************************************/
  /* Configuration 0: the topology as it was on entry. */
  Restore_Br_Len(tree);
  Set_Both_Sides(YES,tree);

  MIXT_Set_Alias_Subpatt(YES,tree);
  lk0_init = Update_Lk_At_Given_Edge(b_fcus,tree);
  MIXT_Set_Alias_Subpatt(NO,tree);


  /* With topology and branch lengths back to their initial state, the
     likelihood must match the value seen on entry; a mismatch is fatal. */
  if(FABS(lk0_init - lk_init) > tree->mod->s_opt->min_diff_lk_local)
    {
      PhyML_Fprintf(stderr,"\n. lk_init = %f; lk = %f diff = %f l = %G",
                    lk_init,
                    lk0_init,
                    lk_init-lk0_init,
                    b_fcus->l->v);
      PhyML_Fprintf(stderr,"\n. Curr_lnL = %f",Lk(NULL,tree));
      Exit("\n. Err. in NNI (3)");
    }

  l_infa = 1.;
  l_infb = 1.E-4;
  lk0    = lk0_init;

  if(tree->mod->s_opt->nni_br_len_opt == YES)
    {
      if(tree->mod->s_opt->fast_nni)
        {
          lk0 = Fast_Br_Len(b_fcus,tree,YES);
        }
      else
        {
          lk0 = Br_Len_Opt(&(b_fcus->l->v),b_fcus,tree);
        }
    }

  /* Unlike configurations 1 and 2, a likelihood drop here is fatal. */
  if(lk0 < lk_init - tree->mod->s_opt->min_diff_lk_local)
    {
      PhyML_Printf("\n. %f %f %f",l_infa,l_infb,b_fcus->l->v);
      PhyML_Printf("\n. %f -- %f",lk0_init,lk0);
      PhyML_Printf("\n. Err. in NNI (3)\n");
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  len0 = Duplicate_Scalar_Dbl(b_fcus->l);
  var0 = Duplicate_Scalar_Dbl(b_fcus->l_var);
  /************************************************************/

  /* Store the three likelihoods, edge lengths and variances in the NNI
     record; the record's scalars are created on first use and overwritten
     afterwards. */
  b_fcus->nni->lk0 = lk0;
  b_fcus->nni->lk1 = lk1;
  b_fcus->nni->lk2 = lk2;

  if(b_fcus->nni->l0 == NULL) b_fcus->nni->l0 = Duplicate_Scalar_Dbl(len0);
  else                        Copy_Scalar_Dbl(len0,b_fcus->nni->l0);

  if(b_fcus->nni->l1 == NULL) b_fcus->nni->l1 = Duplicate_Scalar_Dbl(len1);
  else                        Copy_Scalar_Dbl(len1,b_fcus->nni->l1);

  if(b_fcus->nni->l2 == NULL) b_fcus->nni->l2 = Duplicate_Scalar_Dbl(len2);
  else                        Copy_Scalar_Dbl(len2,b_fcus->nni->l2);

  if(b_fcus->nni->v0 == NULL) b_fcus->nni->v0 = Duplicate_Scalar_Dbl(var0);
  else                        Copy_Scalar_Dbl(var0,b_fcus->nni->v0);

  if(b_fcus->nni->v1 == NULL) b_fcus->nni->v1 = Duplicate_Scalar_Dbl(var1);
  else                        Copy_Scalar_Dbl(var1,b_fcus->nni->v1);

  if(b_fcus->nni->v2 == NULL) b_fcus->nni->v2 = Duplicate_Scalar_Dbl(var2);
  else                        Copy_Scalar_Dbl(var2,b_fcus->nni->v2);

  /* Negative score <=> one of the two exchanges beats the current topology. */
  b_fcus->nni->score = lk0 - MAX(lk1,lk2);

  /* Differences smaller than min_diff_lk_local are treated as no difference. */
  if((b_fcus->nni->score <  tree->mod->s_opt->min_diff_lk_local) &&
     (b_fcus->nni->score > -tree->mod->s_opt->min_diff_lk_local))
    {
      b_fcus->nni->score = .0;
      b_fcus->nni->lk1 = b_fcus->nni->lk0;
      b_fcus->nni->lk2 = b_fcus->nni->lk0;
    }

  /* Record the strictly best configuration together with the four nodes to
     hand to Swap in order to reach it (NULL when no exchange is needed). */
  if(lk0 > MAX(lk1,lk2))
    {
      b_fcus->nni->best_conf    = 0;
      b_fcus->nni->swap_node_v1 = NULL;
      b_fcus->nni->swap_node_v2 = NULL;
      b_fcus->nni->swap_node_v3 = NULL;
      b_fcus->nni->swap_node_v4 = NULL;

      if(b_fcus->nni->best_l == NULL) b_fcus->nni->best_l = Duplicate_Scalar_Dbl(len0);
      else                            Copy_Scalar_Dbl(len0,b_fcus->nni->best_l);

      if(b_fcus->nni->best_v == NULL) b_fcus->nni->best_v = Duplicate_Scalar_Dbl(var0);
      else                            Copy_Scalar_Dbl(var0,b_fcus->nni->best_v);
    }
  else if(lk1 > MAX(lk0,lk2))
    {
      b_fcus->nni->best_conf    = 1;
      b_fcus->nni->swap_node_v1 = v2;
      b_fcus->nni->swap_node_v2 = b_fcus->left;
      b_fcus->nni->swap_node_v3 = b_fcus->rght;
      b_fcus->nni->swap_node_v4 = v3;

      if(b_fcus->nni->best_l == NULL) b_fcus->nni->best_l = Duplicate_Scalar_Dbl(len1);
      else                            Copy_Scalar_Dbl(len1,b_fcus->nni->best_l);

      if(b_fcus->nni->best_v == NULL) b_fcus->nni->best_v = Duplicate_Scalar_Dbl(var1);
      else                            Copy_Scalar_Dbl(var1,b_fcus->nni->best_v);
    }
  else if(lk2 > MAX(lk0,lk1))
    {
      b_fcus->nni->best_conf    = 2;
      b_fcus->nni->swap_node_v1 = v2;
      b_fcus->nni->swap_node_v2 = b_fcus->left;
      b_fcus->nni->swap_node_v3 = b_fcus->rght;
      b_fcus->nni->swap_node_v4 = v4;

      if(b_fcus->nni->best_l == NULL) b_fcus->nni->best_l = Duplicate_Scalar_Dbl(len2);
      else                            Copy_Scalar_Dbl(len2,b_fcus->nni->best_l);

      if(b_fcus->nni->best_v == NULL) b_fcus->nni->best_v = Duplicate_Scalar_Dbl(var2);
      else                            Copy_Scalar_Dbl(var2,b_fcus->nni->best_v);
    }
  else
    {
      /* No configuration is strictly the best (ties): keep the current
         topology and flag the edge with a positive score. */
      b_fcus->nni->score        = +1.0;
      b_fcus->nni->best_conf    = 0;
      b_fcus->nni->swap_node_v1 = NULL;
      b_fcus->nni->swap_node_v2 = NULL;
      b_fcus->nni->swap_node_v3 = NULL;
      b_fcus->nni->swap_node_v4 = NULL;

      if(b_fcus->nni->best_l == NULL) b_fcus->nni->best_l = Duplicate_Scalar_Dbl(len0);
      else                            Copy_Scalar_Dbl(len0,b_fcus->nni->best_l);

      if(b_fcus->nni->best_v == NULL) b_fcus->nni->best_v = Duplicate_Scalar_Dbl(var0);
      else                            Copy_Scalar_Dbl(var0,b_fcus->nni->best_v);
    }


  if(do_swap == YES)
    {
      /* Apply the better exchange when at least one of them improves on the
         current topology. Note that this test uses the raw likelihoods, not
         the thresholded score above. */
      if((lk1 > lk0) || (lk2 > lk0))
        {
          tree->n_swap++;

          if(lk1 > lk2)
            {
              Swap(v2,b_fcus->left,b_fcus->rght,v3,tree);

              if(b_fcus->nni->best_l == NULL) b_fcus->nni->best_l = Duplicate_Scalar_Dbl(len1);
              else                            Copy_Scalar_Dbl(len1,b_fcus->nni->best_l);

              if(b_fcus->nni->best_v == NULL) b_fcus->nni->best_v = Duplicate_Scalar_Dbl(var1);
              else                            Copy_Scalar_Dbl(var1,b_fcus->nni->best_v);
            }
          else
            {
              Swap(v2,b_fcus->left,b_fcus->rght,v4,tree);

              if(b_fcus->nni->best_l == NULL) b_fcus->nni->best_l = Duplicate_Scalar_Dbl(len2);
              else                            Copy_Scalar_Dbl(len2,b_fcus->nni->best_l);

              if(b_fcus->nni->best_v == NULL) b_fcus->nni->best_v = Duplicate_Scalar_Dbl(var2);
              else                            Copy_Scalar_Dbl(var2,b_fcus->nni->best_v);
            }
        }
    }
  else
    {
      /* Evaluation only: put branch lengths and likelihood back as on entry. */
      Restore_Br_Len(tree);
      Update_PMat_At_Given_Edge(b_fcus,tree);
      tree->c_lnL = lk_init;
    }

  /* Release the local copies; the NNI record holds its own. */
  Free_Scalar_Dbl(len0);
  Free_Scalar_Dbl(len1);
  Free_Scalar_Dbl(len2);
  Free_Scalar_Dbl(var0);
  Free_Scalar_Dbl(var1);
  Free_Scalar_Dbl(var2);

}
1816
1817 //////////////////////////////////////////////////////////////
1818 //////////////////////////////////////////////////////////////
1819
1820
/* Parsimony counterpart of NNI(): score the two nearest-neighbour
 * interchanges around edge `b_fcus` with Update_Pars_At_Given_Edge and record
 * the outcome in b_fcus->nni.
 *
 * With v1,v2 the two neighbours of b_fcus->left away from the edge and v3,v4
 * those of b_fcus->rght:
 *   pars1 : parsimony score with v2 and v3 exchanged
 *   pars2 : parsimony score with v2 and v4 exchanged
 *   pars0 : parsimony score of the current topology
 * Each exchange is undone right after being scored, so the topology is left
 * unchanged. `do_swap` is not used by this function.
 *
 * On return nni->score = MIN(pars1,pars2) - pars0 (negative when an exchange
 * lowers the parsimony score) and nni->best_conf / nni->swap_node_v* describe
 * the configuration with the strictly smallest score. When no configuration
 * is strictly the best, the score is set to +1.0 and no exchange is proposed.
 *
 * Fix: configuration 2 used to be selected with `pars2 > MIN(pars0,pars1)`,
 * i.e. when it was NOT the best one, and was never selected when it actually
 * had the smallest score. The test is now `pars2 < MIN(pars0,pars1)`, in line
 * with the other two branches and with the likelihood version in NNI().
 *
 * Exits if the neighbours are not ordered as expected (v1->num >= v2->num,
 * v3->num >= v4->num) or if pars0 differs from tree->c_pars on entry.
 */
void NNI_Pars(t_tree *tree, t_edge *b_fcus, int do_swap)
{
  t_node *v1,*v2,*v3,*v4;
  int pars0, pars1, pars2;
  int pars_init;

  (void)do_swap; /* part of the interface; not used here */

  pars_init              = tree->c_pars;
  b_fcus->nni->best_conf = 0;
  b_fcus->nni->score     = +1.0;

  pars0 = pars1 = pars2  = 0;

  v1                     = b_fcus->left->v[b_fcus->l_v1];
  v2                     = b_fcus->left->v[b_fcus->l_v2];
  v3                     = b_fcus->rght->v[b_fcus->r_v1];
  v4                     = b_fcus->rght->v[b_fcus->r_v2];

  if(v1->num < v2->num) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
  if(v3->num < v4->num) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);


  /* Configuration 1: exchange v2 and v3, score, undo. */
  Swap(v2,b_fcus->left,b_fcus->rght,v3,tree);
  Set_Both_Sides(YES,tree);
  pars1 = Update_Pars_At_Given_Edge(b_fcus,tree);
  Swap(v3,b_fcus->left,b_fcus->rght,v2,tree);

  /* Configuration 2: exchange v2 and v4, score, undo. */
  Swap(v2,b_fcus->left,b_fcus->rght,v4,tree);
  Set_Both_Sides(YES,tree);
  pars2 = Update_Pars_At_Given_Edge(b_fcus,tree);
  Swap(v4,b_fcus->left,b_fcus->rght,v2,tree);

  /* Configuration 0: the current topology. Its score must match the one
     stored in the tree on entry. */
  Set_Both_Sides(YES,tree);
  pars0 = Update_Pars_At_Given_Edge(b_fcus,tree);

  if(pars0 != pars_init)
    {
      PhyML_Fprintf(stderr,"\n. pars_init = %d; pars0 = %d\n",
                    pars_init,
                    pars0);
      Warn_And_Exit("\n. Err. in NNI (3)\n");
    }

  tree->c_pars = pars0;

  b_fcus->nni->score = MIN(pars1,pars2) - pars0;

  if(pars0 < MIN(pars1,pars2))
    {
      /* The current topology is strictly the most parsimonious. */
      b_fcus->nni->best_conf    = 0;
      b_fcus->nni->swap_node_v1 = NULL;
      b_fcus->nni->swap_node_v2 = NULL;
      b_fcus->nni->swap_node_v3 = NULL;
      b_fcus->nni->swap_node_v4 = NULL;
    }
  else if(pars1 < MIN(pars0,pars2))
    {
      /* Exchanging v2 and v3 is strictly the best. */
      b_fcus->nni->best_conf    = 1;
      b_fcus->nni->swap_node_v1 = v2;
      b_fcus->nni->swap_node_v2 = b_fcus->left;
      b_fcus->nni->swap_node_v3 = b_fcus->rght;
      b_fcus->nni->swap_node_v4 = v3;
    }
  else if(pars2 < MIN(pars0,pars1))
    {
      /* Exchanging v2 and v4 is strictly the best. */
      b_fcus->nni->best_conf    = 2;
      b_fcus->nni->swap_node_v1 = v2;
      b_fcus->nni->swap_node_v2 = b_fcus->left;
      b_fcus->nni->swap_node_v3 = b_fcus->rght;
      b_fcus->nni->swap_node_v4 = v4;
    }
  else
    {
      /* Ties: no configuration is strictly the best, propose nothing. */
      b_fcus->nni->score        = +1.0;
      b_fcus->nni->swap_node_v1 = NULL;
      b_fcus->nni->swap_node_v2 = NULL;
      b_fcus->nni->swap_node_v3 = NULL;
      b_fcus->nni->swap_node_v4 = NULL;
    }
}
1909
1910 //////////////////////////////////////////////////////////////
1911 //////////////////////////////////////////////////////////////
1912
1913
/* Exchange the subtrees hanging from nodes `a` and `d`.
 *
 * `a` must be a neighbour of `b` and `d` a neighbour of `c`. On return `a` is
 * attached to `c` and `d` is attached to `b`; the two edges involved keep
 * their far ends (a, d) and have their other end, their l_r/r_l directions
 * and their l_v1/l_v2/r_v1/r_v2 indices updated accordingly. Update_Dirs is
 * then called on the tree, the root links are refreshed when the tree has a
 * root node, and the same exchange is applied to tree->next (through the
 * `next` member of the four nodes) when it exists.
 *
 * Does nothing when `tree` is NULL. Exits through Generic_Exit when a node
 * pointer is NULL or when the expected adjacencies (a-b, c-d) are not found.
 */
void Swap(t_node *a, t_node *b, t_node *c, t_node *d, t_tree *tree)
{
  int ab, ba, cd, dc, bc;
  int i;

  /*  \             /d      \             /a
   *   \           /         \           /
   *    \b__...__c/    ->     \b__...__c/
   *    /         \           /         \
   *   /           \         /           \
   *  /a            \       /d            \
   *
   * nodes b and c are not necessarily on the same branch
   */

  if(!tree) return;

  if(!a || !b || !c || !d) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);

  ab = ba = cd = dc = bc = -1;

  /* xy is the index of node y in the neighbour array of node x (-1 if y is
     not a neighbour of x). bc is only used in the diagnostic message. */
  for(i=0;i<3;i++) if(a->v[i] == b) { ab = i; break; }
  for(i=0;i<3;i++) if(b->v[i] == a) { ba = i; break; }
  for(i=0;i<3;i++) if(c->v[i] == d) { cd = i; break; }
  for(i=0;i<3;i++) if(d->v[i] == c) { dc = i; break; }
  for(i=0;i<3;i++) if(b->v[i] == c) { bc = i; break; }

  if(ab < 0 || ba < 0 || cd < 0 || dc < 0)
    {
      PhyML_Fprintf(stderr,"\n. ab=%d ba=%d cd=%d dc=%d bc=%d",ab,ba,cd,dc,bc);
      PhyML_Fprintf(stderr,"\n. Nodes %d %d %d %d.",a->num,b->num,c->num,d->num);
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  /* Rewire the neighbour pointers, then the edge slots of b and c: the slot
     of b that held the edge to a now holds d's edge, and the slot of c that
     held the edge to d now holds a's edge. */
  a->v[ab] = c;
  d->v[dc] = b;
  b->v[ba] = d;
  c->v[cd] = a;
  b->b[ba] = d->b[dc];
  c->b[cd] = a->b[ab];

  /* a's edge: replace its b end by c. */
  (a->b[ab]->left == b)?
    (a->b[ab]->left = c):
    (a->b[ab]->rght = c);

  /* d's edge: replace its c end by b. */
  (d->b[dc]->left == c)?
    (d->b[dc]->left = b):
    (d->b[dc]->rght = b);

  /* Recompute, for both edges, the index under which each end sees the
     other end (l_r: left sees rght, r_l: rght sees left). */
  for(i=0;i<3;i++)
    {
      if(a->b[ab]->left->v[i] == a->b[ab]->rght) a->b[ab]->l_r = i;
      if(a->b[ab]->rght->v[i] == a->b[ab]->left) a->b[ab]->r_l = i;
      if(d->b[dc]->left->v[i] == d->b[dc]->rght) d->b[dc]->l_r = i;
      if(d->b[dc]->rght->v[i] == d->b[dc]->left) d->b[dc]->r_l = i;
    }

  /* Reset, then recompute the indices of the two remaining neighbours on
     each side of both edges: *_v1 receives the first index that differs from
     the direction of the edge itself, *_v2 the other one. */
  a->b[ab]->l_v1 = a->b[ab]->l_v2 =
    a->b[ab]->r_v1 = a->b[ab]->r_v2 =
    d->b[dc]->l_v1 = d->b[dc]->l_v2 =
    d->b[dc]->r_v1 = d->b[dc]->r_v2 = -1;

  for(i=0;i<3;i++)
    {
      if(i != a->b[ab]->l_r)
        {
          if(a->b[ab]->l_v1 < 0) a->b[ab]->l_v1 = i;
          else a->b[ab]->l_v2 = i;
        }
      if(i != a->b[ab]->r_l)
        {
          if(a->b[ab]->r_v1 < 0) a->b[ab]->r_v1 = i;
          else a->b[ab]->r_v2 = i;
        }
      if(i != d->b[dc]->l_r)
        {
          if(d->b[dc]->l_v1 < 0) d->b[dc]->l_v1 = i;
          else d->b[dc]->l_v2 = i;
        }
      if(i != d->b[dc]->r_l)
        {
          if(d->b[dc]->r_v1 < 0) d->b[dc]->r_v1 = i;
          else d->b[dc]->r_v2 = i;
        }
    }

  Update_Dirs(tree);

  /* Rooted tree: re-attach the root node to the two ends of the root edge,
     which may have changed above. */
  if(tree->n_root != NULL)
    {
      tree->n_root->v[1]       = tree->e_root->left;
      tree->n_root->v[2]       = tree->e_root->rght;
      tree->n_root->b[1]->rght = tree->e_root->left;
      tree->n_root->b[2]->rght = tree->e_root->rght;
    }

  /* Apply the same exchange to the next tree of the chain, if any. */
  if(tree->next)
    Swap(a->next,b->next,c->next,d->next,tree->next);
}
2013
2014 //////////////////////////////////////////////////////////////
2015 //////////////////////////////////////////////////////////////
2016
/* Call Update_Partial_Lk(tree,b_fcus,a), then, unless `d` is a taxon
   (d->tax set), recurse through each edge of `d` that does not lead back to
   `a`, with `d` playing the role of `a` and the neighbour that of `d`. */
void Update_SubTree_Partial_Lk(t_edge *b_fcus, t_node *a, t_node *d, t_tree *tree)
{
  int dir;

  Update_Partial_Lk(tree,b_fcus,a);

  /* Nothing lies beyond a taxon. */
  if(d->tax) return;

  for(dir=0;dir<3;++dir)
    {
      if(d->v[dir] == a) continue; /* do not walk back towards a */
      Update_SubTree_Partial_Lk(d->b[dir],d,d->v[dir],tree);
    }
}
2024
2025 //////////////////////////////////////////////////////////////
2026 //////////////////////////////////////////////////////////////
2027
/* Copy, for each of the first tree->n_otu nodes, the name of the sequence
   with the same index in `data` into the node's existing name buffer. No
   memory is allocated here: the destination buffers must already be large
   enough. */
void Copy_Seq_Names_To_Tip_Labels(t_tree *tree, calign *data)
{
  int tip;

  tip = 0;
  while(tip < tree->n_otu)
    {
      strcpy(tree->a_nodes[tip]->name,data->c_seq[tip]->name);
      ++tip;
    }
}
2033
2034 //////////////////////////////////////////////////////////////
2035 //////////////////////////////////////////////////////////////
2036
/* Return a newly built copy of the compressed alignment `ori`.
 *
 * The copy is created with Make_Calign / Init_Calign and then filled with,
 * for the n_otu regular sequences and the n_rm sequences of c_seq_rm: name,
 * num, len, the crunch_len*io->state_len states (plus a terminating '\0') and
 * the is_ambigu flags; and, for the alignment itself: sitepatt, wght, ambigu,
 * invar, obs_state_frq, obs_pinvar, the length members, n_otu, n_rm and io.
 * Sequences of c_seq_rm are flagged is_duplicate = YES, the others NO.
 * Entries n_otu..n_otu+n_rm-1 of new->c_seq are finally made to point to the
 * entries of new->c_seq_rm.
 *
 * ori->c_seq is read up to index n_otu+n_rm-1 when collecting names.
 *
 * Fix: the temporary name array sp_names_in holds n_otu+n_rm strings but only
 * the first n_otu were released, leaking n_rm strings per call. All of them
 * are now freed.
 */
calign *Copy_Cseq(calign *ori, option *io)
{
  calign *new;
  int i,j,k,n_otu,n_rm,c_len;
  char **sp_names_in,**sp_names_out;

  n_otu = ori->n_otu;
  c_len = ori->crunch_len;
  n_rm  = ori->n_rm;

  /* Temporary copies of the sequence names handed to Make_Calign. */
  sp_names_in = (char **)mCalloc(n_otu+n_rm,sizeof(char *));
  for(i=0;i<n_otu+n_rm;i++)
    {
      sp_names_in[i] = (char *)mCalloc(strlen(ori->c_seq[i]->name)+1,sizeof(char));
      strcpy(sp_names_in[i],ori->c_seq[i]->name);
    }

  sp_names_out = (char **)mCalloc(n_rm,sizeof(char *));
  for(i=0;i<ori->n_rm;i++)
    {
      sp_names_out[i] = (char *)mCalloc(strlen(ori->c_seq_rm[i]->name)+1,sizeof(char));
      strcpy(sp_names_out[i],ori->c_seq_rm[i]->name);
    }

  new = Make_Calign(n_otu+n_rm,c_len+1,io->state_len,ori->init_len,sp_names_in,ori->n_rm,sp_names_out);
  new->n_rm = ori->n_rm;
  Init_Calign(n_otu,c_len+1,ori->init_len,new);

  /* Sequences listed in c_seq_rm. */
  for(i=0;i<ori->n_rm;++i)
    {
      strcpy(new->c_seq_rm[i]->name,ori->c_seq_rm[i]->name);
      for(j=0;j<ori->crunch_len;j++)
        {
          for(k=0;k<io->state_len;++k)
            new->c_seq_rm[i]->state[j*io->state_len+k] =
              ori->c_seq_rm[i]->state[j*io->state_len+k];

          new->c_seq_rm[i]->is_ambigu[j] = ori->c_seq_rm[i]->is_ambigu[j];
        }
      new->c_seq_rm[i]->len = ori->c_seq_rm[i]->len;
      new->c_seq_rm[i]->state[c_len*io->state_len] = '\0';
      new->c_seq_rm[i]->is_duplicate = YES;
    }

  new->obs_pinvar = ori->obs_pinvar;

  for(i=0;i<ori->n_otu;i++) new->c_seq[i]->num    = ori->c_seq[i]->num;
  for(i=0;i<ori->n_rm;i++)  new->c_seq_rm[i]->num = ori->c_seq_rm[i]->num;

  for(i=0;i<ori->init_len;i++) new->sitepatt[i] = ori->sitepatt[i];

  /* Regular sequences and per-pattern arrays. */
  for(j=0;j<ori->crunch_len;j++)
    {
      for(i=0;i<ori->n_otu;i++)
        {
          for(k=0;k<io->state_len;k++)
            {
              new->c_seq[i]->state[j*io->state_len+k] =
                ori->c_seq[i]->state[j*io->state_len+k];
            }
          new->c_seq[i]->is_ambigu[j] = ori->c_seq[i]->is_ambigu[j];
        }

      new->wght[j]   = ori->wght[j];
      new->ambigu[j] = ori->ambigu[j];
      new->invar[j]  = ori->invar[j];
    }

  for(i=0;i<ori->n_otu;i++)
    {
      new->c_seq[i]->len = ori->c_seq[i]->len;
      strcpy(new->c_seq[i]->name,ori->c_seq[i]->name);
      new->c_seq[i]->is_duplicate = NO;
    }

  for(i=0;i<ori->n_otu;i++) new->c_seq[i]->state[c_len*io->state_len] = '\0';

  for(i=0;i<T_MAX_ALPHABET;i++) new->obs_state_frq[i] = ori->obs_state_frq[i];

  new->init_len   = ori->init_len;
  new->clean_len  = ori->clean_len;
  new->crunch_len = ori->crunch_len;
  new->n_otu      = ori->n_otu;
  new->io         = ori->io;

  /* Entries past n_otu in c_seq refer to the c_seq_rm sequences. */
  for(i=n_otu;i<n_otu+n_rm;++i) new->c_seq[i] = new->c_seq_rm[i-n_otu];

  /* Release every temporary name: n_otu+n_rm strings were allocated in
     sp_names_in, n_rm in sp_names_out. */
  for(i=0;i<n_otu+n_rm;i++) Free(sp_names_in[i]);
  Free(sp_names_in);

  for(i=0;i<ori->n_rm;i++) Free(sp_names_out[i]);
  Free(sp_names_out);

  return new;
}
2132
2133 //////////////////////////////////////////////////////////////
2134 //////////////////////////////////////////////////////////////
2135
2136
/* Tell whether `filename` can be opened for reading.
 *
 * Returns 1 when fopen(filename,"r") succeeds (the stream is closed again
 * right away) and 0 otherwise. A NULL `filename` is reported as 0 instead of
 * being handed to fopen.
 */
int Filexists(char *filename)
{
  FILE *fp;

  if(filename == NULL) return 0;

  fp = fopen(filename,"r");
  if(fp == NULL) return 0;

  fclose(fp);
  return 1;
}
2147
2148 //////////////////////////////////////////////////////////////
2149 //////////////////////////////////////////////////////////////
2150
K80_dist(calign * data,phydbl g_shape)2151 matrix *K80_dist(calign *data, phydbl g_shape)
2152 {
2153 int i,j,k;
2154 int diff;
2155 matrix *mat;
2156 phydbl **len;
2157
2158 len = (phydbl **)mCalloc(data->n_otu,sizeof(phydbl *));
2159 for(i=0;i<data->n_otu;i++)
2160 len[i] = (phydbl *)mCalloc(data->n_otu,sizeof(phydbl));
2161
2162 mat = Make_Mat(data->n_otu);
2163
2164 Init_Mat(mat,data);
2165
2166 diff = 0;
2167
2168 For(i,data->c_seq[0]->len)
2169 {
2170 for(j=0;j<data->n_otu-1;j++)
2171 {
2172 for(k=j+1;k<data->n_otu;k++)
2173 {
2174 if(((data->c_seq[j]->state[i] == 'A' || data->c_seq[j]->state[i] == 'G') &&
2175 (data->c_seq[k]->state[i] == 'C' || data->c_seq[k]->state[i] == 'T'))||
2176 ((data->c_seq[j]->state[i] == 'C' || data->c_seq[j]->state[i] == 'T') &&
2177 (data->c_seq[k]->state[i] == 'A' || data->c_seq[k]->state[i] == 'G')))
2178 {
2179 diff++;
2180 mat->Q[j][k]+=data->wght[i];
2181 len[j][k]+=data->wght[i];
2182 len[k][j]=len[j][k];
2183 }
2184
2185 else
2186 if(((data->c_seq[j]->state[i] == 'A' && data->c_seq[k]->state[i] == 'G') ||
2187 (data->c_seq[j]->state[i] == 'G' && data->c_seq[k]->state[i] == 'A'))||
2188 ((data->c_seq[j]->state[i] == 'C' && data->c_seq[k]->state[i] == 'T') ||
2189 (data->c_seq[j]->state[i] == 'T' && data->c_seq[k]->state[i] == 'C')))
2190 {
2191 diff++;
2192 mat->P[j][k]+=data->wght[i];
2193 len[j][k]+=data->wght[i];
2194 len[k][j]=len[j][k];
2195 }
2196 else
2197 if((data->c_seq[j]->state[i] == 'A' ||
2198 data->c_seq[j]->state[i] == 'C' ||
2199 data->c_seq[j]->state[i] == 'G' ||
2200 data->c_seq[j]->state[i] == 'T')&&
2201 (data->c_seq[k]->state[i] == 'A' ||
2202 data->c_seq[k]->state[i] == 'C' ||
2203 data->c_seq[k]->state[i] == 'G' ||
2204 data->c_seq[k]->state[i] == 'T'))
2205 {
2206 len[j][k]+=data->wght[i];
2207 len[k][j]=len[j][k];
2208 }
2209 }
2210 }
2211 }
2212
2213
2214 for(i=0;i<data->n_otu-1;i++)
2215 for(j=i+1;j<data->n_otu;j++)
2216 {
2217 if(len[i][j] > .0)
2218 {
2219 mat->P[i][j] /= len[i][j];
2220 mat->Q[i][j] /= len[i][j];
2221 }
2222 else
2223 {
2224 mat->P[i][j] = .5;
2225 mat->Q[i][j] = .5;
2226 }
2227
2228 mat->P[j][i] = mat->P[i][j];
2229 mat->Q[j][i] = mat->Q[i][j];
2230
2231
2232 if((1-2*mat->P[i][j]-mat->Q[i][j] <= .0) || (1-2*mat->Q[i][j] <= .0))
2233 {
2234 mat->dist[i][j] = -1.;
2235 mat->dist[j][i] = -1.;
2236 continue;
2237 }
2238
2239 mat->dist[i][j] = (g_shape/2)*
2240 (POW(1-2*mat->P[i][j]-mat->Q[i][j],-1./g_shape) +
2241 0.5*POW(1-2*mat->Q[i][j],-1./g_shape) - 1.5);
2242
2243 if(mat->dist[i][j] > DIST_MAX) mat->dist[i][j] = DIST_MAX;
2244
2245 mat->dist[j][i] = mat->dist[i][j];
2246 }
2247
2248 for(i=0;i<data->n_otu;i++) free(len[i]);
2249 free(len);
2250 return mat;
2251 }
2252
2253 //////////////////////////////////////////////////////////////
2254 //////////////////////////////////////////////////////////////
2255
2256
JC69_Dist(calign * data,t_mod * mod)2257 matrix *JC69_Dist(calign *data, t_mod *mod)
2258 {
2259 int site,i,j,k;
2260 matrix *mat;
2261 phydbl **len;
2262 int datatype;
2263
2264
2265 len = (phydbl **)mCalloc(data->n_otu,sizeof(phydbl *));
2266 for(i=0;i<data->n_otu;i++)
2267 len[i] = (phydbl *)mCalloc(data->n_otu,sizeof(phydbl));
2268
2269 mat = Make_Mat(data->n_otu);
2270 Init_Mat(mat,data);
2271
2272 datatype = mod->io->datatype;
2273
2274 For(site,data->c_seq[0]->len)
2275 {
2276 for(j=0;j<data->n_otu-1;j++)
2277 {
2278 for(k=j+1;k<data->n_otu;k++)
2279 {
2280 if((!Is_Ambigu(data->c_seq[j]->state+site*mod->io->state_len,datatype,mod->io->state_len)) &&
2281 (!Is_Ambigu(data->c_seq[k]->state+site*mod->io->state_len,datatype,mod->io->state_len)))
2282 {
2283 len[j][k]+=data->wght[site];
2284 len[k][j]=len[j][k];
2285
2286
2287 if(strncmp(data->c_seq[j]->state+site*mod->io->state_len,
2288 data->c_seq[k]->state+site*mod->io->state_len,mod->io->state_len))
2289 /* if(!Are_Compatible(data->c_seq[j]->state+site*mod->io->state_len, */
2290 /* data->c_seq[k]->state+site*mod->io->state_len, */
2291 /* mod->io->state_len, */
2292 /* mod->io->datatype)) */
2293 mat->P[j][k]+=data->wght[site];
2294 }
2295 }
2296 }
2297 }
2298
2299
2300 for(i=0;i<data->n_otu-1;i++)
2301 for(j=i+1;j<data->n_otu;j++)
2302 {
2303 if(len[i][j] > .0) mat->P[i][j] /= len[i][j];
2304 else mat->P[i][j] = 1.;
2305
2306 mat->P[j][i] = mat->P[i][j];
2307
2308 if((1.-(mod->ns)/(mod->ns-1.)*mat->P[i][j]) < .0) mat->dist[i][j] = -1.;
2309 else
2310 mat->dist[i][j] = -(mod->ns-1.)/(mod->ns)*(phydbl)log(1.-(mod->ns)/(mod->ns-1.)*mat->P[i][j]);
2311
2312 /* PhyML_Printf("\n. Incorrect JC distances"); */
2313 /* mat->dist[i][j] = len[i][j]; */
2314
2315 if(mat->dist[i][j] > DIST_MAX) mat->dist[i][j] = DIST_MAX;
2316
2317 mat->dist[j][i] = mat->dist[i][j];
2318 }
2319
2320 for(i=0;i<data->n_otu;i++) free(len[i]);
2321 free(len);
2322
2323 return mat;
2324 }
2325
2326 //////////////////////////////////////////////////////////////
2327 //////////////////////////////////////////////////////////////
2328
Hamming_Dist(calign * data,t_mod * mod)2329 matrix *Hamming_Dist(calign *data, t_mod *mod)
2330 {
2331 int i,j,k;
2332 matrix *mat;
2333 phydbl **len;
2334 int datatype;
2335
2336 len = (phydbl **)mCalloc(data->n_otu,sizeof(phydbl *));
2337 for(i=0;i<data->n_otu;i++)
2338 len[i] = (phydbl *)mCalloc(data->n_otu,sizeof(phydbl));
2339
2340 mat = Make_Mat(data->n_otu);
2341 Init_Mat(mat,data);
2342
2343 datatype = mod->io->datatype;
2344
2345 for(i=0;i<data->crunch_len;i++)
2346 {
2347 for(j=0;j<data->n_otu-1;j++)
2348 {
2349 for(k=j+1;k<data->n_otu;k++)
2350 {
2351 if((!Is_Ambigu(data->c_seq[j]->state+i*mod->io->state_len,datatype,mod->io->state_len)) &&
2352 (!Is_Ambigu(data->c_seq[k]->state+i*mod->io->state_len,datatype,mod->io->state_len)))
2353 {
2354 len[j][k]+=data->wght[i];
2355 len[k][j]=len[j][k];
2356 /* if(data->c_seq[j]->state[i] != data->c_seq[k]->state[i]) */
2357 if(!Are_Compatible(data->c_seq[j]->state+i*mod->io->state_len,
2358 data->c_seq[k]->state+i*mod->io->state_len,
2359 mod->io->state_len,
2360 mod->io->datatype))
2361 {
2362 mat->P[j][k]+=data->wght[i];
2363 }
2364 }
2365 }
2366 }
2367 }
2368
2369 for(i=0;i<data->n_otu-1;i++)
2370 for(j=i+1;j<data->n_otu;j++)
2371 {
2372 if(len[i][j] > .0)
2373 {
2374 mat->P[i][j] /= len[i][j];
2375 }
2376 else
2377 {
2378 mat->P[i][j] = 1.;
2379 }
2380
2381 mat->P[j][i] = mat->P[i][j];
2382
2383 mat->dist[i][j] = mat->P[i][j];
2384
2385
2386 if(mat->dist[i][j] > DIST_MAX)
2387 {
2388 mat->dist[i][j] = DIST_MAX;
2389 }
2390 mat->dist[j][i] = mat->dist[i][j];
2391 }
2392
2393 for(i=0;i<data->n_otu;i++) free(len[i]);
2394 free(len);
2395
2396 return mat;
2397 }
2398
2399 //////////////////////////////////////////////////////////////
2400 //////////////////////////////////////////////////////////////
2401
/* Compare the first seq1->len characters of the two state strings.
   Returns YES when they all match, NO at the first mismatch.
   seq2->len is not consulted. */
short int Are_Sequences_Identical(align *seq1, align *seq2)
{
  int pos = 0;

  while(pos < seq1->len)
    {
      if(seq1->state[pos] != seq2->state[pos]) return NO;
      ++pos;
    }

  return YES;
}
2407 //////////////////////////////////////////////////////////////
2408 //////////////////////////////////////////////////////////////
2409
/* Flag duplicated sequences, move them to data->c_seq_rm and prune the
   matching tips from `tree`.

   Nothing is done when duplicates were already removed (data->n_rm > 0),
   when io->leave_duplicates is YES, or when there are fewer than 4 taxa.

   Steps:
     1. For every pair of identical sequences (Are_Sequences_Identical), the
        second one is flagged as duplicate, unless its tip in `tree` is
        attached to tree->e_root, in which case the first one is flagged.
        A note is printed for each pair found.
     2. Flagged sequences are appended to data->c_seq_rm.  As soon as only 3
        taxa would remain, the flags of the following sequences are cleared
        and the scan stops.
     3. data->c_seq is reordered by swapping each flagged entry with the next
        unflagged one.
     4. Remove_Duplicates_From_Tree() is called and data->n_otu / io->n_otu
        are set to the resulting tree->n_otu. */
void Remove_Duplicates(calign *data, option *io, t_tree *tree)
{
  int n_found,n_moved,n_taxa,a,b,tip;
  align *first,*second,*swap;

  if(data->n_rm > 0 || io->leave_duplicates == YES) return;

  n_taxa = data->n_otu;
  if(n_taxa < 4) return;

  /* Step 1: flag duplicates */
  n_found = 0;
  for(a=0; a<data->n_otu-1; ++a)
    {
      first = data->c_seq[a];
      if(first->is_duplicate == YES) continue;

      for(b=a+1; b<data->n_otu; ++b)
        {
          second = data->c_seq[b];
          if(Are_Sequences_Identical(first,second) != YES) continue;

          /* Locate the tip carrying the name of the second sequence */
          tip = 0;
          while(tip < n_taxa && strcmp(tree->a_nodes[tip]->name,second->name)) ++tip;
          assert(tip < n_taxa);

          if(tree->a_nodes[tip]->b[0] != tree->e_root) second->is_duplicate = YES;
          else                                         first->is_duplicate  = YES;

          if(n_found == 0) PhyML_Printf("\n");
          PhyML_Printf("\n. Note: taxon '%s' is a duplicate of taxon '%s'.",
                       second->name,first->name);

          n_found++;
        }
    }

  /* Step 2: collect the flagged sequences */
  n_moved = 0;
  for(a=0; a<n_taxa; ++a)
    {
      if(data->c_seq[a]->is_duplicate != YES) continue;

      if(n_moved == 0) data->c_seq_rm = (align **)mCalloc(1,sizeof(align *));
      else             data->c_seq_rm = (align **)mRealloc(data->c_seq_rm,n_moved+1,sizeof(align *));

      data->c_seq_rm[n_moved] = data->c_seq[a];
      n_moved++;

      if(n_taxa - n_moved == 3)
        {
          /* Keep at least 3 taxa: un-flag whatever follows and stop */
          for(b=a+1; b<n_taxa; ++b) data->c_seq[b]->is_duplicate = NO;
          break;
        }
    }

  data->n_rm = n_moved;

  if(n_moved == 0) return;

  /* Step 3: swap each flagged entry with the next unflagged one */
  for(a=0; a<n_taxa; ++a)
    {
      if(data->c_seq[a]->is_duplicate != YES) continue;

      for(b=a+1; b<n_taxa; ++b)
        {
          if(data->c_seq[b]->is_duplicate != NO) continue;

          swap           = data->c_seq[a];
          data->c_seq[a] = data->c_seq[b];
          data->c_seq[b] = swap;
          break;
        }
    }

  /* Step 4: update the tree and the taxon counts */
  Remove_Duplicates_From_Tree(data,tree);

  data->n_otu = tree->n_otu;
  io->n_otu   = tree->n_otu;
}
2494
2495 //////////////////////////////////////////////////////////////
2496 //////////////////////////////////////////////////////////////
2497
/* Prune from `tree` every tip whose name matches a sequence flagged as
   duplicate in `data`, unless the tip's edge b[0] is tree->e_root.

   For each such tip: Prune_Subtree() is called, then the tip's edge, the
   residual edge handed back by Prune_Subtree(), the tip's neighbour v[0] and
   the tip itself are freed and their slots in tree->a_edges / tree->a_nodes
   are set to NULL.

   Afterwards the entries stored at a_nodes[2n-2], a_edges[2n-3] and
   a_edges[2n-2] (n = tree->n_otu) are copied 2*data->n_rm slots lower and,
   when data->n_rm > 0, tree->n_otu is reduced by data->n_rm and
   Refactor_Tree() is called. */
void Remove_Duplicates_From_Tree(calign *data, t_tree *tree)
{
  int tip,seq;
  int n_tips,slot,last;
  t_edge *spare;

  n_tips = tree->n_otu;

  for(tip=0;tip<n_tips;++tip)
    {
      for(seq=0;seq<n_tips;++seq)
        {
          if(data->c_seq[seq]->is_duplicate != YES) continue;
          if(strcmp(tree->a_nodes[tip]->name,data->c_seq[seq]->name)) continue;
          if(tree->a_nodes[tip]->b[0] == tree->e_root) continue;

          Prune_Subtree(tree->a_nodes[tip]->v[0],
                        tree->a_nodes[tip],
                        NULL,&spare,tree);

          /* Edge attached to the pruned tip */
          slot = tree->a_nodes[tip]->b[0]->num;
          assert(tree->a_edges[slot] == tree->a_nodes[tip]->b[0]);
          Free_Edge_Length(tree->a_nodes[tip]->b[0]);
          Free_Edge(tree->a_nodes[tip]->b[0]);
          tree->a_edges[slot] = NULL;

          /* Residual edge returned by Prune_Subtree() */
          slot = spare->num;
          assert(tree->a_edges[slot] == spare);
          Free_Edge_Length(spare);
          Free_Edge(spare);
          tree->a_edges[slot] = NULL;

          /* Neighbour of the tip, then the tip itself */
          slot = tree->a_nodes[tip]->v[0]->num;
          Free_Node(tree->a_nodes[tip]->v[0]);
          tree->a_nodes[slot] = NULL;

          Free_Node(tree->a_nodes[tip]);
          tree->a_nodes[tip] = NULL;

          break;
        }
    }

  /* Move the last node slot and the last two edge slots down by 2*n_rm */
  last = 2*tree->n_otu-2;
  tree->a_nodes[last   - 2*data->n_rm] = tree->a_nodes[last];
  tree->a_edges[last-1 - 2*data->n_rm] = tree->a_edges[last-1];
  tree->a_edges[last   - 2*data->n_rm] = tree->a_edges[last];

  if(data->n_rm > 0)
    {
      tree->n_otu -= data->n_rm;
      Refactor_Tree(tree);
    }
}
2553
2554 //////////////////////////////////////////////////////////////
2555 //////////////////////////////////////////////////////////////
2556
/* Re-attach to `tree` the sequences stored in tree->data->c_seq_rm.

   The node and edge arrays are replaced by larger ones
   (2*n_otu-1 + 2*n_rm entries).  Tips keep their slots; the nodes stored at
   indices n_otu .. 2*n_otu-2 are shifted up by n_rm and renumbered.

   For each removed sequence, the first tip whose c_seq is identical
   (Are_Sequences_Identical) is looked up.  A new tip ("leaf") and a new
   internal node ("link") are created together with two edges, and
   Graft_Subtree() attaches them on the edge of the matching tip.  Around the
   graft: the new pendant edge length is set from tree->mod->l_min, the length
   of the matching tip's edge is doubled before the graft, and afterwards the
   residual edge receives the value of the tip's edge length while the tip's
   edge itself is set from tree->mod->l_min.  The residual edge gets
   support_val = -1.

   Finally tree->n_otu is increased by n_rm, Refactor_Tree() is called and the
   node/edges saved from the last slots on entry are written back to the last
   slots of the enlarged arrays.

   NOTE(review): when tree->n_root is set (root_off == 1) and all n_rm
   sequences find a match, the last link_slot equals the length of the node
   array -- confirm whether rooted trees can reach this function. */
void Insert_Duplicates(t_tree *tree)
{
  unsigned int rm,tip,k;
  unsigned int n_new_edges,n_new_nodes,root_off;
  unsigned int link_slot,leaf_slot,pend_slot,resid_slot;
  t_edge *pendant,*residual,**edges,*saved_b1,*saved_b2;
  t_node *link,*leaf,**nodes,*saved_n0;

  root_off = (tree->n_root) ? 1 : 3;

  /* Content of the last node slot and last two edge slots, restored at the end */
  saved_n0 = tree->a_nodes[2*tree->n_otu-2];
  saved_b1 = tree->a_edges[2*tree->n_otu-2];
  saved_b2 = tree->a_edges[2*tree->n_otu-3];

  /* Enlarged node array: tips stay in place, the others move up by n_rm */
  nodes = (t_node **)mCalloc(2*tree->n_otu-1 + tree->data->n_rm * 2,sizeof(t_node *));

  for(k=0;k<tree->n_otu;++k) nodes[k] = tree->a_nodes[k];
  for(k=tree->n_otu;k<2*tree->n_otu-1;++k)
    {
      nodes[k+tree->data->n_rm]      = tree->a_nodes[k];
      nodes[k+tree->data->n_rm]->num = k+tree->data->n_rm;
    }

  Free(tree->a_nodes);
  tree->a_nodes = nodes;

  /* Enlarged edge array; it replaces tree->a_edges only after the grafts */
  edges = (t_edge **)mCalloc(2*tree->n_otu-1+tree->data->n_rm*2,sizeof(t_edge *));
  for(k=0;k<2*tree->n_otu-1;++k) edges[k] = tree->a_edges[k];

  n_new_edges = 0;
  n_new_nodes = 0;

  for(rm=0;rm<tree->data->n_rm;++rm)
    {
      for(tip=0;tip<tree->n_otu;++tip)
        {
          if(Are_Sequences_Identical(tree->data->c_seq_rm[rm],tree->a_nodes[tip]->c_seq) != YES) continue;

          /* New nodes */
          link_slot = 2*tree->n_otu-root_off+tree->data->n_rm+n_new_nodes+1;
          leaf_slot = tree->n_otu+n_new_nodes;

          link = Make_Node_Light(link_slot);
          leaf = Make_Node_Light(leaf_slot);

          nodes[leaf_slot] = leaf;
          nodes[link_slot] = link;

          n_new_nodes += 1;

          leaf->c_seq = tree->data->c_seq_rm[rm];

          leaf->name = (char *)mCalloc((int)strlen(tree->data->c_seq_rm[rm]->name)+1,sizeof(char));
          leaf->ori_name = leaf->name;
          strcpy(leaf->name,tree->data->c_seq_rm[rm]->name);

          link->v[0] = leaf;
          link->v[1] = NULL;
          link->v[2] = NULL;

          leaf->v[0] = link;
          leaf->v[1] = NULL;
          leaf->v[2] = NULL;

          leaf->tax = YES;
          link->tax = NO;

          /* New edges */
          pend_slot  = 2*tree->n_otu-root_off+n_new_edges;
          resid_slot = pend_slot+1;

          pendant  = Make_Edge_Light(link,leaf,pend_slot);
          residual = Make_Edge_Light(leaf,link,resid_slot);

          edges[pend_slot]  = pendant;
          edges[resid_slot] = residual;

          pendant->rght = leaf;
          pendant->left = link;

          residual->rght = link;
          residual->left = tree->a_nodes[tip]->b[0]->left;

          leaf->b[0] = pendant;
          link->b[0] = pendant;

          n_new_edges += 2;

          /* Branch lengths and graft; tree->a_nodes[tip]->b[0] is re-read
             after Graft_Subtree() on purpose, as in the original sequence */
          Set_Scalar_Dbl(tree->mod->l_min,pendant->l);

          Multiply_Scalar_Dbl(2.0,tree->a_nodes[tip]->b[0]->l);
          Graft_Subtree(tree->a_nodes[tip]->b[0],
                        link,
                        leaf,
                        residual,
                        tree->a_nodes[tip],
                        tree);
          Set_Scalar_Dbl(tree->a_nodes[tip]->b[0]->l->v,residual->l);
          Set_Scalar_Dbl(tree->mod->l_min,tree->a_nodes[tip]->b[0]->l);
          residual->support_val = -1.;

          break;
        }
    }

  Free(tree->a_edges);
  tree->a_edges = edges;

  tree->n_otu += tree->data->n_rm;

  Refactor_Tree(tree);

  tree->a_nodes[2*tree->n_otu-2] = saved_n0;
  tree->a_edges[2*tree->n_otu-2] = saved_b1;
  tree->a_edges[2*tree->n_otu-3] = saved_b2;
}
2672
2673
2674 //////////////////////////////////////////////////////////////
2675 //////////////////////////////////////////////////////////////
2676
2677
2678 /* Test if the given site pattern is invariant. Does not handle ambiguities */
2679
/* Returns 1 when the states found at offset `patt_num` are pairwise
   compatible (Are_Compatible) across all data->n_otu sequences, 0 as soon as
   one incompatible pair is met.  `patt_num` is used directly as the character
   offset into each state string; it is not multiplied by `stepsize` here. */
int Is_Invar(int patt_num, int stepsize, int datatype, calign *data)
{
  int a,b;

  for(a=0;a<data->n_otu;a++)
    {
      for(b=0;b<data->n_otu;b++)
        {
          if(!Are_Compatible(data->c_seq[a]->state+patt_num,
                             data->c_seq[b]->state+patt_num,
                             stepsize,
                             datatype))
            {
              return 0;
            }
        }
    }

  return 1;
}
2702
2703
2704 //////////////////////////////////////////////////////////////
2705 //////////////////////////////////////////////////////////////
2706
Is_Ambigu(char * state,int datatype,int stepsize)2707 int Is_Ambigu(char *state, int datatype, int stepsize)
2708 {
2709 int val,i;
2710
2711 val = -1;
2712 if(datatype == NT)
2713 {
2714 for(i=0;i<stepsize;i++)
2715 {
2716 switch(state[i])
2717 {
2718 case 'A' : case 'C' : case 'G' : case 'T' : case 'U' : { val=NO; break; }
2719 default : { val=YES; break; }
2720 }
2721 if(val == YES) break;
2722 }
2723 }
2724 else if(datatype == AA)
2725 {
2726 switch(state[0])
2727 {
2728 case 'X' : case '?' : case '-' : case '.' : {val=YES; break; }
2729 default : { val=NO; break; }
2730 }
2731 }
2732 else if(datatype == GENERIC)
2733 {
2734 int i;
2735 for(i=0;i<stepsize;i++) if(!isdigit(state[i])) break;
2736 if(i == stepsize) val = NO;
2737 else val = YES;
2738 }
2739
2740 return val;
2741 }
2742
2743 //////////////////////////////////////////////////////////////
2744 //////////////////////////////////////////////////////////////
2745
/* Refresh the ambiguity flags of every site pattern.

   For each of the data->crunch_len patterns, data->ambigu[] and the
   per-sequence is_ambigu[] flags are first reset to NO; then each sequence
   whose state at offset pattern*stepsize is ambiguous (Is_Ambigu) gets its
   own flag and the pattern-wide flag set to YES. */
void Check_Ambiguities(calign *data, int datatype, int stepsize)
{
  int patt,tax;

  for(patt=0;patt<data->crunch_len;patt++)
    {
      data->ambigu[patt] = NO;
      for(tax=0;tax<data->n_otu;tax++) data->c_seq[tax]->is_ambigu[patt] = NO;

      for(tax=0;tax<data->n_otu;tax++)
        {
          if(!Is_Ambigu(data->c_seq[tax]->state+patt*stepsize,datatype,stepsize)) continue;

          data->ambigu[patt]                = YES;
          data->c_seq[tax]->is_ambigu[patt] = YES;
        }
    }
}
2770
2771 //////////////////////////////////////////////////////////////
2772 //////////////////////////////////////////////////////////////
2773
/* Fill d_state[] with the integer code (Assign_State) of every
   non-ambiguous state.

   data     : alignment; patterns 0 .. data->crunch_len-1 are processed for
              each of the data->n_otu sequences.
   datatype : forwarded to Assign_State().
   stepsize : number of characters per state.

   Pattern j of a sequence starts at character j*stepsize of its state
   string, the same offset used when the is_ambigu[] flags are computed in
   Check_Ambiguities().  Entries flagged as ambiguous are left untouched. */
void Set_D_States(calign *data, int datatype, int stepsize)
{
  int patt,tax;

  for(patt=0;patt<data->crunch_len;patt++)
    {
      for(tax=0;tax<data->n_otu;tax++)
        {
          if(data->c_seq[tax]->is_ambigu[patt] != NO) continue;

          data->c_seq[tax]->d_state[patt] = Assign_State(data->c_seq[tax]->state+patt*stepsize,
                                                         datatype,
                                                         stepsize);
        }
    }
}
2791
2792 //////////////////////////////////////////////////////////////
2793 //////////////////////////////////////////////////////////////
2794
Get_State_From_Ui(int ui,int datatype)2795 int Get_State_From_Ui(int ui, int datatype)
2796 {
2797 if(datatype == NT)
2798 {
2799 switch(ui)
2800 {
2801 case 1 : {return 0; break;}
2802 case 2 : {return 1; break;}
2803 case 4 : {return 2; break;}
2804 case 8 : {return 3; break;}
2805 default :
2806 {
2807 PhyML_Fprintf(stderr,"\n. ui=%d",ui);
2808 PhyML_Fprintf(stderr,"\n. Err in file %s at line %d\n",__FILE__,__LINE__);
2809 Warn_And_Exit("\n. PhyML finished prematurely.");
2810 break;
2811 }
2812 }
2813 }
2814 else if(datatype == AA)
2815 {
2816 switch(ui)
2817 {
2818 case 1 : {return 0; break;}
2819 case 2 : {return 1; break;}
2820 case 4 : {return 2; break;}
2821 case 8 : {return 3; break;}
2822 case 16 : {return 4; break;}
2823 case 32 : {return 5; break;}
2824 case 64 : {return 6; break;}
2825 case 128 : {return 7; break;}
2826 case 256 : {return 8; break;}
2827 case 512 : {return 9; break;}
2828 case 1024 : {return 10; break;}
2829 case 2048 : {return 11; break;}
2830 case 4096 : {return 12; break;}
2831 case 8192 : {return 13; break;}
2832 case 16384 : {return 14; break;}
2833 case 32768 : {return 15; break;}
2834 case 65536 : {return 16; break;}
2835 case 131072 : {return 17; break;}
2836 case 262144 : {return 18; break;}
2837 case 524288 : {return 19; break;}
2838 default :
2839 {
2840 PhyML_Fprintf(stderr,"\n. ui=%d",ui);
2841 PhyML_Fprintf(stderr,"\n. Err in file %s at line %d\n",__FILE__,__LINE__);
2842 Warn_And_Exit("\n. PhyML finished prematurely.");
2843 }
2844 }
2845 }
2846 else Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
2847 return -1;
2848 }
2849
2850 //////////////////////////////////////////////////////////////
2851 //////////////////////////////////////////////////////////////
2852
2853
Assign_State(char * c,int datatype,int stepsize)2854 int Assign_State(char *c, int datatype, int stepsize)
2855 {
2856 int state[3];
2857 int i;
2858
2859 state[0] = state[1] = state[2] = -1;
2860 if(datatype == NT)
2861 {
2862 for(i=0;i<stepsize;i++)
2863 {
2864 switch(c[i])
2865 {
2866 case 'A' : {state[i]=0; break;}
2867 case 'C' : {state[i]=1; break;}
2868 case 'G' : {state[i]=2; break;}
2869 case 'T' : {state[i]=3; break;}
2870 case 'U' : {state[i]=3; break;}
2871 default : {state[i]=-1; break;}
2872 }
2873 }
2874 return (stepsize>1)?(state[0]*16+state[1]*4+state[2]):(state[0]);
2875 }
2876 else if(datatype == AA)
2877 {
2878 switch(c[0])
2879 {
2880 case 'A' : {state[0]=0 ; break;}
2881 case 'R' : {state[0]=1 ; break;}
2882 case 'N' : {state[0]=2 ; break;}
2883 case 'D' : {state[0]=3 ; break;}
2884 case 'C' : {state[0]=4 ; break;}
2885 case 'Q' : {state[0]=5 ; break;}
2886 case 'E' : {state[0]=6 ; break;}
2887 case 'G' : {state[0]=7 ; break;}
2888 case 'H' : {state[0]=8 ; break;}
2889 case 'I' : {state[0]=9 ; break;}
2890 case 'L' : {state[0]=10; break;}
2891 case 'K' : {state[0]=11; break;}
2892 case 'M' : {state[0]=12; break;}
2893 case 'F' : {state[0]=13; break;}
2894 case 'P' : {state[0]=14; break;}
2895 case 'S' : {state[0]=15; break;}
2896 case 'T' : {state[0]=16; break;}
2897 case 'W' : {state[0]=17; break;}
2898 case 'Y' : {state[0]=18; break;}
2899 case 'V' : {state[0]=19; break;}
2900
2901 case 'B' : {state[0] = 2; break;}
2902 case 'Z' : {state[0] = 5; break;}
2903 default : {state[0]=-1; break;}
2904 }
2905 return state[0];
2906 }
2907 else if(datatype == GENERIC)
2908 {
2909 char format[6];
2910 int ret;
2911
2912 sprintf(format,"%%%dd",stepsize);
2913 ret = sscanf(c,format,state);
2914 if(!ret) state[0] = -1;
2915 return state[0];
2916 }
2917 else
2918 {
2919 PhyML_Printf("\n. Not implemented yet.\n");
2920 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
2921 }
2922
2923 return -1;
2924 }
2925
2926 //////////////////////////////////////////////////////////////
2927 //////////////////////////////////////////////////////////////
2928
Reciproc_Assign_State(int i_state,int datatype)2929 char Reciproc_Assign_State(int i_state, int datatype)
2930 {
2931 if(datatype == NT)
2932 {
2933 i_state = i_state%4;
2934 switch(i_state)
2935 {
2936 case 0 : {return 'A'; break;}
2937 case 1 : {return 'C'; break;}
2938 case 2 : {return 'G'; break;}
2939 case 3 : {return 'T'; break;}
2940 default :
2941 {
2942 PhyML_Printf("\n. i_state = %d",i_state);
2943 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
2944 Warn_And_Exit("\n. PhyML finished prematurely.");
2945 break;
2946 }
2947 }
2948 }
2949 else if(datatype == AA)
2950 {
2951 i_state = i_state%20;
2952 switch(i_state)
2953 {
2954 case 0 : {return 'A' ; break;}
2955 case 1 : {return 'R' ; break;}
2956 case 2 : {return 'N' ; break;}
2957 case 3 : {return 'D' ; break;}
2958 case 4 : {return 'C' ; break;}
2959 case 5 : {return 'Q' ; break;}
2960 case 6 : {return 'E' ; break;}
2961 case 7 : {return 'G' ; break;}
2962 case 8 : {return 'H' ; break;}
2963 case 9 : {return 'I' ; break;}
2964 case 10 : {return 'L'; break;}
2965 case 11 : {return 'K'; break;}
2966 case 12 : {return 'M'; break;}
2967 case 13 : {return 'F'; break;}
2968 case 14 : {return 'P'; break;}
2969 case 15 : {return 'S'; break;}
2970 case 16 : {return 'T'; break;}
2971 case 17 : {return 'W'; break;}
2972 case 18 : {return 'Y'; break;}
2973 case 19 : {return 'V'; break;}
2974 default :
2975 {
2976 PhyML_Printf("\n. Err. in file %s at line %d\n",__FILE__,__LINE__);
2977 Warn_And_Exit("\n. PhyML finished prematurely.");
2978 break;
2979 }
2980 }
2981 }
2982 else if(datatype == GENERIC)
2983 {
2984 return i_state + '0';
2985 }
2986 return -1;
2987 }
2988
2989 //////////////////////////////////////////////////////////////
2990 //////////////////////////////////////////////////////////////
2991
2992
Assign_State_With_Ambiguity(char * c,int datatype,int stepsize)2993 int Assign_State_With_Ambiguity(char *c, int datatype, int stepsize)
2994 {
2995 int state[3];
2996 int i;
2997
2998 state[0] = state[1] = state[2] = -1;
2999 if(datatype == NT)
3000 {
3001 for(i=0;i<stepsize;i++)
3002 {
3003 switch(c[i])
3004 {
3005 case 'A' : {state[i]= 0; break;}
3006 case 'C' : {state[i]= 1; break;}
3007 case 'G' : {state[i]= 2; break;}
3008 case 'T' : {state[i]= 3; break;}
3009 case 'U' : {state[i]= 3; break;}
3010 case 'M' : {state[i]= 4; break;}
3011 case 'R' : {state[i]= 5; break;}
3012 case 'W' : {state[i]= 6; break;}
3013 case 'S' : {state[i]= 7; break;}
3014 case 'Y' : {state[i]= 8; break;}
3015 case 'K' : {state[i]= 9; break;}
3016 case 'B' : {state[i]=10; break;}
3017 case 'D' : {state[i]=11; break;}
3018 case 'H' : {state[i]=12; break;}
3019 case 'V' : {state[i]=13; break;}
3020 case 'N' : case 'X' : case '?' : case 'O' : case '-' : {state[i]=T_MAX_ALPHABET-1; break;}
3021 default :
3022 {
3023 PhyML_Printf("\n. Unknown character state : '%c'\n",c[i]);
3024 Warn_And_Exit("\n. Init failed (data type supposed to be DNA)\n");
3025 break;
3026 }
3027 }
3028 return (stepsize>1)?(state[0]*16+state[1]*4+state[2]):(state[0]);
3029 }
3030 }
3031 else if(datatype == AA)
3032 {
3033 switch(c[0])
3034 {
3035 case 'A' : {state[0]= 0; break;}
3036 case 'R' : {state[0]= 1; break;}
3037 case 'N' : {state[0]= 2; break;}
3038 case 'D' : {state[0]= 3; break;}
3039 case 'C' : {state[0]= 4; break;}
3040 case 'Q' : {state[0]= 5; break;}
3041 case 'E' : {state[0]= 6; break;}
3042 case 'G' : {state[0]= 7; break;}
3043 case 'H' : {state[0]= 8; break;}
3044 case 'I' : {state[0]= 9; break;}
3045 case 'L' : {state[0]=10; break;}
3046 case 'K' : {state[0]=11; break;}
3047 case 'M' : {state[0]=12; break;}
3048 case 'F' : {state[0]=13; break;}
3049 case 'P' : {state[0]=14; break;}
3050 case 'S' : {state[0]=15; break;}
3051 case 'T' : {state[0]=16; break;}
3052 case 'W' : {state[0]=17; break;}
3053 case 'Y' : {state[0]=18; break;}
3054 case 'V' : {state[0]=19; break;}
3055 case 'B' : {state[0]= 2; break;}
3056 case 'Z' : {state[0]= 5; break;}
3057 case 'X' : case '?' : case '-' : {state[0]=T_MAX_ALPHABET-1; break;}
3058 default :
3059 {
3060 PhyML_Printf("\n. Unknown character state : '%c'\n",state[0]);
3061 Warn_And_Exit("\n. Init failed (data type supposed to be amino-acids)\n");
3062 break;
3063 }
3064 }
3065 return state[0];
3066 }
3067 else if(datatype == GENERIC)
3068 {
3069 if(Is_Ambigu(c,GENERIC,stepsize)) state[0] = T_MAX_ALPHABET-1;
3070 else
3071 {
3072 char format[20];
3073 sprintf(format,"%%%dd",stepsize);
3074 if(!sscanf(c,format,state))
3075 {
3076 PhyML_Printf("\n. Error reading character. Was expecting an integer, got '%c' instead.\n",c[0]);
3077 PhyML_Printf("\n. Err. in file %s at line %d (function '%s')\n",__FILE__,__LINE__,__FUNCTION__);
3078 Warn_And_Exit("\n. PhyML finished prematurely.");
3079 }
3080 }
3081 return state[0];
3082 }
3083
3084 return -1;
3085 }
3086
3087 //////////////////////////////////////////////////////////////
3088 //////////////////////////////////////////////////////////////
3089
/* Reset to NULL the three neighbour pointers (v[]) and the three edge
   pointers (b[]) of the nodes stored in tree->a_nodes[0 .. 2*n_otu-3]. */
void Clean_Tree_Connections(t_tree *tree)
{
  int n,dir;
  t_node *nd;

  For(n,2*tree->n_otu-2)
    {
      nd = tree->a_nodes[n];
      for(dir=0;dir<3;++dir)
        {
          nd->v[dir] = NULL;
          nd->b[dir] = NULL;
        }
    }
}
3104
3105 //////////////////////////////////////////////////////////////
3106 //////////////////////////////////////////////////////////////
3107
/*
  if tree->io->do_boot is set      => classical FBP (Felsenstein bootstrap proportions)
  else if tree->io->do_tbe is set  => TBE (transfer bootstrap expectation)
*/
Bootstrap(t_tree * tree)3112 void Bootstrap(t_tree *tree)
3113 {
3114 int *site_num, n_site;
3115 int replicate,j,k;
3116 int position,init_len;
3117 calign *boot_data;
3118 t_tree *boot_tree;
3119 t_mod *boot_mod;
3120 matrix *boot_mat;
3121 char *s;
3122 /* phydbl rf; */
3123
3124
3125 if(tree->is_mixt_tree == YES)
3126 {
3127 PhyML_Printf("\n. Bootstrap option not yet available for partition/mixture analysis.");
3128 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
3129 }
3130
3131
3132 tree->io->print_support_val = YES;
3133
3134 boot_tree = NULL;
3135
3136 site_num = (int *)mCalloc(tree->data->init_len,sizeof(int));
3137
3138 Free_Bip(tree);
3139 Alloc_Bip(tree);
3140 Get_Bip(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);
3141
3142 n_site = 0;
3143 for(j=0;j<tree->data->crunch_len;j++)
3144 for(k=0;k<tree->data->wght[j];++k)
3145 {
3146 site_num[n_site] = j;
3147 n_site++;
3148 }
3149
3150 boot_data = Copy_Cseq(tree->data,tree->io);
3151
3152 PhyML_Printf("\n\n. Non parametric bootstrap analysis \n\n");
3153 PhyML_Printf(" [");
3154
3155 for(replicate=0;replicate<tree->io->n_boot_replicates;replicate++)
3156 {
3157 for(j=0;j<boot_data->crunch_len;j++) boot_data->wght[j] = 0;
3158
3159 init_len = 0;
3160 for(j=0;j<boot_data->init_len;j++)
3161 {
3162 position = Rand_Int(0,(int)(tree->data->init_len-1.0));
3163 boot_data->wght[site_num[position]] += 1;
3164 init_len++;
3165 }
3166
3167 if(init_len != tree->data->init_len) Exit("\n. Pb. when copying sequences\n");
3168
3169 init_len = 0;
3170 for(j=0;j<boot_data->crunch_len;j++) init_len += boot_data->wght[j];
3171
3172
3173 if(init_len != tree->data->init_len) Exit("\n. Pb. when copying sequences\n");
3174
3175
3176 if(tree->io->datatype == NT) Get_Base_Freqs(boot_data);
3177 else if(tree->io->datatype == AA) Get_AA_Freqs(boot_data);
3178
3179 if(tree->io->random_boot_seq_order) Randomize_Sequence_Order(boot_data);
3180
3181 Set_D_States(boot_data,tree->io->datatype,tree->io->state_len);
3182
3183 boot_mod = Copy_Model(tree->mod);
3184
3185 boot_mod->s_opt = tree->mod->s_opt; /* WARNING: re-using the same address here instead of creating a copying
3186 requires to leave the value of s_opt unchanged during the boostrap. */
3187 boot_mod->io = tree->io; /* WARNING: re-using the same address here instead of creating a copying
3188 requires to leave the value of io unchanged during the boostrap. */
3189
3190 Init_Model(boot_data,boot_mod,tree->io);
3191
3192 if(tree->io->in_tree == 2)
3193 {
3194 rewind(tree->io->fp_in_tree);
3195 boot_tree = Read_Tree_File_Phylip(tree->io->fp_in_tree);
3196 Remove_Duplicates_From_Tree(boot_data,boot_tree);
3197 }
3198 else
3199 {
3200 boot_mat = ML_Dist(boot_data,boot_mod);
3201 boot_mat->tree = Make_Tree_From_Scratch(boot_data->n_otu,boot_data);
3202 Fill_Missing_Dist(boot_mat);
3203 Bionj(boot_mat);
3204 boot_tree = boot_mat->tree;
3205 boot_tree->mat = boot_mat;
3206 }
3207
3208 boot_tree->mod = boot_mod;
3209 boot_tree->io = tree->io;
3210 boot_tree->data = boot_data;
3211 boot_tree->verbose = VL0;
3212 boot_tree->n_pattern = boot_tree->data->crunch_len;
3213 boot_tree->io->print_site_lnl = NO;
3214 boot_tree->io->print_trace = NO;
3215 boot_tree->io->print_json_trace = NO;
3216 boot_tree->n_root = NULL;
3217 boot_tree->e_root = NULL;
3218
3219 Set_Both_Sides(YES,boot_tree);
3220
3221 if((boot_tree->mod->s_opt->random_input_tree) && (boot_tree->mod->s_opt->topo_search == SPR_MOVE)) Random_Tree(boot_tree);
3222
3223 Connect_CSeqs_To_Nodes(boot_data,tree->io,boot_tree);
3224
3225 /* Make_Tree_For_Pars(boot_tree); */
3226 /* Make_Tree_For_Lk(boot_tree); */
3227 /* Make_Spr(boot_tree); */
3228
3229 Check_Br_Lens(boot_tree);
3230 Share_Lk_Struct(tree,boot_tree);
3231 Share_Spr_Struct(tree,boot_tree);
3232 Share_Pars_Struct(tree,boot_tree);
3233 Update_Dirs(boot_tree);
3234 Init_Partial_Lk_Tips_Double(boot_tree);
3235 Init_Ui_Tips(boot_tree);
3236 Init_Partial_Pars_Tips(boot_tree);
3237 Br_Len_Not_Involving_Invar(boot_tree);
3238
3239
3240 if(boot_tree->io->do_alias_subpatt)
3241 {
3242 MIXT_Set_Alias_Subpatt(YES,boot_tree);
3243 Lk(NULL,boot_tree);
3244 MIXT_Set_Alias_Subpatt(NO,boot_tree);
3245 }
3246
3247 Set_Update_Eigen(YES,boot_tree->mod);
3248 Lk(NULL,boot_tree);
3249 Set_Update_Eigen(NO,boot_tree->mod);
3250
3251 if(boot_tree->mod->s_opt->opt_topo)
3252 {
3253 Global_Spr_Search(boot_tree);
3254 }
3255 else
3256 {
3257 if(boot_tree->mod->s_opt->opt_subst_param || boot_tree->mod->s_opt->opt_bl)
3258 Round_Optimize(boot_tree,ROUND_MAX);
3259 else
3260 Lk(NULL,boot_tree);
3261 }
3262
3263 Free_Bip(boot_tree);
3264 Alloc_Bip(boot_tree);
3265 Match_Tip_Numbers(tree,boot_tree);
3266 Get_Bip(boot_tree->a_nodes[0],
3267 boot_tree->a_nodes[0]->v[0],
3268 boot_tree);
3269
3270 if(tree->io->do_boot) Compare_Bip(tree,boot_tree,NO);
3271 else if(tree->io->do_tbe) Compare_Bip_Distance(tree, boot_tree);
3272 else assert(FALSE);
3273
3274 Check_Br_Lens(boot_tree);
3275 Br_Len_Involving_Invar(boot_tree);
3276
3277 if(tree->io->print_boot_trees)
3278 {
3279 s = Write_Tree(boot_tree);
3280 PhyML_Fprintf(tree->io->fp_out_boot_tree,"%s\n",s);
3281 Free(s);
3282 Print_Fp_Out_Lines(tree->io->fp_out_boot_stats,0,0,boot_tree,tree->io,replicate+1);
3283 }
3284
3285
3286 PhyML_Printf(".");
3287 #ifndef QUIET
3288 fflush(stdout);
3289 #endif
3290 if(!((replicate+1)%tree->io->boot_prog_every))
3291 {
3292 PhyML_Printf("] %4d/%4d\n ",replicate+1,tree->io->n_boot_replicates);
3293 if(replicate != tree->io->n_boot_replicates-1) PhyML_Printf("[");
3294 }
3295
3296 Free_Tree(boot_tree);
3297 Free_Model(boot_mod);
3298 }
3299
3300 if(((replicate)%tree->io->boot_prog_every)) PhyML_Printf("] %4d/%4d\n ",replicate,tree->io->n_boot_replicates);
3301
3302 tree->lock_topo = YES; /* Topology should not be modified afterwards */
3303
3304 if(tree->io->print_boot_trees)
3305 {
3306 fclose(tree->io->fp_out_boot_tree);
3307 fclose(tree->io->fp_out_boot_stats);
3308 }
3309
3310 Free_Calign(boot_data);
3311 Free(site_num);
3312 }
3313
3314 //////////////////////////////////////////////////////////////
3315 //////////////////////////////////////////////////////////////
3316
/* Multiply the length of each of the first 2*n_otu-1 edges of tree by
   (1 - pinvar). Mixture trees are handed over to
   MIXT_Br_Len_Involving_Invar() instead. */
void Br_Len_Involving_Invar(t_tree *tree)
{
  if(tree->is_mixt_tree)
    {
      MIXT_Br_Len_Involving_Invar(tree);
    }
  else
    {
      const int n_edges = 2*tree->n_otu-1;
      int e;

      for(e=0;e<n_edges;e++)
        {
          tree->a_edges[e]->l->v *= (1.0-tree->mod->ras->pinvar->v);
        }
    }
}
3329
3330 //////////////////////////////////////////////////////////////
3331 //////////////////////////////////////////////////////////////
3332
/* Divide the length of each of the first 2*n_otu-1 edges of tree by
   (1 - pinvar), i.e. the inverse of the scaling applied by
   Br_Len_Involving_Invar(). Mixture trees are handed over to
   MIXT_Br_Len_Not_Involving_Invar() instead. */
void Br_Len_Not_Involving_Invar(t_tree *tree)
{
  if(tree->is_mixt_tree)
    {
      MIXT_Br_Len_Not_Involving_Invar(tree);
    }
  else
    {
      const int n_edges = 2*tree->n_otu-1;
      int e;

      for(e=0;e<n_edges;e++)
        {
          tree->a_edges[e]->l->v /= (1.0-tree->mod->ras->pinvar->v);
        }
    }
}
3345
3346 //////////////////////////////////////////////////////////////
3347 //////////////////////////////////////////////////////////////
3348
3349
Getstring_Stdin(char * s)3350 void Getstring_Stdin(char *s)
3351 {
3352 if(!fgets(s,T_MAX_LINE,stdin)) Exit("");
3353 if (strchr(s, '\n') != NULL)
3354 *strchr(s, '\n') = '\0';
3355 }
3356
3357 //////////////////////////////////////////////////////////////
3358 //////////////////////////////////////////////////////////////
3359
/* Helper for Num_Derivatives_One_Param(): evaluate (*func)(tree) with the
   parameter vector temporarily mapped as requested, then undo the mapping.
     - logt == YES        : each param[k] is replaced by exp(min(100,param[k]))
                            before the call and by its log afterwards;
     - is_positive == YES : each param[k] is replaced by its absolute value
                            before the call and gets its sign back afterwards.
   sign[] must hold n_param entries; it is scratch space for the signs. */
static phydbl Num_Derivatives_One_Param_Eval(phydbl (*func)(t_tree *tree), t_tree *tree,
                                             phydbl *param, phydbl *sign, int n_param,
                                             int logt, int is_positive)
{
  int k;
  phydbl val;

  if(logt == YES) for(k=0;k<n_param;k++) param[k] = exp(MIN(1.E+2,param[k]));
  for(k=0;k<n_param;k++) sign[k] = param[k] > .0 ? 1. : -1.;
  if(is_positive == YES) for(k=0;k<n_param;k++) param[k] = FABS(param[k]);

  val = (*func)(tree);

  if(is_positive == YES) for(k=0;k<n_param;k++) param[k] *= sign[k];
  if(logt == YES) for(k=0;k<n_param;k++) param[k] = log(param[k]);

  return val;
}

/* Numerical derivative of func with respect to param[which].

   func        : function to differentiate. It only receives tree, so it is
                 expected to see the values stored in param[] through tree
                 (NOTE(review): confirm with callers).
   f0          : value of func at the unperturbed parameters; only used when
                 precise is false.
   param       : parameter vector (n_param entries), perturbed in place and
                 restored (up to rounding) before returning.
   stepsize    : initial step h; the program exits if it is below SMALL.
   logt        : YES if param[] is stored on the log scale.
   is_positive : YES if func must be called with absolute values.
   err         : written only when precise is true; receives the smallest
                 tableau error estimate found.
   precise     : false -> one-sided difference (f(x+h)-f0)/h;
                 true  -> central differences at up to 10 step sizes shrinking
                          by a factor 1.4, combined through an extrapolation
                          tableau (the scheme the 'Dfridr' message refers to).

   Returns the derivative estimate. */
phydbl Num_Derivatives_One_Param(phydbl (*func)(t_tree *tree), t_tree *tree,
                                 phydbl f0, phydbl *param, int which, int n_param, phydbl stepsize, int logt,
                                 phydbl *err, int precise, int is_positive)
{
  int i,j;
  phydbl errt,fac,hh,ans,*sign;
  phydbl a[11][11] = {{0}};
  const int n_iter = 10;

  /* Validate the step before allocating anything. */
  if(stepsize < SMALL) Warn_And_Exit("\n. h must be nonzero in Dfridr.");

  sign = (phydbl *)mCalloc(n_param,sizeof(phydbl));

  ans = .0;
  hh  = stepsize;

  if(!precise)
    {
      param[which] = param[which]+hh;
      a[0][0] = Num_Derivatives_One_Param_Eval(func,tree,param,sign,n_param,logt,is_positive);
      a[0][0] -= f0;
      a[0][0] /= hh;
      param[which] = param[which]-hh;

      ans = a[0][0];
    }
  else
    {
      /* Central difference at the initial step. Both evaluation points go
         through the same logt/is_positive mapping; the original code only
         mapped the forward point. */
      param[which] = param[which]+hh;
      a[0][0] = Num_Derivatives_One_Param_Eval(func,tree,param,sign,n_param,logt,is_positive);
      param[which] = param[which]-2*hh;
      a[0][0] -= Num_Derivatives_One_Param_Eval(func,tree,param,sign,n_param,logt,is_positive);
      a[0][0] /= (2.0*hh);
      param[which] = param[which]+hh;

      *err = 1e30;
      for(i=1;i<n_iter;i++)
        {
          hh /= 1.4;

          /* The evaluation helper uses its own loop index: the original
             inner loops reused i and destroyed the tableau column index. */
          param[which] = param[which]+hh;
          a[0][i] = Num_Derivatives_One_Param_Eval(func,tree,param,sign,n_param,logt,is_positive);
          param[which] = param[which]-2*hh;
          a[0][i] -= Num_Derivatives_One_Param_Eval(func,tree,param,sign,n_param,logt,is_positive);
          a[0][i] /= (2.0*hh);
          param[which] = param[which]+hh;

          /* Fill column i of the extrapolation tableau and keep the entry
             with the smallest error estimate. */
          fac = 1.4*1.4;
          for(j=1;j<=i;j++)
            {
              a[j][i] = (a[j-1][i]*fac-a[j-1][i-1])/(fac-1.0);
              fac = 1.4*1.4*fac;

              errt = MAX(FABS(a[j][i]-a[j-1][i]),FABS(a[j][i]-a[j-1][i-1]));

              if(errt <= *err)
                {
                  *err = errt;
                  ans  = a[j][i];
                }
            }

          /* Stop once the higher-order estimate gets markedly worse. */
          if(FABS(a[i][i]-a[i-1][i-1]) >= 2.0*(*err)) break;
        }
    }

  Free(sign);

  return ans;
}
3475
3476 //////////////////////////////////////////////////////////////
3477 //////////////////////////////////////////////////////////////
3478
/* Helper for Num_Derivatives_One_Param_Nonaligned(): evaluate (*func)(tree)
   with the values pointed to by param[0..n_param-1] temporarily mapped as
   requested, then undo the mapping.
     - logt == YES        : each value is replaced by exp(min(100,value))
                            before the call and by its log afterwards;
     - is_positive == YES : each value is replaced by its absolute value
                            before the call and gets its sign back afterwards.
   sign[] must hold n_param entries; it is scratch space for the signs. */
static phydbl Num_Derivatives_One_Param_Nonaligned_Eval(phydbl (*func)(t_tree *tree), t_tree *tree,
                                                        phydbl **param, phydbl *sign, int n_param,
                                                        int logt, int is_positive)
{
  int k;
  phydbl val;

  if(logt == YES) for(k=0;k<n_param;k++) *(param[k]) = exp(MIN(1.E+2,*(param[k])));
  for(k=0;k<n_param;k++) sign[k] = (*(param[k])) > .0 ? 1. : -1.;
  if(is_positive == YES) for(k=0;k<n_param;k++) (*(param[k])) = FABS(*(param[k]));

  val = (*func)(tree);

  if(is_positive == YES) for(k=0;k<n_param;k++) (*(param[k])) *= sign[k];
  if(logt == YES) for(k=0;k<n_param;k++) *(param[k]) = log(*(param[k]));

  return val;
}

/* Numerical derivative of func with respect to *(param[which]), where
   param is an array of n_param pointers to individually stored values.

   func        : function to differentiate. It only receives tree, so it is
                 expected to see the pointed-to values through tree
                 (NOTE(review): confirm with callers).
   f0          : value of func at the unperturbed parameters.
   stepsize    : initial step h; the program exits if it is below SMALL.
   logt        : YES if the values are stored on the log scale.
   is_positive : YES if func must be called with absolute values.
   err         : written only when precise is true; receives the smallest
                 tableau error estimate found.
   precise     : false -> single one-sided difference (f(x+h)-f0)/h;
                 true  -> one-sided differences at up to 10 step sizes
                          shrinking by a factor 1.4, combined through an
                          extrapolation tableau.

   Returns the derivative estimate. */
phydbl Num_Derivatives_One_Param_Nonaligned(phydbl (*func)(t_tree *tree), t_tree *tree,
                                            phydbl f0, phydbl **param, int which, int n_param, phydbl stepsize, int logt,
                                            phydbl *err, int precise, int is_positive)
{
  int i,j;
  phydbl errt,fac,hh,ans,*sign;
  phydbl a[11][11] = {{0}};
  const int n_iter = 10;

  /* Validate the step before allocating anything. */
  if(stepsize < SMALL) Warn_And_Exit("\n. h must be nonzero in Dfridr.");

  sign = (phydbl *)mCalloc(n_param,sizeof(phydbl));

  ans = .0;
  hh  = stepsize;

  if(!precise)
    {
      *(param[which]) = *(param[which])+hh;
      a[0][0] = Num_Derivatives_One_Param_Nonaligned_Eval(func,tree,param,sign,n_param,logt,is_positive);
      a[0][0] -= f0;
      a[0][0] /= hh;
      *(param[which]) = *(param[which])-hh;

      ans = a[0][0];
    }
  else
    {
      *(param[which]) = *(param[which])+hh;
      a[0][0] = Num_Derivatives_One_Param_Nonaligned_Eval(func,tree,param,sign,n_param,logt,is_positive);
      a[0][0] -= f0;
      a[0][0] /= hh;
      *(param[which]) = *(param[which])-hh;

      *err = 1e30;
      for(i=1;i<n_iter;i++)
        {
          hh /= 1.4;

          /* The evaluation helper uses its own loop index: the original
             inner loops reused i and destroyed the tableau column index. */
          *(param[which]) = *(param[which])+hh;
          a[0][i] = Num_Derivatives_One_Param_Nonaligned_Eval(func,tree,param,sign,n_param,logt,is_positive);
          a[0][i] -= f0;
          a[0][i] /= hh;
          *(param[which]) = *(param[which])-hh;

          /* Fill column i of the extrapolation tableau and keep the entry
             with the smallest error estimate. */
          fac = 1.4*1.4;
          for(j=1;j<=i;j++)
            {
              a[j][i] = (a[j-1][i]*fac-a[j-1][i-1])/(fac-1.0);
              fac = 1.4*1.4*fac;

              errt = MAX(FABS(a[j][i]-a[j-1][i]),FABS(a[j][i]-a[j-1][i-1]));

              if(errt <= *err)
                {
                  *err = errt;
                  ans  = a[j][i];
                }
            }

          /* Stop once the higher-order estimate gets markedly worse. */
          if(FABS(a[i][i]-a[i-1][i-1]) >= 2.0*(*err)) break;
        }
    }

  Free(sign);

  return ans;
}
3592
3593 //////////////////////////////////////////////////////////////
3594 //////////////////////////////////////////////////////////////
3595
/* Fill derivatives[0..n_param-1] with one-sided numerical derivatives of
   func with respect to each entry of param[].

   The reference value of func is computed once, with param[] mapped through
   exp(min(100,.)) when logt == YES and through the absolute value when
   is_positive == YES (both mappings are undone afterwards); it is then
   passed to Num_Derivatives_One_Param() (non-precise mode) for every
   parameter. Always returns 1. */
int Num_Derivative_Several_Param(t_tree *tree, phydbl *param, int n_param, phydbl stepsize, int logt,
                                 phydbl (*func)(t_tree *tree), phydbl *derivatives, int is_positive)
{
  phydbl *sgn,base_val,deriv_err;
  int k;

  sgn = (phydbl *)mCalloc(n_param,sizeof(phydbl));

  /* Map the parameters onto the scale func is evaluated on */
  if(logt == YES)
    {
      for(k=0;k<n_param;k++) param[k] = exp(MIN(1.E+2,param[k]));
    }
  for(k=0;k<n_param;k++) sgn[k] = (param[k] > .0) ? 1. : -1.;
  if(is_positive == YES)
    {
      for(k=0;k<n_param;k++) param[k] = FABS(param[k]);
    }

  base_val = (*func)(tree);

  /* Undo the mapping, in reverse order */
  if(is_positive == YES)
    {
      for(k=0;k<n_param;k++) param[k] *= sgn[k];
    }
  if(logt == YES)
    {
      for(k=0;k<n_param;k++) param[k] = log(param[k]);
    }

  k = 0;
  while(k < n_param)
    {
      derivatives[k] = Num_Derivatives_One_Param(func,tree,base_val,param,k,n_param,
                                                 stepsize,logt,&deriv_err,NO,is_positive);
      k++;
    }

  Free(sgn);

  return 1;
}
3632
3633 //////////////////////////////////////////////////////////////
3634 //////////////////////////////////////////////////////////////
3635
/* Same as Num_Derivative_Several_Param() but for parameters that are not
   stored contiguously: param is an array of n_param pointers to the
   individual values.

   The reference value of func is computed once, with the pointed-to values
   mapped through exp(min(100,.)) when logt == YES and through the absolute
   value when is_positive == YES (both mappings are undone afterwards); it
   is then passed to Num_Derivatives_One_Param_Nonaligned() (non-precise
   mode) for every parameter. Always returns 1. */
int Num_Derivative_Several_Param_Nonaligned(t_tree *tree, phydbl **param, int n_param, phydbl stepsize, int logt,
                                            phydbl (*func)(t_tree *tree), phydbl *derivatives, int is_positive)
{
  phydbl *sgn,base_val,deriv_err;
  int k;

  sgn = (phydbl *)mCalloc(n_param,sizeof(phydbl));

  /* Map the parameters onto the scale func is evaluated on */
  if(logt == YES)
    {
      for(k=0;k<n_param;k++) (*(param[k])) = exp(MIN(1.E+2,*(param[k])));
    }
  for(k=0;k<n_param;k++) sgn[k] = ((*(param[k])) > .0) ? 1. : -1.;
  if(is_positive == YES)
    {
      for(k=0;k<n_param;k++) *(param[k]) = FABS(*(param[k]));
    }

  base_val = (*func)(tree);

  /* Undo the mapping, in reverse order */
  if(is_positive == YES)
    {
      for(k=0;k<n_param;k++) *(param[k]) *= sgn[k];
    }
  if(logt == YES)
    {
      for(k=0;k<n_param;k++) (*(param[k])) = log(*(param[k]));
    }

  k = 0;
  while(k < n_param)
    {
      derivatives[k] = Num_Derivatives_One_Param_Nonaligned(func,tree,base_val,param,k,n_param,
                                                            stepsize,logt,&deriv_err,0,is_positive);
      k++;
    }

  Free(sgn);

  return 1;
}
3673
3674 //////////////////////////////////////////////////////////////
3675 //////////////////////////////////////////////////////////////
3676
3677
/* Compare the first state_size characters of state1 and state2.
   Returns 1 if they are all identical, 0 otherwise. */
int Compare_Two_States(char *state1, char *state2, int state_size)
{
  int pos = 0;

  /* Advance over the common prefix */
  while(pos < state_size && state1[pos] == state2[pos]) pos++;

  if(pos == state_size) return 1;
  return 0;
}
3689
3690 //////////////////////////////////////////////////////////////
3691 //////////////////////////////////////////////////////////////
3692
3693
/* Copy the first state_size characters of from into to, front to back.
   Nothing is copied when state_size is zero or negative. */
void Copy_One_State(char *from, char *to, int state_size)
{
  char *src = from;
  char *dst = to;
  int left;

  for(left=state_size;left>0;--left) *dst++ = *src++;
}
3699
3700 //////////////////////////////////////////////////////////////
3701 //////////////////////////////////////////////////////////////
3702
/* Copy the n x n matrix orig into cpy, entry by entry. Both matrices are
   accessed as arrays of row pointers. */
void Copy_Dist(phydbl **cpy, phydbl **orig, int n)
{
  int row,col;

  for(row=0;row<n;row++)
    {
      for(col=0;col<n;col++)
        {
          cpy[row][col] = orig[row][col];
        }
    }
}
3708
3709 //////////////////////////////////////////////////////////////
3710 //////////////////////////////////////////////////////////////
3711
/* Build and return a new model holding a copy of ori.

   The new model is created with Make_Model_Basic(), given the number of
   states, number of rate categories, model identifier and io pointer of
   ori, completed with Make_Model_Complete() and finally filled in by
   Record_Model(). The io structure is shared with ori, not duplicated. */
t_mod *Copy_Model(t_mod *ori)
{
  t_mod *cpy = Make_Model_Basic();

  /* These fields are set before Make_Model_Complete() is called
     (presumably because it sizes its allocations from them -- confirm). */
  cpy->io          = ori->io;
  cpy->whichmodel  = ori->whichmodel;
  cpy->ras->n_catg = ori->ras->n_catg;
  cpy->ns          = ori->ns;

  Make_Model_Complete(cpy);
  Record_Model(ori,cpy);

#ifdef BEAGLE
  cpy->optimizing_topology = ori->optimizing_topology;
  cpy->b_inst              = ori->b_inst;
#endif

  return cpy;
}
3733
3734 //////////////////////////////////////////////////////////////
3735 //////////////////////////////////////////////////////////////
3736
/* Copy the parameter values and settings of model ori into the already
   allocated model cpy.

   Scalars (kappa, alpha, lambda, pinvar, branch length multiplier, ...),
   model names, equilibrium frequencies, the rate matrix, the rate-across-
   sites vectors and the eigen decomposition buffers are copied element by
   element. The relative-rate vectors are only copied for the CUSTOM and GTR
   models. cpy's arrays must be large enough for ori->ns states and
   ori->ras->n_catg rate categories; no size check is made here. */
void Record_Model(t_mod *ori, t_mod *cpy)
{
  int i;
  int n_states,n_states_sq;

  cpy->ns                   = ori->ns;
  cpy->ras->n_catg          = ori->ras->n_catg;
  cpy->ras->normalise_rr    = ori->ras->normalise_rr;
  cpy->l_var_sigma          = ori->l_var_sigma;

  cpy->kappa->v             = ori->kappa->v;
  cpy->ras->alpha->v        = ori->ras->alpha->v;
  cpy->lambda->v            = ori->lambda->v;
  cpy->ras->pinvar->v       = ori->ras->pinvar->v;
  cpy->br_len_mult->v       = ori->br_len_mult->v;

  strcpy(cpy->modelname->s,ori->modelname->s);
  strcpy(cpy->custom_mod_string->s,ori->custom_mod_string->s);

  cpy->mod_num              = ori->mod_num;
  cpy->whichmodel           = ori->whichmodel;
  cpy->update_eigen         = ori->update_eigen;
  cpy->ras->invar           = ori->ras->invar;
  cpy->r_mat->n_diff_rr     = ori->r_mat->n_diff_rr;
  cpy->l_min                = ori->l_min;
  cpy->l_max                = ori->l_max;
  cpy->log_l                = ori->log_l;
  cpy->ras->free_mixt_rates = ori->ras->free_mixt_rates;
  cpy->ras->gamma_median    = ori->ras->gamma_median;

  n_states    = ori->ns;
  n_states_sq = n_states*n_states;

  if((ori->whichmodel == CUSTOM) || (ori->whichmodel == GTR))
    {
      /* One entry per unordered pair of states */
      for(i=0;i<n_states*(n_states-1)/2;i++)
        {
          cpy->r_mat->rr_num->v[i]       = ori->r_mat->rr_num->v[i];
          cpy->r_mat->rr_val->v[i]       = ori->r_mat->rr_val->v[i];
          cpy->r_mat->rr->v[i]           = ori->r_mat->rr->v[i];
          cpy->r_mat->n_rr_per_cat->v[i] = ori->r_mat->n_rr_per_cat->v[i];
        }
    }

  for(i=0;i<n_states;i++)
    {
      cpy->e_frq->pi->v[i]          = ori->e_frq->pi->v[i];
      cpy->e_frq->pi_unscaled->v[i] = ori->e_frq->pi_unscaled->v[i];
      cpy->e_frq->user_b_freq->v[i] = ori->e_frq->user_b_freq->v[i];
    }

  for(i=0;i<n_states_sq;i++) cpy->r_mat->qmat->v[i] = ori->r_mat->qmat->v[i];

  for(i=0;i<cpy->ras->n_catg;i++)
    {
      cpy->ras->gamma_r_proba->v[i]          = ori->ras->gamma_r_proba->v[i];
      cpy->ras->gamma_rr->v[i]               = ori->ras->gamma_rr->v[i];
      cpy->ras->gamma_r_proba_unscaled->v[i] = ori->ras->gamma_r_proba_unscaled->v[i];
      cpy->ras->gamma_rr_unscaled->v[i]      = ori->ras->gamma_rr_unscaled->v[i];
    }

  cpy->use_m4mod = ori->use_m4mod;

  /* Eigen decomposition buffers. The original code copied r_e_vect twice;
     the redundant pass has been removed. */
  cpy->eigen->size = ori->eigen->size;
  for(i=0;i<2*n_states;i++)
    {
      cpy->eigen->space[i]     = ori->eigen->space[i];
      cpy->eigen->space_int[i] = ori->eigen->space_int[i];
    }
  for(i=0;i<n_states;i++)
    {
      cpy->eigen->e_val[i]    = ori->eigen->e_val[i];
      cpy->eigen->e_val_im[i] = ori->eigen->e_val_im[i];
    }
  for(i=0;i<n_states_sq;i++)
    {
      cpy->eigen->r_e_vect[i]    = ori->eigen->r_e_vect[i];
      cpy->eigen->r_e_vect_im[i] = ori->eigen->r_e_vect_im[i];
      cpy->eigen->l_e_vect[i]    = ori->eigen->l_e_vect[i];
      cpy->eigen->q[i]           = ori->eigen->q[i];
    }

#ifdef BEAGLE
  cpy->b_inst              = ori->b_inst;
  cpy->optimizing_topology = ori->optimizing_topology;
#endif

}
3814
3815 //////////////////////////////////////////////////////////////
3816 //////////////////////////////////////////////////////////////
3817
3818 //////////////////////////////////////////////////////////////
3819 //////////////////////////////////////////////////////////////
3820
/* Check that each of the first 2*n_otu-2 entries of tree->a_nodes sits at
   the index given by its own num field. Prints a message and calls
   Generic_Exit() on the first mismatch. */
void Test_Node_Table_Consistency(t_tree *tree)
{
  const int n_nodes = 2*tree->n_otu-2;
  int idx = 0;

  while(idx < n_nodes)
    {
      if(tree->a_nodes[idx]->num != idx)
        {
          PhyML_Printf("\n. Node table is not consistent with node numbers.");
          Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
        }
      idx++;
    }
}
3834
3835 //////////////////////////////////////////////////////////////
3836 //////////////////////////////////////////////////////////////
3837
/* Helper for Get_Bip(): append node at the end of the node list *bip, which
   currently holds *size entries. The list grows by one slot; the program
   exits through Generic_Exit() if the reallocation fails (the original code
   dereferenced the unchecked realloc() result). */
static void Get_Bip_Append(t_node ***bip, int *size, t_node *node)
{
  t_node **grown;

  grown = (t_node **)realloc(*bip,(size_t)(*size+1)*sizeof(t_node *));
  if(!grown)
    {
      PhyML_Printf("\n. Could not allocate memory for a bipartition.");
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  *bip = grown;
  (*bip)[*size] = node;
  (*size)++;
}

/* Helper for Get_Bip(): sort the size entries of bip by increasing node
   number (num field), using repeated adjacent swaps. */
static void Get_Bip_Sort(t_node **bip, int size)
{
  int j,swapped;
  t_node *tmp;

  do
    {
      swapped = NO;
      for(j=0;j<size-1;j++)
        {
          if(bip[j]->num > bip[j+1]->num)
            {
              swapped    = YES;
              tmp        = bip[j];
              bip[j]     = bip[j+1];
              bip[j+1]   = tmp;
            }
        }
    }
  while(swapped == YES);
}

/* Recursively build, for the edge between nodes a and d (d being the node
   further down the traversal), the two lists of tips found on either side
   of that edge:
     - d->bip_node[x], where d->v[x] == a, receives the tips on d's side;
     - a->bip_node[y], where a->v[y] == d, receives the remaining tips.
   Both lists are sorted by increasing node number and their lengths are
   stored in the matching bip_size[] slots.

   For a tip d the function does nothing unless d->common is set. Tips of
   the complement of an internal node are only listed when their common
   flag is set; the complement of a tip lists every tip whose name differs
   from d's.

   The bip_size/bip_node arrays of the nodes must already exist (see
   Alloc_Bip()). The program exits if an argument is NULL, if a is not a
   neighbour of the internal node d, if memory runs out, or if the two
   sides of an edge do not add up to n_otu tips. */
void Get_Bip(t_node *a, t_node *d, t_tree *tree)
{
  int i,j,k;
  int d_a;

  if(!d || !a || !tree)
    {
      PhyML_Printf("\n. d: %p a: %p tree: %p",d,a,tree);
      PhyML_Printf("\n. Err. in file %s at line %d (function '%s').\n",__FILE__,__LINE__,__FUNCTION__);
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  if(d->tax)
    {
      if(!d->common) return;

      /* d's side of the edge is d itself */
      d->bip_node[0]    = (t_node **)mCalloc(1,sizeof(t_node *));
      d->bip_node[0][0] = d;
      d->bip_size[0]    = 1;
      d->bip_size[1]    = -1;
      d->bip_size[2]    = -1;

      for(i=0;i<3;i++)
        {
          if(a->v[i] != d) continue;

          /* a's side: every tip whose name differs from d's */
          a->bip_size[i] = 0;
          for(j=0;j<tree->n_otu;j++)
            {
              if(strcmp(tree->a_nodes[j]->name,d->name))
                Get_Bip_Append(&a->bip_node[i],&a->bip_size[i],tree->a_nodes[j]);
            }

          Get_Bip_Sort(a->bip_node[i],a->bip_size[i]);
          break;
        }

      return;
    }

  /* Internal node: process the subtrees first and locate the direction
     pointing back to a. */
  d_a = -1;
  for(i=0;i<3;i++)
    {
      if(d->v[i] != a) Get_Bip(d,d->v[i],tree);
      else             d_a = i;
    }

  /* The original code went on to index bip_size[-1] in this situation. */
  if(d_a < 0)
    {
      PhyML_Printf("\n. Node a is not a neighbour of node d.");
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  /* d's side: union of the tip lists of the two subtrees below d */
  d->bip_size[d_a] = 0;
  for(i=0;i<3;i++)
    {
      if(d->v[i] == a) continue;

      for(j=0;j<3;j++)
        {
          if(d->v[i]->v[j] == d)
            {
              for(k=0;k<d->v[i]->bip_size[j];k++)
                Get_Bip_Append(&d->bip_node[d_a],&d->bip_size[d_a],d->v[i]->bip_node[j][k]);
              break;
            }
        }
    }

  Get_Bip_Sort(d->bip_node[d_a],d->bip_size[d_a]);

  /* a's side: every 'common' tip that is not on d's side */
  for(i=0;i<3;i++)
    {
      if(a->v[i] != d) continue;

      a->bip_size[i] = 0;
      for(j=0;j<tree->n_otu;j++)
        {
          for(k=0;k<d->bip_size[d_a];k++)
            {
              if(d->bip_node[d_a][k] == tree->a_nodes[j]) break;
            }

          if((k == d->bip_size[d_a]) && (tree->a_nodes[j]->common))
            Get_Bip_Append(&a->bip_node[i],&a->bip_size[i],tree->a_nodes[j]);
        }

      Get_Bip_Sort(a->bip_node[i],a->bip_size[i]);

      if(a->bip_size[i] != tree->n_otu - d->bip_size[d_a])
        {
          PhyML_Printf("%d %d \n",a->bip_size[i],tree->n_otu - d->bip_size[d_a]);
          Warn_And_Exit("\n. Problem in counting bipartitions \n");
        }
      break;
    }
}
3992
3993 //////////////////////////////////////////////////////////////
3994 //////////////////////////////////////////////////////////////
3995
/* Allocate, for each of the first 2*n_otu-2 nodes of tree, the three-slot
   bip_size and bip_node arrays (one slot per direction). Does nothing if
   tree->has_bip is already set; sets it otherwise. */
void Alloc_Bip(t_tree *tree)
{
  int idx;

  if(!tree->has_bip)
    {
      tree->has_bip = YES;

      for(idx=0;idx<2*tree->n_otu-2;idx++)
        {
          t_node *nd = tree->a_nodes[idx];

          nd->bip_size = (int *)mCalloc(3,sizeof(int));
          nd->bip_node = (t_node ***)mCalloc(3,sizeof(t_node **));
        }
    }
}
4010
4011 //////////////////////////////////////////////////////////////
4012 //////////////////////////////////////////////////////////////
4013
/* Return a newly allocated array v of n ints where v[i] is the number of
   entries of u that are strictly greater than u[i] (so the largest value
   gets 0 and tied values get the same count). The caller owns the returned
   array.

   Loop indices are signed, like n: the original used unsigned indices,
   which made the 'i<n' tests mixed-sign comparisons. */
int *Order_Int(const int *u, const int n)
{
  int i,j;
  int *v;

  v = (int *)mCalloc(n,sizeof(int));

  for(i=0;i<n;++i)
    {
      int n_greater = 0;

      /* u[i] < u[i] is never true, so j == i needs no special case */
      for(j=0;j<n;++j)
        {
          if(u[i] < u[j]) n_greater++;
        }

      v[i] = n_greater;
    }

  return(v);
}
4037
4038 //////////////////////////////////////////////////////////////
4039 //////////////////////////////////////////////////////////////
4040
/* Return a newly allocated array v of n ints where v[i] is the number of
   entries of u that are strictly greater than u[i] (so the largest value
   gets 0 and tied values get the same count). The caller owns the returned
   array.

   Loop indices are signed, like n: the original used unsigned indices,
   which made the 'i<n' tests mixed-sign comparisons. */
int *Order_Dbl(const phydbl *u, const int n)
{
  int i,j;
  int *v;

  v = (int *)mCalloc(n,sizeof(int));

  for(i=0;i<n;++i)
    {
      int n_greater = 0;

      /* u[i] < u[i] is never true, so j == i needs no special case */
      for(j=0;j<n;++j)
        {
          if(u[i] < u[j]) n_greater++;
        }

      v[i] = n_greater;
    }

  return(v);
}
4064
4065 //////////////////////////////////////////////////////////////
4066 //////////////////////////////////////////////////////////////
4067
Sort_Phydbl_Increase(const void * a,const void * b)4068 int Sort_Phydbl_Increase(const void *a, const void *b)
4069 {
4070 if((*(phydbl *)(a)) <= (*(phydbl *)(b))) return -1;
4071 else return 1;
4072 }
4073
4074 //////////////////////////////////////////////////////////////
4075 //////////////////////////////////////////////////////////////
4076
/* qsort()-style comparison of two C strings: a and b each point to a
   'char *' element; the strings they designate are compared with strcmp(). */
int Sort_String(const void *a, const void *b)
{
  const char *lhs = *(const char **)a;
  const char *rhs = *(const char **)b;

  return strcmp(lhs,rhs);
}
4081
4082 //////////////////////////////////////////////////////////////
4083 //////////////////////////////////////////////////////////////
4084
Compare_Bip(t_tree * tree1,t_tree * tree2,int on_existing_edges_only)4085 int Compare_Bip(t_tree *tree1, t_tree *tree2, int on_existing_edges_only)
4086 {
4087 int i,j,k;
4088 t_edge *b1,*b2;
4089 /* char **bip1,**bip2; */
4090 /* int *bip1,*bip2; */
4091 t_node **bip1, **bip2;
4092 int bip_size1, bip_size2, bip_size;
4093 int different,identical;
4094 int n_edges;
4095
4096
4097
4098 /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
4099 /* WARNING: call Match_Tip_Numbers and Get_Bip before using this function. */
4100 /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
4101
4102 if(on_existing_edges_only == YES)
4103 {
4104 n_edges = 0;
4105 for(i=0;i<2*tree1->n_otu-3;++i)
4106 if(tree1->a_edges[i]->does_exist && tree2->a_edges[i]->does_exist) n_edges++;
4107 n_edges -= tree1->n_otu;
4108 }
4109 else
4110 {
4111 n_edges = tree1->n_otu-3;
4112 }
4113
4114
4115 identical = 0;
4116 different = 0;
4117 for(i=0;i<2*tree1->n_otu-3;++i)
4118 {
4119 b1 = tree1->a_edges[i];
4120 bip_size1 = MIN(b1->left->bip_size[b1->l_r],b1->rght->bip_size[b1->r_l]);
4121
4122
4123 if(bip_size1 > 1 && ((on_existing_edges_only == YES && b1->does_exist) || (on_existing_edges_only == NO)))
4124 {
4125 for(j=0;j<2*tree2->n_otu-3;++j)
4126 {
4127 b2 = tree2->a_edges[j];
4128 bip_size2 = MIN(b2->left->bip_size[b2->l_r],b2->rght->bip_size[b2->r_l]);
4129
4130 if(bip_size2 > 1 && ((on_existing_edges_only == YES && b2->does_exist) || (on_existing_edges_only == NO)))
4131 {
4132 if(bip_size1 == bip_size2)
4133 {
4134 bip_size = bip_size1;
4135
4136 if(b1->left->bip_size[b1->l_r] == b1->rght->bip_size[b1->r_l])
4137 {
4138 /* if(b1->left->bip_name[b1->l_r][0][0] < b1->rght->bip_name[b1->r_l][0][0]) */
4139 if(b1->left->bip_node[b1->l_r][0]->num < b1->rght->bip_node[b1->r_l][0]->num)
4140 {
4141 /* bip1 = b1->left->bip_name[b1->l_r]; */
4142 bip1 = b1->left->bip_node[b1->l_r];
4143 }
4144 else
4145 {
4146 /* bip1 = b1->rght->bip_name[b1->r_l]; */
4147 bip1 = b1->rght->bip_node[b1->r_l];
4148 }
4149 }
4150 else if(b1->left->bip_size[b1->l_r] < b1->rght->bip_size[b1->r_l])
4151 {
4152 /* bip1 = b1->left->bip_name[b1->l_r]; */
4153 bip1 = b1->left->bip_node[b1->l_r];
4154 }
4155 else
4156 {
4157 /* bip1 = b1->rght->bip_name[b1->r_l]; */
4158 bip1 = b1->rght->bip_node[b1->r_l];
4159 }
4160
4161
4162 if(b2->left->bip_size[b2->l_r] == b2->rght->bip_size[b2->r_l])
4163 {
4164 /* if(b2->left->bip_name[b2->l_r][0][0] < b2->rght->bip_name[b2->r_l][0][0]) */
4165 if(b2->left->bip_node[b2->l_r][0]->num < b2->rght->bip_node[b2->r_l][0]->num)
4166 {
4167 /* bip2 = b2->left->bip_name[b2->l_r]; */
4168 bip2 = b2->left->bip_node[b2->l_r];
4169 }
4170 else
4171 {
4172 /* bip2 = b2->rght->bip_name[b2->r_l]; */
4173 bip2 = b2->rght->bip_node[b2->r_l];
4174 }
4175 }
4176 else if(b2->left->bip_size[b2->l_r] < b2->rght->bip_size[b2->r_l])
4177 {
4178 /* bip2 = b2->left->bip_name[b2->l_r]; */
4179 bip2 = b2->left->bip_node[b2->l_r];
4180 }
4181 else
4182 {
4183 /* bip2 = b2->rght->bip_name[b2->r_l]; */
4184 bip2 = b2->rght->bip_node[b2->r_l];
4185 }
4186
4187 if(bip_size == 1) Warn_And_Exit("\n. Problem in Compare_Bip\n");
4188
4189 for(k=0;k<bip_size;k++)
4190 {
4191 /* if(strcmp(bip1[k],bip2[k])) break; */
4192 if(bip1[k]->num != bip2[k]->num) break;
4193 }
4194
4195 if(k == bip_size) /* Branches b1 and b2 define the same bipartition */
4196 {
4197 b1->bip_score++;
4198 b2->bip_score++;
4199 identical++;
4200 goto out;
4201 }
4202 else
4203 {
4204 different++; // Bipartitions have identical sizes but distinct elements
4205 }
4206 }
4207 else different++; // Biparition have different sizes
4208 }
4209 }
4210 }
4211 out: ;
4212 }
4213
4214 return n_edges - identical;
4215 /* return different; */
4216 }
4217
4218
4219 /*
4220 Computes min transfer distance between branches of tree1 and tree2
4221 And adds these distances to tdist_score of each branches of tree1
4222 the score is not normalized yet by depth nor by number of bootstrap
4223 trees.
4224 This will be done at the end.
4225 */
Compare_Bip_Distance(t_tree * tree1,t_tree * tree2)4226 void Compare_Bip_Distance(t_tree *tree1, t_tree *tree2){
4227 int i;
4228 t_edge *cur_edge;
4229 short unsigned** i_matrix;
4230 short unsigned** c_matrix;
4231 short unsigned** hamming;
4232 short unsigned* min_dist;
4233 short unsigned* min_dist_edge;
4234 int* cluster_sizes;
4235
4236 Alloc_TBE_Matrices(tree1->n_otu, &i_matrix, &c_matrix, &hamming, &min_dist, &min_dist_edge, &cluster_sizes);
4237
4238 Update_All_IC_Ref_Tree(tree1, tree2, i_matrix, c_matrix, cluster_sizes);
4239 Update_All_IC_Boot_Tree(tree1, tree2, i_matrix, c_matrix,hamming,min_dist, min_dist_edge, cluster_sizes);
4240
4241 for(i=0; i<2*tree1->n_otu-3; i++){
4242 cur_edge = tree1->a_edges[i];
4243 cur_edge->tdist_score+=min_dist[cur_edge->num];
4244 }
4245
4246 Free_TBE_Matrices(tree1->n_otu, &i_matrix, &c_matrix, &hamming, &min_dist, &min_dist_edge, &cluster_sizes);
4247 }
4248
4249
4250 //////////////////////////////////////////////////////////////
4251 //////////////////////////////////////////////////////////////
4252
4253 /* Modifiy the tip numbering in tree2 so that tips in
4254 tree1 and tree2 corresponding to the same taxon name
4255 also have the same tip numbering */
Match_Tip_Numbers(t_tree * tree1,t_tree * tree2)4256 void Match_Tip_Numbers(t_tree *tree1, t_tree *tree2)
4257 {
4258 int i,j;
4259
4260 if(tree1->n_otu != tree2->n_otu)
4261 {
4262 PhyML_Printf("\n. tree1 and tree2 must have the same number of tips.");
4263 /* Otherwise, if tree2->n_otu < tree->n_otu, then some tips in tree2
4264 will have a number (->num) that is the same as the number of an
4265 internal node in this tree */
4266 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
4267 }
4268
4269 for(i=0;i<tree1->n_otu;i++)
4270 {
4271 for(j=0;j<tree2->n_otu;j++)
4272 {
4273 if(!strcmp(tree1->a_nodes[i]->name,tree2->a_nodes[j]->name))
4274 {
4275 tree2->a_nodes[j]->num = tree1->a_nodes[i]->num;
4276 break;
4277 }
4278 }
4279 }
4280
4281 }
4282
4283 //////////////////////////////////////////////////////////////
4284 //////////////////////////////////////////////////////////////
4285
/* Count the trees in the input tree file and store the result in
   io->n_trees: every line read from io->fp_in_tree that contains a ';'
   counts as one tree. Exits if bootstrap (do_boot or do_tbe) is requested
   together with more than one input tree. The file is rewound before
   returning. */
void Test_Multiple_Data_Set_Format(option *io)
{
  char *buf = (char *)mCalloc(T_MAX_LINE,sizeof(char));

  io->n_trees = 0;

  for(;;)
    {
      if(fgets(buf,T_MAX_LINE,io->fp_in_tree) == NULL) break;
      if(strchr(buf,';') != NULL) io->n_trees++;
    }

  Free(buf);

  if((io->n_trees > 1) && (io->do_boot || io->do_tbe))
    {
      Warn_And_Exit("\n. Bootstrap option is not allowed with multiple input trees !\n");
    }

  rewind(io->fp_in_tree);
}
4305
4306 //////////////////////////////////////////////////////////////
4307 //////////////////////////////////////////////////////////////
4308
4309
Are_Compatible(char * statea,char * stateb,int stepsize,int datatype)4310 int Are_Compatible(char *statea, char *stateb, int stepsize, int datatype)
4311 {
4312 int i,j;
4313 char a,b;
4314
4315 if(datatype == NT)
4316 {
4317 for(i=0;i<stepsize;i++)
4318 {
4319 a = statea[i];
4320 for(j=0;j<stepsize;j++)
4321 {
4322 b = stateb[j];
4323
4324 switch(a)
4325 {
4326 case 'A':
4327 {
4328 switch(b)
4329 {
4330 case 'A' :
4331 case 'M' :
4332 case 'R' :
4333 case 'W' :
4334 case 'D' :
4335 case 'H' :
4336 case 'V' :
4337 case 'X' : {break;}
4338 default : return 0;
4339 }
4340 break;
4341 }
4342 case 'G':
4343 {
4344 switch(b)
4345 {
4346 case 'G' :
4347 case 'R' :
4348 case 'S' :
4349 case 'K' :
4350 case 'B' :
4351 case 'D' :
4352 case 'V' :
4353 case 'X' : {break;}
4354 default : return 0;
4355 }
4356 break;
4357 }
4358 case 'C':
4359 {
4360 switch(b)
4361 {
4362 case 'C' :
4363 case 'M' :
4364 case 'S' :
4365 case 'Y' :
4366 case 'B' :
4367 case 'H' :
4368 case 'V' :
4369 case 'X' : { break;}
4370 default : return 0;
4371 }
4372 break;
4373 }
4374 case 'T':
4375 {
4376 switch(b)
4377 {
4378 case 'T' :
4379 case 'W' :
4380 case 'Y' :
4381 case 'K' :
4382 case 'B' :
4383 case 'D' :
4384 case 'H' :
4385 case 'X' : { break;}
4386 default : return 0;
4387 }
4388 break;
4389 }
4390 case 'M' :
4391 {
4392 switch(b)
4393 {
4394 case 'M' :
4395 case 'A' :
4396 case 'C' :
4397 case 'R' :
4398 case 'W' :
4399 case 'S' :
4400 case 'Y' :
4401 case 'B' :
4402 case 'D' :
4403 case 'H' :
4404 case 'V' :
4405 case 'X' : { break;}
4406 default : return 0;
4407 }
4408 break;
4409 }
4410 case 'R' :
4411 {
4412 switch(b)
4413 {
4414 case 'R' :
4415 case 'A' :
4416 case 'G' :
4417 case 'M' :
4418 case 'W' :
4419 case 'S' :
4420 case 'K' :
4421 case 'B' :
4422 case 'D' :
4423 case 'H' :
4424 case 'V' :
4425 case 'X' : { break;}
4426 default : return 0;
4427 }
4428 break;
4429 }
4430
4431 case 'W' :
4432 {
4433 switch(b)
4434 {
4435 case 'W' :
4436 case 'A' :
4437 case 'T' :
4438 case 'M' :
4439 case 'R' :
4440 case 'Y' :
4441 case 'K' :
4442 case 'B' :
4443 case 'D' :
4444 case 'H' :
4445 case 'V' :
4446 case 'X' : { break;}
4447 default : return 0;
4448 }
4449 break;
4450 }
4451
4452 case 'S' :
4453 {
4454 switch(b)
4455 {
4456 case 'S' :
4457 case 'C' :
4458 case 'G' :
4459 case 'M' :
4460 case 'R' :
4461 case 'Y' :
4462 case 'K' :
4463 case 'B' :
4464 case 'D' :
4465 case 'H' :
4466 case 'V' :
4467 case 'X' : { break;}
4468 default : return 0;
4469 }
4470 break;
4471 }
4472
4473 case 'Y' :
4474 {
4475 switch(b)
4476 {
4477 case 'Y' :
4478 case 'C' :
4479 case 'T' :
4480 case 'M' :
4481 case 'W' :
4482 case 'S' :
4483 case 'K' :
4484 case 'B' :
4485 case 'D' :
4486 case 'H' :
4487 case 'V' :
4488 case 'X' : { break;}
4489 default : return 0;
4490 }
4491 break;
4492 }
4493
4494 case 'K' :
4495 {
4496 switch(b)
4497 {
4498 case 'K' :
4499 case 'G' :
4500 case 'T' :
4501 case 'R' :
4502 case 'W' :
4503 case 'S' :
4504 case 'Y' :
4505 case 'B' :
4506 case 'D' :
4507 case 'H' :
4508 case 'V' :
4509 case 'X' : { break;}
4510 default : return 0;
4511 }
4512 break;
4513 }
4514 case 'B' :
4515 {
4516 switch(b)
4517 {
4518 case 'B' :
4519 case 'C' :
4520 case 'G' :
4521 case 'T' :
4522 case 'M' :
4523 case 'R' :
4524 case 'W' :
4525 case 'S' :
4526 case 'Y' :
4527 case 'K' :
4528 case 'D' :
4529 case 'H' :
4530 case 'V' :
4531 case 'X' : { break;}
4532 default : return 0;
4533 }
4534 break;
4535 }
4536 case 'D' :
4537 {
4538 switch(b)
4539 {
4540 case 'D' :
4541 case 'A' :
4542 case 'G' :
4543 case 'T' :
4544 case 'M' :
4545 case 'R' :
4546 case 'W' :
4547 case 'S' :
4548 case 'Y' :
4549 case 'K' :
4550 case 'B' :
4551 case 'H' :
4552 case 'V' :
4553 case 'X' : { break;}
4554 default : return 0;
4555 }
4556 break;
4557 }
4558 case 'H' :
4559 {
4560 switch(b)
4561 {
4562 case 'H' :
4563 case 'A' :
4564 case 'C' :
4565 case 'T' :
4566 case 'M' :
4567 case 'R' :
4568 case 'W' :
4569 case 'S' :
4570 case 'Y' :
4571 case 'K' :
4572 case 'B' :
4573 case 'D' :
4574 case 'V' :
4575 case 'X' : { break;}
4576 default : return 0;
4577 }
4578 break;
4579 }
4580 case 'V' :
4581 {
4582 switch(b)
4583 {
4584 case 'V' :
4585 case 'A' :
4586 case 'C' :
4587 case 'G' :
4588 case 'M' :
4589 case 'R' :
4590 case 'W' :
4591 case 'S' :
4592 case 'Y' :
4593 case 'K' :
4594 case 'B' :
4595 case 'D' :
4596 case 'H' :
4597 case 'X' : { break;}
4598 default : return 0;
4599 }
4600 break;
4601 }
4602 case 'X' :
4603 {
4604 switch(b)
4605 {
4606 case 'X' :
4607 case 'A' :
4608 case 'C' :
4609 case 'G' :
4610 case 'T' :
4611 case 'M' :
4612 case 'R' :
4613 case 'W' :
4614 case 'S' :
4615 case 'Y' :
4616 case 'K' :
4617 case 'B' :
4618 case 'D' :
4619 case 'H' :
4620 case 'V' : { break;}
4621 default : return 0;
4622 }
4623 break;
4624 }
4625 default :
4626 {
4627 PhyML_Printf("\n. Err. in Are_Compatible.");
4628 PhyML_Printf("\n. Please check that characters `%c` and `%c`",a,b);
4629 PhyML_Printf("\n. correspond to existing nucleotides.\n");
4630 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
4631 return 0;
4632 }
4633 }
4634 }
4635 }
4636 }
4637 else if(datatype == AA)
4638 {
4639 a = statea[0];
4640 b = stateb[0];
4641
4642 switch(a)
4643 {
4644 case 'A' :
4645 {
4646 switch(b)
4647 {
4648 case 'A' :
4649 case 'X' : { break;}
4650 default : return 0;
4651 }
4652 break;
4653 }
4654 case 'R' :
4655 {
4656 switch(b)
4657 {
4658 case 'R' :
4659 case 'X' : { break;}
4660 default : return 0;
4661 }
4662 break;
4663 }
4664 case 'N' :
4665 {
4666 switch(b)
4667 {
4668 case 'N' :
4669 case 'B' :
4670 case 'X' : { break;}
4671 default : return 0;
4672 }
4673 break;
4674 }
4675 case 'B' :
4676 {
4677 switch(b)
4678 {
4679 case 'N' :
4680 case 'B' :
4681 case 'X' : { break;}
4682 default : return 0;
4683 }
4684 break;
4685 }
4686 case 'D' :
4687 {
4688 switch(b)
4689 {
4690 case 'D' :
4691 case 'X' : { break;}
4692 default : return 0;
4693 }
4694 break;
4695 }
4696 case 'C' :
4697 {
4698 switch(b)
4699 {
4700 case 'C' :
4701 case 'X' : { break;}
4702 default : return 0;
4703 }
4704 break;
4705 }
4706 case 'Q' :
4707 {
4708 switch(b)
4709 {
4710 case 'Q' :
4711 case 'Z' :
4712 case 'X' : { break;}
4713 default : return 0;
4714 }
4715 break;
4716 }
4717 case 'Z' :
4718 {
4719 switch(b)
4720 {
4721 case 'Q' :
4722 case 'Z' :
4723 case 'X' : { break;}
4724 default : return 0;
4725 }
4726 break;
4727 }
4728 case 'E' :
4729 {
4730 switch(b)
4731 {
4732 case 'E' :
4733 case 'X' : { break;}
4734 default : return 0;
4735 }
4736 break;
4737 }
4738 case 'G' :
4739 {
4740 switch(b)
4741 {
4742 case 'G' :
4743 case 'X' : { break;}
4744 default : return 0;
4745 }
4746 break;
4747 }
4748 case 'H' :
4749 {
4750 switch(b)
4751 {
4752 case 'H' :
4753 case 'X' : { break;}
4754 default : return 0;
4755 }
4756 break;
4757 }
4758 case 'I' :
4759 {
4760 switch(b)
4761 {
4762 case 'I' :
4763 case 'X' : { break;}
4764 default : return 0;
4765 }
4766 break;
4767 }
4768 case 'L' :
4769 {
4770 switch(b)
4771 {
4772 case 'L' :
4773 case 'X' : { break;}
4774 default : return 0;
4775 }
4776 break;
4777 }
4778 case 'K' :
4779 {
4780 switch(b)
4781 {
4782 case 'K' :
4783 case 'X' : { break;}
4784 default : return 0;
4785 }
4786 break;
4787 }
4788 case 'M' :
4789 {
4790 switch(b)
4791 {
4792 case 'M' :
4793 case 'X' : { break;}
4794 default : return 0;
4795 }
4796 break;
4797 }
4798 case 'F' :
4799 {
4800 switch(b)
4801 {
4802 case 'F' :
4803 case 'X' : { break;}
4804 default : return 0;
4805 }
4806 break;
4807 }
4808 case 'P' :
4809 {
4810 switch(b)
4811 {
4812 case 'P' :
4813 case 'X' : { break;}
4814 default : return 0;
4815 }
4816 break;
4817 }
4818 case 'S' :
4819 {
4820 switch(b)
4821 {
4822 case 'S' :
4823 case 'X' : { break;}
4824 default : return 0;
4825 }
4826 break;
4827 }
4828 case 'T' :
4829 {
4830 switch(b)
4831 {
4832 case 'T' :
4833 case 'X' : { break;}
4834 default : return 0;
4835 }
4836 break;
4837 }
4838 case 'W' :
4839 {
4840 switch(b)
4841 {
4842 case 'W' :
4843 case 'X' : { break;}
4844 default : return 0;
4845 }
4846 break;
4847 }
4848 case 'Y' :
4849 {
4850 switch(b)
4851 {
4852 case 'Y' :
4853 case 'X' : { break;}
4854 default : return 0;
4855 }
4856 break;
4857 }
4858 case 'V' :
4859 {
4860 switch(b)
4861 {
4862 case 'V' :
4863 case 'X' : { break;}
4864 default : return 0;
4865 }
4866 break;
4867 }
4868 case 'X' :
4869 {
4870 switch(b)
4871 {
4872 case 'A':case 'R':case 'N' :case 'B' :case 'D' :
4873 case 'C':case 'Q':case 'Z' :case 'E' :case 'G' :
4874 case 'H':case 'I':case 'L' :case 'K' :case 'M' :
4875 case 'F':case 'P':case 'S' :case 'T' :case 'W' :
4876 case 'Y':case 'V': case 'X' : { break;}
4877 default : return 0;
4878 }
4879 break;
4880 }
4881 default :
4882 {
4883 PhyML_Printf("\n. Err. in Are_Compatible.");
4884 PhyML_Printf("\n. Please check that characters `%c` and `%c`",a,b);
4885 PhyML_Printf("\n. correspond to existing amino-acids.\n");
4886 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
4887 return 0;
4888 }
4889 }
4890 }
4891 else if(datatype == GENERIC)
4892 {
4893 if(Is_Ambigu(statea,GENERIC,stepsize) || Is_Ambigu(stateb,GENERIC,stepsize)) return 1;
4894 else
4895 {
4896 int a,b;
4897 char format[20];
4898
4899 sprintf(format,"%%%dd",stepsize);
4900
4901 if(!sscanf(statea,format,&a))
4902 {
4903 PhyML_Printf("\n. statea = %s",statea);
4904 PhyML_Printf("\n. Err. in file %s at line %d\n",__FILE__,__LINE__);
4905 Warn_And_Exit("\n. PhyML finished prematurely.");
4906 }
4907 if(!sscanf(stateb,format,&b))
4908 {
4909 PhyML_Printf("\n. statea = %s",stateb);
4910 PhyML_Printf("\n. Err. in file %s at line %d\n",__FILE__,__LINE__);
4911 Warn_And_Exit("\n. PhyML finished prematurely.");
4912 }
4913
4914 /* PhyML_Printf("\n. %s %d a=%d b=%d ",__FILE__,__LINE__,a,b); */
4915
4916 if(a == b) return 1;
4917 }
4918 return 0;
4919 }
4920
4921 return 1;
4922 }
4923
4924 //////////////////////////////////////////////////////////////
4925 //////////////////////////////////////////////////////////////
4926
/*
  Set the weight of every site pattern flagged as ambiguous to zero.
  Patterns 0 .. data->crunch_len-1 are examined; the weight of the
  non-ambiguous ones is left untouched.
*/
void Hide_Ambiguities(calign *data)
{
  int site;

  for(site = 0; site < data->crunch_len; site++)
    {
      if(!data->ambigu[site]) continue;
      data->wght[site] = 0;
    }
}
4932
4933 //////////////////////////////////////////////////////////////
4934 //////////////////////////////////////////////////////////////
4935
/*
  Copy the topology, edge lengths and a few tree-level settings of ori into
  the already allocated tree cpy.

  ori : source tree.
  cpy : destination tree; its nodes and edges must already exist, only
        their fields are overwritten.

  If either tree is a mixture tree the work is delegated to MIXT_Copy_Tree.

  Node and edge links are translated through their num field so that cpy
  only ever points to its own nodes and edges. The c_seq field of nodes and
  the l_v1/l_v2/r_v1/r_v2/l_r/r_l, does_exist and support_val fields of
  edges are copied by plain assignment. Names of the first n_otu nodes are
  duplicated into freshly allocated buffers.
*/
void Copy_Tree(t_tree *ori, t_tree *cpy)
{
  int i,j;

  if(ori->is_mixt_tree == YES || cpy->is_mixt_tree == YES)
    {
      MIXT_Copy_Tree(ori,cpy);
      return;
    }

  /* Nodes: ancestor, neighbours, adjacent edges and sequence */
  for(i=0;i<2*ori->n_otu-1;++i)
    {
      if(ori->a_nodes[i] != NULL)
        {
          cpy->a_nodes[i]->anc =
            (ori->a_nodes[i]->anc != NULL) ?
            cpy->a_nodes[ori->a_nodes[i]->anc->num] :
            NULL;

          for(j=0;j<3;++j)
            {
              if(ori->a_nodes[i]->v[j] != NULL)
                {
                  cpy->a_nodes[i]->v[j] = cpy->a_nodes[ori->a_nodes[i]->v[j]->num];
                  cpy->a_nodes[i]->b[j] = cpy->a_edges[ori->a_nodes[i]->b[j]->num];
                }
              else
                {
                  cpy->a_nodes[i]->v[j] = NULL;
                  cpy->a_nodes[i]->b[j] = NULL;
                }
            }

          /* Kept inside the NULL guard: this line reads ori->a_nodes[i]
             and must not run for an empty slot. */
          cpy->a_nodes[i]->c_seq = ori->a_nodes[i]->c_seq;
        }
    }

  /* Edges: lengths, end nodes and direction indices */
  for(i=0;i<2*ori->n_otu-1;++i)
    {
      if(ori->a_edges[i] != NULL)
        {
          cpy->a_edges[i]->l->v         = ori->a_edges[i]->l->v;
          cpy->a_edges[i]->l_old->v     = ori->a_edges[i]->l_old->v;
          cpy->a_edges[i]->l_var->v     = ori->a_edges[i]->l_var->v;
          cpy->a_edges[i]->l_var_old->v = ori->a_edges[i]->l_var_old->v;
          cpy->a_edges[i]->left         = ori->a_edges[i]->left ? cpy->a_nodes[ori->a_edges[i]->left->num] : NULL;
          cpy->a_edges[i]->rght         = ori->a_edges[i]->rght ? cpy->a_nodes[ori->a_edges[i]->rght->num] : NULL;
          cpy->a_edges[i]->l_v1         = ori->a_edges[i]->l_v1;
          cpy->a_edges[i]->l_v2         = ori->a_edges[i]->l_v2;
          cpy->a_edges[i]->r_v1         = ori->a_edges[i]->r_v1;
          cpy->a_edges[i]->r_v2         = ori->a_edges[i]->r_v2;
          cpy->a_edges[i]->l_r          = ori->a_edges[i]->l_r;
          cpy->a_edges[i]->r_l          = ori->a_edges[i]->r_l;
          cpy->a_edges[i]->does_exist   = ori->a_edges[i]->does_exist;
          cpy->a_edges[i]->support_val  = ori->a_edges[i]->support_val;

#ifdef BEAGLE
          cpy->a_edges[i]->p_lk_left_idx = ori->a_edges[i]->p_lk_left_idx;
          cpy->a_edges[i]->p_lk_rght_idx = ori->a_edges[i]->p_lk_rght_idx;
          cpy->a_edges[i]->p_lk_tip_idx  = ori->a_edges[i]->p_lk_tip_idx;
#endif
        }
    }

  /* The first n_otu nodes are flagged as tips and get their own copy of
     the name; ori_name is made to point to that same new buffer. */
  for(i=0;i<ori->n_otu;++i)
    {
      cpy->a_nodes[i]->tax = YES;

      Free(cpy->a_nodes[i]->name);

      cpy->a_nodes[i]->name = (char *)mCalloc(strlen(ori->a_nodes[i]->name)+1,sizeof(char));
      cpy->a_nodes[i]->ori_name = cpy->a_nodes[i]->name ;

      strcpy(cpy->a_nodes[i]->name,ori->a_nodes[i]->name);
    }

  /* Root node, root edge and the two edges hanging from the root node */
  if(ori->n_root)
    {
      cpy->e_root     = cpy->a_edges[ori->e_root->num];
      cpy->n_root     = cpy->a_nodes[ori->n_root->num];
      cpy->n_root_pos = ori->n_root_pos;

      cpy->n_root->b[1] = cpy->a_edges[ori->n_root->b[1]->num];
      cpy->n_root->b[2] = cpy->a_edges[ori->n_root->b[2]->num];
    }

  cpy->num_curr_branch_available = 0;
  cpy->t_beg                     = ori->t_beg;
  cpy->verbose                   = ori->verbose;

#ifdef BEAGLE
  cpy->b_inst = ori->b_inst;
#endif
}
5031
5032 //////////////////////////////////////////////////////////////
5033 //////////////////////////////////////////////////////////////
5034
/*
  Build and return a new tree that is a copy of ori.

  Mixture trees are handed over to MIXT_Duplicate_Tree. Otherwise a tree
  with the same number of taxa and the same data is created with
  Make_Tree_From_Scratch and then filled in by Copy_Tree.
*/
t_tree *Duplicate_Tree(t_tree *ori)
{
  t_tree *dup;

  if(ori->is_mixt_tree != YES)
    {
      dup = Make_Tree_From_Scratch(ori->n_otu,ori->data);
      Copy_Tree(ori,dup);
    }
  else
    {
      dup = MIXT_Duplicate_Tree(ori);
    }

  return dup;
}
5042
5043 //////////////////////////////////////////////////////////////
5044 //////////////////////////////////////////////////////////////
5045
/*
  Detach node a, along with whatever hangs from it in the direction of d,
  from the rest of the tree.

  a has three neighbours: d and two others. The two others are called v1
  (the one with the smaller num) and v2, and b1 / b2 are the edges that
  connected a to v1 / v2. After the call, v1 and v2 are joined directly by
  b1, and a's two neighbour and edge slots that pointed to v1 and v2 are
  NULL. b2 is no longer referenced by a or v2; its own left/rght fields
  are not modified here.

  a        : attachment node. If a is the root node, it is first replaced
             by the end of e_root that is not d. The program exits if a
             is a tip.
  d        : neighbour of a on the side that is being pruned.
  target   : if not NULL, *target receives b1.
  residual : if not NULL, *residual receives b2.
  tree     : tree being modified.

  For non-mixture trees, the per-side buffer pointers of b1 (p_lk_*,
  p_lk_tip_*, sum_scale_*, pars_*, ui_*, p_pars_*, p_lk_loc_*, patt_id_*)
  on the side that now faces v2 are exchanged with those of b2 on the side
  that faced v2. For mixture trees these swaps are skipped and
  MIXT_Prune_Subtree is called at the very end.
*/
void Prune_Subtree(t_node *a, t_node *d, t_edge **target, t_edge **residual, t_tree *tree)
{
  t_node *v1, *v2, *buff_nd;
  t_edge *b1, *b2;
  int dir_v1, dir_v2;
  int i;
  phydbl *buff_p_lk;
  int *buff_scale;
  int *buff_p_pars;
  int *buff_pars;
  int *buff_p_lk_loc, *buff_patt_id;
  int *buff_ui;
  phydbl *buff_p_lk_tip;

  assert(a);
  assert(d);
  assert(tree);


  /* The root node is not a valid attachment point: use the node at the
     other end of the root edge instead. */
  if(tree->n_root && a == tree->n_root)
    {
      if(d == tree->e_root->left) a = tree->e_root->rght;
      else if(d == tree->e_root->rght) a = tree->e_root->left;
      else
        {
          PhyML_Printf("\n. left: %d right: %d",tree->e_root->left->num,tree->e_root->rght->num);
          assert(false);
        }
    }

  if(a->tax) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);

  /* dir_v1 and dir_v2 are the two directions around a that do not lead
     to d. */
  dir_v1 = dir_v2 = -1;
  for(i=0;i<3;++i)
    {
      if(a->v[i] != d)
        {
          if(dir_v1 < 0) dir_v1 = i;
          else dir_v2 = i;
        }
    }

  assert(dir_v1 > -1);
  assert(dir_v2 > -1);

  assert(a->v[dir_v1] != NULL);
  assert(a->v[dir_v2] != NULL);

  /* Whichever of the two neighbours is a's ancestor becomes the ancestor
     of the other one. */
  if(a->v[dir_v1] == a->anc) a->v[dir_v2]->anc = a->v[dir_v1];
  else a->v[dir_v1]->anc = a->v[dir_v2];

  /* v1 is the neighbour with the smaller node number; b1 and b2 are the
     edges between a and v1, v2 respectively. */
  if(a->v[dir_v1]->num < a->v[dir_v2]->num)
    {
      v1 = a->v[dir_v1];
      v2 = a->v[dir_v2];
      b1 = a->b[dir_v1];
      b2 = a->b[dir_v2];
    }
  else
    {
      v1 = a->v[dir_v2];
      v2 = a->v[dir_v1];
      b1 = a->b[dir_v2];
      b2 = a->b[dir_v1];
    }


  assert(NULL != b1 && NULL != b2);

  if(target) (*target) = b1;
  if(residual) (*residual) = b2;


  /* Disconnect a from v1 and v2. */
  a->v[dir_v1] = NULL;
  a->v[dir_v2] = NULL;
  a->b[dir_v1] = NULL;
  a->b[dir_v2] = NULL;

#ifdef BEAGLE
  int temp;
#endif

  /* Plug v2 into the end of b1 that used to be a, and exchange the buffer
     pointers on that side of b1 with those on v2's side of b2. The four
     branches below cover the four left/right combinations. */
  if(v1 == b1->left)
    {
      b1->rght = v2;

      if(v2 == b2->left)
        {
          /* b1 right side <-> b2 left side */
          if(tree->is_mixt_tree == NO)
            {
              buff_p_lk = b1->p_lk_rght;
              b1->p_lk_rght = b2->p_lk_left;
              b2->p_lk_left = buff_p_lk;

              buff_p_lk_tip = b1->p_lk_tip_r;
              b1->p_lk_tip_r = b2->p_lk_tip_l;
              b2->p_lk_tip_l = buff_p_lk_tip;

#ifdef BEAGLE
              temp = b1->p_lk_rght_idx;
              b1->p_lk_rght_idx = b2->p_lk_left_idx;
              b2->p_lk_left_idx = temp;
#endif
              buff_scale = b1->sum_scale_rght;
              b1->sum_scale_rght = b2->sum_scale_left;
              b2->sum_scale_left = buff_scale;

              buff_scale = b1->sum_scale_rght_cat;
              b1->sum_scale_rght_cat = b2->sum_scale_left_cat;
              b2->sum_scale_left_cat = buff_scale;

              buff_pars = b1->pars_r;
              b1->pars_r = b2->pars_l;
              b2->pars_l = buff_pars;

              buff_ui = b1->ui_r;
              b1->ui_r = b2->ui_l;
              b2->ui_l = buff_ui;

              buff_p_pars = b1->p_pars_r;
              b1->p_pars_r = b2->p_pars_l;
              b2->p_pars_l = buff_p_pars;

              buff_p_lk_loc = b1->p_lk_loc_rght;
              b1->p_lk_loc_rght = b2->p_lk_loc_left;
              b2->p_lk_loc_left = buff_p_lk_loc;

              buff_patt_id = b1->patt_id_rght;
              b1->patt_id_rght = b2->patt_id_left;
              b2->patt_id_left = buff_patt_id;
            }
        }
      else
        {
          /* b1 right side <-> b2 right side */
          /* NOTE(review): unlike the three other branches, this one does
             not exchange the sum_scale_*_cat pointers -- confirm whether
             this is intentional. */
          if(tree->is_mixt_tree == NO)
            {
              buff_p_lk = b1->p_lk_rght; /* b1->p_lk_rght = NULL if b1->rght->tax */
              b1->p_lk_rght = b2->p_lk_rght; /* b2->p_lk_rght = NULL if b2->rght->tax */
              b2->p_lk_rght = buff_p_lk;

              buff_p_lk_tip = b1->p_lk_tip_r;
              b1->p_lk_tip_r = b2->p_lk_tip_r;
              b2->p_lk_tip_r = buff_p_lk_tip;
#ifdef BEAGLE
              temp = b1->p_lk_rght_idx;
              b1->p_lk_rght_idx = b2->p_lk_rght_idx;
              b2->p_lk_rght_idx = temp;

              b2->p_lk_tip_idx = b1->p_lk_tip_idx;
#endif
              buff_scale = b1->sum_scale_rght;
              b1->sum_scale_rght = b2->sum_scale_rght;
              b2->sum_scale_rght = buff_scale;

              buff_pars = b1->pars_r;
              b1->pars_r = b2->pars_r;
              b2->pars_r = buff_pars;

              buff_ui = b1->ui_r;
              b1->ui_r = b2->ui_r;
              b2->ui_r = buff_ui;

              buff_p_pars = b1->p_pars_r;
              b1->p_pars_r = b2->p_pars_r;
              b2->p_pars_r = buff_p_pars;

              buff_p_lk_loc = b1->p_lk_loc_rght;
              b1->p_lk_loc_rght = b2->p_lk_loc_rght;
              b2->p_lk_loc_rght = buff_p_lk_loc;

              buff_patt_id = b1->patt_id_rght;
              b1->patt_id_rght = b2->patt_id_rght;
              b2->patt_id_rght = buff_patt_id;
            }
        }
    }
  else
    {
      b1->left = v2;

      if(v2 == b2->left)
        {
          /* b1 left side <-> b2 left side */
          if(tree->is_mixt_tree == NO)
            {
              buff_p_lk = b1->p_lk_left;
              b1->p_lk_left = b2->p_lk_left;
              b2->p_lk_left = buff_p_lk;

              buff_p_lk_tip = b1->p_lk_tip_l;
              b1->p_lk_tip_l = b2->p_lk_tip_l;
              b2->p_lk_tip_l = buff_p_lk_tip;
#ifdef BEAGLE
              temp = b1->p_lk_left_idx;
              b1->p_lk_left_idx = b2->p_lk_left_idx;
              b2->p_lk_left_idx = temp;
#endif
              buff_scale = b1->sum_scale_left;
              b1->sum_scale_left = b2->sum_scale_left;
              b2->sum_scale_left = buff_scale;

              buff_scale = b1->sum_scale_left_cat;
              b1->sum_scale_left_cat = b2->sum_scale_left_cat;
              b2->sum_scale_left_cat = buff_scale;

              buff_pars = b1->pars_l;
              b1->pars_l = b2->pars_l;
              b2->pars_l = buff_pars;

              buff_ui = b1->ui_l;
              b1->ui_l = b2->ui_l;
              b2->ui_l = buff_ui;

              buff_p_pars = b1->p_pars_l;
              b1->p_pars_l = b2->p_pars_l;
              b2->p_pars_l = buff_p_pars;

              buff_p_lk_loc = b1->p_lk_loc_left;
              b1->p_lk_loc_left = b2->p_lk_loc_left;
              b2->p_lk_loc_left = buff_p_lk_loc;

              buff_patt_id = b1->patt_id_left;
              b1->patt_id_left = b2->patt_id_left;
              b2->patt_id_left = buff_patt_id;
            }
        }
      else
        {
          /* b1 left side <-> b2 right side */
          if(tree->is_mixt_tree == NO)
            {
              buff_p_lk = b1->p_lk_left;
              b1->p_lk_left = b2->p_lk_rght; /* b2->p_lk_rght = NULL if b2->rght->tax */
              b2->p_lk_rght = buff_p_lk;

              buff_p_lk_tip = b1->p_lk_tip_l;
              b1->p_lk_tip_l = b2->p_lk_tip_r;
              b2->p_lk_tip_r = buff_p_lk_tip;
#ifdef BEAGLE
              temp = b1->p_lk_left_idx;
              b1->p_lk_left_idx = b2->p_lk_rght_idx;
              b2->p_lk_rght_idx = temp;

              b2->p_lk_tip_idx = b1->p_lk_tip_idx;
#endif
              buff_scale = b1->sum_scale_left;
              b1->sum_scale_left = b2->sum_scale_rght;
              b2->sum_scale_rght = buff_scale;

              buff_scale = b1->sum_scale_left_cat;
              b1->sum_scale_left_cat = b2->sum_scale_rght_cat;
              b2->sum_scale_rght_cat = buff_scale;

              buff_pars = b1->pars_l;
              b1->pars_l = b2->pars_r;
              b2->pars_r = buff_pars;

              buff_ui = b1->ui_l;
              b1->ui_l = b2->ui_r;
              b2->ui_r = buff_ui;

              buff_p_pars = b1->p_pars_l;
              b1->p_pars_l = b2->p_pars_r;
              b2->p_pars_r = buff_p_pars;

              buff_p_lk_loc = b1->p_lk_loc_left;
              b1->p_lk_loc_left = b2->p_lk_loc_rght;
              b2->p_lk_loc_rght = buff_p_lk_loc;

              buff_patt_id = b1->patt_id_left;
              b1->patt_id_left = b2->patt_id_rght;
              b2->patt_id_rght = buff_patt_id;
            }
        }
    }

  /* v2's slot that pointed to a now points to v1 through b1. */
  for(i=0;i<3;++i)
    if(v2->v[i] == a)
      {
        v2->v[i] = v1;
        v2->b[i] = b1;
        break;
      }

#ifdef DEBUG
  if(i == 3)
    {
      PhyML_Printf("\n. Err. in file %s at line %d\n",__FILE__,__LINE__);
      Warn_And_Exit("\n. PhyML finished prematurely.");
    }
#endif

  /* v1's slot that pointed to a now points to v2 (its edge is already b1). */
  for(i=0;i<3;++i)
    if(v1->v[i] == a)
      {
        v1->v[i] = v2;
        break;
      }

#ifdef DEBUG
  if(i == 3)
    {
      PhyML_Printf("\n. Err. in file %s at line %d\n",__FILE__,__LINE__);
      Warn_And_Exit("\n. PhyML finished prematurely.");
    }
#endif

  /* b1 now spans what b1 and b2 used to cover: add up lengths and
     variances, provided b1's length is switched on. */
  if(b1->l->onoff == ON)
    {
      b1->l->v = (b1->l->v + b2->l->v);
      b1->l_var->v = (b1->l_var->v + b2->l_var->v);
    }

  assert(v1 != v2);

  /* Set_Edge_Dirs is given b1's left node first, then its right node. */
  (v1 == b1->left)?
    (Set_Edge_Dirs(b1,v1,v2,tree)):
    (Set_Edge_Dirs(b1,v2,v1,tree));


  /* Rooted tree: repair the root node's neighbours and the root edge if
     the pruning touched them, then refresh ancestors from the root. */
  if(tree->n_root != NULL)
    {
      // Pruning one of the subtrees below n_root->v[2], v2 below a
      if(tree->n_root->v[1] == v1 && tree->n_root->v[2] == a) tree->n_root->v[2] = v2;

      // Pruning one of the subtrees below n_root->v[1], v2 below a
      else if(tree->n_root->v[2] == v1 && tree->n_root->v[1] == a) tree->n_root->v[1] = v2;

      // Pruning one of the subtrees below n_root->v[1], v1 below a
      else if((tree->n_root->v[1] == v2 && tree->n_root->v[2] == a) ||
              (tree->n_root->v[2] == v2 && tree->n_root->v[1] == a))
        {
          tree->e_root = b1;
          if(tree->n_root->v[1] == v2) tree->n_root->v[2] = v1;
          if(tree->n_root->v[2] == v2) tree->n_root->v[1] = v1;
        }

      // Prune subtree to the left or to the right of the root node
      else if((tree->n_root->v[1] == a && tree->n_root->v[2] == d) ||
              (tree->n_root->v[1] == d && tree->n_root->v[2] == a))
        {
          tree->e_root = b1;
          tree->n_root->v[1] = v2;
          tree->n_root->v[2] = v1;
        }


      /* Swap the root's two neighbours when v[1] sits at the right end of
         the root edge. */
      if(tree->n_root->v[1] == tree->e_root->rght)
        {
          buff_nd = tree->n_root->v[1];
          tree->n_root->v[1] = tree->n_root->v[2];
          tree->n_root->v[2] = buff_nd;
        }

      Update_Ancestors(tree->n_root,tree->n_root->v[1],tree);
      Update_Ancestors(tree->n_root,tree->n_root->v[2],tree);
      tree->n_root->anc = NULL;
    }

#ifdef DEBUG
  /* NOTE(review): this diagnostic dereferences tree->n_root without
     checking it for NULL. */
  if(b1->left->tax == YES && b1->rght->tax == NO)
    {
      PhyML_Printf("\n. root: %d root->v1: %d root->v2: %d eroot: %d b1: %d b2: %d v1: %d v2: %d",
                   tree->n_root->num,
                   tree->n_root->v[1]->num,
                   tree->n_root->v[2]->num,
                   tree->e_root->num,
                   b1->num,b2->num,v1->num,v2->num);
      PhyML_Printf("\n. b1->left->num = %d",b1->left->num);
      PhyML_Printf("\n. b1->rght->num = %d",b1->rght->num);
      PhyML_Printf("\n. Err. in file %s at line %d\n",__FILE__,__LINE__);
      Warn_And_Exit("\n. PhyML finished prematurely.");
    }
#endif

  if(tree->is_mixt_tree == YES) MIXT_Prune_Subtree(a,d,target,residual,tree);
}
5421
5422 //////////////////////////////////////////////////////////////
5423 //////////////////////////////////////////////////////////////
5424
/*
  Insert node link in the middle of edge target.

  link must have two NULL neighbour slots (an assertion fails otherwise).
  Let v1 be the end of target with the smaller num and v2 the other end.
  After the call, target joins v1 and link, and residual joins link (left)
  and v2 (right).

  target        : edge to be split.
  link          : node to insert. If link is the root node, it is replaced
                  by the root's neighbour that is not link_daughter.
  link_daughter : only read when link is the root node.
  residual      : spare edge reused to connect link and v2.
  target_nd     : only read when target is the root edge; selects the new
                  root edge (v1 -> residual, v2 -> target, root node ->
                  the edge of link that was already attached).
  tree          : tree being modified.

  For non-mixture trees, the right-side buffer pointers of residual are
  exchanged with the buffer pointers of target on v2's side. For mixture
  trees these swaps are skipped and MIXT_Graft_Subtree is called at the end.
*/
void Graft_Subtree(t_edge *target, t_node *link, t_node *link_daughter, t_edge *residual, t_node *target_nd, t_tree *tree)
{
  t_node *v1, *v2;
  int i, dir_v1, dir_v2;
  phydbl *buff_p_lk;
  int *buff_scale;
  int *buff_p_pars, *buff_pars;
  int *buff_p_lk_loc, *buff_patt_id;
  phydbl *buff_p_lk_tip;
  int *buff_ui;
  t_edge *b_up;

  assert(link);
  assert(tree);
  assert(target);

  /* The root node cannot be inserted: use its other neighbour instead. */
  if(link == tree->n_root)
    {
      assert(link_daughter);
      if(link_daughter == tree->n_root->v[1]) link = tree->n_root->v[2];
      else if(link_daughter == tree->n_root->v[2]) link = tree->n_root->v[1];
      else
        {
          PhyML_Printf("\n. link: %d link_daughter: %d",
                       link->num,
                       link_daughter ? link_daughter->num : -1);
          assert(false);
        }
    }

  /* dir_v1, dir_v2: free (NULL) directions around link. b_up: edge of
     link in a direction that is already occupied. */
  dir_v1 = dir_v2 = -1;
  b_up = NULL;
  for(i=0;i<3;i++)
    {
      if(link->v[i] == NULL)
        {
          if(dir_v1 < 0) dir_v1 = i;
          else dir_v2 = i;
        }
      else b_up = link->b[i];
    }

  if(dir_v1 < 0 || dir_v2 < 0)
    {
      PhyML_Printf("\n. link: %d was not pruned in a clean manner...\n",link->num);
      assert(FALSE);
    }


  /* link takes its place in the ancestor chain between the two ends of
     target. */
  if(target->left == target->rght->anc)
    {
      link->anc = target->left;
      target->rght->anc = link;
    }
  else
    {
      link->anc = target->rght;
      target->left->anc = link;
    }


#ifdef BEAGLE
  int temp;
#endif

  if(target->left->num < target->rght->num)
    {
      v1 = target->left;
      v2 = target->rght;

      assert(v1 != link);
      assert(v2 != link);

      /* v2 is on target's right: residual right side <-> target right side */
      if(tree->is_mixt_tree == NO)
        {
          buff_p_lk = residual->p_lk_rght;
          residual->p_lk_rght = target->p_lk_rght;
          target->p_lk_rght = buff_p_lk;

          buff_p_lk_tip = residual->p_lk_tip_r;
          residual->p_lk_tip_r = target->p_lk_tip_r;
          target->p_lk_tip_r = buff_p_lk_tip;

#ifdef BEAGLE
          temp = residual->p_lk_rght_idx;
          residual->p_lk_rght_idx = target->p_lk_rght_idx;
          target->p_lk_rght_idx = temp;

          temp = residual->p_lk_tip_idx;
          residual->p_lk_tip_idx = target->p_lk_tip_idx;
          target->p_lk_tip_idx = temp;
#endif

          buff_scale = residual->sum_scale_rght;
          residual->sum_scale_rght = target->sum_scale_rght;
          target->sum_scale_rght = buff_scale;

          buff_scale = residual->sum_scale_rght_cat;
          residual->sum_scale_rght_cat = target->sum_scale_rght_cat;
          target->sum_scale_rght_cat = buff_scale;

          buff_pars = residual->pars_r;
          residual->pars_r = target->pars_r;
          target->pars_r = buff_pars;

          buff_ui = residual->ui_r;
          residual->ui_r = target->ui_r;
          target->ui_r = buff_ui;

          buff_p_pars = residual->p_pars_r;
          residual->p_pars_r = target->p_pars_r;
          target->p_pars_r = buff_p_pars;

          buff_p_lk_loc = residual->p_lk_loc_rght;
          residual->p_lk_loc_rght = target->p_lk_loc_rght;
          target->p_lk_loc_rght = buff_p_lk_loc;

          buff_patt_id = residual->patt_id_rght;
          residual->patt_id_rght = target->patt_id_rght;
          target->patt_id_rght = buff_patt_id;
        }
    }
  else
    {
      v1 = target->rght;
      v2 = target->left;

      assert(v1 != link);
      assert(v2 != link);

      /* v2 is on target's left: residual right side <-> target left side */
      if(tree->is_mixt_tree == NO)
        {
          buff_p_lk = residual->p_lk_rght;
          residual->p_lk_rght = target->p_lk_left;
          target->p_lk_left = buff_p_lk;

          buff_p_lk_tip = residual->p_lk_tip_r;
          residual->p_lk_tip_r = target->p_lk_tip_l;
          target->p_lk_tip_l = buff_p_lk_tip;

#ifdef BEAGLE
          temp = residual->p_lk_rght_idx;
          residual->p_lk_rght_idx = target->p_lk_left_idx;
          target->p_lk_left_idx = temp;
#endif

          buff_scale = residual->sum_scale_rght;
          residual->sum_scale_rght = target->sum_scale_left;
          target->sum_scale_left = buff_scale;

          buff_scale = residual->sum_scale_rght_cat;
          residual->sum_scale_rght_cat = target->sum_scale_left_cat;
          target->sum_scale_left_cat = buff_scale;

          buff_pars = residual->pars_r;
          residual->pars_r = target->pars_l;
          target->pars_l = buff_pars;

          buff_ui = residual->ui_r;
          residual->ui_r = target->ui_l;
          target->ui_l = buff_ui;

          buff_p_pars = residual->p_pars_r;
          residual->p_pars_r = target->p_pars_l;
          target->p_pars_l = buff_p_pars;

          buff_p_lk_loc = residual->p_lk_loc_rght;
          residual->p_lk_loc_rght = target->p_lk_loc_left;
          target->p_lk_loc_left = buff_p_lk_loc;

          buff_patt_id = residual->patt_id_rght;
          residual->patt_id_rght = target->patt_id_left;
          target->patt_id_left = buff_patt_id;
        }
    }



  /* v2 is now reached from link through residual. */
  for(i=0;i<3;i++)
    if(v2->b[i] == target)
      {
        v2->v[i] = link;
        v2->b[i] = residual;
        break;
      }
  assert(i<3);

  link->v[dir_v2] = v2;
  link->b[dir_v2] = residual;

  residual->left = link;
  residual->rght = v2;

  /* target keeps v1 at the end where it was; link takes the other end. */
  if(v1 == target->left) target->rght = link;
  else target->left = link;

  link->v[dir_v1] = v1;
  link->b[dir_v1] = target;


  /* v1's slot that pointed to v2 now points to link. */
  for(i=0;i<3;i++)
    if(v1->v[i] == v2)
      {
        v1->v[i] = link;
        break;
      }


  /* The former target length is shared equally between target and
     residual (each only if its own length is switched on). */
  if(target->l->onoff == ON)
    {
      target->l->v /= 2.0;
      target->l_var->v /= 2.0;
    }

  if(residual->l->onoff == ON)
    {
      residual->l->v = target->l->v;
      residual->l_var->v = target->l_var->v;
    }

  assert(target->left != target->rght);
  assert(residual->left != residual->rght);
  assert(b_up->left != b_up->rght);

  Set_Edge_Dirs(target,target->left,target->rght,tree);
  Set_Edge_Dirs(residual,residual->left,residual->rght,tree);
  Set_Edge_Dirs(b_up,b_up->left,b_up->rght,tree);

  if(tree->n_root != NULL)
    {
      /* When the root edge itself was split, target_nd decides which of
         the three edges around link becomes the new root edge. */
      if(target == tree->e_root)
        {
          assert(target_nd);
          if(target_nd == v1) tree->e_root = residual;
          else if(target_nd == v2) tree->e_root = target;
          else if(target_nd == tree->n_root) tree->e_root = b_up;
        }

      tree->n_root->v[1] = tree->e_root->left;
      tree->n_root->v[2] = tree->e_root->rght;

      /* Root edge b[1] goes from the root node to e_root's left node; its
         right-side buffer pointers are set to e_root's left-side ones. */
      tree->n_root->b[1]->left = tree->n_root;
      tree->n_root->b[1]->rght = tree->n_root->v[1];
      tree->n_root->b[1]->p_lk_rght = tree->e_root->p_lk_left;
      tree->n_root->b[1]->p_lk_tip_r = tree->e_root->p_lk_tip_l;
#ifdef BEAGLE
      tree->n_root->b[1]->p_lk_rght_idx = tree->e_root->p_lk_left_idx;
      tree->n_root->b[1]->p_lk_tip_idx = tree->e_root->p_lk_tip_idx;
#endif
      tree->n_root->b[1]->sum_scale_rght = tree->e_root->sum_scale_left;
      tree->n_root->b[1]->sum_scale_rght_cat = tree->e_root->sum_scale_left_cat;
      tree->n_root->b[1]->pars_r = tree->e_root->pars_l;
      tree->n_root->b[1]->ui_r = tree->e_root->ui_l;
      tree->n_root->b[1]->p_pars_r = tree->e_root->p_pars_l;
      tree->n_root->b[1]->p_lk_loc_rght = tree->e_root->p_lk_loc_left;
      tree->n_root->b[1]->patt_id_rght = tree->e_root->patt_id_left;


      /* Root edge b[2] goes from the root node to e_root's right node; its
         right-side buffer pointers are set to e_root's right-side ones. */
      tree->n_root->b[2]->left = tree->n_root;
      tree->n_root->b[2]->rght = tree->n_root->v[2];
      tree->n_root->b[2]->p_lk_rght = tree->e_root->p_lk_rght;
      tree->n_root->b[2]->p_lk_tip_r = tree->e_root->p_lk_tip_r;
#ifdef BEAGLE
      tree->n_root->b[2]->p_lk_rght_idx = tree->e_root->p_lk_rght_idx;
      tree->n_root->b[2]->p_lk_tip_idx = tree->e_root->p_lk_tip_idx;
#endif
      tree->n_root->b[2]->sum_scale_rght = tree->e_root->sum_scale_rght;
      tree->n_root->b[2]->sum_scale_rght_cat = tree->e_root->sum_scale_rght_cat;
      tree->n_root->b[2]->pars_r = tree->e_root->pars_r;
      tree->n_root->b[2]->ui_r = tree->e_root->ui_r;
      tree->n_root->b[2]->p_pars_r = tree->e_root->p_pars_r;
      tree->n_root->b[2]->p_lk_loc_rght = tree->e_root->p_lk_loc_rght;
      tree->n_root->b[2]->patt_id_rght = tree->e_root->patt_id_rght;

      Update_Ancestors(tree->n_root,tree->n_root->v[1],tree);
      Update_Ancestors(tree->n_root,tree->n_root->v[2],tree);
      tree->n_root->anc = NULL;
    }

  if(tree->is_mixt_tree == YES) MIXT_Graft_Subtree(target,link,link_daughter,residual,target_nd,tree);
}
5706
5707 //////////////////////////////////////////////////////////////
5708 //////////////////////////////////////////////////////////////
5709
/*
  Renumber the nodes met while walking the tree from a towards d.

  Tips receive the next free value of *curr_ext_node and internal nodes the
  next free value of *curr_int_node. A node is moved to its new slot in
  tree->a_nodes by exchanging places (and num values) with the node
  currently occupying that slot. The walk stops at tips and otherwise
  continues through every neighbour of d other than a.
*/
void Reassign_Node_Nums(t_node *a, t_node *d, unsigned int *curr_ext_node, unsigned int *curr_int_node, t_tree *tree)
{
  t_node *displaced;
  unsigned int *slot;
  int dir;

  /* a is renumbered only when it is a tip. */
  if(a->tax)
    {
      displaced                     = tree->a_nodes[*curr_ext_node];
      tree->a_nodes[*curr_ext_node] = a;
      tree->a_nodes[a->num]         = displaced;
      displaced->num                = a->num;
      a->num                        = *curr_ext_node;
      (*curr_ext_node)++;
    }

  /* d draws its new number from the tip or the internal-node counter. */
  slot = d->tax ? curr_ext_node : curr_int_node;

  displaced             = tree->a_nodes[*slot];
  tree->a_nodes[*slot]  = d;
  tree->a_nodes[d->num] = displaced;
  displaced->num        = d->num;
  d->num                = *slot;
  (*slot)++;

  if(d->tax) return;

  for(dir = 0; dir < 3; dir++)
    {
      if(d->v[dir] == a) continue;
      Reassign_Node_Nums(d,d->v[dir],curr_ext_node,curr_int_node,tree);
    }
}
5751
5752 //////////////////////////////////////////////////////////////
5753 //////////////////////////////////////////////////////////////
5754
5755
/*
  Renumber the edges met while walking the tree from a towards d.

  The edge joining a and d is moved to slot *curr_br of tree->a_edges,
  exchanging places with the edge found there, its num is set to *curr_br
  and *curr_br is incremented. The num of the displaced edge is not
  modified here. The walk stops at tips and otherwise continues through
  every neighbour of d other than a.

  a, d    : adjacent nodes; the walk goes from a to d.
  curr_br : next edge number to hand out, updated in place.
  tree    : tree whose a_edges array is reordered.
*/
void Reassign_Edge_Nums(t_node *a, t_node *d, int *curr_br, t_tree *tree)
{
  t_edge *buff;
  int i,j;
  /* Number of slots in tree->a_edges, matching the extent over which
     Copy_Tree walks that array. Scanning beyond it would read past the
     end of the array. */
  const int n_slots = 2*tree->n_otu-1;

  for(i=0;i<3;i++)
    if(a->v[i] == d)
      {
        buff = tree->a_edges[*curr_br];

        /* Locate the current slot of edge a->b[i] by pointer identity. */
        for(j=0;j<n_slots;j++) if(tree->a_edges[j] == a->b[i]) break;

        if(j == n_slots)
          {
            PhyML_Printf("\n. Err. in file %s at line %d (function '%s').\n",__FILE__,__LINE__,__FUNCTION__);
            Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
            return; /* never write to slot n_slots */
          }

        tree->a_edges[*curr_br] = a->b[i];
        tree->a_edges[j] = buff;
        a->b[i]->num = *curr_br;
        (*curr_br)++;
        break;
      }

  if(d->tax) return;
  else
    {
      for(i=0;i<3;i++)
        if(d->v[i] != a)
          Reassign_Edge_Nums(d,d->v[i],curr_br,tree);
    }
}
5786
5787 //////////////////////////////////////////////////////////////
5788 //////////////////////////////////////////////////////////////
5789
/*!
  Find a pair of directions (i at n1, j at n2) such that the node
  lists n1->bip_node[i] and n2->bip_node[j] have no element in common.
  The first such pair, scanning i then j in increasing order, is
  written to *dir_n1_to_n2 and *dir_n2_to_n1.

  Nothing is written when n1 == n2. When every pair of lists shares at
  least one node, an error is printed and Warn_And_Exit is called.
*/
void Find_Mutual_Direction(t_node *n1, t_node *n2, short int *dir_n1_to_n2, short int *dir_n2_to_n1)
{
  int n_shared[3][3];
  int d1,d2,p,q;

  if(n1 == n2) return;

  /* Count, for each pair of directions, how many nodes of the first
     list also appear in the second one. */
  for(d1=0;d1<3;d1++)
    for(d2=0;d2<3;d2++)
      {
        int hits = 0;

        for(p=0;p<n1->bip_size[d1];p++)
          for(q=0;q<n2->bip_size[d2];q++)
            if(n1->bip_node[d1][p] == n2->bip_node[d2][q])
              {
                hits++;
                break;
              }

        n_shared[d1][d2] = hits;
      }

  for(d1=0;d1<3;d1++)
    for(d2=0;d2<3;d2++)
      if(n_shared[d1][d2] == 0)
        {
          *dir_n1_to_n2 = d1;
          *dir_n2_to_n1 = d2;
          return;
        }

  PhyML_Printf("\n. n1=%d n2=%d",n1->num,n2->num);
  PhyML_Printf("\n. Err. in file %s at line %d\n",__FILE__,__LINE__);
  Warn_And_Exit("\n. PhyML finished prematurely.");
}
5859
5860 //////////////////////////////////////////////////////////////
5861 //////////////////////////////////////////////////////////////
5862
/*!
  Fill the entries of tree->t_dir that give, for nodes a and d and for
  each tip, the index of the neighbour slot to follow.

  For the slot i of a that holds d, every tip whose number is not
  listed in a->bip_node[i] gets t_dir[a->num*dim + tip] = i, with
  dim = 2*n_otu-2. When d is not a tip, the function first recurses
  into the neighbours of d other than a, then applies the same rule to
  d using the slot of d that holds a.
*/
void Update_Dir_To_Tips(t_node *a, t_node *d, t_tree *tree)
{
  const int dim = 2*tree->n_otu-2;
  short int *keep;
  int dir,tip,k;

  keep = (short int *)mCalloc(tree->n_otu,sizeof(short int));

  for(dir=0;dir<3;dir++)
    {
      if(a->v[dir] != d) continue;

      for(tip=0;tip<tree->n_otu;tip++) keep[tip] = 1;
      for(k=0;k<a->bip_size[dir];k++) keep[a->bip_node[dir][k]->num] = 0;
      for(tip=0;tip<tree->n_otu;tip++)
        if(keep[tree->a_nodes[tip]->num])
          tree->t_dir[a->num*dim+tree->a_nodes[tip]->num] = dir;
      break;
    }

  if(!d->tax)
    {
      int back = -1; /* slot of d that holds a */

      for(dir=0;dir<3;dir++)
        {
          if(d->v[dir] == a) back = dir;
          else               Update_Dir_To_Tips(d,d->v[dir],tree);
        }

      for(tip=0;tip<tree->n_otu;tip++) keep[tip] = 1;
      for(k=0;k<d->bip_size[back];k++) keep[d->bip_node[back][k]->num] = 0;
      for(tip=0;tip<tree->n_otu;tip++)
        if(keep[tree->a_nodes[tip]->num])
          tree->t_dir[d->num*dim+tree->a_nodes[tip]->num] = back;
    }

  Free(keep);
}
5902
5903 //////////////////////////////////////////////////////////////
5904 //////////////////////////////////////////////////////////////
5905
/*!
  Rebuild tree->t_dir, a dim x dim table with dim = 2*n_otu-2.

  The table is zeroed, the bipartition data are freed, reallocated and
  recomputed starting from node 0, the entries towards tips are set by
  Update_Dir_To_Tips and the entries between pairs of nodes numbered
  n_otu and above are set by Find_Mutual_Direction.
*/
void Fill_Dir_Table(t_tree *tree)
{
  const int dim = 2*tree->n_otu-2;
  int row,col,k;

  for(k=0;k<dim*dim;k++) tree->t_dir[k] = 0;

  Free_Bip(tree);
  Alloc_Bip(tree);
  Get_Bip(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);
  Update_Dir_To_Tips(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);

  for(row=tree->n_otu;row<2*tree->n_otu-2;row++)
    {
      for(col=row;col<2*tree->n_otu-2;col++)
        {
          short int *row_to_col = &(tree->t_dir[row*dim+col]);
          short int *col_to_row = &(tree->t_dir[col*dim+row]);

          Find_Mutual_Direction(tree->a_nodes[row],tree->a_nodes[col],row_to_col,col_to_row);
        }
    }
}
5926
5927 //////////////////////////////////////////////////////////////
5928 //////////////////////////////////////////////////////////////
5929
5930
/*!
  Return the number of tip nodes found in the subtree rooted at d when
  coming from a (1 when d is itself a tip).
*/
int Get_Subtree_Size(t_node *a, t_node *d)
{
  int n_tips = 0;
  int dir;

  if(d->tax) return 1;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;
      n_tips += Get_Subtree_Size(d,d->v[dir]);
    }

  return n_tips;
}
5945
5946 //////////////////////////////////////////////////////////////
5947 //////////////////////////////////////////////////////////////
5948
5949 /*!
5950 Calculate the joint probability of states (nt or aa) at the
5951 two extremities of a given edge given the matrix of transition
5952 probabilities, the vector of conditional likelihoods on each
5953 side of the branch and the vector of equilibrium frequencies.
5954 */
Joint_Proba_States_Left_Right(phydbl * Pij,phydbl * p_lk_left,phydbl * p_lk_rght,vect_dbl * pi,int scale_left,int scale_rght,phydbl * F,int n,int site,t_tree * tree)5955 void Joint_Proba_States_Left_Right(phydbl *Pij, phydbl *p_lk_left, phydbl *p_lk_rght,
5956 vect_dbl *pi, int scale_left, int scale_rght,
5957 phydbl *F, int n, int site, t_tree *tree)
5958 {
5959 int i,j;
5960 phydbl sum = 0.0;
5961
5962 for(i=0;i<n;i++) F[i] = .0;
5963
5964 for(i=0;i<n;i++)
5965 {
5966 for(j=0;j<n;j++)
5967 {
5968 F[i*n+j] =
5969 pi->v[i] *
5970 Pij[i*n+j] *
5971 p_lk_left[i] *
5972 p_lk_rght[j] *
5973 POW(2.,-(scale_left + scale_rght));
5974
5975 sum += F[i*n+j];
5976 }
5977 }
5978
5979 For(i,n*n)
5980 {
5981 F[i] /= sum;
5982 if(isnan(F[i]) || isinf(F[i]))
5983 {
5984 for(i=0;i<n;i++) for(j=0;j<n;j++)
5985 PhyML_Printf("\n. %15G %15G %15G %15G %15G",
5986 pi->v[i] ,
5987 Pij[i*n+j] ,
5988 p_lk_left[i] ,
5989 p_lk_rght[j] ,
5990 POW(2.,-(scale_left + scale_rght)));
5991
5992 PhyML_Printf("\n. sum = %G",sum);
5993 Print_Site(tree->data,site,tree->n_otu,"\n",1,stderr);
5994 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
5995 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5996 }
5997 }
5998
5999 }
6000
6001 //////////////////////////////////////////////////////////////
6002 //////////////////////////////////////////////////////////////
6003
Triple_Dist(t_node * a,t_tree * tree)6004 phydbl Triple_Dist(t_node *a, t_tree *tree)
6005 {
6006 if(a->tax) return UNLIKELY;
6007 else
6008 {
6009 Update_PMat_At_Given_Edge(a->b[1],tree);
6010 Update_PMat_At_Given_Edge(a->b[2],tree);
6011
6012 Update_Partial_Lk(tree,a->b[0],a);
6013 /* Fast_Br_Len(a->b[0],tree,YES); */
6014 Br_Len_Opt(&(a->b[0]->l->v),a->b[0],tree);
6015
6016 Update_Partial_Lk(tree,a->b[1],a);
6017 /* Fast_Br_Len(a->b[1],tree,YES); */
6018 Br_Len_Opt(&(a->b[1]->l->v),a->b[1],tree);
6019
6020 Update_Partial_Lk(tree,a->b[2],a);
6021 /* Fast_Br_Len(a->b[2],tree,YES); */
6022 Br_Len_Opt(&(a->b[2]->l->v),a->b[2],tree);
6023
6024
6025 Update_Partial_Lk(tree,a->b[1],a);
6026 Update_Partial_Lk(tree,a->b[0],a);
6027 }
6028
6029 return tree->c_lnL;
6030
6031 }
6032
6033 //////////////////////////////////////////////////////////////
6034 //////////////////////////////////////////////////////////////
6035
Triple_Dist_Approx(t_node * a,t_edge * b,t_tree * tree)6036 phydbl Triple_Dist_Approx(t_node *a, t_edge *b, t_tree *tree)
6037 {
6038 // !!!!!!!! NOT MIXT PROOF
6039 if(a->tax) return UNLIKELY;
6040 else
6041 {
6042 int i;
6043
6044 for(i=0;i<3;i++)
6045 if(a->b[i] != b)
6046 Update_PMat_At_Given_Edge(a->b[i],tree);
6047
6048 Update_Partial_Lk(tree,b,a);
6049 Fast_Br_Len(b,tree,YES);
6050
6051 return tree->c_lnL;
6052
6053 /* t_node *v0,*v1,*v2; */
6054 /* phydbl d01,d02,d12; */
6055 /* t_ll *tips0,*tips1,*tips2; */
6056
6057 /* d01 = d02 = d12 = 0.0; */
6058
6059 /* v0 = a->v[0]; */
6060 /* v1 = a->v[1]; */
6061 /* v2 = a->v[2]; */
6062
6063 /* tips0 = Get_List_Of_Reachable_Tips(a,v0,tree); */
6064 /* tips1 = Get_List_Of_Reachable_Tips(a,v1,tree); */
6065 /* tips2 = Get_List_Of_Reachable_Tips(a,v2,tree); */
6066
6067 /* d01 = Length_Of_Path_Between_List_Of_Tips(tips0,tips1,tree->mat); */
6068 /* d02 = Length_Of_Path_Between_List_Of_Tips(tips0,tips2,tree->mat); */
6069 /* d12 = Length_Of_Path_Between_List_Of_Tips(tips1,tips2,tree->mat); */
6070
6071 /* a->b[0]->l->v = (d01 + d02 - d12)/2.; */
6072 /* a->b[1]->l->v = (d01 + d12 - d02)/2.; */
6073 /* a->b[2]->l->v = (d02 + d12 - d01)/2.; */
6074
6075 /* Free_Linked_List(tips0); */
6076 /* Free_Linked_List(tips1); */
6077 /* Free_Linked_List(tips2); */
6078
6079 }
6080 }
6081
6082 //////////////////////////////////////////////////////////////
6083 //////////////////////////////////////////////////////////////
6084
/*!
  Make the size x size matrix *F (stored row by row) symmetric: each
  pair of entries (i,j) and (j,i) is replaced by their average.
*/
void Make_Symmetric(phydbl **F, int size)
{
  int row,col;

  for(row=0;row<size;row++)
    {
      for(col=row+1;col<size;col++)
        {
          phydbl *upper = *F + size*row+col;
          phydbl *lower = *F + size*col+row;

          *upper = (*upper + *lower)/2.;
          *lower = *upper;
        }
    }
}
6098
6099 //////////////////////////////////////////////////////////////
6100 //////////////////////////////////////////////////////////////
6101
/*!
  Divide every entry (i,j) of the size x size matrix *F (stored row by
  row) by vect[j], i.e. scale each column by the matching vector entry.
*/
void Divide_Mat_By_Vect(phydbl **F, phydbl *vect, int size)
{
  int row,col;

  for(row=0;row<size;row++)
    {
      phydbl *line = *F + size*row;

      for(col=0;col<size;col++) line[col] /= vect[col];
    }
}
6109
6110 //////////////////////////////////////////////////////////////
6111 //////////////////////////////////////////////////////////////
6112
/*!
  Set *match to 1 if target is one of the non-tip nodes of the subtree
  rooted at d when coming from a. *match is left untouched otherwise,
  so the caller has to initialise it. Tips are never compared to
  target.
*/
void Found_In_Subtree(t_node *a, t_node *d, t_node *target, int *match, t_tree *tree)
{
  int dir;

  if(d->tax) return;

  if(d == target) *match = 1;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;
      Found_In_Subtree(d,d->v[dir],target,match,tree);
    }
}
6127
6128
6129
6130
6131
6132 //////////////////////////////////////////////////////////////
6133 //////////////////////////////////////////////////////////////
6134
/*!
  Append to list the edge joining a and d, then, recursively, every
  edge of the subtree rooted at d when coming from a. *list_size is the
  number of entries already in list and is incremented for each edge
  added; list must be large enough to hold them.
*/
void Get_List_Of_Target_Edges(t_node *a, t_node *d, t_edge **list, int *list_size, t_tree *tree)
{
  int dir;

  for(dir=0;dir<3;dir++)
    {
      if(a->v[dir] == NULL || a->v[dir] != d) continue;
      list[*list_size] = a->b[dir];
      (*list_size)++;
    }

  if(d->tax) return;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;
      Get_List_Of_Target_Edges(d,d->v[dir],list,list_size,tree);
    }
}
6158
6159 //////////////////////////////////////////////////////////////
6160 //////////////////////////////////////////////////////////////
6161
/*!
  For every node numbered from n_otu to 2*n_otu-3, copy the current
  length (l->v) of each of its three edges into the matching l_old->v.
*/
void Fix_All(t_tree *tree)
{
  int i,dir;

  for(i=tree->n_otu;i<2*tree->n_otu-2;i++)
    {
      t_node *n = tree->a_nodes[i];

      for(dir=0;dir<3;dir++) n->b[dir]->l_old->v = n->b[dir]->l->v;
    }
}
6173
6174 //////////////////////////////////////////////////////////////
6175 //////////////////////////////////////////////////////////////
6176
Tree_Length(t_tree * tree)6177 phydbl Tree_Length(t_tree *tree)
6178 {
6179 phydbl sum;
6180
6181 sum = 0.0;
6182 for(int i=0;i<2*tree->n_otu-3;++i) sum += MIXT_Get_Mean_Edge_Len(tree->a_edges[i],tree);
6183 return(sum);
6184 }
6185
6186 //////////////////////////////////////////////////////////////
6187 //////////////////////////////////////////////////////////////
6188
/*!
  Save the current edge lengths and edge length variances: for
  mixt_tree and every tree reached through ->next, l->v is copied to
  l_old->v and l_var->v to l_var_old->v for the first 2*n_otu-1 edges.

  An assertion fails if mixt_tree->br_len_recorded is already YES.
  NOTE(review): the flag is tested but not set to YES in this function;
  confirm where it is meant to be raised.
*/
void Record_Br_Len(t_tree *mixt_tree)
{
  t_tree *tree;
  t_edge *e;
  int i;

  if(mixt_tree->br_len_recorded == YES)
    {
      PhyML_Printf("\n. Overwriting recorded edge lengths.\n");
      assert(FALSE);
    }

  for(tree=mixt_tree;tree != NULL;tree=tree->next)
    {
      for(i=0;i<2*tree->n_otu-1;++i)
        {
          e = tree->a_edges[i];
          e->l_old->v = e->l->v;
        }

      for(i=0;i<2*tree->n_otu-1;++i)
        {
          e = tree->a_edges[i];
          e->l_var_old->v = e->l_var->v;
        }
    }
}
6210
6211 //////////////////////////////////////////////////////////////
6212 //////////////////////////////////////////////////////////////
6213
Copy_Br_Len(t_tree * mixt_tree)6214 scalar_dbl **Copy_Br_Len(t_tree *mixt_tree)
6215 {
6216 int i;
6217 scalar_dbl **bl, *new_l;
6218 t_edge *e;
6219
6220 bl = (scalar_dbl **)mCalloc(2*mixt_tree->n_otu-1,sizeof(scalar_dbl *));
6221
6222 For(i,2*mixt_tree->n_otu-1)
6223 {
6224 e = mixt_tree->a_edges[i];
6225 bl[i] = (scalar_dbl *)mCalloc(1,sizeof(scalar_dbl));
6226 do
6227 {
6228 bl[i]->v = e->l->v;
6229 e = e->next;
6230 if(e)
6231 {
6232 new_l = (scalar_dbl *)mCalloc(1,sizeof(scalar_dbl));
6233 bl[i]->next = new_l;
6234 bl[i]->next->prev = bl[i];
6235 bl[i] = bl[i]->next;
6236 }
6237 }
6238 while(e);
6239 }
6240
6241 return(bl);
6242 }
6243
6244
6245 //////////////////////////////////////////////////////////////
6246 //////////////////////////////////////////////////////////////
6247
Copy_Br_Len_Var(t_tree * mixt_tree)6248 scalar_dbl **Copy_Br_Len_Var(t_tree *mixt_tree)
6249 {
6250 int i;
6251 scalar_dbl **bl_var, *new_l_var;
6252 t_edge *e;
6253
6254 bl_var = (scalar_dbl **)mCalloc(2*mixt_tree->n_otu-1,sizeof(scalar_dbl *));
6255
6256 For(i,2*mixt_tree->n_otu-1)
6257 {
6258 e = mixt_tree->a_edges[i];
6259 bl_var[i] = (scalar_dbl *)mCalloc(1,sizeof(scalar_dbl));
6260 do
6261 {
6262 bl_var[i]->v = e->l_var->v;
6263 e = e->next;
6264 if(e)
6265 {
6266 new_l_var = (scalar_dbl *)mCalloc(1,sizeof(scalar_dbl));
6267 bl_var[i]->next = new_l_var;
6268 bl_var[i]->next->prev = bl_var[i];
6269 bl_var[i] = bl_var[i]->next;
6270 }
6271 }
6272 while(e);
6273 }
6274
6275 return(bl_var);
6276 }
6277
6278 //////////////////////////////////////////////////////////////
6279 //////////////////////////////////////////////////////////////
6280
/*!
  Copy the values stored in bl into the edge lengths of tree. For each
  of the first 2*n_otu-1 edges, the list starting at bl[i] and the list
  starting at the edge's l are walked in parallel through ->next and
  v is copied from the former to the latter.

  Edges whose l is NULL and entries of bl that are NULL are skipped. An
  assertion checks that both lists end at the same time.
*/
void Transfer_Br_Len_To_Tree(scalar_dbl **bl, t_tree *tree)
{
  scalar_dbl *src,*dst;
  int i;

  for(i=0;i<2*tree->n_otu-1;i++)
    {
      dst = tree->a_edges[i]->l;
      if(dst == NULL) continue;

      src = bl[i];
      if(src == NULL) continue;

      do
        {
          dst->v = src->v;
          src = src->next;
          dst = dst->next;
        }
      while(src != NULL && dst != NULL);

      assert(src == NULL && dst == NULL);
    }
}
6306
6307 //////////////////////////////////////////////////////////////
6308 //////////////////////////////////////////////////////////////
6309
/*!
  Put back the edge lengths and edge length variances saved in
  l_old / l_var_old: for mixt_tree and every tree reached through
  ->next, l_old->v is copied to l->v and l_var_old->v to l_var->v for
  the first 2*n_otu-1 edges. mixt_tree->br_len_recorded is reset to NO.
*/
void Restore_Br_Len(t_tree *mixt_tree)
{
  t_tree *tree;
  t_edge *e;
  int i;

  mixt_tree->br_len_recorded = NO;

  for(tree=mixt_tree;tree != NULL;tree=tree->next)
    {
      for(i=0;i<2*tree->n_otu-1;++i)
        {
          e = tree->a_edges[i];
          e->l->v = e->l_old->v;
        }

      for(i=0;i<2*tree->n_otu-1;++i)
        {
          e = tree->a_edges[i];
          e->l_var->v = e->l_var_old->v;
        }
    }
}
6327
6328 //////////////////////////////////////////////////////////////
6329 //////////////////////////////////////////////////////////////
6330
/*!
  Propagate edge-to-edge distances away from the edge joining a and d.
  For each edge of d that does not lead back to a:
    topo_dist_btw_edges = value on the (a,d) edge + 1
    dist_btw_edges      = value on the (a,d) edge + half the edge's own length
  and the traversal continues beyond that edge. It stops at tips.

  The two fields of the starting edge are expected to be set by the
  caller before the first call.
*/
void Get_Dist_Btw_Edges(t_node *a, t_node *d, t_tree *tree)
{
  t_edge *from = NULL;
  int dir;

  for(dir=0;dir<3;dir++)
    if(a->v[dir] == d)
      {
        from = a->b[dir];
        break;
      }

  if(d->tax) return;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;

      d->b[dir]->topo_dist_btw_edges = from->topo_dist_btw_edges + 1;
      d->b[dir]->dist_btw_edges      = from->dist_btw_edges + d->b[dir]->l->v / 2.;
      Get_Dist_Btw_Edges(d,d->v[dir],tree);
    }
}
6353
6354 //////////////////////////////////////////////////////////////
6355 //////////////////////////////////////////////////////////////
6356
6357
/*!
  Flag edge b as a zero-length edge when its length is below l_thresh
  and neither of its two nodes is a tip; its length is then set to 0.
  In every other case has_zero_br_len is set to NO and the length is
  left unchanged.
*/
void Detect_Polytomies(t_edge *b, phydbl l_thresh, t_tree *tree)
{
  if(!(b->l->v < l_thresh) || b->left->tax || b->rght->tax)
    {
      b->has_zero_br_len = NO;
      return;
    }

  b->l->v            = 0.0;
  b->has_zero_br_len = YES;
}
6367
6368 //////////////////////////////////////////////////////////////
6369 //////////////////////////////////////////////////////////////
6370
/*!
  Starting from d (coming from a), walk across edges flagged with
  has_zero_br_len and append to *list every node reached through an
  edge that is NOT flagged. *size_list is the number of entries already
  in *list and is incremented for each node added. Nothing is done when
  d is a tip.
*/
void Get_List_Of_Nodes_In_Polytomy(t_node *a, t_node *d, t_node ***list, int *size_list)
{
  int dir;

  if(d->tax) return;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;

      if(d->b[dir]->has_zero_br_len)
        {
          Get_List_Of_Nodes_In_Polytomy(d,d->v[dir],list,size_list);
        }
      else
        {
          (*list)[*size_list] = d->v[dir];
          (*size_list)++;
        }
    }
}
6395
6396
6397 //////////////////////////////////////////////////////////////
6398 //////////////////////////////////////////////////////////////
6399
/*!
  Add to *len the lengths of the edges found on the path going from
  dep to arr. The path is followed with tree->t_dir, which must have
  been filled beforehand (asserted). When the edge crossed is
  tree->e_root, the sum of the lengths of the two edges n_root->b[1]
  and n_root->b[2] is added instead of the edge's own length.

  *len is not reset here; nothing is added when dep == arr.
*/
void Path_Length(t_node *dep, t_node *arr, phydbl *len, t_tree *tree)
{
  assert(tree->t_dir);

  while(dep != arr)
    {
      const int dir = tree->t_dir[dep->num*(2*tree->n_otu-2)+arr->num];
      t_edge *crossed = dep->b[dir];

      if(crossed == tree->e_root)
        (*len) += (tree->n_root->b[1]->l->v + tree->n_root->b[2]->l->v);
      else
        (*len) += crossed->l->v;

      dep = dep->v[dir];
    }
}
6423
6424
6425 //////////////////////////////////////////////////////////////
6426 //////////////////////////////////////////////////////////////
6427
/*!
  Debugging aid: follow tree->t_dir from d until target is reached,
  printing one marker line per node visited (target included). The
  argument a is not used to choose the path.
*/
void Check_Path(t_node *a, t_node *d, t_node *target, t_tree *tree)
{
  for(;;)
    {
      PhyML_Printf("path---------\n");
      if(d == target) return;

      a = d;
      d = d->v[tree->t_dir[d->num*(2*tree->n_otu-2)+target->num]];
    }
}
6434
6435
6436 //////////////////////////////////////////////////////////////
6437 //////////////////////////////////////////////////////////////
6438
6439
/*!
  Make a and d neighbours of each other through their first neighbour
  slot (v[0]). Edges are not touched.
*/
void Connect_Two_Nodes(t_node *a, t_node *d)
{
  t_node **slot_a = &(a->v[0]);
  t_node **slot_d = &(d->v[0]);

  *slot_a = d;
  *slot_d = a;
}
6445
6446 //////////////////////////////////////////////////////////////
6447 //////////////////////////////////////////////////////////////
6448
/*!
  Collect nodes and/or edges found within a given depth of a starting
  point. At each step the node a and the edge joining a to d are stored
  at position *list_size of *node_list and *edge_list (either list may
  be NULL, in which case it is not written) and *list_size is
  incremented. The traversal then goes on from d to its neighbours
  other than a, with curr_depth increased by one.

  Nothing is done when a is a tip. The traversal does not go further
  once curr_depth equals max_depth or when d is a tip.
*/
void Get_List_Of_Adjacent_Targets(t_node *a, t_node *d, t_node ***node_list, t_edge ***edge_list, int *list_size, int curr_depth, int max_depth)
{
  int dir;

  if(a->tax) return;

  for(dir=0;dir<3;dir++)
    {
      if(a->v[dir] != d) continue;

      if(node_list != NULL) (*node_list)[*list_size] = a;
      if(edge_list != NULL) (*edge_list)[*list_size] = a->b[dir];
      (*list_size)++;
    }

  if(curr_depth == max_depth || d->tax) return;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;
      Get_List_Of_Adjacent_Targets(d,d->v[dir],node_list,edge_list,list_size,curr_depth+1,max_depth);
    }
}
6468
6469 //////////////////////////////////////////////////////////////
6470 //////////////////////////////////////////////////////////////
6471
/*!
  Sort, in place, the list_size edges of *list by increasing value of
  topo_dist_btw_edges. Each position is compared with every later one
  and the two entries are exchanged whenever the later one has a
  strictly smaller value.
*/
void Sort_List_Of_Adjacent_Targets(t_edge ***list, int list_size)
{
  int lo,hi;

  for(lo=0;lo<list_size-1;lo++)
    {
      for(hi=lo+1;hi<list_size;hi++)
        {
          t_edge **edges = *list;

          if(edges[hi]->topo_dist_btw_edges < edges[lo]->topo_dist_btw_edges)
            {
              t_edge *held = edges[hi];

              edges[hi] = edges[lo];
              edges[lo] = held;
            }
        }
    }
}
6490
6491 //////////////////////////////////////////////////////////////
6492 //////////////////////////////////////////////////////////////
6493
Common_Nodes_Btw_Two_Edges(t_edge * a,t_edge * b)6494 t_node *Common_Nodes_Btw_Two_Edges(t_edge *a, t_edge *b)
6495 {
6496 if(a->left == b->left) return b->left;
6497 else if(a->left == b->rght) return b->rght;
6498 else if(a->rght == b->left) return b->left;
6499 else if(a->rght == b->rght) return b->rght;
6500
6501 PhyML_Printf("\n. First t_edge = %d (%d %d); Second t_edge = %d (%d %d)\n",
6502 a->num,a->left->num,a->rght->num,
6503 b->num,b->left->num,b->rght->num);
6504 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
6505 Warn_And_Exit("\n. PhyML finished prematurely.");
6506
6507 return NULL;
6508 }
6509
6510 //////////////////////////////////////////////////////////////
6511 //////////////////////////////////////////////////////////////
6512
/*!
  Compare two vectors of per-pattern values, site_lk_M1 and
  site_lk_M2. The statistic is the sum over the tree->n_pattern
  patterns of (int)wght[i] * (site_lk_M1[i] - site_lk_M2[i]).
  Returns 1 when the statistic is strictly greater than the threshold
  and 0 otherwise.

  NOTE(review): the threshold is fixed at 0 (the line deriving it from
  the variance is disabled in the original code), so the variance
  computed below does not influence the result.
*/
int KH_Test(phydbl *site_lk_M1, phydbl *site_lk_M2, t_tree *tree)
{
  phydbl *diff;
  phydbl stat,avg,var,cutoff;
  int i;

  diff = (phydbl *)mCalloc(tree->data->init_len,sizeof(phydbl));

  cutoff = .0;
  stat   = .0;

  for(i=0;i<tree->n_pattern;i++)
    {
      diff[i] = site_lk_M1[i] - site_lk_M2[i];
      stat   += ((int)tree->data->wght[i])*diff[i];
    }

  avg = stat / tree->data->init_len;

  for(i=0;i<tree->data->init_len;i++) diff[i] -= avg;

  var = .0;
  for(i=0;i<tree->data->init_len;i++) var += POW(diff[i],2);
  var /= (phydbl)(tree->data->init_len-1.);

  /* threshold = tree->dnorm_thresh*SQRT(sd*tree->data->init_len); */

  Free(diff);

  return (stat > cutoff) ? 1 : 0;
}
6549
6550 //////////////////////////////////////////////////////////////
6551 //////////////////////////////////////////////////////////////
6552
/*!
  Rebuild list_of_nodes from the availability flags: the n_otu entries
  of the list are reset to -1, then the indices i in [0,2*n_otu-2) with
  is_available[i] set are written at the front, in increasing order.
*/
static void Random_Tree_List_Available(const int *is_available, int *list_of_nodes, int n_otu)
{
  int i;
  int n_available = 0;

  for(i=0;i<n_otu;i++) list_of_nodes[i] = -1;
  for(i=0;i<2*n_otu-2;i++) if(is_available[i]) list_of_nodes[n_available++] = i;
}

/*!
  Build a random topology by joining nodes at random. At each step two
  nodes are drawn with Rand_Int among those still available and
  attached to slots v[1] and v[2] of node n_otu+step (each drawn node
  points back through its own v[0]); that node then becomes available
  in turn. The two nodes left at the end are joined through their v[0].

  Edges are then connected with Connect_Edges_To_Nodes_Serial and any
  of the first 2*n_otu-3 edge lengths below 1.E-3 is raised to 1.E-3.
*/
void Random_Tree(t_tree *tree)
{
  int *is_available,*list_of_nodes;
  int i,step,slot,pick;
  phydbl min_edge_len;

  assert(tree);

  min_edge_len = 1.E-3;

  is_available  = (int *)mCalloc(2*tree->n_otu-2,sizeof(int));
  list_of_nodes = (int *)mCalloc(tree->n_otu,    sizeof(int));

  /* Only the first n_otu nodes can be drawn to start with. */
  for(i=0;i<tree->n_otu;i++)
    {
      is_available[i]  = 1;
      list_of_nodes[i] = i;
    }

  step = 0;
  do
    {
      t_node *hub = tree->a_nodes[tree->n_otu+step];

      /* Two draws per step: the first fills v[1] of the hub, the
         second fills v[2]. The upper bound of the draw shrinks by one
         after each pick. */
      for(slot=1;slot<=2;slot++)
        {
          pick = Rand_Int(0,tree->n_otu-slot-step);
          pick = list_of_nodes[pick];
          is_available[pick] = 0;
          Random_Tree_List_Available(is_available,list_of_nodes,tree->n_otu);

          tree->a_nodes[pick]->v[0] = hub;
          hub->v[slot]              = tree->a_nodes[pick];
        }

      is_available[tree->n_otu+step] = 1;
      Random_Tree_List_Available(is_available,list_of_nodes,tree->n_otu);

      step++;
    }
  while(step < tree->n_otu-2);

  tree->a_nodes[list_of_nodes[0]]->v[0] = tree->a_nodes[list_of_nodes[1]];
  tree->a_nodes[list_of_nodes[1]]->v[0] = tree->a_nodes[list_of_nodes[0]];

  Connect_Edges_To_Nodes_Serial(tree);

  for(i=0;i<2*tree->n_otu-3;++i)
    {
      if(tree->a_edges[i]->l->v < min_edge_len) tree->a_edges[i]->l->v = min_edge_len;
    }

  Free(is_available);
  Free(list_of_nodes);
}
6613
6614 //////////////////////////////////////////////////////////////
6615 //////////////////////////////////////////////////////////////
6616 // Make sure internal edges have likelihood vectors on both
6617 // sides and external edges have one likelihood vector on the
6618 // lefthand side only
6619 // Note: make sure p_lk_tips vector are re-initialized after
6620 // calling this function
Reorganize_Edges_Given_Lk_Struct(t_tree * tree)6621 void Reorganize_Edges_Given_Lk_Struct(t_tree *tree)
6622 {
6623 int j,i;
6624
6625 if(tree->is_mixt_tree == YES) return;
6626
6627 for(i=0;i<2*tree->n_otu-1;++i)
6628 {
6629 if(tree->a_edges[i]->p_lk_left && tree->a_edges[i]->left->tax == YES)
6630 {
6631 for(j=0;j<2*tree->n_otu-1;++j)
6632 {
6633 if(!tree->a_edges[j]->p_lk_left && tree->a_edges[j]->left->tax == NO)
6634 {
6635 Swap_Partial_Lk(tree->a_edges[i],tree->a_edges[j],LEFT,LEFT,tree);
6636 break;
6637 }
6638 if(!tree->a_edges[j]->p_lk_rght && tree->a_edges[j]->rght->tax == NO)
6639 {
6640 Swap_Partial_Lk(tree->a_edges[i],tree->a_edges[j],LEFT,RGHT,tree);
6641 break;
6642 }
6643 }
6644 }
6645
6646 if(tree->a_edges[i]->p_lk_rght && tree->a_edges[i]->rght->tax == YES)
6647 {
6648 for(j=0;j<2*tree->n_otu-1;++j)
6649 {
6650 if(!tree->a_edges[j]->p_lk_left && tree->a_edges[j]->left->tax == NO)
6651 {
6652 Swap_Partial_Lk(tree->a_edges[i],tree->a_edges[j],RGHT,LEFT,tree);
6653 break;
6654 }
6655 if(!tree->a_edges[j]->p_lk_rght && tree->a_edges[j]->rght->tax == NO)
6656 {
6657 Swap_Partial_Lk(tree->a_edges[i],tree->a_edges[j],RGHT,RGHT,tree);
6658 break;
6659 }
6660 }
6661 }
6662 }
6663 }
6664
6665 //////////////////////////////////////////////////////////////
6666 //////////////////////////////////////////////////////////////
6667
/* Exchange two fields through the scratch variable tmp. The order of
   the three assignments is: tmp <- field_b, field_b <- field_a,
   field_a <- tmp. */
#define SWAP_LK_FIELD(tmp,field_a,field_b) \
  do { (tmp) = (field_b); (field_b) = (field_a); (field_a) = (tmp); } while(0)

/*!
  Exchange the likelihood-related pointers held on side side_a of edge
  a with those held on side side_b of edge b (each side being LEFT or
  RGHT). The fields exchanged are: p_lk, p_lk_tip, sum_scale_*_cat,
  sum_scale, patt_id, p_lk_loc, pars, p_pars and ui. Only the pointers
  are exchanged, not the data they refer to. Nothing is done when
  either side is neither LEFT nor RGHT.

  When compiled with BEAGLE, the p_lk_*_idx indices of the two sides
  and the p_lk_tip_idx of the two edges are exchanged as well.
  NOTE(review): the scratch variable for these indices was not
  declared in the original code; it is declared here as int on the
  assumption that the index fields are of type int -- confirm against
  the t_edge declaration.
*/
void Swap_Partial_Lk(t_edge *a, t_edge *b, int side_a, int side_b, t_tree *tree)
{
  phydbl *buff_dbl;
  int    *buff_int;
#ifdef BEAGLE
  int temp;
#endif

  if(side_a == LEFT && side_b == LEFT)
    {
      SWAP_LK_FIELD(buff_dbl,a->p_lk_left,         b->p_lk_left);
      SWAP_LK_FIELD(buff_dbl,a->p_lk_tip_l,        b->p_lk_tip_l);
      SWAP_LK_FIELD(buff_int,a->sum_scale_left_cat,b->sum_scale_left_cat);
      SWAP_LK_FIELD(buff_int,a->sum_scale_left,    b->sum_scale_left);
      SWAP_LK_FIELD(buff_int,a->patt_id_left,      b->patt_id_left);
      SWAP_LK_FIELD(buff_int,a->p_lk_loc_left,     b->p_lk_loc_left);
      SWAP_LK_FIELD(buff_int,a->pars_l,            b->pars_l);
      SWAP_LK_FIELD(buff_int,a->p_pars_l,          b->p_pars_l);
      SWAP_LK_FIELD(buff_int,a->ui_l,              b->ui_l);
#ifdef BEAGLE
      SWAP_LK_FIELD(temp,a->p_lk_left_idx,b->p_lk_left_idx);
      SWAP_LK_FIELD(temp,a->p_lk_tip_idx, b->p_lk_tip_idx);
#endif
    }
  else if(side_a == LEFT && side_b == RGHT)
    {
      SWAP_LK_FIELD(buff_dbl,a->p_lk_left,         b->p_lk_rght);
      SWAP_LK_FIELD(buff_dbl,a->p_lk_tip_l,        b->p_lk_tip_r);
      SWAP_LK_FIELD(buff_int,a->sum_scale_left_cat,b->sum_scale_rght_cat);
      SWAP_LK_FIELD(buff_int,a->sum_scale_left,    b->sum_scale_rght);
      SWAP_LK_FIELD(buff_int,a->patt_id_left,      b->patt_id_rght);
      SWAP_LK_FIELD(buff_int,a->p_lk_loc_left,     b->p_lk_loc_rght);
      SWAP_LK_FIELD(buff_int,a->pars_l,            b->pars_r);
      SWAP_LK_FIELD(buff_int,a->p_pars_l,          b->p_pars_r);
      SWAP_LK_FIELD(buff_int,a->ui_l,              b->ui_r);
#ifdef BEAGLE
      SWAP_LK_FIELD(temp,a->p_lk_left_idx,b->p_lk_rght_idx);
      SWAP_LK_FIELD(temp,a->p_lk_tip_idx, b->p_lk_tip_idx);
#endif
    }
  else if(side_a == RGHT && side_b == LEFT)
    {
      SWAP_LK_FIELD(buff_dbl,a->p_lk_rght,         b->p_lk_left);
      SWAP_LK_FIELD(buff_dbl,a->p_lk_tip_r,        b->p_lk_tip_l);
      SWAP_LK_FIELD(buff_int,a->sum_scale_rght_cat,b->sum_scale_left_cat);
      SWAP_LK_FIELD(buff_int,a->sum_scale_rght,    b->sum_scale_left);
      SWAP_LK_FIELD(buff_int,a->patt_id_rght,      b->patt_id_left);
      SWAP_LK_FIELD(buff_int,a->p_lk_loc_rght,     b->p_lk_loc_left);
      SWAP_LK_FIELD(buff_int,a->pars_r,            b->pars_l);
      SWAP_LK_FIELD(buff_int,a->p_pars_r,          b->p_pars_l);
      SWAP_LK_FIELD(buff_int,a->ui_r,              b->ui_l);
#ifdef BEAGLE
      SWAP_LK_FIELD(temp,a->p_lk_rght_idx,b->p_lk_left_idx);
      SWAP_LK_FIELD(temp,a->p_lk_tip_idx, b->p_lk_tip_idx);
#endif
    }
  else if(side_a == RGHT && side_b == RGHT)
    {
      SWAP_LK_FIELD(buff_dbl,a->p_lk_rght,         b->p_lk_rght);
      SWAP_LK_FIELD(buff_dbl,a->p_lk_tip_r,        b->p_lk_tip_r);
      SWAP_LK_FIELD(buff_int,a->sum_scale_rght_cat,b->sum_scale_rght_cat);
      SWAP_LK_FIELD(buff_int,a->sum_scale_rght,    b->sum_scale_rght);
      SWAP_LK_FIELD(buff_int,a->patt_id_rght,      b->patt_id_rght);
      SWAP_LK_FIELD(buff_int,a->p_lk_loc_rght,     b->p_lk_loc_rght);
      SWAP_LK_FIELD(buff_int,a->pars_r,            b->pars_r);
      SWAP_LK_FIELD(buff_int,a->p_pars_r,          b->p_pars_r);
      SWAP_LK_FIELD(buff_int,a->ui_r,              b->ui_r);
#ifdef BEAGLE
      SWAP_LK_FIELD(temp,a->p_lk_rght_idx,b->p_lk_rght_idx);
      SWAP_LK_FIELD(temp,a->p_lk_tip_idx, b->p_lk_tip_idx);
#endif
    }
}

#undef SWAP_LK_FIELD
6875
6876 //////////////////////////////////////////////////////////////
6877 //////////////////////////////////////////////////////////////
6878
/*!
  Apply n_moves random nearest-neighbour interchanges to tree. For
  each move a node is drawn with rand() among those numbered from
  n_otu to 2*n_otu-3, the first of its edges leading to a non-tip
  neighbour is selected, and the first neighbour found on each side of
  that edge (other than the node across the edge) are exchanged with
  Swap.

  A move is skipped when the drawn node has no non-tip neighbour (as in
  a tree with three tips), since there is then no edge around which an
  interchange can be made.
*/
void Random_NNI(int n_moves, t_tree *tree)
{
  int i,j;
  t_edge *b;
  t_node *n1,*n2,*n_target;

  for(i=0;i<n_moves;++i)
    {
      /* Start each move from a clean state so that nothing is carried
         over from the previous one. */
      b  = NULL;
      n1 = n2 = NULL;

      n_target = tree->a_nodes[tree->n_otu + (int)((phydbl)rand()/RAND_MAX * (2*tree->n_otu-3-tree->n_otu))];
      for(j=0;j<3;++j) if(!n_target->v[j]->tax) {b = n_target->b[j]; break;}

      /* Every neighbour of the drawn node is a tip: no valid move. */
      if(b == NULL) continue;

      for(j=0;j<3;++j) if(b->left->v[j] != b->rght) {n1 = b->left->v[j]; break;}
      for(j=0;j<3;++j) if(b->rght->v[j] != b->left) {n2 = b->rght->v[j]; break;}

      Swap(n1,b->left,b->rght,n2,tree);
    }
}
6898
6899 //////////////////////////////////////////////////////////////
6900 //////////////////////////////////////////////////////////////
6901
/*
  Walk through the upper triangle of the distance matrix and, for every
  entry that is negative, call Fill_Missing_Dist_XY() to estimate it. The
  symmetric entry is then copied from the freshly filled one.
*/
void Fill_Missing_Dist(matrix *mat)
{
  int row,col;

  for(row=0;row<mat->n_otu;row++)
    {
      /* col starts at row+1, so row and col always differ. */
      for(col=row+1;col<mat->n_otu;col++)
        {
          if(!(mat->dist[row][col] < .0)) continue;

          Fill_Missing_Dist_XY(row,col,mat);
          mat->dist[col][row] = mat->dist[row][col];
        }
    }
}
6921
6922 //////////////////////////////////////////////////////////////
6923 //////////////////////////////////////////////////////////////
6924
6925
/*
  Estimate the distance between taxa x and y from the other entries of
  'mat' and store it in mat->dist[x][y] and mat->dist[y][x].

  For every ordered pair (i,j) of taxa distinct from each other and from
  x and y, and whose distances to both x and y are strictly positive, two
  candidate values are derived; the smaller goes in column 0 and the
  larger in column 1 of a work table. The table is passed to
  Qksort_Matrix(), running means of column 1 are computed, and the running
  mean with the smallest Least_Square_Missing_Dist_XY() score among those
  falling strictly between consecutive column-0 values is retained
  (the first running mean is used when none qualifies).
*/
void Fill_Missing_Dist_XY(int x, int y, matrix *mat)
{
  int i,j;
  int n_quartets;
  int best_pos;
  phydbl *running_mean,**bounds;
  phydbl best_fit,fit;
  phydbl cand_a,cand_b;

  running_mean = (phydbl  *)mCalloc(mat->n_otu*mat->n_otu,sizeof(phydbl ));
  bounds       = (phydbl **)mCalloc(mat->n_otu*mat->n_otu,sizeof(phydbl *));
  For(i,mat->n_otu*mat->n_otu) bounds[i] = (phydbl *)mCalloc(2,sizeof(phydbl));

  n_quartets = 0;
  for(i=0;i<mat->n_otu;i++)
    {
      if(!((mat->dist[i][x] > .0) && (mat->dist[i][y] > .0))) continue;

      for(j=0;j<mat->n_otu;j++)
        {
          if(!((mat->dist[j][x] > .0) && (mat->dist[j][y] > .0))) continue;
          if((i == j) || (i == x) || (i == y) || (j == x) || (j == y)) continue;

          cand_a = mat->dist[i][x] + mat->dist[j][y] - mat->dist[i][j];
          cand_b = mat->dist[i][y] + mat->dist[j][x] - mat->dist[i][j];

          bounds[n_quartets][0] = MIN(cand_a,cand_b);
          bounds[n_quartets][1] = MAX(cand_a,cand_b);
          n_quartets++;
        }
    }

  Qksort_Matrix(bounds,0,0,n_quartets-1);

  /* Running mean of the column-1 values. */
  running_mean[0] = bounds[0][1];
  for(i=1;i<n_quartets;i++)
    running_mean[i] = (i*running_mean[i-1] + bounds[i][1])/(phydbl)(i+1);

  best_pos = 0;
  best_fit = fit = BIG;

  for(i=0;i<n_quartets-1;i++)
    {
      if(!((running_mean[i] < bounds[i+1][0]) && (running_mean[i] > bounds[i][0]))) continue;

      fit = Least_Square_Missing_Dist_XY(x,y,running_mean[i],mat);
      if(fit < best_fit)
        {
          best_fit = fit;
          best_pos = i;
        }
    }

  mat->dist[x][y] = running_mean[best_pos];
  mat->dist[y][x] = mat->dist[x][y];

  For(i,mat->n_otu*mat->n_otu) Free(bounds[i]);
  Free(bounds);
  Free(running_mean);
}
6987
6988 //////////////////////////////////////////////////////////////
6989 //////////////////////////////////////////////////////////////
6990
6991
/*
  Score a candidate value 'dxy' for the distance between taxa x and y.

  The score is a sum of squared terms accumulated over every ordered pair
  (i,j) of taxa that are distinct from each other and from x and y, and
  whose distances to both x and y are strictly positive. Returns that sum.
*/
phydbl Least_Square_Missing_Dist_XY(int x, int y, phydbl dxy, matrix *mat)
{
  int i,j;
  phydbl score;
  phydbl sum_xy,sum_yx,d_ij;

  score = .0;
  for(i=0;i<mat->n_otu;i++)
    {
      if(!((mat->dist[i][x] > .0) && (mat->dist[i][y] > .0))) continue;

      for(j=0;j<mat->n_otu;j++)
        {
          if(!((mat->dist[j][x] > .0) && (mat->dist[j][y] > .0))) continue;
          if((i == j) || (i == x) || (i == y) || (j == x) || (j == y)) continue;

          sum_xy = mat->dist[i][x] + mat->dist[j][y];
          sum_yx = mat->dist[i][y] + mat->dist[j][x];
          d_ij   = mat->dist[i][j];

          if(dxy < MIN(sum_xy - d_ij , sum_yx - d_ij))
            {
              /* dxy lies below both candidates: penalise the gap between the two sums */
              score += POW(sum_xy - sum_yx,2);
            }
          else if(sum_xy < sum_yx)
            {
              score += POW(dxy - (sum_yx - d_ij),2);
            }
          else
            {
              score += POW(dxy - (sum_xy - d_ij),2);
            }
        }
    }

  return score;
}
7027
7028 //////////////////////////////////////////////////////////////
7029 //////////////////////////////////////////////////////////////
7030
/*
  Print a rough estimate of the memory needed by the main per-edge arrays
  (parsimony vectors, transition probability matrices, partial likelihoods
  and scaling factors).

  - Above 256 MB: a warning is printed and, unless compiled with BATCH,
    the user is asked to confirm when io->quiet is off and
    io->mem_question is YES.
  - Above 100 MB: a warning is printed unless io->quiet is set.
  - Above 1 MB: an informational message is printed unless io->quiet is set.

  The estimate is accumulated in floating point. The previous code
  multiplied the leading factors as 'int' before the sizeof() operand
  widened the expression, which overflows on large data sets, and stored
  the total in a 'long int' that is only 32 bits wide on some platforms.
*/
void Check_Memory_Amount(t_tree *tree)
{
  phydbl nbytes,n_mb;
  phydbl n_edges,n_patt,ns,n_catg,n_part;
  t_mod *mod;

  mod = tree->mod;

  n_edges = (phydbl)(2*tree->io->n_otu-3);
  n_patt  = (phydbl)tree->data->crunch_len;
  ns      = (phydbl)mod->ns;
  n_catg  = (phydbl)mod->ras->n_catg;
  /* Number of partial likelihood vectors: two per edge minus one per tip */
  n_part  = n_edges * 2. - (phydbl)tree->n_otu;

  nbytes = .0;

  /* Partial Pars */
  /* pars_r */
  nbytes += n_edges * 2. * n_patt * (phydbl)sizeof(int);
  /* ui_r */
  nbytes += n_edges * 2. * n_patt * (phydbl)sizeof(int);
  /* p_pars_r */
  nbytes += n_edges * 2. * n_patt * ns * (phydbl)sizeof(int);
  /* n_diff_states_r */
  nbytes += n_edges * 2. * ns * (phydbl)sizeof(int);

  /* Pmat */
  /* Pij_rr */
  nbytes += n_edges * n_catg * ns * ns * (phydbl)sizeof(phydbl);
  /* tPij_rr */
  nbytes += n_edges * n_catg * ns * ns * (phydbl)sizeof(phydbl);

  /* Partial Lk */
  /* p_lk */
  nbytes += n_part * n_patt * n_catg * ns * (phydbl)sizeof(phydbl);
  /* p_lk_tip */
  nbytes += (phydbl)tree->n_otu * n_patt * ns * (phydbl)sizeof(phydbl);

  /* Scaling factors */
  /* sum_scale */
  nbytes += n_part * n_patt * n_catg * (phydbl)sizeof(int);

  n_mb = nbytes/(1.E+06);

  if(n_mb > 256.)
    {
      PhyML_Printf("\n\n. WARNING: this analysis requires at least %.0f MB of memory space.\n",n_mb);
#ifndef BATCH

      /* Initialised so that an end-of-file on stdin (scanf returns EOF,
         which is non-zero, and leaves 'answer' untouched) is handled
         like an empty answer instead of reading an indeterminate value. */
      char answer = '\n';
      if((!tree->io->quiet) && (tree->io->mem_question == YES))
        {
          PhyML_Printf("\n. Do you really want to proceed? [Y/n] ");
          if(scanf("%c", &answer))
            {
              if(answer == '\n') answer = 'Y';
              else if(answer == 'n' || answer == 'N') Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
              else getchar();
            }
          else
            {
              Warn_And_Exit("\n\n");
            }
        }
#endif
    }
  else if(n_mb > 100.)
    {
      if(!tree->io->quiet) PhyML_Printf("\n\n. WARNING: this analysis will use at least %.0f MB of memory space...\n",n_mb);
    }
  else if(n_mb > 1.)
    {
      if(!tree->io->quiet) PhyML_Printf("\n\n. This analysis requires at least %.0f MB of memory space.\n",n_mb);
    }
}
7104
7105 //////////////////////////////////////////////////////////////
7106 //////////////////////////////////////////////////////////////
7107
7108
/*
  Scan the tree->mod->ns entries of 'p_lk' that start at offset 'pos' and
  return the index (relative to 'pos') of the first strictly positive
  one, or -1 when none is.
*/
int Get_State_From_Partial_Lk(phydbl *p_lk, int pos, t_tree *tree)
{
  int state;

  state = 0;
  while(state < tree->mod->ns)
    {
      if(p_lk[pos+state] > .0) return state;
      state++;
    }

  return -1;
}
7115
7116 //////////////////////////////////////////////////////////////
7117 //////////////////////////////////////////////////////////////
7118
7119
/*
  Scan the tree->mod->ns entries of 'p_pars' that start at offset 'pos'
  and return the index (relative to 'pos') of the first strictly positive
  one, or -1 when none is.
*/
int Get_State_From_Partial_Pars(short int *p_pars, int pos, t_tree *tree)
{
  int state;

  state = 0;
  while(state < tree->mod->ns)
    {
      if(p_pars[pos+state] > 0) return state;
      state++;
    }

  return -1;
}
7126
7127 //////////////////////////////////////////////////////////////
7128 //////////////////////////////////////////////////////////////
7129
7130
7131 //////////////////////////////////////////////////////////////
7132 //////////////////////////////////////////////////////////////
7133
/*
  Sanity check on the direction indices stored on every edge.

  For each non-tip end of an edge, the node reached through the first
  index (l_v1 / r_v1) must not have a smaller number than the node reached
  through the second index (l_v2 / r_v2). The program reports the
  offending edge and stops otherwise.
*/
void Check_Dirs(t_tree *tree)
{
  int i;
  t_edge *e;
  t_node *first,*second;

  For(i,2*tree->n_otu-3)
    {
      e = tree->a_edges[i];

      /* Left end of the edge */
      if(!e->left->tax)
        {
          first  = e->left->v[e->l_v1];
          second = e->left->v[e->l_v2];

          if(first->num < second->num)
            {
              PhyML_Printf("\n. Edge %d ; v1=%d v2=%d",
                           e->num,
                           first->num,
                           second->num);
              PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
              Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
            }
        }

      /* Right end of the edge */
      if(!e->rght->tax)
        {
          first  = e->rght->v[e->r_v1];
          second = e->rght->v[e->r_v2];

          if(first->num < second->num)
            {
              PhyML_Printf("\n. Edge %d ; v3=%d v4=%d",
                           e->num,
                           first->num,
                           second->num);
              PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
              Warn_And_Exit("\n. PhyML finished prematurely.");
            }
        }
    }
}
7169
7170 //////////////////////////////////////////////////////////////
7171 //////////////////////////////////////////////////////////////
7172
/*
  Write the message 's' to stderr, flush every open output stream and
  leave through the project's exit routines. Unless compiled with BATCH,
  a "Type enter to exit" prompt is written and Exit() is called before
  Generic_Exit().
*/
void Warn_And_Exit(const char *s)
{
  PhyML_Fprintf(stderr,"%s",s);

  /* NULL argument: flush all open output streams */
  fflush(NULL);

#ifndef BATCH
  PhyML_Fprintf(stderr,"\n. Type enter to exit.\n");
  Exit("");
#endif

  Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
}
7183
7184 //////////////////////////////////////////////////////////////
7185 //////////////////////////////////////////////////////////////
7186
7187 // Apply random prune and regraft moves to an existing tree. As opposed to Random_Tree, using this
7188 // function does not break the likelihood structure.
/*
  Perform random prune-and-regraft moves on 'tree'.

  Each move draws an internal node and one of its three edges at random,
  prunes the node with respect to the node at the other end of that edge,
  collects candidate regraft edges on both sides of the pruning site and
  regrafts on one of them chosen at random (or on the edge returned by
  Prune_Subtree() when no candidate was collected).

  At least one move is always performed, including when
  'n_prune_regraft' is zero or negative.
*/
void Randomize_Tree(t_tree *tree, int n_prune_regraft)
{
  t_node *pruned,*link;
  t_edge *via,*b_target,*b_residual,**target_list;
  int n_targets,n_left,dir;

  target_list = (t_edge **)mCalloc(2*tree->n_otu-3,sizeof(t_edge *));

  n_left = n_prune_regraft;
  for(;;)
    {
      /* Internal nodes occupy indices n_otu ... 2*n_otu-3 */
      pruned = tree->a_nodes[Rand_Int(tree->n_otu,2*tree->n_otu-3)];
      assert(pruned != tree->n_root && pruned->tax == NO);

      via  = pruned->b[Rand_Int(0,2)];
      link = (pruned == via->left) ? via->rght : via->left;

      Prune_Subtree(pruned,link,&b_target,&b_residual,tree);

      /* Candidate regraft edges found on each side of b_target */
      n_targets = 0;

      for(dir=0;dir<3;dir++)
        {
          if(b_target->left->v[dir] == b_target->rght) continue;
          Get_List_Of_Adjacent_Targets(b_target->left,b_target->left->v[dir],NULL,&target_list,&n_targets,0,tree->n_otu);
        }

      for(dir=0;dir<3;dir++)
        {
          if(b_target->rght->v[dir] == b_target->left) continue;
          Get_List_Of_Adjacent_Targets(b_target->rght,b_target->rght->v[dir],NULL,&target_list,&n_targets,0,tree->n_otu);
        }

      if(n_targets > 0) b_target = target_list[Rand_Int(0,n_targets-1)];

      assert(b_target != NULL);

      Graft_Subtree(b_target,pruned,NULL,b_residual,NULL,tree);

      n_left--;
      if(!(n_left > 0)) break;
    }

  Free(target_list);
}
7232
7233 //////////////////////////////////////////////////////////////
7234 //////////////////////////////////////////////////////////////
7235
/*
  Shuffle the sequences of 'cdata'. Every position, taken in order, has
  its name, state and is_ambigu pointers exchanged with those of a
  position drawn with rand() in [0, n_otu-1].
*/
void Randomize_Sequence_Order(calign *cdata)
{
  int pos,other;
  phydbl draw;
  char *tmp_name,*tmp_state;
  short int *tmp_ambigu;

  other = -1;
  for(pos=0;pos<cdata->n_otu;pos++)
    {
      /* rand()/(RAND_MAX+1) is in [0,1), hence 'other' is in [0,n_otu-1] */
      draw  = rand();
      draw /= (RAND_MAX+1.);
      draw *= cdata->n_otu;
      other = (int)FLOOR(draw);

      tmp_name                  = cdata->c_seq[pos]->name;
      cdata->c_seq[pos]->name   = cdata->c_seq[other]->name;
      cdata->c_seq[other]->name = tmp_name;

      tmp_state                  = cdata->c_seq[pos]->state;
      cdata->c_seq[pos]->state   = cdata->c_seq[other]->state;
      cdata->c_seq[other]->state = tmp_state;

      tmp_ambigu                     = cdata->c_seq[pos]->is_ambigu;
      cdata->c_seq[pos]->is_ambigu   = cdata->c_seq[other]->is_ambigu;
      cdata->c_seq[other]->is_ambigu = tmp_ambigu;
    }
}
7264
7265 //////////////////////////////////////////////////////////////
7266 //////////////////////////////////////////////////////////////
7267
7268
/*
  Split the length of the root edge between the two edges attached to the
  root node according to tree->n_root_pos: b[2] receives the fraction
  n_root_pos and b[1] the remainder. Nothing is done unless n_root_pos is
  greater than -1.
*/
void Update_Root_Pos(t_tree *tree)
{
  if(!(tree->n_root_pos > -1.0)) return;

  tree->n_root->b[2]->l->v = tree->e_root->l->v * tree->n_root_pos;
  tree->n_root->b[1]->l->v = tree->e_root->l->v * (1.-tree->n_root_pos);
}
7282
7283 //////////////////////////////////////////////////////////////
7284 //////////////////////////////////////////////////////////////
7285
/*
  Root 'tree' on the edge 'target'.

  The root node is stored in tree->a_nodes[2*n_otu-2] (created with
  Make_Node_Light() if that slot is empty) and is connected to the two
  ends of 'target' through the edges tree->a_edges[2*n_otu-3] (towards
  target->left) and tree->a_edges[2*n_otu-2] (towards target->rght).

  The length of 'target' is split between the two root edges according to
  tree->n_root_pos; when n_root_pos is not greater than -1 the length is
  split evenly and n_root_pos is set to 0.5.

  When the tree is not a mixture tree, the "right" likelihood and
  parsimony pointers of the two root edges are made to point to the
  corresponding vectors of 'target'. Otherwise MIXT_Add_Root() is called.
*/
void Add_Root(t_edge *target, t_tree *tree)
{
  t_edge *b1, *b2;
  t_node *root;
  int root_idx;

  assert(target);
  assert(tree);

  tree->e_root = target;

  /* Create the root t_node if it does not exist yet */
  root_idx = 2*tree->n_otu-2;
  if(tree->a_nodes[root_idx] == NULL) tree->n_root = (t_node *)Make_Node_Light(root_idx);
  else                                tree->n_root = tree->a_nodes[root_idx];
  tree->a_nodes[root_idx] = tree->n_root;

  root = tree->n_root;
  root->tax = 0;

  /* Set the position of the root */
  root->v[0] = NULL;
  root->v[1] = tree->e_root->left;
  root->v[2] = tree->e_root->rght;

  b1 = tree->a_edges[2*tree->n_otu-3];
  b2 = tree->a_edges[2*tree->n_otu-2];

  root->b[0] = NULL;
  root->b[1] = b1;
  root->b[2] = b2;

  /* Lengths of the two root edges (b2 is root->b[2], b1 is root->b[1]) */
  if(tree->n_root_pos > -1.0)
    {
      if(tree->n_root_pos < 1.E-6 && tree->n_root_pos > -1.E-6)
        {
          printf("\n. WARNING: you put the root at a weird position...");
        }

      b2->l->v = tree->e_root->l->v * tree->n_root_pos;
      b1->l->v = tree->e_root->l->v * (1. - tree->n_root_pos);
      PhyML_Printf("\n. ROOTPOS: %f L: %f L2: %f",
                   tree->n_root_pos,
                   tree->e_root->l->v,
                   b2->l->v);
    }
  else
    {
      b2->l->v = tree->e_root->l->v / 2.;
      b1->l->v = tree->e_root->l->v / 2.;
      tree->n_root_pos = 0.5;
    }

  /* Edge numbers and end nodes */
  b1->num  = tree->num_curr_branch_available;
  b2->num  = tree->num_curr_branch_available+1;
  b1->left = root;
  b1->rght = root->v[1];
  b2->left = root;
  b2->rght = root->v[2];

  b1->l_old->v = b1->l->v;
  b2->l_old->v = b2->l->v;

  /* Direction indices */
  b1->l_r  = 1;   b2->l_r  = 2;
  b1->r_l  = 0;   b2->r_l  = 0;

  b1->l_v1 = 0;   b1->l_v2 = 2;
  b2->l_v1 = 0;   b2->l_v2 = 1;

  b1->r_v1 = 1;   b1->r_v2 = 2;
  b2->r_v1 = 1;   b2->r_v2 = 2;

  /* WARNING: make sure you have freed the memory for p_lk_rght on b1 and b2 */
  if(tree->is_mixt_tree == NO)
    {
      /* b1 looks towards the left end of the root edge ... */
      b1->p_lk_rght          = tree->e_root->p_lk_left;
      b1->p_lk_tip_r         = tree->e_root->p_lk_tip_l;
      b1->sum_scale_rght     = tree->e_root->sum_scale_left;
      b1->sum_scale_rght_cat = tree->e_root->sum_scale_left_cat;
      b1->p_lk_loc_rght      = tree->e_root->p_lk_loc_left;
      b1->pars_r             = tree->e_root->pars_l;
      b1->ui_r               = tree->e_root->ui_l;
      b1->p_pars_r           = tree->e_root->p_pars_l;
      b1->patt_id_rght       = tree->e_root->patt_id_left;

      /* ... and b2 towards its right end */
      b2->p_lk_rght          = tree->e_root->p_lk_rght;
      b2->p_lk_tip_r         = tree->e_root->p_lk_tip_r;
      b2->sum_scale_rght     = tree->e_root->sum_scale_rght;
      b2->sum_scale_rght_cat = tree->e_root->sum_scale_rght_cat;
      b2->p_lk_loc_rght      = tree->e_root->p_lk_loc_rght;
      b2->pars_r             = tree->e_root->pars_r;
      b2->ui_r               = tree->e_root->ui_r;
      b2->p_pars_r           = tree->e_root->p_pars_r;
      b2->patt_id_rght       = tree->e_root->patt_id_rght;
    }

  Update_Ancestors(root,root->v[2],tree);
  Update_Ancestors(root,root->v[1],tree);
  root->anc = NULL;

  if(tree->is_mixt_tree == YES) MIXT_Add_Root(target,tree);
}
7415
7416 //////////////////////////////////////////////////////////////
7417 //////////////////////////////////////////////////////////////
7418
/*
  Set the 'anc' (ancestor) pointer of 'd' to 'a', then recurse into the
  subtree below 'd'. The recursion does not go back to 'a' nor across the
  root edge tree->e_root. The ancestor of the root node is set to NULL
  whenever 'a' is the root node.
*/
void Update_Ancestors(t_node *a, t_node *d, t_tree *tree)
{
  int dir;

  if(d == NULL)
    {
      PhyML_Printf("\n. d is NULL; a: %d root: %d",a->num,tree->n_root->num);
      assert(FALSE);
    }

  d->anc = a;

  if(a == tree->n_root) a->anc = NULL;

  if(d->tax) return;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a)            continue;
      if(d->b[dir] == tree->e_root) continue;
      Update_Ancestors(d,d->v[dir],tree);
    }
}
7440
7441 //////////////////////////////////////////////////////////////
7442 //////////////////////////////////////////////////////////////
7443
7444 /* Generate a random unrooted tree with 'n_otu' OTUs */
Generate_Random_Tree_From_Scratch(int n_otu,int rooted)7445 t_tree *Generate_Random_Tree_From_Scratch(int n_otu, int rooted)
7446 {
7447 t_tree *tree;
7448 int *connected,*nonconnected,*available_nodes;
7449 int i,n_connected,n_nonconnected,n1,n2,new_n,n_internal,n_external,n_available;
7450 t_node *root,*curr_n,**internal_nodes, **external_nodes;
7451 phydbl *t,*tmp;
7452
7453 tree = Make_Tree_From_Scratch(n_otu,NULL);
7454
7455 tree->rates = RATES_Make_Rate_Struct(tree->n_otu);
7456 RATES_Init_Rate_Struct(tree->rates,NULL,tree->n_otu);
7457
7458 tree->times = TIMES_Make_Time_Struct(tree->n_otu);
7459 TIMES_Init_Time_Struct(tree->times,NULL,tree->n_otu);
7460
7461 for(i=0;i<2*tree->n_otu-2;++i)
7462 {
7463 tree->a_nodes[i]->v[1] = NULL;
7464 tree->a_nodes[i]->v[2] = NULL;
7465 }
7466
7467 root = (t_node *)Make_Node_Light(2*tree->n_otu-2);
7468
7469 connected = (int *)mCalloc(2*tree->n_otu-2,sizeof(int));
7470 nonconnected = (int *)mCalloc(2*tree->n_otu-2,sizeof(int));
7471 available_nodes = (int *)mCalloc(2*tree->n_otu-2,sizeof(int));
7472 internal_nodes = (t_node **)mCalloc(tree->n_otu-2,sizeof(t_node *));
7473 external_nodes = (t_node **)mCalloc(tree->n_otu, sizeof(t_node *));
7474 t = (phydbl *)mCalloc(tree->n_otu-1,sizeof(phydbl ));
7475 tmp = (phydbl *)mCalloc(2*tree->n_otu-2,sizeof(phydbl ));
7476
7477 n_nonconnected = 2*n_otu-2;
7478
7479 for(i=0;i<2*tree->n_otu-2;++i) nonconnected[i] = i;
7480
7481 available_nodes[0] = 2*n_otu-2;
7482
7483 /* Node times are generated according to a Birth-death process.
7484 Formulae are as described by Yang and Rannala (1997) */
7485 phydbl phi;
7486 phydbl rho; /* sampling intensity */
7487 phydbl mu; /* birth rate */
7488 phydbl lambda; /* death rate */
7489 phydbl u; /* random U[0,1] */
7490 phydbl expval;
7491
7492 /* rho = 1.0 and mu = 0.0 correspond to the Yule process */
7493
7494 lambda = 6.7;
7495 mu = 2.5;
7496 rho = 9./150.;
7497
7498 expval = exp(MIN(1.E+2,mu-lambda));
7499 phi = (rho*lambda*(expval-1.) + (mu-lambda)*expval)/(expval-1.); /* Equation 16 */
7500
7501 for(i=0;i<tree->n_otu-1;i++)
7502 {
7503 u = rand();
7504 u /= RAND_MAX;
7505
7506 if(fabs(lambda - mu) > 1.E-4)
7507 t[i] = (log(phi-u*rho*lambda) - log(phi-u*rho*lambda + u*(lambda-mu)))/(mu-lambda); /* Equation 15 */
7508 else
7509 t[i] = u / (1.+lambda*rho*(1-u)); /* Equation 17 */
7510 }
7511
7512 Qksort(t,NULL,0,tree->n_otu-2); /* Node times ordering in ascending order */
7513
7514 for(i=0;i<tree->n_otu-1;i++) tmp[i] = t[tree->n_otu-2-i];
7515 for(i=0;i<tree->n_otu-1;i++) t[i] = -tmp[i];
7516
7517
7518 /* Rescale t_node times such that the time at the root t_node is -100 */
7519 for(i=1;i<tree->n_otu-1;i++)
7520 {
7521 t[i] /= -t[0];
7522 t[i] *= 1.E+02;
7523 }
7524 t[0] = -1.E+02;
7525
7526
7527 n_available = 1;
7528 curr_n = root;
7529 n_connected = 0;
7530 do
7531 {
7532 n1 = Rand_Int(0,n_nonconnected-1);
7533 n1 = nonconnected[n1];
7534 connected[n1] = 1;
7535
7536 n_nonconnected = 0;
7537 For(i,2*tree->n_otu-2) if(!connected[i]) {nonconnected[n_nonconnected++] = i;}
7538
7539 n2 = Rand_Int(0,n_nonconnected-1);
7540 n2 = nonconnected[n2];
7541 connected[n2] = 1;
7542
7543 n_nonconnected = 0;
7544 For(i,2*tree->n_otu-2) if(!connected[i]) {nonconnected[n_nonconnected++] = i;}
7545
7546 curr_n->v[1] = tree->a_nodes[n1];
7547 curr_n->v[2] = tree->a_nodes[n2];
7548 tree->a_nodes[n1]->v[0] = curr_n;
7549 tree->a_nodes[n2]->v[0] = curr_n;
7550
7551 tree->times->nd_t[curr_n->num] = t[n_connected/2];
7552
7553 available_nodes[n_available] = tree->a_nodes[n1]->num;
7554 for(i=0;i<n_available;i++)
7555 if(available_nodes[i] == curr_n->num)
7556 {
7557 available_nodes[i] = tree->a_nodes[n2]->num;
7558 break;
7559 }
7560 n_available++;
7561
7562 new_n = Rand_Int(0,n_available-1);
7563 curr_n = tree->a_nodes[available_nodes[new_n]];
7564
7565 n_connected+=2;
7566
7567 }while(n_connected < 2*tree->n_otu-2);
7568
7569 For(i,2*tree->n_otu-2) tmp[i] = tree->times->nd_t[i];
7570
7571 /* Unroot the tree */
7572 root->v[2]->v[0] = root->v[2];
7573 root->v[1]->v[0] = root->v[1];
7574
7575 n_internal = n_external = 0;
7576 For(i,2*tree->n_otu-2)
7577 {
7578 if(tree->a_nodes[i]->v[1]) internal_nodes[n_internal++] = tree->a_nodes[i];
7579 else external_nodes[n_external++] = tree->a_nodes[i];
7580 }
7581
7582
7583 n_internal = n_external = 0;
7584 For(i,2*tree->n_otu-2)
7585 {
7586 if(i < tree->n_otu)
7587 {
7588 tree->a_nodes[i] = external_nodes[n_external++];
7589 tree->a_nodes[i]->tax = 1;
7590 }
7591 else
7592 {
7593 tree->times->nd_t[i] = tmp[internal_nodes[n_internal]->num];
7594 tree->a_nodes[i] = internal_nodes[n_internal++];
7595 tree->a_nodes[i]->tax = 0;
7596 }
7597
7598 tree->a_nodes[i]->num = i;
7599 }
7600
7601 for(i=0;i<tree->n_otu;i++) tree->times->nd_t[i] = 0.0;
7602
7603 for(i=0;i<tree->n_otu;i++)
7604 {
7605 if(!tree->a_nodes[i]->name) tree->a_nodes[i]->name = (char *)mCalloc(T_MAX_NAME,sizeof(char));
7606 strcpy(tree->a_nodes[i]->name,"x");
7607 sprintf(tree->a_nodes[i]->name+1,"%d",i);
7608 }
7609
7610 Connect_Edges_To_Nodes_Serial(tree);
7611
7612 /* Add root */
7613 if(rooted)
7614 {
7615 For(i,2*tree->n_otu-3)
7616 {
7617 if(((tree->a_edges[i]->left == root->v[1]) || (tree->a_edges[i]->rght == root->v[1])) &&
7618 ((tree->a_edges[i]->left == root->v[2]) || (tree->a_edges[i]->rght == root->v[2])))
7619 {
7620 Add_Root(tree->a_edges[i],tree);
7621 break;
7622 }
7623 }
7624
7625 }
7626 /* Or not... */
7627 else
7628 {
7629 Free_Node(root);
7630 }
7631
7632 RATES_Random_Branch_Lengths(tree);
7633
7634 Free(available_nodes);
7635 Free(connected);
7636 Free(nonconnected);
7637 Free(external_nodes);
7638 Free(internal_nodes);
7639 Free(t);
7640 Free(tmp);
7641
7642 return tree;
7643 }
7644
7645 //////////////////////////////////////////////////////////////
7646 //////////////////////////////////////////////////////////////
7647
/*
  Simulate sequence data along 'tree' under model 'mod', writing the
  states into 'data' for sites first_site_pos ... data->init_len-1.

  For every site a rate class is drawn from the rate class
  probabilities, the transition probability matrices of all edges are
  updated, a root state is drawn from the equilibrium frequencies and
  assigned to the first sequence, and states are propagated through the
  tree with Evolve_Recur(), starting from tree->a_nodes[0]. Every
  simulated site gets weight 1 and data->crunch_len is set to
  data->init_len.

  Edge lengths are saved on entry and restored on exit.
*/
void Evolve(calign *data, t_mod *mod, int first_site_pos, t_tree *tree)
{
  int root_state,root_rate_class;
  int site,i;
  phydbl *saved_l;
  int mgf_bl_was_on;

  /* Back up the edge lengths */
  saved_l = (phydbl *)mCalloc(2*tree->n_otu-3,sizeof(phydbl));
  For(i,2*tree->n_otu-3) saved_l[i] = tree->a_edges[i]->l->v;

  data->n_otu = tree->n_otu;
  data->io    = tree->io;

  if(mod->use_m4mod) tree->write_labels = YES;

  Set_Br_Len_Var(NULL,tree);

  mgf_bl_was_on = (tree->mod->gamma_mgf_bl == YES) ? YES : NO;

  Set_Update_Eigen(YES,mod);

  assert(first_site_pos < data->init_len);

  for(site=first_site_pos;site<data->init_len;++site)
    {
      if(!Set_Model_Parameters(mod)) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);

      /* Pick the rate class */
      root_rate_class = Pick_State(mod->ras->n_catg,mod->ras->gamma_r_proba->v);

      /* Get the change probability matrices */
      for(i=0;i<2*tree->n_otu-3;++i) Update_PMat_At_Given_Edge(tree->a_edges[i],tree);

      /* Pick the root nucleotide/aa */
      root_state = Pick_State(mod->ns,mod->e_frq->pi->v);
      data->c_seq[0]->state[site] = Reciproc_Assign_State(root_state,tree->io->datatype);

      /* tree->a_nodes[0] is considered as the root t_node */
      Evolve_Recur(tree->a_nodes[0],
                   tree->a_nodes[0]->v[0],
                   tree->a_nodes[0]->b[0],
                   root_state,
                   root_rate_class,
                   site,
                   data,
                   mod,
                   tree);

      data->wght[site] = 1;
    }

  data->crunch_len = data->init_len;

  /* Restore the edge lengths */
  For(i,2*tree->n_otu-3) tree->a_edges[i]->l->v = saved_l[i];
  Free(saved_l);

  if(mgf_bl_was_on == YES) tree->mod->gamma_mgf_bl = YES;
}
7733
7734 //////////////////////////////////////////////////////////////
7735 //////////////////////////////////////////////////////////////
7736
/*
  Draw an index in [0, n-1] by rejection sampling: a candidate index is
  taken as rand() modulo n and accepted when a second draw, rand() divided
  by RAND_MAX, is smaller than prob[candidate]. Candidates are redrawn
  until one is accepted.
*/
int Pick_State(int n, phydbl *prob)
{
  int candidate;
  phydbl u;

  for(;;)
    {
      candidate = rand();
      candidate = (candidate % n);

      u  = (phydbl)rand();
      u /= (phydbl)RAND_MAX;

      if(u < prob[candidate]) return (int)candidate;
    }
}
7754
7755 //////////////////////////////////////////////////////////////
7756 //////////////////////////////////////////////////////////////
7757
/*
  Draw the state at node 'd' given state 'a_state' at node 'a', using the
  row of b->Pij_rr that corresponds to rate class 'r_class' and state
  'a_state'. If 'd' is a tip the state is written into
  gen_data->c_seq[d->num] at position 'site_num'; otherwise the
  simulation proceeds towards every neighbour of 'd' other than 'a'.
*/
void Evolve_Recur(t_node *a, t_node *d, t_edge *b, int a_state, int r_class, int site_num, calign *gen_data, t_mod *mod, t_tree *tree)
{
  int d_state;
  int ns,row_offset;
  int dir;

  ns         = tree->mod->ns;
  row_offset = r_class*(ns*ns) + a_state*ns;

  d_state = Pick_State(mod->ns,b->Pij_rr+row_offset);

  if(d->tax)
    {
      gen_data->c_seq[d->num]->state[site_num] = Reciproc_Assign_State(d_state,tree->io->datatype);
      return;
    }

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;
      Evolve_Recur(d,d->v[dir],d->b[dir],
                   d_state,r_class,site_num,gen_data,
                   mod,tree);
    }
}
7791
7792 //////////////////////////////////////////////////////////////
7793 //////////////////////////////////////////////////////////////
7794
/*
  Fill the div_post_pred_left / div_post_pred_rght counters of every edge.

  The ui_l / ui_r vectors are first updated by a post-order then a
  pre-order traversal started from tree->a_nodes[0]. Then, for each site
  pattern and each side of each edge, the number of bits set in the
  corresponding ui value (among the first 'ns' bits) is computed and the
  pattern weight is added to the counter at index (number of bits - 1).
*/
void Site_Diversity(t_tree *tree)
{
  int e,s,patt,ns;
  int *bits,n_set;
  t_edge *b;

  ns = tree->mod->ns;

  bits = (int *)mCalloc(ns,sizeof(int));

  Site_Diversity_Post(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree->a_nodes[0]->b[0],tree);
  Site_Diversity_Pre (tree->a_nodes[0],tree->a_nodes[0]->v[0],tree->a_nodes[0]->b[0],tree);

  /* Reset the counters */
  For(e,2*tree->n_otu-3)
    {
      for(s=0;s<ns;s++)
        {
          tree->a_edges[e]->div_post_pred_left[s] = 0;
          tree->a_edges[e]->div_post_pred_rght[s] = 0;
        }
    }

  for(patt=0;patt<tree->n_pattern;patt++)
    {
      For(e,2*tree->n_otu-3)
        {
          b = tree->a_edges[e];

          /* Left side */
          Binary_Decomposition(b->ui_l[patt],bits,ns);
          n_set = 0;
          for(s=0;s<ns;s++) n_set += bits[s];
          b->div_post_pred_left[n_set-1] += tree->data->wght[patt];

          /* Right side */
          Binary_Decomposition(b->ui_r[patt],bits,ns);
          n_set = 0;
          for(s=0;s<ns;s++) n_set += bits[s];
          b->div_post_pred_rght[n_set-1] += tree->data->wght[patt];
        }
    }

  Free(bits);
}
7848
7849 //////////////////////////////////////////////////////////////
7850 //////////////////////////////////////////////////////////////
7851
7852
/*
  Post-order traversal: visit every neighbour of 'd' other than 'a'
  first, then call Subtree_Union() for node 'd' and edge 'b'. Tips are
  left untouched.
*/
void Site_Diversity_Post(t_node *a, t_node *d, t_edge *b, t_tree *tree)
{
  int dir;

  if(d->tax) return;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;
      Site_Diversity_Post(d,d->v[dir],d->b[dir],tree);
    }

  Subtree_Union(d,b,tree);
}
7867
7868 //////////////////////////////////////////////////////////////
7869 //////////////////////////////////////////////////////////////
7870
7871
/*
  Pre-order traversal: for every neighbour of 'd' other than 'a', call
  Subtree_Union() for node 'd' and the edge leading to that neighbour,
  then move on to that neighbour. Tips are left untouched.
*/
void Site_Diversity_Pre(t_node *a, t_node *d, t_edge *b, t_tree *tree)
{
  int dir;

  if(d->tax) return;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;
      Subtree_Union(d,d->b[dir],tree);
      Site_Diversity_Pre(d,d->v[dir],d->b[dir],tree);
    }
}
7887
7888 //////////////////////////////////////////////////////////////
7889 //////////////////////////////////////////////////////////////
7890
7891
/*
  For every site pattern, set the ui vector of edge 'b_fcus' on the side
  of node 'n' to the bitwise OR of the ui vectors found on the two other
  edges attached to 'n', each taken on the side opposite to 'n'.

           |
           |<- b_fcus
           |
           n
          / \
         /   \
        /     \
*/
void Subtree_Union(t_node *n, t_edge *b_fcus, t_tree *tree)
{
  int site;
  int dir1,dir2;
  int *ui,*ui_v1,*ui_v2;
  t_edge *e1,*e2;

  /* Pick the vector to fill and the two directions leading away from b_fcus */
  if(n == b_fcus->left)
    {
      ui   = b_fcus->ui_l;
      dir1 = b_fcus->l_v1;
      dir2 = b_fcus->l_v2;
    }
  else
    {
      ui   = b_fcus->ui_r;
      dir1 = b_fcus->r_v1;
      dir2 = b_fcus->r_v2;
    }

  e1 = n->b[dir1];
  e2 = n->b[dir2];

  /* On each of these edges, use the vector on the side opposite to n */
  ui_v1 = (n == e1->left) ? (e1->ui_r) : (e1->ui_l);
  ui_v2 = (n == e2->left) ? (e2->ui_r) : (e2->ui_l);

  for(site=0;site<tree->n_pattern;site++) ui[site] = ui_v1[site] | ui_v2[site];
}
7941
7942 //////////////////////////////////////////////////////////////
7943 //////////////////////////////////////////////////////////////
7944
7945
/*
  Write the binary decomposition of 'value' into bit_vect[0..size-1],
  bit_vect[i] being the coefficient (0 or 1) of 2^i.

  The decomposition is greedy, from the highest power of two down: a bit
  is set whenever what is left of 'value' is at least 2^i, and 2^i is
  then subtracted. As a consequence a negative 'value' yields all zeros
  and a 'value' of 2^size or more yields all ones.

  Powers of two are computed with integer shifts in a 64-bit type rather
  than with floating-point pow() cast to int, so that 'size' values above
  31 are handled without overflow (those high bits are always zero since
  'value' is an int).
*/
void Binary_Decomposition(int value, int *bit_vect, int size)
{
  int i;
  long long remaining,weight;

  remaining = value;

  for(i=size-1;i>=0;i--)
    {
      /* 2^i does not fit in a long long: it necessarily exceeds 'remaining' */
      if(i >= 62)
        {
          bit_vect[i] = 0;
          continue;
        }

      weight = 1LL << i;

      if(remaining < weight)
        {
          bit_vect[i] = 0;
        }
      else
        {
          bit_vect[i] = 1;
          remaining  -= weight;
        }
    }
}
7966
7967 //////////////////////////////////////////////////////////////
7968 //////////////////////////////////////////////////////////////
7969
7970
/* Writes the column header line of the diversity output to fp.
   The tree argument is not used. */
void Print_Diversity_Header(FILE *fp, t_tree *tree)
{
  PhyML_Fprintf(fp,
                "t_edge side diversity count\n");
}
7976
7977 //////////////////////////////////////////////////////////////
7978 //////////////////////////////////////////////////////////////
7979
/* Deprecated search strategy: runs a simultaneous-NNI search and an SPR
   search from the same starting tree and leaves in 'tree' whichever
   result has the highest log-likelihood.

   The function prints a deprecation message and then hits assert(FALSE),
   so with assertions enabled it never gets past the second statement.
   NOTE(review): the remainder only executes when assertions are compiled
   out (NDEBUG) -- confirm whether that code path is still wanted. */
void Best_Of_NNI_And_SPR(t_tree *tree)
{
  PhyML_Fprintf(stderr,"Best of NNI and SPR option is deprecated. PhyML nows only relies on SPR moves");
  assert(FALSE);

  if(tree->mod->s_opt->random_input_tree)
    Global_Spr_Search(tree); /* Don't do simultaneous NNIs if starting tree is random */
  else
    {
      t_tree *ori_tree,*best_tree;
      t_mod *ori_mod,*best_mod;
      scalar_dbl **ori_bl,**best_bl;
      phydbl best_lnL,ori_lnL,nni_lnL,spr_lnL;
      int i;
#ifdef BEAGLE
      tree->b_inst = create_beagle_instance(tree, tree->io->quiet, tree->io);
#endif

      /* Two model copies: one for the starting state, one for the best state found */
      ori_mod = Copy_Model(tree->mod);
      best_mod = Copy_Model(tree->mod);

      ori_tree = Make_Tree_From_Scratch(tree->n_otu,tree->data);
      best_tree = Make_Tree_From_Scratch(tree->n_otu,tree->data);

      Copy_Tree(tree,ori_tree);//Save a backup of the original tree in ori_tree
      Record_Br_Len(tree);
      ori_bl = Copy_Br_Len(tree);


      best_lnL = UNLIKELY;
      Lk(NULL,tree);
      ori_lnL = tree->c_lnL; /* Record likelihood of the starting tree */

      // ****** Perform NNI ******
      Simu_Loop(tree); /* Perform simultaneous NNIs */
      best_lnL = tree->c_lnL; /* Record the likelihood */
      nni_lnL = tree->c_lnL;

      //Mark the NNI tree as the "best" tree
      Copy_Tree(tree,best_tree); /* Record the tree topology and branch lengths */
      Record_Br_Len(tree);
      best_bl = Copy_Br_Len(tree);
      Transfer_Br_Len_To_Tree(best_bl,best_tree);
      Record_Model(tree->mod,best_mod);

      Copy_Tree(ori_tree,tree); /* Back to the original tree topology */
      Transfer_Br_Len_To_Tree(ori_bl,tree); /* Back to the original branch lengths */
      Record_Model(ori_mod,tree->mod); /* Back to the original model */

      /* Make sure the tree is in its original form */
      Lk(NULL,tree);
      if(FABS(tree->c_lnL - ori_lnL) > tree->mod->s_opt->min_diff_lk_local)
        {
          PhyML_Printf("\n. ori_lnL = %f, c_lnL = %f",ori_lnL,tree->c_lnL);
          PhyML_Printf("\n. Err. in file %s at line %d (function '%s') \n",__FILE__,__LINE__,__FUNCTION__);
          Warn_And_Exit("\n. PhyML finished prematurely.");
        }


      // ****** Perform SPR ******
      Global_Spr_Search(tree);
      spr_lnL = tree->c_lnL;


      //Did SPR perform better than NNI?
      if(tree->c_lnL > best_lnL)
        {
#ifdef BEAGLE
          finalize_beagle_instance(best_tree);//Free the old BEAGLE instance associated with the NNI tree (since SPR is better)
#endif
          best_lnL = spr_lnL;
          Copy_Tree(tree,best_tree); /* Record tree topology, branch lengths and model parameters */
          Record_Br_Len(tree);
          /* Release the NNI branch lengths before replacing them with the SPR ones */
          For(i,2*tree->n_otu-1) Free_Scalar_Dbl(best_bl[i]);
          Free(best_bl);
          best_bl = Copy_Br_Len(tree);
          Transfer_Br_Len_To_Tree(best_bl,best_tree);
          Record_Model(tree->mod,best_mod);
        }

      /* Install the best topology, branch lengths and model back into 'tree' */
      Copy_Tree(best_tree,tree);
      Init_Partial_Lk_Tips_Double(tree);
      Init_Ui_Tips(tree);
      Init_Partial_Pars_Tips(tree);
      Transfer_Br_Len_To_Tree(best_bl,tree);
      Record_Model(best_mod,tree->mod);

      /* Make sure the current tree has the best topology, branch lengths and model parameters */
      Lk(NULL,tree);
      if(FABS(tree->c_lnL - best_lnL) > tree->mod->s_opt->min_diff_lk_local)
        {
          PhyML_Fprintf(stderr,"\n. best_lnL = %f, c_lnL = %f",best_lnL,tree->c_lnL);
          PhyML_Fprintf(stderr,"\n. Err. in file %s at line %d\n",__FILE__,__LINE__);
          Warn_And_Exit("\n. PhyML finished prematurely.");
        }

      if(tree->verbose > VL0)
        {
          PhyML_Printf("\n\n. Log likelihood obtained after NNI moves : %f",nni_lnL);
          PhyML_Printf("\n. Log likelihood obtained after SPR moves : %f",spr_lnL);
        }

      /* Release every temporary created above */
      For(i,2*tree->n_otu-1) Free_Scalar_Dbl(ori_bl[i]);
      Free(ori_bl);

      For(i,2*tree->n_otu-1) Free_Scalar_Dbl(best_bl[i]);
      Free(best_bl);

      Free_Tree(ori_tree);
      Free_Tree(best_tree);

      /* NOTE(review): both the 'complete' and the 'basic' parts of each model
         copy are released here; presumably they cover disjoint allocations --
         confirm against Copy_Model(). */
      Free_Model_Complete(ori_mod);
      Free_Model_Complete(best_mod);

      Free_Model_Basic(ori_mod);
      Free_Model_Basic(best_mod);
    }
}
8098
8099 //////////////////////////////////////////////////////////////
8100 //////////////////////////////////////////////////////////////
8101
8102
8103 /* Polynomial interpolation. Adapted from "Numerical Recipes in C".
8104 Press, Flannery, Teukolsky, Vetterling, 1988.
8105 */
Polint(phydbl * xa,phydbl * ya,int n,phydbl x,phydbl * y,phydbl * dy)8106 int Polint(phydbl *xa, phydbl *ya, int n, phydbl x, phydbl *y, phydbl *dy)
8107 {
8108 int i,m,ns=1;
8109 phydbl den,dif,dift,ho,hp,w;
8110 phydbl *c,*d;
8111
8112 dif=FABS(x-xa[1]);
8113
8114 c = (phydbl *)mCalloc(n,sizeof(phydbl));
8115 d = (phydbl *)mCalloc(n,sizeof(phydbl));
8116
8117 for(i=1;i<=n;i++)
8118 {
8119 if((dift=FABS(x-xa[i])) < dif)
8120 {
8121 ns=i;
8122 dif=dift;
8123 }
8124 c[i]=ya[i];
8125 d[i]=ya[i];
8126 }
8127
8128 *y=ya[ns--];
8129
8130 for (m=1;m<n;m++)
8131 {
8132 for (i=1;i<=n-m;i++)
8133 {
8134 ho=xa[i]-x;
8135 hp=xa[i+m]-x;
8136 w=c[i+1]-d[i];
8137 if((den=ho-hp) < SMALL && (den=ho-hp) > -SMALL )
8138 {
8139 /* Rprintf("\n. Error in routine POLINT.\n"); */
8140 Exit("\n. Error in routine POLINT.\n");
8141 return(-1);
8142 }
8143 den=w/den;
8144 d[i]=hp*den;
8145 c[i]=ho*den;
8146 }
8147 *y += (*dy=(2*ns < (n-m) ? c[ns+1] : d[ns--]));
8148 }
8149
8150 Free(d);
8151 Free(c);
8152 return(0);
8153 }
8154
8155 //////////////////////////////////////////////////////////////
8156 //////////////////////////////////////////////////////////////
8157
8158 //////////////////////////////////////////////////////////////
8159 //////////////////////////////////////////////////////////////
8160
Dist_And_BioNJ(calign * cdata,t_mod * mod,option * io)8161 t_tree *Dist_And_BioNJ(calign *cdata, t_mod *mod, option *io)
8162 {
8163 t_tree *tree;
8164 matrix *mat;
8165
8166 if(mod->s_opt->random_input_tree == NO)
8167 {
8168 if(!io->quiet) PhyML_Printf("\n\n. Computing pairwise distances...");
8169
8170 mat = ML_Dist(cdata,mod);
8171
8172 Fill_Missing_Dist(mat);
8173
8174 if(!io->quiet) PhyML_Printf("\n\n. Building BioNJ tree...");
8175 mat->tree = Make_Tree_From_Scratch(cdata->n_otu,cdata);
8176 Bionj(mat);
8177
8178 tree = mat->tree;
8179 tree->mat = mat;
8180 }
8181 else
8182 {
8183 tree = Make_Tree_From_Scratch(cdata->n_otu,cdata);
8184 Random_Tree(tree);
8185 tree->mat = NULL;
8186 }
8187
8188 return tree;
8189 }
8190
8191 //////////////////////////////////////////////////////////////
8192 //////////////////////////////////////////////////////////////
8193
/* Runs Bionj_Br_Length() on 'tree' using the distance matrix 'mat'.
   If mat is NULL, a matrix is computed here with ML_Dist() and freed
   before returning; a matrix supplied by the caller is left allocated.
   In both cases mat->tree is pointed at 'tree' and mat->method set to 0. */
void Add_BioNJ_Branch_Lengths(t_tree *tree, calign *cdata, t_mod *mod, matrix *mat)
{
  const int own_mat = (mat == NULL); /* matrix created (and owned) by this call? */

  Connect_CSeqs_To_Nodes(cdata,mod->io,tree);

  if(own_mat) mat = ML_Dist(cdata,mod);

  mat->tree   = tree;
  mat->method = 0;
  Bionj_Br_Length(mat);

  if(own_mat) Free_Mat(mat);
}
8205
8206 //////////////////////////////////////////////////////////////
8207 //////////////////////////////////////////////////////////////
8208
/* Reads the tree described by the string s_tree, runs the bootstrap
   analysis on it (Bootstrap_MPI() when MPI is defined, Bootstrap()
   otherwise) and returns a new string, produced by Write_Tree(), of the
   tree after Collect_Edge_Support_Values().

   s_tree is released with Free() inside this function, so the caller must
   not use it afterwards and should use the returned string instead.
   NOTE(review): Read_Tree() receives the address of s_tree and may
   therefore modify the pointer -- confirm it still points to the start of
   the allocation when Free() is called.

   Side effect: io->print_support_val is set to YES. */
char *Bootstrap_From_String(char *s_tree, calign *cdata, t_mod *mod, option *io)
{
  t_tree *tree;

  tree = Read_Tree(&s_tree);

  /* Check the result of Read_Tree() before touching the tree (the test
     used to come after the first dereference) */
  if(!tree)
    {
      PhyML_Printf("\n. Err. in file %s at line %d (function '%s') \n",__FILE__,__LINE__,__FUNCTION__);
      Exit("");
    }

  tree->n_root = NULL;
  tree->e_root = NULL;

  tree->mod       = mod;
  tree->io        = io;
  tree->data      = cdata;
  tree->n_pattern = tree->data->crunch_len;
  tree->io->print_support_val = YES;


  Connect_CSeqs_To_Nodes(cdata,io,tree);
  if(tree->mod->s_opt->random_input_tree) Random_Tree(tree);
  Make_Tree_For_Pars(tree);
  Make_Tree_For_Lk(tree);
  Unscale_Br_Len_Multiplier_Tree(tree);
  Br_Len_Not_Involving_Invar(tree);
  Make_Spr_List_One_Edge(tree);
  Make_Spr_List_All_Edge(tree);
  Make_Best_Spr(tree);

  Set_Both_Sides(YES,tree);
  Lk(NULL,tree);

#ifdef MPI
  Bootstrap_MPI(tree);
#else
  Bootstrap(tree);
#endif

  Free(s_tree); /* input string no longer needed */

  Rescale_Br_Len_Multiplier_Tree(tree);
  Br_Len_Involving_Invar(tree);
  Collect_Edge_Support_Values(tree);

  s_tree = Write_Tree(tree);

  Free_Spr_List_One_Edge(tree);
  Free_One_Spr(tree->best_spr);
  Free_Spr_List_All_Edge(tree);
  Free_Tree_Pars(tree);
  Free_Tree_Lk(tree);
  Free_Tree(tree);

  return s_tree;
}
8267
8268 //////////////////////////////////////////////////////////////
8269 //////////////////////////////////////////////////////////////
8270
/* Reads the tree described by the string s_tree, runs aLRT() on it and
   returns a new string, produced by Write_Tree(), of the tree after
   Collect_Edge_Support_Values().

   s_tree is released with Free() inside this function, so the caller must
   not use it afterwards and should use the returned string instead.
   NOTE(review): Read_Tree() receives the address of s_tree and may
   therefore modify the pointer -- confirm it still points to the start of
   the allocation when Free() is called.

   When BEAGLE is defined, a BEAGLE instance is created for the tree and
   finalized before the tree is freed. */
char *aLRT_From_String(char *s_tree, calign *cdata, t_mod *mod, option *io)
{
  t_tree *tree;

  tree = Read_Tree(&s_tree);

  /* Check the result of Read_Tree() before touching the tree (the test
     used to come after the first dereference) */
  if(!tree)
    {
      PhyML_Fprintf(stderr,"\n. Err. in file %s at line %d (function '%s') \n",__FILE__,__LINE__,__FUNCTION__);
      Warn_And_Exit("\n. PhyML finished prematurely.");
    }

  tree->n_root = NULL;
  tree->e_root = NULL;

  tree->mod       = mod;
  tree->io        = io;
  tree->data      = cdata;
  tree->n_pattern = tree->data->crunch_len;

  Connect_CSeqs_To_Nodes(cdata,io,tree);
  if(tree->mod->s_opt->random_input_tree) Random_Tree(tree);
  Make_Tree_For_Pars(tree);
  Make_Tree_For_Lk(tree);

  Unscale_Br_Len_Multiplier_Tree(tree);
  Br_Len_Not_Involving_Invar(tree);

  Make_Spr_List_One_Edge(tree);
  Make_Spr_List_All_Edge(tree);
  Make_Best_Spr(tree);

#ifdef BEAGLE
  tree->b_inst = create_beagle_instance(tree, io->quiet, io);
#endif

  Set_Both_Sides(YES,tree);
  Lk(NULL,tree);

  aLRT(tree);

  Free(s_tree); /* input string no longer needed */

  Rescale_Br_Len_Multiplier_Tree(tree);
  Br_Len_Involving_Invar(tree);
  Collect_Edge_Support_Values(tree);

  s_tree = Write_Tree(tree);

#ifdef BEAGLE
  finalize_beagle_instance(tree);
#endif

  Free_One_Spr(tree->best_spr);
  Free_Spr_List_One_Edge(tree);
  Free_Spr_List_All_Edge(tree);
  Free_Tree_Pars(tree);
  Free_Tree_Lk(tree);
  Free_Tree(tree);

  return s_tree;
}
8333
8334 //////////////////////////////////////////////////////////////
8335 //////////////////////////////////////////////////////////////
8336
/* Flags the tips shared by tree1 and tree2. The 'common' field of every
   tip of both trees is first reset to NO. Then, for each tip of tree1,
   the first tip of tree2 bearing the same name (strcmp) is looked for;
   when there is one, both tips get common = YES. */
void Find_Common_Tips(t_tree *tree1, t_tree *tree2)
{
  int i,j;

  for(i=0;i<tree1->n_otu;i++) tree1->a_nodes[i]->common = NO;
  for(j=0;j<tree2->n_otu;j++) tree2->a_nodes[j]->common = NO;

  for(i=0;i<tree1->n_otu;i++)
    {
      t_node *tip1 = tree1->a_nodes[i];

      /* Linear scan of tree2's tips, stopping at the first name match */
      j = 0;
      while(j < tree2->n_otu && strcmp(tip1->name,tree2->a_nodes[j]->name) != 0) j++;

      if(j < tree2->n_otu)
        {
          tip1->common               = YES;
          tree2->a_nodes[j]->common  = YES;
        }
    }
}
8357
8358 //////////////////////////////////////////////////////////////
8359 //////////////////////////////////////////////////////////////
8360
Get_Tree_Size(t_tree * tree)8361 phydbl Get_Tree_Size(t_tree *tree)
8362 {
8363 int i;
8364 phydbl tree_size;
8365
8366 tree_size = 0.0;
8367 For(i,2*tree->n_otu-3) tree_size += tree->a_edges[i]->l->v;
8368
8369 if(tree->n_root != NULL)
8370 {
8371 tree_size += tree->n_root->b[1]->l->v;
8372 tree_size += tree->n_root->b[2]->l->v;
8373 }
8374
8375 /* For(i,2*tree->n_otu-3) */
8376 /* tree_size += */
8377 /* FABS(tree->times->nd_t[tree->a_edges[i]->left->num] - */
8378 /* tree->times->nd_t[tree->a_edges[i]->rght->num]); */
8379
8380 tree->size = tree_size;
8381 return tree_size;
8382 }
8383
8384 //////////////////////////////////////////////////////////////
8385 //////////////////////////////////////////////////////////////
8386
/* Pre-order traversal from a to d filling the dist_to_root field.
   When an edge b is supplied, d->dist_to_root is set to a->dist_to_root
   plus the length of b; with b == NULL the value already stored in d is
   kept. The traversal then continues from d through every edge that
   neither leads back to a nor is tree->e_root, and stops at tips. */
void Dist_To_Root_Pre(t_node *a, t_node *d, t_edge *b, t_tree *tree)
{
  int dir;

  if(b != NULL) d->dist_to_root = a->dist_to_root + b->l->v;

  if(d->tax) return;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a || d->b[dir] == tree->e_root) continue;

      Dist_To_Root_Pre(d,d->v[dir],d->b[dir],tree);
    }
}
8401
8402 //////////////////////////////////////////////////////////////
8403 //////////////////////////////////////////////////////////////
8404
/* Computes, for every node below the root, its distance to the root node
   (field dist_to_root). The tree must be rooted (tree->n_root is
   dereferenced unconditionally).

   The root gets distance 0 and each of its two descendants v[1] and v[2]
   gets the length of the root edge that leads to it. Edges and neighbours
   are paired by index throughout this file (d->b[i] connects d to
   d->v[i]), so v[1] takes the length of b[1] and v[2] that of b[2]; the
   two lengths used to be swapped. The rest of the tree is then filled by
   Dist_To_Root_Pre(), called with a NULL edge so that the values set here
   are not overwritten. */
void Dist_To_Root(t_tree *tree)
{
  tree->n_root->dist_to_root       = 0.0;
  tree->n_root->v[2]->dist_to_root = tree->n_root->b[2]->l->v;
  tree->n_root->v[1]->dist_to_root = tree->n_root->b[1]->l->v;

  Dist_To_Root_Pre(tree->n_root,tree->n_root->v[2],NULL,tree);
  Dist_To_Root_Pre(tree->n_root,tree->n_root->v[1],NULL,tree);
}
8414
8415 //////////////////////////////////////////////////////////////
8416 //////////////////////////////////////////////////////////////
8417
/* Chains the 2*n_otu-1 nodes of tree->a_nodes through their rk_next
   field in ascending order of dist_to_root. The ordering is obtained by
   a bubble sort on an array of node indices; the last node of the chain
   has rk_next == NULL. */
void Get_Node_Ranks_From_Dist_To_Root(t_tree *tree)
{
  const int n_nodes = 2*tree->n_otu-1;
  int *order;
  int pos,tmp;
  int swapped;

  order = (int *)mCalloc(n_nodes,sizeof(int));

  for(pos=0;pos<n_nodes;++pos) order[pos] = pos;

  /* Bubble sort: repeat passes until one completes without any exchange */
  swapped = YES;
  while(swapped == YES)
    {
      swapped = NO;
      for(pos=0;pos<n_nodes-1;++pos)
        {
          if(tree->a_nodes[order[pos+1]]->dist_to_root <
             tree->a_nodes[order[pos]]->dist_to_root)
            {
              tmp          = order[pos];
              order[pos]   = order[pos+1];
              order[pos+1] = tmp;

              swapped = YES;
            }
        }
    }

  for(pos=0;pos<n_nodes;++pos) tree->a_nodes[pos]->rk_next = NULL;

  for(pos=0;pos<n_nodes-1;++pos)
    tree->a_nodes[order[pos]]->rk_next = tree->a_nodes[order[pos+1]];

  tree->a_nodes[order[n_nodes-1]]->rk_next = NULL;

  Free(order);
}
8456
8457 //////////////////////////////////////////////////////////////
8458 //////////////////////////////////////////////////////////////
8459
/* Chains the 2*n_otu-1 nodes of tree->a_nodes in ascending order of
   tree->times->nd_t[] (indexed by position in a_nodes), as a doubly
   linked list using the rk_next and rk_prev fields. The ordering is
   obtained by a bubble sort on an array of node indices. The first node
   has rk_prev == NULL and the last one rk_next == NULL. */
void Get_Node_Ranks_From_Times(t_tree *tree)
{
  const int n_nodes = 2*tree->n_otu-1;
  int *order;
  int pos,tmp;
  int swapped;

  order = (int *)mCalloc(n_nodes,sizeof(int));

  for(pos=0;pos<n_nodes;++pos) order[pos] = pos;

  /* Bubble sort: repeat passes until one completes without any exchange */
  swapped = YES;
  while(swapped == YES)
    {
      swapped = NO;
      for(pos=0;pos<n_nodes-1;++pos)
        {
          if(tree->times->nd_t[order[pos]] > tree->times->nd_t[order[pos+1]])
            {
              tmp          = order[pos];
              order[pos]   = order[pos+1];
              order[pos+1] = tmp;

              swapped = YES;
            }
        }
    }

  /* Reset all links before rebuilding the chain */
  for(pos=0;pos<n_nodes;++pos)
    {
      tree->a_nodes[pos]->rk_next = NULL;
      tree->a_nodes[pos]->rk_prev = NULL;
    }

  for(pos=0;pos<n_nodes-1;++pos)
    {
      tree->a_nodes[order[pos]]->rk_next   = tree->a_nodes[order[pos+1]];
      tree->a_nodes[order[pos+1]]->rk_prev = tree->a_nodes[order[pos]];
    }

  Free(order);
}
8496
8497 //////////////////////////////////////////////////////////////
8498 //////////////////////////////////////////////////////////////
8499
/* Same as a time-based ranking of nodes, restricted to the first n_otu
   entries of tree->a_nodes: these nodes are chained in ascending order
   of tree->times->nd_t[] through rk_next / rk_prev. The ordering is
   obtained by a bubble sort on an array of node indices. The first node
   has rk_prev == NULL and the last one rk_next == NULL. */
void Get_Node_Ranks_From_Tip_Times(t_tree *tree)
{
  const int n_tips = tree->n_otu;
  int *order;
  int pos,tmp;
  int swapped;

  order = (int *)mCalloc(n_tips,sizeof(int));

  for(pos=0;pos<n_tips;++pos) order[pos] = pos;

  /* Bubble sort: repeat passes until one completes without any exchange */
  swapped = YES;
  while(swapped == YES)
    {
      swapped = NO;
      for(pos=0;pos<n_tips-1;++pos)
        {
          if(tree->times->nd_t[order[pos]] > tree->times->nd_t[order[pos+1]])
            {
              tmp          = order[pos];
              order[pos]   = order[pos+1];
              order[pos+1] = tmp;

              swapped = YES;
            }
        }
    }

  /* Reset all links before rebuilding the chain */
  for(pos=0;pos<n_tips;++pos)
    {
      tree->a_nodes[pos]->rk_next = NULL;
      tree->a_nodes[pos]->rk_prev = NULL;
    }

  for(pos=0;pos<n_tips-1;++pos)
    {
      tree->a_nodes[order[pos]]->rk_next   = tree->a_nodes[order[pos+1]];
      tree->a_nodes[order[pos+1]]->rk_prev = tree->a_nodes[order[pos]];
    }

  Free(order);
}
8536
8537 //////////////////////////////////////////////////////////////
8538 //////////////////////////////////////////////////////////////
8539
/* 'Borrowed' from libgen.

   Returns a pointer to the last component of 'path'.
   - NULL or empty path: returns the constant string ".".
   - Trailing '/' characters are removed by overwriting them with '\0',
     i.e. the caller's buffer is modified in place.
   - A path made only of '/' characters is returned as is, reduced to a
     single "/".
   Except for ".", the returned pointer points inside 'path'.

   The start of the last component is located with strrchr(), which avoids
   stepping a pointer to one element before the beginning of the buffer
   (undefined behaviour) when the path contains no '/'. */
char *Basename(char *path)
{
  char *end;
  char *slash;

  if( path == NULL || *path == '\0' ) return ".";

  end = path + strlen(path) - 1;

  /* Strip trailing slashes */
  while( *end == '/' )
    {
      if( end == path ) return path;
      *end-- = '\0';
    }

  /* The remaining string is non-empty and does not end with '/' */
  slash = strrchr(path,'/');

  return (slash != NULL) ? (slash + 1) : (path);
}
8559
8560 //////////////////////////////////////////////////////////////
8561 //////////////////////////////////////////////////////////////
8562
8563 /* Find the Last Common Ancestor of n1 and n2 */
Find_Lca_Pair_Of_Nodes(t_node * n1,t_node * n2,t_tree * tree)8564 t_node *Find_Lca_Pair_Of_Nodes(t_node *n1, t_node *n2, t_tree *tree)
8565 {
8566 t_node **list1, **list2, *lca;
8567 int size1, size2;
8568
8569 if(n1 == n2) return(n1);
8570
8571 if(!tree->n_root)
8572 {
8573 PhyML_Printf("\n. The tree must be rooted in this function.");
8574 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
8575 }
8576
8577 list1 = (t_node **)mCalloc(2*tree->n_otu-1,sizeof(t_node *));
8578 list2 = (t_node **)mCalloc(2*tree->n_otu-1,sizeof(t_node *));
8579
8580 Get_List_Of_Ancestors(n1,list1,&size1,tree);
8581 Get_List_Of_Ancestors(n2,list2,&size2,tree);
8582
8583 while(list1[size1] == list2[size2])
8584 {
8585 size1--;
8586 size2--;
8587
8588 if(size1 < 0 || size2 < 0) break;
8589 }
8590
8591 lca = list1[size1+1];
8592
8593 Free(list1);
8594 Free(list2);
8595
8596 if(lca == NULL)
8597 {
8598 PhyML_Printf("\n. %s",Write_Tree(tree));
8599 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
8600 }
8601 return lca;
8602 }
8603
8604 //////////////////////////////////////////////////////////////
8605 //////////////////////////////////////////////////////////////
8606
8607 /* Find the Last Common Ancestor of all the nodes in node_list */
Find_Lca_Clade(t_node ** node_list,int node_list_size,t_tree * tree)8608 t_node *Find_Lca_Clade(t_node **node_list, int node_list_size, t_tree *tree)
8609 {
8610 t_node ***list, *lca;
8611 int *size;
8612 int i;
8613
8614 assert(tree->n_root);
8615
8616 list = (t_node ***)mCalloc(node_list_size,sizeof(t_node **));
8617 for(i=0;i<node_list_size;i++) list[i] = (t_node **)mCalloc(2*tree->n_otu-1,sizeof(t_node *));
8618 size = (int *)mCalloc(node_list_size,sizeof(int));
8619
8620 for(i=0;i<node_list_size;i++)
8621 {
8622 if(!Get_List_Of_Ancestors(node_list[i],list[i],size+i,tree))
8623 {
8624 for(i=0;i<node_list_size;i++) PhyML_Printf("\n. %s",node_list[i]->name);
8625 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
8626 }
8627 }
8628
8629 /* for(i=0;i<node_list_size;i++) */
8630 /* { */
8631 /* int j; */
8632 /* PhyML_Printf("\n. Listing all ancestors of node number %d [%s]", */
8633 /* node_list[i]->num, */
8634 /* node_list[i]->tax ? node_list[i]->name : NULL); */
8635 /* For(j,size[i]) PhyML_Printf("\n. > %d <",list[i][j]->num); */
8636 /* } */
8637
8638 if(node_list_size > 1)
8639 {
8640 do
8641 {
8642 for(i=0;i<node_list_size-1;i++)
8643 {
8644 assert(list[i][size[i]-1]);
8645 assert(list[i+1][size[i+1]-1]);
8646 /* PhyML_Printf("\n. %d %d %d %d",list[i][size[i]-1]->num,size[i],list[i+1][size[i+1]-1]->num,size[i+1]); */
8647 if(list[i][size[i]-1] != list[i+1][size[i+1]-1])
8648 {
8649 /* PhyML_Printf("\n. Break at %d %d",list[i][size[i]]->num,list[i+1][size[i+1]]->num); */
8650 break;
8651 }
8652 }
8653
8654 if(i != node_list_size-1) break;
8655
8656 for(i=0;i<node_list_size;i++)
8657 {
8658 size[i]--;
8659 assert(size[i] > 0);
8660 }
8661
8662 if(node_list_size == 1) break;
8663
8664 }while(1);
8665 lca = list[0][size[0]];
8666 }
8667 else
8668 {
8669 lca = node_list[0];
8670 }
8671
8672 for(i=0;i<node_list_size;i++) Free(list[i]);
8673 Free(list);
8674 Free(size);
8675
8676 /* PhyML_Printf("\n. LCA: %d",lca->num); */
8677
8678 return lca;
8679 }
8680
8681 //////////////////////////////////////////////////////////////
8682 //////////////////////////////////////////////////////////////
8683
8684 /* Returns the list of the ancestors of ref_t_node from ref_t_node to the root included */
Get_List_Of_Ancestors(t_node * ref_node,t_node ** list,int * size,t_tree * tree)8685 int Get_List_Of_Ancestors(t_node *ref_node, t_node **list, int *size, t_tree *tree)
8686 {
8687 t_node *n;
8688
8689 n = ref_node;
8690 list[0] = n;
8691 *size = 1;
8692
8693 if(!n)
8694 {
8695 PhyML_Printf("\n. There seems to be a problem with the calibration file.\n");
8696 return 0;
8697 }
8698
8699 while(n != tree->n_root)
8700 {
8701 n = n->anc;
8702 if(!n)
8703 {
8704 PhyML_Printf("\n. n->anc has not been set properly (call Update_Ancestors first...)\n");
8705 return 0;
8706 }
8707 list[*size] = n;
8708 *size = *size+1;
8709 }
8710 return 1;
8711 }
8712
8713 //////////////////////////////////////////////////////////////
8714 //////////////////////////////////////////////////////////////
8715
/* Returns the number (num field) of one of the two nodes at the ends of
   edge tree->a_edges[edge_num]: the right node when the left node is the
   ancestor (anc) of the right node, the left node otherwise. */
int Edge_Num_To_Node_Num(int edge_num, t_tree *tree)
{
  t_edge *edge = tree->a_edges[edge_num];

  if(edge->left == edge->rght->anc) return edge->rght->num;

  return edge->left->num;
}
8727
8728 //////////////////////////////////////////////////////////////
8729 //////////////////////////////////////////////////////////////
8730
/* Starts the pre-order traversal Branch_Lengths_To_Rate_Lengths_Pre()
   from the root, first towards n_root->v[2], then towards n_root->v[1].
   The tree must be rooted (tree->n_root is dereferenced). */
void Branch_Lengths_To_Rate_Lengths(t_tree *tree)
{
  t_node *root = tree->n_root;

  Branch_Lengths_To_Rate_Lengths_Pre(root,root->v[2],tree);
  Branch_Lengths_To_Rate_Lengths_Pre(root,root->v[1],tree);
}
8736
8737 //////////////////////////////////////////////////////////////
8738 //////////////////////////////////////////////////////////////
8739
/* Pre-order traversal from a to d. For node d, sets
   tree->rates->cur_l[d->num] to br_r[d->num] * clock_r * norm_fact
   (all read from tree->rates), then continues from d through every edge
   that neither leads back to a nor is tree->e_root. Stops at tips. */
void Branch_Lengths_To_Rate_Lengths_Pre(t_node *a, t_node *d, t_tree *tree)
{
  int dir;

  tree->rates->cur_l[d->num] =
    tree->rates->br_r[d->num] *
    tree->rates->clock_r      *
    tree->rates->norm_fact;

  if(d->tax) return;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a || d->b[dir] == tree->e_root) continue;

      Branch_Lengths_To_Rate_Lengths_Pre(d,d->v[dir],tree);
    }
}
8757
8758 //////////////////////////////////////////////////////////////
8759 //////////////////////////////////////////////////////////////
8760
/* Returns the number of the node that is the last common ancestor of the
   taxa whose names are given in tax_name_list (list_size entries), or -1
   when no such node is found.

   Each name is looked up among the n_otu tips of the tree (exact match
   with strcmp). A name that is not found triggers a warning and is
   ignored. The nodes that were found are stored contiguously, so that
   the n_matches entries handed over to Find_Lca_Clade() are all valid;
   they used to be stored at the position of their name in tax_name_list,
   which left NULL holes in (or valid nodes out of) the first n_matches
   entries whenever a name was missing.

   Find_Lca_Clade() requires a rooted tree. */
int Find_Clade(char **tax_name_list, int list_size, t_tree *tree)
{
  t_node **tax_node_list;
  int i,j;
  int n_matches;
  t_node *lca;

  tax_node_list = (t_node **)mCalloc(list_size,sizeof(t_node *));

  n_matches = 0;

  for(i=0;i<list_size;i++)
    {
      for(j=0;j<tree->n_otu;j++)
        {
          if(!strcmp(tax_name_list[i],tree->a_nodes[j]->name))
            {
              tax_node_list[n_matches] = tree->a_nodes[j];
              n_matches++;
              break;
            }
        }

      if(j == tree->n_otu)
        {
          PhyML_Printf("\n. Problem with the calibration file.");
          PhyML_Printf("\n. Could not find taxon with name '%s' in the sequence or tree file.",tax_name_list[i]);
          /* Generic_Exit(__FILE__,__LINE__,__FUNCTION__); */
        }
    }

  /* Nothing to search for when no taxon was found */
  lca = (n_matches > 0) ? Find_Lca_Clade(tax_node_list,n_matches,tree) : NULL;

  Free(tax_node_list);

  if(lca) return lca->num;
  else    return -1;
}
8805
8806 //////////////////////////////////////////////////////////////
8807 //////////////////////////////////////////////////////////////
8808
8809
/* Pre-order traversal from a to d looking for the node whose bipartition
   matches a list of taxon numbers.

   At node d, the first direction i such that d->v[i] == a or
   d->b[i] == tree->e_root is selected. If the bipartition stored for
   that direction (bip_node[i], bip_size[i] entries) has exactly list_size
   nodes, the number of them whose num appears in tax_num_list is counted;
   when this count equals list_size, *num is set to d->num. The traversal
   then continues through the other directions and stops at tips. */
void Find_Clade_Pre(t_node *a, t_node *d, int *tax_num_list, int list_size, int *num, t_tree *tree)
{
  int i,j,k;
  int up;
  int score;

  /* Direction pointing towards a (or across the root edge) */
  up = -1;
  for(i=0;i<3;i++)
    {
      if((d->v[i] == a) || (d->b[i] == tree->e_root))
        {
          up = i;
          break;
        }
    }

  if(up >= 0 && list_size == d->bip_size[up])
    {
      score = 0;
      For(j,d->bip_size[up])
        {
          for(k=0;k<list_size;k++)
            {
              if(tax_num_list[k] == d->bip_node[up][j]->num)
                {
                  score++;
                  break;
                }
            }
        }
      if(score == list_size) *num = d->num;
    }

  if(d->tax) return;

  for(i=0;i<3;i++)
    {
      if((d->v[i] == a) || (d->b[i] == tree->e_root)) continue;

      Find_Clade_Pre(d,d->v[i],tax_num_list,list_size,num,tree);
    }
}
8844
8845 //////////////////////////////////////////////////////////////
8846 //////////////////////////////////////////////////////////////
8847
Find_Root_Edge(FILE * fp_input_tree,t_tree * tree)8848 t_edge *Find_Root_Edge(FILE *fp_input_tree, t_tree *tree)
8849 {
8850 char **subs;
8851 int degree;
8852 int i,j;
8853 t_node *left, *rght;
8854 int l_r, r_l;
8855 int score;
8856 char *line;
8857 char c;
8858 t_edge *root_edge;
8859
8860 line = (char *)mCalloc(T_MAX_LINE,sizeof(char));
8861
8862 rewind(fp_input_tree);
8863
8864 do c=fgetc(fp_input_tree);
8865 while((c != '(') && (c != EOF));
8866
8867 if(c==EOF)
8868 {
8869 Free(line);
8870 return NULL;
8871 }
8872
8873 i=0;
8874 for(;;)
8875 {
8876 if((c == ' ') || (c == '\n'))
8877 {
8878 c=fgetc(fp_input_tree);
8879 if(c==EOF) break;
8880 else continue;
8881 }
8882
8883 line[i]=c;
8884 i++;
8885 c=fgetc(fp_input_tree);
8886 if(c==EOF || c==';') break;
8887 }
8888
8889
8890 Free_Bip(tree);
8891 Alloc_Bip(tree);
8892 Get_Bip(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);
8893
8894 subs = Sub_Trees(line,°ree);
8895 Clean_Multifurcation(subs,degree,3);
8896 if(degree != 2)
8897 {
8898 PhyML_Printf("\n. The tree does not seem to be rooted...");
8899 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
8900 Warn_And_Exit("\n. PhyML finished prematurely.");
8901 }
8902
8903 left = rght = NULL;
8904 l_r = r_l = -1;
8905
8906 For(i,2*tree->n_otu-3)
8907 {
8908 left = tree->a_edges[i]->left;
8909 rght = tree->a_edges[i]->rght;
8910 l_r = tree->a_edges[i]->l_r;
8911 r_l = tree->a_edges[i]->r_l;
8912
8913 score = 0;
8914 For(j,left->bip_size[l_r]) if(strstr(subs[1],left->bip_node[l_r][j]->name)) score++;
8915 if(score == left->bip_size[l_r]) break;
8916
8917 score = 0;
8918 For(j,rght->bip_size[r_l]) if(strstr(subs[1],rght->bip_node[r_l][j]->name)) score++;
8919 if(score == rght->bip_size[r_l]) break;
8920 }
8921
8922 root_edge = tree->a_edges[i];
8923
8924 i = 0;
8925 while(subs[i] != NULL) Free(subs[i++]);
8926 Free(subs);
8927 Free(line);
8928
8929 if(i == 2*tree->n_otu-3)
8930 {
8931 PhyML_Printf("\n. Could not find the root edge...");
8932 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
8933 Warn_And_Exit("\n. PhyML finished prematurely.");
8934 }
8935
8936 return root_edge;
8937 }
8938
8939 //////////////////////////////////////////////////////////////
8940 //////////////////////////////////////////////////////////////
8941
/* Copies the topology, edge lengths and tip names of 'ori' into 'cpy'.
   Both trees must already be allocated.

   - For each of the first 2*n_otu-2 nodes: the three neighbour pointers
     v[] of the copy are set to the nodes of 'cpy' bearing the same
     numbers as the neighbours in 'ori' (NULL where 'ori' has none), num
     is copied and tax is reset to 0.
   - The lengths (l->v) of the first 2*n_otu-3 edges are copied.
   - The first n_otu nodes are flagged as tips (tax = 1) and their names
     are copied with strcpy(); the name buffers of 'cpy' must be large
     enough. */
void Copy_Tree_Topology_With_Labels(t_tree *ori, t_tree *cpy)
{
  int i,j;
  t_node *src,*dst;

  For(i,2*ori->n_otu-2)
    {
      src = ori->a_nodes[i];
      dst = cpy->a_nodes[i];

      for(j=0;j<3;j++)
        {
          if(src->v[j] == NULL) dst->v[j] = NULL;
          else                  dst->v[j] = cpy->a_nodes[src->v[j]->num];
        }

      dst->num = src->num;
      dst->tax = 0;
    }

  For(i,2*ori->n_otu-3) cpy->a_edges[i]->l->v = ori->a_edges[i]->l->v;

  for(i=0;i<ori->n_otu;i++)
    {
      dst = cpy->a_nodes[i];
      dst->tax = 1;
      strcpy(dst->name,ori->a_nodes[i]->name);
    }
}
8973
8974 //////////////////////////////////////////////////////////////
8975 //////////////////////////////////////////////////////////////
8976
/* Copies into mod->modelname->s the printable name of the substitution
   model selected by mod->whichmodel. CUSTOM and CUSTOMAA both give
   "Custom". An unknown value prints an error and calls Warn_And_Exit();
   the name is left untouched in that case. */
void Set_Model_Name(t_mod *mod)
{
  const char *label = NULL;

  switch(mod->whichmodel)
    {
    case JC69:     label = "JC69";     break;
    case K80:      label = "K80";      break;
    case F81:      label = "F81";      break;
    case HKY85:    label = "HKY85";    break;
    case F84:      label = "F84";      break;
    case TN93:     label = "TN93";     break;
    case GTR:      label = "GTR";      break;
    case CUSTOM:   label = "Custom";   break;
    case DAYHOFF:  label = "Dayhoff";  break;
    case JTT:      label = "JTT";      break;
    case MTREV:    label = "MtREV";    break;
    case LG:       label = "LG";       break;
    case WAG:      label = "WAG";      break;
    case DCMUT:    label = "DCMut";    break;
    case RTREV:    label = "RtREV";    break;
    case CPREV:    label = "CpREV";    break;
    case VT:       label = "VT";       break;
    case BLOSUM62: label = "Blosum62"; break;
    case MTMAM:    label = "MtMam";    break;
    case MTART:    label = "MtArt";    break;
    case HIVW:     label = "HIVw";     break;
    case HIVB:     label = "HIVb";     break;
    case AB:       label = "AB";       break;
    case CUSTOMAA: label = "Custom";   break;
    case FLU:      label = "FLU";      break;
    default:
      {
        PhyML_Printf("\n. Unknown model name.\n");
        PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
        Warn_And_Exit("\n. PhyML finished prematurely.");
        break;
      }
    }

  if(label != NULL) strcpy(mod->modelname->s,label);
}
9115
9116 //////////////////////////////////////////////////////////////
9117 //////////////////////////////////////////////////////////////
9118
/* When phydbl is a 4-byte type, sets the two thresholds
   min_diff_lk_local and min_diff_lk_move of tree->mod->s_opt to
   10^(e - FLT_DIG + 1), where e is the decimal exponent of the absolute
   value of the current log-likelihood tree->c_lnL. Nothing is done for
   other sizes of phydbl. */
void Adjust_Min_Diff_Lk(t_tree *tree)
{
  int magnitude;

  if(sizeof(phydbl) != 4) return;

  magnitude = (int)FLOOR(log10(FABS(tree->c_lnL)));

  tree->mod->s_opt->min_diff_lk_local = POW(10.,magnitude - FLT_DIG + 1);
  tree->mod->s_opt->min_diff_lk_move  = tree->mod->s_opt->min_diff_lk_local;
}
9130
9131 //////////////////////////////////////////////////////////////
9132 //////////////////////////////////////////////////////////////
9133
9134
9135 /*!
9136 tree->a_nodes[i]->name is initially a number. It is translated into
9137 a string of characters using the names provided in the tax_name
9138 array.
9139 */
Translate_Tax_Names(char ** tax_names,t_tree * tree)9140 void Translate_Tax_Names(char **tax_names, t_tree *tree)
9141 {
9142 int i;
9143 int tax_num;
9144
9145 for(i=0;i<tree->n_otu;i++)
9146 {
9147 tax_num = strtol(tree->a_nodes[i]->name,NULL,10);
9148 tree->a_nodes[i]->name = tax_names[tax_num-1];
9149 }
9150 }
9151
9152 //////////////////////////////////////////////////////////////
9153 //////////////////////////////////////////////////////////////
9154
/*!
  Skip comment in NEXUS file.

  The function assumes that one opening '[' has already been consumed. It
  reads characters from fp until the matching ']' is found (nested '[' ... ']'
  pairs are accounted for) or until the end of the file is reached.
*/
void Skip_Comment(FILE *fp)
{
  int depth;
  int c; /* int rather than char: fgetc() returns EOF as an int value that
            must remain distinguishable from every valid byte (e.g. 0xFF) */

  depth = 1;
  do
    {
      c = fgetc(fp);
      if(c == EOF) break;
      if(c == '[') depth++;
      else if(c == ']') depth--;
    }
  while(depth);
}
9173
9174 //////////////////////////////////////////////////////////////
9175 //////////////////////////////////////////////////////////////
9176
9177 /*!
9178 Determine the most appropriate position of the root if outgroup taxa are specified.
9179 */
9180
Get_Best_Root_Position(t_tree * tree)9181 void Get_Best_Root_Position(t_tree *tree)
9182 {
9183 int i,j;
9184 phydbl eps;
9185 phydbl s, s_max;
9186 t_edge *best_edge;
9187 int has_outgrp;
9188
9189 best_edge = NULL;
9190
9191 if(tree->n_root)
9192 {
9193 PhyML_Printf("\n. Tree already has a root.");
9194 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
9195 PhyML_Printf("\n. PhyML finished prematurely.");
9196 assert(FALSE);
9197 }
9198
9199 has_outgrp = NO;
9200
9201 if(strstr(tree->a_nodes[0]->name,"*"))
9202 {
9203 /* PhyML_Printf("\n. Found outgroup taxon: %s",tree->a_nodes[0]->name); */
9204 tree->a_nodes[0]->s_ingrp[0] = 0;
9205 tree->a_nodes[0]->s_outgrp[0] = 1;
9206 has_outgrp = YES;
9207 }
9208 else
9209 {
9210 tree->a_nodes[0]->s_ingrp[0] = 1;
9211 tree->a_nodes[0]->s_outgrp[0] = 0;
9212 }
9213
9214 Get_Best_Root_Position_Post(tree->a_nodes[0],tree->a_nodes[0]->v[0],&has_outgrp,tree);
9215 Get_Best_Root_Position_Pre(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);
9216
9217 if(has_outgrp == YES)
9218 {
9219
9220 Free_Edge_Lk_Rght(tree->a_edges[2*tree->n_otu-3]);
9221 Free_Edge_Lk_Rght(tree->a_edges[2*tree->n_otu-2]);
9222 Free_Edge_Pars_Rght(tree->a_edges[2*tree->n_otu-3]);
9223 Free_Edge_Pars_Rght(tree->a_edges[2*tree->n_otu-2]);
9224
9225 eps = 1.E-10;
9226 s = s_max = 0.0;
9227 for(i=0;i<2*tree->n_otu-2;++i)
9228 {
9229 for(j=0;j<3;j++)
9230 {
9231 s = (tree->a_nodes[i]->s_outgrp[j]+eps) / (tree->a_nodes[i]->s_ingrp[j] + eps) ;
9232 /* printf("\n. [%d %d] %d %d",i,j,tree->a_nodes[i]->s_outgrp[j],tree->a_nodes[i]->s_ingrp[j]); */
9233 if(s > s_max)
9234 {
9235 s_max = s;
9236 best_edge = tree->a_nodes[i]->b[j];
9237 }
9238 }
9239 }
9240 Add_Root(best_edge,tree);
9241 }
9242 }
9243
9244 //////////////////////////////////////////////////////////////
9245 //////////////////////////////////////////////////////////////
9246
9247
9248 /*!
9249 Determine the most appropriate position of the root if outgroup taxa are specified.
9250 Post-traversal.
9251 */
Get_Best_Root_Position_Post(t_node * a,t_node * d,int * has_outgrp,t_tree * tree)9252 void Get_Best_Root_Position_Post(t_node *a, t_node *d, int *has_outgrp, t_tree *tree)
9253 {
9254 if(d->tax)
9255 {
9256 if(strstr(d->name,"*"))
9257 {
9258 *has_outgrp = YES;
9259 /* PhyML_Printf("\n. Found outgroup taxon: %s",d->name); */
9260 d->s_ingrp[0] = NO;
9261 d->s_outgrp[0] = YES;
9262 }
9263 else
9264 {
9265 d->s_ingrp[0] = YES;
9266 d->s_outgrp[0] = NO;
9267 }
9268 return;
9269 }
9270 else
9271 {
9272 int i;
9273
9274 for(i=0;i<3;i++)
9275 if(d->v[i] != a && (d->b[i] != tree->e_root))
9276 Get_Best_Root_Position_Post(d,d->v[i],has_outgrp,tree);
9277
9278 Get_OutIn_Scores(a,d);
9279
9280 }
9281 }
9282
9283 //////////////////////////////////////////////////////////////
9284 //////////////////////////////////////////////////////////////
9285
9286
9287 /*!
9288 Determine the most appropriate position of the root if outgroup taxa are specified.
9289 Pre-traversal.
9290 */
Get_Best_Root_Position_Pre(t_node * a,t_node * d,t_tree * tree)9291 void Get_Best_Root_Position_Pre(t_node *a, t_node *d, t_tree *tree)
9292 {
9293 if(d->tax)
9294 {
9295 return;
9296 }
9297 else
9298 {
9299 int i;
9300
9301 for(i=0;i<3;i++)
9302 if(d->v[i] != a && (d->b[i] != tree->e_root))
9303 {
9304 Get_OutIn_Scores(d->v[i],d);
9305 Get_Best_Root_Position_Pre(d,d->v[i],tree);
9306 }
9307 }
9308 }
9309
9310 //////////////////////////////////////////////////////////////
9311 //////////////////////////////////////////////////////////////
9312
9313
9314 /*!
9315 Determine the most appropriate position of the root if outgroup taxa are specified.
9316 Core.
9317 */
Get_OutIn_Scores(t_node * a,t_node * d)9318 void Get_OutIn_Scores(t_node *a, t_node *d)
9319 {
9320 int i,d_v1,d_v2,v1_d,v2_d,d_a;
9321
9322 d_a = v1_d = v2_d = -1;
9323 d_v1 = d_v2 = -1;
9324 for(i=0;i<3;i++)
9325 {
9326 if(d->v[i] != a)
9327 {
9328 if(d_v1 < 0) d_v1 = i;
9329 else d_v2 = i;
9330 }
9331 }
9332
9333 for(i=0;i<3;i++) if(d->v[i] == a) { d_a = i; break; }
9334 for(i=0;i<3;i++) if(d->v[d_v1]->v[i] == d) { v1_d = i; break; }
9335 for(i=0;i<3;i++) if(d->v[d_v2]->v[i] == d) { v2_d = i; break; }
9336
9337 d->s_ingrp[d_a] =
9338 d->v[d_v1]->s_ingrp[v1_d] +
9339 d->v[d_v2]->s_ingrp[v2_d] ;
9340
9341 d->s_outgrp[d_a] =
9342 d->v[d_v1]->s_outgrp[v1_d] +
9343 d->v[d_v2]->s_outgrp[v2_d] ;
9344 }
9345
9346 //////////////////////////////////////////////////////////////
9347 //////////////////////////////////////////////////////////////
9348
/*!
  Check that the sequence name s does not contain any of the characters
  ':', ',' or ' ' (tested in that order). An error message is printed and
  Warn_And_Exit() is called for the first forbidden character that is found.

  Returns 1 when the name is valid.
*/
int Check_Sequence_Name(char *s)
{
  /* strchr() scans the name once per forbidden character, which avoids
     re-evaluating strlen(s) at every iteration of a hand-written loop */
  if(strchr(s,':'))
    {
      PhyML_Printf("\n. Character ':' is not permitted in sequence name (%s).",s);
      PhyML_Printf("\n. Err. in file %s at line %d",__FILE__,__LINE__);
      Warn_And_Exit("\n. PhyML finished prematurely.");
    }

  if(strchr(s,','))
    {
      PhyML_Printf("\n. Character ',' is not permitted in sequence name (%s).",s);
      PhyML_Printf("\n. Err in file %s at line %d",__FILE__,__LINE__);
      Warn_And_Exit("\n. PhyML finished prematurely.");
    }

  if(strchr(s,' '))
    {
      PhyML_Printf("\n. Character ' ' is not permitted in sequence name (%s).",s);
      PhyML_Printf("\n. Err in file %s at line %d",__FILE__,__LINE__);
      Warn_And_Exit("\n. PhyML finished prematurely.");
    }

  return 1;
}
9385
9386 //////////////////////////////////////////////////////////////
9387 //////////////////////////////////////////////////////////////
9388
/*!
  Rescale the node times in the subtree rooted at a. A time t that is not
  greater than floor becomes K*(t-floor)+floor. For node a itself, the new
  time is 0 when its current time is greater than floor.

  Returns 0, without modifying any time, when a is a tip or when the new
  time of a (a not being the root) is smaller than the time of its
  ancestor; returns 1 otherwise. *n_nodes receives the number of node times
  that were modified; it is left untouched when a is a tip.
*/
int Scale_Subtree_Height(t_node *a, phydbl K, phydbl floor, int *n_nodes, t_tree *tree)
{
  phydbl scaled_t;
  int dir;

  if(a->tax == YES) return 0;

  *n_nodes = 0;

  scaled_t = (tree->times->nd_t[a->num] > floor) ?
    .0 :
    K*(tree->times->nd_t[a->num]-floor)+floor;

  if(a == tree->n_root)
    {
      tree->times->nd_t[tree->n_root->num] = scaled_t;
      *n_nodes = 1;

      Scale_Node_Heights_Post(tree->n_root,tree->n_root->v[2],K,floor,n_nodes,tree);
      Scale_Node_Heights_Post(tree->n_root,tree->n_root->v[1],K,floor,n_nodes,tree);

      return 1;
    }

  /* Give up if a would end up with a time smaller than that of its ancestor */
  if(scaled_t < tree->times->nd_t[a->anc->num]) return 0;

  tree->times->nd_t[a->num] = scaled_t;
  *n_nodes = 1;

  for(dir=0;dir<3;dir++)
    {
      if(a->v[dir] == a->anc) continue;
      if(a->b[dir] == tree->e_root) continue;
      Scale_Node_Heights_Post(a,a->v[dir],K,floor,n_nodes,tree);
    }

  return 1;
}
9429
9430 //////////////////////////////////////////////////////////////
9431 //////////////////////////////////////////////////////////////
9432
/*!
  Recursive part of the node time rescaling. The time t of d is replaced by
  K*(t-floor)+floor when t is not greater than floor, and *n_nodes is then
  incremented. The program stops if d is the root node or if d ends up with
  a time smaller than that of a. Tips are left untouched.
*/
void Scale_Node_Heights_Post(t_node *a, t_node *d, phydbl K, phydbl floor, int *n_nodes, t_tree *tree)
{
  int dir;

  if(d == tree->n_root)
    {
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  if(d->tax) return;

  /* It is tempting to set floor = tree->times->t_prior_max[d->num]; but
     it then becomes possible for nodes with different floor values
     to have their orders inverted (i.e., ancestor below descendant)
  */
  if((tree->times->nd_t[d->num] > floor) == NO) /* Time of d is not greater than floor */
    {
      tree->times->nd_t[d->num] = K*(tree->times->nd_t[d->num]-floor)+floor;
      *n_nodes = *n_nodes+1;
    }

  if(tree->times->nd_t[d->num] < tree->times->nd_t[a->num])
    {
      PhyML_Printf("\n. K = %f floor = %f t_prior_max(a) = %f t_prior_max(d) = %f a->t = %f d->t %f",
                   K,floor,
                   tree->times->t_prior_max[a->num],
                   tree->times->t_prior_max[d->num],
                   tree->times->nd_t[a->num],
                   tree->times->nd_t[d->num]);
      PhyML_Printf("\n. Err. in file %s at line %d\n",__FILE__,__LINE__);
      Warn_And_Exit("\n. PhyML finished prematurely.");
    }

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;
      if(d->b[dir] == tree->e_root) continue;
      Scale_Node_Heights_Post(d,d->v[dir],K,floor,n_nodes,tree);
    }
}
9470
9471 //////////////////////////////////////////////////////////////
9472 //////////////////////////////////////////////////////////////
9473
/*!
  Multiply by mult the rates (br_r and nd_r) of every node found below a.
  The rates of a itself are not modified here.

  *n_nodes receives the number of nodes that were visited. The traversal
  stops at the first failure reported by Scale_Subtree_Rates_Post(), in
  which case 0 is returned; 1 is returned otherwise.
*/
int Scale_Subtree_Rates(t_node *a, phydbl mult, int *n_nodes, t_tree *tree)
{
  int ok;
  int dir;

  *n_nodes = 0;

  if(a == tree->n_root)
    {
      ok = Scale_Subtree_Rates_Post(a,a->v[2],mult,n_nodes,tree);
      if(ok) ok = Scale_Subtree_Rates_Post(a,a->v[1],mult,n_nodes,tree);
      return ok;
    }

  ok = 1;
  for(dir=0;dir<3;dir++)
    {
      if(ok != 1) break; /* A previous subtree failed: nothing left to do */
      if(a->v[dir] == a->anc) continue;
      if(a->b[dir] == tree->e_root) continue;
      ok = Scale_Subtree_Rates_Post(a,a->v[dir],mult,n_nodes,tree);
    }

  return ok;
}
9496
9497 //////////////////////////////////////////////////////////////
9498 //////////////////////////////////////////////////////////////
9499
/*!
  Multiply the rates br_r and nd_r of node d by mult, increment *n_nodes,
  then proceed with the nodes below d.

  Returns 0 as soon as one of the scaled rates falls outside
  [min_rate,max_rate] (the rates scaled so far are not restored here) and 1
  when the whole subtree was processed.
*/
int Scale_Subtree_Rates_Post(t_node *a, t_node *d, phydbl mult, int *n_nodes, t_tree *tree)
{
  int dir,ok;

  tree->rates->br_r[d->num] *= mult;
  tree->rates->nd_r[d->num] *= mult;

  *n_nodes = *n_nodes+1;

  if(tree->rates->br_r[d->num] < tree->rates->min_rate ||
     tree->rates->br_r[d->num] > tree->rates->max_rate ||
     tree->rates->nd_r[d->num] < tree->rates->min_rate ||
     tree->rates->nd_r[d->num] > tree->rates->max_rate)
    {
      return 0;
    }

  if(d->tax) return 1;

  ok = 1;
  for(dir=0;dir<3;++dir)
    {
      if(ok != 1) break; /* A previous subtree failed: nothing left to do */
      if(d->v[dir] == a) continue;
      if(d->b[dir] == tree->e_root) continue;
      ok = Scale_Subtree_Rates_Post(d,d->v[dir],mult,n_nodes,tree);
    }

  return ok;
}
9531
9532 //////////////////////////////////////////////////////////////
9533 //////////////////////////////////////////////////////////////
9534
9535 //////////////////////////////////////////////////////////////
9536 //////////////////////////////////////////////////////////////
9537
/*!
  Set the rank of every node in a rooted tree: the root node has rank 1 and
  each other node has the rank of the node above it plus one.
*/
void Get_Node_Ranks(t_tree *tree)
{
  t_node *root;

  root = tree->n_root;
  root->rank = 1;

  Get_Node_Ranks_Pre(root,root->v[2],tree);
  Get_Node_Ranks_Pre(root,root->v[1],tree);
}
9544
9545 //////////////////////////////////////////////////////////////
9546 //////////////////////////////////////////////////////////////
9547
/*!
  Pre-order traversal used to set node ranks: the rank of d is that of a
  plus one. The traversal then moves on to the neighbours of d other than a.
*/
void Get_Node_Ranks_Pre(t_node *a, t_node *d, t_tree *tree)
{
  int dir;

  d->rank = a->rank+1;

  if(d->tax) return;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;
      if(d->b[dir] == tree->e_root) continue;
      Get_Node_Ranks_Pre(d,d->v[dir],tree);
    }
}
9566
9567 //////////////////////////////////////////////////////////////
9568 //////////////////////////////////////////////////////////////
9569
/*!
  Replace the length of each of the first 2*n_otu-3 edges of the tree by its
  natural logarithm.
*/
void Log_Br_Len(t_tree *tree)
{
  int i;
  t_edge *b;

  for(i=0;i<2*tree->n_otu-3;++i)
    {
      b = tree->a_edges[i];
      b->l->v = log(b->l->v);
    }
}
9575
9576 //////////////////////////////////////////////////////////////
9577 //////////////////////////////////////////////////////////////
9578
Diff_Lk_Norm_At_Given_Edge(t_edge * b,t_tree * tree)9579 phydbl Diff_Lk_Norm_At_Given_Edge(t_edge *b, t_tree *tree)
9580 {
9581 int i,dim,err;
9582 phydbl lk_exact,lk_norm,sum;
9583
9584 Record_Br_Len(tree);
9585
9586 dim = 2*tree->n_otu-3;
9587 sum = 0.0;
9588
9589 for(i=0;i<tree->n_short_l;i++)
9590 {
9591 b->l->v = tree->short_l[i];
9592
9593 lk_exact = Lk(b,tree);
9594 lk_norm = tree->norm_scale + Log_Dnorm(b->l->v,tree->rates->mean_l[b->num],
9595 tree->rates->cov_l[b->num*dim+b->num],&err);
9596
9597 if(err)
9598 {
9599 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
9600 }
9601
9602 sum += pow(lk_exact - lk_norm,2);
9603 }
9604
9605 Restore_Br_Len(tree);
9606 Lk(b,tree);
9607
9608 return(sum);
9609 }
9610
9611 //////////////////////////////////////////////////////////////
9612 //////////////////////////////////////////////////////////////
9613
/*!
  For every edge whose length is smaller than 1.1*l_min, reset the mean
  (-1.0), the variance (0.1) and tree->norm_scale (-100), then alternately
  optimise norm_scale and the variance with Generic_Brent_Lk() until the
  value returned by Diff_Lk_Norm_At_Given_Edge() changes by no more than
  1.E-3 between two successive rounds.

  tree->short_l is first filled with n_short_l values starting at l_min and
  evenly spaced by (0.1 - l_min)/n_short_l.
*/
void Adjust_Variances(t_tree *tree)
{
  int i,diag;
  phydbl new_diff,curr_diff;
  t_edge *b;

  Make_Short_L(tree);
  for(i=0;i<tree->n_short_l;i++)
    {
      tree->short_l[i] = tree->mod->l_min + i*(0.1 - tree->mod->l_min)/tree->n_short_l;
    }

  For(i,2*tree->n_otu-3)
    {
      b = tree->a_edges[i];

      if(b->l->v < 1.1*tree->mod->l_min)
        {
          /* Position of the variance of edge i on the diagonal of cov_l */
          diag = i*(2*tree->n_otu-3)+i;

          tree->rates->mean_l[i]   = -1.00;
          tree->rates->cov_l[diag] = 0.1;
          tree->norm_scale         = -100;

          new_diff = curr_diff = 10.0;
          do
            {
              curr_diff = new_diff;

              Generic_Brent_Lk(&(tree->norm_scale),
                               -1E+6,
                               0.0,
                               1.E-10,
                               10000,
                               NO,
                               Wrap_Diff_Lk_Norm_At_Given_Edge,b,tree,NULL,NO);

              Generic_Brent_Lk(&(tree->rates->cov_l[diag]),
                               0.0,
                               10.0,
                               1.E-10,
                               10000,
                               NO,
                               Wrap_Diff_Lk_Norm_At_Given_Edge,b,tree,NULL,NO);

              new_diff = Diff_Lk_Norm_At_Given_Edge(b,tree);
            }
          while(FABS(new_diff-curr_diff) > 1.E-3);
        }
    }
}
9669
9670 //////////////////////////////////////////////////////////////
9671 //////////////////////////////////////////////////////////////
9672
9673
/*!
  Return n*(1-r)/(1+r) where r is the ratio of
    sumcurnext - (n+1)*mean^2 + (first_val+last_val)*mean
  to
    sumsq - n*mean^2
  and mean = sum/n.

  NOTE(review): r looks like an estimate of the correlation between
  successive values of a series of n samples -- confirm against the callers.
  No protection is provided against n == 0 or a null denominator.
*/
phydbl Effective_Sample_Size(phydbl first_val, phydbl last_val, phydbl sum, phydbl sumsq, phydbl sumcurnext, int n)
{
  phydbl avg;
  phydbl top,bottom;
  phydbl r;

  avg = sum / n;

  bottom = sumsq - n * POW(avg,2);
  top    = sumcurnext - (n+1.)*POW(avg,2) + (first_val+last_val)*avg;

  r = top/bottom;

  return (phydbl)n * (1.-r)/(1.+r);
}
9688
9689 //////////////////////////////////////////////////////////////
9690 //////////////////////////////////////////////////////////////
9691
Rescale_Br_Len_Multiplier_Tree(t_tree * tree)9692 phydbl Rescale_Br_Len_Multiplier_Tree(t_tree *tree)
9693 {
9694 int i;
9695
9696 if(tree->is_mixt_tree)
9697 {
9698 MIXT_Rescale_Br_Len_Multiplier_Tree(tree);
9699 return(-1.);
9700 }
9701
9702 for(i=0;i<2*tree->n_otu-1;++i) tree->a_edges[i]->l->v *= tree->mod->br_len_mult->v;
9703 return(-1.);
9704 }
9705
9706 //////////////////////////////////////////////////////////////
9707 //////////////////////////////////////////////////////////////
9708
Unscale_Br_Len_Multiplier_Tree(t_tree * tree)9709 phydbl Unscale_Br_Len_Multiplier_Tree(t_tree *tree)
9710 {
9711 int i;
9712
9713 if(tree->is_mixt_tree)
9714 {
9715 MIXT_Unscale_Br_Len_Multiplier_Tree(tree);
9716 return(-1.);
9717 }
9718
9719 For(i,2*tree->n_otu-1) tree->a_edges[i]->l->v /= tree->mod->br_len_mult->v;
9720 return(-1.);
9721 }
9722
9723 //////////////////////////////////////////////////////////////
9724 //////////////////////////////////////////////////////////////
9725
9726
/*!
  Fold x back into the interval [l,u] by successive reflections at the
  bounds. The bounds are swapped when u < l.

  When x lies more than one interval width above u, a whole number of
  back-and-forth trips (each of length 2*(u-l)) is removed first. At most
  100 reflections are then applied; the program stops if x is still outside
  the interval after that.
*/
phydbl Reflect(phydbl x, phydbl l, phydbl u)
{
  int rounds;
  int n_trips;
  phydbl swap;

  if(u < l)
    {
      swap = u;
      u    = l;
      l    = swap;
    }

  if(x < l) x = x + 2.*(l - x);

  if((x > u) && ((x-u) > (u-l)))
    {
      /* Truncation towards zero is intended here (n_trips is an int) */
      n_trips = (x - (2.*u-l))/(2.*(u-l));
      x = x - 2.*n_trips*(u-l);
    }

  rounds = 0;
  while(rounds < 100)
    {
      rounds++;

      if(x > u)      x = x - 2.*(x - u);
      else if(x < l) x = x + 2.*(l - x);
      else break;
    }

  if(rounds == 100 && (x > u || x < l))
    {
      PhyML_Printf("\n. u=%f l=%f x=%f",u,l,x);
      PhyML_Printf("\n. Err. in file %s at line %d\n",__FILE__,__LINE__);
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  return x;
}
9772
9773 //////////////////////////////////////////////////////////////
9774 //////////////////////////////////////////////////////////////
9775
9776
/*!
  Return TRUE when the absolute difference between a and b is strictly
  smaller than eps, FALSE otherwise.
*/
int Are_Equal(phydbl a, phydbl b, phydbl eps)
{
  return (FABS(a-b) < eps) ? TRUE : FALSE;
}
9782
9783 //////////////////////////////////////////////////////////////
9784 //////////////////////////////////////////////////////////////
9785
9786 /* Returns 1 if small_tree is displayed by big_tree, 0 otherwise
9787 Does not account for the root positions, if any.
9788 */
Check_Topo_Constraints(t_tree * big_tree,t_tree * small_tree)9789 int Check_Topo_Constraints(t_tree *big_tree, t_tree *small_tree)
9790 {
9791 if(!small_tree) return 1;
9792
9793 if(small_tree->n_otu < 4) return 1;
9794
9795 if(small_tree->n_otu > big_tree->n_otu)
9796 {
9797 PhyML_Printf("\n");
9798 PhyML_Printf("\n. The tree that defines the topological constraints can not");
9799 PhyML_Printf("\n. display more taxa than %d",big_tree->n_otu);
9800 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
9801 }
9802
9803 t_tree *big_tree_cpy;
9804 int diffs,i;
9805
9806 big_tree_cpy = Make_Tree_From_Scratch(big_tree->n_otu,NULL);
9807 Copy_Tree(big_tree,big_tree_cpy);
9808
9809 Prune_Tree(big_tree_cpy,small_tree);
9810
9811 /* For(i,2*small_tree->n_otu-3) printf("\nz %d . %d . %d", */
9812 /* big_tree->a_edges[i]->does_exist, */
9813 /* big_tree_cpy->a_edges[i]->does_exist, */
9814 /* small_tree->a_edges[i]->does_exist); */
9815
9816 Free_Bip(small_tree);
9817 Alloc_Bip(small_tree);
9818 Get_Bip(small_tree->a_nodes[0],small_tree->a_nodes[0]->v[0],small_tree);
9819
9820 Free_Bip(big_tree_cpy);
9821 Alloc_Bip(big_tree_cpy);
9822 Match_Tip_Numbers(small_tree,big_tree_cpy);
9823 Get_Bip(big_tree_cpy->a_nodes[0],big_tree_cpy->a_nodes[0]->v[0],big_tree_cpy);
9824
9825 for(i=0;i<2*big_tree_cpy->n_otu-3;++i) big_tree_cpy->a_edges[i]->bip_score = 0;
9826 for(i=0;i<2*small_tree->n_otu-3;++i) small_tree->a_edges[i]->bip_score = 0;
9827
9828 diffs = Compare_Bip(small_tree,big_tree_cpy,NO);
9829
9830 /* printf("\n"); */
9831 /* printf("\n. %s",Write_Tree(big_tree_cpy)); */
9832 /* printf("\n. %s",Write_Tree(small_tree)); */
9833 /* printf("\n. diffs=%d",diffs); */
9834
9835
9836 Free_Tree(big_tree_cpy);
9837
9838 t_tree *big_tree_cpy_bis;
9839 big_tree_cpy_bis = Make_Tree_From_Scratch(big_tree->n_otu,NULL);
9840 Copy_Tree(big_tree,big_tree_cpy_bis);
9841 Free_Tree(big_tree_cpy_bis);
9842
9843 if(diffs == 0) return 1; /* Constraint is satisfied */
9844 else return 0;
9845 }
9846
9847 //////////////////////////////////////////////////////////////
9848 //////////////////////////////////////////////////////////////
9849
/*!
  Remove from big_tree every tip whose name is not found among the tips of
  small_tree. Tips are matched by name (strcmp).

  When at least one tip is pruned: big_tree->n_otu is decreased, nodes and
  edges are renumbered, big_tree->t_dir is re-allocated for the new size,
  the pruned tips, their neighbouring nodes and the corresponding edges are
  freed and the last entries of a_edges/a_nodes are moved to the positions
  that match the new number of tips. big_tree is left untouched when no tip
  needs to be pruned.
*/
void Prune_Tree(t_tree *big_tree, t_tree *small_tree)
{
  int i,j;
  unsigned int curr_ext_node, curr_int_node;
  int curr_br, n_pruned_nodes;;
  t_node **pruned_nodes;
  t_edge **residual_edges;


  /* At most n_otu tips can be pruned */
  pruned_nodes = (t_node **)mCalloc(big_tree->n_otu,sizeof(t_node *));
  residual_edges = (t_edge **)mCalloc(big_tree->n_otu,sizeof(t_edge *));

  n_pruned_nodes = 0;
  for(i=0;i<big_tree->n_otu;i++)
    {
      /* Look for the i-th tip of big_tree among the tips of small_tree */
      for(j=0;j<small_tree->n_otu;j++)
        if(!strcmp(small_tree->a_nodes[j]->name,big_tree->a_nodes[i]->name))
          break;

      if(j == small_tree->n_otu) /* Not found: this tip has to be pruned */
        {
          Prune_Subtree(big_tree->a_nodes[i]->v[0],
                        big_tree->a_nodes[i],
                        NULL,&(residual_edges[n_pruned_nodes]),
                        big_tree);

          pruned_nodes[n_pruned_nodes] = big_tree->a_nodes[i];
          n_pruned_nodes++;
        }
    }

  /* Nothing was pruned: release the work arrays and leave */
  if(!n_pruned_nodes)
    {
      Free(pruned_nodes);
      Free(residual_edges);
      return;
    }

  /* t_dir is re-allocated below, once the new number of tips is known */
  Free(big_tree->t_dir);

  big_tree->n_otu -= n_pruned_nodes;


  curr_ext_node = 0;
  curr_int_node = big_tree->n_otu;
  curr_br = 0;
  /* Scan the original set of tips (n_otu was decreased above, hence
     '+n_pruned_nodes') and start renumbering nodes from the first tip that
     was not pruned */
  for(i=0;i<big_tree->n_otu+n_pruned_nodes;++i)
    {
      for(j=0;j<n_pruned_nodes;j++)
        if(!strcmp(pruned_nodes[j]->name,big_tree->a_nodes[i]->name))
          break;

      if(j == n_pruned_nodes) /* That t_node still belongs to the tree */
        {
          Reassign_Node_Nums(big_tree->a_nodes[i],big_tree->a_nodes[i]->v[0],
                             &curr_ext_node,&curr_int_node,big_tree);
          break;
        }
    }


  Reassign_Edge_Nums(big_tree->a_nodes[0],big_tree->a_nodes[0]->v[0],&curr_br,big_tree);

  big_tree->t_dir = (short int *)mCalloc((2*big_tree->n_otu-2)*(2*big_tree->n_otu-2),sizeof(short int));

  /* Release, for each pruned tip: the residual edge returned by
     Prune_Subtree(), the edge attached to the tip, the node the tip was
     attached to and the tip itself */
  for(i=0;i<n_pruned_nodes;i++)
    {
      Free_Edge(residual_edges[i]);
      Free_Edge(pruned_nodes[i]->b[0]);
      Free_Node(pruned_nodes[i]->v[0]);
      Free_Node(pruned_nodes[i]);
    }

  Free(pruned_nodes);
  Free(residual_edges);

  /* Move the entries stored at indices 2n-3 and 2n-2 (edges) and 2n-2 (node)
     for the original number of tips n to the same positions computed with
     the new number of tips.
     NOTE(review): these slots presumably hold the root edges and the root
     node -- confirm */
  big_tree->a_edges[2*big_tree->n_otu-3] = big_tree->a_edges[2*(big_tree->n_otu+n_pruned_nodes)-3];
  big_tree->a_edges[2*big_tree->n_otu-2] = big_tree->a_edges[2*(big_tree->n_otu+n_pruned_nodes)-2];
  big_tree->a_nodes[2*big_tree->n_otu-2] = big_tree->a_nodes[2*(big_tree->n_otu+n_pruned_nodes)-2];

}
9931
9932 //////////////////////////////////////////////////////////////
9933 //////////////////////////////////////////////////////////////
9934
9935 /* For every node in small_tree, find which node in big_tree
9936 it corresponds to and initialize the variable match_node
9937 accordingly (vice versa for big_tree)
9938 */
Match_Nodes_In_Small_Tree(t_tree * small_tree,t_tree * big_tree)9939 void Match_Nodes_In_Small_Tree(t_tree *small_tree, t_tree *big_tree)
9940 {
9941 int i,j,k,l,m,n,identical;
9942 int *score;
9943
9944 if(small_tree->n_otu > big_tree->n_otu)
9945 {
9946 PhyML_Printf("\n. small_tree->n_otu=%d big_tree->n_otu=%d",small_tree->n_otu,big_tree->n_otu);
9947 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
9948 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
9949 }
9950
9951 Free_Bip(big_tree);
9952 Alloc_Bip(big_tree);
9953 Get_Bip(big_tree->a_nodes[0],big_tree->a_nodes[0]->v[0],big_tree);
9954
9955 Free_Bip(small_tree);
9956 Alloc_Bip(small_tree);
9957 Get_Bip(small_tree->a_nodes[0],small_tree->a_nodes[0]->v[0],small_tree);
9958
9959 if(!Check_Topo_Constraints(big_tree,small_tree))
9960 {
9961 PhyML_Printf("\n. small_tree and big_tree cannot have distinct topologies.");
9962 PhyML_Printf("\n. Err. in file %s at line %d\n",__FILE__,__LINE__);
9963 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
9964 }
9965
9966 For(i,2*small_tree->n_otu-1) small_tree->a_nodes[i]->match_node = NULL;
9967 For(i,2*big_tree->n_otu-1) big_tree->a_nodes[i]->match_node = NULL;
9968
9969 score = (int *)mCalloc(3,sizeof(int));
9970
9971 for(i=0;i<small_tree->n_otu;i++)
9972 {
9973 for(j=0;j<big_tree->n_otu;j++)
9974 {
9975 if(!strcmp(small_tree->a_nodes[i]->name,big_tree->a_nodes[j]->name))
9976 {
9977 small_tree->a_nodes[i]->match_node = big_tree->a_nodes[j];
9978 big_tree->a_nodes[j]->match_node = small_tree->a_nodes[i];
9979 break;
9980 }
9981 }
9982 }
9983
9984 For(i,2*small_tree->n_otu-2)
9985 {
9986 if(small_tree->a_nodes[i]->tax == NO)
9987 {
9988 For(j,2*big_tree->n_otu-2)
9989 {
9990 if(big_tree->a_nodes[j]->tax == NO)
9991 {
9992 for(k=0;k<3;k++) score[k] = 0;
9993
9994 for(k=0;k<3;k++)
9995 {
9996 for(l=0;l<3;l++)
9997 {
9998 identical = 0;
9999 For(m,small_tree->a_nodes[i]->bip_size[k])
10000 {
10001 For(n,big_tree->a_nodes[j]->bip_size[l])
10002 {
10003 if(!strcmp(small_tree->a_nodes[i]->bip_node[k][m]->name,big_tree->a_nodes[j]->bip_node[l][n]->name))
10004 {
10005 identical++;
10006 break;
10007 }
10008 }
10009 }
10010 if(identical == small_tree->a_nodes[i]->bip_size[k])
10011 {
10012 score[k]++;
10013 }
10014 }
10015 }
10016
10017 /* printf("\n. [%d] [%d] %d %d %d -- %d %d %d",i,j, */
10018 /* score[0],score[1],score[2], */
10019 /* small_tree->a_nodes[i]->bip_size[0], */
10020 /* small_tree->a_nodes[i]->bip_size[1], */
10021 /* small_tree->a_nodes[i]->bip_size[2]); */
10022
10023 if(
10024 score[0] == 1 &&
10025 score[1] == 1 &&
10026 score[2] == 1
10027 )
10028 {
10029 small_tree->a_nodes[i]->match_node = big_tree->a_nodes[j];
10030 big_tree->a_nodes[j]->match_node = small_tree->a_nodes[i];
10031 break;
10032 }
10033 }
10034 }
10035 }
10036 }
10037
10038 Free(score);
10039 }
10040
10041 //////////////////////////////////////////////////////////////
10042 //////////////////////////////////////////////////////////////
10043
10044
/*!
  Match the nodes of small_tree and big_tree, reset
  small_tree->times->has_survived to NO for the 2*n_otu-1 nodes of
  small_tree, then traverse big_tree from its root in order to flag the
  nodes of small_tree that survived
  (see Find_Surviving_Edges_In_Small_Tree_Post()).
*/
void Find_Surviving_Edges_In_Small_Tree(t_tree *small_tree, t_tree *big_tree)
{
  int i;
  t_node *big_root;

  Match_Nodes_In_Small_Tree(small_tree,big_tree);

  For(i,2*small_tree->n_otu-1)
    {
      small_tree->times->has_survived[i] = NO;
    }

  big_root = big_tree->n_root;
  Find_Surviving_Edges_In_Small_Tree_Post(big_root,big_root->v[2],small_tree,big_tree);
  Find_Surviving_Edges_In_Small_Tree_Post(big_root,big_root->v[1],small_tree,big_tree);
}
10056
10057 //////////////////////////////////////////////////////////////
10058 //////////////////////////////////////////////////////////////
10059
10060
/*!
  Traversal of big_tree. Whenever d has a matching node in small_tree while
  a has none, has_survived is set to YES for the node that matches d. The
  traversal then proceeds with the neighbours of d other than a.
*/
void Find_Surviving_Edges_In_Small_Tree_Post(t_node *a, t_node *d, t_tree *small_tree, t_tree *big_tree)
{
  int dir;

  if(d->match_node && !a->match_node)
    {
      small_tree->times->has_survived[d->match_node->num] = YES;
    }

  if(d->tax == YES) return;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;
      if(d->b[dir] == big_tree->e_root) continue;
      Find_Surviving_Edges_In_Small_Tree_Post(d,d->v[dir],small_tree,big_tree);
    }
}
10082
10083 //////////////////////////////////////////////////////////////
10084 //////////////////////////////////////////////////////////////
10085
10086
/*!
  Set the id_rank of each tip to the number of pairwise name comparisons
  (strcmp) it "loses": for each pair of tips, the rank of the tip with the
  greater name is incremented. When both names are equal, the rank of the
  tip with the larger index is incremented.
*/
void Set_Taxa_Id_Ranking(t_tree *tree)
{
  int i,j;
  t_node *first,*second;

  for(i=0;i<tree->n_otu;i++) tree->a_nodes[i]->id_rank = 0;

  for(i=0;i<tree->n_otu;i++)
    {
      first = tree->a_nodes[i];

      for(j=i+1;j<tree->n_otu;j++)
        {
          second = tree->a_nodes[j];

          if(strcmp(first->name,second->name) > 0) first->id_rank++;
          else                                     second->id_rank++;
        }
    }
}
10105
10106 //////////////////////////////////////////////////////////////
10107 //////////////////////////////////////////////////////////////
10108
10109
/*!
  Set bin_cod_num for each of the first 2*n_otu-3 edges of the tree. The
  bipartitions and the id_rank of the tips are recomputed first. For a given
  edge, the side whose largest id_rank is the smallest is selected (the
  right side in case of a tie) and bin_cod_num is the sum of 2^id_rank over
  the tips found on that side.

  Trees with more than 1000 taxa trigger an assertion.
*/
void Get_Edge_Binary_Coding_Number(t_tree *tree)
{
  int i,j;
  int n_left,n_rght,list_size;
  int max_left,max_rght;
  t_node **left_tips,**rght_tips,**list;
  t_edge *b;

  if(tree->n_otu > 1000)
    {
      PhyML_Printf("\n. Can't work out edge binary code if the number of taxa >1000.");
      assert(FALSE);
    }

  Free_Bip(tree);
  Alloc_Bip(tree);
  Get_Bip(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);

  Set_Taxa_Id_Ranking(tree);

  b = NULL;
  for(i=0;i<2*tree->n_otu-3;++i)
    {
      b = tree->a_edges[i];

      left_tips = b->left->bip_node[b->l_r];
      n_left    = b->left->bip_size[b->l_r];
      rght_tips = b->rght->bip_node[b->r_l];
      n_rght    = b->rght->bip_size[b->r_l];

      /* Largest rank found on each side of the edge */
      max_left = 0;
      for(j=0;j<n_left;++j)
        if(left_tips[j]->id_rank > max_left) max_left = left_tips[j]->id_rank;

      max_rght = 0;
      for(j=0;j<n_rght;++j)
        if(rght_tips[j]->id_rank > max_rght) max_rght = rght_tips[j]->id_rank;

      if(max_left < max_rght)
        {
          list      = left_tips;
          list_size = n_left;
        }
      else
        {
          list      = rght_tips;
          list_size = n_rght;
        }

      b->bin_cod_num = 0.;
      for(j=0;j<list_size;j++) b->bin_cod_num += POW(2,list[j]->id_rank);
    }
}
10162
10163 //////////////////////////////////////////////////////////////
10164 //////////////////////////////////////////////////////////////
10165
10166
/*!
  Return the entry of tree->mutmap for a given mutation, edge and site. The
  array is indexed as mut*(n_pattern*(2*n_otu-3)) + edge*n_pattern + site.
*/
int Get_Mutmap_Val(int edge, int site, int mut, t_tree *tree)
{
  int per_edge; /* number of entries for one edge     */
  int per_mut;  /* number of entries for one mutation */

  per_edge = (tree->n_pattern);
  per_mut  = (tree->n_pattern)*(2*tree->n_otu-3);

  return tree->mutmap[mut*per_mut + edge*per_edge + site];
}
10176
10177 //////////////////////////////////////////////////////////////
10178 //////////////////////////////////////////////////////////////
10179
/*!
  Convert the index idx into a (mutation, edge, site) triplet, assuming the
  layout idx = mut*(n_pattern*(2*n_otu-3)) + edge*n_pattern + site.
  The results are written to *mut, *edge and *site (integer divisions).
*/
void Get_Mutmap_Coord(int idx, int *edge, int *site, int *mut, t_tree *tree)
{
  int per_edge; /* number of entries for one edge     */
  int per_mut;  /* number of entries for one mutation */

  per_mut  = (tree->n_pattern)*(2*tree->n_otu-3);
  per_edge = (tree->n_pattern);

  (*mut)  = (int)idx/per_mut;
  (*edge) = (int)(idx - (*mut)*per_mut)/per_edge;
  (*site) = (int)(idx - (*mut)*per_mut - (*edge)*per_edge);
}
10191
10192 //////////////////////////////////////////////////////////////
10193 //////////////////////////////////////////////////////////////
10194
10195 //////////////////////////////////////////////////////////////
10196 //////////////////////////////////////////////////////////////
10197
/*!
  Copy the lengths of the 2*n_otu-1 edges of tree 'from' (n_otu being that
  of 'from') to the edges of tree 'to' that have the same index.
*/
void Copy_Edge_Lengths(t_tree *to, t_tree *from)
{
  int i;
  t_edge *src,*dst;

  For(i,2*from->n_otu-1)
    {
      dst = to->a_edges[i];
      src = from->a_edges[i];
      dst->l->v = src->l->v;
    }
}
10203
10204 //////////////////////////////////////////////////////////////
10205 //////////////////////////////////////////////////////////////
10206 //////////////////////////////////////////////////////////////
10207 //////////////////////////////////////////////////////////////
10208
/*!
  Return a newly allocated (mCalloc) copy of the string 'in' in which every
  character is converted to lower case with tolower(). The input string is
  not modified. The caller owns the returned string.
*/
char *To_Lower_String(char *in)
{
  char *out;
  int i;
  int len;

  len = (int)strlen(in);

  out = (char *)mCalloc(len+1,sizeof(char));

  /* tolower() requires a value representable as unsigned char (or EOF):
     passing a plain char that holds a negative value is undefined behaviour */
  for(i=0;i<len;i++) out[i] = (char)tolower((unsigned char)in[i]);

  out[len] = '\0';
  return(out);
}
10224
10225 //////////////////////////////////////////////////////////////
10226 //////////////////////////////////////////////////////////////
10227
/*!
  Return a newly allocated (mCalloc) copy of the string 'in' in which every
  character is converted to upper case with toupper(). The input string is
  not modified. The caller owns the returned string.
*/
char *To_Upper_String(char *in)
{
  char *out;
  int i;
  int len;

  len = (int)strlen(in);

  out = (char *)mCalloc(len+1,sizeof(char));

  /* toupper() requires a value representable as unsigned char (or EOF):
     passing a plain char that holds a negative value is undefined behaviour */
  for(i=0;i<len;i++)
    {
      out[i] = (char)toupper((unsigned char)in[i]);
    }

  out[len] = '\0';
  return(out);
}
10246
10247 //////////////////////////////////////////////////////////////
10248 //////////////////////////////////////////////////////////////
10249
/* Attaches to each of the first tree->n_otu nodes of 'tree' the sequence of
   'cdata' that carries the same name (exact strcmp match), by setting the
   node's c_seq pointer.
   The number of tips and the number of sequences must agree unless a
   constraint tree file is set (io->fp_in_constraint_tree != NULL);
   otherwise Warn_And_Exit is called. A tip whose name is not found among the
   sequences triggers Generic_Exit. */
void Connect_CSeqs_To_Nodes(calign *cdata, option *io, t_tree *tree)
{
  const int n_tips = tree->n_otu;
  const int n_seqs = cdata->n_otu;
  int tip,seq;

  if((n_tips != n_seqs) && (io->fp_in_constraint_tree == NULL))
    {
      PhyML_Printf("\n. Number of taxa in the tree: %d, number of sequences: %d.",n_tips,n_seqs);
      Warn_And_Exit("\n. The number of tips in the tree is not the same as the number of sequences\n");
    }

  for(tip=0;tip<n_tips;tip++)
    {
      /* Linear scan for the first sequence with a matching name */
      seq = 0;
      while(seq < n_seqs && strcmp(tree->a_nodes[tip]->name,cdata->c_seq[seq]->name) != 0) seq++;

      if(seq == n_seqs)
        {
          PhyML_Printf("\n. Taxon '%s' was not found in sequence file '%s'.\n",
                       tree->a_nodes[tip]->name,
                       io->in_align_file);
          Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
        }

      tree->a_nodes[tip]->c_seq = cdata->c_seq[seq];
    }
}
10280
10281 //////////////////////////////////////////////////////////////
10282 //////////////////////////////////////////////////////////////
10283
/* Stores 'yesno' in tree->both_sides. When 'tree' is a mixture tree
   (is_mixt_tree == YES) the request is also forwarded to
   MIXT_Set_Both_Sides. */
void Set_Both_Sides(int yesno, t_tree *tree)
{
  tree->both_sides = yesno;

  if(tree->is_mixt_tree != YES) return;

  MIXT_Set_Both_Sides(yesno,tree);
}
10289
10290 //////////////////////////////////////////////////////////////
10291 //////////////////////////////////////////////////////////////
10292
/* Stores 'yesno' in tree->use_eigen_lr. When 'tree' is a mixture tree
   (is_mixt_tree == YES) the request is also forwarded to
   MIXT_Set_Use_Eigen_Lr. */
void Set_Use_Eigen_Lr(int yesno, t_tree *tree)
{
  tree->use_eigen_lr = yesno;

  if(tree->is_mixt_tree != YES) return;

  MIXT_Set_Use_Eigen_Lr(yesno,tree);
}
10298
10299 //////////////////////////////////////////////////////////////
10300 //////////////////////////////////////////////////////////////
10301
/* Stores 'yesno' in tree->update_eigen_lr. When 'tree' is a mixture tree
   (is_mixt_tree == YES) the request is also forwarded to
   MIXT_Set_Update_Eigen_Lr. */
void Set_Update_Eigen_Lr(int yesno, t_tree *tree)
{
  tree->update_eigen_lr = yesno;

  if(tree->is_mixt_tree != YES) return;

  MIXT_Set_Update_Eigen_Lr(yesno,tree);
}
10307
10308 //////////////////////////////////////////////////////////////
10309 //////////////////////////////////////////////////////////////
10310
/* Thin wrapper: hands 'yesno' and 'mod' over to MIXT_Set_Update_Eigen, which
   does all the work. */
void Set_Update_Eigen(int yesno, t_mod *mod)
{
  MIXT_Set_Update_Eigen(yesno, mod);
}
10315
10316 //////////////////////////////////////////////////////////////
10317 //////////////////////////////////////////////////////////////
10318 // Returns the matrix of pairwise distances between tips
Dist_Btw_Tips(t_tree * tree)10319 phydbl *Dist_Btw_Tips(t_tree *tree)
10320 {
10321 int i,j;
10322 phydbl *dist;
10323
10324 dist = (phydbl *)mCalloc(tree->n_otu*tree->n_otu,sizeof(phydbl));
10325
10326 for(i=0;i<tree->n_otu-1;i++)
10327 {
10328 for(j=i+1;j<tree->n_otu;j++)
10329 {
10330 Path_Length(tree->a_nodes[i],tree->a_nodes[j],dist+i*tree->n_otu+j,tree);
10331 dist[j*tree->n_otu+i] = dist[i*tree->n_otu+j];
10332 }
10333 }
10334
10335 return(dist);
10336
10337 }
10338
10339 //////////////////////////////////////////////////////////////
10340 //////////////////////////////////////////////////////////////
10341
10342
10343 //////////////////////////////////////////////////////////////
10344 //////////////////////////////////////////////////////////////
10345
/* Searches for the edge on which placing the root gives the highest
   likelihood. The tree must not have a root yet (tree->n_root == NULL),
   otherwise Generic_Exit is called.
   Each of the first 2*n_otu-3 edges is tried in turn: the root is added on
   the edge, the likelihood is computed with Lk, the two branches attached to
   the root (n_root->b[1] and n_root->b[2]) are optimised with Br_Len_Opt, and
   the edge is recorded if tree->c_lnL exceeds the best value seen so far.
   The root is finally placed on the best edge, the same evaluation is run
   once more and tree->ignore_root is set to YES. */
void Best_Root_Position_IL_Model(t_tree *tree)
{

  if(tree->n_root)
    {
      PhyML_Printf("\n. The tree already has a root node");
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }
  else
    {
      int i;
      t_edge *best_edge;
      phydbl best_lnL;

      /* Right-hand side likelihood and parsimony data of the two edges with
         indices 2*n_otu-3 and 2*n_otu-2 are released before rooting.
         NOTE(review): presumably these are the two edges used by the root
         node -- confirm against Add_Root. */
      Free_Edge_Lk_Rght(tree->a_edges[2*tree->n_otu-3]);
      Free_Edge_Lk_Rght(tree->a_edges[2*tree->n_otu-2]);
      Free_Edge_Pars_Rght(tree->a_edges[2*tree->n_otu-3]);
      Free_Edge_Pars_Rght(tree->a_edges[2*tree->n_otu-2]);


      best_edge = NULL;
      best_lnL  = UNLIKELY;
      For(i,2*tree->n_otu-3)
        {
          PhyML_Printf("\n. Positionning root node on edge %4d",tree->a_edges[i]->num);
          Add_Root(tree->a_edges[i],tree);
          tree->ignore_root = NO;
          Set_Both_Sides(YES,tree);
          Lk(NULL,tree);

          /* Optimize_Br_Len_Serie(2,tree); */

          Update_Partial_Lk(tree,tree->n_root->b[1],tree->n_root);
          Br_Len_Opt(&(tree->n_root->b[1]->l->v),tree->n_root->b[1],tree);
          Update_Partial_Lk(tree,tree->n_root->b[2],tree->n_root);
          Br_Len_Opt(&(tree->n_root->b[2]->l->v),tree->n_root->b[2],tree);

          PhyML_Printf(" -- lnL: %20f",tree->c_lnL);
          if(tree->c_lnL > best_lnL)
            {
              best_lnL  = tree->c_lnL;
              best_edge = tree->a_edges[i];
            }
        }

      /* No edge was retained: either there was no edge to try or no
         likelihood value compared greater than UNLIKELY (e.g., NaN).
         Stop here rather than handing a NULL edge to Add_Root. */
      if(best_edge == NULL)
        {
          PhyML_Printf("\n. Could not find a valid position for the root node.");
          Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
        }

      Add_Root(best_edge,tree);
      Set_Both_Sides(YES,tree);
      Lk(NULL,tree);
      Update_Partial_Lk(tree,tree->n_root->b[1],tree->n_root);
      Br_Len_Opt(&(tree->n_root->b[1]->l->v),tree->n_root->b[1],tree);
      Update_Partial_Lk(tree,tree->n_root->b[2],tree->n_root);
      Br_Len_Opt(&(tree->n_root->b[2]->l->v),tree->n_root->b[2],tree);
      tree->ignore_root = YES;
    }
}
10401
10402 //////////////////////////////////////////////////////////////
10403 //////////////////////////////////////////////////////////////
10404
/* Sets the variance attached to edge lengths (l_var->v) to
   max(0,l->v)^2 * tree->mod->l_var_sigma.
   - Mixture trees are delegated to MIXT_Set_Br_Len_Var.
   - Nothing is done unless tree->rates == NO and mod->gamma_mgf_bl == YES.
   - b != NULL: only edge 'b' is updated.
   - b == NULL: the first 2*n_otu-1 edges of the tree are updated. */
void Set_Br_Len_Var(t_edge *b, t_tree *tree)
{
  phydbl len;
  int e;

  if(tree->is_mixt_tree)
    {
      MIXT_Set_Br_Len_Var(b,tree);
      return;
    }

  if(tree->rates != NO || tree->mod->gamma_mgf_bl != YES) return;

  if(b != NULL)
    {
      len = MAX(0.0,b->l->v);
      b->l_var->v = POW(len,2)*tree->mod->l_var_sigma;
      return;
    }

  for(e=0;e<2*tree->n_otu-1;e++)
    {
      len = MAX(0.0,tree->a_edges[e]->l->v);
      tree->a_edges[e]->l_var->v = POW(len,2)*tree->mod->l_var_sigma;
    }
}
10434
10435 //////////////////////////////////////////////////////////////
10436 //////////////////////////////////////////////////////////////
10437
/* Resets negative edge lengths to zero. For each of the first 2*n_otu-1
   edges, the whole chain of scalars starting at the edge's 'l' field (linked
   through 'next') is visited. Only the lower bound 0.0 is enforced: the
   model's l_min / l_max limits are not applied here. */
void Check_Br_Lens(t_tree *tree)
{
  scalar_dbl *len;
  int e;

  for(e=0;e<2*tree->n_otu-1;e++)
    {
      len = tree->a_edges[e]->l;
      do
        {
          if(len->v < 0.0) len->v = 0.0;
          len = len->next;
        }
      while(len != NULL);
    }
}
10456
10457 //////////////////////////////////////////////////////////////
10458 //////////////////////////////////////////////////////////////
10459
/* Prints, for every internal edge (neither end is a tip) and each side of it,
   the counts produced by Calculate_Number_Of_Diff_States on the current data
   ("TRUE" lines), then the 0.10 and 0.90 quantiles of the same counts over
   100 data sets generated with Evolve ("SIM" lines).
   After each replicate the data and model attached to the tree are freed and
   replaced by fresh copies of the versions saved before the first replicate.
   The function ends by adding a root on edge 0 and calling DR_Draw_Tree.
   NOTE(review): the saved copies of the model and of the data are not
   released in this function. */
void Build_Distrib_Number_Of_Diff_States_Under_Model(t_tree *tree)
{
  const int n_rep   = 100;
  const int n_edges = 2*tree->n_otu-3;
  calign *data_bkp;
  t_mod *mod_bkp;
  phydbl *sim_l,*sim_r;
  phydbl *col_l,*col_r;
  int rep,e,s,slot;

  Calculate_Number_Of_Diff_States(tree);

  PhyML_Printf("\n TRUE edge side states val");
  for(e=0;e<n_edges;e++)
    {
      /* Internal edges only */
      if(tree->a_edges[e]->left->tax != NO || tree->a_edges[e]->rght->tax != NO) continue;

      for(s=0;s<tree->mod->ns;s++)
        {
          PhyML_Printf("\n TRUE %3d 0 %3d %d",
                       e,
                       s+1,
                       tree->a_edges[e]->n_diff_states_l[s]);

          PhyML_Printf("\n TRUE %3d 1 %3d %d",
                       e,
                       s+1,
                       tree->a_edges[e]->n_diff_states_r[s]);
        }
    }

  /* Simulated counts are stored at [state][edge][replicate], replicate
     varying fastest */
  sim_l = (phydbl *)mCalloc(n_rep * (tree->mod->ns) * n_edges * 2, sizeof(phydbl));
  sim_r = (phydbl *)mCalloc(n_rep * (tree->mod->ns) * n_edges * 2, sizeof(phydbl));

  mod_bkp  = Copy_Model(tree->mod);
  data_bkp = Copy_Cseq(tree->data,tree->io);

  /* io and s_opt are copied as pointers from the tree's own objects */
  mod_bkp->io    = tree->io;
  mod_bkp->s_opt = tree->mod->s_opt;

  for(rep=0;rep<n_rep;rep++)
    {
      Evolve(tree->data,tree->mod,0,tree);

      Calculate_Number_Of_Diff_States(tree);

      for(e=0;e<n_edges;e++)
        {
          for(s=0;s<tree->mod->ns;s++)
            {
              slot = s*n_edges*n_rep + e*n_rep + rep;
              sim_l[slot] = tree->a_edges[e]->n_diff_states_l[s];
              sim_r[slot] = tree->a_edges[e]->n_diff_states_r[s];
            }
        }

      /* Discard the data and model used for this replicate ... */
      Free_Calign(tree->data);
      Free_Model_Complete(tree->mod);
      Free_Model_Basic(tree->mod);

      /* ... and start again from copies of the saved versions */
      tree->mod  = Copy_Model(mod_bkp);
      tree->data = Copy_Cseq(data_bkp,tree->io);

      tree->mod->io    = mod_bkp->io;
      tree->mod->s_opt = mod_bkp->s_opt;

      Connect_CSeqs_To_Nodes(tree->data,tree->io,tree);
    }

  PhyML_Printf("\n SIM edge side states low up");
  for(e=0;e<n_edges;e++)
    {
      if(tree->a_edges[e]->left->tax != NO || tree->a_edges[e]->rght->tax != NO) continue;

      for(s=0;s<tree->mod->ns;s++)
        {
          /* Start of the n_rep values recorded for this (state,edge) pair */
          col_l = sim_l + s*n_edges*n_rep + e*n_rep;
          col_r = sim_r + s*n_edges*n_rep + e*n_rep;

          PhyML_Printf("\n SIM %3d 0 %3d %.0f %.0f",
                       e,
                       s+1,
                       Quantile(col_l, n_rep, 0.10),
                       Quantile(col_l, n_rep, 0.90));

          PhyML_Printf("\n SIM %3d 1 %3d %.0f %.0f",
                       e,
                       s+1,
                       Quantile(col_r, n_rep, 0.10),
                       Quantile(col_r, n_rep, 0.90));
        }
    }

  Add_Root(tree->a_edges[0],tree);
  DR_Draw_Tree("treefile",tree);

  Free(sim_l);
  Free(sim_r);
}
10568
10569 //////////////////////////////////////////////////////////////
10570 //////////////////////////////////////////////////////////////
10571
10572 /* Calculate the number of sites at which 1,...,n states (n: 4 or 20) */
10573 /* are observed, for every subtree */
10574
/* Entry point of the per-subtree state counts: calls Init_Ui_Tips, then runs
   the post-order and the pre-order traversals, both started from node 0,
   its first neighbour (v[0]) and the edge between them (b[0]). The counts
   themselves are written by Calculate_Number_Of_Diff_States_Core into the
   n_diff_states_l / n_diff_states_r arrays of the edges. */
void Calculate_Number_Of_Diff_States(t_tree *tree)
{
  t_node *start;

  Init_Ui_Tips(tree);

  start = tree->a_nodes[0];

  Calculate_Number_Of_Diff_States_Post(start,start->v[0],start->b[0],tree);
  Calculate_Number_Of_Diff_States_Pre(start,start->v[0],start->b[0],tree);
}
10598
10599 //////////////////////////////////////////////////////////////
10600 //////////////////////////////////////////////////////////////
10601
/* Post-order step: starting at 'd' and moving away from 'a', first recurses
   into the neighbours of 'd' other than 'a', then calls the Core routine for
   (a,d,b). Tips ('d->tax' set) end the recursion without any update. */
void Calculate_Number_Of_Diff_States_Post(t_node *a, t_node *d, t_edge *b, t_tree *tree)
{
  int dir;

  if(d->tax) return;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;
      Calculate_Number_Of_Diff_States_Post(d,d->v[dir],d->b[dir],tree);
    }

  Calculate_Number_Of_Diff_States_Core(a,d,b,tree);
}
10616
10617 //////////////////////////////////////////////////////////////
10618 //////////////////////////////////////////////////////////////
10619
/* Pre-order step: for each neighbour of 'd' other than 'a', first calls the
   Core routine for (d->v[i],d,d->b[i]), i.e. with 'd' playing the role of
   the node below that edge, and then recurses towards this neighbour. Tips
   ('d->tax' set) end the recursion. 'b' is not used in this function. */
void Calculate_Number_Of_Diff_States_Pre(t_node *a, t_node *d, t_edge *b, t_tree *tree)
{
  int dir;

  if(d->tax) return;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;

      Calculate_Number_Of_Diff_States_Core(d->v[dir],d,d->b[dir],tree);
      Calculate_Number_Of_Diff_States_Pre(d,d->v[dir],d->b[dir],tree);
    }
}
10636
10637 //////////////////////////////////////////////////////////////
10638 //////////////////////////////////////////////////////////////
10639
/* Picks, by rejection sampling, one of the states whose bit is set in 'val'
   (positions 0..ns-1 are drawn with Rand_Int) and returns its position. A
   position is accepted as soon as the corresponding bit is set, so that all
   set bits have the same chance of being selected provided Rand_Int is
   uniform. Generic_Exit is called if no set bit is hit after 1000 draws. */
static int Diff_States_Draw_Set_Bit(int val, int ns)
{
  int pos,iter;

  for(iter=0;iter<1000;iter++)
    {
      pos = Rand_Int(0,ns-1);
      if((val >> pos) & 1) return(pos);
    }

  Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
  return(0);
}

/* Updates, for every site pattern, the set of states (one bit per state)
   observed on the side of edge 'b' that hangs below node 'd', and counts the
   number of sites at which 1,...,ns distinct states are found.
   - The state set of 'd' on 'b' (b->ui_l or b->ui_r depending on whether 'd'
     is the left or right node of 'b') is the bitwise OR of the sets stored
     on the two other edges around 'd'.
   - When one of these two neighbours is a tip and its set has more than one
     bit (ambiguous character), one of its states is drawn at random and the
     tip's set is overwritten with that single state.
   - diff[k-1] (b->n_diff_states_l or _r) receives the number of sites with k
     distinct states; the array is reset to 0 on entry.
   'a' is not used in this function. */
void Calculate_Number_Of_Diff_States_Core(t_node *a, t_node *d, t_edge *b, t_tree *tree)
{
  int *ui, *ui_v1, *ui_v2;
  int sum,site,state;
  int *diff;
  int dir1,dir2;
  t_node *v1, *v2;

  /* Pick the side of 'b' that faces 'd' */
  if(d == b->left)
    {
      dir1 = b->l_v1;
      dir2 = b->l_v2;
      ui   = b->ui_l;
      diff = b->n_diff_states_l;
    }
  else
    {
      dir1 = b->r_v1;
      dir2 = b->r_v2;
      ui   = b->ui_r;
      diff = b->n_diff_states_r;
    }

  /* Neighbours of 'd' in directions dir1 and dir2, along with the state
     sets stored on the far side of the corresponding edges */
  if(d == d->b[dir1]->left)
    {
      v1    = d->b[dir1]->rght;
      ui_v1 = d->b[dir1]->ui_r;
    }
  else
    {
      v1    = d->b[dir1]->left;
      ui_v1 = d->b[dir1]->ui_l;
    }

  if(d == d->b[dir2]->left)
    {
      v2    = d->b[dir2]->rght;
      ui_v2 = d->b[dir2]->ui_r;
    }
  else
    {
      v2    = d->b[dir2]->left;
      ui_v2 = d->b[dir2]->ui_l;
    }

  for(state=0;state<tree->mod->ns;state++) diff[state] = 0;

  for(site=0;site<tree->n_pattern;site++)
    {
      /* Ambiguous character at a tip: keep a single, randomly selected
         state */
      if(v1->tax == YES && Sum_Bits(ui_v1[site],tree->mod->ns) > 1)
        {
          ui_v1[site] = 1 << Diff_States_Draw_Set_Bit(ui_v1[site],tree->mod->ns);
        }

      if(v2->tax == YES && Sum_Bits(ui_v2[site],tree->mod->ns) > 1)
        {
          ui_v2[site] = 1 << Diff_States_Draw_Set_Bit(ui_v2[site],tree->mod->ns);
        }

      ui[site] = ui_v1[site] | ui_v2[site];

      sum = Sum_Bits(ui[site],tree->mod->ns);

      /* sum indexes diff[] below: it has to lie in 1..ns */
      if(sum < 1 || sum > tree->mod->ns) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);

      diff[sum-1]++;
    }
}
10767
10768 //////////////////////////////////////////////////////////////
10769 //////////////////////////////////////////////////////////////
10770 /* Returns the number of distinct states observed at a particular site */
10771
/* Runs the single-site post-order traversal from node 0 (towards its first
   neighbour v[0], along edge b[0]) and returns the number of bits set, among
   the first mod->ns ones, in the union of the two state sets (ui_r and ui_l)
   stored for 'site' on that edge. */
int Number_Of_Diff_States_One_Site(int site, t_tree *tree)
{
  t_edge *first_edge;

  Number_Of_Diff_States_One_Site_Post(tree->a_nodes[0],
                                      tree->a_nodes[0]->v[0],
                                      tree->a_nodes[0]->b[0],
                                      site,tree);

  first_edge = tree->a_nodes[0]->b[0];

  return(Sum_Bits(first_edge->ui_r[site] | first_edge->ui_l[site],tree->mod->ns));
}
10786
10787 //////////////////////////////////////////////////////////////
10788 //////////////////////////////////////////////////////////////
10789
/* Single-site post-order step: recurses into the neighbours of 'd' that are
   neither 'a' nor reached through the root edge (tree->e_root), then calls
   the single-site Core routine for (a,d,b). Tips ('d->tax' set) end the
   recursion without any update. */
void Number_Of_Diff_States_One_Site_Post(t_node *a, t_node *d, t_edge *b, int site, t_tree *tree)
{
  int dir;

  if(d->tax) return;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a || d->b[dir] == tree->e_root) continue;
      Number_Of_Diff_States_One_Site_Post(d,d->v[dir],d->b[dir],site,tree);
    }

  Number_Of_Diff_States_One_Site_Core(a,d,b,site,tree);
}
10804
10805 //////////////////////////////////////////////////////////////
10806 //////////////////////////////////////////////////////////////
10807
/* Picks, by rejection sampling, one of the states whose bit is set in 'val'
   (positions 0..ns-1 are drawn with Rand_Int) and returns its position. A
   position is accepted as soon as the corresponding bit is set, so that all
   set bits have the same chance of being selected provided Rand_Int is
   uniform. Generic_Exit is called if no set bit is hit after 1000 draws. */
static int Diff_States_One_Site_Draw_Set_Bit(int val, int ns)
{
  int pos,iter;

  for(iter=0;iter<1000;iter++)
    {
      pos = Rand_Int(0,ns-1);
      if((val >> pos) & 1) return(pos);
    }

  Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
  return(0);
}

/* Single-site version of the state-set update: computes, for pattern 'site',
   the set of states (one bit per state) found on the side of edge 'b' that
   hangs below node 'd', stores it in b->ui_l[site] or b->ui_r[site] and
   returns the number of states in that set.
   - The set is the bitwise OR of the sets stored on the two other edges
     around 'd'.
   - When one of these two neighbours is a tip and its set has more than one
     bit (ambiguous character), one of its states is drawn at random and the
     tip's set is overwritten with that single state.
   Generic_Exit is called if the number of states exceeds mod->ns.
   'a' is not used in this function. */
int Number_Of_Diff_States_One_Site_Core(t_node *a, t_node *d, t_edge *b, int site, t_tree *tree)
{
  int *ui, *ui_v1, *ui_v2;
  int sum;
  int dir1,dir2;
  t_node *v1, *v2;

  /* Pick the side of 'b' that faces 'd' */
  if(d == b->left)
    {
      dir1 = b->l_v1;
      dir2 = b->l_v2;
      ui   = b->ui_l;
    }
  else
    {
      dir1 = b->r_v1;
      dir2 = b->r_v2;
      ui   = b->ui_r;
    }

  /* Neighbours of 'd' in directions dir1 and dir2, along with the state
     sets stored on the far side of the corresponding edges */
  if(d == d->b[dir1]->left)
    {
      v1    = d->b[dir1]->rght;
      ui_v1 = d->b[dir1]->ui_r;
    }
  else
    {
      v1    = d->b[dir1]->left;
      ui_v1 = d->b[dir1]->ui_l;
    }

  if(d == d->b[dir2]->left)
    {
      v2    = d->b[dir2]->rght;
      ui_v2 = d->b[dir2]->ui_r;
    }
  else
    {
      v2    = d->b[dir2]->left;
      ui_v2 = d->b[dir2]->ui_l;
    }

  /* Ambiguous character at a tip: keep a single, randomly selected state */
  if(v1->tax == YES && Sum_Bits(ui_v1[site],tree->mod->ns) > 1)
    {
      ui_v1[site] = 1 << Diff_States_One_Site_Draw_Set_Bit(ui_v1[site],tree->mod->ns);
    }

  if(v2->tax == YES && Sum_Bits(ui_v2[site],tree->mod->ns) > 1)
    {
      ui_v2[site] = 1 << Diff_States_One_Site_Draw_Set_Bit(ui_v2[site],tree->mod->ns);
    }

  ui[site] = ui_v1[site] | ui_v2[site];

  sum = Sum_Bits(ui[site],tree->mod->ns);

  if(sum-1 > tree->mod->ns-1)
    {
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
    }

  return sum;
}
10927
10928 //////////////////////////////////////////////////////////////
10929 //////////////////////////////////////////////////////////////
10930
Get_Lk(t_tree * tree)10931 phydbl Get_Lk(t_tree *tree)
10932 {
10933 t_tree *loc_tree;
10934
10935 loc_tree = tree;
10936 /*! Rewind back to the first mixt_tree */
10937 while(loc_tree->prev) loc_tree = loc_tree->prev;
10938
10939 return loc_tree->c_lnL;
10940
10941 }
10942
10943 /*////////////////////////////////////////////////////////////
10944 ////////////////////////////////////////////////////////////*/
10945
Get_dLk(t_tree * tree)10946 phydbl Get_dLk(t_tree *tree)
10947 {
10948 t_tree *loc_tree;
10949
10950 loc_tree = tree;
10951 /*! Rewind back to the first mixt_tree */
10952 while(loc_tree->prev) loc_tree = loc_tree->prev;
10953
10954 return loc_tree->c_dlnL;
10955
10956 }
10957
10958 /*////////////////////////////////////////////////////////////
10959 ////////////////////////////////////////////////////////////*/
10960
Get_d2Lk(t_tree * tree)10961 phydbl Get_d2Lk(t_tree *tree)
10962 {
10963 t_tree *loc_tree;
10964
10965 loc_tree = tree;
10966 /*! Rewind back to the first mixt_tree */
10967 while(loc_tree->prev) loc_tree = loc_tree->prev;
10968
10969 return loc_tree->c_d2lnL;
10970
10971 }
10972
10973 /*////////////////////////////////////////////////////////////
10974 ////////////////////////////////////////////////////////////*/
10975
Make_Empty_Alignment(option * io)10976 align **Make_Empty_Alignment(option *io)
10977 {
10978 int i;
10979 char *line;
10980 align **data;
10981
10982 line = (char *)mCalloc(T_MAX_LINE,sizeof(char));
10983 data = (align **)mCalloc(io->n_otu,sizeof(align *));
10984
10985 for(i=0;i<io->n_otu;i++)
10986 {
10987 data[i] = (align *)mCalloc(1,sizeof(align));
10988 data[i]->name = (char *)mCalloc(T_MAX_NAME,sizeof(char));
10989 data[i]->state = (char *)mCalloc(io->init_len*io->state_len+1,sizeof(char));
10990
10991 data[i]->is_ambigu = NULL;
10992 data[i]->len = 0;
10993
10994 Random_String(data[i]->name,5);
10995
10996 while(data[i]->len < io->init_len * io->state_len)
10997 {
10998 data[i]->state[data[i]->len] = 'X';
10999 data[i]->len++;
11000 }
11001 }
11002
11003 for(i=0;i<io->n_otu;i++) data[i]->state[data[i]->len] = '\0';
11004
11005 Free(line);
11006
11007 return data;
11008 }
11009
11010 /*////////////////////////////////////////////////////////////
11011 ////////////////////////////////////////////////////////////*/
/* Mean of the pairwise identity values (as returned by Pairwise_Identity) over the n(n-1)/2 pairs of sequences */
Mean_Identity(calign * data)11013 phydbl Mean_Identity(calign *data)
11014 {
11015 int i,j,n;
11016 phydbl tot_idt;
11017
11018 n = data->n_otu;
11019
11020 tot_idt = 0.0;
11021 for(i=0;i<n-1;i++)
11022 {
11023 for(j=i+1; j<n; j++)
11024 {
11025 tot_idt += Pairwise_Identity(i,j,data);
11026 }
11027 }
11028
11029 return(tot_idt / (phydbl)(n*(n-1.)/2.));
11030 }
11031
11032 /*////////////////////////////////////////////////////////////
11033 ////////////////////////////////////////////////////////////*/
11034
/* Identity measure for the (i,j)-pair of sequences: exp(-d), where d is the Jukes & Cantor distance derived from the observed proportion of differing sites */
Pairwise_Identity(int i,int j,calign * data)11036 phydbl Pairwise_Identity(int i, int j, calign *data)
11037 {
11038 int k;
11039 phydbl div,p,d;
11040
11041 div = 0.0;
11042 for(k=0;k<data->crunch_len;k++) if(data->c_seq[i]->state[k] == data->c_seq[j]->state[k]) div += (phydbl)data->wght[k];
11043
11044 /* observed proportion of identity */
11045 p = 1. - div / (phydbl)data->init_len;
11046
11047 d = 0.0;
11048 if(data->io->datatype == NT)
11049 {
11050 if(p > 3./4.) return 0.25;
11051 else
11052 {
11053 /* Jukes & Cantor distance */
11054 d = -(3./4.)*log(1. - 4./3.*p);
11055 }
11056 }
11057 else if(data->io->datatype == AA)
11058 {
11059 if(p > 19./20.) return 1./20.;
11060 else
11061 {
11062 /* Jukes & Cantor distance */
11063 d = -(19./20.)*log(1. - 20./19.*p);
11064 }
11065 }
11066 else Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
11067
11068 return(exp(-d));
11069 }
11070
11071
11072 /*////////////////////////////////////////////////////////////
11073 ////////////////////////////////////////////////////////////*/
11074
Fst(int i,int j,calign * data)11075 phydbl Fst(int i, int j, calign *data)
11076 {
11077 phydbl FA, Fr;
11078
11079 FA = Mean_Identity(data);
11080 Fr = Pairwise_Identity(i,j,data);
11081
11082 return((Fr-FA)/(1-FA));
11083 }
11084
11085 /*////////////////////////////////////////////////////////////
11086 ////////////////////////////////////////////////////////////*/
11087
Nucleotide_Diversity(calign * data)11088 phydbl Nucleotide_Diversity(calign *data)
11089 {
11090
11091 int i,j,n;
11092 phydbl pair_div;
11093
11094 n = data->n_otu;
11095
11096 pair_div = 0.0;
11097 for(i=0;i<n-1;i++)
11098 {
11099 for(j=i+1; j<n; j++)
11100 {
11101 pair_div += 1. - Pairwise_Identity(i,j,data);
11102 }
11103 }
11104
11105 return(pair_div / (phydbl)(n*(n-1.)/2.));
11106 }
11107
11108 /*////////////////////////////////////////////////////////////
11109 ////////////////////////////////////////////////////////////*/
11110
/* Copies the values held in the chain starting at 'from' into the chain
   starting at 'to', element by element, until the end of 'from' is reached.
   Both heads must be non-NULL and 'to' must be at least as long as 'from'
   (checked with assert). No element is allocated here. */
void Copy_Scalar_Dbl(scalar_dbl *from, scalar_dbl *to)
{
  scalar_dbl *src = from;
  scalar_dbl *dst = to;

  do
    {
      assert(dst);
      assert(src);

      dst->v = src->v;

      dst = dst->next;
      src = src->next;
    }
  while(src != NULL);
}
11126
11127 /*////////////////////////////////////////////////////////////
11128 ////////////////////////////////////////////////////////////*/
11129
/* Returns a newly allocated chain with as many elements as the chain
   starting at 'from' and holding the same values. Only the 'v' field is
   copied; each new element comes from mCalloc(1,sizeof(scalar_dbl)).
   'from' must not be NULL. */
scalar_dbl *Duplicate_Scalar_Dbl(scalar_dbl *from)
{
  scalar_dbl *head,*last,*src;

  head = (scalar_dbl *)mCalloc(1,sizeof(scalar_dbl));
  head->v = from->v;

  last = head;
  for(src = from->next; src != NULL; src = src->next)
    {
      /* Append a new element for each remaining source element */
      last->next = (scalar_dbl *)mCalloc(1,sizeof(scalar_dbl));
      last = last->next;
      last->v = src->v;
    }

  return(head);
}
11150
11151 /*////////////////////////////////////////////////////////////
11152 ////////////////////////////////////////////////////////////*/
11153
/* Multiplies by 'mult' the value of every element of the chain starting at
   'x'. 'x' must not be NULL. */
void Multiply_Scalar_Dbl(phydbl mult, scalar_dbl *x)
{
  scalar_dbl *cur = x;

  do
    {
      cur->v = cur->v * mult;
      cur = cur->next;
    }
  while(cur != NULL);
}
11166
11167 /*////////////////////////////////////////////////////////////
11168 ////////////////////////////////////////////////////////////*/
11169
/* Sets to 'val' the value of every element of the chain starting at 'from'.
   'from' must not be NULL. */
void Set_Scalar_Dbl(phydbl val, scalar_dbl *from)
{
  scalar_dbl *cur = from;

  do
    {
      cur->v = val;
      cur = cur->next;
    }
  while(cur != NULL);
}
11182
11183 /*////////////////////////////////////////////////////////////
11184 ////////////////////////////////////////////////////////////*/
11185
/* Raises to 'thresh' every value of the chain starting at 'from' that is
   smaller than 'thresh'. 'from' must not be NULL. */
void Set_Scalar_Dbl_Min_Thresh(phydbl thresh, scalar_dbl *from)
{
  scalar_dbl *cur = from;

  do
    {
      if(cur->v < thresh)
        {
          cur->v = thresh;
        }
      cur = cur->next;
    }
  while(cur != NULL);
}
11199
11200 /*////////////////////////////////////////////////////////////
11201 ////////////////////////////////////////////////////////////*/
11202
/* Lowers to 'thresh' every value of the chain starting at 'from' that is
   greater than 'thresh'. 'from' must not be NULL. */
void Set_Scalar_Dbl_Max_Thresh(phydbl thresh, scalar_dbl *from)
{
  scalar_dbl *cur = from;

  do
    {
      if(cur->v > thresh)
        {
          cur->v = thresh;
        }
      cur = cur->next;
    }
  while(cur != NULL);
}
11215 /*////////////////////////////////////////////////////////////
11216 ////////////////////////////////////////////////////////////*/
11217
Scalar_Elem(int pos,scalar_dbl * scl)11218 phydbl Scalar_Elem(int pos, scalar_dbl *scl)
11219 {
11220 scalar_dbl *loc;
11221 loc = scl;
11222 while(--pos >= 0) loc = loc->next;
11223 assert(loc);
11224 return(loc->v);
11225 }
11226
11227 /*////////////////////////////////////////////////////////////
11228 ////////////////////////////////////////////////////////////*/
11229
/* Returns the 'v' pointer of the element found 'pos' steps after the head of
   the list 'll' belongs to (pos <= 0 gives the head element). Returns NULL
   when 'll' is NULL. The list must hold more than 'pos' elements (checked
   with assert). */
void *Linked_List_Elem(int pos, t_ll *ll)
{
  t_ll *cur;
  int step;

  if(!ll) return NULL;

  cur = ll->head;
  for(step=0;step<pos;step++)
    {
      assert(cur);
      cur = cur->next;
    }

  assert(cur);
  return(cur->v);
}
11245
11246 /*////////////////////////////////////////////////////////////
11247 ////////////////////////////////////////////////////////////*/
11248
/* Returns the number of elements in the chain starting at 'scl' (following
   the 'next' links); 0 when 'scl' is NULL. */
int Scalar_Len(scalar_dbl *scl)
{
  scalar_dbl *cur;
  int n_elem = 0;

  for(cur = scl; cur != NULL; cur = cur->next) n_elem++;

  return(n_elem);
}
11268
11269 /*////////////////////////////////////////////////////////////
11270 ////////////////////////////////////////////////////////////*/
11271
/* Returns the number of elements of the list 'list' belongs to, counted from
   list->head along the 'next' links; 0 when 'list' is NULL. */
int Linked_List_Len(t_ll *list)
{
  t_ll *cur;
  int n_elem;

  if(!list) return 0;

  n_elem = 0;
  cur = list->head;
  do
    {
      cur = cur->next;
      n_elem++;
    }
  while(cur);

  return(n_elem);
}
11291
11292 /*////////////////////////////////////////////////////////////
11293 ////////////////////////////////////////////////////////////*/
11294
/* Appends an element holding 'what' at the bottom (tail) of the list pointed
   to by 'list'.
   - Empty list (*list == NULL): the new element becomes the list; it is its
     own head and tail.
   - Otherwise the new element is linked after the current tail and the
     'tail' field of every element reachable from the head is updated to
     point to it.
   - remove_duplicates == YES: if an element of the list already holds
     'what' (same pointer), the list is left unchanged. */
void Push_Bottom_Linked_List(void *what, t_ll **list, bool remove_duplicates)
{
  t_ll *elem,*cur;

  elem = (t_ll *)mCalloc(1,sizeof(t_ll));
  elem->v = (void *)what;

  /* First element of the list */
  if(*list == NULL)
    {
      elem->head = elem;
      elem->tail = elem;
      elem->next = NULL;
      elem->prev = NULL;
      *list = elem;
      return;
    }

  if(remove_duplicates == YES)
    {
      cur = (*list)->head;
      do
        {
          if(cur->v == what)
            {
              /* 'what' is already in the list: discard the new element */
              Free(elem);
              return;
            }
          cur = cur->next;
        }
      while(cur);
    }

  /* Hook the new element after the current tail */
  elem->prev = (*list)->tail;
  (*list)->tail->next = elem;
  elem->next = NULL;
  elem->head = (*list)->head;

  /* Every element, the new one included, records the new tail */
  cur = (*list)->head;
  do
    {
      cur->tail = elem;
      cur = cur->next;
    }
  while(cur);
}
11363
11364 /*////////////////////////////////////////////////////////////
11365 ////////////////////////////////////////////////////////////*/
11366
/* Exchange the likelihood-related buffers attached to one side of edge b
   with one of the two spare ("extra") buffer sets stored in the tree.

   b        : edge whose buffers are exchanged
   d        : selects the side; the left-hand buffers are used when
              d == b->left, the right-hand ones in every other case
   whichone : 0 selects the tree's *_extra_0 buffers, any other value the
              *_extra_1 buffers
   tree     : holder of the spare buffers

   Every step is a plain pointer swap, no data is copied. */
void Swap_Partial_Lk_Extra(t_edge *b, t_node *d, int whichone, t_tree *tree)
{
  void *carry;
  int on_left;

/* Swap two pointer lvalues through the untyped temporary 'carry'. */
#define XCHG_PTR(P,Q) do { carry = (P); (P) = (Q); (Q) = carry; } while(0)

/* Swap all buffers found on side SIDE (left|rght) of b with spare set SLOT
   (0|1) of tree. TIP is the suffix (l|r) of the tip buffer on that side. */
#define XCHG_SIDE(SIDE,TIP,SLOT)                                             \
  do                                                                         \
    {                                                                        \
      XCHG_PTR(b->div_post_pred_##SIDE, tree->div_post_pred_extra_##SLOT);   \
      XCHG_PTR(b->sum_scale_##SIDE##_cat, tree->sum_scale_cat_extra_##SLOT); \
      if(b->SIDE)                                                            \
        {                                                                    \
          /* scaling buffer is only swapped for internal nodes */            \
          if(!b->SIDE->tax)                                                  \
            {                                                                \
              XCHG_PTR(b->sum_scale_##SIDE, tree->sum_scale_extra_##SLOT);   \
            }                                                                \
          /* internal node, or greedy option set: full partial lk vector; */ \
          /* otherwise (tip, not greedy): tip-specific vector             */ \
          if(!b->SIDE->tax || tree->mod->s_opt->greedy)                      \
            {                                                                \
              XCHG_PTR(b->p_lk_##SIDE, tree->p_lk_extra_##SLOT);             \
            }                                                                \
          else                                                               \
            {                                                                \
              XCHG_PTR(b->p_lk_tip_##TIP, tree->p_lk_tip_extra_##SLOT);      \
            }                                                                \
        }                                                                    \
      XCHG_PTR(b->patt_id_##SIDE, tree->patt_id_extra_##SLOT);               \
    }                                                                        \
  while(0)

  on_left = (d == b->left);

  if(whichone == 0)
    {
      if(on_left) XCHG_SIDE(left,l,0);
      else        XCHG_SIDE(rght,r,0);
    }
  else
    {
      if(on_left) XCHG_SIDE(left,l,1);
      else        XCHG_SIDE(rght,r,1);
    }

#undef XCHG_SIDE
#undef XCHG_PTR
}
11522
11523 /*////////////////////////////////////////////////////////////
11524 ////////////////////////////////////////////////////////////*/
11525
/* Remove (and free) the first element of *list that is either 'elem' itself
   (when elem is non-NULL) or whose payload 'v' equals 'val' (when val is
   non-NULL). The head/tail pointers stored in the remaining elements are
   refreshed when the removed element was the head or the tail. *list is set
   to NULL when the last remaining element is removed. Nothing happens when
   *list is NULL or when no element matches. */
void Remove_From_Linked_List(t_ll *elem, void *val, t_ll **list)
{
  t_ll *hit, *walk;
  int is_head, is_tail;

  if(*list == NULL) return;

  /* Locate the element to remove, scanning from the head */
  hit = (*list)->head;
  while(!((elem && hit == elem) || (val && hit->v == val)))
    {
      hit = hit->next;
      if(hit == NULL) return; /* no match */
    }

  is_head = (hit == (*list)->head);
  is_tail = (hit == (*list)->tail);

  if(is_head && is_tail)
    {
      /* Single-element list: it becomes empty */
      (*list)->tail = NULL;
      (*list)->head = NULL;
      (*list)->next = NULL;
      (*list)->prev = NULL;
      (*list) = NULL;
      Free(hit);
      return;
    }

  if(is_head)
    {
      /* Re-initialise head of list: second element becomes the head */
      t_ll *new_head = (*list)->head->next;
      for(walk = (*list); walk != NULL; walk = walk->next) walk->head = new_head;
      (*list) = (*list)->head;
      (*list)->head->prev = NULL;
    }
  else if(is_tail)
    {
      /* Re-initialise tail of list: penultimate element becomes the tail */
      t_ll *new_tail = (*list)->tail->prev;
      for(walk = (*list); walk != NULL; walk = walk->next) walk->tail = new_tail;
      (*list)->tail->next = NULL;
    }
  else
    {
      /* Interior element: bridge its two neighbours */
      hit->prev->next = hit->next;
      hit->next->prev = hit->prev;
    }

  hit->next = NULL;
  hit->prev = NULL;
  hit->head = NULL;
  hit->tail = NULL;
  Free(hit);
}
11588
11589 /*////////////////////////////////////////////////////////////
11590 ////////////////////////////////////////////////////////////*/
11591
Get_List_Of_Reachable_Tips(t_node * a,t_node * d,t_tree * tree)11592 t_ll *Get_List_Of_Reachable_Tips(t_node *a, t_node *d, t_tree *tree)
11593 {
11594 t_ll *list;
11595 list = NULL;
11596 Get_List_Of_Reachable_Tips_Post(a,d,&list,tree);
11597 return list;
11598 }
11599
11600 /*////////////////////////////////////////////////////////////
11601 ////////////////////////////////////////////////////////////*/
11602
/* Recursive traversal behind Get_List_Of_Reachable_Tips(): append to *list
   every tip met when moving from d away from a. Directions leading back to
   a, or going through the root edge (tree->e_root), are not followed.
   Tips are pushed with duplicate removal switched on. */
void Get_List_Of_Reachable_Tips_Post(t_node *a, t_node *d, t_ll **list, t_tree *tree)
{
  int dir;

  if(d->tax)
    {
      Push_Bottom_Linked_List(d,list,YES);
      return;
    }

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;
      if(d->b[dir] == tree->e_root) continue;
      Get_List_Of_Reachable_Tips_Post(d,d->v[dir],list,tree);
    }
}
11621
11622 /*////////////////////////////////////////////////////////////
11623 ////////////////////////////////////////////////////////////*/
11624 // tips0: first list of tips. tips1: second list of tips
Length_Of_Path_Between_List_Of_Tips(t_ll * tips0,t_ll * tips1,matrix * mat)11625 phydbl Length_Of_Path_Between_List_Of_Tips(t_ll *tips0, t_ll *tips1, matrix *mat)
11626 {
11627 phydbl d,n;
11628 t_ll *x, *y;
11629 t_node *nx, *ny;
11630
11631 d = 0.0;
11632 n = 0;
11633
11634 /* Print_Mat(mat); */
11635
11636 // Add all distances between tips in distinct lists
11637 x = tips0->head;
11638 do
11639 {
11640 y = tips1->head;
11641 do
11642 {
11643 nx = (t_node *)x->v;
11644 ny = (t_node *)y->v;
11645
11646 d += mat->dist[nx->c_seq->num][ny->c_seq->num];
11647
11648 /* printf("\n. nx: %d ny: %d [%p %p] d: %G", */
11649 /* nx->c_seq->num, */
11650 /* ny->c_seq->num, */
11651 /* x->head, */
11652 /* y->head, */
11653 /* mat->dist[nx->c_seq->num][ny->c_seq->num]); fflush(NULL); */
11654
11655 n++; // number of pairs of tips, each tip in different lists
11656 y = y->next;
11657 }
11658 while(y);
11659 x = x->next;
11660 }
11661 while(x);
11662
11663 // Remove distances between tips in tips0
11664 x = tips0->head;
11665 do
11666 {
11667 y = tips0->head;
11668 do
11669 {
11670 nx = (t_node *)x->v;
11671 ny = (t_node *)y->v;
11672 d -= mat->dist[nx->c_seq->num][ny->c_seq->num];
11673 y = y->next;
11674 }
11675 while(y);
11676 x = x->next;
11677 }
11678 while(x);
11679
11680
11681 // Remove distances between tips in tips1
11682 x = tips1->head;
11683 do
11684 {
11685 y = tips1->head;
11686 do
11687 {
11688 nx = (t_node *)x->v;
11689 ny = (t_node *)y->v;
11690 d -= mat->dist[nx->c_seq->num][ny->c_seq->num];
11691 y = y->next;
11692 }
11693 while(y);
11694 x = x->next;
11695 }
11696 while(x);
11697
11698 return d/(phydbl)n;
11699 }
11700
11701 /*////////////////////////////////////////////////////////////
11702 ////////////////////////////////////////////////////////////*/
11703
/* Walk along the tree from node a to node d through edge b, using up a
   length budget (*radius) as edges are traversed.

   On entry *radius must be strictly positive and tree->rates non-NULL.
   The length of b (b->l->v) is taken away from *radius. If the budget runs
   out on b (i.e., *radius becomes negative), the landing spot is reported
   through the output arguments:
     *target_edge : edge on which the walk stops (b)
     *target_nd   : node chosen on that edge (a, d or the root node)
     *target_time : time of the landing spot
   Otherwise the walk stops silently if d is a tip (outputs are left
   untouched), or goes on, with probability one half each, along one of the
   two directions of d that do not lead back to a. */
void Random_Walk_Along_Tree_On_Radius(t_node *a, t_node *d, t_edge *b, phydbl *radius, t_edge **target_edge, t_node **target_nd, phydbl *target_time, t_tree *tree)
{
  assert(tree->rates);

  phydbl budget,t_a,t_d,coin;
  int i,first_dir,second_dir,go;

  /* Budget available before edge b is traversed */
  budget = *radius;

  if(!(budget > 0.0))
    {
      PhyML_Fprintf(stderr,"\n. delta=%G",budget);
      assert(FALSE);
    }

  (*radius) -= b->l->v;

  if(*radius < 0.0)
    {
      /* The walk ends somewhere on b */
      *target_edge = b;
      t_a = tree->times->nd_t[a->num];
      t_d = tree->times->nd_t[d->num];

      if(b == tree->e_root)
        {
          phydbl t_root = tree->times->nd_t[tree->n_root->num];

          if(budget < tree->rates->cur_l[a->num])
            {
              /* target falls on edge below root leading to node a */
              *target_time = t_a - budget / (tree->rates->cur_l[a->num] / fabs(t_root-t_a));
              *target_nd   = a;
            }
          else
            {
              coin = Uni();
              if(coin < .5)
                {
                  /* target falls on edge below root leading to node d */
                  *target_time = tree->times->nd_t[tree->n_root->num] + (budget - tree->rates->cur_l[a->num])/(tree->rates->cur_l[d->num] / fabs(t_root-t_d));
                  *target_nd   = d;
                }
              else
                {
                  /* target falls above root */
                  *target_time = tree->times->nd_t[tree->n_root->num] - (budget - tree->rates->cur_l[a->num])/tree->rates->clock_r;
                  *target_nd   = tree->n_root;
                }
            }
        }
      else if(t_a < t_d)
        {
          /* Moving forward in time: rate is length of edge above d over its duration */
          *target_time = t_a + budget / (tree->rates->cur_l[d->num] / fabs(t_a-t_d));
          *target_nd   = d;
          assert(*target_time > t_a && *target_time < t_d);
        }
      else
        {
          /* Moving backward in time: rate is length of edge above a over its duration */
          *target_time = t_a - budget / (tree->rates->cur_l[a->num] / fabs(t_a-t_d));
          *target_nd   = a;
          assert(*target_time < t_a && *target_time > t_d);
        }

      return;
    }

  /* Budget not exhausted: nowhere to go past a tip */
  if(d->tax == YES) return;

  /* Identify the two directions that do not lead back to a */
  first_dir = second_dir = -1;
  for(i=0;i<3;++i)
    {
      if(d->v[i] == a) continue;
      if(first_dir < 0) first_dir  = i;
      else              second_dir = i;
    }

  coin = Uni();
  go = (coin < .5) ? first_dir : second_dir;

  Random_Walk_Along_Tree_On_Radius(d,d->v[go],d->b[go],radius,target_edge,target_nd,target_time,tree);
}
11864
11865 /*////////////////////////////////////////////////////////////
11866 ////////////////////////////////////////////////////////////*/
11867
/* Print the top border of a table: a new line, a rounded top-left corner,
   'width' horizontal bars and a rounded top-right corner. */
void Table_Top(unsigned int width)
{
  unsigned int col = 0;

  PhyML_Printf("\n\t \u256D");
  while(col < width)
    {
      PhyML_Printf("\u2500");
      ++col;
    }
  PhyML_Printf("\u256E ");
}
11875
11876 /*////////////////////////////////////////////////////////////
11877 ////////////////////////////////////////////////////////////*/
11878
/* Print a horizontal separator between two table rows: a new line, a left
   T-junction, 'width' horizontal bars and a right T-junction. */
void Table_Row(unsigned int width)
{
  unsigned int col = 0;

  PhyML_Printf("\n\t \u251C");
  while(col < width)
    {
      PhyML_Printf("\u2500");
      ++col;
    }
  PhyML_Printf("\u2524");
}
11888
11889 /*////////////////////////////////////////////////////////////
11890 ////////////////////////////////////////////////////////////*/
11891
/* Print the bottom border of a table: a new line, a rounded bottom-left
   corner, 'width' horizontal bars and a rounded bottom-right corner. */
void Table_Bottom(unsigned int width)
{
  unsigned int col = 0;

  PhyML_Printf("\n\t \u2570");
  while(col < width)
    {
      PhyML_Printf("\u2500");
      ++col;
    }
  PhyML_Printf("\u256F ");
}
11899
11900 /*////////////////////////////////////////////////////////////
11901 ////////////////////////////////////////////////////////////*/
11902
/* Return a copy of calibration 'from'. Scalar fields are copied by value,
   the id string and the alpha_proba_list array are copied into newly
   allocated buffers and every clade in clade_list is duplicated with
   Duplicate_Clade().
   NOTE(review): if Make_Calibration() already allocates id,
   alpha_proba_list or clade_list, those buffers are overwritten here
   without being freed -- confirm. */
t_cal *Duplicate_Calib(t_cal *from)
{
  t_cal *copy;
  int k;

  copy = Make_Calibration();

  /* Buffers first... */
  copy->id = (char *)mCalloc(strlen(from->id)+1,sizeof(char));
  copy->alpha_proba_list = (phydbl *)mCalloc(from->clade_list_size,sizeof(phydbl));
  copy->clade_list       = (t_clad **)mCalloc(from->clade_list_size,sizeof(t_clad *));

  /* ...then contents */
  strcpy(copy->id,from->id);

  copy->clade_list_size   = from->clade_list_size;
  copy->is_primary        = from->is_primary;
  copy->upper             = from->upper;
  copy->lower             = from->lower;
  copy->current_clade_idx = from->current_clade_idx;

  k = 0;
  while(k < from->clade_list_size)
    {
      copy->alpha_proba_list[k] = from->alpha_proba_list[k];
      copy->clade_list[k]       = Duplicate_Clade(from->clade_list[k]);
      ++k;
    }

  return(copy);
}
11930
11931 /*////////////////////////////////////////////////////////////
11932 ////////////////////////////////////////////////////////////*/
11933
/* Return a copy of clade 'from'. The id and every taxon name are copied
   into newly allocated strings. target_nd and the entries of tip_list are
   copied as pointers, i.e., the copy refers to the same nodes as 'from'.
   NOTE(review): if Make_Clade() already allocates id, tax_list or tip_list,
   those buffers are overwritten here without being freed -- confirm. */
t_clad *Duplicate_Clade(t_clad *from)
{
  t_clad *copy;
  int k;

  copy = Make_Clade();

  copy->n_tax     = from->n_tax;
  copy->target_nd = from->target_nd;

  copy->id = (char *)mCalloc(strlen(from->id)+1,sizeof(char));
  strcpy(copy->id,from->id);

  copy->tax_list = (char **)mCalloc(from->n_tax,sizeof(char *));
  copy->tip_list = (t_node **)mCalloc(from->n_tax,sizeof(t_node *));

  k = 0;
  while(k < from->n_tax)
    {
      copy->tax_list[k] = (char *)mCalloc(strlen(from->tax_list[k])+1,sizeof(char));
      strcpy(copy->tax_list[k],from->tax_list[k]);
      copy->tip_list[k] = from->tip_list[k];
      ++k;
    }

  return copy;
}
11959
11960 /*////////////////////////////////////////////////////////////
11961 ////////////////////////////////////////////////////////////*/
11962
/* Build a short label for mutation number mut_idx. With ns the number of
   states of the model (tree->mod->ns), the label is made of a separator,
   the character for state mut_idx/ns, the separator again and the character
   for state mut_idx%ns. Characters are obtained from
   Reciproc_Assign_State(). The label lives in a freshly allocated 20-byte
   buffer.
   NOTE(review): freeing the returned buffer is presumably the caller's
   job -- confirm. */
char *Mutation_Id(int mut_idx, t_tree *tree)
{
  const int n_states = tree->mod->ns;
  char *label;
  char state;
  size_t len;

  label = (char *)mCalloc(20,sizeof(char));

  strcpy(label," ");

  /* Character of the first state, appended at the end of the label */
  state = Reciproc_Assign_State((int)(mut_idx/n_states),tree->mod->io->datatype);
  len = strlen(label);
  label[len]   = state;
  label[len+1] = '\0';

  strcat(label," ");

  /* Character of the second state, appended at the end of the label */
  state = Reciproc_Assign_State((int)(mut_idx%n_states),tree->mod->io->datatype);
  len = strlen(label);
  label[len]   = state;
  label[len+1] = '\0';

  return(label);
}
11983
11984 /*////////////////////////////////////////////////////////////
11985 ////////////////////////////////////////////////////////////*/
11986
/* Traverse the tree from d, moving away from a and never crossing the root
   edge, and store in *idx the number (d->num) of each tip met. Since every
   tip overwrites the previous value, *idx ends up holding the number of the
   last tip visited. No random number is drawn in this function. */
void Random_Tax_Idx(t_node *a, t_node *d, int *idx, t_tree *tree)
{
  int dir;

  if(d->tax == YES)
    {
      (*idx) = d->num;
      return;
    }

  for(dir=0;dir<3;++dir)
    {
      if(d->v[dir] == a) continue;
      if(d->b[dir] == tree->e_root) continue;
      Random_Tax_Idx(d,d->v[dir],idx,tree);
    }
}
12006
12007 /*////////////////////////////////////////////////////////////
12008 ////////////////////////////////////////////////////////////*/
12009
/* Print the name of every tip met when traversing the tree from d, moving
   away from a and never crossing the root edge. One line is printed per
   tip, the name being right-justified in a 50-character field. */
void List_Taxa_In_Clade(t_node *a, t_node *d, t_tree *tree)
{
  int dir;

  if(d->tax == YES)
    {
      PhyML_Printf("\n- [%50s]",d->name);
      return;
    }

  for(dir=0;dir<3;++dir)
    {
      if(d->v[dir] == a) continue;
      if(d->b[dir] == tree->e_root) continue;
      List_Taxa_In_Clade(d,d->v[dir],tree);
    }
}
12027
12028 /*////////////////////////////////////////////////////////////
12029 ////////////////////////////////////////////////////////////*/
12030
/* Entry point of the sub-pattern aliasing. When the tree has a root node
   and the root is not ignored, a post-order pass is run on each side of
   the root (v[2] first, then v[1]). Otherwise a post-order pass followed
   by a pre-order pass is run, both starting from the first node of the
   tree (a_nodes[0]) towards its neighbour v[0]. */
void Alias_Subpatt(t_tree *tree)
{
  t_node *start;

  if(tree->n_root && tree->ignore_root == NO)
    {
      start = tree->n_root;
      Alias_Subpatt_Post(start,start->v[2],tree);
      Alias_Subpatt_Post(start,start->v[1],tree);
      return;
    }

  start = tree->a_nodes[0];
  Alias_Subpatt_Post(start,start->v[0],tree);
  Alias_Subpatt_Pre(start,start->v[0],tree);
}
12046
12047 //////////////////////////////////////////////////////////////
12048 //////////////////////////////////////////////////////////////
12049
/* Fill, for node d seen from node a, the arrays patt_id and p_lk_loc
   stored on the side of d of the edge that is not explored further (the
   edge to a, or the root edge).

   Tip: p_lk_loc[i] is set to the first site j such that
   patt_id[j] == patt_id[i].

   Internal node, with v1 and v2 the two neighbours of d other than a (and
   not reached through the root edge): for each site i,
     - if v1 or v2 has p_lk_loc equal to i, site i starts a new sub-pattern;
     - else if v1 and v2 point to the same site, d points to that site too
       and inherits its sub-pattern id;
     - else the first site j >= MAX(loc_v1,loc_v2) showing the same pair of
       (v1,v2) pattern ids is looked for; d points to it.
   The program stops with an error message if the search runs past site i
   without success. */
void Alias_One_Subpatt(t_node *a, t_node *d, t_tree *tree)
{
  int site,k;
  int *id_d,*loc_d;

  if(d->tax)
    {
      t_edge *e = d->b[0];

      if(d == e->left)
        {
          id_d  = e->patt_id_left;
          loc_d = e->p_lk_loc_left;
        }
      else
        {
          id_d  = e->patt_id_rght;
          loc_d = e->p_lk_loc_rght;
        }

      for(site=0;site<tree->n_pattern;site++)
        {
          for(k=0;k<tree->n_pattern;k++)
            {
              if(id_d[site] == id_d[k])
                {
                  loc_d[site] = k;
                  break;
                }
              if(k > site)
                {
                  PhyML_Fprintf(stderr,"\n. Err in file %s at line %d\n\n",__FILE__,__LINE__);
                  Warn_And_Exit("");
                }
            }
        }
      return;
    }

  t_node *v1 = NULL, *v2 = NULL;
  t_edge *b0 = NULL, *b1 = NULL, *b2 = NULL;
  int *id_v1,*id_v2,*loc_v1,*loc_v2;
  int n_sub;

  /* Sort the three directions of d: two to explore, one to report on */
  for(k=0;k<3;k++)
    {
      if(d->v[k] == a || d->b[k] == tree->e_root)
        {
          b0 = d->b[k];
          continue;
        }

      if(!v1) { v1 = d->v[k]; b1 = d->b[k]; }
      else    { v2 = d->v[k]; b2 = d->b[k]; }
    }

  id_v1  = (v1 == b1->left)?(b1->patt_id_left):(b1->patt_id_rght);
  id_v2  = (v2 == b2->left)?(b2->patt_id_left):(b2->patt_id_rght);
  id_d   = (d  == b0->left)?(b0->patt_id_left):(b0->patt_id_rght);
  loc_d  = (d  == b0->left)?(b0->p_lk_loc_left):(b0->p_lk_loc_rght);
  loc_v1 = (v1 == b1->left)?(b1->p_lk_loc_left):(b1->p_lk_loc_rght);
  loc_v2 = (v2 == b2->left)?(b2->p_lk_loc_left):(b2->p_lk_loc_rght);

  n_sub = 0;
  for(site=0;site<tree->n_pattern;site++)
    {
      const int cur_id_v1  = id_v1[site];
      const int cur_id_v2  = id_v2[site];
      const int cur_loc_v1 = loc_v1[site];
      const int cur_loc_v2 = loc_v2[site];

      /* Default: the site points to itself */
      loc_d[site] = site;

      if((cur_loc_v1 == site) || (cur_loc_v2 == site))
        {
          /* New sub-pattern */
          id_d[site] = n_sub;
          n_sub++;
          continue;
        }

      if(cur_loc_v1 == cur_loc_v2)
        {
          /* Both neighbours point to the same earlier site */
          loc_d[site] = cur_loc_v1;
          id_d[site]  = id_d[cur_loc_v1];
          continue;
        }

      for(k=MAX(cur_loc_v1,cur_loc_v2);k<tree->n_pattern;k++)
        {
          if((id_v1[k] == cur_id_v1) && (id_v2[k] == cur_id_v2))
            {
              loc_d[site] = k;

              if(k == site)
                {
                  id_d[site] = n_sub;
                  n_sub++;
                }
              else
                {
                  id_d[site] = id_d[k];
                }
              break;
            }
          if(k > site)
            {
              PhyML_Fprintf(stderr,"\n. Err in file %s at line %d\n\n",__FILE__,__LINE__);
              Warn_And_Exit("");
            }
        }
    }
}
12159
12160 //////////////////////////////////////////////////////////////
12161 //////////////////////////////////////////////////////////////
12162
/* Post-order pass of the sub-pattern aliasing: the subtrees hanging from d
   (away from a, root edge excluded) are processed first, then
   Alias_One_Subpatt() is called on (a,d). Tips are skipped. */
void Alias_Subpatt_Post(t_node *a, t_node *d, t_tree *tree)
{
  int dir;

  if(d->tax) return;

  for(dir=0;dir<3;dir++)
    {
      if(d->v[dir] == a) continue;
      if(d->b[dir] == tree->e_root) continue;
      Alias_Subpatt_Post(d,d->v[dir],tree);
    }

  Alias_One_Subpatt(a, d, tree);
}
12181
12182 //////////////////////////////////////////////////////////////
12183 //////////////////////////////////////////////////////////////
12184
/* Pre-order pass of the sub-pattern aliasing: for each neighbour v of d
   other than a (root edge excluded), Alias_One_Subpatt() is called on d as
   seen from v, and the traversal then moves on to v. Tips are skipped. */
void Alias_Subpatt_Pre(t_node *a, t_node *d, t_tree *tree)
{
  int dir;

  if(d->tax) return;

  for(dir=0;dir<3;++dir)
    {
      if(d->v[dir] == a) continue;
      if(d->b[dir] == tree->e_root) continue;

      Alias_One_Subpatt(d->v[dir],d,tree);
      Alias_Subpatt_Pre(d,d->v[dir],tree);
    }
}
12202
12203 //////////////////////////////////////////////////////////////
12204 //////////////////////////////////////////////////////////////
12205
/* Translate a 4-bit nucleotide set into the corresponding IUPAC character.

   x is read as a bit mask in which A=1, C=2, G=4 and T=8, so that, e.g.,
   3 (A or C) gives 'M' and 15 (any nucleotide) gives 'N'.

   Valid values of x are 1 to 15. Any other value triggers an assertion
   failure. When assertions are compiled out (NDEBUG), '?' is returned
   instead of an indeterminate value. */
char Integer_To_IUPAC_Code(int x)
{
  /* Indexed by the bit mask; slot 0 (empty set) is not a valid code */
  static const char iupac[16] =
    {
      '?','A','C','M',
      'G','R','S','V',
      'T','W','Y','H',
      'K','D','B','N'
    };

  assert(x >= 1 && x <= 15);

  if(x < 1 || x > 15) return('?');

  return(iupac[x]);
}
12299
12300 /*////////////////////////////////////////////////////////////
12301 ////////////////////////////////////////////////////////////*/
12302
/* Return the ns lowest bits of val as an array of ns integers (0 or 1),
   most significant bit first: res[i] is bit number (ns-1-i) of val.

   val : value to decompose; only its bit pattern matters
   ns  : number of bits to extract, 0 < ns <= number of bits in an
         unsigned int

   The array is allocated with mCalloc().
   NOTE(review): freeing it is presumably the caller's job -- confirm.

   Bits are read with right shifts of an unsigned copy of val. Repeatedly
   left-shifting the signed value, as done previously, overflows for large
   ns (e.g., ns = 20) and is undefined for negative values. */
int *Integer_To_Bit(int val, const int ns)
{
  assert(ns > 0);
  /* A byte has at least 8 bits, hence this bound never exceeds the actual
     width of an unsigned int and keeps every shift below well defined */
  assert((size_t)ns <= 8 * sizeof(unsigned int));

  const unsigned int bits = (unsigned int)val;
  int *res;
  int i;

  res = (int *)mCalloc(ns,sizeof(int));

  for(i=0;i<ns;++i)
    {
      res[i] = (int)((bits >> (ns-1-i)) & 1U);
    }

  return res;
}
12321
12322 /*////////////////////////////////////////////////////////////
12323 ////////////////////////////////////////////////////////////*/
12324
/* Turn an array of ns flags into a comma-separated list of state
   characters: the character of state i is written whenever bit[i] == 1.
   ns must be 4 (alphabet "ACGT") or 20 (alphabet "ARNDCQEGHILKMFPSTWYV").
   The string is stored in a buffer of 2*ns bytes allocated with mCalloc(),
   which is exactly enough for ns characters, ns-1 commas and the
   terminating null character.
   NOTE(review): freeing the buffer is presumably the caller's job --
   confirm. */
char *Bit_To_Character_String(int *bit, int ns)
{

  assert(ns == 4 || ns == 20);

  const char *letters;
  char *out,*w;
  int k;

  out = (char *)mCalloc(2*ns,sizeof(char));

  if(ns == 4)       letters = "ACGT";
  else if(ns == 20) letters = "ARNDCQEGHILKMFPSTWYV";
  else
    {
      Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
      return out;
    }

  /* w is the write position in out */
  w = out;
  for(k=0;k<ns;++k)
    {
      if(bit[k] != 1) continue;
      if(w != out) *w++ = ','; /* separator before all but the first character */
      *w++ = letters[k];
    }
  *w = '\0';

  return out;
}
12381
12382
12383
12384 /*////////////////////////////////////////////////////////////
12385 ////////////////////////////////////////////////////////////*/
12386
/* Shuffle the states found at some of the sites (columns) of an alignment.
   Each site, from 0 to data[0]->len - 1, is selected if a Uni() draw is
   smaller than prop. At a selected site, the state of each sequence, taken
   in turn, is exchanged with that of a sequence drawn with
   Rand_Int(0,n_otu-1) (which may be the same sequence).

   prop  : threshold the Uni() draw is compared to for each site
   data  : array of n_otu sequences, modified in place
   n_otu : number of sequences */
void Shuffle_Sites(const phydbl prop, align **data, const int n_otu)
{
  unsigned int site,row,partner;
  phydbl draw;
  char held;

  for(site=0;site<data[0]->len;++site)
    {
      draw = Uni();
      if(!(draw < prop)) continue; /* site left untouched */

      for(row=0;row<n_otu;++row)
        {
          partner = Rand_Int(0,n_otu-1);

          held                       = data[row]->state[site];
          data[row]->state[site]     = data[partner]->state[site];
          data[partner]->state[site] = held;
        }
    }
}
12410
12411 /*////////////////////////////////////////////////////////////
12412 ////////////////////////////////////////////////////////////*/
12413
Tree_Height(t_tree * tree)12414 phydbl Tree_Height(t_tree *tree)
12415 {
12416 phydbl h;
12417 t_node *n,*anc;
12418 int i;
12419 assert(tree->n_root != NULL);
12420
12421 h = 0;
12422 n = tree->n_root;
12423 anc = NULL;
12424
12425 assert(n->tax == NO);
12426
12427 do
12428 {
12429 for(i=0;i<3;++i)
12430 {
12431 if(n->v[i] && n->v[i] != anc && n->b[i] != tree->e_root)
12432 {
12433 h += n->b[i]->l->v;
12434 break;
12435 }
12436 }
12437
12438 anc = n;
12439 n = n->v[i];
12440 }
12441 while(n->tax == NO);
12442
12443 return h;
12444 }
12445
12446 /*////////////////////////////////////////////////////////////
12447 ////////////////////////////////////////////////////////////*/
12448 /* Adjust node ages so that every edge in tree has length > min_l. Assume strict
12449 molecular clock
12450 */
/* Node times are first adjusted in the two subtrees hanging from the root
   (directions 1 and 2) by the post-order helper. The two edges below the
   root are then examined, with length = (time of child - time of root) *
   clock rate. If the shorter one is below min_l, the root time is set to
   (smallest child time) - min_l / clock rate. */
void Inflate_Times_To_Get_Reasonnable_Edge_Lengths(phydbl min_l, t_tree *tree)
{
  t_node *root = tree->n_root;
  phydbl len1,len2;

  Post_Inflate_Times_To_Get_Reasonnable_Edge_Lengths(root,root->v[1],root->b[1],min_l,tree);
  Post_Inflate_Times_To_Get_Reasonnable_Edge_Lengths(root,root->v[2],root->b[2],min_l,tree);

  len1 = (tree->times->nd_t[root->v[1]->num] - tree->times->nd_t[root->num]) * tree->rates->clock_r;
  len2 = (tree->times->nd_t[root->v[2]->num] - tree->times->nd_t[root->num]) * tree->rates->clock_r;

  if(MIN(len1,len2) < min_l)
    {
      tree->times->nd_t[root->num] = -(min_l / tree->rates->clock_r -
                                       MIN(tree->times->nd_t[root->v[1]->num],
                                           tree->times->nd_t[root->v[2]->num]));
    }
}
12468
12469 /*////////////////////////////////////////////////////////////
12470 ////////////////////////////////////////////////////////////*/
12471
/* Post-order helper of Inflate_Times_To_Get_Reasonnable_Edge_Lengths().
   The subtrees below d (away from a, root edge excluded) are processed
   first. The two edges below d are then examined, with
   length = (time of child - time of d) * clock rate. If the shorter one is
   below min_l, the time of d is set to
   (smallest child time) - min_l / clock rate. Tips are left untouched.
   Argument b is not used in the body. */
void Post_Inflate_Times_To_Get_Reasonnable_Edge_Lengths(t_node *a, t_node *d, t_edge *b, phydbl min_l, t_tree *tree)
{
  int i,dir1,dir2;
  phydbl len1,len2;

  if(d->tax == YES) return;

  /* Process the subtrees and record the two downward directions on the way */
  dir1 = dir2 = -1;
  for(i=0;i<3;++i)
    {
      if(d->v[i] == a || d->b[i] == tree->e_root) continue;

      Post_Inflate_Times_To_Get_Reasonnable_Edge_Lengths(d,d->v[i],d->b[i],min_l,tree);

      if(dir1 < 0) dir1 = i;
      else         dir2 = i;
    }

  len1 = (tree->times->nd_t[d->v[dir1]->num] - tree->times->nd_t[d->num]) * tree->rates->clock_r;
  len2 = (tree->times->nd_t[d->v[dir2]->num] - tree->times->nd_t[d->num]) * tree->rates->clock_r;

  if(MIN(len1,len2) < min_l)
    {
      tree->times->nd_t[d->num] = -(min_l / tree->rates->clock_r - MIN(tree->times->nd_t[d->v[dir1]->num],tree->times->nd_t[d->v[dir2]->num]));
    }
}
12505
12506 /*////////////////////////////////////////////////////////////
12507 ////////////////////////////////////////////////////////////*/
/* Given up-to-date values of n->v[i] and n->b[i] for i=0,1,2
   and every node n in the tree, this function rebuilds the
   a_nodes and a_edges arrays (so that a_edges[b->num] = b and
   a_nodes[n->num] = n, for all b and n in the tree) and updates
   n->num and b->num accordingly.
*/
void Refactor_Tree(t_tree *tree)
{
  int first,idx_nd,idx_br;

  idx_nd = idx_br = 0;

  /* Pass 1: number tips and the edges attached to them, starting from the
     first non-NULL entry among the first n_otu slots of a_nodes */
  first = 0;
  while(first < tree->n_otu && tree->a_nodes[first] == NULL) ++first;

  if(first < tree->n_otu)
    {
      Refactor_External(tree->a_nodes[first],
                        tree->a_nodes[first]->v[0],
                        &idx_nd,tree);
    }

  assert(first < tree->n_otu);
  assert(idx_nd == tree->n_otu);

  /* Numbering of the remaining edges resumes where that of tips stopped */
  idx_br = idx_nd;

  /* Pass 2: number the remaining nodes and edges. a_nodes was rewritten by
     pass 1, hence the new search for a starting point */
  first = 0;
  while(first < tree->n_otu && tree->a_nodes[first] == NULL) ++first;

  if(first < tree->n_otu)
    {
      Refactor_Internal(tree->a_nodes[first],
                        tree->a_nodes[first]->v[0],
                        tree->a_nodes[first]->b[0],
                        &idx_nd,
                        &idx_br,
                        tree);
    }
}
12545
12546
12547 /*////////////////////////////////////////////////////////////
12548 ////////////////////////////////////////////////////////////*/
12549
/* Traverse the tree from d, moving away from a and never crossing the root
   edge, and give consecutive numbers (starting at *idx) to the tips met
   and to the edge attached to each of them (b[0]). a_nodes and a_edges are
   updated accordingly and *idx is incremented after each tip. Node a is
   numbered first if it is itself a tip. */
void Refactor_External(t_node *a, t_node *d, int *idx, t_tree *tree)
{
  int dir;

  if(a->tax == YES)
    {
      a->num              = *idx;
      a->b[0]->num        = *idx;
      tree->a_nodes[*idx] = a;
      tree->a_edges[*idx] = a->b[0];
      (*idx)+=1;
    }

  if(d->tax != YES)
    {
      /* Internal node: keep looking for tips further down */
      for(dir=0;dir<3;++dir)
        {
          if(d->v[dir] == a) continue;
          if(d->b[dir] == tree->e_root) continue;
          Refactor_External(d,d->v[dir],idx,tree);
        }
      return;
    }

  d->num              = *idx;
  d->b[0]->num        = *idx;
  tree->a_nodes[*idx] = d;
  tree->a_edges[*idx] = d->b[0];
  (*idx)+=1;
}
12582
12583 /*////////////////////////////////////////////////////////////
12584 ////////////////////////////////////////////////////////////*/
12585
/* Recursive traversal from node a towards node d that numbers the non-tip
   nodes and the edges joining two non-tip nodes.

   a      : node the traversal comes from
   d      : node the traversal moves to
   b      : edge passed along with (a,d); presumably the edge joining them
   idx_nd : next free node index, incremented for every non-tip node d
   idx_br : next free edge index, incremented for every edge numbered here
   tree   : tree whose a_nodes and a_edges arrays are filled in

   Directions of d that lead back to a, or whose edge is tree->e_root,
   are not followed.
*/
void Refactor_Internal(t_node *a, t_node *d, t_edge *b, int *idx_nd, int *idx_br, t_tree *tree)
{
  int dir;

  /* Tips are skipped here */
  if(d->tax == YES) return;

  tree->a_nodes[*idx_nd] = d;
  d->num = *idx_nd;
  ++(*idx_nd);

  /* Neither a nor d is a tip at this point, so b is numbered here */
  if(a->tax == NO)
    {
      tree->a_edges[*idx_br] = b;
      b->num = *idx_br;
      ++(*idx_br);
    }

  for(dir=0;dir<3;++dir)
    {
      if(d->v[dir] == a)            continue;
      if(d->b[dir] == tree->e_root) continue;
      Refactor_Internal(d,d->v[dir],d->b[dir],idx_nd,idx_br,tree);
    }
}
12611
12612 /*////////////////////////////////////////////////////////////
12613 ////////////////////////////////////////////////////////////*/
12614 // Haversine distance between (lon1,lat1) and (lon2,lat2) where
12615 // coordinates are expressed using decimals
Haversine_Distance(phydbl lon1,phydbl lat1,phydbl lon2,phydbl lat2)12616 phydbl Haversine_Distance(phydbl lon1, phydbl lat1, phydbl lon2, phydbl lat2)
12617 {
12618
12619 phydbl R = 6371.; // Earth radius, in km
12620 phydbl a,b;
12621
12622 lon1 = lon1 * PI / 180.;
12623 lat1 = lat1 * PI / 180.;
12624
12625 lon2 = lon2 * PI / 180.;
12626 lat2 = lat2 * PI / 180.;
12627
12628 a = pow(sin(.5*(lat2-lat1)),2);
12629 a += cos(lat1)*cos(lat2)*pow(sin(.5*(lon2-lon1)),2);
12630
12631 b = 2.*atan2(sqrt(a),sqrt(1.-a));
12632
12633 return(R * b);
12634 }
12635
12636
12637 /*////////////////////////////////////////////////////////////
12638 ////////////////////////////////////////////////////////////*/
12639 /*////////////////////////////////////////////////////////////
12640 ////////////////////////////////////////////////////////////*/
12641 /*////////////////////////////////////////////////////////////
12642 ////////////////////////////////////////////////////////////*/
12643 /*////////////////////////////////////////////////////////////
12644 ////////////////////////////////////////////////////////////*/
12645 /*////////////////////////////////////////////////////////////
12646 ////////////////////////////////////////////////////////////*/
12647 /*////////////////////////////////////////////////////////////
12648 ////////////////////////////////////////////////////////////*/
12649 /*////////////////////////////////////////////////////////////
12650 ////////////////////////////////////////////////////////////*/
12651 /*////////////////////////////////////////////////////////////
12652 ////////////////////////////////////////////////////////////*/
12653
12654