1 /*
2 
3 PhyML:  a program that  computes maximum likelihood phylogenies from
4 DNA or AA homologous sequences.
5 
6 Copyright (C) Stephane Guindon. Oct 2003 onward.
7 
8 All parts of the source except where indicated are distributed under
9 the GNU public licence. See http://www.opensource.org for details.
10 
11 */
12 
13 #include "mpi_boot.h"
14 
15 /* #ifdef MPI */
16 
17 /*********************************************************/
18 
19 /*
20    if tbe_bootstrap == 0 => Classical FBP (Felsenstein bootstrap proportions)
21    else => TBE (Transfer bootstrap expectation)
22 */
Bootstrap_MPI(t_tree * tree)23 void Bootstrap_MPI(t_tree *tree)
24 {
25   int *site_num, n_site;
26   int replicate,j,k;
27   int position, init_len, nbRep;
28 
29   calign *boot_data;
30   t_tree *boot_tree;
31   t_mod *boot_mod;
32   matrix *boot_mat;
33   char *s;
34 
35   MPI_Status Stat;
36   int randomRecv, bootRecv, nbElem, i;
37   int *score_par, *score_tot;
38   char *bootStr, *t;
39 
40   if(tree->is_mixt_tree == YES)
41     {
42       PhyML_Printf("\n== Bootstrap option not yet available for partition/mixture analysis.");
43       assert(FALSE);
44     }
45 
46   randomRecv = nbElem = bootRecv = 0;
47 
48   tree->io->print_support_val = YES;
49 
50   boot_tree = NULL;
51 
52   site_num = (int *)mCalloc(tree->data->init_len,sizeof(int));
53 
54   Free_Bip(tree);
55   Alloc_Bip(tree);
56   Get_Bip(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);
57 
58   n_site = 0;
59   for(j=0;j<tree->data->crunch_len;j++) For(k,tree->data->wght[j])
60     {
61       site_num[n_site] = j;
62       n_site++;
63     }
64 
65   boot_data = Copy_Cseq(tree->data,tree->io);
66 
67   if(Global_numTask <= 1)
68     {
69       PhyML_Printf("\n\n. The number of CPUs used in the MPI bootstrap analysis should be");
70       PhyML_Printf("\n. strictly greater than 1 (it is equal to %d here).",Global_numTask);
71       assert(FALSE);
72     }
73 
74   //number of bootstraps for each process
75   if (tree->io->n_boot_replicates%Global_numTask != 0)
76     {
77       nbRep = (tree->io->n_boot_replicates / Global_numTask) + 1;
78       tree->io->n_boot_replicates = nbRep * Global_numTask;
79       if (Global_myRank == 0) {
80         PhyML_Printf("\n\n. The number of replicates is not a multiple of %d CPUs.\n", Global_numTask);
81         PhyML_Printf("\n. Will run %d replicates.\n", tree->io->n_boot_replicates);
82       }
83     }
84   else
85     nbRep = tree->io->n_boot_replicates/Global_numTask;
86 
87   //Bip score
88   if (Global_myRank == 0)
89     {
90       score_tot = (int *)mCalloc((2*tree->n_otu - 3),sizeof(int));
91       For(i,2*tree->n_otu-3)
92         score_tot[i] = 0;
93     }
94   else
95     score_tot = NULL;
96 
97   score_par = (int *)mCalloc((2*tree->n_otu - 3),sizeof(int));
98   For(i,2*tree->n_otu-3)
99     score_par[i] = 0;
100 
101   if (Global_myRank == 0)
102     PhyML_Printf("\n  [");
103 
104   for(replicate=0;replicate<nbRep;++replicate)
105     {
106       for(j=0;j<boot_data->crunch_len;j++) boot_data->wght[j] = 0;
107 
108       // Send random data to other process
109       if (Global_myRank == 0)
110         {
111           // Compute number of data to send
112           if (tree->io->n_boot_replicates - randomRecv > Global_numTask)
113             nbElem = Global_numTask;
114           else
115             nbElem = tree->io->n_boot_replicates - randomRecv;
116 
117           for(i=0;i<nbElem;i++)
118             {
119               for(j=0;j<boot_data->crunch_len;j++) boot_data->wght[j] = 0;
120 
121               init_len = 0;
122               // Create random data
123               for(j=0;j<boot_data->init_len;j++)
124                 {
125                   position = Rand_Int(0,(int)(tree->data->init_len-1.0));
126                   boot_data->wght[site_num[position]] += 1;
127                   init_len++;
128                 }
129 
130 
131               if (init_len != tree->data->init_len)
132                 {
133                   PhyML_Printf("\n== thread: %d || init: %d %d here\n",Global_myRank,init_len,tree->data->init_len);
134                   fflush(NULL);
135                   Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
136                 }
137 
138               // Send random data to other process, not to current process
139               if (i < nbElem-1)
140                 {
141                   MPI_Ssend (boot_data->wght, boot_data->crunch_len, MPI_DOUBLE, i+1, Global_myRank, MPI_COMM_WORLD);
142 #ifdef MPI_DEBUG
143                   fprintf (stderr, "\ntask %d, sending random to %d done\n", Global_myRank, i+1);
144                   fflush(stderr);
145 #endif
146                 }
147               randomRecv++;
148             }
149         }
150       else
151         {
152           MPI_Recv(boot_data->wght, boot_data->crunch_len, MPI_DOUBLE, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &Stat);
153 #ifdef MPI_DEBUG
154           fprintf (stderr, "\ntask %d, receiving random from task %d done\n", Global_myRank, Stat.MPI_SOURCE);
155           fflush(stderr);
156 #endif
157 
158 
159         }
160 
161       init_len = 0;
162       for(j=0;j<boot_data->crunch_len;j++) init_len += boot_data->wght[j];
163 
164 
165       if(init_len != tree->data->init_len)
166         {
167           printf("\n== thread: %d init: %d %d\n",Global_myRank,init_len,tree->data->init_len);
168           Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
169         }
170 
171 
172       (tree->mod->io->datatype == NT)?
173         (Get_Base_Freqs(boot_data)):
174         (Get_AA_Freqs(boot_data));
175 
176 
177       if(tree->io->random_boot_seq_order) Randomize_Sequence_Order(boot_data);
178 
179       Set_D_States(boot_data,tree->io->datatype,tree->io->state_len);
180 
181       boot_mod        = Copy_Model(tree->mod);
182       boot_mod->s_opt = tree->mod->s_opt; /* WARNING: re-using the same address here instead of creating a copying
183 					     requires to leave the value of s_opt unchanged during the boostrap. */
184       boot_mod->io    = tree->io; /* WARNING: re-using the same address here instead of creating a copying
185 				     requires to leave the value of io unchanged during the boostrap. */
186       Init_Model(boot_data,boot_mod,tree->io);
187 
188       if(tree->io->in_tree == 2)
189         {
190 	  switch(tree->io->tree_file_format)
191 	    {
192 	    case PHYLIP:
193 	      {
194 		rewind(tree->io->fp_in_tree);
195 		boot_tree = Read_Tree_File_Phylip(tree->io->fp_in_tree);
196 		break;
197 	      }
198 	    case NEXUS:
199 	      {
200 		PhyML_Printf("\n== Unfortunately, PhyML cannot read NEXUS files and perform a bootstrap analysis.");
201 		PhyML_Printf("\n== Please use the PHYLIP format.");
202 		PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
203 		Warn_And_Exit("");
204 		break;
205 	      }
206 	    default:
207 	      {
208 		PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
209 		Warn_And_Exit("");
210 		break;
211 	      }
212 	    }
213 	}
214       else
215         {
216           boot_mat = ML_Dist(boot_data,boot_mod);
217           boot_mat->tree = Make_Tree_From_Scratch(boot_data->n_otu,boot_data);
218           Fill_Missing_Dist(boot_mat);
219           Bionj(boot_mat);
220           boot_tree = boot_mat->tree;
221           boot_tree->mat = boot_mat;
222         }
223 
224       boot_tree->mod                = boot_mod;
225       boot_tree->io                 = tree->io;
226       boot_tree->data               = boot_data;
227       boot_tree->verbose            = VL0;
228       boot_tree->n_pattern          = boot_tree->data->crunch_len;
229       boot_tree->io->print_site_lnl = 0;
230       boot_tree->io->print_trace    = 0;
231 
232       Set_Both_Sides(YES,boot_tree);
233 
234       if((boot_tree->mod->s_opt->random_input_tree) && (boot_tree->mod->s_opt->topo_search == SPR_MOVE)) Random_Tree(boot_tree);
235 
236       Connect_CSeqs_To_Nodes(boot_data,boot_tree->io,boot_tree);
237       Share_Lk_Struct(tree,boot_tree);
238       Share_Spr_Struct(tree,boot_tree);
239       Share_Pars_Struct(tree,boot_tree);
240       Fill_Dir_Table(boot_tree);
241       Update_Dirs(boot_tree);
242 
243       Init_Partial_Lk_Tips_Double(boot_tree);
244       Init_Ui_Tips(boot_tree);
245       Init_Partial_Pars_Tips(boot_tree);
246       Br_Len_Not_Involving_Invar(boot_tree);
247 
248       Set_Update_Eigen(YES,boot_tree->mod);
249       Lk(NULL,boot_tree);
250       Set_Update_Eigen(NO,boot_tree->mod);
251 
252       if(boot_tree->mod->s_opt->opt_topo)
253         {
254           Global_Spr_Search(boot_tree);
255         }
256       else
257         {
258           if(boot_tree->mod->s_opt->opt_subst_param || boot_tree->mod->s_opt->opt_bl)
259             Round_Optimize(boot_tree,ROUND_MAX);
260           else
261             Lk(NULL,boot_tree);
262         }
263 
264       Free_Bip(boot_tree);
265 
266       Alloc_Bip(boot_tree);
267 
268       Match_Tip_Numbers(tree,boot_tree);
269 
270       Get_Bip(boot_tree->a_nodes[0],
271               boot_tree->a_nodes[0]->v[0],
272               boot_tree);
273 
274       if(tree->io->do_boot)
275         {
276           Compare_Bip(tree,boot_tree,NO);
277         }
278       else if(tree->io->do_tbe)
279         {
280           Compare_Bip_Distance(tree, boot_tree);
281         }
282       else assert(FALSE);
283 
284 
285       Br_Len_Involving_Invar(boot_tree);
286 
287       if(tree->io->print_boot_trees)
288         {
289           /* Insert_Duplicates(boot_tree); */
290           s = Write_Tree(boot_tree);
291           t=(char *)mCalloc(T_MAX_LINE,sizeof(char));
292           Print_Fp_Out_Lines_MPI(boot_tree, tree->io, replicate+1, t);
293 
294           // Get bootstrap trees from other process and write to boot file
295           if (Global_myRank == 0) {
296             fprintf(tree->io->fp_out_boot_tree,"%s\n",s);
297             fprintf(tree->io->fp_out_boot_stats,"%s\n",t);
298             bootRecv++;
299             PhyML_Printf(".");
300 	    if(!((bootRecv)%tree->io->boot_prog_every))
301 	      {
302 		PhyML_Printf("] %4d/%4d\n  ",bootRecv,tree->io->n_boot_replicates);
303 		if(bootRecv != tree->io->n_boot_replicates) PhyML_Printf("[");
304 	      }
305 
306             // Compute number of bootstraps to receive
307             if (tree->io->n_boot_replicates - bootRecv > Global_numTask)
308               nbElem = Global_numTask;
309             else
310               nbElem = tree->io->n_boot_replicates - bootRecv + 1;
311 
312             bootStr=(char *)mCalloc(T_MAX_LINE,sizeof(char));
313             for (i=1; i<nbElem; i++)
314 	      {
315 		MPI_Recv (bootStr, T_MAX_LINE, MPI_CHAR, i, MPI_ANY_TAG, MPI_COMM_WORLD, &Stat);
316 #ifdef MPI_DEBUG
317 		PhyML_Fprintf (stderr, "\ntask %d, receiving bootstrap from task %d tag %d done\n", Global_myRank, Stat.MPI_SOURCE, Stat.MPI_TAG);
318 		fflush(stderr);
319 #endif
320 		if (Stat.MPI_TAG == BootTreeTag)
321 		  fprintf(tree->io->fp_out_boot_tree,"%s\n", bootStr);
322 		if (Stat.MPI_TAG == BootStatTag)
323 		  fprintf(tree->io->fp_out_boot_stats,"%s\n", bootStr);
324 
325 		MPI_Recv (bootStr, T_MAX_LINE, MPI_CHAR, i, MPI_ANY_TAG, MPI_COMM_WORLD, &Stat);
326 #ifdef MPI_DEBUG
327 		fprintf (stderr, "\ntask %d, receiving bootstrap from task %d tag %d done\n", Global_myRank, Stat.MPI_SOURCE, Stat.MPI_TAG);
328 		fflush(stderr);
329 #endif
330 		if (Stat.MPI_TAG == BootTreeTag)
331 		  fprintf(tree->io->fp_out_boot_tree,"%s\n", bootStr);
332 		if (Stat.MPI_TAG == BootStatTag)
333 		  fprintf(tree->io->fp_out_boot_stats,"%s\n", bootStr);
334 
335 		bootRecv++;
336 		PhyML_Printf(".");
337 		if(!((bootRecv)%tree->io->boot_prog_every)) {
338 		  PhyML_Printf("] %4d/%4d\n  ",bootRecv,tree->io->n_boot_replicates);
339 		  if(bootRecv != tree->io->n_boot_replicates) PhyML_Printf("[");
340 		}
341 	      }
342             Free(bootStr);
343           }
344           else {
345 	    MPI_Ssend (s, T_MAX_LINE, MPI_CHAR, 0, BootTreeTag, MPI_COMM_WORLD);
346 	    MPI_Ssend (t, T_MAX_LINE, MPI_CHAR, 0, BootStatTag, MPI_COMM_WORLD);
347 #ifdef MPI_DEBUG
348 fprintf (stderr, "\ntask %d, sending bootstraps done\n", Global_myRank);
349 fflush(stderr);
350 #endif
351           }
352           Free(t);
353           Free(s);
354         }
355 
356 #ifndef QUIET
357 fflush(stdout);
358 #endif
359 
360       /* if(boot_tree->mat) Free_Mat(boot_tree->mat); */
361       Free_Tree(boot_tree);
362       Free_Model(boot_mod);
363 
364       //Each process computes the Bip score sum for all its bootstrap trees
365       For(i,2*tree->n_otu-3) score_par[i] = tree->a_edges[i]->bip_score;
366 
367       //Each process sends its Bip score sum. The sums are summed.
368       MPI_Reduce(score_par, score_tot, 2*tree->n_otu - 3, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
369     }
370 
371 
372   if (Global_myRank == 0)
373     {
374       For(i,2*tree->n_otu-3)
375         tree->a_edges[i]->bip_score = score_tot[i];
376       Free (score_tot);
377     }
378   Free (score_par);
379 
380   if (Global_myRank == 0)
381     if(((bootRecv)%tree->io->boot_prog_every)) PhyML_Printf("] %4d/%4d\n ",bootRecv,tree->io->n_boot_replicates);
382 
383   PhyML_Printf("\n\n. Exiting bootstrap function normally."); fflush(NULL);
384   tree->lock_topo = 1; /* Topology should not be modified afterwards */
385 
386   if(tree->io->print_boot_trees)
387     {
388       fclose(tree->io->fp_out_boot_tree);
389       fclose(tree->io->fp_out_boot_stats);
390     }
391 
392   Free_Calign(boot_data);
393   Free(site_num);
394 }
395 
396 /*********************************************************/
397 
Print_Fp_Out_Lines_MPI(t_tree * tree,option * io,int n_data_set,char * bootStr)398 void Print_Fp_Out_Lines_MPI(t_tree *tree, option *io, int n_data_set, char *bootStr)
399 {
400   char *s, *tmp;
401 
402   // Build a string to be sent to the writing process
403   s = (char *)mCalloc(T_MAX_LINE,sizeof(char));
404   tmp=(char *)mCalloc(T_MAX_LINE,sizeof(char));
405 
406   if (Global_myRank == 0 && n_data_set == 1) {
407     snprintf(tmp, T_MAX_LINE, ". Sequence file : [%s]\n\n", Basename(io->in_align_file)); strncat (s, tmp, T_MAX_LINE);
408 
409     (tree->mod->io->datatype == NT)?
410       (snprintf(tmp, T_MAX_LINE, ". Model of nucleotides substitution : %s\n\n", io->mod->modelname->s)):
411       (snprintf(tmp, T_MAX_LINE, ". Model of amino acids substitution : %s\n\n", io->mod->modelname->s));
412     strncat (s, tmp, T_MAX_LINE);
413 
414     switch(io->in_tree)
415       {
416       case 0: { snprintf(tmp, T_MAX_LINE, ". Initial tree : [BioNJ]\n\n");               break; }
417       case 1: { snprintf(tmp, T_MAX_LINE, ". Initial tree : [parsimony]\n\n");           break; }
418       case 2: { snprintf(tmp, T_MAX_LINE, ". Initial tree : [%s]\n\n",io->in_tree_file); break; }
419       }
420     strncat (s, tmp, T_MAX_LINE);
421 
422     strncat (s, "\n", T_MAX_LINE);
423 
424     /*headline 1*/
425     strncat (s, ". Data\t", T_MAX_LINE);
426 
427     strncat (s, "Nb of \t", T_MAX_LINE);
428 
429     strncat (s, "Likelihood\t", T_MAX_LINE);
430 
431     strncat (s, "Discrete   \t", T_MAX_LINE);
432 
433     if(tree->mod->ras->n_catg > 1)
434       strncat (s, "Number of \tGamma shape\t", T_MAX_LINE);
435 
436     strncat (s, "Proportion of\t", T_MAX_LINE);
437 
438     if(tree->mod->whichmodel <= 6)
439       strncat (s, "Transition/ \t", T_MAX_LINE);
440 
441     strncat (s, "Nucleotides frequencies               \t", T_MAX_LINE);
442 
443     if((tree->mod->whichmodel == GTR) ||
444        (tree->mod->whichmodel == CUSTOM))
445       strncat (s, "Instantaneous rate matrix              \t", T_MAX_LINE);
446 
447     strncat (s, "\n", T_MAX_LINE);
448 
449     /*headline 2*/
450     strncat (s, "  set\t", T_MAX_LINE);
451 
452     strncat (s, "taxa\t", T_MAX_LINE);
453 
454     strncat (s, "loglk     \t", T_MAX_LINE);
455 
456     strncat (s, "gamma model\t", T_MAX_LINE);
457 
458     if(tree->mod->ras->n_catg > 1)
459       strncat (s, "categories\tparameter  \t", T_MAX_LINE);
460 
461     strncat (s, "invariant    \t", T_MAX_LINE);
462 
463     if(tree->mod->whichmodel <= 6)
464       strncat (s, "transversion\t", T_MAX_LINE);
465 
466     strncat (s, "f(A)      f(C)      f(G)      f(T)    \t", T_MAX_LINE);
467 
468     if((tree->mod->whichmodel == GTR) ||
469        (tree->mod->whichmodel == CUSTOM))
470       strncat (s, "[A---------C---------G---------T------]\t", T_MAX_LINE);
471 
472     strncat (s, "\n", T_MAX_LINE);
473 
474     /*headline 3*/
475     if(tree->mod->whichmodel == TN93) {
476       strncat (s, "    \t      \t          \t           \t", T_MAX_LINE);
477       if(tree->mod->ras->n_catg > 1)
478         strncat (s, "         \t         \t", T_MAX_LINE);
479       strncat (s, "             \t", T_MAX_LINE);
480       strncat (s, "purines pyrimid.\t", T_MAX_LINE);
481       strncat (s, "\n", T_MAX_LINE);
482     }
483 
484     strncat (s, "\n", T_MAX_LINE);
485   }
486 
487   /*line items*/
488 
489   snprintf(tmp, T_MAX_LINE, "  #%d\t", (((n_data_set-1)*Global_numTask)+Global_myRank+1));
490   strncat (s, tmp, T_MAX_LINE);
491 
492   snprintf(tmp, T_MAX_LINE, "%d   \t",tree->n_otu); strncat (s, tmp, T_MAX_LINE);
493 
494   snprintf(tmp, T_MAX_LINE, "%.5f\t",tree->c_lnL); strncat (s, tmp, T_MAX_LINE);
495 
496   snprintf(tmp, T_MAX_LINE, "%s        \t",
497           (tree->mod->ras->n_catg>1)?("Yes"):("No ")); strncat (s, tmp, T_MAX_LINE);
498 
499   if(tree->mod->ras->n_catg > 1)
500     {
501       snprintf(tmp, T_MAX_LINE, "%d        \t",tree->mod->ras->n_catg); strncat (s, tmp, T_MAX_LINE);
502       snprintf(tmp, T_MAX_LINE, "%.3f    \t",tree->mod->ras->alpha->v); strncat (s, tmp, T_MAX_LINE);
503     }
504 
505   snprintf(tmp, T_MAX_LINE, "%.3f    \t",tree->mod->ras->pinvar->v); strncat (s, tmp, T_MAX_LINE);
506 
507   if(tree->mod->whichmodel <= 5)
508     {
509       snprintf(tmp, T_MAX_LINE, "%.3f     \t",tree->mod->kappa->v); strncat (s, tmp, T_MAX_LINE);
510     }
511   else if(tree->mod->whichmodel == TN93)
512     {
513       snprintf(tmp, T_MAX_LINE, "%.3f   ",
514               tree->mod->kappa->v*2.*tree->mod->lambda->v/(1.+tree->mod->lambda->v));
515       strncat (s, tmp, T_MAX_LINE);
516       snprintf(tmp, T_MAX_LINE, "%.3f\t",
517               tree->mod->kappa->v*2./(1.+tree->mod->lambda->v));
518       strncat (s, tmp, T_MAX_LINE);
519     }
520 
521   if(tree->mod->io->datatype == NT)
522     {
523       snprintf(tmp, T_MAX_LINE, "%8.5f  ",tree->mod->e_frq->pi->v[0]); strncat (s, tmp, T_MAX_LINE);
524       snprintf(tmp, T_MAX_LINE, "%8.5f  ",tree->mod->e_frq->pi->v[1]); strncat (s, tmp, T_MAX_LINE);
525       snprintf(tmp, T_MAX_LINE, "%8.5f  ",tree->mod->e_frq->pi->v[2]); strncat (s, tmp, T_MAX_LINE);
526       snprintf(tmp, T_MAX_LINE, "%8.5f\t",tree->mod->e_frq->pi->v[3]); strncat (s, tmp, T_MAX_LINE);
527     }
528 
529   if((tree->mod->whichmodel == GTR) || (tree->mod->whichmodel == CUSTOM))
530     {
531       int i,j;
532 
533       for(i=0;i<4;i++)
534         {
535           if (i!=0) {
536             /*format*/
537             snprintf(tmp, T_MAX_LINE, "      \t     \t          \t           \t");
538             strncat (s, tmp, T_MAX_LINE);
539             if(tree->mod->ras->n_catg > 1) {
540               snprintf(tmp, T_MAX_LINE, "          \t           \t");
541               strncat (s, tmp, T_MAX_LINE);
542             }
543             snprintf(tmp, T_MAX_LINE, "             \t                                      \t");
544             strncat (s, tmp, T_MAX_LINE);
545           }
546           for(j=0;j<4;j++) {
547             snprintf(tmp, T_MAX_LINE, "%8.5f  ",tree->mod->r_mat->qmat->v[i*4+j]);
548             strncat (s, tmp, T_MAX_LINE);
549           }
550           if (i<3) {
551             snprintf(tmp, T_MAX_LINE, "\n");
552             strncat (s, tmp, T_MAX_LINE);
553           }
554         }
555     }
556 
557     Free (tmp);
558     strncpy (bootStr, s, T_MAX_LINE);
559     Free (s);
560 
561   return;
562 }
563 /* #endif */
564