1 /*
2
3 PhyML: a program that computes maximum likelihood phylogenies from
4 DNA or AA homologous sequences.
5
6 Copyright (C) Stephane Guindon. Oct 2003 onward.
7
8 All parts of the source except where indicated are distributed under
9 the GNU public licence. See http://www.opensource.org for details.
10
11 */
12
13 #include "mpi_boot.h"
14
15 /* #ifdef MPI */
16
17 /*********************************************************/
18
19 /*
20 if tbe_bootstrap == 0 => Classical FBP (Felsenstein bootstrap proportions)
21 else => TBE (Transfer bootstrap expectation)
22 */
Bootstrap_MPI(t_tree * tree)23 void Bootstrap_MPI(t_tree *tree)
24 {
25 int *site_num, n_site;
26 int replicate,j,k;
27 int position, init_len, nbRep;
28
29 calign *boot_data;
30 t_tree *boot_tree;
31 t_mod *boot_mod;
32 matrix *boot_mat;
33 char *s;
34
35 MPI_Status Stat;
36 int randomRecv, bootRecv, nbElem, i;
37 int *score_par, *score_tot;
38 char *bootStr, *t;
39
40 if(tree->is_mixt_tree == YES)
41 {
42 PhyML_Printf("\n== Bootstrap option not yet available for partition/mixture analysis.");
43 assert(FALSE);
44 }
45
46 randomRecv = nbElem = bootRecv = 0;
47
48 tree->io->print_support_val = YES;
49
50 boot_tree = NULL;
51
52 site_num = (int *)mCalloc(tree->data->init_len,sizeof(int));
53
54 Free_Bip(tree);
55 Alloc_Bip(tree);
56 Get_Bip(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);
57
58 n_site = 0;
59 for(j=0;j<tree->data->crunch_len;j++) For(k,tree->data->wght[j])
60 {
61 site_num[n_site] = j;
62 n_site++;
63 }
64
65 boot_data = Copy_Cseq(tree->data,tree->io);
66
67 if(Global_numTask <= 1)
68 {
69 PhyML_Printf("\n\n. The number of CPUs used in the MPI bootstrap analysis should be");
70 PhyML_Printf("\n. strictly greater than 1 (it is equal to %d here).",Global_numTask);
71 assert(FALSE);
72 }
73
74 //number of bootstraps for each process
75 if (tree->io->n_boot_replicates%Global_numTask != 0)
76 {
77 nbRep = (tree->io->n_boot_replicates / Global_numTask) + 1;
78 tree->io->n_boot_replicates = nbRep * Global_numTask;
79 if (Global_myRank == 0) {
80 PhyML_Printf("\n\n. The number of replicates is not a multiple of %d CPUs.\n", Global_numTask);
81 PhyML_Printf("\n. Will run %d replicates.\n", tree->io->n_boot_replicates);
82 }
83 }
84 else
85 nbRep = tree->io->n_boot_replicates/Global_numTask;
86
87 //Bip score
88 if (Global_myRank == 0)
89 {
90 score_tot = (int *)mCalloc((2*tree->n_otu - 3),sizeof(int));
91 For(i,2*tree->n_otu-3)
92 score_tot[i] = 0;
93 }
94 else
95 score_tot = NULL;
96
97 score_par = (int *)mCalloc((2*tree->n_otu - 3),sizeof(int));
98 For(i,2*tree->n_otu-3)
99 score_par[i] = 0;
100
101 if (Global_myRank == 0)
102 PhyML_Printf("\n [");
103
104 for(replicate=0;replicate<nbRep;++replicate)
105 {
106 for(j=0;j<boot_data->crunch_len;j++) boot_data->wght[j] = 0;
107
108 // Send random data to other process
109 if (Global_myRank == 0)
110 {
111 // Compute number of data to send
112 if (tree->io->n_boot_replicates - randomRecv > Global_numTask)
113 nbElem = Global_numTask;
114 else
115 nbElem = tree->io->n_boot_replicates - randomRecv;
116
117 for(i=0;i<nbElem;i++)
118 {
119 for(j=0;j<boot_data->crunch_len;j++) boot_data->wght[j] = 0;
120
121 init_len = 0;
122 // Create random data
123 for(j=0;j<boot_data->init_len;j++)
124 {
125 position = Rand_Int(0,(int)(tree->data->init_len-1.0));
126 boot_data->wght[site_num[position]] += 1;
127 init_len++;
128 }
129
130
131 if (init_len != tree->data->init_len)
132 {
133 PhyML_Printf("\n== thread: %d || init: %d %d here\n",Global_myRank,init_len,tree->data->init_len);
134 fflush(NULL);
135 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
136 }
137
138 // Send random data to other process, not to current process
139 if (i < nbElem-1)
140 {
141 MPI_Ssend (boot_data->wght, boot_data->crunch_len, MPI_DOUBLE, i+1, Global_myRank, MPI_COMM_WORLD);
142 #ifdef MPI_DEBUG
143 fprintf (stderr, "\ntask %d, sending random to %d done\n", Global_myRank, i+1);
144 fflush(stderr);
145 #endif
146 }
147 randomRecv++;
148 }
149 }
150 else
151 {
152 MPI_Recv(boot_data->wght, boot_data->crunch_len, MPI_DOUBLE, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &Stat);
153 #ifdef MPI_DEBUG
154 fprintf (stderr, "\ntask %d, receiving random from task %d done\n", Global_myRank, Stat.MPI_SOURCE);
155 fflush(stderr);
156 #endif
157
158
159 }
160
161 init_len = 0;
162 for(j=0;j<boot_data->crunch_len;j++) init_len += boot_data->wght[j];
163
164
165 if(init_len != tree->data->init_len)
166 {
167 printf("\n== thread: %d init: %d %d\n",Global_myRank,init_len,tree->data->init_len);
168 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
169 }
170
171
172 (tree->mod->io->datatype == NT)?
173 (Get_Base_Freqs(boot_data)):
174 (Get_AA_Freqs(boot_data));
175
176
177 if(tree->io->random_boot_seq_order) Randomize_Sequence_Order(boot_data);
178
179 Set_D_States(boot_data,tree->io->datatype,tree->io->state_len);
180
181 boot_mod = Copy_Model(tree->mod);
182 boot_mod->s_opt = tree->mod->s_opt; /* WARNING: re-using the same address here instead of creating a copying
183 requires to leave the value of s_opt unchanged during the boostrap. */
184 boot_mod->io = tree->io; /* WARNING: re-using the same address here instead of creating a copying
185 requires to leave the value of io unchanged during the boostrap. */
186 Init_Model(boot_data,boot_mod,tree->io);
187
188 if(tree->io->in_tree == 2)
189 {
190 switch(tree->io->tree_file_format)
191 {
192 case PHYLIP:
193 {
194 rewind(tree->io->fp_in_tree);
195 boot_tree = Read_Tree_File_Phylip(tree->io->fp_in_tree);
196 break;
197 }
198 case NEXUS:
199 {
200 PhyML_Printf("\n== Unfortunately, PhyML cannot read NEXUS files and perform a bootstrap analysis.");
201 PhyML_Printf("\n== Please use the PHYLIP format.");
202 PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
203 Warn_And_Exit("");
204 break;
205 }
206 default:
207 {
208 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
209 Warn_And_Exit("");
210 break;
211 }
212 }
213 }
214 else
215 {
216 boot_mat = ML_Dist(boot_data,boot_mod);
217 boot_mat->tree = Make_Tree_From_Scratch(boot_data->n_otu,boot_data);
218 Fill_Missing_Dist(boot_mat);
219 Bionj(boot_mat);
220 boot_tree = boot_mat->tree;
221 boot_tree->mat = boot_mat;
222 }
223
224 boot_tree->mod = boot_mod;
225 boot_tree->io = tree->io;
226 boot_tree->data = boot_data;
227 boot_tree->verbose = VL0;
228 boot_tree->n_pattern = boot_tree->data->crunch_len;
229 boot_tree->io->print_site_lnl = 0;
230 boot_tree->io->print_trace = 0;
231
232 Set_Both_Sides(YES,boot_tree);
233
234 if((boot_tree->mod->s_opt->random_input_tree) && (boot_tree->mod->s_opt->topo_search == SPR_MOVE)) Random_Tree(boot_tree);
235
236 Connect_CSeqs_To_Nodes(boot_data,boot_tree->io,boot_tree);
237 Share_Lk_Struct(tree,boot_tree);
238 Share_Spr_Struct(tree,boot_tree);
239 Share_Pars_Struct(tree,boot_tree);
240 Fill_Dir_Table(boot_tree);
241 Update_Dirs(boot_tree);
242
243 Init_Partial_Lk_Tips_Double(boot_tree);
244 Init_Ui_Tips(boot_tree);
245 Init_Partial_Pars_Tips(boot_tree);
246 Br_Len_Not_Involving_Invar(boot_tree);
247
248 Set_Update_Eigen(YES,boot_tree->mod);
249 Lk(NULL,boot_tree);
250 Set_Update_Eigen(NO,boot_tree->mod);
251
252 if(boot_tree->mod->s_opt->opt_topo)
253 {
254 Global_Spr_Search(boot_tree);
255 }
256 else
257 {
258 if(boot_tree->mod->s_opt->opt_subst_param || boot_tree->mod->s_opt->opt_bl)
259 Round_Optimize(boot_tree,ROUND_MAX);
260 else
261 Lk(NULL,boot_tree);
262 }
263
264 Free_Bip(boot_tree);
265
266 Alloc_Bip(boot_tree);
267
268 Match_Tip_Numbers(tree,boot_tree);
269
270 Get_Bip(boot_tree->a_nodes[0],
271 boot_tree->a_nodes[0]->v[0],
272 boot_tree);
273
274 if(tree->io->do_boot)
275 {
276 Compare_Bip(tree,boot_tree,NO);
277 }
278 else if(tree->io->do_tbe)
279 {
280 Compare_Bip_Distance(tree, boot_tree);
281 }
282 else assert(FALSE);
283
284
285 Br_Len_Involving_Invar(boot_tree);
286
287 if(tree->io->print_boot_trees)
288 {
289 /* Insert_Duplicates(boot_tree); */
290 s = Write_Tree(boot_tree);
291 t=(char *)mCalloc(T_MAX_LINE,sizeof(char));
292 Print_Fp_Out_Lines_MPI(boot_tree, tree->io, replicate+1, t);
293
294 // Get bootstrap trees from other process and write to boot file
295 if (Global_myRank == 0) {
296 fprintf(tree->io->fp_out_boot_tree,"%s\n",s);
297 fprintf(tree->io->fp_out_boot_stats,"%s\n",t);
298 bootRecv++;
299 PhyML_Printf(".");
300 if(!((bootRecv)%tree->io->boot_prog_every))
301 {
302 PhyML_Printf("] %4d/%4d\n ",bootRecv,tree->io->n_boot_replicates);
303 if(bootRecv != tree->io->n_boot_replicates) PhyML_Printf("[");
304 }
305
306 // Compute number of bootstraps to receive
307 if (tree->io->n_boot_replicates - bootRecv > Global_numTask)
308 nbElem = Global_numTask;
309 else
310 nbElem = tree->io->n_boot_replicates - bootRecv + 1;
311
312 bootStr=(char *)mCalloc(T_MAX_LINE,sizeof(char));
313 for (i=1; i<nbElem; i++)
314 {
315 MPI_Recv (bootStr, T_MAX_LINE, MPI_CHAR, i, MPI_ANY_TAG, MPI_COMM_WORLD, &Stat);
316 #ifdef MPI_DEBUG
317 PhyML_Fprintf (stderr, "\ntask %d, receiving bootstrap from task %d tag %d done\n", Global_myRank, Stat.MPI_SOURCE, Stat.MPI_TAG);
318 fflush(stderr);
319 #endif
320 if (Stat.MPI_TAG == BootTreeTag)
321 fprintf(tree->io->fp_out_boot_tree,"%s\n", bootStr);
322 if (Stat.MPI_TAG == BootStatTag)
323 fprintf(tree->io->fp_out_boot_stats,"%s\n", bootStr);
324
325 MPI_Recv (bootStr, T_MAX_LINE, MPI_CHAR, i, MPI_ANY_TAG, MPI_COMM_WORLD, &Stat);
326 #ifdef MPI_DEBUG
327 fprintf (stderr, "\ntask %d, receiving bootstrap from task %d tag %d done\n", Global_myRank, Stat.MPI_SOURCE, Stat.MPI_TAG);
328 fflush(stderr);
329 #endif
330 if (Stat.MPI_TAG == BootTreeTag)
331 fprintf(tree->io->fp_out_boot_tree,"%s\n", bootStr);
332 if (Stat.MPI_TAG == BootStatTag)
333 fprintf(tree->io->fp_out_boot_stats,"%s\n", bootStr);
334
335 bootRecv++;
336 PhyML_Printf(".");
337 if(!((bootRecv)%tree->io->boot_prog_every)) {
338 PhyML_Printf("] %4d/%4d\n ",bootRecv,tree->io->n_boot_replicates);
339 if(bootRecv != tree->io->n_boot_replicates) PhyML_Printf("[");
340 }
341 }
342 Free(bootStr);
343 }
344 else {
345 MPI_Ssend (s, T_MAX_LINE, MPI_CHAR, 0, BootTreeTag, MPI_COMM_WORLD);
346 MPI_Ssend (t, T_MAX_LINE, MPI_CHAR, 0, BootStatTag, MPI_COMM_WORLD);
347 #ifdef MPI_DEBUG
348 fprintf (stderr, "\ntask %d, sending bootstraps done\n", Global_myRank);
349 fflush(stderr);
350 #endif
351 }
352 Free(t);
353 Free(s);
354 }
355
356 #ifndef QUIET
357 fflush(stdout);
358 #endif
359
360 /* if(boot_tree->mat) Free_Mat(boot_tree->mat); */
361 Free_Tree(boot_tree);
362 Free_Model(boot_mod);
363
364 //Each process computes the Bip score sum for all its bootstrap trees
365 For(i,2*tree->n_otu-3) score_par[i] = tree->a_edges[i]->bip_score;
366
367 //Each process sends its Bip score sum. The sums are summed.
368 MPI_Reduce(score_par, score_tot, 2*tree->n_otu - 3, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
369 }
370
371
372 if (Global_myRank == 0)
373 {
374 For(i,2*tree->n_otu-3)
375 tree->a_edges[i]->bip_score = score_tot[i];
376 Free (score_tot);
377 }
378 Free (score_par);
379
380 if (Global_myRank == 0)
381 if(((bootRecv)%tree->io->boot_prog_every)) PhyML_Printf("] %4d/%4d\n ",bootRecv,tree->io->n_boot_replicates);
382
383 PhyML_Printf("\n\n. Exiting bootstrap function normally."); fflush(NULL);
384 tree->lock_topo = 1; /* Topology should not be modified afterwards */
385
386 if(tree->io->print_boot_trees)
387 {
388 fclose(tree->io->fp_out_boot_tree);
389 fclose(tree->io->fp_out_boot_stats);
390 }
391
392 Free_Calign(boot_data);
393 Free(site_num);
394 }
395
396 /*********************************************************/
397
Print_Fp_Out_Lines_MPI(t_tree * tree,option * io,int n_data_set,char * bootStr)398 void Print_Fp_Out_Lines_MPI(t_tree *tree, option *io, int n_data_set, char *bootStr)
399 {
400 char *s, *tmp;
401
402 // Build a string to be sent to the writing process
403 s = (char *)mCalloc(T_MAX_LINE,sizeof(char));
404 tmp=(char *)mCalloc(T_MAX_LINE,sizeof(char));
405
406 if (Global_myRank == 0 && n_data_set == 1) {
407 snprintf(tmp, T_MAX_LINE, ". Sequence file : [%s]\n\n", Basename(io->in_align_file)); strncat (s, tmp, T_MAX_LINE);
408
409 (tree->mod->io->datatype == NT)?
410 (snprintf(tmp, T_MAX_LINE, ". Model of nucleotides substitution : %s\n\n", io->mod->modelname->s)):
411 (snprintf(tmp, T_MAX_LINE, ". Model of amino acids substitution : %s\n\n", io->mod->modelname->s));
412 strncat (s, tmp, T_MAX_LINE);
413
414 switch(io->in_tree)
415 {
416 case 0: { snprintf(tmp, T_MAX_LINE, ". Initial tree : [BioNJ]\n\n"); break; }
417 case 1: { snprintf(tmp, T_MAX_LINE, ". Initial tree : [parsimony]\n\n"); break; }
418 case 2: { snprintf(tmp, T_MAX_LINE, ". Initial tree : [%s]\n\n",io->in_tree_file); break; }
419 }
420 strncat (s, tmp, T_MAX_LINE);
421
422 strncat (s, "\n", T_MAX_LINE);
423
424 /*headline 1*/
425 strncat (s, ". Data\t", T_MAX_LINE);
426
427 strncat (s, "Nb of \t", T_MAX_LINE);
428
429 strncat (s, "Likelihood\t", T_MAX_LINE);
430
431 strncat (s, "Discrete \t", T_MAX_LINE);
432
433 if(tree->mod->ras->n_catg > 1)
434 strncat (s, "Number of \tGamma shape\t", T_MAX_LINE);
435
436 strncat (s, "Proportion of\t", T_MAX_LINE);
437
438 if(tree->mod->whichmodel <= 6)
439 strncat (s, "Transition/ \t", T_MAX_LINE);
440
441 strncat (s, "Nucleotides frequencies \t", T_MAX_LINE);
442
443 if((tree->mod->whichmodel == GTR) ||
444 (tree->mod->whichmodel == CUSTOM))
445 strncat (s, "Instantaneous rate matrix \t", T_MAX_LINE);
446
447 strncat (s, "\n", T_MAX_LINE);
448
449 /*headline 2*/
450 strncat (s, " set\t", T_MAX_LINE);
451
452 strncat (s, "taxa\t", T_MAX_LINE);
453
454 strncat (s, "loglk \t", T_MAX_LINE);
455
456 strncat (s, "gamma model\t", T_MAX_LINE);
457
458 if(tree->mod->ras->n_catg > 1)
459 strncat (s, "categories\tparameter \t", T_MAX_LINE);
460
461 strncat (s, "invariant \t", T_MAX_LINE);
462
463 if(tree->mod->whichmodel <= 6)
464 strncat (s, "transversion\t", T_MAX_LINE);
465
466 strncat (s, "f(A) f(C) f(G) f(T) \t", T_MAX_LINE);
467
468 if((tree->mod->whichmodel == GTR) ||
469 (tree->mod->whichmodel == CUSTOM))
470 strncat (s, "[A---------C---------G---------T------]\t", T_MAX_LINE);
471
472 strncat (s, "\n", T_MAX_LINE);
473
474 /*headline 3*/
475 if(tree->mod->whichmodel == TN93) {
476 strncat (s, " \t \t \t \t", T_MAX_LINE);
477 if(tree->mod->ras->n_catg > 1)
478 strncat (s, " \t \t", T_MAX_LINE);
479 strncat (s, " \t", T_MAX_LINE);
480 strncat (s, "purines pyrimid.\t", T_MAX_LINE);
481 strncat (s, "\n", T_MAX_LINE);
482 }
483
484 strncat (s, "\n", T_MAX_LINE);
485 }
486
487 /*line items*/
488
489 snprintf(tmp, T_MAX_LINE, " #%d\t", (((n_data_set-1)*Global_numTask)+Global_myRank+1));
490 strncat (s, tmp, T_MAX_LINE);
491
492 snprintf(tmp, T_MAX_LINE, "%d \t",tree->n_otu); strncat (s, tmp, T_MAX_LINE);
493
494 snprintf(tmp, T_MAX_LINE, "%.5f\t",tree->c_lnL); strncat (s, tmp, T_MAX_LINE);
495
496 snprintf(tmp, T_MAX_LINE, "%s \t",
497 (tree->mod->ras->n_catg>1)?("Yes"):("No ")); strncat (s, tmp, T_MAX_LINE);
498
499 if(tree->mod->ras->n_catg > 1)
500 {
501 snprintf(tmp, T_MAX_LINE, "%d \t",tree->mod->ras->n_catg); strncat (s, tmp, T_MAX_LINE);
502 snprintf(tmp, T_MAX_LINE, "%.3f \t",tree->mod->ras->alpha->v); strncat (s, tmp, T_MAX_LINE);
503 }
504
505 snprintf(tmp, T_MAX_LINE, "%.3f \t",tree->mod->ras->pinvar->v); strncat (s, tmp, T_MAX_LINE);
506
507 if(tree->mod->whichmodel <= 5)
508 {
509 snprintf(tmp, T_MAX_LINE, "%.3f \t",tree->mod->kappa->v); strncat (s, tmp, T_MAX_LINE);
510 }
511 else if(tree->mod->whichmodel == TN93)
512 {
513 snprintf(tmp, T_MAX_LINE, "%.3f ",
514 tree->mod->kappa->v*2.*tree->mod->lambda->v/(1.+tree->mod->lambda->v));
515 strncat (s, tmp, T_MAX_LINE);
516 snprintf(tmp, T_MAX_LINE, "%.3f\t",
517 tree->mod->kappa->v*2./(1.+tree->mod->lambda->v));
518 strncat (s, tmp, T_MAX_LINE);
519 }
520
521 if(tree->mod->io->datatype == NT)
522 {
523 snprintf(tmp, T_MAX_LINE, "%8.5f ",tree->mod->e_frq->pi->v[0]); strncat (s, tmp, T_MAX_LINE);
524 snprintf(tmp, T_MAX_LINE, "%8.5f ",tree->mod->e_frq->pi->v[1]); strncat (s, tmp, T_MAX_LINE);
525 snprintf(tmp, T_MAX_LINE, "%8.5f ",tree->mod->e_frq->pi->v[2]); strncat (s, tmp, T_MAX_LINE);
526 snprintf(tmp, T_MAX_LINE, "%8.5f\t",tree->mod->e_frq->pi->v[3]); strncat (s, tmp, T_MAX_LINE);
527 }
528
529 if((tree->mod->whichmodel == GTR) || (tree->mod->whichmodel == CUSTOM))
530 {
531 int i,j;
532
533 for(i=0;i<4;i++)
534 {
535 if (i!=0) {
536 /*format*/
537 snprintf(tmp, T_MAX_LINE, " \t \t \t \t");
538 strncat (s, tmp, T_MAX_LINE);
539 if(tree->mod->ras->n_catg > 1) {
540 snprintf(tmp, T_MAX_LINE, " \t \t");
541 strncat (s, tmp, T_MAX_LINE);
542 }
543 snprintf(tmp, T_MAX_LINE, " \t \t");
544 strncat (s, tmp, T_MAX_LINE);
545 }
546 for(j=0;j<4;j++) {
547 snprintf(tmp, T_MAX_LINE, "%8.5f ",tree->mod->r_mat->qmat->v[i*4+j]);
548 strncat (s, tmp, T_MAX_LINE);
549 }
550 if (i<3) {
551 snprintf(tmp, T_MAX_LINE, "\n");
552 strncat (s, tmp, T_MAX_LINE);
553 }
554 }
555 }
556
557 Free (tmp);
558 strncpy (bootStr, s, T_MAX_LINE);
559 Free (s);
560
561 return;
562 }
563 /* #endif */
564