1 /***************************************************************************
2  *   Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler   *
3  *   minh.bui@univie.ac.at   *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  ***************************************************************************/
20 
21 #ifdef HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 
25 #include <iqtree_config.h>
26 
27 #if defined WIN32 || defined _WIN32 || defined __WIN32__
28 //#include <winsock2.h>
29 //#include <windows.h>
30 //extern __declspec(dllexport) int gethostname(char *name, int namelen);
31 #else
32 #include <sys/resource.h>
33 #endif
34 
35 //#include "Eigen/Core"
36 #include <stdio.h>
37 #include "tree/phylotree.h"
38 #include <signal.h>
39 #include <cstdio>
40 #include <streambuf>
41 #include <iostream>
42 #include <cstdlib>
43 #include <errno.h>
44 #include "pda/greedy.h"
45 #include "pda/pruning.h"
46 //#include "naivegreedy.h"
47 #include "pda/splitgraph.h"
48 #include "pda/circularnetwork.h"
49 #include "tree/mtreeset.h"
50 #include "tree/mexttree.h"
51 #include "ncl/ncl.h"
52 #include "nclextra/msetsblock.h"
53 #include "nclextra/myreader.h"
54 #include "phyloanalysis.h"
55 #include "tree/matree.h"
56 //#include "ngs.h"
57 #include "obsolete/parsmultistate.h"
58 //#include "gss.h"
59 #include "alignment/maalignment.h" //added by MA
60 #include "tree/ncbitree.h"
61 #include "pda/ecopd.h"
62 #include "tree/upperbounds.h"
63 #include "pda/ecopdmtreeset.h"
64 #include "pda/gurobiwrapper.h"
65 #include "utils/timeutil.h"
66 //#include <unistd.h>
67 #include <stdlib.h>
68 #include "vectorclass/instrset.h"
69 
70 #include "utils/MPIHelper.h"
71 
72 #ifdef _OPENMP
73     #include <omp.h>
74 #endif
75 
76 using namespace std;
77 
78 
79 
generateRandomTree(Params & params)80 void generateRandomTree(Params &params)
81 {
82     if (params.sub_size < 3 && !params.aln_file) {
83         outError(ERR_FEW_TAXA);
84     }
85 
86     if (!params.user_file) {
87         outError("Please specify an output tree file name");
88     }
89     ////cout << "Random number seed: " << params.ran_seed << endl << endl;
90 
91     SplitGraph sg;
92 
93     try {
94 
95         if (params.tree_gen == YULE_HARDING || params.tree_gen == CATERPILLAR ||
96             params.tree_gen == BALANCED || params.tree_gen == UNIFORM || params.tree_gen == STAR_TREE) {
97             if (!overwriteFile(params.user_file)) return;
98             ofstream out;
99             out.open(params.user_file);
100             MTree itree;
101 
102             if (params.second_tree) {
103                 cout << "Generating random branch lengths on tree " << params.second_tree << " ..." << endl;
104                 itree.readTree(params.second_tree, params.is_rooted);
105             } else
106             switch (params.tree_gen) {
107             case YULE_HARDING:
108                 cout << "Generating random Yule-Harding tree..." << endl;
109                 break;
110             case UNIFORM:
111                 cout << "Generating random uniform tree..." << endl;
112                 break;
113             case CATERPILLAR:
114                 cout << "Generating random caterpillar tree..." << endl;
115                 break;
116             case BALANCED:
117                 cout << "Generating random balanced tree..." << endl;
118                 break;
119             case STAR_TREE:
120                 cout << "Generating star tree with random external branch lengths..." << endl;
121                 break;
122             default: break;
123             }
124             ofstream out2;
125             if (params.num_zero_len) {
126                 cout << "Setting " << params.num_zero_len << " internal branches to zero length..." << endl;
127                 string str = params.user_file;
128                 str += ".collapsed";
129                 out2.open(str.c_str());
130             }
131             for (int i = 0; i < params.repeated_time; i++) {
132                 MExtTree mtree;
133                 if (itree.root) {
134                     mtree.copyTree(&itree);
135                     mtree.generateRandomBranchLengths(params);
136                 } else {
137                     mtree.generateRandomTree(params.tree_gen, params);
138                 }
139                 if (params.num_zero_len) {
140                     mtree.setZeroInternalBranches(params.num_zero_len);
141                     MExtTree collapsed_tree;
142                     collapsed_tree.copyTree(&mtree);
143                     collapsed_tree.collapseZeroBranches();
144                     collapsed_tree.printTree(out2);
145                     out2 << endl;
146                 }
147                 mtree.printTree(out);
148                 out << endl;
149             }
150             out.close();
151             cout << params.repeated_time << " tree(s) printed to " << params.user_file << endl;
152             if (params.num_zero_len) {
153                 out2.close();
154                 cout << params.repeated_time << " collapsed tree(s) printed to " << params.user_file << ".collapsed" << endl;
155             }
156         }
157         // Generate random trees if optioned
158         else if (params.tree_gen == CIRCULAR_SPLIT_GRAPH) {
159             cout << "Generating random circular split network..." << endl;
160             if (!overwriteFile(params.user_file)) return;
161             sg.generateCircular(params);
162         } else if (params.tree_gen == TAXA_SET) {
163             sg.init(params);
164             cout << "Generating random taxa set of size " << params.sub_size <<
165                 " overlap " << params.overlap << " with " << params.repeated_time << " times..." << endl;
166             if (!overwriteFile(params.pdtaxa_file)) return;
167             sg.generateTaxaSet(params.pdtaxa_file, params.sub_size, params.overlap, params.repeated_time);
168         }
169     } catch (bad_alloc) {
170         outError(ERR_NO_MEMORY);
171     } catch (ios::failure) {
172         outError(ERR_WRITE_OUTPUT, params.user_file);
173     }
174 
175     // calculate the distance
176     if (params.run_mode == CALC_DIST) {
177         if (params.tree_gen == CIRCULAR_SPLIT_GRAPH) {
178             cout << "Calculating distance matrix..." << endl;
179             sg.calcDistance(params.dist_file);
180             cout << "Distances printed to " << params.dist_file << endl;
181         }// else {
182             //mtree.calcDist(params.dist_file);
183         //}
184     }
185 
186 }
187 
separator(ostream & out,int type=0)188 inline void separator(ostream &out, int type = 0) {
189     switch (type) {
190     case 0:
191         out << endl << "==============================================================================" << endl;
192         break;
193     case 1:
194         out << endl << "-----------------------------------------------------------" << endl;
195         break;
196     default:
197         break;
198     }
199 }
200 
201 
printCopyright(ostream & out)202 void printCopyright(ostream &out) {
203 #ifdef IQ_TREE
204      out << "IQ-TREE";
205     #ifdef _IQTREE_MPI
206     out << " MPI";
207     #endif
208     #ifdef _OPENMP
209     out << " multicore";
210     #endif
211     #ifdef __AVX512KNL
212     out << " Xeon Phi KNL";
213     #endif
214      out << " version ";
215 #else
216      out << "PDA - Phylogenetic Diversity Analyzer version ";
217 #endif
218     out << iqtree_VERSION_MAJOR << "." << iqtree_VERSION_MINOR << iqtree_VERSION_PATCH;
219 
220 #if defined _WIN32 || defined WIN32
221     out << " for Windows";
222 #elif defined __APPLE__ || defined __MACH__
223     out << " for Mac OS X";
224 #elif defined __linux__
225     out << " for Linux";
226 #elif defined __unix__ || defined __unix
227     out << " for Unix";
228 #else
229     out << " for unknown platform"
230 #endif
231 
232     out    << " " << 8*sizeof(void*) << "-bit" << " built " << __DATE__;
233 #if defined DEBUG
234     out << " - debug mode";
235 #endif
236 
237 #ifdef IQ_TREE
238     out << endl << "Developed by Bui Quang Minh, Nguyen Lam Tung, Olga Chernomor,"
239         << endl << "Heiko Schmidt, Dominik Schrempf, Michael Woodhams." << endl << endl;
240 #else
241     out << endl << "Copyright (c) 2006-2014 Olga Chernomor, Arndt von Haeseler and Bui Quang Minh." << endl << endl;
242 #endif
243 }
244 
printRunMode(ostream & out,RunMode run_mode)245 void printRunMode(ostream &out, RunMode run_mode) {
246     switch (run_mode) {
247         case DETECTED: out << "Detected"; break;
248         case GREEDY: out << "Greedy"; break;
249         case PRUNING: out << "Pruning"; break;
250         case BOTH_ALG: out << "Greedy and Pruning"; break;
251         case EXHAUSTIVE: out << "Exhaustive"; break;
252         case DYNAMIC_PROGRAMMING: out << "Dynamic Programming"; break;
253         case LINEAR_PROGRAMMING: out << "Integer Linear Programming"; break;
254         default: outError(ERR_INTERNAL);
255     }
256 }
257 
258 /**
259     summarize the running with header
260 */
summarizeHeader(ostream & out,Params & params,bool budget_constraint,InputType analysis_type)261 void summarizeHeader(ostream &out, Params &params, bool budget_constraint, InputType analysis_type) {
262     printCopyright(out);
263     out << "Input tree/split network file name: " << params.user_file << endl;
264     if(params.eco_dag_file)
265         out << "Input food web file name: "<<params.eco_dag_file<<endl;
266      out << "Input file format: " << ((params.intype == IN_NEWICK) ? "Newick" : ( (params.intype == IN_NEXUS) ? "Nexus" : "Unknown" )) << endl;
267     if (params.initial_file != NULL)
268         out << "Initial taxa file: " << params.initial_file << endl;
269     if (params.param_file != NULL)
270         out << "Parameter file: " << params.param_file << endl;
271     out << endl;
272     out << "Type of measure: " << ((params.root != NULL || params.is_rooted) ? "Rooted": "Unrooted") <<
273             (analysis_type== IN_NEWICK ? " phylogenetic diversity (PD)" : " split diversity (SD)");
274     if (params.root != NULL) out << " at " << params.root;
275     out << endl;
276     if (params.run_mode != CALC_DIST && params.run_mode != PD_USER_SET) {
277         out << "Search objective: " << ((params.find_pd_min) ? "Minimum" : "Maximum") << endl;
278         out << "Search algorithm: ";
279         printRunMode(out, params.run_mode);
280         if (params.run_mode == DETECTED) {
281             out << " -> ";
282             printRunMode(out, params.detected_mode);
283         }
284         out << endl;
285         out << "Search option: " << ((params.find_all) ? "Multiple optimal sets" : "Single optimal set") << endl;
286     }
287     out << endl;
288     out << "Type of analysis: ";
289     switch (params.run_mode) {
290         case PD_USER_SET: out << "PD/SD of user sets";
291             if (params.pdtaxa_file) out << " (" << params.pdtaxa_file << ")"; break;
292         case CALC_DIST: out << "Distance matrix computation"; break;
293         default:
294             out << ((budget_constraint) ? "Budget constraint " : "Subset size k ");
295             if (params.intype == IN_NEWICK)
296                 out << ((analysis_type == IN_NEWICK) ? "on tree" : "on tree -> split network");
297             else
298                 out << "on split network";
299     }
300     out << endl;
301     //out << "Random number seed: " << params.ran_seed << endl;
302 }
303 
summarizeFooter(ostream & out,Params & params)304 void summarizeFooter(ostream &out, Params &params) {
305     separator(out);
306     time_t beginTime;
307     time (&beginTime);
308     char *date;
309     date = ctime(&beginTime);
310 
311     out << "Time used: " << params.run_time  << " seconds." << endl;
312     out << "Finished time: " << date << endl;
313 }
314 
315 
getMaxNameLen(vector<string> & setName)316 int getMaxNameLen(vector<string> &setName) {
317     int len = 0;
318     for (vector<string>::iterator it = setName.begin(); it != setName.end(); it++)
319         if (len < (*it).length())
320             len = (*it).length();
321     return len;
322 }
323 
printPDUser(ostream & out,Params & params,PDRelatedMeasures & pd_more)324 void printPDUser(ostream &out, Params &params, PDRelatedMeasures &pd_more) {
325     out << "List of user-defined sets of taxa with PD score computed" << endl << endl;
326     int maxlen = getMaxNameLen(pd_more.setName)+2;
327     out.width(maxlen);
328     out << "Name" << "     PD";
329     if (params.exclusive_pd) out << "   excl.-PD";
330     if (params.endemic_pd) out << "   PD-Endem.";
331     if (params.complement_area) out << "   PD-Compl. given area " << params.complement_area;
332     out << endl;
333     int cnt;
334     for (cnt = 0; cnt < pd_more.setName.size(); cnt++) {
335         out.width(maxlen);
336         out << pd_more.setName[cnt] << " ";
337         out.width(7);
338         out << pd_more.PDScore[cnt] << "  ";
339         if (params.exclusive_pd) {
340             out.width(7);
341             out << pd_more.exclusivePD[cnt] << "  ";
342         }
343         if (params.endemic_pd) {
344             out.width(7);
345             out << pd_more.PDEndemism[cnt] << "  ";
346         }
347         if (params.complement_area) {
348             out.width(8);
349             out << pd_more.PDComplementarity[cnt];
350         }
351         out << endl;
352     }
353     separator(out, 1);
354 }
355 
summarizeTree(Params & params,PDTree & tree,vector<PDTaxaSet> & taxa_set,PDRelatedMeasures & pd_more)356 void summarizeTree(Params &params, PDTree &tree, vector<PDTaxaSet> &taxa_set,
357     PDRelatedMeasures &pd_more) {
358     string filename;
359     if (params.out_file == NULL) {
360         filename = params.out_prefix;
361         filename += ".pda";
362     } else
363         filename = params.out_file;
364 
365     try {
366         ofstream out;
367         out.exceptions(ios::failbit | ios::badbit);
368         out.open(filename.c_str());
369 
370         summarizeHeader(out, params, false, IN_NEWICK);
371         out << "Tree size: " << tree.leafNum-params.is_rooted << " taxa, " <<
372             tree.nodeNum-1-params.is_rooted << " branches" << endl;
373         separator(out);
374 
375         vector<PDTaxaSet>::iterator tid;
376 
377         if (params.run_mode == PD_USER_SET) {
378             printPDUser(out, params, pd_more);
379         }
380         else if (taxa_set.size() > 1)
381             out << "Optimal PD-sets with k = " << params.min_size-params.is_rooted <<
382             " to " << params.sub_size-params.is_rooted << endl << endl;
383 
384 
385         int subsize = params.min_size-params.is_rooted;
386         if (params.run_mode == PD_USER_SET) subsize = 1;
387         for (tid = taxa_set.begin(); tid != taxa_set.end(); tid++, subsize++) {
388             if (tid != taxa_set.begin())
389                 separator(out, 1);
390             if (params.run_mode == PD_USER_SET) {
391                 out << "Set " << subsize << " has PD score of " << tid->score << endl;
392             }
393             else {
394                 out << "For k = " << subsize << " the optimal PD score is " << (*tid).score << endl;
395                 out << "The optimal PD set has " << subsize << " taxa:" << endl;
396             }
397             for (NodeVector::iterator it = (*tid).begin(); it != (*tid).end(); it++)
398                 if ((*it)->name != ROOT_NAME){
399                     out << (*it)->name << endl;
400                 }
401             if (!tid->tree_str.empty()) {
402                 out << endl << "Corresponding sub-tree: " << endl;
403                 out << tid->tree_str << endl;
404             }
405             tid->clear();
406         }
407         taxa_set.clear();
408 
409         summarizeFooter(out, params);
410         out.close();
411         cout << endl << "Results are summarized in " << filename << endl << endl;
412 
413     } catch (ios::failure) {
414         outError(ERR_WRITE_OUTPUT, filename);
415     }
416 }
417 
418 
printTaxaSet(Params & params,vector<PDTaxaSet> & taxa_set,RunMode cur_mode)419 void printTaxaSet(Params &params, vector<PDTaxaSet> &taxa_set, RunMode cur_mode) {
420     int subsize = params.min_size-params.is_rooted;
421     ofstream out;
422     ofstream scoreout;
423     string filename;
424     filename = params.out_prefix;
425     filename += ".score";
426     scoreout.open(filename.c_str());
427     if (!scoreout.is_open())
428         outError(ERR_WRITE_OUTPUT, filename);
429     cout << "PD scores printed to " << filename << endl;
430 
431     if (params.nr_output == 1) {
432         filename = params.out_prefix;
433         filename += ".pdtaxa";
434         out.open(filename.c_str());
435         if (!out.is_open())
436             outError(ERR_WRITE_OUTPUT, filename);
437     }
438     for (vector<PDTaxaSet>::iterator tid = taxa_set.begin(); tid != taxa_set.end(); tid++, subsize++) {
439         if (params.nr_output > 10) {
440             filename = params.out_prefix;
441             filename += ".";
442             filename += subsize;
443             if (params.run_mode == BOTH_ALG) {
444                 if (cur_mode == GREEDY)
445                     filename += ".greedy";
446                 else
447                     filename += ".pruning";
448             } else {
449                 filename += ".pdtree";
450             }
451             (*tid).printTree((char*)filename.c_str());
452 
453             filename = params.out_prefix;
454             filename += ".";
455             filename += subsize;
456             filename += ".pdtaxa";
457             (*tid).printTaxa((char*)filename.c_str());
458         } else {
459             out << subsize << " " << (*tid).score << endl;
460             scoreout << subsize << " " << (*tid).score << endl;
461             (*tid).printTaxa(out);
462         }
463     }
464 
465     if (params.nr_output == 1) {
466         out.close();
467         cout << "All taxa list(s) printed to " << filename << endl;
468     }
469 
470     scoreout.close();
471 }
472 
473 
474 /**
475     run PD algorithm on trees
476 */
runPDTree(Params & params)477 void runPDTree(Params &params)
478 {
479 
480     if (params.run_mode == CALC_DIST) {
481         bool is_rooted = false;
482         MExtTree tree(params.user_file, is_rooted);
483         cout << "Tree contains " << tree.leafNum << " taxa." << endl;
484         cout << "Calculating distance matrix..." << endl;
485         tree.calcDist(params.dist_file);
486         cout << "Distances printed to " << params.dist_file << endl;
487         return;
488     }
489 
490     double t_begin, t_end;
491     //char filename[300];
492     //int idx;
493 
494     vector<PDTaxaSet> taxa_set;
495 
496     if (params.run_mode == PD_USER_SET) {
497         // compute score of user-defined sets
498         t_begin = getCPUTime();
499         cout << "Computing PD score for user-defined set of taxa..." << endl;
500         PDTree tree(params);
501         PDRelatedMeasures pd_more;
502         tree.computePD(params, taxa_set, pd_more);
503 
504         if (params.endemic_pd)
505             tree.calcPDEndemism(taxa_set, pd_more.PDEndemism);
506         if (params.complement_area != NULL)
507             tree.calcPDComplementarity(taxa_set, params.complement_area, pd_more.PDComplementarity);
508 
509         t_end = getCPUTime();
510         params.run_time = (t_end-t_begin);
511         summarizeTree(params, tree, taxa_set, pd_more);
512         return;
513     }
514 
515 
516     /*********************************************
517         run greedy algorithm
518     *********************************************/
519 
520     if (params.sub_size < 2) {
521         outError(ERR_NO_K);
522     }
523 
524     bool detected_greedy = (params.run_mode != PRUNING);
525 
526     Greedy test_greedy;
527 
528     test_greedy.init(params);
529 
530     if (params.root == NULL && !params.is_rooted)
531         cout << endl << "Running PD algorithm on UNROOTED tree..." << endl;
532     else
533         cout << endl << "Running PD algorithm on ROOTED tree..." << endl;
534 
535     if (verbose_mode >= VB_DEBUG)
536         test_greedy.drawTree(cout, WT_INT_NODE + WT_BR_SCALE + WT_BR_LEN);
537 
538     if (params.run_mode == GREEDY || params.run_mode == BOTH_ALG ||
539         (params.run_mode == DETECTED)) {
540 
541         if (params.run_mode == DETECTED && params.sub_size >= test_greedy.leafNum * 7 / 10
542             && params.min_size < 2)
543             detected_greedy = false;
544 
545         if (detected_greedy) {
546             params.detected_mode = GREEDY;
547             t_begin=getCPUTime();
548             cout << endl << "Greedy Algorithm..." << endl;
549 
550             taxa_set.clear();
551             test_greedy.run(params, taxa_set);
552 
553             t_end=getCPUTime();
554             params.run_time = (t_end-t_begin);
555             cout << "Time used: " << params.run_time << " seconds." << endl;
556             if (params.min_size == params.sub_size)
557                 cout << "Resulting tree length = " << taxa_set[0].score << endl;
558 
559             if (params.nr_output > 0)
560                 printTaxaSet(params, taxa_set, GREEDY);
561 
562             PDRelatedMeasures pd_more;
563 
564             summarizeTree(params, test_greedy, taxa_set, pd_more);
565         }
566     }
567 
568     /*********************************************
569         run pruning algorithm
570     *********************************************/
571     if (params.run_mode == PRUNING || params.run_mode == BOTH_ALG ||
572         (params.run_mode == DETECTED)) {
573 
574         Pruning test_pruning;
575 
576         if (params.run_mode == PRUNING || params.run_mode == BOTH_ALG) {
577             //Pruning test_pruning(params);
578             test_pruning.init(params);
579         } else if (!detected_greedy) {
580             test_pruning.init(test_greedy);
581         } else {
582             return;
583         }
584         params.detected_mode = PRUNING;
585         t_begin=getCPUTime();
586         cout << endl << "Pruning Algorithm..." << endl;
587         taxa_set.clear();
588         test_pruning.run(params, taxa_set);
589 
590         t_end=getCPUTime();
591         params.run_time = (t_end-t_begin) ;
592         cout << "Time used: " << params.run_time << " seconds.\n";
593         if (params.min_size == params.sub_size)
594             cout << "Resulting tree length = " << taxa_set[0].score << endl;
595 
596         if (params.nr_output > 0)
597             printTaxaSet(params, taxa_set, PRUNING);
598 
599         PDRelatedMeasures pd_more;
600 
601         summarizeTree(params, test_pruning, taxa_set, pd_more);
602 
603     }
604 
605 }
606 
checkSplitDistance(ostream & out,PDNetwork & sg)607 void checkSplitDistance(ostream &out, PDNetwork &sg) {
608     mmatrix(double) dist;
609     sg.calcDistance(dist);
610     int ntaxa = sg.getNTaxa();
611     int i, j;
612     bool found = false;
613     for (i = 0; i < ntaxa-1; i++) {
614         bool first = true;
615         for (j = i+1; j < ntaxa; j++)
616             if (abs(dist[i][j]) <= 1e-5) {
617                 if (!found) {
618                     out << "The following sets of taxa (each set in a line) have very small split-distance" << endl;
619                     out << "( <= 1e-5) as computed from the split system. To avoid a lot of multiple" << endl;
620                     out << "optimal PD sets to be reported, one should only keep one taxon from each set" << endl;
621                     out << "and exclude the rest from the analysis." << endl << endl;
622                 }
623                 if (first)
624                     out << sg.getTaxa()->GetTaxonLabel(i);
625                 found = true;
626                 first = false;
627                 out << ", " << sg.getTaxa()->GetTaxonLabel(j);
628             }
629         if (!first) out << endl;
630     }
631     if (found)
632         separator(out);
633 }
634 
635 
636 
637 /**
638     check if the set are nested and there are no multiple optimal sets.
639     If yes, return the ranking as could be produced by a greedy algorithm
640 */
makeRanking(vector<SplitSet> & pd_set,IntVector & indices,IntVector & ranking)641 bool makeRanking(vector<SplitSet> &pd_set, IntVector &indices, IntVector &ranking) {
642     vector<SplitSet>::iterator it;
643     IntVector::iterator inti;
644     ranking.clear();
645     bool nested = true;
646     Split *cur_sp = NULL;
647     int id = 1;
648     for (it = pd_set.begin(); it != pd_set.end(); it++) {
649         if ((*it).empty()) continue;
650         if ((*it).size() > 1) {
651             nested = false;
652             ranking.push_back(-10);
653             indices.push_back(0);
654         }
655         Split *sp = (*it)[0];
656 
657         if (!cur_sp) {
658             IntVector sp_tax;
659             sp->getTaxaList(sp_tax);
660             ranking.insert(ranking.end(), sp_tax.begin(), sp_tax.end());
661             for (inti = sp_tax.begin(); inti != sp_tax.end(); inti++)
662                 indices.push_back(id++);
663         } else {
664             if ( !cur_sp->subsetOf(*sp)) {
665                 ranking.push_back(-1);
666                 indices.push_back(0);
667                 nested = false;
668             }
669             Split sp_diff(*sp);
670             sp_diff -= *cur_sp;
671             Split sp_diff2(*cur_sp);
672             sp_diff2 -= *sp;
673             IntVector sp_tax;
674             sp_diff2.getTaxaList(sp_tax);
675             ranking.insert(ranking.end(), sp_tax.begin(), sp_tax.end());
676             for (inti = sp_tax.begin(); inti != sp_tax.end(); inti++)
677                 indices.push_back(-id);
678             sp_diff.getTaxaList(sp_tax);
679             ranking.insert(ranking.end(), sp_tax.begin(), sp_tax.end());
680             for (inti = sp_tax.begin(); inti != sp_tax.end(); inti++)
681                 indices.push_back(id);
682             if ( !cur_sp->subsetOf(*sp)) {
683                 ranking.push_back(-2);
684                 indices.push_back(0);
685             }
686             id++;
687         }
688         cur_sp = sp;
689     }
690     return nested;
691 }
692 
693 
printNexusSets(const char * filename,PDNetwork & sg,vector<SplitSet> & pd_set)694 void printNexusSets(const char *filename, PDNetwork &sg, vector<SplitSet> &pd_set) {
695     try {
696         ofstream out;
697         out.open(filename);
698         out << "#NEXUS" << endl << "BEGIN Sets;" << endl;
699         vector<SplitSet>::iterator it;
700         for (it = pd_set.begin(); it != pd_set.end(); it++) {
701             int id = 1;
702             for (SplitSet::iterator sit = (*it).begin(); sit != (*it).end(); sit++, id++) {
703                 IntVector taxa;
704                 (*sit)->getTaxaList(taxa);
705                 out << "   TAXSET Opt_" << taxa.size() << "_" << id << " =";
706                 for (IntVector::iterator iit = taxa.begin(); iit != taxa.end(); iit++) {
707                     if (sg.isPDArea())
708                         out << " '" << sg.getSetsBlock()->getSet(*iit)->name << "'";
709                     else
710                         out << " '" << sg.getTaxa()->GetTaxonLabel(*iit) << "'";
711                 }
712                 out << ";" << endl;
713             }
714         }
715         out << "END; [Sets]" << endl;
716         out.close();
717         cout << endl << "Optimal sets are written to nexus file " << filename << endl;
718     } catch (ios::failure) {
719         outError(ERR_WRITE_OUTPUT, filename);
720     }
721 
722 }
723 
724 
725 
computeTaxaFrequency(SplitSet & taxa_set,DoubleVector & freq)726 void computeTaxaFrequency(SplitSet &taxa_set, DoubleVector &freq) {
727     ASSERT(taxa_set.size());
728     int ntaxa = taxa_set[0]->getNTaxa();
729     int i;
730 
731     freq.resize(ntaxa, 0);
732     for (SplitSet::iterator it2 = taxa_set.begin(); it2 != taxa_set.end(); it2++) {
733         for ( i = 0; i < ntaxa; i++)
734             if ((*it2)->containTaxon(i)) freq[i] += 1.0;
735     }
736 
737     for ( i = 0; i < ntaxa; i++)
738         freq[i] /= taxa_set.size();
739 
740 }
741 
742 /**
743     summarize the running results
744 */
summarizeSplit(Params & params,PDNetwork & sg,vector<SplitSet> & pd_set,PDRelatedMeasures & pd_more,bool full_report)745 void summarizeSplit(Params &params, PDNetwork &sg, vector<SplitSet> &pd_set, PDRelatedMeasures &pd_more, bool full_report) {
746     int i;
747 
748 
749     if (params.nexus_output) {
750         string nex_file = params.out_prefix;
751         nex_file += ".pdsets.nex";
752         printNexusSets(nex_file.c_str(), sg, pd_set);
753     }
754     string filename;
755     if (params.out_file == NULL) {
756         filename = params.out_prefix;
757         filename += ".pda";
758     } else
759         filename = params.out_file;
760 
761     try {
762         ofstream out;
763         out.open(filename.c_str());
764         /****************************/
765         /********** HEADER **********/
766         /****************************/
767         summarizeHeader(out, params, sg.isBudgetConstraint(), IN_NEXUS);
768 
769         out << "Network size: " << sg.getNTaxa()-params.is_rooted << " taxa, " <<
770             sg.getNSplits()-params.is_rooted << " splits (of which " <<
771             sg.getNTrivialSplits() << " are trivial splits)" << endl;
772         out << "Network type: " << ((sg.isCircular()) ? "Circular" : "General") << endl;
773 
774         separator(out);
775 
776         checkSplitDistance(out, sg);
777 
778         int c_num = 0;
779         //int subsize = (sg.isBudgetConstraint()) ? params.budget : (params.sub_size-params.is_rooted);
780         //subsize -= pd_set.size()-1;
781         int subsize = (sg.isBudgetConstraint()) ? params.min_budget : params.min_size-params.is_rooted;
782         int stepsize = (sg.isBudgetConstraint()) ? params.step_budget : params.step_size;
783         if (params.detected_mode != LINEAR_PROGRAMMING) stepsize = 1;
784         vector<SplitSet>::iterator it;
785         SplitSet::iterator it2;
786 
787 
788         if (params.run_mode == PD_USER_SET) {
789             printPDUser(out, params, pd_more);
790         }
791 
792         /****************************/
793         /********** SUMMARY *********/
794         /****************************/
795 
796         if (params.run_mode != PD_USER_SET && !params.num_bootstrap_samples) {
797             out << "Summary of the PD-score and the number of optimal PD-sets with the same " << endl << "optimal PD-score found." << endl;
798 
799             if (sg.isBudgetConstraint())
800                 out << endl << "Budget   PD-score   %PD-score   #PD-sets" << endl;
801             else
802                 out << endl << "Size-k   PD-score   %PD-score   #PD-sets" << endl;
803 
804             int sizex = subsize;
805             double total = sg.calcWeight();
806 
807             for (it = pd_set.begin(); it != pd_set.end(); it++, sizex+=stepsize) {
808                 out.width(6);
809                 out << right << sizex << " ";
810                 out.width(10);
811                 out << right << (*it).getWeight() << " ";
812                 out.width(10);
813                 out << right << ((*it).getWeight()/total)*100.0 << " ";
814                 out.width(6);
815                 out << right << (*it).size();
816                 out << endl;
817             }
818 
819             out << endl;
820             if (!params.find_all)
821                 out << "Note: You did not choose the option to find multiple optimal PD sets." << endl <<
822                     "That's why we only reported one PD-set per size-k or budget. If you want" << endl <<
823                     "to determine all multiple PD-sets, use the '-a' option.";
824             else {
825                 out << "Note: The number of multiple optimal PD sets to be reported is limited to " << params.pd_limit << "." << endl <<
826                     "There might be cases where the actual #PD-sets exceeds that upper-limit but" << endl <<
827                     "won't be listed here. Please refer to the above list to identify such cases." << endl <<
828                     "To increase the upper-limit, use the '-lim <limit_number>' option.";
829             }
830             out << endl;
831             separator(out);
832         }
833 
834         if (!full_report) {
835             out.close();
836             return;
837         }
838 
839 
840         /****************************/
841         /********* BOOTSTRAP ********/
842         /****************************/
843         if (params.run_mode != PD_USER_SET && params.num_bootstrap_samples) {
844             out << "Summary of the bootstrap analysis " << endl;
845             for (it = pd_set.begin(); it != pd_set.end(); it++) {
846                 DoubleVector freq;
847                 computeTaxaFrequency((*it), freq);
848                 out << "For k/budget = " << subsize << " the " << ((sg.isPDArea()) ? "areas" : "taxa")
849                     << " supports are: " << endl;
850                 for (i = 0; i < freq.size(); i++)
851                     out << ((sg.isPDArea()) ? sg.getSetsBlock()->getSet(i)->name : sg.getTaxa()->GetTaxonLabel(i))
852                         << "\t" << freq[i] << endl;
853                 if ((it+1) != pd_set.end()) separator(out, 1);
854             }
855             out << endl;
856             separator(out);
857         }
858 
859         /****************************/
860         /********** RANKING *********/
861         /****************************/
862 
863         if (params.run_mode != PD_USER_SET && !params.num_bootstrap_samples) {
864 
865 
866             IntVector ranking;
867             IntVector index;
868 
869             out << "Ranking based on the optimal sets" << endl;
870 
871 
872             if (!makeRanking(pd_set, index, ranking)) {
873                 out << "WARNING: Optimal sets are not nested, so ranking should not be considered stable" << endl;
874             }
875             if (subsize > 1) {
876                 out << "WARNING: The first " << subsize << " ranks should be treated equal" << endl;
877             }
878             out << endl << "Rank*   ";
879             if (!sg.isPDArea())
880                 out << "Taxon names" << endl;
881             else
882                 out << "Area names" << endl;
883 
884 
885             for (IntVector::iterator intv = ranking.begin(), intid = index.begin(); intv != ranking.end(); intv ++, intid++) {
886                 if (*intv == -10)
887                     out << "<--- multiple optimal set here --->" << endl;
888                 else if (*intv == -1)
889                     out << "<--- BEGIN: greedy does not work --->" << endl;
890                 else if (*intv == -2)
891                     out << "<--- END --->" << endl;
892                 else {
893                     out.width(5);
894                     out <<  right << *intid << "   ";
895                     if (sg.isPDArea())
896                         out << sg.getSetsBlock()->getSet(*intv)->name << endl;
897                     else
898                         out << sg.getTaxa()->GetTaxonLabel(*intv) << endl;
899                 }
900             }
901             out << endl;
902             out <<  "(*) Negative ranks indicate the point at which the greedy algorithm" << endl <<
903                     "    does not work. In that case, the corresponding taxon/area names" << endl <<
904                     "    should be deleted from the optimal set of the same size" << endl;
905             separator(out);
906         }
907 
908         int max_len = sg.getTaxa()->GetMaxTaxonLabelLength();
909 
910         /****************************/
911         /***** DETAILED SETS ********/
912         /****************************/
913 
914         if (params.run_mode != PD_USER_SET)
915             out << "Detailed information of all taxa found in the optimal PD-sets" << endl;
916 
917         if (pd_set.size() > 1) {
918             if (sg.isBudgetConstraint())
919                 out << "with budget = " << params.min_budget <<
920                     " to " << params.budget << endl << endl;
921             else
922                 out << "with k = " << params.min_size-params.is_rooted <<
923                     " to " << params.sub_size-params.is_rooted << endl << endl;
924         }
925 
926         if (params.run_mode != PD_USER_SET)
927             separator(out,1);
928 
929         for (it = pd_set.begin(); it != pd_set.end(); it++, subsize+=stepsize) {
930 
931             // check if the pd-sets are the same as previous one
932             if (sg.isBudgetConstraint() && it != pd_set.begin()) {
933                 vector<SplitSet>::iterator prev, next;
934                 for (next=it, prev=it-1; next != pd_set.end() && next->getWeight() == (*prev).getWeight() &&
935                     next->size() == (*prev).size(); next++ ) ;
936                 if (next != it) {
937                     // found something in between!
938                     out << endl;
939                     //out << endl << "**************************************************************" << endl;
940                     out << "For budget = " << subsize << " -> " << subsize+(next-it-1)*stepsize <<
941                         " the optimal PD score and PD sets" << endl;
942                     out << "are identical to the case when budget = " << subsize-stepsize << endl;
943                     //out << "**************************************************************" << endl;
944                     subsize += (next-it)*stepsize;
945                     it = next;
946                     if (it == pd_set.end()) break;
947                 }
948             }
949 
950             if (it != pd_set.begin()) separator(out, 1);
951 
952             int num_sets = (*it).size();
953             double weight = (*it).getWeight();
954 
955             if (params.run_mode != PD_USER_SET) {
956                 out << "For " << ((sg.isBudgetConstraint()) ? "budget" : "k") << " = " << subsize;
957                 out << " the optimal PD score is " << weight << endl;
958 
959                 if (num_sets == 1) {
960                     if (!sg.isBudgetConstraint())
961                         out << "The optimal PD set has " << (*it)[0]->countTaxa()-params.is_rooted <<
962                             ((sg.isPDArea()) ? " areas" : " taxa");
963                     else
964                         out << "The optimal PD set has " << (*it)[0]->countTaxa()-params.is_rooted <<
965                         ((sg.isPDArea()) ? " areas" : " taxa") << " and requires " << sg.calcCost(*(*it)[0]) << " budget";
966                     if (!sg.isPDArea()) out << " and covers " << sg.countSplits(*(*it)[0]) <<
967                         " splits (of which " << sg.countInternalSplits(*(*it)[0]) << " are internal splits)";
968                     out << endl;
969                 }
970                 else
971                     out << "Found " << num_sets << " PD sets with the same optimal score." << endl;
972             }
973             for (it2 = (*it).begin(), c_num=1; it2 != (*it).end(); it2++, c_num++){
974                 Split *this_set = *it2;
975 
976                 if (params.run_mode == PD_USER_SET && it2 != (*it).begin())
977                     separator(out, 1);
978 
979                 if (params.run_mode == PD_USER_SET) {
980                     if (!sg.isBudgetConstraint())
981                         out << "Set " << c_num << " has PD score of " << this_set->getWeight();
982                     else
983                         out << "Set " << c_num << " has PD score of " << this_set->getWeight() <<
984                         " and requires " << sg.calcCost(*this_set) << " budget";
985                 } else if (num_sets > 1) {
986                     if (!sg.isBudgetConstraint())
987                         out << endl << "PD set " << c_num;
988                     else
989                         out << endl << "PD set " << c_num << " has " << this_set->countTaxa()-params.is_rooted <<
990                         " taxa and requires " << sg.calcCost(*this_set) << " budget";
991                 }
992 
993                 if (!sg.isPDArea() && (num_sets > 1 || params.run_mode == PD_USER_SET ))
994                     out << " and covers " << sg.countSplits(*(*it)[0]) << " splits (of which "
995                     << sg.countInternalSplits(*(*it)[0]) << " are internal splits)";
996                 out << endl;
997 
998                 if (params.run_mode != PD_USER_SET && sg.isPDArea()) {
999                     for (i = 0; i < sg.getSetsBlock()->getNSets(); i++)
1000                         if (this_set->containTaxon(i)) {
1001                             if (sg.isBudgetConstraint()) {
1002                                 out.width(max_len);
1003                                 out << left << sg.getSetsBlock()->getSet(i)->name << "\t";
1004                                 out.width(10);
1005                                 out << right << sg.getPdaBlock()->getCost(i);
1006                                 out << endl;
1007 
1008                             } else {
1009                                 out << sg.getSetsBlock()->getSet(i)->name << endl;
1010                             }
1011                         }
1012 
1013                     Split sp(sg.getNTaxa());
1014                     for (i = 0; i < sg.getSetsBlock()->getNSets(); i++)
1015                         if (this_set->containTaxon(i))
1016                             sp += *(sg.area_taxa[i]);
1017                     out << endl << "which contains " << sp.countTaxa() - params.is_rooted << " taxa: " << endl;
1018                     for (i = 0; i < sg.getNTaxa(); i++)
1019                         if (sg.getTaxa()->GetTaxonLabel(i) != ROOT_NAME && sp.containTaxon(i))
1020                             out << sg.getTaxa()->GetTaxonLabel(i) << endl;
1021 
1022                 } else
1023                 for ( i = 0; i < sg.getNTaxa(); i++)
1024                     if (sg.getTaxa()->GetTaxonLabel(i) != ROOT_NAME && this_set->containTaxon(i)) {
1025                         if (sg.isBudgetConstraint()) {
1026                             out.width(max_len);
1027                             out << left << sg.getTaxa()->GetTaxonLabel(i) << "\t";
1028                             out.width(10);
1029                             out << right << sg.getPdaBlock()->getCost(i);
1030                             out << endl;
1031 
1032                         } else {
1033                             out << sg.getTaxa()->GetTaxonLabel(i) << endl;
1034                         }
1035                     }
1036             }
1037         }
1038 
1039         /****************************/
1040         /********** FOOTER **********/
1041         /****************************/
1042 
1043         summarizeFooter(out, params);
1044 
1045         out.close();
1046         cout << endl << "Results are summarized in " << filename << endl << endl;
1047     } catch (ios::failure) {
1048         outError(ERR_WRITE_OUTPUT, filename);
1049     }
1050 }
1051 
printGainMatrix(char * filename,mmatrix (double)& delta_gain,int start_k)1052 void printGainMatrix(char *filename, mmatrix(double) &delta_gain, int start_k) {
1053     try {
1054         ofstream out;
1055         out.exceptions(ios::failbit | ios::badbit);
1056         out.open(filename);
1057         int k = start_k;
1058         for (mmatrix(double)::iterator it = delta_gain.begin(); it != delta_gain.end(); it++, k++) {
1059             out << k;
1060             for (int i = 0; i < (*it).size(); i++)
1061                 out << "  " << (*it)[i];
1062             out << endl;
1063         }
1064         out.close();
1065         cout << "PD gain matrix printed to " << filename << endl;
1066     } catch (ios::failure) {
1067         outError(ERR_WRITE_OUTPUT, filename);
1068     }
1069 }
1070 
1071 /**
1072     run PD algorithm on split networks
1073 */
runPDSplit(Params & params)1074 void runPDSplit(Params &params) {
1075 
1076     cout << "Using NCL - Nexus Class Library" << endl << endl;
1077 
1078     // init a split graph class from the parameters
1079     CircularNetwork sg(params);
1080     int i;
1081 
1082     // this vector of SplitSet store all the optimal PD sets
1083     vector<SplitSet> pd_set;
1084     // this define an order of taxa (circular order in case of circular networks)
1085     vector<int> taxa_order;
1086     // this store a particular taxa set
1087     Split taxa_set;
1088 
1089 
1090     if (sg.isCircular()) {
1091         // is a circular network, get circular order
1092         for (i = 0; i < sg.getNTaxa(); i++)
1093             taxa_order.push_back(sg.getCircleId(i));
1094     } else
1095         // otherwise, get the incremental order
1096         for (i = 0; i < sg.getNTaxa(); i++)
1097             taxa_order.push_back(i);
1098 
1099     PDRelatedMeasures pd_more;
1100 
1101     // begining time of the algorithm run
1102     double time_begin = getCPUTime();
1103     //time(&time_begin);
1104     // check parameters
1105     if (sg.isPDArea()) {
1106         if (sg.isBudgetConstraint()) {
1107             int budget = (params.budget >= 0) ? params.budget : sg.getPdaBlock()->getBudget();
1108             if (budget < 0 && params.pd_proportion == 0.0) params.run_mode = PD_USER_SET;
1109         } else {
1110             int sub_size = (params.sub_size >= 1) ? params.sub_size : sg.getPdaBlock()->getSubSize();
1111             if (sub_size < 1 && params.pd_proportion == 0.0) params.run_mode = PD_USER_SET;
1112 
1113         }
1114     }
1115 
1116     if (params.run_mode == PD_USER_SET) {
1117         // compute score of user-defined sets
1118         cout << "Computing PD score for user-defined set of taxa..." << endl;
1119         pd_set.resize(1);
1120         sg.computePD(params, pd_set[0], pd_more);
1121         if (params.endemic_pd)
1122             sg.calcPDEndemism(pd_set[0], pd_more.PDEndemism);
1123 
1124         if (params.complement_area != NULL)
1125             sg.calcPDComplementarity(pd_set[0], params.complement_area, pd_more.setName, pd_more.PDComplementarity);
1126 
1127     } else {
1128         // otherwise, call the main function
1129         if (params.num_bootstrap_samples) {
1130             cout << endl << "======= START BOOTSTRAP ANALYSIS =======" << endl;
1131             MTreeSet *mtrees = sg.getMTrees();
1132             if (mtrees->size() < 100)
1133                 cout << "WARNING: bootstrap may be unstable with less than 100 trees" << endl;
1134             vector<string> taxname;
1135             sg.getTaxaName(taxname);
1136             i = 1;
1137             for (MTreeSet::iterator it = mtrees->begin(); it != mtrees->end(); it++, i++) {
1138                 cout << "---------- TREE " << i << " ----------" << endl;
1139                 // convert tree into split sytem
1140                 SplitGraph sg2;
1141                 (*it)->convertSplits(taxname, sg2);
1142                 // change the current split system
1143                 for (SplitGraph::reverse_iterator it = sg.rbegin(); it != sg.rend(); it++) {
1144                     delete *it;
1145                 }
1146                 sg.clear();
1147                 sg.insert(sg.begin(), sg2.begin(), sg2.end());
1148                 sg2.clear();
1149 
1150                 // now findPD on the converted tree-split system
1151                 sg.findPD(params, pd_set, taxa_order);
1152             }
1153             cout << "======= DONE BOOTSTRAP ANALYSIS =======" << endl << endl;
1154         } else {
1155             sg.findPD(params, pd_set, taxa_order);
1156         }
1157     }
1158 
1159     // ending time
1160     double time_end = getCPUTime();
1161     //time(&time_end);
1162     params.run_time = time_end - time_begin;
1163 
1164     cout << "Time used: " << (double) (params.run_time) << " seconds." << endl;
1165 
1166     if (verbose_mode >= VB_DEBUG && !sg.isPDArea()) {
1167         cout << "PD set(s) with score(s): " << endl;
1168         for (vector<SplitSet>::iterator it = pd_set.begin(); it != pd_set.end(); it++)
1169         for (SplitSet::iterator it2 = (*it).begin(); it2 != (*it).end(); it2++ ){
1170             //(*it)->report(cout);
1171             cout << "  " << (*it2)->getWeight() << "    ";
1172             for (i = 0; i < sg.getNTaxa(); i++)
1173                 if ((*it2)->containTaxon(i))
1174                 cout << sg.getTaxa()->GetTaxonLabel(i) << "  ";
1175             if (sg.isBudgetConstraint())
1176                 cout << " (budget = " << sg.calcCost(*(*it2)) << ")";
1177             cout << endl;
1178         }
1179     }
1180 
1181     sg.printOutputSetScore(params, pd_set);
1182 
1183 
1184     summarizeSplit(params, sg, pd_set, pd_more, true);
1185 
1186     if (params.calc_pdgain) {
1187         mmatrix(double) delta_gain;
1188         sg.calcPDGain(pd_set, delta_gain);
1189         string filename = params.out_prefix;
1190         filename += ".pdgain";
1191         printGainMatrix((char*)filename.c_str(), delta_gain, pd_set.front().front()->countTaxa());
1192         //cout << delta_gain;
1193     }
1194 
1195 
1196     //for (i = pd_set.size()-1; i >= 0; i--)
1197     //    delete pd_set[i];
1198 
1199 }
1200 
printSplitSet(SplitGraph & sg,SplitIntMap & hash_ss)1201 void printSplitSet(SplitGraph &sg, SplitIntMap &hash_ss) {
1202 /*
1203     for (SplitIntMap::iterator it = hash_ss.begin(); it != hash_ss.end(); it++) {
1204         if ((*it)->getWeight() > 50 && (*it)->countTaxa() > 1)
1205         (*it)->report(cout);
1206     }*/
1207     sg.getTaxa()->Report(cout);
1208     for (SplitGraph::iterator it = sg.begin(); it != sg.end(); it++) {
1209         if ((*it)->getWeight() > 50 && (*it)->countTaxa() > 1)
1210         (*it)->report(cout);
1211     }
1212 }
1213 
readTaxaOrder(char * taxa_order_file,StrVector & taxa_order)1214 void readTaxaOrder(char *taxa_order_file, StrVector &taxa_order) {
1215 
1216 }
1217 
calcTreeCluster(Params & params)1218 void calcTreeCluster(Params &params) {
1219     ASSERT(params.taxa_order_file);
1220     MExtTree tree(params.user_file, params.is_rooted);
1221 //    StrVector taxa_order;
1222     //readTaxaOrder(params.taxa_order_file, taxa_order);
1223     NodeVector taxa;
1224     mmatrix(int) clusters;
1225     clusters.reserve(tree.leafNum - 3);
1226     tree.getTaxa(taxa);
1227     sort(taxa.begin(), taxa.end(), nodenamecmp);
1228     tree.createCluster(taxa, clusters);
1229     int cnt = 1;
1230 
1231     string treename = params.out_prefix;
1232     treename += ".clu-id";
1233     tree.printTree(treename.c_str());
1234 
1235     for (mmatrix(int)::iterator it = clusters.begin(); it != clusters.end(); it++, cnt++) {
1236         ostringstream filename;
1237         filename << params.out_prefix << "." << cnt << ".clu";
1238         ofstream out(filename.str().c_str());
1239 
1240         ostringstream filename2;
1241         filename2 << params.out_prefix << "." << cnt << ".name-clu";
1242         ofstream out2(filename2.str().c_str());
1243 
1244         out << "w" << endl << "c" << endl << "4" << endl << "b" << endl << "g" << endl << 4-params.is_rooted << endl;
1245         IntVector::iterator it2;
1246         NodeVector::iterator it3;
1247         for (it2 = (*it).begin(), it3 = taxa.begin(); it2 != (*it).end(); it2++, it3++)
1248             if ((*it3)->name != ROOT_NAME) {
1249                 out << char((*it2)+'a') << endl;
1250                 out2 << (*it3)->name << "  " << char((*it2)+'a') << endl;
1251             }
1252         out << "y" << endl;
1253         out.close();
1254         out2.close();
1255         cout << "Cluster " << cnt << " printed to " << filename.rdbuf() << " and " << filename2.rdbuf() << endl;
1256     }
1257 }
1258 
1259 
printTaxa(Params & params)1260 void printTaxa(Params &params) {
1261     MTree mytree(params.user_file, params.is_rooted);
1262     vector<string> taxname;
1263     taxname.resize(mytree.leafNum);
1264     mytree.getTaxaName(taxname);
1265     sort(taxname.begin(), taxname.end());
1266 
1267     string filename = params.out_prefix;
1268     filename += ".taxa";
1269 
1270     try {
1271         ofstream out;
1272         out.exceptions(ios::failbit | ios::badbit);
1273         out.open(filename.c_str());
1274         for (vector<string>::iterator it = taxname.begin(); it != taxname.end(); it++) {
1275             if ((*it) != ROOT_NAME) out << (*it);
1276             out << endl;
1277         }
1278         out.close();
1279         cout << "All taxa names printed to " << filename << endl;
1280     } catch (ios::failure) {
1281         outError(ERR_WRITE_OUTPUT, filename);
1282     }
1283 }
1284 
printAreaList(Params & params)1285 void printAreaList(Params &params) {
1286     MSetsBlock *sets;
1287     sets = new MSetsBlock();
1288      cout << "Reading input file " << params.user_file << "..." << endl;
1289 
1290     MyReader nexus(params.user_file);
1291 
1292     nexus.Add(sets);
1293 
1294     MyToken token(nexus.inf);
1295     nexus.Execute(token);
1296 
1297     //sets->Report(cout);
1298 
1299     TaxaSetNameVector *allsets = sets->getSets();
1300 
1301     string filename = params.out_prefix;
1302     filename += ".names";
1303 
1304     try {
1305         ofstream out;
1306         out.exceptions(ios::failbit | ios::badbit);
1307         out.open(filename.c_str());
1308         for (TaxaSetNameVector::iterator it = allsets->begin(); it != allsets->end(); it++) {
1309             out << (*it)->name;
1310             out << endl;
1311         }
1312         out.close();
1313         cout << "All area names printed to " << filename << endl;
1314     } catch (ios::failure) {
1315         outError(ERR_WRITE_OUTPUT, filename);
1316     }
1317 
1318     delete sets;
1319 }
1320 
scaleBranchLength(Params & params)1321 void scaleBranchLength(Params &params) {
1322     params.is_rooted = true;
1323     PDTree tree(params);
1324     if (params.run_mode == SCALE_BRANCH_LEN) {
1325         cout << "Scaling branch length with a factor of " << params.scaling_factor << " ..." << endl;
1326         tree.scaleLength(params.scaling_factor, false);
1327     } else {
1328         cout << "Scaling clade support with a factor of " << params.scaling_factor << " ..." << endl;
1329         tree.scaleCladeSupport(params.scaling_factor, false);
1330     }
1331     if (params.out_file != NULL)
1332         tree.printTree(params.out_file);
1333     else {
1334         tree.printTree(cout);
1335         cout << endl;
1336     }
1337 }
1338 
calcDistribution(Params & params)1339 void calcDistribution(Params &params) {
1340 
1341     PDTree mytree(params);
1342 
1343     string filename = params.out_prefix;
1344     filename += ".randompd";
1345 
1346     try {
1347         ofstream out;
1348         out.exceptions(ios::failbit | ios::badbit);
1349         out.open(filename.c_str());
1350         for (int size = params.min_size; size <= params.sub_size; size += params.step_size) {
1351             out << size;
1352             for (int sample = 0; sample < params.sample_size; sample++) {
1353                 Split taxset(mytree.leafNum);
1354                 taxset.randomize(size);
1355                 mytree.calcPD(taxset);
1356                 out << "  " << taxset.getWeight();
1357             }
1358             out << endl;
1359         }
1360         out.close();
1361         cout << "PD distribution is printed to " << filename << endl;
1362     } catch (ios::failure) {
1363         outError(ERR_WRITE_OUTPUT, filename);
1364     }
1365 }
1366 
printRFDist(string filename,double * rfdist,int n,int m,int rf_dist_mode,bool print_msg=true)1367 void printRFDist(string filename, double *rfdist, int n, int m, int rf_dist_mode, bool print_msg = true) {
1368     int i, j;
1369 
1370     try {
1371         ofstream out;
1372         out.exceptions(ios::failbit | ios::badbit);
1373         out.open(filename);
1374         if (Params::getInstance().output_format == FORMAT_CSV) {
1375             out << "# Robinson-Foulds distances" << endl
1376             << "# This file can be read in MS Excel or in R with command:" << endl
1377             << "#    dat=read.csv('" <<  filename << "',comment.char='#')" << endl
1378             << "# Columns are comma-separated with following meanings:" << endl
1379             << "#    ID1:     Tree 1 ID" << endl
1380             << "#    ID2:     Tree 2 ID" << endl
1381             << "#    Dist:    Robinson-Foulds distance" << endl
1382             << "ID1,ID2,Dist" << endl;
1383             if (rf_dist_mode == RF_ADJACENT_PAIR) {
1384                 for (i = 0; i < n; i++)
1385                     out << i+1 << ',' << i+2 << ',' << rfdist[i] << endl;
1386             } else if (Params::getInstance().rf_same_pair) {
1387                 for (i = 0; i < n; i++)
1388                     out << i+1 << ',' << i+1 << ',' << rfdist[i] << endl;
1389             } else {
1390                 for (i = 0; i < n; i++)  {
1391                     for (j = 0; j < m; j++)
1392                         out << i+1 << ',' << j+1 << ',' << rfdist[i*m+j] << endl;
1393                 }
1394             }
1395         } else if (rf_dist_mode == RF_ADJACENT_PAIR || Params::getInstance().rf_same_pair) {
1396             out << "XXX        ";
1397             out << 1 << " " << n << endl;
1398             for (i = 0; i < n; i++)
1399                 out << " " << rfdist[i];
1400             out << endl;
1401         } else {
1402             // all pairs
1403             out << n << " " << m << endl;
1404             for (i = 0; i < n; i++)  {
1405                 out << "Tree" << i << "      ";
1406                 for (j = 0; j < m; j++)
1407                     out << " " << rfdist[i*m+j];
1408                 out << endl;
1409             }
1410         }
1411         out.close();
1412         if (print_msg)
1413             cout << "Robinson-Foulds distances printed to " << filename << endl;
1414     } catch (ios::failure) {
1415         outError(ERR_WRITE_OUTPUT, filename);
1416     }
1417 }
1418 
computeRFDistExtended(const char * trees1,const char * trees2,const char * filename)1419 void computeRFDistExtended(const char *trees1, const char *trees2, const char *filename) {
1420     cout << "Reading input trees 1 file " << trees1 << endl;
1421     int ntrees = 0, ntrees2 = 0;
1422     double *rfdist_raw = NULL;
1423     try {
1424         ifstream in;
1425         in.exceptions(ios::failbit | ios::badbit);
1426         in.open(trees1);
1427         IntVector rfdist;
1428         for (ntrees = 1; !in.eof(); ntrees++) {
1429             MTree tree;
1430             bool is_rooted = false;
1431 
1432             // read in the tree and convert into split system for indexing
1433             tree.readTree(in, is_rooted);
1434             if (verbose_mode >= VB_DEBUG)
1435                 cout << ntrees << " " << endl;
1436             DoubleVector dist;
1437             tree.computeRFDist(trees2, dist);
1438             ntrees2 = dist.size();
1439             rfdist.insert(rfdist.end(), dist.begin(), dist.end());
1440             char ch;
1441             in.exceptions(ios::goodbit);
1442             (in) >> ch;
1443             if (in.eof()) break;
1444             in.unget();
1445             in.exceptions(ios::failbit | ios::badbit);
1446 
1447         }
1448 
1449         in.close();
1450         ASSERT(ntrees * ntrees2 == rfdist.size());
1451         rfdist_raw = new double[rfdist.size()];
1452         copy(rfdist.begin(), rfdist.end(), rfdist_raw);
1453 
1454     } catch (ios::failure) {
1455         outError(ERR_READ_INPUT, trees1);
1456     }
1457 
1458     printRFDist(filename, rfdist_raw, ntrees, ntrees2, RF_TWO_TREE_SETS_EXTENDED);
1459     delete [] rfdist_raw;
1460 }
1461 
computeRFDistSamePair(const char * trees1,const char * trees2,const char * filename)1462 void computeRFDistSamePair(const char *trees1, const char *trees2, const char *filename) {
1463     cout << "Reading input trees 1 file " << trees1 << endl;
1464     int ntrees = 0, ntrees2 = 0;
1465     double *rfdist_raw = NULL;
1466     try {
1467         ifstream in;
1468         in.exceptions(ios::failbit | ios::badbit);
1469         in.open(trees1);
1470 
1471         ifstream in2;
1472         in2.exceptions(ios::failbit | ios::badbit);
1473         in2.open(trees2);
1474 
1475         DoubleVector rfdist;
1476         for (ntrees = 1; !in.eof() && !in2.eof(); ntrees++) {
1477             MTree tree;
1478             bool is_rooted = false;
1479             // read in the tree and convert into split system for indexing
1480             tree.readTree(in, is_rooted);
1481 
1482             if (verbose_mode >= VB_DEBUG)
1483                 cout << ntrees << " " << endl;
1484             DoubleVector dist;
1485             tree.computeRFDist(in2, dist, 0, true);
1486             ntrees2 = dist.size();
1487             rfdist.insert(rfdist.end(), dist.begin(), dist.end());
1488             char ch;
1489             in.exceptions(ios::goodbit);
1490             (in) >> ch;
1491             if (in.eof()) break;
1492             in.unget();
1493             in.exceptions(ios::failbit | ios::badbit);
1494 
1495         }
1496 
1497         in.close();
1498         in2.close();
1499         ASSERT(ntrees * ntrees2 == rfdist.size());
1500         rfdist_raw = new double[rfdist.size()];
1501         copy(rfdist.begin(), rfdist.end(), rfdist_raw);
1502 
1503     } catch (ios::failure) {
1504         outError(ERR_READ_INPUT, trees1);
1505     }
1506 
1507     printRFDist(filename, rfdist_raw, ntrees, ntrees2, RF_TWO_TREE_SETS_EXTENDED);
1508 
1509     delete [] rfdist_raw;
1510 }
1511 
computeRFDist(Params & params)1512 void computeRFDist(Params &params) {
1513 
1514     if (!params.user_file) outError("User tree file not provided");
1515 
1516     string filename = params.out_prefix;
1517     filename += ".rfdist";
1518 
1519     if (params.rf_dist_mode == RF_TWO_TREE_SETS_EXTENDED) {
1520         computeRFDistExtended(params.user_file, params.second_tree, filename.c_str());
1521         return;
1522     }
1523 
1524     if (params.rf_same_pair) {
1525         computeRFDistSamePair(params.user_file, params.second_tree, filename.c_str());
1526         return;
1527     }
1528 
1529     MTreeSet trees(params.user_file, params.is_rooted, params.tree_burnin, params.tree_max_count);
1530     int n = trees.size(), m = trees.size();
1531     double *rfdist;
1532     double *incomp_splits = NULL;
1533     string infoname = params.out_prefix;
1534     infoname += ".rfinfo";
1535     string treename = params.out_prefix;
1536     treename += ".rftree";
1537     if (params.rf_dist_mode == RF_TWO_TREE_SETS) {
1538         MTreeSet treeset2(params.second_tree, params.is_rooted, params.tree_burnin, params.tree_max_count);
1539         cout << "Computing Robinson-Foulds distances between two sets of trees" << endl;
1540         m = treeset2.size();
1541         size_t size = n*m;
1542         if (params.rf_same_pair) {
1543             if (m != n)
1544                 outError("Tree sets has different number of trees");
1545             size = n;
1546         }
1547         rfdist = new double [size];
1548         memset(rfdist, 0, size*sizeof(double));
1549         if (verbose_mode >= VB_MAX) {
1550             incomp_splits = new double [size];
1551             memset(incomp_splits, 0, size*sizeof(double));
1552         }
1553         if (verbose_mode >= VB_MED)
1554             trees.computeRFDist(rfdist, &treeset2, params.rf_same_pair,
1555                                 infoname.c_str(),treename.c_str(), incomp_splits);
1556         else
1557             trees.computeRFDist(rfdist, &treeset2, params.rf_same_pair);
1558     } else {
1559         rfdist = new double [n*n];
1560         memset(rfdist, 0, n*n* sizeof(double));
1561         trees.computeRFDist(rfdist, params.rf_dist_mode, params.split_weight_threshold);
1562     }
1563 
1564     //if (verbose_mode >= VB_MED) printRFDist(cout, rfdist, n, m, params.rf_dist_mode);
1565 
1566     printRFDist(filename, rfdist, n, m, params.rf_dist_mode);
1567 
1568     if (incomp_splits) {
1569         filename = params.out_prefix;
1570         filename += ".incomp";
1571         printRFDist(filename, incomp_splits, n, m, params.rf_dist_mode, false);
1572         cout << "Number of incompatible splits in printed to " << filename << endl;
1573     }
1574 
1575     if (incomp_splits) delete [] incomp_splits;
1576     delete [] rfdist;
1577 }
1578 
1579 
testInputFile(Params & params)1580 void testInputFile(Params &params) {
1581     SplitGraph sg(params);
1582     if (sg.isWeaklyCompatible())
1583         cout << "The split system is weakly compatible." << endl;
1584     else
1585         cout << "The split system is NOT weakly compatible." << endl;
1586 
1587 }
1588 
1589 /**MINH ANH: for some statistics about the branches on the input tree*/
branchStats(Params & params)1590 void branchStats(Params &params){
1591     MaTree mytree(params.user_file, params.is_rooted);
1592     mytree.drawTree(cout,WT_TAXON_ID + WT_INT_NODE);
1593     //report to output file
1594     string output;
1595     if (params.out_file)
1596         output = params.out_file;
1597     else {
1598         if (params.out_prefix)
1599             output = params.out_prefix;
1600         else
1601             output = params.user_file;
1602         output += ".stats";
1603     }
1604 
1605     try {
1606         ofstream out;
1607         out.exceptions(ios::failbit | ios::badbit);
1608         out.open(output.c_str());
1609         mytree.printBrInfo(out);
1610     } catch (ios::failure) {
1611         outError(ERR_WRITE_OUTPUT, output);
1612     }
1613     cout << "Information about branch lengths of the tree is printed to: " << output << endl;
1614 
1615     /***** Following added by BQM to print internal branch lengths */
1616     NodeVector nodes1, nodes2;
1617     mytree.generateNNIBraches(nodes1, nodes2);
1618     output = params.out_prefix;
1619     output += ".inlen";
1620     try {
1621         ofstream out;
1622         out.exceptions(ios::failbit | ios::badbit);
1623         out.open(output.c_str());
1624         for (int i = 0; i < nodes1.size(); i++)
1625             out << nodes1[i]->findNeighbor(nodes2[i])->length << " ";
1626         out << endl;
1627     } catch (ios::failure) {
1628         outError(ERR_WRITE_OUTPUT, output);
1629     }
1630     cout << "Internal branch lengths printed to: " << output << endl;
1631 }
1632 
1633 /**MINH ANH: for comparison between the input tree and each tree in a given set of trees*/
compare(Params & params)1634 void compare(Params &params){
1635     MaTree mytree(params.second_tree, params.is_rooted);
1636     //sort taxon names and update nodeID, to be consistent with MTreeSet
1637     NodeVector taxa;
1638     mytree.getTaxa(taxa);
1639     sort(taxa.begin(), taxa.end(), nodenamecmp);
1640     int i;
1641     NodeVector::iterator it;
1642     for (it = taxa.begin(), i = 0; it != taxa.end(); it++, i++)
1643             (*it)->id = i;
1644 
1645     string drawFile = params.second_tree;
1646     drawFile += ".draw";
1647     try {
1648         ofstream out1;
1649         out1.exceptions(ios::failbit | ios::badbit);
1650         out1.open(drawFile.c_str());
1651         mytree.drawTree(out1,WT_TAXON_ID + WT_INT_NODE);
1652     } catch (ios::failure) {
1653         outError(ERR_WRITE_OUTPUT, drawFile);
1654     }
1655     cout << "Tree with branchID (nodeID) was printed to: " << drawFile << endl;
1656 
1657 
1658     MTreeSet trees(params.user_file,params.is_rooted, params.tree_burnin, params.tree_max_count);
1659     DoubleMatrix brMatrix;
1660     DoubleVector BSDs;
1661     IntVector RFs;
1662     mytree.comparedTo(trees, brMatrix, RFs, BSDs);
1663     int numTree = trees.size();
1664     int numNode = mytree.nodeNum;
1665 
1666     string output;
1667     if (params.out_file)
1668         output = params.out_file;
1669     else {
1670         if (params.out_prefix)
1671             output = params.out_prefix;
1672         else
1673             output = params.user_file;
1674         output += ".compare";
1675     }
1676 
1677     try {
1678         ofstream out;
1679         out.exceptions(ios::failbit | ios::badbit);
1680         out.open(output.c_str());
1681         //print the header
1682         out << "tree  " ;
1683         for (int nodeID = 0; nodeID < numNode; nodeID++ )
1684             if ( brMatrix[0][nodeID] != -2 )
1685                 out << "br_" << nodeID << "  ";
1686         out << "RF  BSD" << endl;
1687         for ( int treeID = 0; treeID < numTree; treeID++ )
1688         {
1689             out << treeID << "  ";
1690             for (int nodeID = 0; nodeID < numNode; nodeID++ )
1691                 if ( brMatrix[treeID][nodeID] != -2 )
1692                     out << brMatrix[treeID][nodeID] << "  ";
1693             out << RFs[treeID] << "  " << BSDs[treeID] << endl;
1694         }
1695     } catch (ios::failure) {
1696         outError(ERR_WRITE_OUTPUT, output);
1697     }
1698     cout << "Comparison with the given set of trees is printed to: " << output << endl;
1699 }
1700 
1701 /**MINH ANH: to compute 'guided bootstrap' alignment*/
guidedBootstrap(Params & params)1702 void guidedBootstrap(Params &params)
1703 {
1704     MaAlignment inputAlign(params.aln_file,params.sequence_type, params.intype, params.model_name);
1705     inputAlign.readLogLL(params.siteLL_file);
1706 
1707     string outFre_name = params.out_prefix;
1708     outFre_name += ".patInfo";
1709 
1710     inputAlign.printPatObsExpFre(outFre_name.c_str());
1711 
1712     string gboAln_name = params.out_prefix;
1713     gboAln_name += ".gbo";
1714 
1715     MaAlignment gboAlign;
1716     double prob;
1717     gboAlign.generateExpectedAlignment(&inputAlign, prob);
1718     gboAlign.printAlignment(IN_PHYLIP, gboAln_name.c_str());
1719 
1720 
1721     string outProb_name = params.out_prefix;
1722     outProb_name += ".gbo.logP";
1723     try {
1724         ofstream outProb;
1725         outProb.exceptions(ios::failbit | ios::badbit);
1726         outProb.open(outProb_name.c_str());
1727         outProb.precision(10);
1728         outProb << prob << endl;
1729         outProb.close();
1730     } catch (ios::failure) {
1731         outError(ERR_WRITE_OUTPUT, outProb_name);
1732     }
1733 
1734     cout << "Information about patterns in the input alignment is printed to: " << outFre_name << endl;
1735     cout << "A 'guided bootstrap' alignment is printed to: " << gboAln_name << endl;
1736     cout << "Log of the probability of the new alignment is printed to: " << outProb_name << endl;
1737 }
1738 
1739 /**MINH ANH: to compute the probability of an alignment given the multinomial distribution of patterns frequencies derived from a reference alignment*/
computeMulProb(Params & params)1740 void computeMulProb(Params &params)
1741 {
1742     Alignment refAlign(params.second_align, params.sequence_type, params.intype, params.model_name);
1743     Alignment inputAlign(params.aln_file, params.sequence_type, params.intype, params.model_name);
1744     double prob;
1745     inputAlign.multinomialProb(refAlign,prob);
1746     //Printing
1747     string outProb_name = params.out_prefix;
1748     outProb_name += ".mprob";
1749     try {
1750         ofstream outProb;
1751         outProb.exceptions(ios::failbit | ios::badbit);
1752         outProb.open(outProb_name.c_str());
1753         outProb.precision(10);
1754         outProb << prob << endl;
1755         outProb.close();
1756     } catch (ios::failure) {
1757         outError(ERR_WRITE_OUTPUT, outProb_name);
1758     }
1759     cout << "Probability of alignment " << params.aln_file << " given alignment " << params.second_align << " is: " << prob << endl;
1760     cout << "The probability is printed to: " << outProb_name << endl;
1761 }
1762 
processNCBITree(Params & params)1763 void processNCBITree(Params &params) {
1764     NCBITree tree;
1765     Node *dad = tree.readNCBITree(params.user_file, params.ncbi_taxid, params.ncbi_taxon_level, params.ncbi_ignore_level);
1766     if (params.ncbi_names_file) tree.readNCBINames(params.ncbi_names_file);
1767 
1768     cout << "Dad ID: " << dad->name << " Root ID: " << tree.root->name << endl;
1769     string str = params.user_file;
1770     str += ".tree";
1771     if (params.out_file) str = params.out_file;
1772     //tree.printTree(str.c_str(), WT_SORT_TAXA | WT_BR_LEN);
1773     cout << "NCBI tree printed to " << str << endl;
1774     try {
1775         ofstream out;
1776         out.exceptions(ios::failbit | ios::badbit);
1777         out.open(str.c_str());
1778         tree.printTree(out, WT_SORT_TAXA | WT_BR_LEN | WT_TAXON_ID, tree.root, dad);
1779         out << ";" << endl;
1780         out.close();
1781     } catch (ios::failure) {
1782         outError(ERR_WRITE_OUTPUT, str);
1783     }
1784 }
1785 
1786 /* write simultaneously to cout/cerr and a file */
1787 class outstreambuf : public streambuf {
1788 public:
1789     outstreambuf* open( const char* name, ios::openmode mode = ios::out);
1790     bool is_open();
1791     outstreambuf* close();
~outstreambuf()1792     ~outstreambuf() { close(); }
get_fout_buf()1793     streambuf *get_fout_buf() {
1794         return fout_buf;
1795     }
get_cout_buf()1796     streambuf *get_cout_buf() {
1797         return cout_buf;
1798     }
get_fout()1799     ofstream *get_fout() {
1800         return &fout;
1801     }
1802 
1803 protected:
1804     ofstream fout;
1805     streambuf *cout_buf;
1806     streambuf *fout_buf;
1807     virtual int     overflow( int c = EOF);
1808     virtual int     sync();
1809 };
1810 
open(const char * name,ios::openmode mode)1811 outstreambuf* outstreambuf::open( const char* name, ios::openmode mode) {
1812     if (!(Params::getInstance().suppress_output_flags & OUT_LOG) && MPIHelper::getInstance().isMaster()) {
1813         fout.open(name, mode);
1814         if (!fout.is_open()) {
1815             cerr << "ERROR: Could not open " << name << " for logging" << endl;
1816             exit(EXIT_FAILURE);
1817             return NULL;
1818         }
1819         fout_buf = fout.rdbuf();
1820     }
1821     cout_buf = cout.rdbuf();
1822     cout.rdbuf(this);
1823     return this;
1824 }
1825 
is_open()1826 bool outstreambuf::is_open() {
1827     return fout.is_open();
1828 }
1829 
close()1830 outstreambuf* outstreambuf::close() {
1831     cout.rdbuf(cout_buf);
1832     if ( fout.is_open()) {
1833         sync();
1834         fout.close();
1835         return this;
1836     }
1837     return NULL;
1838 }
1839 
overflow(int c)1840 int outstreambuf::overflow( int c) { // used for output buffer only
1841     if ((verbose_mode >= VB_MIN && MPIHelper::getInstance().isMaster()) || verbose_mode >= VB_MED)
1842         if (cout_buf->sputc(c) == EOF) return EOF;
1843     if (Params::getInstance().suppress_output_flags & OUT_LOG)
1844         return c;
1845     if (!MPIHelper::getInstance().isMaster())
1846         return c;
1847     if (fout_buf->sputc(c) == EOF) return EOF;
1848     return c;
1849 }
1850 
1851 
1852 
sync()1853 int outstreambuf::sync() { // used for output buffer only
1854     if ((verbose_mode >= VB_MIN && MPIHelper::getInstance().isMaster()) || verbose_mode >= VB_MED)
1855         cout_buf->pubsync();
1856     if ((Params::getInstance().suppress_output_flags & OUT_LOG) || !MPIHelper::getInstance().isMaster())
1857         return 0;
1858     return fout_buf->pubsync();
1859 }
1860 
1861 class errstreambuf : public streambuf {
1862 public:
init(streambuf * fout_buf)1863     void init(streambuf *fout_buf) {
1864         this->fout_buf = fout_buf;
1865         cerr_buf = cerr.rdbuf();
1866         cerr.rdbuf(this);
1867         new_line = true;
1868     }
1869 
~errstreambuf()1870     ~errstreambuf() {
1871         cerr.rdbuf(cerr_buf);
1872     }
1873 
1874 protected:
1875     streambuf *cerr_buf;
1876     streambuf *fout_buf;
1877     bool new_line;
1878 
overflow(int c=EOF)1879     virtual int overflow( int c = EOF) {
1880         if (new_line)
1881             cerr_buf->sputn("ERROR: ", 7);
1882         if (cerr_buf->sputc(c) == EOF) {
1883             new_line = false;
1884             if (c == '\n') new_line = true;
1885             return EOF;
1886         }
1887         if ((Params::getInstance().suppress_output_flags & OUT_LOG)) {
1888             new_line = false;
1889             if (c == '\n') new_line = true;
1890             return c;
1891         }
1892         if (new_line)
1893             fout_buf->sputn("ERROR: ", 7);
1894         new_line = false;
1895         if (c == '\n') new_line = true;
1896         if (fout_buf->sputc(c) == EOF) return EOF;
1897         return c;
1898     }
1899 
sync()1900     virtual int sync() {
1901         cerr_buf->pubsync();
1902         if (Params::getInstance().suppress_output_flags & OUT_LOG)
1903             return 0;
1904         return fout_buf->pubsync();
1905     }
1906 };
1907 
1908 class muststreambuf : public streambuf {
1909 public:
init(streambuf * cout_buf,streambuf * fout_buf)1910     void init(streambuf *cout_buf, streambuf *fout_buf) {
1911         this->fout_buf = fout_buf;
1912         this->cout_buf = cout_buf;
1913     }
1914 
1915 protected:
1916     streambuf *cout_buf;
1917     streambuf *fout_buf;
1918 
overflow(int c=EOF)1919     virtual int overflow( int c = EOF) {
1920         if (cout_buf->sputc(c) == EOF) {
1921             return EOF;
1922         }
1923         if (fout_buf->sputc(c) == EOF) return EOF;
1924         return c;
1925     }
1926 
sync()1927     virtual int sync() {
1928         cout_buf->pubsync();
1929         return fout_buf->pubsync();
1930     }
1931 };
1932 
1933 
1934 /*********************************************************************************
1935  * GLOBAL VARIABLES
1936  *********************************************************************************/
1937 outstreambuf _out_buf;
1938 errstreambuf _err_buf;
1939 muststreambuf _must_buf;
1940 ostream cmust(&_must_buf);
1941 
1942 string _log_file;
1943 int _exit_wait_optn = FALSE;
1944 
startLogFile(bool append_log)1945 extern "C" void startLogFile(bool append_log) {
1946     if (append_log)
1947         _out_buf.open(_log_file.c_str(), ios::app);
1948     else
1949         _out_buf.open(_log_file.c_str());
1950     _err_buf.init(_out_buf.get_fout_buf());
1951     _must_buf.init(_out_buf.get_cout_buf(), _out_buf.get_fout_buf());
1952 }
1953 
endLogFile()1954 extern "C" void endLogFile() {
1955     if (_out_buf.is_open())
1956         _out_buf.close();
1957 }
1958 
funcExit(void)1959 void funcExit(void) {
1960     if(_exit_wait_optn) {
1961         printf("\npress [return] to finish: ");
1962         fflush(stdout);
1963         while (getchar() != '\n');
1964     }
1965 
1966     endLogFile();
1967     MPIHelper::getInstance().finalize();
1968 }
1969 
funcAbort(int signal_number)1970 extern "C" void funcAbort(int signal_number)
1971 {
1972     /*Your code goes here. You can output debugging info.
1973       If you return from this function, and it was called
1974       because abort() was called, your program will exit or crash anyway
1975       (with a dialog box on Windows).
1976      */
1977 #if (defined(__GNUC__) || defined(__clang__)) && !defined(WIN32) && !defined(__CYGWIN__)
1978     print_stacktrace(cerr);
1979 #endif
1980 
1981     cerr << endl << "*** IQ-TREE CRASHES WITH SIGNAL ";
1982     switch (signal_number) {
1983         case SIGABRT: cerr << "ABORTED"; break;
1984         case SIGFPE:  cerr << "ERRONEOUS NUMERIC"; break;
1985         case SIGILL:  cerr << "ILLEGAL INSTRUCTION"; break;
1986         case SIGSEGV: cerr << "SEGMENTATION FAULT"; break;
1987 #if !defined WIN32 && !defined _WIN32 && !defined __WIN32__
1988         case SIGBUS: cerr << "BUS ERROR"; break;
1989 #endif
1990     }
1991     cerr << endl;
1992     cerr << "*** For bug report please send to developers:" << endl << "***    Log file: " << _log_file;
1993     cerr << endl << "***    Alignment files (if possible)" << endl;
1994     funcExit();
1995     signal(signal_number, SIG_DFL);
1996 }
1997 
getintargv(int * argc,char ** argv[])1998 extern "C" void getintargv(int *argc, char **argv[])
1999 {
2000     int    done;
2001     int    count;
2002     int    n;
2003     int    l;
2004     char   ch;
2005     char  *argtmp;
2006     char **argstr;
2007 
2008     argtmp = (char  *)calloc(10100, sizeof(char));
2009     argstr = (char **)calloc(100, sizeof(char*));
2010     for(n=0; n<100; n++) {
2011         argstr[n] = &(argtmp[n * 100]);
2012     }
2013     n=1;
2014 
2015     fprintf(stdout, "\nYou seem to have click-started this program,");
2016     fprintf(stdout, "\ndo you want to enter commandline parameters: [y]es, [n]o: ");
2017     fflush(stdout);
2018 
2019     /* read one char */
2020     ch = getc(stdin);
2021     if (ch != '\n') {
2022         do ;
2023         while (getc(stdin) != '\n');
2024     }
2025     ch = (char) tolower((int) ch);
2026 
2027     if (ch == 'y') {
2028         done=FALSE;
2029 
2030         fprintf(stdout, "\nEnter single parameter [! for none]: ");
2031         fflush(stdout);
2032         count = fscanf(stdin, "%s", argstr[n]);
2033         do ;
2034         while (getc(stdin) != '\n');
2035 
2036         if(argstr[0][0] == '!') {
2037             count = 0;
2038         } else {
2039             if (strlen(argstr[n]) > 100) {
2040                 fprintf(stdout, "\nParameter too long!!!\n");
2041             } else {
2042                 n++;
2043             }
2044         }
2045 
2046         while(!done) {
2047             fprintf(stdout, "\nCurrent commandline: ");
2048             for(l=1; l<n; l++) {
2049                 fprintf(stdout, "%s ", argstr[l]);
2050             }
2051             fprintf(stdout, "\nQuit [q]; confirm [y]%s%s%s: ",
2052                 (n<99 ? ", extend [e]" : ""),
2053                 (n>1 ? ", delete last [l]" : ""),
2054                 (n>1 ? ", delete all [a]" : ""));
2055             fflush(stdout);
2056 
2057             /* read one char */
2058             ch = getc(stdin);
2059             /* ch = getchar(); */
2060             if (ch != '\n') {
2061                 do ;
2062                 while (getc(stdin) != '\n');
2063                 /* while (getchar() != '\n'); */
2064             }
2065             ch = (char) tolower((int) ch);
2066 
2067             switch (ch) {
2068                 case 'y':
2069                     done=TRUE;
2070                     break;
2071                 case 'e':
2072                     fprintf(stdout, "\nEnter single parameter [! for none]: ");
2073                     fflush(stdout);
2074                     count = fscanf(stdin, "%s", argstr[n]);
2075                     do ;
2076                     while (getc(stdin) != '\n');
2077 
2078                     if(argstr[0][0] == '!') {
2079                         count = 0;
2080                     } else {
2081                         if (strlen(argstr[n]) > 100) {
2082                             fprintf(stdout, "\nParameter too long!!!\n");
2083                         } else {
2084                             n++;
2085                         }
2086                     }
2087                     break;
2088                 case 'l':
2089                     if (n>1) n--;
2090                     break;
2091                 case 'a':
2092                     n=1;
2093                     break;
2094                 case 'q':
2095                        // tp_exit(0, NULL, FALSE, __FILE__, __LINE__, _exit_wait_optn);
2096                     if(_exit_wait_optn) {
2097                         printf("\npress [return] to finish: ");
2098                         fflush(stdout);
2099                         while (getchar() != '\n');
2100                     }
2101                     exit(0);
2102                     break;
2103             }
2104         }
2105     }
2106 
2107     *argc = n;
2108     *argv = argstr;
2109 } /* getintargv */
2110 
2111 /*********************************************************************************************************************************
2112     Olga: ECOpd - phylogenetic diversity with ecological constraint: choosing a viable subset of species which maximizes PD/SD
2113 *********************************************************************************************************************************/
2114 
processECOpd(Params & params)2115 void processECOpd(Params &params) {
2116     double startTime = getCPUTime();
2117     params.detected_mode = LINEAR_PROGRAMMING;
2118     cout<<"----------------------------------------------------------------------------------------"<<endl;
2119     int i;
2120     double score;
2121     double *variables;
2122     int threads = params.gurobi_threads;
2123     params.gurobi_format=true;
2124 
2125     string model_file,subFoodWeb,outFile;
2126 
2127     model_file = params.out_prefix;
2128     model_file += ".lp";
2129 
2130     subFoodWeb = params.out_prefix;
2131     subFoodWeb += ".subFoodWeb";
2132 
2133     outFile = params.out_prefix;
2134     outFile += ".pda";
2135 
2136     //Defining the input phylo type: t - rooted/unrooted tree, n - split network
2137     params.intype=detectInputFile(params.user_file);
2138     if(params.intype == IN_NEWICK){
2139         params.eco_type = "t";
2140     } else if(params.intype == IN_NEXUS){
2141         params.eco_type = "n";
2142     }
2143 
2144     // Checking whether to treat the food web as weighted or non weighted
2145     if(params.diet_max == 0){
2146         params.eco_weighted = false;
2147     }else if(params.diet_max > 100 || params.diet_max < 0){
2148         cout<<"The minimum percentage of the diet to be conserved for each predator"<<endl;
2149         cout<<"d = "<<params.diet_max<<endl;
2150         cout<<"ERROR: Wrong value of parameter d. It must be within the range 0 <= d <= 100"<<endl;
2151         exit(0);
2152     }else{
2153         params.eco_weighted = true;
2154     }
2155 
2156     if(strcmp(params.eco_type,"t")==0){
2157     /*--------------------------------- EcoPD Trees ---------------------------------*/
2158         ECOpd tree(params.user_file,params.is_rooted);
2159 
2160         // Setting all the information-----------------
2161         tree.phyloType = "t";
2162         tree.TaxaNUM = tree.leafNum;
2163         if(verbose_mode == VB_MAX){
2164             cout<<"TaxaNUM = "<<tree.TaxaNUM<<endl;
2165             cout<<"LeafNUM = "<<tree.leafNum<<endl;
2166             cout<<"root_id = "<<tree.root->id<<" root_name = "<<tree.root->name<<endl;
2167 
2168             for(i=0; i<tree.leafNum; i++){
2169                 cout<<i<<" "<<tree.findNodeID(i)->name <<endl;
2170             }
2171         }
2172 
2173         //Getting Species Names from tree
2174         for(i = 0; i < tree.TaxaNUM; i++)
2175             (tree.phyloNames).push_back(tree.findNodeID(i)->name);
2176         //for(i=0;i<tree.phyloNames.size();i++)
2177         //    cout<<"["<<i<<"] "<<tree.phyloNames[i]<<endl;
2178 
2179         // Full species list including info from tree and food web. Here adding names from phyloInput.
2180         for(i=0; i<tree.TaxaNUM; i++)
2181             tree.names.push_back(&(tree.phyloNames[i]));
2182 
2183         // Read the taxa to be included in the final optimal subset
2184         if(params.initial_file)
2185             tree.readInitialTaxa(params.initial_file);
2186 
2187         // Read the DAG file, Synchronize species on the Tree and in the Food Web
2188         tree.weighted = params.eco_weighted;
2189         tree.T = params.diet_max*0.01;
2190         tree.readDAG(params.eco_dag_file);
2191         tree.defineK(params);
2192 
2193         // IP formulation
2194         cout<<"Formulating an IP problem..."<<endl;
2195         if(tree.rooted){
2196             tree.printECOlpRooted(model_file.c_str(),tree);
2197         } else {
2198             tree.printECOlpUnrooted(model_file.c_str(),tree);
2199         }
2200 
2201         // Solve IP problem
2202         cout<<"Solving the problem..."<<endl;
2203         variables = new double[tree.nvar];
2204         int g_return = gurobi_solve((char*)model_file.c_str(), tree.nvar, &score, variables, verbose_mode, threads);
2205         if(verbose_mode == VB_MAX){
2206             cout<<"GUROBI finished with "<<g_return<<" return."<<endl;
2207             for(i=0; i<tree.nvar; i++)
2208                 cout<<"x"<<i<<" = "<<variables[i]<<endl;
2209             cout<<"score = "<<score<<endl;
2210         }
2211         tree.dietConserved(variables);
2212         params.run_time = getCPUTime() - startTime;
2213         tree.printResults((char*)outFile.c_str(),variables,score,params);
2214         tree.printSubFoodWeb((char*)subFoodWeb.c_str(),variables);
2215         delete[] variables;
2216 
2217     } else if(strcmp(params.eco_type,"n")==0){
2218     /*----------------------------- EcoPD SplitNetwork ------------------------------*/
2219         params.intype=detectInputFile(params.user_file);
2220         PDNetwork splitSYS(params);
2221         ECOpd ecoInfDAG;
2222 
2223         // Get the species names from SplitNetwork
2224         splitSYS.speciesList(&(ecoInfDAG.phyloNames));
2225         //for(i=0;i<ecoInfDAG.phyloNames.size();i++)
2226         //    cout<<"["<<i<<"] "<<ecoInfDAG.phyloNames[i]<<endl;
2227 
2228         ecoInfDAG.phyloType = "n";
2229         ecoInfDAG.TaxaNUM = splitSYS.getNTaxa();
2230 
2231         // Full species list including info from tree and food web
2232         for(i=0; i<ecoInfDAG.TaxaNUM; i++)
2233             ecoInfDAG.names.push_back(&(ecoInfDAG.phyloNames[i]));
2234 
2235         ecoInfDAG.weighted = params.eco_weighted;
2236         // Read the taxa to be included in the final optimal subset
2237         if(params.initial_file)
2238             ecoInfDAG.readInitialTaxa(params.initial_file);
2239         ecoInfDAG.T = params.diet_max*0.01;
2240         ecoInfDAG.readDAG(params.eco_dag_file);
2241         ecoInfDAG.defineK(params);
2242 
2243         cout<<"Formulating an IP problem..."<<endl;
2244         splitSYS.transformEcoLP(params, model_file.c_str(), 0);
2245         /**
2246          * (subset_size-4) - influences constraints for conserved splits.
2247          * should be less than taxaNUM in the split system.
2248          * With 0 prints all the constraints.
2249          * Values different of 0 reduce the # of constraints.
2250          **/
2251 
2252         ecoInfDAG.printInfDAG(model_file.c_str(),splitSYS,params);
2253         cout<<"Solving the problem..."<<endl;
2254         variables = new double[ecoInfDAG.nvar];
2255         int g_return = gurobi_solve((char*)model_file.c_str(), ecoInfDAG.nvar, &score, variables, verbose_mode, threads);
2256         if(verbose_mode == VB_MAX){
2257             cout<<"GUROBI finished with "<<g_return<<" return."<<endl;
2258             for(i=0; i<ecoInfDAG.nvar; i++)
2259                 cout<<"x"<<i<<" = "<<variables[i]<<endl;
2260             cout<<"score = "<<score<<endl;
2261         }
2262         ecoInfDAG.splitsNUM = splitSYS.getNSplits();
2263         ecoInfDAG.totalSD = splitSYS.calcWeight();
2264         ecoInfDAG.dietConserved(variables);
2265         params.run_time = getCPUTime() - startTime;
2266         ecoInfDAG.printResults((char*)outFile.c_str(),variables, score,params);
2267         ecoInfDAG.printSubFoodWeb((char*)subFoodWeb.c_str(),variables);
2268         delete[] variables;
2269     }
2270 }
2271 
collapseLowBranchSupport(char * user_file,char * split_threshold_str)2272 void collapseLowBranchSupport(char *user_file, char *split_threshold_str) {
2273     DoubleVector minsup;
2274     convert_double_vec(split_threshold_str, minsup, '/');
2275     if (minsup.empty())
2276         outError("wrong -minsupnew argument, please use back-slash separated string");
2277     MExtTree tree;
2278     bool isrooted = false;
2279     tree.readTree(user_file, isrooted);
2280     tree.collapseLowBranchSupport(minsup);
2281     tree.collapseZeroBranches(NULL, NULL, -1.0);
2282     if (verbose_mode >= VB_MED)
2283         tree.drawTree(cout);
2284     string outfile = (string)user_file + ".collapsed";
2285     tree.printTree(outfile.c_str());
2286     cout << "Tree with collapsed branches written to " << outfile << endl;
2287 }
2288 
2289 
2290 /********************************************************
2291     main function
2292 ********************************************************/
2293 /*
2294 int main(){
2295     IQTree tree;
2296     char * str = "(1, (2, 345));";
2297     string k;
2298     tree.pllConvertTaxaID2IQTreeForm(str, k);
2299     cout << str << endl;
2300     cout << k << endl;
2301     cout << "WHAT" << endl;
2302     return 0;
2303 }
2304 */
2305 
2306 
main(int argc,char * argv[])2307 int main(int argc, char *argv[]) {
2308 
2309     /*
2310     Instruction set ID reported by vectorclass::instrset_detect
2311     0           = 80386 instruction set
2312     1  or above = SSE (XMM) supported by CPU (not testing for O.S. support)
2313     2  or above = SSE2
2314     3  or above = SSE3
2315     4  or above = Supplementary SSE3 (SSSE3)
2316     5  or above = SSE4.1
2317     6  or above = SSE4.2
2318     7  or above = AVX supported by CPU and operating system
2319     8  or above = AVX2
2320     9  or above = AVX512F
2321     */
2322     int instruction_set;
2323 
2324     MPIHelper::getInstance().init(argc, argv);
2325 
2326     atexit(funcExit);
2327 
2328     /*************************/
2329     { /* local scope */
2330         int found = FALSE;              /* "click" found in cmd name? */
2331         int n, dummyint;
2332         char *tmpstr;
2333         int intargc;
2334         char **intargv;
2335         intargc = 0;
2336         intargv = NULL;
2337 
2338         for (n = strlen(argv[0]) - 5;
2339              (n >= 0) && !found && (argv[0][n] != '/')
2340              && (argv[0][n] != '\\'); n--) {
2341 
2342             tmpstr = &(argv[0][n]);
2343             dummyint = 0;
2344             (void) sscanf(tmpstr, "click%n", &dummyint);
2345             if (dummyint == 5) found = TRUE;
2346             else {
2347                 dummyint = 0;
2348                 (void) sscanf(tmpstr, "CLICK%n", &dummyint);
2349                 if (dummyint == 5) found = TRUE;
2350                 else {
2351                     dummyint = 0;
2352                     (void) sscanf(tmpstr, "Click%n", &dummyint);
2353                     if (dummyint == 5) found = TRUE;
2354                 }
2355             }
2356         }
2357         if (found) _exit_wait_optn = TRUE;
2358 
2359         if (_exit_wait_optn) { // get commandline parameters from keyboard
2360             getintargv(&intargc, &intargv);
2361             fprintf(stdout, "\n\n");
2362             if (intargc > 1) { // if there were option entered, use them as argc/argv
2363                 argc = intargc;
2364                 argv = intargv;
2365             }
2366         }
2367     } /* local scope */
2368     /*************************/
2369 
2370     parseArg(argc, argv, Params::getInstance());
2371 
2372     // 2015-12-05
2373     Checkpoint *checkpoint = new Checkpoint;
2374     string filename = (string)Params::getInstance().out_prefix +".ckp.gz";
2375     checkpoint->setFileName(filename);
2376 
2377     bool append_log = false;
2378 
2379     if (!Params::getInstance().ignore_checkpoint && fileExists(filename)) {
2380         checkpoint->load();
2381         if (checkpoint->hasKey("finished")) {
2382             if (checkpoint->getBool("finished")) {
2383                 if (Params::getInstance().force_unfinished) {
2384                     if (MPIHelper::getInstance().isMaster())
2385                         cout << "NOTE: Continue analysis although a previous run already finished" << endl;
2386                 } else {
2387                     delete checkpoint;
2388                     if (MPIHelper::getInstance().isMaster())
2389                         outError("Checkpoint (" + filename + ") indicates that a previous run successfully finished\n" +
2390                             "Use `-redo` option if you really want to redo the analysis and overwrite all output files.\n" +
2391                             "Use `--redo-tree` option if you want to restore ModelFinder and only redo tree search.\n" +
2392                             "Use `--undo` option if you want to continue previous run when changing/adding options."
2393                         );
2394                     else
2395                         exit(EXIT_SUCCESS);
2396                     exit(EXIT_FAILURE);
2397                 }
2398             } else {
2399                 append_log = true;
2400             }
2401         } else {
2402             if (MPIHelper::getInstance().isMaster())
2403                 outWarning("Ignore invalid checkpoint file " + filename);
2404             checkpoint->clear();
2405         }
2406     }
2407 
2408     // after loading, workers are not allowed to write checkpoint anymore
2409     if (MPIHelper::getInstance().isWorker())
2410         checkpoint->setFileName("");
2411 
2412     _log_file = Params::getInstance().out_prefix;
2413     _log_file += ".log";
2414     startLogFile(append_log);
2415     time_t start_time;
2416 
2417     if (append_log) {
2418         cout << endl << "******************************************************"
2419              << endl << "CHECKPOINT: Resuming analysis from " << filename << endl << endl;
2420     }
2421 
2422     MPIHelper::getInstance().syncRandomSeed();
2423 
2424     signal(SIGABRT, &funcAbort);
2425     signal(SIGFPE, &funcAbort);
2426     signal(SIGILL, &funcAbort);
2427     signal(SIGSEGV, &funcAbort);
2428 #if !defined WIN32 && !defined _WIN32 && !defined __WIN32__
2429     signal(SIGBUS, &funcAbort);
2430 #endif
2431     printCopyright(cout);
2432 
2433     /*
2434     double x=1e-100;
2435     double y=1e-101;
2436     if (x > y) cout << "ok!" << endl;
2437     else cout << "shit!" << endl;
2438     */
2439     //FILE *pfile = popen("hostname","r");
2440     char hostname[100];
2441 #if defined WIN32 || defined _WIN32 || defined __WIN32__
2442     WSADATA wsaData;
2443     WSAStartup(MAKEWORD(2, 2), &wsaData);
2444     gethostname(hostname, sizeof(hostname));
2445     WSACleanup();
2446 #else
2447     gethostname(hostname, sizeof(hostname));
2448 #endif
2449     //fgets(hostname, sizeof(hostname), pfile);
2450     //pclose(pfile);
2451 
2452     instruction_set = instrset_detect();
2453 #if defined(BINARY32) || defined(__NOAVX__)
2454     instruction_set = min(instruction_set, (int)LK_SSE42);
2455 #endif
2456     if (instruction_set < LK_SSE2) outError("Your CPU does not support SSE2!");
2457     bool has_fma3 = (instruction_set >= LK_AVX) && hasFMA3();
2458 
2459 #ifdef __FMA__
2460     bool has_fma =  has_fma3;
2461     if (!has_fma) {
2462         outError("Your CPU does not support FMA instruction, quiting now...");
2463     }
2464 #endif
2465 
2466     cout << "Host:    " << hostname << " (";
2467     switch (instruction_set) {
2468     case 0: cout << "x86, "; break;
2469     case 1: cout << "SSE, "; break;
2470     case 2: cout << "SSE2, "; break;
2471     case 3: cout << "SSE3, "; break;
2472     case 4: cout << "SSSE3, "; break;
2473     case 5: cout << "SSE4.1, "; break;
2474     case 6: cout << "SSE4.2, "; break;
2475     case 7: cout << "AVX, "; break;
2476     case 8: cout << "AVX2, "; break;
2477     default: cout << "AVX512, "; break;
2478     }
2479     if (has_fma3) cout << "FMA3, ";
2480 //    if (has_fma4) cout << "FMA4, ";
2481 //#if defined __APPLE__ || defined __MACH__
2482     cout << (int)(((getMemorySize()/1024.0)/1024)/1024) << " GB RAM)" << endl;
2483 //#else
2484 //    cout << (int)(((getMemorySize()/1000.0)/1000)/1000) << " GB RAM)" << endl;
2485 //#endif
2486 
2487     cout << "Command:";
2488     int i;
2489     for (i = 0; i < argc; i++)
2490         cout << " " << argv[i];
2491     cout << endl;
2492 
2493     checkpoint->get("iqtree.seed", Params::getInstance().ran_seed);
2494     cout << "Seed:    " << Params::getInstance().ran_seed <<  " ";
2495     init_random(Params::getInstance().ran_seed + MPIHelper::getInstance().getProcessID(), true);
2496 
2497     time(&start_time);
2498     cout << "Time:    " << ctime(&start_time);
2499 
2500     // increase instruction set level with FMA
2501     if (has_fma3 && instruction_set < LK_AVX_FMA)
2502         instruction_set = LK_AVX_FMA;
2503 
2504     Params::getInstance().SSE = min(Params::getInstance().SSE, (LikelihoodKernel)instruction_set);
2505 
2506     cout << "Kernel:  ";
2507 
2508     if (Params::getInstance().lk_safe_scaling) {
2509         cout << "Safe ";
2510     }
2511 
2512     if (Params::getInstance().pll) {
2513 #ifdef __AVX__
2514         cout << "PLL-AVX";
2515 #else
2516         cout << "PLL-SSE3";
2517 #endif
2518     } else {
2519         if (Params::getInstance().SSE >= LK_AVX512)
2520             cout << "AVX-512";
2521         else if (Params::getInstance().SSE >= LK_AVX_FMA) {
2522             cout << "AVX+FMA";
2523         } else if (Params::getInstance().SSE >= LK_AVX) {
2524             cout << "AVX";
2525         } else if (Params::getInstance().SSE >= LK_SSE2) {
2526             cout << "SSE2";
2527         } else
2528             cout << "x86";
2529     }
2530 
2531 #ifdef _OPENMP
2532     if (Params::getInstance().num_threads >= 1) {
2533         omp_set_num_threads(Params::getInstance().num_threads);
2534         Params::getInstance().num_threads = omp_get_max_threads();
2535     }
2536 //    int max_threads = omp_get_max_threads();
2537     int max_procs = countPhysicalCPUCores();
2538     cout << " - ";
2539     if (Params::getInstance().num_threads > 0)
2540         cout << Params::getInstance().num_threads  << " threads";
2541     else
2542         cout << "auto-detect threads";
2543     cout << " (" << max_procs << " CPU cores detected)";
2544     if (Params::getInstance().num_threads  > max_procs) {
2545         cout << endl;
2546         outError("You have specified more threads than CPU cores available");
2547     }
2548     omp_set_nested(false); // don't allow nested OpenMP parallelism
2549 #else
2550     if (Params::getInstance().num_threads != 1) {
2551         cout << endl << endl;
2552         outError("Number of threads must be 1 for sequential version.");
2553     }
2554 #endif
2555 
2556 #ifdef _IQTREE_MPI
2557     cout << endl << "MPI:     " << MPIHelper::getInstance().getNumProcesses() << " processes";
2558 #endif
2559 
2560     int num_procs = countPhysicalCPUCores();
2561 #ifdef _OPENMP
2562     if (num_procs > 1 && Params::getInstance().num_threads == 1) {
2563         cout << endl << endl << "HINT: Use -nt option to specify number of threads because your CPU has " << num_procs << " cores!";
2564         cout << endl << "HINT: -nt AUTO will automatically determine the best number of threads to use.";
2565     }
2566 #else
2567     if (num_procs > 1)
2568         cout << endl << endl << "NOTE: Consider using the multicore version because your CPU has " << num_procs << " cores!";
2569 #endif
2570 
2571     //cout << "sizeof(int)=" << sizeof(int) << endl;
2572     cout << endl << endl;
2573 
2574     cout.precision(3);
2575     cout.setf(ios::fixed);
2576 
2577     // checkpoint general run information
2578     checkpoint->startStruct("iqtree");
2579     string command;
2580 
2581     if (CKP_RESTORE_STRING(command)) {
2582         // compare command between saved and current commands
2583         stringstream ss(command);
2584         string str;
2585         bool mismatch = false;
2586         for (i = 1; i < argc; i++) {
2587             if (!(ss >> str)) {
2588                 outWarning("Number of command-line arguments differs from checkpoint");
2589                 mismatch = true;
2590                 break;
2591             }
2592             if (str != argv[i]) {
2593                 outWarning((string)"Command-line argument `" + argv[i] + "` differs from checkpoint `" + str + "`");
2594                 mismatch = true;
2595             }
2596         }
2597         if (mismatch) {
2598             outWarning("Command-line differs from checkpoint!");
2599         }
2600         command = "";
2601     }
2602 
2603     for (i = 1; i < argc; i++)
2604         command += string(" ") + argv[i];
2605     CKP_SAVE(command);
2606     int seed = Params::getInstance().ran_seed;
2607     CKP_SAVE(seed);
2608     CKP_SAVE(start_time);
2609 
2610     // check for incompatible version
2611     string version;
2612     stringstream sversion;
2613     sversion << iqtree_VERSION_MAJOR << "." << iqtree_VERSION_MINOR << iqtree_VERSION_PATCH;
2614     version = sversion.str();
2615     CKP_SAVE(version);
2616     checkpoint->endStruct();
2617 
2618     if (MPIHelper::getInstance().getNumProcesses() > 1) {
2619         if (Params::getInstance().aln_file || Params::getInstance().partition_file) {
2620             runPhyloAnalysis(Params::getInstance(), checkpoint);
2621         } else {
2622             outError("Please use one MPI process! The feature you wanted does not need parallelization.");
2623         }
2624     } else
2625     // call the main function
2626     if (Params::getInstance().tree_gen != NONE) {
2627         generateRandomTree(Params::getInstance());
2628     } else if (Params::getInstance().do_pars_multistate) {
2629         doParsMultiState(Params::getInstance());
2630     } else if (Params::getInstance().rf_dist_mode != 0) {
2631         computeRFDist(Params::getInstance());
2632     } else if (Params::getInstance().test_input != TEST_NONE) {
2633         Params::getInstance().intype = detectInputFile(Params::getInstance().user_file);
2634         testInputFile(Params::getInstance());
2635     } else if (Params::getInstance().run_mode == PRINT_TAXA) {
2636         printTaxa(Params::getInstance());
2637     } else if (Params::getInstance().run_mode == PRINT_AREA) {
2638         printAreaList(Params::getInstance());
2639     } else if (Params::getInstance().run_mode == SCALE_BRANCH_LEN || Params::getInstance().run_mode == SCALE_NODE_NAME) {
2640         scaleBranchLength(Params::getInstance());
2641     } else if (Params::getInstance().run_mode == PD_DISTRIBUTION) {
2642         calcDistribution(Params::getInstance());
2643     } else if (Params::getInstance().run_mode == STATS){ /**MINH ANH: for some statistics on the input tree*/
2644         branchStats(Params::getInstance()); // MA
2645     } else if (Params::getInstance().branch_cluster > 0) {
2646         calcTreeCluster(Params::getInstance());
2647     } else if (Params::getInstance().ncbi_taxid) {
2648         processNCBITree(Params::getInstance());
2649     } else if (Params::getInstance().user_file && Params::getInstance().eco_dag_file) { /**ECOpd analysis*/
2650         processECOpd(Params::getInstance());
2651     } else if ((Params::getInstance().aln_file || Params::getInstance().partition_file) &&
2652                Params::getInstance().consensus_type != CT_ASSIGN_SUPPORT_EXTENDED)
2653     {
2654         if ((Params::getInstance().siteLL_file || Params::getInstance().second_align) && !Params::getInstance().gbo_replicates)
2655         {
2656             if (Params::getInstance().siteLL_file)
2657                 guidedBootstrap(Params::getInstance());
2658             if (Params::getInstance().second_align)
2659                 computeMulProb(Params::getInstance());
2660         } else {
2661             runPhyloAnalysis(Params::getInstance(), checkpoint);
2662         }
2663 //    } else if (Params::getInstance().ngs_file || Params::getInstance().ngs_mapped_reads) {
2664 //        runNGSAnalysis(Params::getInstance());
2665 //    } else if (Params::getInstance().pdtaxa_file && Params::getInstance().gene_scale_factor >=0.0 && Params::getInstance().gene_pvalue_file) {
2666 //        runGSSAnalysis(Params::getInstance());
2667     } else if (Params::getInstance().consensus_type != CT_NONE) {
2668         MExtTree tree;
2669         switch (Params::getInstance().consensus_type) {
2670             case CT_CONSENSUS_TREE:
2671                 computeConsensusTree(Params::getInstance().user_file, Params::getInstance().tree_burnin, Params::getInstance().tree_max_count, Params::getInstance().split_threshold,
2672                     Params::getInstance().split_weight_threshold, Params::getInstance().out_file, Params::getInstance().out_prefix, Params::getInstance().tree_weight_file, &Params::getInstance());
2673                 break;
2674             case CT_CONSENSUS_NETWORK:
2675                 computeConsensusNetwork(Params::getInstance().user_file, Params::getInstance().tree_burnin, Params::getInstance().tree_max_count, Params::getInstance().split_threshold,
2676                     Params::getInstance().split_weight_summary, Params::getInstance().split_weight_threshold, Params::getInstance().out_file, Params::getInstance().out_prefix, Params::getInstance().tree_weight_file);
2677                 break;
2678             case CT_ASSIGN_SUPPORT:
2679                 assignBootstrapSupport(Params::getInstance().user_file, Params::getInstance().tree_burnin, Params::getInstance().tree_max_count,
2680                     Params::getInstance().second_tree, Params::getInstance().is_rooted, Params::getInstance().out_file,
2681                     Params::getInstance().out_prefix, tree, Params::getInstance().tree_weight_file, &Params::getInstance());
2682                 break;
2683             case CT_ASSIGN_SUPPORT_EXTENDED:
2684                 assignBranchSupportNew(Params::getInstance());
2685                 break;
2686             case CT_NONE: break;
2687             /**MINH ANH: for some comparison*/
2688             case COMPARE: compare(Params::getInstance()); break; //MA
2689         }
2690     } else if (Params::getInstance().split_threshold_str) {
2691         // for Ricardo: keep those splits from input tree above given support threshold
2692         collapseLowBranchSupport(Params::getInstance().user_file, Params::getInstance().split_threshold_str);
2693     } else {
2694         Params::getInstance().intype = detectInputFile(Params::getInstance().user_file);
2695         if (Params::getInstance().intype == IN_NEWICK && Params::getInstance().pdtaxa_file && Params::getInstance().tree_gen == NONE) {
2696             if (Params::getInstance().budget_file) {
2697                 //if (Params::getInstance().budget < 0) Params::getInstance().run_mode = PD_USER_SET;
2698             } else {
2699                 if (Params::getInstance().sub_size < 1 && Params::getInstance().pd_proportion == 0.0)
2700                     Params::getInstance().run_mode = PD_USER_SET;
2701             }
2702             // input is a tree, check if it is a reserve selection -> convert to splits
2703             if (Params::getInstance().run_mode != PD_USER_SET) Params::getInstance().multi_tree = true;
2704         }
2705 
2706 
2707         if (Params::getInstance().intype == IN_NEWICK && !Params::getInstance().find_all && Params::getInstance().budget_file == NULL &&
2708             Params::getInstance().find_pd_min == false && Params::getInstance().calc_pdgain == false &&
2709             Params::getInstance().run_mode != LINEAR_PROGRAMMING && Params::getInstance().multi_tree == false)
2710             runPDTree(Params::getInstance());
2711         else if (Params::getInstance().intype == IN_NEXUS || Params::getInstance().intype == IN_NEWICK) {
2712             if (Params::getInstance().run_mode == LINEAR_PROGRAMMING && Params::getInstance().find_pd_min)
2713                 outError("Current linear programming does not support finding minimal PD sets!");
2714             if (Params::getInstance().find_all && Params::getInstance().run_mode == LINEAR_PROGRAMMING)
2715                 Params::getInstance().binary_programming = true;
2716             runPDSplit(Params::getInstance());
2717         } else {
2718             outError("Unknown file input format");
2719         }
2720     }
2721 
2722     time(&start_time);
2723     cout << "Date and Time: " << ctime(&start_time);
2724     delete checkpoint;
2725 
2726     finish_random();
2727 
2728     return EXIT_SUCCESS;
2729 }
2730