1 /***************************************************************************
2 * Copyright (C) 2009-2015 by *
3 * BUI Quang Minh <minh.bui@univie.ac.at> *
4 * Lam-Tung Nguyen <nltung@gmail.com> *
5 * *
6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
21 ***************************************************************************/
22
23
24
25 #include "tools.h"
26 #include "timeutil.h"
27 #include "MPIHelper.h"
28 #include <dirent.h>
29 #include <thread>
30
31 #if defined(Backtrace_FOUND)
32 #include <execinfo.h>
33 #include <cxxabi.h>
34 #endif
35
36 #include "tools.h"
37 #include "timeutil.h"
38 #include "gzstream.h"
39 #include "MPIHelper.h"
40 #include "alignment/alignment.h"
41
42 VerboseMode verbose_mode;
43 extern void printCopyright(ostream &out);
44
45 /*
46 WIN32 does not define gettimeofday() function.
47 Here declare it extra for WIN32 only.
48 */
49 //#if defined(WIN32) && !defined(HAVE_GETTIMEOFDAY)
50 #if defined(WIN32)
51 #include <sstream>
52 #endif
53 //
54 //struct timezone {
55 //};
56 //
57 //void gettimeofday(struct timeval* t, void* timezone) {
58 // struct _timeb timebuffer;
59 // _ftime(&timebuffer);
60 // t->tv_sec = timebuffer.time;
61 // t->tv_usec = 1000 * timebuffer.millitm;
62 //}
63 //#else
64 //#include <sys/time.h>
65 //#endif
66
67
68 /********************************************************
69 Defining DoubleMatrix methods
70 ********************************************************/
71
72 /*DoubleMatrix::DoubleMatrix(int arows, int acols) {
73 rows = arows;
74 cols = acols;
75 size = rows * cols;
76 value = new double[size];
77 }
78
79 void DoubleMatrix::setZero() {
80 memset(value, 0, size * sizeof(double));
81 }
82
83
84 DoubleMatrix::~DoubleMatrix() {
85 if (value) delete value;
86 value = NULL;
87 }
88 */
89
90 /********************************************************
91 Miscellaneous
92 ********************************************************/
93
94 /**
95 Output an error to screen, then exit program
96 @param error error message
97 */
98 /*
99 void outError(char *error)
100 {
101 cerr << "ERROR: " << error << endl;
102 exit(2);
103 }
104 */
105
106
107 /**
108 Output an error to screen, then exit program
109 @param error error message
110 */
outError(const char * error,bool quit)111 void outError(const char *error, bool quit) {
112 if (error == ERR_NO_MEMORY) {
113 print_stacktrace(cerr);
114 }
115 cerr << error << endl;
116 if (quit)
117 exit(2);
118 }
119
120 /**
121 Output an error to screen, then exit program
122 @param error error message
123 */
outError(string error,bool quit)124 void outError(string error, bool quit) {
125 outError(error.c_str(), quit);
126 }
127
outError(const char * error,const char * msg,bool quit)128 void outError(const char *error, const char *msg, bool quit) {
129 string str = error;
130 str += msg;
131 outError(str, quit);
132 }
133
outError(const char * error,string msg,bool quit)134 void outError(const char *error, string msg, bool quit) {
135 string str = error;
136 str += msg;
137 outError(str, quit);
138 }
139
140 /**
141 Output a warning message to screen
142 @param error warning message
143 */
outWarning(const char * warn)144 void outWarning(const char *warn) {
145 cout << "WARNING: " << warn << endl;
146 }
147
outWarning(string warn)148 void outWarning(string warn) {
149 outWarning(warn.c_str());
150 }
151
randomLen(Params & params)152 double randomLen(Params ¶ms) {
153 double ran = static_cast<double> (random_int(999) + 1) / 1000;
154 double len = -params.mean_len * log(ran);
155
156 if (len < params.min_len) {
157 int fac = random_int(1000);
158 double delta = static_cast<double> (fac) / 1000.0; //delta < 1.0
159 len = params.min_len + delta / 1000.0;
160 }
161
162 if (len > params.max_len) {
163 int fac = random_int(1000);
164 double delta = static_cast<double> (fac) / 1000.0; //delta < 1.0
165 len = params.max_len - delta / 1000.0;
166 }
167 return len;
168 }
169
safeGetline(std::istream & is,std::string & t)170 std::istream& safeGetline(std::istream& is, std::string& t)
171 {
172 t.clear();
173
174 // The characters in the stream are read one-by-one using a std::streambuf.
175 // That is faster than reading them one-by-one using the std::istream.
176 // Code that uses streambuf this way must be guarded by a sentry object.
177 // The sentry object performs various tasks,
178 // such as thread synchronization and updating the stream state.
179
180 std::istream::sentry se(is, true);
181 std::streambuf* sb = is.rdbuf();
182
183 for(;;) {
184 int c = sb->sbumpc();
185 switch (c) {
186 case '\n':
187 return is;
188 case '\r':
189 if(sb->sgetc() == '\n')
190 sb->sbumpc();
191 return is;
192 case EOF:
193 // Also handle the case when the last line has no line ending
194 if(t.empty())
195 is.setstate(std::ios::eofbit);
196 return is;
197 default:
198 t += (char)c;
199 }
200 }
201 }
202
203 //From Tung
204
convertIntToString(int number)205 string convertIntToString(int number) {
206 stringstream ss; //create a stringstream
207 ss << number; //add number to the stream
208 return ss.str(); //return a string with the contents of the stream
209 }
210
convertInt64ToString(int64_t number)211 string convertInt64ToString(int64_t number) {
212 stringstream ss; //create a stringstream
213 ss << number; //add number to the stream
214 return ss.str(); //return a string with the contents of the stream
215 }
216
convertDoubleToString(double number)217 string convertDoubleToString(double number) {
218 stringstream ss; //create a stringstream
219 ss << number; //add number to the stream
220 return ss.str(); //return a string with the contents of the stream
221 }
222
iEquals(const string a,const string b)223 bool iEquals(const string a, const string b)
224 {
225 unsigned int sz = a.size();
226 if (b.size() != sz)
227 return false;
228 for (unsigned int i = 0; i < sz; ++i)
229 if (tolower(a[i]) != tolower(b[i]))
230 return false;
231 return true;
232 }
233
234 //From Tung
235
copyFile(const char SRC[],const char DEST[])236 bool copyFile(const char SRC[], const char DEST[]) {
237 std::ifstream src; // the source file
238 std::ofstream dest; // the destination file
239
240 src.open(SRC, std::ios::binary); // open in binary to prevent jargon at the end of the buffer
241 dest.open(DEST, std::ios::binary); // same again, binary
242 if (!src.is_open() || !dest.is_open())
243 return false; // could not be copied
244
245 dest << src.rdbuf(); // copy the content
246 dest.close(); // close destination file
247 src.close(); // close source file
248
249 return true; // file copied successfully
250 }
251
fileExists(string strFilename)252 bool fileExists(string strFilename) {
253 struct stat stFileInfo;
254 bool blnReturn;
255 int intStat;
256
257 // Attempt to get the file attributes
258 intStat = stat(strFilename.c_str(), &stFileInfo);
259 if (intStat == 0) {
260 // We were able to get the file attributes
261 // so the file obviously exists.
262 blnReturn = true;
263 } else {
264 // We were not able to get the file attributes.
265 // This may mean that we don't have permission to
266 // access the folder which contains this file. If you
267 // need to do that level of checking, lookup the
268 // return values of stat which will give you
269 // more details on why stat failed.
270 blnReturn = false;
271 }
272 return (blnReturn);
273 }
274
isDirectory(const char * path)275 int isDirectory(const char *path) {
276 struct stat statbuf;
277 if (stat(path, &statbuf) != 0)
278 return 0;
279 return S_ISDIR(statbuf.st_mode);
280 }
281
isFile(const char * path)282 int isFile(const char *path) {
283 struct stat statbuf;
284 if (stat(path, &statbuf) != 0)
285 return 0;
286 return S_ISREG(statbuf.st_mode);
287 }
288
getFilesInDir(const char * path,StrVector & filenames)289 int getFilesInDir(const char *path, StrVector &filenames)
290 {
291 if (!isDirectory(path))
292 return 0;
293 string path_name = path;
294 if (path_name.back() != '/')
295 path_name.append("/");
296 DIR *dp;
297 struct dirent *ep;
298 dp = opendir (path);
299
300 if (dp != NULL)
301 {
302 while ((ep = readdir (dp)) != NULL) {
303 if (isFile((path_name + ep->d_name).c_str()))
304 filenames.push_back(ep->d_name);
305 }
306
307 (void) closedir (dp);
308 return 1;
309 }
310 else
311 return 0;
312
313 return 1;
314 }
convert_int(const char * str)315 int convert_int(const char *str) {
316 char *endptr;
317 int i = strtol(str, &endptr, 10);
318
319 if ((i == 0 && endptr == str) || abs(i) == HUGE_VALL || *endptr != 0) {
320 string err = "Expecting integer, but found \"";
321 err += str;
322 err += "\" instead";
323 throw err;
324 }
325
326 return i;
327 }
328
convert_int(const char * str,int & end_pos)329 int convert_int(const char *str, int &end_pos) {
330 char *endptr;
331 int i = strtol(str, &endptr, 10);
332
333 if ((i == 0 && endptr == str) || abs(i) == HUGE_VALL) {
334 string err = "Expecting integer, but found \"";
335 err += str;
336 err += "\" instead";
337 throw err;
338 }
339 end_pos = endptr - str;
340 return i;
341 }
342
convert_int_vec(const char * str,IntVector & vec)343 void convert_int_vec(const char *str, IntVector &vec) {
344 char *beginptr = (char*)str, *endptr;
345 vec.clear();
346 do {
347 int i = strtol(beginptr, &endptr, 10);
348
349 if ((i == 0 && endptr == beginptr) || abs(i) == HUGE_VALL) {
350 string err = "Expecting integer, but found \"";
351 err += beginptr;
352 err += "\" instead";
353 throw err;
354 }
355 vec.push_back(i);
356 if (*endptr == ',') endptr++;
357 beginptr = endptr;
358 } while (*endptr != 0);
359 }
360
361
convert_int64(const char * str)362 int64_t convert_int64(const char *str) {
363 char *endptr;
364 int64_t i = (int64_t)strtoll(str, &endptr, 10); // casted because 'long long' may be larger than int64_t
365
366 if ((i == 0 && endptr == str) || abs(i) == HUGE_VALL || *endptr != 0) {
367 string err = "Expecting large integer , but found \"";
368 err += str;
369 err += "\" instead";
370 throw err;
371 }
372
373 return i;
374 }
375
convert_int64(const char * str,int & end_pos)376 int64_t convert_int64(const char *str, int &end_pos) {
377 char *endptr;
378 int64_t i = (int64_t)strtoll(str, &endptr, 10); // casted because 'long long' may be larger than int64_t
379
380 if ((i == 0 && endptr == str) || abs(i) == HUGE_VALL) {
381 string err = "Expecting large integer, but found \"";
382 err += str;
383 err += "\" instead";
384 throw err;
385 }
386 end_pos = endptr - str;
387 return i;
388 }
389
390
convert_double(const char * str)391 double convert_double(const char *str) {
392 char *endptr;
393 double d = strtod(str, &endptr);
394 if ((d == 0.0 && endptr == str) || fabs(d) == HUGE_VALF || *endptr != 0) {
395 string err = "Expecting floating-point number, but found \"";
396 err += str;
397 err += "\" instead";
398 throw err;
399 }
400 return d;
401 }
402
convert_double(const char * str,int & end_pos)403 double convert_double(const char *str, int &end_pos) {
404 char *endptr;
405 double d = strtod(str, &endptr);
406 if ((d == 0.0 && endptr == str) || fabs(d) == HUGE_VALF) {
407 string err = "Expecting floating-point number, but found \"";
408 err += str;
409 err += "\" instead";
410 throw err;
411 }
412 end_pos = endptr - str;
413 return d;
414 }
415
convert_double_vec(const char * str,DoubleVector & vec,char separator)416 void convert_double_vec(const char *str, DoubleVector &vec, char separator) {
417 char *beginptr = (char*)str, *endptr;
418 vec.clear();
419 do {
420 double d = strtod(beginptr, &endptr);
421
422 if ((d == 0.0 && endptr == beginptr) || fabs(d) == HUGE_VALF) {
423 string err = "Expecting floating-point number, but found \"";
424 err += beginptr;
425 err += "\" instead";
426 throw err;
427 }
428 vec.push_back(d);
429 if (*endptr == separator) endptr++;
430 beginptr = endptr;
431 } while (*endptr != 0);
432 }
433
convert_time(const double sec)434 string convert_time(const double sec) {
435 int sec_int = (int) floor(sec);
436 int secs = sec_int % 60;
437 int mins = (sec_int % 3600) / 60;
438 int hours = sec_int / 3600;
439 stringstream ss;
440 ss << hours << "h:" << mins << "m:" << secs << "s";
441 return ss.str();
442 }
443
convert_range(const char * str,int & lower,int & upper,int & step_size)444 void convert_range(const char *str, int &lower, int &upper, int &step_size) {
445 char *endptr;
446 char *beginptr = (char*) str;
447
448 // parse the lower bound of the range
449 int d = strtol(str, &endptr, 10);
450 if ((d == 0 && endptr == str) || abs(d) == HUGE_VALL || (*endptr != 0 && *endptr != ':')) {
451 string err = "Expecting integer, but found \"";
452 err += str;
453 err += "\" instead";
454 throw err;
455 }
456 //lower = d;
457 int d_save = d;
458 upper = d;
459 if (*endptr == 0) return;
460
461
462 // parse the upper bound of the range
463 str = endptr + 1;
464 d = strtol(str, &endptr, 10);
465 if ((d == 0 && endptr == str) || abs(d) == HUGE_VALL || (*endptr != 0 && *endptr != ':')) {
466 string err = "Expecting integer, but found \"";
467 err += str;
468 err += "\" instead";
469 throw err;
470 }
471
472 lower = d_save;
473 upper = d;
474 if (*endptr == 0) return;
475
476 // parse the step size of the range
477 str = endptr + 1;
478 d = strtol(str, &endptr, 10);
479 if ((d == 0 && endptr == str) || abs(d) == HUGE_VALL || *endptr != 0) {
480 string err = "Expecting integer, but found \"";
481 err += str;
482 err += "\" instead";
483 throw err;
484 }
485
486 step_size = d;
487 str = beginptr;
488
489 }
490
convert_range(const char * str,double & lower,double & upper,double & step_size)491 void convert_range(const char *str, double &lower, double &upper, double &step_size) {
492 char *endptr;
493 char *beginptr = (char*) str;
494
495 // parse the lower bound of the range
496 double d = strtod(str, &endptr);
497 if ((d == 0.0 && endptr == str) || fabs(d) == HUGE_VALF || (*endptr != 0 && *endptr != ':')) {
498 string err = "Expecting floating-point number, but found \"";
499 err += str;
500 err += "\" instead";
501 throw err;
502 }
503 //lower = d;
504 double d_save = d;
505 upper = d;
506 if (*endptr == 0) return;
507
508
509 // parse the upper bound of the range
510 str = endptr + 1;
511 d = strtod(str, &endptr);
512 if ((d == 0.0 && endptr == str) || fabs(d) == HUGE_VALF || (*endptr != 0 && *endptr != ':')) {
513 string err = "Expecting floating-point number, but found \"";
514 err += str;
515 err += "\" instead";
516 throw err;
517 }
518
519 lower = d_save;
520 upper = d;
521 if (*endptr == 0) return;
522
523 // parse the step size of the range
524 str = endptr + 1;
525 d = strtod(str, &endptr);
526 if ((d == 0.0 && endptr == str) || fabs(d) == HUGE_VALF || *endptr != 0) {
527 string err = "Expecting floating-point number, but found \"";
528 err += str;
529 err += "\" instead";
530 throw err;
531 }
532
533 step_size = d;
534 str = beginptr;
535
536 }
537
convert_string_vec(const char * str,StrVector & vec,char separator)538 void convert_string_vec(const char *str, StrVector &vec, char separator) {
539 char *beginptr = (char*)str, *endptr;
540 vec.clear();
541 string elem;
542 do {
543 endptr = strchr(beginptr, separator);
544 if (!endptr) {
545 elem.assign(beginptr);
546 vec.push_back(elem);
547 return;
548 }
549 elem.assign(beginptr, endptr-beginptr);
550 vec.push_back(elem);
551 beginptr = endptr+1;
552 } while (*endptr != 0);
553
554 }
555
renameString(string & name)556 bool renameString(string &name) {
557 bool renamed = false;
558 for (string::iterator i = name.begin(); i != name.end(); i++) {
559 if ((*i) == '/') {
560 // PLL does not accept '/' in names, turn it off
561 if (Params::getInstance().start_tree == STT_PLL_PARSIMONY)
562 Params::getInstance().start_tree = STT_PARSIMONY;
563 }
564 if (!isalnum(*i) && (*i) != '_' && (*i) != '-' && (*i) != '.' && (*i) != '|' && (*i) != '/') {
565 (*i) = '_';
566 renamed = true;
567 }
568 }
569 return renamed;
570 }
571
readWeightFile(Params & params,int ntaxa,double & scale,StrVector & tax_name,DoubleVector & tax_weight)572 void readWeightFile(Params ¶ms, int ntaxa, double &scale, StrVector &tax_name, DoubleVector &tax_weight) {
573 cout << "Reading scale factor and taxa weights file " << params.param_file << " ..." << endl;
574 try {
575 ifstream in;
576 in.exceptions(ios::failbit | ios::badbit);
577 in.open(params.param_file);
578 string name, tmp;
579
580 in >> tmp;
581 scale = convert_double(tmp.c_str());
582
583 for (; !in.eof() && ntaxa > 0; ntaxa--) {
584 // remove the failbit
585 in.exceptions(ios::badbit);
586 if (!(in >> name)) break;
587 // set the failbit again
588 in.exceptions(ios::failbit | ios::badbit);
589
590 tax_name.push_back(name);
591 // read the sequence weight
592 in >> tmp;
593 tax_weight.push_back(convert_double(tmp.c_str()));
594 }
595 in.clear();
596 // set the failbit again
597 in.exceptions(ios::failbit | ios::badbit);
598 in.close();
599 } catch (ios::failure) {
600 outError(ERR_READ_INPUT);
601 } catch (string str) {
602 outError(str);
603 }
604 }
605
readStringFile(const char * filename,int max_num,StrVector & strv)606 void readStringFile(const char* filename, int max_num, StrVector &strv) {
607 try {
608 ifstream in;
609 // set the failbit and badbit
610 in.exceptions(ios::failbit | ios::badbit);
611 in.open(filename);
612 string name;
613
614 // remove the failbit
615 in.exceptions(ios::badbit);
616 for (; !in.eof() && max_num > 0; max_num--) {
617 if (!(in >> name)) break;
618 strv.push_back(name);
619 }
620 in.clear();
621 // set the failbit again
622 in.exceptions(ios::failbit | ios::badbit);
623 in.close();
624 } catch (ios::failure) {
625 outError(ERR_READ_INPUT);
626 }
627 }
628
readInitTaxaFile(Params & params,int ntaxa,StrVector & tax_name)629 void readInitTaxaFile(Params ¶ms, int ntaxa, StrVector &tax_name) {
630 cout << "Reading initial taxa set file " << params.initial_file << " ..." << endl;
631 readStringFile(params.initial_file, ntaxa, tax_name);
632 }
633
printString2File(string myString,string filename)634 void printString2File(string myString, string filename) {
635 ofstream myfile(filename.c_str());
636 if (myfile.is_open()) {
637 myfile << myString;
638 myfile.close();
639 } else {
640 cout << "Unable to open file " << filename << endl;
641 }
642 }
643
readInitAreaFile(Params & params,int nareas,StrVector & area_name)644 void readInitAreaFile(Params ¶ms, int nareas, StrVector &area_name) {
645 cout << "Reading initial area file " << params.initial_area_file << " ..." << endl;
646 readStringFile(params.initial_area_file, nareas, area_name);
647 }
648
readAreasBoundary(char * file_name,MSetsBlock * areas,double * areas_boundary)649 void readAreasBoundary(char *file_name, MSetsBlock *areas, double *areas_boundary) {
650
651 try {
652 ifstream in;
653 in.exceptions(ios::failbit | ios::badbit);
654 in.open(file_name);
655
656 int nset;
657 in >> nset;
658 if (nset != areas->getNSets())
659 throw "File has different number of areas";
660 int pos = 0, seq1, seq2;
661 for (seq1 = 0; seq1 < nset; seq1++) {
662 string seq_name;
663 in >> seq_name;
664 if (seq_name != areas->getSet(seq1)->name)
665 throw "Area name " + seq_name + " is different from " + areas->getSet(seq1)->name;
666 for (seq2 = 0; seq2 < nset; seq2++) {
667 in >> areas_boundary[pos++];
668 }
669 }
670 // check for symmetric matrix
671 for (seq1 = 0; seq1 < nset - 1; seq1++) {
672 if (areas_boundary[seq1 * nset + seq1] <= 1e-6)
673 throw "Diagonal elements of distance matrix should represent the boundary of single areas";
674 for (seq2 = seq1 + 1; seq2 < nset; seq2++)
675 if (areas_boundary[seq1 * nset + seq2] != areas_boundary[seq2 * nset + seq1])
676 throw "Shared boundary between " + areas->getSet(seq1)->name + " and " + areas->getSet(seq2)->name + " is not symmetric";
677 }
678
679
680 in.close();
681 cout << "Areas relation matrix was read from " << file_name << endl;
682 } catch (const char *str) {
683 outError(str);
684 } catch (string str) {
685 outError(str);
686 } catch (ios::failure) {
687 outError(ERR_READ_INPUT, file_name);
688 }
689
690 }
691
readTaxaSets(char * filename,MSetsBlock * sets)692 void readTaxaSets(char *filename, MSetsBlock *sets) {
693 TaxaSetNameVector *allsets = sets->getSets();
694 try {
695 int count = 0;
696 ifstream in;
697 // set the failbit and badbit
698 in.exceptions(ios::failbit | ios::badbit);
699 in.open(filename);
700 string name;
701
702 // remove the failbit
703 in.exceptions(ios::badbit);
704 while (!in.eof()) {
705 int ntaxa = 0;
706 string str;
707 if (!(in >> str)) break;
708 ntaxa = convert_int(str.c_str());
709 if (ntaxa <= 0) throw "Number of taxa must be > 0";
710 count++;
711 //allsets->resize(allsets->size()+1);
712 TaxaSetName *myset = new TaxaSetName;
713 allsets->push_back(myset);
714 myset->name = "";
715 myset->name += count;
716 for (; ntaxa > 0; ntaxa--) {
717 string str;
718 if (!(in >> str)) throw "Cannot read in taxon name";
719 if ((ntaxa > 1) && in.eof()) throw "Unexpected end of file while reading taxon names";
720 myset->taxlist.push_back(str);
721 }
722 }
723 in.clear();
724 // set the failbit again
725 in.exceptions(ios::failbit | ios::badbit);
726 in.close();
727 if (count == 0) throw "No set found, you must specify at least 1 set";
728 } catch (ios::failure) {
729 outError(ERR_READ_INPUT);
730 } catch (const char *str) {
731 outError(str);
732 } catch (string str) {
733 outError(str);
734 }
735 }
736
get2RandNumb(const int size,int & first,int & second)737 void get2RandNumb(const int size, int &first, int &second) {
738 // pick a random element
739 first = random_int(size);
740 // pick a random element from what's left (there is one fewer to choose from)...
741 second = random_int(size - 1);
742 // ...and adjust second choice to take into account the first choice
743 if (second >= first) {
744 ++second;
745 }
746 }
747
748 void quickStartGuide();
749
parseArg(int argc,char * argv[],Params & params)750 void parseArg(int argc, char *argv[], Params ¶ms) {
751 int cnt;
752 verbose_mode = VB_MIN;
753 params.tree_gen = NONE;
754 params.user_file = NULL;
755 params.constraint_tree_file = NULL;
756 params.opt_gammai = true;
757 params.opt_gammai_fast = false;
758 params.opt_gammai_keep_bran = false;
759 params.testAlphaEpsAdaptive = false;
760 params.randomAlpha = false;
761 params.testAlphaEps = 0.1;
762 params.exh_ai = false;
763 params.alpha_invar_file = NULL;
764 params.out_prefix = NULL;
765 params.out_file = NULL;
766 params.sub_size = 0;
767 params.pd_proportion = 0.0;
768 params.min_proportion = 0.0;
769 params.step_proportion = 0.01;
770 params.min_size = 0;
771 params.step_size = 1;
772 params.find_all = false;
773 params.run_mode = DETECTED;
774 params.detected_mode = DETECTED;
775 params.param_file = NULL;
776 params.initial_file = NULL;
777 params.initial_area_file = NULL;
778 params.pdtaxa_file = NULL;
779 params.areas_boundary_file = NULL;
780 params.boundary_modifier = 1.0;
781 params.dist_file = NULL;
782 params.compute_obs_dist = false;
783 params.compute_jc_dist = true;
784 params.compute_ml_dist = true;
785 params.compute_ml_tree = true;
786 params.budget_file = NULL;
787 params.overlap = 0;
788 params.is_rooted = false;
789 params.root_move_dist = 2;
790 params.root_find = false;
791 params.root_test = false;
792 params.sample_size = -1;
793 params.repeated_time = 1;
794 //params.nr_output = 10000;
795 params.nr_output = 0;
796 //params.smode = EXHAUSTIVE;
797 params.intype = IN_OTHER;
798 params.budget = -1;
799 params.min_budget = -1;
800 params.step_budget = 1;
801 params.root = NULL;
802 params.num_splits = 0;
803 params.min_len = 0.001;
804 params.mean_len = 0.1;
805 params.max_len = 0.999;
806 params.num_zero_len = 0;
807 params.pd_limit = 100;
808 params.calc_pdgain = false;
809 params.multi_tree = false;
810 params.second_tree = NULL;
811 params.support_tag = NULL;
812 params.site_concordance = 0;
813 params.site_concordance_partition = false;
814 params.print_df1_trees = false;
815 params.internode_certainty = 0;
816 params.tree_weight_file = NULL;
817 params.consensus_type = CT_NONE;
818 params.find_pd_min = false;
819 params.branch_cluster = 0;
820 params.taxa_order_file = NULL;
821 params.endemic_pd = false;
822 params.exclusive_pd = false;
823 params.complement_area = NULL;
824 params.scaling_factor = -1;
825 params.numeric_precision = -1;
826 params.binary_programming = false;
827 params.quad_programming = false;
828 params.test_input = TEST_NONE;
829 params.tree_burnin = 0;
830 params.tree_max_count = 1000000;
831 params.split_threshold = 0.0;
832 params.split_threshold_str = NULL;
833 params.split_weight_threshold = -1000;
834 params.collapse_zero_branch = false;
835 params.split_weight_summary = SW_SUM;
836 params.gurobi_format = true;
837 params.gurobi_threads = 1;
838 params.num_bootstrap_samples = 0;
839 params.bootstrap_spec = NULL;
840 params.transfer_bootstrap = 0;
841
842 params.aln_file = NULL;
843 params.phylip_sequential_format = false;
844 params.symtest = SYMTEST_NONE;
845 params.symtest_only = false;
846 params.symtest_remove = 0;
847 params.symtest_keep_zero = false;
848 params.symtest_type = 0;
849 params.symtest_pcutoff = 0.05;
850 params.symtest_stat = false;
851 params.symtest_shuffle = 1;
852 //params.treeset_file = NULL;
853 params.topotest_replicates = 0;
854 params.topotest_optimize_model = false;
855 params.do_weighted_test = false;
856 params.do_au_test = false;
857 params.siteLL_file = NULL; //added by MA
858 params.partition_file = NULL;
859 params.partition_type = BRLEN_OPTIMIZE;
860 params.partfinder_rcluster = 100;
861 params.partfinder_rcluster_max = 0;
862 params.partition_merge = MERGE_NONE;
863 params.merge_models = "1";
864 params.merge_rates = "1";
865 params.partfinder_log_rate = true;
866 params.remove_empty_seq = true;
867 params.terrace_aware = true;
868 #ifdef IQTREE_TERRAPHAST
869 params.terrace_analysis = false;
870 #else
871 params.terrace_analysis = false;
872 #endif
873 params.sequence_type = NULL;
874 params.aln_output = NULL;
875 params.aln_site_list = NULL;
876 params.aln_output_format = IN_PHYLIP;
877 params.output_format = FORMAT_NORMAL;
878 params.newick_extended_format = false;
879 params.gap_masked_aln = NULL;
880 params.concatenate_aln = NULL;
881 params.aln_nogaps = false;
882 params.aln_no_const_sites = false;
883 params.print_aln_info = false;
884 // params.parsimony = false;
885 // params.parsimony_tree = false;
886 params.tree_spr = false;
887 params.nexus_output = false;
888 params.k_representative = 4;
889 params.loglh_epsilon = 0.001;
890 params.numSmoothTree = 1;
891 params.nni5 = true;
892 params.nni5_num_eval = 1;
893 params.brlen_num_traversal = 1;
894 params.leastSquareBranch = false;
895 params.pars_branch_length = false;
896 params.bayes_branch_length = false;
897 params.manuel_analytic_approx = false;
898 params.leastSquareNNI = false;
899 params.ls_var_type = OLS;
900 params.maxCandidates = 20;
901 params.popSize = 5;
902 params.p_delete = -1;
903 params.min_iterations = -1;
904 params.max_iterations = 1000;
905 params.num_param_iterations = 100;
906 params.stop_condition = SC_UNSUCCESS_ITERATION;
907 params.stop_confidence = 0.95;
908 params.num_runs = 1;
909 params.model_name = "";
910 params.model_name_init = NULL;
911 params.model_opt_steps = 10;
912 params.model_set = "ALL";
913 params.model_extra_set = NULL;
914 params.model_subset = NULL;
915 params.state_freq_set = NULL;
916 params.ratehet_set = "AUTO";
917 params.score_diff_thres = 10.0;
918 params.model_def_file = NULL;
919 params.modelomatic = false;
920 params.model_test_again = false;
921 params.model_test_and_tree = 0;
922 params.model_test_separate_rate = false;
923 params.optimize_mixmodel_weight = false;
924 params.optimize_rate_matrix = false;
925 params.store_trans_matrix = false;
926 //params.freq_type = FREQ_EMPIRICAL;
927 params.freq_type = FREQ_UNKNOWN;
928 params.keep_zero_freq = true;
929 params.min_state_freq = MIN_FREQUENCY;
930 params.min_rate_cats = 2;
931 params.num_rate_cats = 4;
932 params.max_rate_cats = 10;
933 params.gamma_shape = -1.0;
934 params.min_gamma_shape = MIN_GAMMA_SHAPE;
935 params.gamma_median = false;
936 params.p_invar_sites = -1.0;
937 params.optimize_model_rate_joint = false;
938 params.optimize_by_newton = true;
939 params.optimize_alg = "2-BFGS,EM";
940 params.optimize_alg_mixlen = "EM";
941 params.optimize_alg_gammai = "EM";
942 params.optimize_from_given_params = false;
943 params.fixed_branch_length = BRLEN_OPTIMIZE;
944 params.min_branch_length = 0.0; // this is now adjusted later based on alignment length
945 // TODO DS: This seems inappropriate for PoMo. It is handled in
946 // phyloanalysis::2908.
947 params.max_branch_length = 10.0; // Nov 22 2016: reduce from 100 to 10!
948 params.iqp_assess_quartet = IQP_DISTANCE;
949 params.iqp = false;
950 params.write_intermediate_trees = 0;
951 // params.avoid_duplicated_trees = false;
952 params.writeDistImdTrees = false;
953 params.rf_dist_mode = 0;
954 params.rf_same_pair = false;
955 params.normalize_tree_dist = false;
956 params.mvh_site_rate = false;
957 params.rate_mh_type = true;
958 params.discard_saturated_site = false;
959 params.mean_rate = 1.0;
960 params.aLRT_threshold = 101;
961 params.aLRT_replicates = 0;
962 params.aLRT_test = false;
963 params.aBayes_test = false;
964 params.localbp_replicates = 0;
965 #ifdef __AVX512KNL
966 params.SSE = LK_AVX512;
967 #else
968 params.SSE = LK_AVX_FMA;
969 #endif
970 params.lk_safe_scaling = false;
971 params.numseq_safe_scaling = 2000;
972 params.kernel_nonrev = false;
973 params.print_site_lh = WSL_NONE;
974 params.print_partition_lh = false;
975 params.print_site_prob = WSL_NONE;
976 params.print_site_state_freq = WSF_NONE;
977 params.print_site_rate = 0;
978 params.print_trees_site_posterior = 0;
979 params.print_ancestral_sequence = AST_NONE;
980 params.min_ancestral_prob = 0.0;
981 params.print_tree_lh = false;
982 params.lambda = 1;
983 params.speed_conf = 1.0;
984 params.whtest_simulations = 1000;
985 params.mcat_type = MCAT_LOG + MCAT_PATTERN;
986 params.rate_file = NULL;
987 params.ngs_file = NULL;
988 params.ngs_mapped_reads = NULL;
989 params.ngs_ignore_gaps = true;
990 params.do_pars_multistate = false;
991 params.gene_pvalue_file = NULL;
992 params.gene_scale_factor = -1;
993 params.gene_pvalue_loga = false;
994 params.second_align = NULL;
995 params.ncbi_taxid = 0;
996 params.ncbi_taxon_level = NULL;
997 params.ncbi_names_file = NULL;
998 params.ncbi_ignore_level = NULL;
999
1000 params.eco_dag_file = NULL;
1001 params.eco_type = NULL;
1002 params.eco_detail_file = NULL;
1003 params.k_percent = 0;
1004 params.diet_min = 0;
1005 params.diet_max = 0;
1006 params.diet_step = 0;
1007 params.eco_weighted = false;
1008 params.eco_run = 0;
1009
1010 params.upper_bound = false;
1011 params.upper_bound_NNI = false;
1012 params.upper_bound_frac = 0.0;
1013
1014 params.gbo_replicates = 0;
1015 params.ufboot_epsilon = 0.5;
1016 params.check_gbo_sample_size = 0;
1017 params.use_rell_method = true;
1018 params.use_elw_method = false;
1019 params.use_weighted_bootstrap = false;
1020 params.use_max_tree_per_bootstrap = true;
1021 params.max_candidate_trees = 0;
1022 params.distinct_trees = false;
1023 params.online_bootstrap = true;
1024 params.min_correlation = 0.99;
1025 params.step_iterations = 100;
1026 // params.store_candidate_trees = false;
1027 params.print_ufboot_trees = 0;
1028 params.jackknife_prop = 0.0;
1029 //const double INF_NNI_CUTOFF = -1000000.0;
1030 params.nni_cutoff = -1000000.0;
1031 params.estimate_nni_cutoff = false;
1032 params.nni_sort = false;
1033 //params.nni_opt_5branches = false;
1034 params.testNNI = false;
1035 params.approximate_nni = false;
1036 params.do_compression = false;
1037
1038 params.new_heuristic = true;
1039 params.iteration_multiple = 1;
1040 params.initPS = 0.5;
1041 #ifdef USING_PLL
1042 params.pll = true;
1043 #else
1044 params.pll = false;
1045 #endif
1046 params.modelEps = 0.01;
1047 params.modelfinder_eps = 0.1;
1048 params.parbran = false;
1049 params.binary_aln_file = NULL;
1050 params.maxtime = 1000000;
1051 params.reinsert_par = false;
1052 params.bestStart = true;
1053 params.snni = true; // turn on sNNI default now
1054 // params.autostop = true; // turn on auto stopping rule by default now
1055 params.unsuccess_iteration = 100;
1056 params.speednni = true; // turn on reduced hill-climbing NNI by default now
1057 params.numInitTrees = 100;
1058 params.fixStableSplits = false;
1059 params.stableSplitThreshold = 0.9;
1060 params.five_plus_five = false;
1061 params.memCheck = false;
1062 params.tabu = false;
1063 params.adaptPertubation = false;
1064 params.numSupportTrees = 20;
1065 // params.sprDist = 20;
1066 params.sprDist = 6;
1067 params.sankoff_cost_file = NULL;
1068 params.numNNITrees = 20;
1069 params.avh_test = 0;
1070 params.bootlh_test = 0;
1071 params.bootlh_partitions = NULL;
1072 params.site_freq_file = NULL;
1073 params.tree_freq_file = NULL;
1074 params.num_threads = 1;
1075 params.num_threads_max = 10000;
1076 params.openmp_by_model = false;
1077 params.model_test_criterion = MTC_BIC;
1078 // params.model_test_stop_rule = MTC_ALL;
1079 params.model_test_sample_size = 0;
1080 params.root_state = NULL;
1081 params.print_bootaln = false;
1082 params.print_boot_site_freq = false;
1083 params.print_subaln = false;
1084 params.print_partition_info = false;
1085 params.print_conaln = false;
1086 params.count_trees = false;
1087 params.pomo = false;
1088 params.pomo_random_sampling = false;
1089 // params.pomo_counts_file_flag = false;
1090 params.pomo_pop_size = 9;
1091 params.print_branch_lengths = false;
1092 params.lh_mem_save = LM_PER_NODE; // auto detect
1093 params.buffer_mem_save = false;
1094 params.start_tree = STT_PLL_PARSIMONY;
1095 params.modelfinder_ml_tree = true;
1096 params.final_model_opt = true;
1097 params.print_splits_file = false;
1098 params.print_splits_nex_file = true;
1099 params.ignore_identical_seqs = true;
1100 params.write_init_tree = false;
1101 params.write_candidate_trees = false;
1102 params.write_branches = false;
1103 params.freq_const_patterns = NULL;
1104 params.no_rescale_gamma_invar = false;
1105 params.compute_seq_identity_along_tree = false;
1106 params.compute_seq_composition = true;
1107 params.lmap_num_quartets = -1;
1108 params.lmap_cluster_file = NULL;
1109 params.print_lmap_quartet_lh = false;
1110 params.num_mixlen = 1;
1111 params.link_alpha = false;
1112 params.link_model = false;
1113 params.model_joint = NULL;
1114 params.ignore_checkpoint = false;
1115 params.checkpoint_dump_interval = 60;
1116 params.force_unfinished = false;
1117 params.suppress_output_flags = 0;
1118 params.ufboot2corr = false;
1119 params.u2c_nni5 = false;
1120 params.date_with_outgroup = true;
1121 params.date_debug = false;
1122 params.date_replicates = 0;
1123 params.clock_stddev = -1.0;
1124 params.date_outlier = -1.0;
1125
1126 params.matrix_exp_technique = MET_EIGEN3LIB_DECOMPOSITION;
1127
1128 if (params.nni5) {
1129 params.nni_type = NNI5;
1130 } else {
1131 params.nni_type = NNI1;
1132 }
1133
1134 struct timeval tv;
1135 struct timezone tz;
1136 // initialize random seed based on current time
1137 gettimeofday(&tv, &tz);
1138 //params.ran_seed = (unsigned) (tv.tv_sec+tv.tv_usec);
1139 params.ran_seed = (tv.tv_usec);
1140 params.subsampling_seed = params.ran_seed;
1141 params.subsampling = 0;
1142
1143 for (cnt = 1; cnt < argc; cnt++) {
1144 try {
1145
1146 if (strcmp(argv[cnt], "-h") == 0 || strcmp(argv[cnt], "--help") == 0) {
1147 #ifdef IQ_TREE
1148 usage_iqtree(argv, strcmp(argv[cnt], "--help") == 0);
1149 #else
1150 usage(argv, false);
1151 #endif
1152 continue;
1153 }
1154 if (strcmp(argv[cnt], "-V") == 0 || strcmp(argv[cnt], "-version") == 0 || strcmp(argv[cnt], "--version") == 0) {
1155 printCopyright(cout);
1156 exit(EXIT_SUCCESS);
1157 }
1158 if (strcmp(argv[cnt], "-ho") == 0 || strcmp(argv[cnt], "-?") == 0) {
1159 usage_iqtree(argv, false);
1160 continue;
1161 }
1162 if (strcmp(argv[cnt], "-hh") == 0 || strcmp(argv[cnt], "-hhh") == 0) {
1163 #ifdef IQ_TREE
1164 usage_iqtree(argv, true);
1165 #else
1166 usage(argv);
1167 #endif
1168 continue;
1169 }
1170 if (strcmp(argv[cnt], "-v0") == 0) {
1171 verbose_mode = VB_QUIET;
1172 continue;
1173 }
1174 if (strcmp(argv[cnt], "-v") == 0 || strcmp(argv[cnt], "--verbose") == 0) {
1175 verbose_mode = VB_MED;
1176 continue;
1177 }
1178 if (strcmp(argv[cnt], "-vv") == 0
1179 || strcmp(argv[cnt], "-v2") == 0) {
1180 verbose_mode = VB_MAX;
1181 continue;
1182 }
1183 if (strcmp(argv[cnt], "-vvv") == 0
1184 || strcmp(argv[cnt], "-v3") == 0) {
1185 verbose_mode = VB_DEBUG;
1186 continue;
1187 }
1188 if (strcmp(argv[cnt], "-k") == 0) {
1189 cnt++;
1190 if (cnt >= argc)
1191 throw "Use -k <num_taxa>";
1192 convert_range(argv[cnt], params.min_size, params.sub_size,
1193 params.step_size);
1194 params.k_representative = params.min_size;
1195 continue;
1196 }
1197 if (strcmp(argv[cnt], "-pre") == 0 || strcmp(argv[cnt], "--prefix") == 0) {
1198 cnt++;
1199 if (cnt >= argc)
1200 throw "Use -pre <output_prefix>";
1201 params.out_prefix = argv[cnt];
1202 continue;
1203 }
1204 if (strcmp(argv[cnt], "-pp") == 0) {
1205 cnt++;
1206 if (cnt >= argc)
1207 throw "Use -pp <pd_proportion>";
1208 convert_range(argv[cnt], params.min_proportion,
1209 params.pd_proportion, params.step_proportion);
1210 if (params.pd_proportion < 0 || params.pd_proportion > 1)
1211 throw "PD proportion must be between 0 and 1";
1212 continue;
1213 }
1214 if (strcmp(argv[cnt], "-mk") == 0) {
1215 cnt++;
1216 if (cnt >= argc)
1217 throw "Use -mk <min_taxa>";
1218 params.min_size = convert_int(argv[cnt]);
1219 continue;
1220 }
1221 if (strcmp(argv[cnt], "-bud") == 0) {
1222 cnt++;
1223 if (cnt >= argc)
1224 throw "Use -bud <budget>";
1225 convert_range(argv[cnt], params.min_budget, params.budget,
1226 params.step_budget);
1227 continue;
1228 }
1229 if (strcmp(argv[cnt], "-mb") == 0) {
1230 cnt++;
1231 if (cnt >= argc)
1232 throw "Use -mb <min_budget>";
1233 params.min_budget = convert_int(argv[cnt]);
1234 continue;
1235 }
1236 if (strcmp(argv[cnt], "-o") == 0) {
1237 cnt++;
1238 if (cnt >= argc)
1239 throw "Use -o <taxon>";
1240 params.root = argv[cnt];
1241 continue;
1242 }
1243 if (strcmp(argv[cnt], "-optalg") == 0) {
1244 cnt++;
1245 if (cnt >= argc)
1246 throw "Use -opt_alg <1-BFGS|2-BFGS|EM>";
1247 params.optimize_alg = argv[cnt];
1248 continue;
1249 }
1250 if (strcmp(argv[cnt], "-optlen") == 0) {
1251 cnt++;
1252 if (cnt >= argc)
1253 throw "Use -optlen <BFGS|EM>";
1254 params.optimize_alg_mixlen = argv[cnt];
1255 continue;
1256 }
1257 if (strcmp(argv[cnt], "-optalg_gammai") == 0) {
1258 cnt++;
1259 if (cnt >= argc)
1260 throw "Use -optalg_gammai <Brent|BFGS|EM>";
1261 params.optimize_alg_gammai = argv[cnt];
1262 continue;
1263 }
1264 if (strcmp(argv[cnt], "-root") == 0 || strcmp(argv[cnt], "-rooted") == 0) {
1265 params.is_rooted = true;
1266 continue;
1267 }
1268
1269 if (strcmp(argv[cnt], "--root-dist") == 0) {
1270 cnt++;
1271 if (cnt >= argc)
1272 throw "Use --root-dist <maximum-root-move-distance>";
1273 params.root_move_dist = convert_int(argv[cnt]);
1274 continue;
1275 }
1276
1277 if (strcmp(argv[cnt], "--root-find") == 0) {
1278 params.root_find = true;
1279 continue;
1280 }
1281
1282 if (strcmp(argv[cnt], "--root-test") == 0) {
1283 params.root_test = true;
1284 continue;
1285 }
1286
1287 if (strcmp(argv[cnt], "-all") == 0) {
1288 params.find_all = true;
1289 continue;
1290 }
1291 if (strcmp(argv[cnt], "--greedy") == 0) {
1292 params.run_mode = GREEDY;
1293 continue;
1294 }
1295 if (strcmp(argv[cnt], "-pr") == 0
1296 || strcmp(argv[cnt], "--pruning") == 0) {
1297 params.run_mode = PRUNING;
1298 //continue; } if (strcmp(argv[cnt],"--both") == 0) {
1299 //params.run_mode = BOTH_ALG;
1300 continue;
1301 }
1302 if (strcmp(argv[cnt], "-e") == 0) {
1303 cnt++;
1304 if (cnt >= argc)
1305 throw "Use -e <file>";
1306 params.param_file = argv[cnt];
1307 continue;
1308 }
1309 if (strcmp(argv[cnt], "-if") == 0) {
1310 cnt++;
1311 if (cnt >= argc)
1312 throw "Use -if <file>";
1313 params.initial_file = argv[cnt];
1314 continue;
1315 }
1316 if (strcmp(argv[cnt], "-nni_nr_step") == 0) {
1317 cnt++;
1318 if (cnt >= argc)
1319 throw "Use -nni_nr_step <newton_raphson_steps>";
1320 NNI_MAX_NR_STEP = convert_int(argv[cnt]);
1321 continue;
1322 }
1323 if (strcmp(argv[cnt], "-ia") == 0) {
1324 cnt++;
1325 if (cnt >= argc)
1326 throw "Use -ia <file>";
1327 params.initial_area_file = argv[cnt];
1328 continue;
1329 }
1330 if (strcmp(argv[cnt], "-u") == 0) {
1331 // file containing budget information
1332 cnt++;
1333 if (cnt >= argc)
1334 throw "Use -u <file>";
1335 params.budget_file = argv[cnt];
1336 continue;
1337 }
1338 if (strcmp(argv[cnt], "-dd") == 0) {
1339 // compute distribution of PD score on random sets
1340 cnt++;
1341 if (cnt >= argc)
1342 throw "Use -dd <sample_size>";
1343 params.run_mode = PD_DISTRIBUTION;
1344 params.sample_size = convert_int(argv[cnt]);
1345 continue;
1346 }
1347 if (strcmp(argv[cnt], "-ts") == 0) {
1348 // calculate PD score a taxa set listed in the file
1349 cnt++;
1350 //params.run_mode = PD_USER_SET;
1351 if (cnt >= argc)
1352 throw "Use -ts <taxa_file>";
1353 params.pdtaxa_file = argv[cnt];
1354 continue;
1355 }
1356 if (strcmp(argv[cnt], "-bound") == 0) {
1357 // boundary length of areas
1358 cnt++;
1359 if (cnt >= argc)
1360 throw "Use -bound <file>";
1361 params.areas_boundary_file = argv[cnt];
1362 continue;
1363 }
1364 if (strcmp(argv[cnt], "-blm") == 0) {
1365 // boundary length modifier
1366 cnt++;
1367 if (cnt >= argc)
1368 throw "Use -blm <boundary_modifier>";
1369 params.boundary_modifier = convert_double(argv[cnt]);
1370 continue;
1371 }
1372 if (strcmp(argv[cnt], "-dist") == 0
1373 || strcmp(argv[cnt], "-d") == 0) {
1374 // calculate distance matrix from the tree
1375 params.run_mode = CALC_DIST;
1376 cnt++;
1377 if (cnt >= argc)
1378 throw "Use -dist <distance_file>";
1379 params.dist_file = argv[cnt];
1380 continue;
1381 }
1382 if (strcmp(argv[cnt], "-djc") == 0) {
1383 params.compute_ml_dist = false;
1384 continue;
1385 }
1386 if (strcmp(argv[cnt], "-dobs") == 0) {
1387 params.compute_obs_dist = true;
1388 continue;
1389 }
1390 if (strcmp(argv[cnt], "-r") == 0) {
1391 cnt++;
1392 if (cnt >= argc)
1393 throw "Use -r <num_taxa>";
1394 params.sub_size = convert_int(argv[cnt]);
1395 params.tree_gen = YULE_HARDING;
1396 continue;
1397 }
1398 if (strcmp(argv[cnt], "-rs") == 0) {
1399 cnt++;
1400 if (cnt >= argc)
1401 throw "Use -rs <alignment_file>";
1402 params.tree_gen = YULE_HARDING;
1403 params.aln_file = argv[cnt];
1404 continue;
1405 }
1406 if (strcmp(argv[cnt], "-rstar") == 0) {
1407 cnt++;
1408 if (cnt >= argc)
1409 throw "Use -rstar <num_taxa>";
1410 params.sub_size = convert_int(argv[cnt]);
1411 params.tree_gen = STAR_TREE;
1412 continue;
1413 }
1414 if (strcmp(argv[cnt], "-ru") == 0) {
1415 cnt++;
1416 if (cnt >= argc)
1417 throw "Use -ru <num_taxa>";
1418 params.sub_size = convert_int(argv[cnt]);
1419 params.tree_gen = UNIFORM;
1420 continue;
1421 }
1422 if (strcmp(argv[cnt], "-rcat") == 0) {
1423 cnt++;
1424 if (cnt >= argc)
1425 throw "Use -rcat <num_taxa>";
1426 params.sub_size = convert_int(argv[cnt]);
1427 params.tree_gen = CATERPILLAR;
1428 continue;
1429 }
1430 if (strcmp(argv[cnt], "-rbal") == 0) {
1431 cnt++;
1432 if (cnt >= argc)
1433 throw "Use -rbal <num_taxa>";
1434 params.sub_size = convert_int(argv[cnt]);
1435 params.tree_gen = BALANCED;
1436 continue;
1437 }
1438 if (strcmp(argv[cnt], "--rand") == 0) {
1439 cnt++;
1440 if (cnt >= argc)
1441 throw "Use -rand UNI | CAT | BAL | NET";
1442 if (strcmp(argv[cnt], "UNI") == 0)
1443 params.tree_gen = UNIFORM;
1444 else if (strcmp(argv[cnt], "CAT") == 0)
1445 params.tree_gen = CATERPILLAR;
1446 else if (strcmp(argv[cnt], "BAL") == 0)
1447 params.tree_gen = BALANCED;
1448 else if (strcmp(argv[cnt], "NET") == 0)
1449 params.tree_gen = CIRCULAR_SPLIT_GRAPH;
1450 else
1451 throw "wrong --rand option";
1452 continue;
1453 }
1454
1455 if (strcmp(argv[cnt], "--keep-ident") == 0 || strcmp(argv[cnt], "-keep-ident") == 0) {
1456 params.ignore_identical_seqs = false;
1457 continue;
1458 }
1459 if (strcmp(argv[cnt], "-rcsg") == 0) {
1460 cnt++;
1461 if (cnt >= argc)
1462 throw "Use -rcsg <num_taxa>";
1463 params.sub_size = convert_int(argv[cnt]);
1464 params.tree_gen = CIRCULAR_SPLIT_GRAPH;
1465 continue;
1466 }
1467 if (strcmp(argv[cnt], "-rpam") == 0) {
1468 cnt++;
1469 if (cnt >= argc)
1470 throw "Use -rpam <num_splits>";
1471 params.num_splits = convert_int(argv[cnt]);
1472 continue;
1473 }
1474 if (strcmp(argv[cnt], "-rlen") == 0 || strcmp(argv[cnt], "--rlen") == 0) {
1475 cnt++;
1476 if (cnt >= argc - 2)
1477 throw "Use -rlen <min_len> <mean_len> <max_len>";
1478 params.min_len = convert_double(argv[cnt]);
1479 params.mean_len = convert_double(argv[cnt + 1]);
1480 params.max_len = convert_double(argv[cnt + 2]);
1481 cnt += 2;
1482 continue;
1483 }
1484 if (strcmp(argv[cnt], "-rzero") == 0) {
1485 cnt++;
1486 if (cnt >= argc)
1487 throw "Use -rzero <num_zero_branch>";
1488 params.num_zero_len = convert_int(argv[cnt]);
1489 if (params.num_zero_len < 0)
1490 throw "num_zero_len must not be negative";
1491 continue;
1492 }
1493 if (strcmp(argv[cnt], "-rset") == 0) {
1494 cnt++;
1495 if (cnt >= argc - 1)
1496 throw "Use -rset <overlap> <outfile>";
1497 params.overlap = convert_int(argv[cnt]);
1498 cnt++;
1499 params.pdtaxa_file = argv[cnt];
1500 params.tree_gen = TAXA_SET;
1501 continue;
1502 }
1503 if (strcmp(argv[cnt], "-rep") == 0) {
1504 cnt++;
1505 if (cnt >= argc)
1506 throw "Use -rep <repeated_times>";
1507 params.repeated_time = convert_int(argv[cnt]);
1508 continue;
1509 }
1510 if (strcmp(argv[cnt], "-lim") == 0) {
1511 cnt++;
1512 if (cnt >= argc)
1513 throw "Use -lim <pd_limit>";
1514 params.pd_limit = convert_int(argv[cnt]);
1515 continue;
1516 }
1517 if (strcmp(argv[cnt], "-noout") == 0) {
1518 params.nr_output = 0;
1519 continue;
1520 }
1521 if (strcmp(argv[cnt], "-1out") == 0) {
1522 params.nr_output = 1;
1523 continue;
1524 }
1525 if (strcmp(argv[cnt], "-oldout") == 0) {
1526 params.nr_output = 100;
1527 continue;
1528 }
1529 if (strcmp(argv[cnt], "-nexout") == 0) {
1530 params.nexus_output = true;
1531 continue;
1532 }
1533 if (strcmp(argv[cnt], "-exhaust") == 0) {
1534 params.run_mode = EXHAUSTIVE;
1535 continue;
1536 }
1537 if (strcmp(argv[cnt], "-seed") == 0 || strcmp(argv[cnt], "--seed") == 0) {
1538 cnt++;
1539 if (cnt >= argc)
1540 throw "Use -seed <random_seed>";
1541 params.ran_seed = abs(convert_int(argv[cnt]));
1542 continue;
1543 }
1544 if (strcmp(argv[cnt], "-pdgain") == 0) {
1545 params.calc_pdgain = true;
1546 continue;
1547 }
1548 if (strcmp(argv[cnt], "-sup") == 0 || strcmp(argv[cnt], "--support") == 0) {
1549 cnt++;
1550 if (cnt >= argc)
1551 throw "Use -sup <target_tree_file>";
1552 params.second_tree = argv[cnt];
1553 params.consensus_type = CT_ASSIGN_SUPPORT;
1554 continue;
1555 }
1556 if (strcmp(argv[cnt], "-suptag") == 0 || strcmp(argv[cnt], "--suptag") == 0) {
1557 cnt++;
1558 if (cnt >= argc)
1559 throw "Use -suptag <tagname or ALL>";
1560 params.support_tag = argv[cnt];
1561 continue;
1562 }
1563 if (strcmp(argv[cnt], "-sup2") == 0) {
1564 outError("Deprecated -sup2 option, please use --gcf --tree FILE");
1565 }
1566
1567 if (strcmp(argv[cnt], "--gcf") == 0) {
1568 params.consensus_type = CT_ASSIGN_SUPPORT_EXTENDED;
1569 cnt++;
1570 if (cnt >= argc)
1571 throw "Use --gcf <user_trees_file>";
1572 params.treeset_file = argv[cnt];
1573 continue;
1574 }
1575 if (strcmp(argv[cnt], "--scf") == 0) {
1576 params.consensus_type = CT_ASSIGN_SUPPORT_EXTENDED;
1577 cnt++;
1578 if (cnt >= argc)
1579 throw "Use --scf NUM_QUARTETS";
1580 params.site_concordance = convert_int(argv[cnt]);
1581 if (params.site_concordance < 1)
1582 throw "Positive --scf please";
1583 continue;
1584 }
1585 if (strcmp(argv[cnt], "--scf-part") == 0 || strcmp(argv[cnt], "--cf-verbose") == 0) {
1586 params.site_concordance_partition = true;
1587 continue;
1588 }
1589 if (strcmp(argv[cnt], "--df-tree") == 0) {
1590 params.print_df1_trees = true;
1591 continue;
1592 }
1593 if (strcmp(argv[cnt], "--qic") == 0) {
1594 params.internode_certainty = 1;
1595 continue;
1596 }
1597 if (strcmp(argv[cnt], "-treew") == 0) {
1598 cnt++;
1599 if (cnt >= argc)
1600 throw "Use -treew <tree_weight_file>";
1601 params.tree_weight_file = argv[cnt];
1602 continue;
1603 }
1604 if (strcmp(argv[cnt], "-con") == 0 || strcmp(argv[cnt], "--contree") == 0) {
1605 params.consensus_type = CT_CONSENSUS_TREE;
1606 continue;
1607 }
1608 if (strcmp(argv[cnt], "-net") == 0 || strcmp(argv[cnt], "--connet") == 0) {
1609 params.consensus_type = CT_CONSENSUS_NETWORK;
1610 continue;
1611 }
1612
1613 /**MINH ANH: to serve some statistics on tree*/
1614 if (strcmp(argv[cnt], "-comp") == 0) {
1615 cnt++;
1616 if (cnt >= argc)
1617 throw "Use -comp <treefile>";
1618 params.consensus_type = COMPARE;
1619 params.second_tree = argv[cnt];
1620 continue;
1621 }
1622 if (strcmp(argv[cnt], "-stats") == 0) {
1623 params.run_mode = STATS;
1624 continue;
1625 }
1626 if (strcmp(argv[cnt], "-gbo") == 0) { //guided bootstrap
1627 cnt++;
1628 if (cnt >= argc)
1629 throw "Use -gbo <site likelihod file>";
1630 params.siteLL_file = argv[cnt];
1631 //params.run_mode = GBO;
1632 continue;
1633 } // MA
1634
1635 if (strcmp(argv[cnt], "-mprob") == 0) { //compute multinomial distribution probability
1636 cnt++;
1637 if (cnt >= argc)
1638 throw "Use -mprob <ref_alignment>";
1639 params.second_align = argv[cnt];
1640 //params.run_mode = MPRO;
1641 continue;
1642 } // MA
1643
1644 if (strcmp(argv[cnt], "-min") == 0) {
1645 params.find_pd_min = true;
1646 continue;
1647 }
1648 if (strcmp(argv[cnt], "-excl") == 0) {
1649 params.exclusive_pd = true;
1650 continue;
1651 }
1652 if (strcmp(argv[cnt], "-endem") == 0) {
1653 params.endemic_pd = true;
1654 continue;
1655 }
1656 if (strcmp(argv[cnt], "-compl") == 0) {
1657 cnt++;
1658 if (cnt >= argc)
1659 throw "Use -compl <area_name>";
1660 params.complement_area = argv[cnt];
1661 continue;
1662 }
1663 if (strcmp(argv[cnt], "-cluster") == 0) {
1664 params.branch_cluster = 4;
1665 cnt++;
1666 if (cnt >= argc)
1667 throw "Use -cluster <taxa_order_file>";
1668 params.taxa_order_file = argv[cnt];
1669 continue;
1670 }
1671 if (strcmp(argv[cnt], "-taxa") == 0) {
1672 params.run_mode = PRINT_TAXA;
1673 continue;
1674 }
1675 if (strcmp(argv[cnt], "-area") == 0) {
1676 params.run_mode = PRINT_AREA;
1677 continue;
1678 }
1679 if (strcmp(argv[cnt], "-scale") == 0) {
1680 cnt++;
1681 if (cnt >= argc)
1682 throw "Use -scale <scaling_factor>";
1683 params.scaling_factor = convert_double(argv[cnt]);
1684 continue;
1685 }
1686 if (strcmp(argv[cnt], "-scaleg") == 0) {
1687 cnt++;
1688 if (cnt >= argc)
1689 throw "Use -scaleg <gene_scale_factor>";
1690 params.gene_scale_factor = convert_double(argv[cnt]);
1691 continue;
1692 }
1693 if (strcmp(argv[cnt], "-scalebranch") == 0) {
1694 params.run_mode = SCALE_BRANCH_LEN;
1695 cnt++;
1696 if (cnt >= argc)
1697 throw "Use -scalebranch <scaling_factor>";
1698 params.scaling_factor = convert_double(argv[cnt]);
1699 continue;
1700 }
1701 if (strcmp(argv[cnt], "-scalenode") == 0) {
1702 params.run_mode = SCALE_NODE_NAME;
1703 cnt++;
1704 if (cnt >= argc)
1705 throw "Use -scalenode <scaling_factor>";
1706 params.scaling_factor = convert_double(argv[cnt]);
1707 continue;
1708 }
1709 if (strcmp(argv[cnt], "-prec") == 0) {
1710 cnt++;
1711 if (cnt >= argc)
1712 throw "Use -prec <numeric_precision>";
1713 params.numeric_precision = convert_int(argv[cnt]);
1714 continue;
1715 }
1716 if (strcmp(argv[cnt], "-lp") == 0) {
1717 params.run_mode = LINEAR_PROGRAMMING;
1718 continue;
1719 }
1720 if (strcmp(argv[cnt], "-lpbin") == 0) {
1721 params.run_mode = LINEAR_PROGRAMMING;
1722 params.binary_programming = true;
1723 continue;
1724 }
1725 if (strcmp(argv[cnt], "-qp") == 0) {
1726 params.gurobi_format = true;
1727 params.quad_programming = true;
1728 continue;
1729 }
1730 if (strcmp(argv[cnt], "-quiet") == 0 || strcmp(argv[cnt], "--quiet") == 0) {
1731 verbose_mode = VB_QUIET;
1732 continue;
1733 }
1734 if (strcmp(argv[cnt], "-mult") == 0) {
1735 params.multi_tree = true;
1736 continue;
1737 }
1738 if (strcmp(argv[cnt], "-bi") == 0 || strcmp(argv[cnt], "--burnin") == 0) {
1739 cnt++;
1740 if (cnt >= argc)
1741 throw "Use -bi <burnin_value>";
1742 params.tree_burnin = convert_int(argv[cnt]);
1743 if (params.tree_burnin < 0)
1744 throw "Burnin value must not be negative";
1745 continue;
1746 }
1747 if (strcmp(argv[cnt], "-tm") == 0) {
1748 cnt++;
1749 if (cnt >= argc)
1750 throw "Use -tm <tree_max_count>";
1751 params.tree_max_count = convert_int(argv[cnt]);
1752 if (params.tree_max_count < 0)
1753 throw "tree_max_count must not be negative";
1754 continue;
1755 }
1756 if (strcmp(argv[cnt], "-minsup") == 0 || strcmp(argv[cnt], "--sup-min") == 0) {
1757 cnt++;
1758 if (cnt >= argc)
1759 throw "Use -minsup <split_threshold>";
1760 params.split_threshold = convert_double(argv[cnt]);
1761 if (params.split_threshold < 0 || params.split_threshold > 1)
1762 throw "Split threshold must be between 0 and 1";
1763 continue;
1764 }
1765 if (strcmp(argv[cnt], "-minsupnew") == 0) {
1766 cnt++;
1767 if (cnt >= argc)
1768 throw "Use -minsupnew <split_threshold_1/.../split_threshold_k>";
1769 params.split_threshold_str = argv[cnt];
1770 continue;
1771 }
1772 if (strcmp(argv[cnt], "-tw") == 0) {
1773 cnt++;
1774 if (cnt >= argc)
1775 throw "Use -tw <split_weight_threshold>";
1776 params.split_weight_threshold = convert_double(argv[cnt]);
1777 if (params.split_weight_threshold < 0)
1778 throw "Split weight threshold is negative";
1779 continue;
1780 }
1781
1782 if (strcmp(argv[cnt], "-czb") == 0 || strcmp(argv[cnt], "--polytomy") == 0) {
1783 params.collapse_zero_branch = true;
1784 continue;
1785 }
1786
1787 if (strcmp(argv[cnt], "-swc") == 0) {
1788 params.split_weight_summary = SW_COUNT;
1789 continue;
1790 }
1791 if (strcmp(argv[cnt], "-swa") == 0) {
1792 params.split_weight_summary = SW_AVG_ALL;
1793 continue;
1794 }
1795 if (strcmp(argv[cnt], "-swp") == 0) {
1796 params.split_weight_summary = SW_AVG_PRESENT;
1797 continue;
1798 }
1799 if (strcmp(argv[cnt], "-iwc") == 0) {
1800 params.test_input = TEST_WEAKLY_COMPATIBLE;
1801 continue;
1802 }
1803 if (strcmp(argv[cnt], "--aln") == 0 || strcmp(argv[cnt], "--msa") == 0 || strcmp(argv[cnt], "-s") == 0) {
1804 cnt++;
1805 if (cnt >= argc)
1806 throw "Use --aln, -s <alignment_file>";
1807 params.aln_file = argv[cnt];
1808 continue;
1809 }
1810 if (strcmp(argv[cnt], "--sequential") == 0) {
1811 params.phylip_sequential_format = true;
1812 continue;
1813 }
1814 if (strcmp(argv[cnt], "--symtest") == 0) {
1815 params.symtest = SYMTEST_MAXDIV;
1816 continue;
1817 }
1818
1819 if (strcmp(argv[cnt], "--bisymtest") == 0) {
1820 params.symtest = SYMTEST_BINOM;
1821 continue;
1822 }
1823
1824 if (strcmp(argv[cnt], "--symtest-only") == 0) {
1825 params.symtest_only = true;
1826 if (params.symtest == SYMTEST_NONE)
1827 params.symtest = SYMTEST_MAXDIV;
1828 continue;
1829 }
1830
1831 if (strcmp(argv[cnt], "--symtest-remove-bad") == 0) {
1832 params.symtest_remove = 1;
1833 if (params.symtest == SYMTEST_NONE)
1834 params.symtest = SYMTEST_MAXDIV;
1835 continue;
1836 }
1837
1838 if (strcmp(argv[cnt], "--symtest-remove-good") == 0) {
1839 params.symtest_remove = 2;
1840 if (params.symtest == SYMTEST_NONE)
1841 params.symtest = SYMTEST_MAXDIV;
1842 continue;
1843 }
1844
1845 if (strcmp(argv[cnt], "--symtest-keep-zero") == 0) {
1846 params.symtest_keep_zero = true;
1847 continue;
1848 }
1849
1850 if (strcmp(argv[cnt], "--symtest-type") == 0) {
1851 cnt++;
1852 if (cnt >= argc)
1853 throw "Use --symtest-type SYM|MAR|INT";
1854 if (strcmp(argv[cnt], "SYM") == 0)
1855 params.symtest_type = 0;
1856 else if (strcmp(argv[cnt], "MAR") == 0)
1857 params.symtest_type = 1;
1858 else if (strcmp(argv[cnt], "INT") == 0)
1859 params.symtest_type = 2;
1860 else
1861 throw "Use --symtest-type SYM|MAR|INT";
1862 if (params.symtest == SYMTEST_NONE)
1863 params.symtest = SYMTEST_MAXDIV;
1864 continue;
1865 }
1866
1867 if (strcmp(argv[cnt], "--symtest-pval") == 0) {
1868 cnt++;
1869 if (cnt >= argc)
1870 throw "Use --symtest-pval PVALUE_CUTOFF";
1871 params.symtest_pcutoff = convert_double(argv[cnt]);
1872 if (params.symtest_pcutoff <= 0 || params.symtest_pcutoff >= 1)
1873 throw "--symtest-pval must be between 0 and 1";
1874 if (params.symtest == SYMTEST_NONE)
1875 params.symtest = SYMTEST_MAXDIV;
1876 continue;
1877 }
1878
1879 if (strcmp(argv[cnt], "--symstat") == 0) {
1880 params.symtest_stat = true;
1881 if (params.symtest == SYMTEST_NONE)
1882 params.symtest = SYMTEST_MAXDIV;
1883 continue;
1884 }
1885
1886 if (strcmp(argv[cnt], "--symtest-perm") == 0) {
1887 cnt++;
1888 if (cnt >= argc)
1889 throw "Use --symtest-perm INT";
1890 params.symtest_shuffle = convert_int(argv[cnt]);
1891 if (params.symtest_shuffle <= 0)
1892 throw "--symtest-perm must be positive";
1893 if (params.symtest == SYMTEST_NONE)
1894 params.symtest = SYMTEST_MAXDIV;
1895 continue;
1896 }
1897
1898 if (strcmp(argv[cnt], "-z") == 0 || strcmp(argv[cnt], "--trees") == 0) {
1899 cnt++;
1900 if (cnt >= argc)
1901 throw "Use -z <user_trees_file>";
1902 params.treeset_file = argv[cnt];
1903 continue;
1904 }
1905 if (strcmp(argv[cnt], "-zb") == 0 || strcmp(argv[cnt], "--test") == 0) {
1906 cnt++;
1907 if (cnt >= argc)
1908 throw "Use -zb <#replicates>";
1909 params.topotest_replicates = convert_int(argv[cnt]);
1910 if (params.topotest_replicates < 1000)
1911 throw "Please specify at least 1000 replicates";
1912 continue;
1913 }
1914 if (strcmp(argv[cnt], "--estimate-model") == 0) {
1915 params.topotest_optimize_model = true;
1916 continue;
1917 }
1918 if (strcmp(argv[cnt], "-zw") == 0 || strcmp(argv[cnt], "--test-weight") == 0) {
1919 params.do_weighted_test = true;
1920 continue;
1921 }
1922 if (strcmp(argv[cnt], "-au") == 0 || strcmp(argv[cnt], "--test-au") == 0) {
1923 params.do_au_test = true;
1924 continue;
1925 }
1926 if (strcmp(argv[cnt], "-sp") == 0 || strcmp(argv[cnt], "-Q") == 0) {
1927 cnt++;
1928 if (cnt >= argc)
1929 throw "Use -sp <partition_file>";
1930 params.partition_file = argv[cnt];
1931 continue;
1932 }
1933 if (strcmp(argv[cnt], "-spp") == 0 || strcmp(argv[cnt], "-p") == 0 || strcmp(argv[cnt], "--partition") == 0) {
1934 cnt++;
1935 if (cnt >= argc)
1936 throw "Use -p <partition_file>";
1937 params.partition_file = argv[cnt];
1938 params.partition_type = BRLEN_SCALE;
1939 params.opt_gammai = false;
1940 continue;
1941 }
1942 if (strcmp(argv[cnt], "-spj") == 0 || strcmp(argv[cnt], "-q") == 0) {
1943 cnt++;
1944 if (cnt >= argc)
1945 throw "Use -q <partition_file>";
1946 params.partition_file = argv[cnt];
1947 params.partition_type = BRLEN_FIX;
1948 params.optimize_alg_gammai = "Brent";
1949 params.opt_gammai = false;
1950 continue;
1951 }
1952 if (strcmp(argv[cnt], "-M") == 0) {
1953 params.partition_type = BRLEN_OPTIMIZE;
1954 continue;
1955 }
1956
1957 if (strcmp(argv[cnt], "-spu") == 0 || strcmp(argv[cnt], "-S") == 0) {
1958 cnt++;
1959 if (cnt >= argc)
1960 throw "Use -spu <partition_file>";
1961 params.partition_file = argv[cnt];
1962 params.partition_type = TOPO_UNLINKED;
1963 params.ignore_identical_seqs = false;
1964 params.buffer_mem_save = true;
1965 params.print_splits_nex_file = false;
1966 continue;
1967 }
1968
1969 if (strcmp(argv[cnt], "--edge") == 0) {
1970 cnt++;
1971 if (cnt >= argc)
1972 throw "Use --edge equal|scale|unlink";
1973 if (strcmp(argv[cnt], "equal") == 0)
1974 params.partition_type = BRLEN_FIX;
1975 else if (strcmp(argv[cnt], "scale") == 0)
1976 params.partition_type = BRLEN_SCALE;
1977 else if (strcmp(argv[cnt], "unlink") == 0)
1978 params.partition_type = BRLEN_OPTIMIZE;
1979 else
1980 throw "Use --edge equal|scale|unlink";
1981 }
1982
1983 if (strcmp(argv[cnt], "-rcluster") == 0 || strcmp(argv[cnt], "--rcluster") == 0) {
1984 cnt++;
1985 if (cnt >= argc)
1986 throw "Use -rcluster <percent>";
1987 params.partfinder_rcluster = convert_double(argv[cnt]);
1988 if (params.partfinder_rcluster < 0 || params.partfinder_rcluster > 100)
1989 throw "rcluster percentage must be between 0 and 100";
1990 params.partition_merge = MERGE_RCLUSTER;
1991 continue;
1992 }
1993 if (strcmp(argv[cnt], "-rclusterf") == 0 || strcmp(argv[cnt], "--rclusterf") == 0) {
1994 cnt++;
1995 if (cnt >= argc)
1996 throw "Use -rclusterf <percent>";
1997 params.partfinder_rcluster = convert_double(argv[cnt]);
1998 if (params.partfinder_rcluster < 0 || params.partfinder_rcluster > 100)
1999 throw "rcluster percentage must be between 0 and 100";
2000 params.partition_merge = MERGE_RCLUSTERF;
2001 continue;
2002 }
2003
2004 if (strcmp(argv[cnt], "-rcluster-max") == 0 || strcmp(argv[cnt], "--rcluster-max") == 0) {
2005 cnt++;
2006 if (cnt >= argc)
2007 throw "Use -rcluster-max <num>";
2008 params.partfinder_rcluster_max = convert_int(argv[cnt]);
2009 if (params.partfinder_rcluster_max <= 0)
2010 throw "rcluster-max must be between > 0";
2011 if (params.partfinder_rcluster == 100)
2012 params.partfinder_rcluster = 99.9999;
2013 if (params.partition_merge != MERGE_RCLUSTER && params.partition_merge != MERGE_RCLUSTERF)
2014 params.partition_merge = MERGE_RCLUSTERF;
2015 continue;
2016 }
2017
2018 if (strcmp(argv[cnt], "--merge") == 0) {
2019 if (cnt >= argc-1 || argv[cnt+1][0] == '-') {
2020 if (params.partfinder_rcluster == 100)
2021 params.partfinder_rcluster = 99.9999;
2022 params.partition_merge = MERGE_RCLUSTERF;
2023 continue;
2024 }
2025 cnt++;
2026 if (cnt >= argc)
2027 throw "Use --merge [none|greedy|rcluster|rclusterf|kmeans]";
2028 if (strcmp(argv[cnt], "none") == 0)
2029 params.partition_merge = MERGE_NONE;
2030 else if (strcmp(argv[cnt], "greedy") == 0)
2031 params.partition_merge = MERGE_GREEDY;
2032 else if (strcmp(argv[cnt], "rcluster") == 0) {
2033 if (params.partfinder_rcluster == 100)
2034 params.partfinder_rcluster = 99.9999;
2035 params.partition_merge = MERGE_RCLUSTER;
2036 } else if (strcmp(argv[cnt], "rclusterf") == 0) {
2037 if (params.partfinder_rcluster == 100)
2038 params.partfinder_rcluster = 99.9999;
2039 params.partition_merge = MERGE_RCLUSTERF;
2040 } else if (strcmp(argv[cnt], "rcluster") == 0)
2041 params.partition_merge = MERGE_KMEANS;
2042 else
2043 throw "Use --merge [none|greedy|rcluster|rclusterf|kmeans]";
2044 continue;
2045 }
2046
2047 if (strcmp(argv[cnt], "--merge-model") == 0) {
2048 cnt++;
2049 if (cnt >= argc)
2050 throw "Use --merge-model 1|4|ALL|model1,...,modelK";
2051 params.merge_models = argv[cnt];
2052 if (params.partition_merge == MERGE_NONE) {
2053 if (params.partfinder_rcluster == 100)
2054 params.partfinder_rcluster = 99.9999;
2055 params.partition_merge = MERGE_RCLUSTERF;
2056 continue;
2057 }
2058 continue;
2059 }
2060
2061 if (strcmp(argv[cnt], "--merge-rate") == 0) {
2062 cnt++;
2063 if (cnt >= argc)
2064 throw "Use --merge-rate rate1,...,rateK";
2065 params.merge_rates = argv[cnt];
2066 if (params.partition_merge == MERGE_NONE) {
2067 if (params.partfinder_rcluster == 100)
2068 params.partfinder_rcluster = 99.9999;
2069 params.partition_merge = MERGE_RCLUSTERF;
2070 continue;
2071 }
2072 continue;
2073 }
2074
2075 if (strcmp(argv[cnt], "--merge-log-rate") == 0) {
2076 params.partfinder_log_rate = true;
2077 continue;
2078 }
2079
2080 if (strcmp(argv[cnt], "--merge-normal-rate") == 0) {
2081 params.partfinder_log_rate = false;
2082 continue;
2083 }
2084
2085 if (strcmp(argv[cnt], "-keep_empty_seq") == 0) {
2086 params.remove_empty_seq = false;
2087 continue;
2088 }
2089 if (strcmp(argv[cnt], "-no_terrace") == 0) {
2090 params.terrace_aware = false;
2091 params.terrace_analysis = false;
2092 continue;
2093 }
2094 if (strcmp(argv[cnt], "--terrace") == 0) {
2095 #ifdef IQTREE_TERRAPHAST
2096 params.terrace_analysis = true;
2097 #else
2098 throw "Unsupported command: --terrace.\n"
2099 "Please build IQ-TREE with the USE_TERRAPHAST flag.";
2100 #endif
2101 continue;
2102 }
2103
2104 if (strcmp(argv[cnt], "--no-terrace") == 0) {
2105 params.terrace_analysis = false;
2106 continue;
2107 }
2108
2109 if (strcmp(argv[cnt], "-sf") == 0) {
2110 cnt++;
2111 if (cnt >= argc)
2112 throw "Use -sf <ngs_file>";
2113 params.ngs_file = argv[cnt];
2114 continue;
2115 }
2116 if (strcmp(argv[cnt], "-sm") == 0) {
2117 cnt++;
2118 if (cnt >= argc)
2119 throw "Use -sm <ngs_mapped_read_file>";
2120 params.ngs_mapped_reads = argv[cnt];
2121 continue;
2122 }
2123 if (strcmp(argv[cnt], "-ngs_gap") == 0) {
2124 params.ngs_ignore_gaps = false;
2125 continue;
2126 }
2127 if (strcmp(argv[cnt], "-st") == 0 || strcmp(argv[cnt], "--seqtype") == 0) {
2128 cnt++;
2129 if (cnt >= argc)
2130 throw "Use -st BIN or -st DNA or -st AA or -st CODON or -st MORPH or -st CRXX or -st CFxx.";
2131 string arg = argv[cnt];
2132 params.sequence_type = argv[cnt];
2133 // if (arg.substr(0,2) == "CR") params.pomo_random_sampling = true;
2134 // if (arg.substr(0,2) == "CF" || arg.substr(0,2) == "CR") {
2135 // outWarning("Setting the sampling method and population size with this flag is deprecated.");
2136 // outWarning("Please use the model string instead (see `iqtree --help`).");
2137 // if (arg.length() > 2) {
2138 // int ps = convert_int(arg.substr(2).c_str());
2139 // params.pomo_pop_size = ps;
2140 // if (((ps != 10) && (ps != 2) && (ps % 2 == 0)) || (ps < 2) || (ps > 19)) {
2141 // std::cout << "Please give a correct PoMo sequence type parameter; e.g., `-st CF09`." << std::endl;
2142 // outError("Custom virtual population size of PoMo not 2, 10 or any other odd number between 3 and 19.");
2143 // }
2144 // }
2145 // }
2146 continue;
2147 }
2148
2149 if (strcmp(argv[cnt], "-starttree") == 0) {
2150 cnt++;
2151 if (cnt >= argc)
2152 throw "Use -starttree BIONJ|PARS|PLLPARS";
2153 if (strcmp(argv[cnt], "BIONJ") == 0)
2154 params.start_tree = STT_BIONJ;
2155 else if (strcmp(argv[cnt], "PARS") == 0)
2156 params.start_tree = STT_PARSIMONY;
2157 else if (strcmp(argv[cnt], "PLLPARS") == 0)
2158 params.start_tree = STT_PLL_PARSIMONY;
2159 else
2160 throw "Invalid option, please use -starttree with BIONJ or PARS or PLLPARS";
2161 continue;
2162 }
2163
2164 if (strcmp(argv[cnt], "-ao") == 0 || strcmp(argv[cnt], "--out-alignment") == 0 || strcmp(argv[cnt], "--out-aln") == 0) {
2165 cnt++;
2166 if (cnt >= argc)
2167 throw "Use -ao <alignment_file>";
2168 params.aln_output = argv[cnt];
2169 continue;
2170 }
2171 if (strcmp(argv[cnt], "-as") == 0) {
2172 cnt++;
2173 if (cnt >= argc)
2174 throw "Use -as <aln_site_list>";
2175 params.aln_site_list = argv[cnt];
2176 continue;
2177 }
2178 if (strcmp(argv[cnt], "-an") == 0) {
2179 cnt++;
2180 if (cnt >= argc)
2181 throw "Use -an <ref_seq_name>";
2182 params.ref_seq_name = argv[cnt];
2183 continue;
2184 }
2185 if (strcmp(argv[cnt], "-af") == 0 || strcmp(argv[cnt], "--out-format") == 0) {
2186 cnt++;
2187 if (cnt >= argc)
2188 throw "Use -af phy|fasta";
2189 if (strcmp(argv[cnt], "phy") == 0)
2190 params.aln_output_format = IN_PHYLIP;
2191 else if (strcmp(argv[cnt], "fasta") == 0)
2192 params.aln_output_format = IN_FASTA;
2193 else if (strcmp(argv[cnt], "nexus") == 0)
2194 params.aln_output_format = IN_NEXUS;
2195 else
2196 throw "Unknown output format";
2197 continue;
2198 }
2199
2200
2201 if (strcmp(argv[cnt], "--out-csv") == 0) {
2202 params.output_format = FORMAT_CSV;
2203 continue;
2204 }
2205
2206 if (strcmp(argv[cnt], "--out-tsv") == 0) {
2207 params.output_format = FORMAT_TSV;
2208 continue;
2209 }
2210
2211 if (strcmp(argv[cnt], "--figtree") == 0) {
2212 params.newick_extended_format = true;
2213 continue;
2214 }
2215
2216 if (strcmp(argv[cnt], "-am") == 0) {
2217 cnt++;
2218 if (cnt >= argc)
2219 throw "Use -am <gap_masked_aln>";
2220 params.gap_masked_aln = argv[cnt];
2221 continue;
2222 }
2223 if (strcmp(argv[cnt], "-ac") == 0) {
2224 cnt++;
2225 if (cnt >= argc)
2226 throw "Use -ac <concatenate_aln>";
2227 params.concatenate_aln = argv[cnt];
2228 continue;
2229 }
2230 if (strcmp(argv[cnt], "-nogap") == 0) {
2231 params.aln_nogaps = true;
2232 continue;
2233 }
2234 if (strcmp(argv[cnt], "-noconst") == 0) {
2235 params.aln_no_const_sites = true;
2236 continue;
2237 }
2238 if (strcmp(argv[cnt], "-alninfo") == 0) {
2239 params.print_aln_info = true;
2240 continue;
2241 }
2242 // if (strcmp(argv[cnt], "-parstree") == 0) {
2243 // maximum parsimony
2244 // params.parsimony_tree = true;
2245 // continue; } if (strcmp(argv[cnt], "-pars") == 0) {
2246 // // maximum parsimony
2247 // params.parsimony = true;
2248 // continue;
2249 // }
2250 if (strcmp(argv[cnt], "-spr") == 0) {
2251 // subtree pruning and regrafting
2252 params.tree_spr = true;
2253 continue;
2254 }
2255 if (strcmp(argv[cnt], "-krep") == 0) {
2256 cnt++;
2257 if (cnt >= argc)
2258 throw "Use -krep <num_k>";
2259 params.k_representative = convert_int(argv[cnt]);
2260 continue;
2261 }
2262 if (strcmp(argv[cnt], "-pdel") == 0) {
2263 cnt++;
2264 if (cnt >= argc)
2265 throw "Use -pdel <probability>";
2266 params.p_delete = convert_double(argv[cnt]);
2267 if (params.p_delete < 0.0 || params.p_delete > 1.0)
2268 throw "Probability of deleting a leaf must be between 0 and 1";
2269 continue;
2270 }
2271 if (strcmp(argv[cnt], "-pers") == 0 || strcmp(argv[cnt], "--perturb") == 0) {
2272 cnt++;
2273 if (cnt >= argc)
2274 throw "Use -pers <perturbation_strength>";
2275 params.initPS = convert_double(argv[cnt]);
2276 continue;
2277 }
2278 if (strcmp(argv[cnt], "-n") == 0) {
2279 cnt++;
2280 if (cnt >= argc)
2281 throw "Use -n <#iterations>";
2282 if (params.gbo_replicates != 0) {
2283 throw("Ultrafast bootstrap does not work with -n option");
2284 }
2285 params.min_iterations = convert_int(argv[cnt]);
2286 params.stop_condition = SC_FIXED_ITERATION;
2287 // params.autostop = false;
2288 continue;
2289 }
2290 if (strcmp(argv[cnt], "-nparam") == 0) {
2291 cnt++;
2292 if (cnt >= argc)
2293 throw "Use -nparam <#iterations>";
2294 params.num_param_iterations = convert_int(argv[cnt]);
2295 if (params.num_param_iterations < 0)
2296 throw "Number of parameter optimization iterations (-nparam) must be non negative";
2297 continue;
2298 }
2299
2300 if (strcmp(argv[cnt], "-nb") == 0) {
2301 cnt++;
2302 if (cnt >= argc)
2303 throw "Use -nb <#bootstrap_replicates>";
2304 params.min_iterations = convert_int(argv[cnt]);
2305 params.iqp_assess_quartet = IQP_BOOTSTRAP;
2306 // params.avoid_duplicated_trees = true;
2307 continue;
2308 }
2309 if (strcmp(argv[cnt], "--model") == 0 || strcmp(argv[cnt], "-m") == 0) {
2310 cnt++;
2311 if (cnt >= argc)
2312 throw "Use --model <model_name>";
2313 params.model_name = argv[cnt];
2314 continue;
2315 }
2316 if (strcmp(argv[cnt], "--init-model") == 0) {
2317 cnt++;
2318 if (cnt >= argc)
2319 throw "Use --init-model FILE";
2320 params.model_name_init = argv[cnt];
2321 continue;
2322 }
2323 if (strcmp(argv[cnt], "--loop-model") == 0) {
2324 cnt++;
2325 if (cnt >= argc)
2326 throw "Use --loop-model NUM";
2327 params.model_opt_steps = convert_int(argv[cnt]);
2328 continue;
2329 }
2330 if (strcmp(argv[cnt], "-mset") == 0 || strcmp(argv[cnt], "--mset") == 0 || strcmp(argv[cnt], "--models") == 0 ) {
2331 cnt++;
2332 if (cnt >= argc)
2333 throw "Use -mset <model_set>";
2334 params.model_set = argv[cnt];
2335 continue;
2336 }
2337 if (strcmp(argv[cnt], "-madd") == 0 || strcmp(argv[cnt], "--madd") == 0) {
2338 cnt++;
2339 if (cnt >= argc)
2340 throw "Use -madd <extra_model_set>";
2341 params.model_extra_set = argv[cnt];
2342 continue;
2343 }
2344 if (strcmp(argv[cnt], "-msub") == 0 || strcmp(argv[cnt], "--msub") == 0 || strcmp(argv[cnt], "--model-sub") == 0) {
2345 cnt++;
2346 if (cnt >= argc)
2347 throw "Use -msub <model_subset>";
2348 params.model_subset = argv[cnt];
2349 continue;
2350 }
2351 if (strcmp(argv[cnt], "-mfreq") == 0 || strcmp(argv[cnt], "--freqs") == 0) {
2352 cnt++;
2353 if (cnt >= argc)
2354 throw "Use -mfreq <state_freq_set>";
2355 params.state_freq_set = argv[cnt];
2356 continue;
2357 }
2358 if (strcmp(argv[cnt], "-mrate") == 0 || strcmp(argv[cnt], "--mrate") == 0 || strcmp(argv[cnt], "--rates") == 0) {
2359 cnt++;
2360 if (cnt >= argc)
2361 throw "Use -mrate <rate_set>";
2362 params.ratehet_set = argv[cnt];
2363 continue;
2364 }
2365
2366 if (strcmp(argv[cnt], "--score-diff") == 0) {
2367 cnt++;
2368 if (cnt >= argc)
2369 throw "Use --score-diff <score>";
2370 if (iEquals(argv[cnt], "all"))
2371 params.score_diff_thres = -1.0;
2372 else
2373 params.score_diff_thres = convert_double(argv[cnt]);
2374 continue;
2375 }
2376
2377 if (strcmp(argv[cnt], "-mdef") == 0 || strcmp(argv[cnt], "--mdef") == 0) {
2378 cnt++;
2379 if (cnt >= argc)
2380 throw "Use -mdef <model_definition_file>";
2381 params.model_def_file = argv[cnt];
2382 continue;
2383 }
2384 if (strcmp(argv[cnt], "--modelomatic") == 0) {
2385 params.modelomatic = true;
2386 continue;
2387 }
2388 if (strcmp(argv[cnt], "-mredo") == 0 || strcmp(argv[cnt], "--mredo") == 0 || strcmp(argv[cnt], "--model-redo") == 0) {
2389 params.model_test_again = true;
2390 continue;
2391 }
2392 if (strcmp(argv[cnt], "-mtree") == 0 || strcmp(argv[cnt], "--mtree") == 0) {
2393 params.model_test_and_tree = 1;
2394 continue;
2395 }
2396 if (strcmp(argv[cnt], "-mretree") == 0) {
2397 params.model_test_and_tree = 2;
2398 continue;
2399 }
2400 if (strcmp(argv[cnt], "-msep") == 0) {
2401 params.model_test_separate_rate = true;
2402 continue;
2403 }
2404 if (strcmp(argv[cnt], "-mwopt") == 0 || strcmp(argv[cnt], "--mix-opt") == 0) {
2405 params.optimize_mixmodel_weight = true;
2406 continue;
2407 }
2408 if (strcmp(argv[cnt], "--opt-rate-mat") == 0) {
2409 params.optimize_rate_matrix = true;
2410 continue;
2411 }
2412 // if (strcmp(argv[cnt], "-mh") == 0) {
2413 // params.mvh_site_rate = true;
2414 // params.discard_saturated_site = false;
2415 // params.SSE = LK_NORMAL;
2416 // continue;
2417 // }
2418 // if (strcmp(argv[cnt], "-mhs") == 0) {
2419 // params.mvh_site_rate = true;
2420 // params.discard_saturated_site = true;
2421 // params.SSE = LK_NORMAL;
2422 // continue;
2423 // }
2424 if (strcmp(argv[cnt], "-rl") == 0) {
2425 params.rate_mh_type = false;
2426 continue;
2427 }
2428 if (strcmp(argv[cnt], "-nr") == 0) {
2429 cnt++;
2430 if (cnt >= argc)
2431 throw "Use -nr <mean_rate>";
2432 params.mean_rate = convert_double(argv[cnt]);
2433 if (params.mean_rate < 0)
2434 throw "Wrong mean rate for MH model";
2435 continue;
2436 }
2437 if (strcmp(argv[cnt], "-mstore") == 0) {
2438 params.store_trans_matrix = true;
2439 continue;
2440 }
2441 if (strcmp(argv[cnt], "-nni_lh") == 0) {
2442 params.nni_lh = true;
2443 continue;
2444 }
2445 if (strcmp(argv[cnt], "-lmd") == 0) {
2446 cnt++;
2447 if (cnt >= argc)
2448 throw "Use -lmd <lambda>";
2449 params.lambda = convert_double(argv[cnt]);
2450 if (params.lambda > 1.0)
2451 throw "Lambda must be in (0,1]";
2452 continue;
2453 }
2454
2455 if (strcmp(argv[cnt], "-lk") == 0) {
2456 cnt++;
2457 if (cnt >= argc)
2458 throw "-lk x86|SSE|AVX|FMA|AVX512";
2459 if (strcmp(argv[cnt], "x86") == 0)
2460 params.SSE = LK_386;
2461 else if (strcmp(argv[cnt], "SSE") == 0)
2462 params.SSE = LK_SSE2;
2463 else if (strcmp(argv[cnt], "AVX") == 0)
2464 params.SSE = LK_AVX;
2465 else if (strcmp(argv[cnt], "FMA") == 0)
2466 params.SSE = LK_AVX_FMA;
2467 else if (strcmp(argv[cnt], "AVX512") == 0)
2468 params.SSE = LK_AVX512;
2469 else
2470 throw "Incorrect -lk likelihood kernel option";
2471 continue;
2472 }
2473
2474 if (strcmp(argv[cnt], "-safe") == 0 || strcmp(argv[cnt], "--safe") == 0) {
2475 params.lk_safe_scaling = true;
2476 continue;
2477 }
2478
2479 if (strcmp(argv[cnt], "-safe-seq") == 0) {
2480 cnt++;
2481 if (cnt >= argc)
2482 throw "-safe-seq <number of sequences>";
2483 params.numseq_safe_scaling = convert_int(argv[cnt]);
2484 if (params.numseq_safe_scaling < 10)
2485 throw "Too small -safe-seq";
2486 continue;
2487 }
2488
2489 if (strcmp(argv[cnt], "--kernel-nonrev") == 0) {
2490 params.kernel_nonrev = true;
2491 continue;
2492 }
2493
2494 if (strcmp(argv[cnt], "-f") == 0) {
2495 cnt++;
2496 if (cnt >= argc)
2497 throw "Use -f <c | o | u | q | ry | ws | mk | <digits>>";
2498 if (strcmp(argv[cnt], "q") == 0 || strcmp(argv[cnt], "EQ") == 0)
2499 params.freq_type = FREQ_EQUAL;
2500 else if (strcmp(argv[cnt], "c") == 0
2501 || strcmp(argv[cnt], "EM") == 0)
2502 params.freq_type = FREQ_EMPIRICAL;
2503 else if (strcmp(argv[cnt], "o") == 0
2504 || strcmp(argv[cnt], "ES") == 0)
2505 params.freq_type = FREQ_ESTIMATE;
2506 else if (strcmp(argv[cnt], "u") == 0
2507 || strcmp(argv[cnt], "UD") == 0)
2508 params.freq_type = FREQ_USER_DEFINED;
2509 else if (strcmp(argv[cnt], "ry") == 0
2510 || strcmp(argv[cnt], "RY") == 0)
2511 params.freq_type = FREQ_DNA_RY;
2512 else if (strcmp(argv[cnt], "ws") == 0
2513 || strcmp(argv[cnt], "WS") == 0)
2514 params.freq_type = FREQ_DNA_WS;
2515 else if (strcmp(argv[cnt], "mk") == 0
2516 || strcmp(argv[cnt], "MK") == 0)
2517 params.freq_type = FREQ_DNA_MK;
2518 else
2519 // throws error message if can't parse
2520 params.freq_type = parseStateFreqDigits(argv[cnt]);
2521 continue;
2522 }
2523
2524 if (strcmp(argv[cnt], "--keep-zero-freq") == 0) {
2525 params.keep_zero_freq = true;
2526 continue;
2527 }
2528
2529 if (strcmp(argv[cnt], "--min-freq") == 0) {
2530 cnt++;
2531 if (cnt >= argc)
2532 throw "Use --min-freq NUM";
2533 params.min_state_freq = convert_double(argv[cnt]);
2534 if (params.min_state_freq <= 0)
2535 throw "--min-freq must be positive";
2536 if (params.min_state_freq >= 1.0)
2537 throw "--min-freq must be < 1.0";
2538 continue;
2539 }
2540
2541 if (strcmp(argv[cnt], "--inc-zero-freq") == 0) {
2542 params.keep_zero_freq = false;
2543 continue;
2544 }
2545
2546 if (strcmp(argv[cnt], "-fs") == 0 || strcmp(argv[cnt], "--site-freq") == 0) {
2547 if (params.tree_freq_file)
2548 throw "Specifying both -fs and -ft not allowed";
2549 cnt++;
2550 if (cnt >= argc)
2551 throw "Use -fs <site_freq_file>";
2552 params.site_freq_file = argv[cnt];
2553 // params.SSE = LK_EIGEN;
2554 continue;
2555 }
2556 if (strcmp(argv[cnt], "-ft") == 0 || strcmp(argv[cnt], "--tree-freq") == 0) {
2557 if (params.site_freq_file)
2558 throw "Specifying both -fs and -ft not allowed";
2559 cnt++;
2560 if (cnt >= argc)
2561 throw "Use -ft <treefile_to_infer_site_frequency_model>";
2562 params.tree_freq_file = argv[cnt];
2563 if (params.print_site_state_freq == WSF_NONE)
2564 params.print_site_state_freq = WSF_POSTERIOR_MEAN;
2565 continue;
2566 }
2567
2568 if (strcmp(argv[cnt], "-fconst") == 0) {
2569 cnt++;
2570 if (cnt >= argc)
2571 throw "Use -fconst <const_pattern_frequencies>";
2572 params.freq_const_patterns = argv[cnt];
2573 continue;
2574 }
2575 if (strcmp(argv[cnt], "--nrate") == 0) {
2576 cnt++;
2577 if (cnt >= argc)
2578 throw "Use -c <#rate_category>";
2579 params.num_rate_cats = convert_int(argv[cnt]);
2580 if (params.num_rate_cats < 1)
2581 throw "Wrong number of rate categories";
2582 continue;
2583 }
2584 if (strcmp(argv[cnt], "-cmin") == 0 || strcmp(argv[cnt], "--cmin") == 0) {
2585 cnt++;
2586 if (cnt >= argc)
2587 throw "Use -cmin <#min_rate_category>";
2588 params.min_rate_cats = convert_int(argv[cnt]);
2589 if (params.min_rate_cats < 2)
2590 throw "Wrong number of rate categories for -cmin";
2591 continue;
2592 }
2593 if (strcmp(argv[cnt], "-cmax") == 0 || strcmp(argv[cnt], "--cmax") == 0) {
2594 cnt++;
2595 if (cnt >= argc)
2596 throw "Use -cmax <#max_rate_category>";
2597 params.max_rate_cats = convert_int(argv[cnt]);
2598 if (params.max_rate_cats < 2)
2599 throw "Wrong number of rate categories for -cmax";
2600 continue;
2601 }
2602 if (strcmp(argv[cnt], "-a") == 0) {
2603 cnt++;
2604 if (cnt >= argc)
2605 throw "Use -a <gamma_shape>";
2606 params.gamma_shape = convert_double(argv[cnt]);
2607 if (params.gamma_shape <= 0)
2608 throw "Wrong gamma shape parameter (alpha)";
2609 continue;
2610 }
2611
2612 if (strcmp(argv[cnt], "-amin") == 0 || strcmp(argv[cnt], "--alpha-min") == 0) {
2613 cnt++;
2614 if (cnt >= argc)
2615 throw "Use -amin <min_gamma_shape>";
2616 params.min_gamma_shape = convert_double(argv[cnt]);
2617 if (params.min_gamma_shape <= 0)
2618 throw "Wrong minimum gamma shape parameter (alpha)";
2619 continue;
2620 }
2621
2622 if (strcmp(argv[cnt], "-gmean") == 0 || strcmp(argv[cnt], "--gamma-mean") == 0) {
2623 params.gamma_median = false;
2624 continue;
2625 }
2626 if (strcmp(argv[cnt], "-gmedian") == 0 || strcmp(argv[cnt], "--gamma-median") == 0) {
2627 params.gamma_median = true;
2628 continue;
2629 }
2630 if (strcmp(argv[cnt], "-i") == 0) {
2631 cnt++;
2632 if (cnt >= argc)
2633 throw "Use -i <p_invar_sites>";
2634 params.p_invar_sites = convert_double(argv[cnt]);
2635 if (params.p_invar_sites < 0)
2636 throw "Wrong number of proportion of invariable sites";
2637 continue;
2638 }
2639 if (strcmp(argv[cnt], "-optfromgiven") == 0) {
2640 params.optimize_from_given_params = true;
2641 continue;
2642 }
2643 if (strcmp(argv[cnt], "-brent") == 0) {
2644 params.optimize_by_newton = false;
2645 continue;
2646 }
2647 if (strcmp(argv[cnt], "-jointopt") == 0) {
2648 params.optimize_model_rate_joint = true;
2649 continue;
2650 }
2651 if (strcmp(argv[cnt], "-brent_ginvar") == 0) {
2652 params.optimize_model_rate_joint = false;
2653 continue;
2654 }
2655 if (strcmp(argv[cnt], "-fixbr") == 0 || strcmp(argv[cnt], "-blfix") == 0) {
2656 params.fixed_branch_length = BRLEN_FIX;
2657 params.optimize_alg_gammai = "Brent";
2658 params.opt_gammai = false;
2659 params.min_iterations = 0;
2660 params.stop_condition = SC_FIXED_ITERATION;
2661 continue;
2662 }
2663 if (strcmp(argv[cnt], "-blscale") == 0) {
2664 params.fixed_branch_length = BRLEN_SCALE;
2665 params.optimize_alg_gammai = "Brent";
2666 params.opt_gammai = false;
2667 params.min_iterations = 0;
2668 params.stop_condition = SC_FIXED_ITERATION;
2669 continue;
2670 }
2671 if (strcmp(argv[cnt], "-blmin") == 0) {
2672 cnt++;
2673 if (cnt >= argc)
2674 throw "Use -blmin <min_branch_length>";
2675 params.min_branch_length = convert_double(argv[cnt]);
2676 if (params.min_branch_length < 0.0)
2677 throw("Negative -blmin not allowed!");
2678 if (params.min_branch_length == 0.0)
2679 throw("Zero -blmin is not allowed due to numerical problems");
2680 if (params.min_branch_length > 0.1)
2681 throw("-blmin must be < 0.1");
2682
2683 continue;
2684 }
2685 if (strcmp(argv[cnt], "-blmax") == 0) {
2686 cnt++;
2687 if (cnt >= argc)
2688 throw "Use -blmax <max_branch_length>";
2689 params.max_branch_length = convert_double(argv[cnt]);
2690 if (params.max_branch_length < 0.5)
2691 throw("-blmax smaller than 0.5 is not allowed");
2692 continue;
2693 }
2694 if (strcmp(argv[cnt], "--show-lh") == 0) {
2695 params.ignore_identical_seqs = false;
2696 params.fixed_branch_length = BRLEN_FIX;
2697 params.optimize_alg_gammai = "Brent";
2698 params.opt_gammai = false;
2699 params.min_iterations = 0;
2700 params.stop_condition = SC_FIXED_ITERATION;
2701 verbose_mode = VB_DEBUG;
2702 params.ignore_checkpoint = true;
2703 continue;
2704 }
2705 if (strcmp(argv[cnt], "-sr") == 0) {
2706 params.stop_condition = SC_WEIBULL;
2707 cnt++;
2708 if (cnt >= argc)
2709 throw "Use -sr <#max_iteration>";
2710 params.max_iterations = convert_int(argv[cnt]);
2711 if (params.max_iterations <= params.min_iterations)
2712 throw "Specified max iteration must be greater than min iteration";
2713 continue;
2714 }
2715 if (strcmp(argv[cnt], "-nm") == 0 || strcmp(argv[cnt], "--nmax") == 0) {
2716 cnt++;
2717 if (cnt >= argc)
2718 throw "Use -nm <#max_iteration>";
2719 params.max_iterations = convert_int(argv[cnt]);
2720 if (params.max_iterations <= params.min_iterations)
2721 throw "Specified max iteration must be greater than min iteration";
2722 continue;
2723 }
2724 if (strcmp(argv[cnt], "-sc") == 0) {
2725 cnt++;
2726 if (cnt >= argc)
2727 throw "Use -sc <stop_confidence_value>";
2728 params.stop_confidence = convert_double(argv[cnt]);
2729 if (params.stop_confidence <= 0.5
2730 || params.stop_confidence >= 1)
2731 throw "Stop confidence value must be in range (0.5,1)";
2732 continue;
2733 }
2734 if (strcmp(argv[cnt], "--runs") == 0) {
2735 cnt++;
2736 if (cnt >= argc)
2737 throw "Use --runs <number_of_runs>";
2738 params.num_runs = convert_int(argv[cnt]);
2739 if (params.num_runs < 1)
2740 throw "Positive --runs please";
2741 continue;
2742 }
2743 if (strcmp(argv[cnt], "-gurobi") == 0) {
2744 params.gurobi_format = true;
2745 continue;
2746 }
2747 if (strcmp(argv[cnt], "-gthreads") == 0) {
2748 params.gurobi_format = true;
2749 cnt++;
2750 if (cnt >= argc)
2751 throw "Use -gthreads <gurobi_threads>";
2752 params.gurobi_threads = convert_int(argv[cnt]);
2753 if (params.gurobi_threads < 1)
2754 throw "Wrong number of threads";
2755 continue;
2756 }
2757 if (strcmp(argv[cnt], "-b") == 0 || strcmp(argv[cnt], "--boot") == 0 ||
2758 strcmp(argv[cnt], "-j") == 0 || strcmp(argv[cnt], "--jack") == 0 ||
2759 strcmp(argv[cnt], "-bo") == 0 || strcmp(argv[cnt], "--bonly") == 0) {
2760 params.multi_tree = true;
2761 if (strcmp(argv[cnt], "-bo") == 0 || strcmp(argv[cnt], "--bonly") == 0)
2762 params.compute_ml_tree = false;
2763 else
2764 params.consensus_type = CT_CONSENSUS_TREE;
2765 if ((strcmp(argv[cnt], "-j") == 0 || strcmp(argv[cnt], "--jack") == 0) && params.jackknife_prop == 0.0)
2766 params.jackknife_prop = 0.5;
2767 cnt++;
2768 if (cnt >= argc)
2769 throw "Use -b <num_bootstrap_samples>";
2770 params.num_bootstrap_samples = convert_int(argv[cnt]);
2771 if (params.num_bootstrap_samples < 1)
2772 throw "Wrong number of bootstrap samples";
2773 if (params.num_bootstrap_samples == 1)
2774 params.compute_ml_tree = false;
2775 if (params.num_bootstrap_samples == 1)
2776 params.consensus_type = CT_NONE;
2777 continue;
2778 }
2779 if (strcmp(argv[cnt], "--bsam") == 0 || strcmp(argv[cnt], "-bsam") == 0 || strcmp(argv[cnt], "--sampling") == 0) {
2780 cnt++;
2781 if (cnt >= argc)
2782 throw "Use -bsam <bootstrap_specification>";
2783 params.bootstrap_spec = argv[cnt];
2784 params.remove_empty_seq = false;
2785 continue;
2786 }
2787
2788 if (strcmp(argv[cnt], "--subsample") == 0) {
2789 cnt++;
2790 if (cnt >= argc)
2791 throw "Use --subsample NUM";
2792 params.subsampling = convert_int(argv[cnt]);
2793 continue;
2794 }
2795
2796 if (strcmp(argv[cnt], "--subsample-seed") == 0) {
2797 cnt++;
2798 if (cnt >= argc)
2799 throw "Use --subsample-seed <random_seed>";
2800 params.subsampling_seed = convert_int(argv[cnt]);
2801 continue;
2802 }
2803
2804 #ifdef USE_BOOSTER
2805 if (strcmp(argv[cnt], "--tbe") == 0) {
2806 params.transfer_bootstrap = 1;
2807 continue;
2808 }
2809
2810 if (strcmp(argv[cnt], "--tbe-raw") == 0) {
2811 params.transfer_bootstrap = 2;
2812 continue;
2813 }
2814 #endif
2815
2816 if (strcmp(argv[cnt], "-bc") == 0 || strcmp(argv[cnt], "--bcon") == 0) {
2817 params.multi_tree = true;
2818 params.compute_ml_tree = false;
2819 cnt++;
2820 if (cnt >= argc)
2821 throw "Use -bc <num_bootstrap_samples>";
2822 params.num_bootstrap_samples = convert_int(argv[cnt]);
2823 if (params.num_bootstrap_samples < 1)
2824 throw "Wrong number of bootstrap samples";
2825 if (params.num_bootstrap_samples > 1)
2826 params.consensus_type = CT_CONSENSUS_TREE;
2827 continue;
2828 }
2829 if (strcmp(argv[cnt], "-iqppars") == 0) {
2830 params.iqp_assess_quartet = IQP_PARSIMONY;
2831 continue;
2832 }
2833 if (strcmp(argv[cnt], "-iqp") == 0) {
2834 params.iqp = true;
2835 continue;
2836 }
2837 if (strcmp(argv[cnt], "-wct") == 0) {
2838 params.write_candidate_trees = true;
2839 continue;
2840 }
2841
2842 if (strcmp(argv[cnt], "-wt") == 0 || strcmp(argv[cnt], "--treels") == 0) {
2843 params.write_intermediate_trees = 1;
2844 continue;
2845 }
2846
2847 if (strcmp(argv[cnt], "-wdt") == 0) {
2848 params.writeDistImdTrees = true;
2849 continue;
2850 }
2851
2852 if (strcmp(argv[cnt], "-wtc") == 0) {
2853 params.write_intermediate_trees = 1;
2854 params.print_tree_lh = true;
2855 continue;
2856 }
2857
2858 if (strcmp(argv[cnt], "-wt2") == 0) {
2859 params.write_intermediate_trees = 2;
2860 // params.avoid_duplicated_trees = true;
2861 params.print_tree_lh = true;
2862 continue;
2863 }
2864 if (strcmp(argv[cnt], "-wt3") == 0) {
2865 params.write_intermediate_trees = 3;
2866 // params.avoid_duplicated_trees = true;
2867 params.print_tree_lh = true;
2868 continue;
2869 }
2870 if (strcmp(argv[cnt], "-wbl") == 0) {
2871 params.print_branch_lengths = true;
2872 continue;
2873 }
2874 if (strcmp(argv[cnt], "-wit") == 0) {
2875 params.write_init_tree = true;
2876 continue;
2877 }
2878
2879 if (strcmp(argv[cnt], "--write-branches") == 0) {
2880 params.write_branches = true;
2881 continue;
2882 }
2883
2884 // if (strcmp(argv[cnt], "-nodup") == 0) {
2885 // params.avoid_duplicated_trees = true;
2886 // continue;
2887 // }
2888 if (strcmp(argv[cnt], "-rf_all") == 0 || strcmp(argv[cnt], "--tree-dist-all") == 0) {
2889 params.rf_dist_mode = RF_ALL_PAIR;
2890 continue;
2891 }
2892 if (strcmp(argv[cnt], "-rf_adj") == 0) {
2893 params.rf_dist_mode = RF_ADJACENT_PAIR;
2894 continue;
2895 }
2896 if (strcmp(argv[cnt], "-rf") == 0 || strcmp(argv[cnt], "--tree-dist") == 0) {
2897 params.rf_dist_mode = RF_TWO_TREE_SETS;
2898 cnt++;
2899 if (cnt >= argc)
2900 throw "Use -rf <second_tree>";
2901 params.second_tree = argv[cnt];
2902 continue;
2903 }
2904 if (strcmp(argv[cnt], "-rf1") == 0 || strcmp(argv[cnt], "--tree-dist1") == 0) {
2905 params.rf_dist_mode = RF_TWO_TREE_SETS;
2906 params.rf_same_pair = true;
2907 cnt++;
2908 if (cnt >= argc)
2909 throw "Use --tree-dist1 <second_tree>";
2910 params.second_tree = argv[cnt];
2911 continue;
2912 }
2913 if (strcmp(argv[cnt], "-rf2") == 0 || strcmp(argv[cnt], "--tree-dist2") == 0) {
2914 params.rf_dist_mode = RF_TWO_TREE_SETS_EXTENDED;
2915 cnt++;
2916 if (cnt >= argc)
2917 throw "Use -rf2 <second_tree>";
2918 params.second_tree = argv[cnt];
2919 continue;
2920 }
2921
2922 if (strcmp(argv[cnt], "--normalize-dist") == 0) {
2923 params.normalize_tree_dist = true;
2924 continue;
2925 }
2926
2927 if (strcmp(argv[cnt], "-aLRT") == 0) {
2928 cnt++;
2929 if (cnt + 1 >= argc)
2930 throw "Use -aLRT <threshold%> <#replicates>";
2931 params.aLRT_threshold = convert_int(argv[cnt]);
2932 if (params.aLRT_threshold < 85 || params.aLRT_threshold > 101)
2933 throw "aLRT threshold must be between 85 and 100";
2934 cnt++;
2935 params.aLRT_replicates = convert_int(argv[cnt]);
2936 if (params.aLRT_replicates < 1000
2937 && params.aLRT_replicates != 0)
2938 throw "aLRT replicates must be at least 1000";
2939 continue;
2940 }
2941 if (strcmp(argv[cnt], "-alrt") == 0 || strcmp(argv[cnt], "--alrt") == 0) {
2942 cnt++;
2943 if (cnt >= argc)
2944 throw "Use -alrt <#replicates | 0>";
2945 int reps = convert_int(argv[cnt]);
2946 if (reps == 0)
2947 params.aLRT_test = true;
2948 else {
2949 params.aLRT_replicates = reps;
2950 if (params.aLRT_replicates < 1000)
2951 throw "aLRT replicates must be at least 1000";
2952 }
2953 continue;
2954 }
2955 if (strcmp(argv[cnt], "-abayes") == 0 || strcmp(argv[cnt], "--abayes") == 0) {
2956 params.aBayes_test = true;
2957 continue;
2958 }
2959 if (strcmp(argv[cnt], "-lbp") == 0 || strcmp(argv[cnt], "--lbp") == 0) {
2960 cnt++;
2961 if (cnt >= argc)
2962 throw "Use -lbp <#replicates>";
2963 params.localbp_replicates = convert_int(argv[cnt]);
2964 if (params.localbp_replicates < 1000
2965 && params.localbp_replicates != 0)
2966 throw "Local bootstrap (LBP) replicates must be at least 1000";
2967 continue;
2968 }
2969 if (strcmp(argv[cnt], "-wsl") == 0 || strcmp(argv[cnt], "--sitelh") == 0) {
2970 params.print_site_lh = WSL_SITE;
2971 continue;
2972 }
2973
2974 if (strcmp(argv[cnt], "-wpl") == 0 || strcmp(argv[cnt], "--partlh") == 0) {
2975 params.print_partition_lh = true;
2976 continue;
2977 }
2978
2979 if (strcmp(argv[cnt], "-wslg") == 0 || strcmp(argv[cnt], "-wslr") == 0) {
2980 params.print_site_lh = WSL_RATECAT;
2981 continue;
2982 }
2983
2984 if (strcmp(argv[cnt], "-wslm") == 0) {
2985 params.print_site_lh = WSL_MIXTURE;
2986 continue;
2987 }
2988 if (strcmp(argv[cnt], "-wslmr") == 0 || strcmp(argv[cnt], "-wslrm") == 0) {
2989 params.print_site_lh = WSL_MIXTURE_RATECAT;
2990 continue;
2991 }
2992
2993 if (strcmp(argv[cnt], "-wspr") == 0) {
2994 params.print_site_prob = WSL_RATECAT;
2995 continue;
2996 }
2997
2998 if (strcmp(argv[cnt], "-wspm") == 0) {
2999 params.print_site_prob = WSL_MIXTURE;
3000 continue;
3001 }
3002 if (strcmp(argv[cnt], "-wspmr") == 0 || strcmp(argv[cnt], "-wsprm") == 0) {
3003 params.print_site_prob = WSL_MIXTURE_RATECAT;
3004 continue;
3005 }
3006
3007 if (strcmp(argv[cnt], "-asr") == 0 || strcmp(argv[cnt], "--ancestral") == 0) {
3008 params.print_ancestral_sequence = AST_MARGINAL;
3009 params.ignore_identical_seqs = false;
3010 continue;
3011 }
3012
3013 if (strcmp(argv[cnt], "-asr-min") == 0 || strcmp(argv[cnt], "--asr-min") == 0) {
3014 cnt++;
3015 if (cnt >= argc)
3016 throw "Use -asr-min <probability>";
3017
3018 params.min_ancestral_prob = convert_double(argv[cnt]);
3019 if (params.min_ancestral_prob < 0 || params.min_ancestral_prob > 1)
3020 throw "Minimum ancestral probability [-asr-min] must be between 0 and 1.0";
3021 continue;
3022 }
3023
3024 if (strcmp(argv[cnt], "-asr-joint") == 0) {
3025 params.print_ancestral_sequence = AST_JOINT;
3026 params.ignore_identical_seqs = false;
3027 continue;
3028 }
3029
3030 if (strcmp(argv[cnt], "-wsr") == 0 || strcmp(argv[cnt], "--rate") == 0) {
3031 params.print_site_rate |= 1;
3032 continue;
3033 }
3034
3035 if (strcmp(argv[cnt], "--mlrate") == 0) {
3036 params.print_site_rate |= 2;
3037 continue;
3038 }
3039
3040 if (strcmp(argv[cnt], "-wsptrees") == 0) {
3041 params.print_trees_site_posterior = 1;
3042 continue;
3043 }
3044 if (strcmp(argv[cnt], "-wsf") == 0) {
3045 params.print_site_state_freq = WSF_POSTERIOR_MEAN;
3046 continue;
3047 }
3048 if (strcmp(argv[cnt], "--freq-max") == 0 || strcmp(argv[cnt], "-fmax") == 0) {
3049 params.print_site_state_freq = WSF_POSTERIOR_MAX;
3050 continue;
3051 }
3052 if (strcmp(argv[cnt], "-wba") == 0) {
3053 params.print_bootaln = true;
3054 continue;
3055 }
3056 if (strcmp(argv[cnt], "-wbsf") == 0) {
3057 params.print_boot_site_freq = true;
3058 continue;
3059 }
3060 if (strcmp(argv[cnt], "-wsa") == 0) {
3061 params.print_subaln = true;
3062 continue;
3063 }
3064 if (strcmp(argv[cnt], "-wtl") == 0) {
3065 params.print_tree_lh = true;
3066 continue;
3067 }
3068 if (strcmp(argv[cnt], "-wpi") == 0) {
3069 params.print_partition_info = true;
3070 params.print_conaln = true;
3071 continue;
3072 }
3073 if (strcmp(argv[cnt], "-wca") == 0) {
3074 params.print_conaln = true;
3075 continue;
3076 }
3077
3078 if (strcmp(argv[cnt], "-wsplits") == 0) {
3079 params.print_splits_file = true;
3080 continue;
3081 }
3082 if (strcmp(argv[cnt], "--no-splits.nex") == 0) {
3083 params.print_splits_nex_file = false;
3084 continue;
3085 }
3086 if (strcmp(argv[cnt], "-ns") == 0) {
3087 cnt++;
3088 if (cnt >= argc)
3089 throw "Use -ns <num_simulations>";
3090 params.whtest_simulations = convert_int(argv[cnt]);
3091 if (params.whtest_simulations < 1)
3092 throw "Wrong number of simulations for WH-test";
3093 continue;
3094 }
3095 if (strcmp(argv[cnt], "-mr") == 0) {
3096 cnt++;
3097 if (cnt >= argc)
3098 throw "Use -mr <rate_file>";
3099 params.rate_file = argv[cnt];
3100 continue;
3101 }
3102 if (strcmp(argv[cnt], "-cat_mean") == 0) {
3103 params.mcat_type |= MCAT_MEAN;
3104 continue;
3105 }
3106 if (strcmp(argv[cnt], "-cat_nolog") == 0) {
3107 params.mcat_type &= (127 - MCAT_LOG);
3108 continue;
3109 }
3110 if (strcmp(argv[cnt], "-cat_site") == 0) {
3111 params.mcat_type &= (127 - MCAT_PATTERN);
3112 continue;
3113 }
3114 if (strcmp(argv[cnt], "-tina") == 0) {
3115 params.do_pars_multistate = true;
3116 params.ignore_checkpoint = true;
3117 continue;
3118 }
3119 if (strcmp(argv[cnt], "-pval") == 0) {
3120 cnt++;
3121 if (cnt >= argc)
3122 throw "Use -pval <gene_pvalue_file>";
3123 params.gene_pvalue_file = argv[cnt];
3124 continue;
3125 }
3126 if (strcmp(argv[cnt], "-nnitest") == 0) {
3127 params.testNNI = true;
3128 continue;
3129 }
3130 if (strcmp(argv[cnt], "-anni") == 0) {
3131 params.approximate_nni = true;
3132 continue;
3133 }
3134 if (strcmp(argv[cnt], "-nnicut") == 0) {
3135 params.estimate_nni_cutoff = true;
3136 //nni_cutoff = -5.41/2;
3137 continue;
3138 }
3139 if (strcmp(argv[cnt], "-nnichi2") == 0) {
3140 params.nni_cutoff = -5.41 / 2;
3141 continue;
3142 }
3143 if (strcmp(argv[cnt], "-nnicutval") == 0) {
3144 cnt++;
3145 if (cnt >= argc)
3146 throw "Use -nnicutval <log_diff_value>";
3147 params.nni_cutoff = convert_double(argv[cnt]);
3148 if (params.nni_cutoff >= 0)
3149 throw "cutoff value for -nnicutval must be negative";
3150 continue;
3151 }
3152 if (strcmp(argv[cnt], "-nnisort") == 0) {
3153 params.nni_sort = true;
3154 continue;
3155 }
3156 if (strcmp(argv[cnt], "-plog") == 0) {
3157 params.gene_pvalue_loga = true;
3158 continue;
3159 }
3160 if (strcmp(argv[cnt], "-dmp") == 0) {
3161 cnt++;
3162 if (cnt >= argc)
3163 throw "Use -dmp <ncbi_taxid>";
3164 params.ncbi_taxid = convert_int(argv[cnt]);
3165 continue;
3166 }
3167 if (strcmp(argv[cnt], "-dmplevel") == 0
3168 || strcmp(argv[cnt], "-dmprank") == 0) {
3169 cnt++;
3170 if (cnt >= argc)
3171 throw "Use -dmprank <ncbi_taxon_rank>";
3172 params.ncbi_taxon_level = argv[cnt];
3173 continue;
3174 }
3175 if (strcmp(argv[cnt], "-dmpignore") == 0) {
3176 cnt++;
3177 if (cnt >= argc)
3178 throw "Use -dmpignore <ncbi_ignore_level>";
3179 params.ncbi_ignore_level = argv[cnt];
3180 continue;
3181 }
3182 if (strcmp(argv[cnt], "-dmpname") == 0) {
3183 cnt++;
3184 if (cnt >= argc)
3185 throw "Use -dmpname <ncbi_names_file>";
3186 params.ncbi_names_file = argv[cnt];
3187 continue;
3188 }
3189 if (strcmp(argv[cnt], "-eco") == 0) {
3190 cnt++;
3191 if (cnt >= argc)
3192 throw "Use -eco <eco_dag_file>";
3193 params.eco_dag_file = argv[cnt];
3194 continue;
3195 }
3196 if (strcmp(argv[cnt], "-k%") == 0) {
3197 cnt++;
3198 if (cnt >= argc)
3199 throw "Use -k% <k in %>";
3200 //convert_range(argv[cnt], params.k_percent, params.sub_size, params.step_size);
3201 params.k_percent = convert_int(argv[cnt]);
3202 continue;
3203 }
3204 if (strcmp(argv[cnt], "-diet") == 0) {
3205 cnt++;
3206 if (cnt >= argc)
3207 throw "Use -diet <d in %>";
3208 convert_range(argv[cnt], params.diet_min, params.diet_max,
3209 params.diet_step);
3210 //params.diet = convert_int(argv[cnt]);
3211 continue;
3212 }
3213 if (strcmp(argv[cnt], "-up") == 0) {
3214 params.upper_bound = true;
3215 continue;
3216 }
3217 if (strcmp(argv[cnt], "-upNNI") == 0) {
3218 params.upper_bound_NNI = true;
3219 }
3220 if (strcmp(argv[cnt], "-upFrac") == 0) {
3221 cnt++;
3222 if (cnt >= argc)
3223 throw "Use -upFrac <fraction>";
3224 params.upper_bound_frac = convert_double(argv[cnt]);
3225 }
3226 if (strcmp(argv[cnt], "-ecoR") == 0) {
3227 cnt++;
3228 if (cnt >= argc)
3229 throw "Use -ecoR <run number>";
3230 params.eco_run = convert_int(argv[cnt]);
3231 continue;
3232 }
3233 if (strcmp(argv[cnt], "-bb") == 0 || strcmp(argv[cnt], "-B") == 0 || strcmp(argv[cnt], "--ufboot") == 0 ||
3234 strcmp(argv[cnt], "-J") == 0 || strcmp(argv[cnt], "--ufjack") == 0) {
3235 if ((strcmp(argv[cnt], "-J") == 0 || strcmp(argv[cnt], "--ufjack") == 0) && params.jackknife_prop == 0.0)
3236 params.jackknife_prop = 0.5;
3237 cnt++;
3238 if (cnt >= argc)
3239 throw "Use -B <#replicates>";
3240 if (params.stop_condition == SC_FIXED_ITERATION) {
3241 throw("Ultrafast bootstrap does not work with -fast, -te or -n option");
3242 }
3243 params.gbo_replicates = convert_int(argv[cnt]);
3244 // params.avoid_duplicated_trees = true;
3245 if (params.gbo_replicates < 1000)
3246 throw "#replicates must be >= 1000";
3247 params.consensus_type = CT_CONSENSUS_TREE;
3248 params.stop_condition = SC_BOOTSTRAP_CORRELATION;
3249 //params.nni5Branches = true;
3250 continue;
3251 }
3252 if (strcmp(argv[cnt], "-beps") == 0 || strcmp(argv[cnt], "--beps") == 0) {
3253 cnt++;
3254 if (cnt >= argc)
3255 throw "Use -beps <epsilon>";
3256 params.ufboot_epsilon = convert_double(argv[cnt]);
3257 if (params.ufboot_epsilon <= 0.0)
3258 throw "Epsilon must be positive";
3259 continue;
3260 }
3261 if (strcmp(argv[cnt], "-wbt") == 0 || strcmp(argv[cnt], "--wbt") == 0 || strcmp(argv[cnt], "--boot-trees") == 0) {
3262 params.print_ufboot_trees = 1;
3263 continue;
3264 }
3265 if (strcmp(argv[cnt], "-wbtl") == 0 || strcmp(argv[cnt], "--wbtl") == 0) {
3266 // print ufboot trees with branch lengths
3267 params.print_ufboot_trees = 2;
3268 continue;
3269 }
3270 if (strcmp(argv[cnt], "-bs") == 0) {
3271 cnt++;
3272 if (cnt >= argc)
3273 throw "Use -bs <begin_sampling_size>";
3274 params.check_gbo_sample_size = convert_int(argv[cnt]);
3275 continue;
3276 }
3277 if (strcmp(argv[cnt], "-bmax") == 0) {
3278 cnt++;
3279 if (cnt >= argc)
3280 throw "Use -bmax <max_candidate_trees>";
3281 params.max_candidate_trees = convert_int(argv[cnt]);
3282 continue;
3283 }
3284 if (strcmp(argv[cnt], "-bcor") == 0 | strcmp(argv[cnt], "--bcor") == 0) {
3285 cnt++;
3286 if (cnt >= argc)
3287 throw "Use -bcor <min_correlation>";
3288 params.min_correlation = convert_double(argv[cnt]);
3289 continue;
3290 }
3291 if (strcmp(argv[cnt], "--bnni") == 0 || strcmp(argv[cnt], "-bnni") == 0) {
3292 params.ufboot2corr = true;
3293 // print ufboot trees with branch lengths
3294 // params.print_ufboot_trees = 2; // Diep: relocate to be below this for loop
3295 continue;
3296 }
3297 if (strcmp(argv[cnt], "-u2c_nni5") == 0) {
3298 params.u2c_nni5 = true;
3299 continue;
3300 }
3301
3302 if (strcmp(argv[cnt], "-nstep") == 0 || strcmp(argv[cnt], "--nstep") == 0) {
3303 cnt++;
3304 if (cnt >= argc)
3305 throw "Use -nstep <step_iterations>";
3306 params.step_iterations = convert_int(argv[cnt]);
3307 if (params.step_iterations < 10
3308 || params.step_iterations % 2 == 1)
3309 throw "At least step size of 10 and even number please";
3310 params.min_iterations = params.step_iterations;
3311 continue;
3312 }
3313 if (strcmp(argv[cnt], "-boff") == 0) {
3314 params.online_bootstrap = false;
3315 continue;
3316 }
3317 // if (strcmp(argv[cnt], "-nostore") == 0
3318 // || strcmp(argv[cnt], "-memsave") == 0) {
3319 // params.store_candidate_trees = false;
3320 // continue;
3321 // }
3322
3323 if (strcmp(argv[cnt], "--jack-prop") == 0) {
3324 cnt++;
3325 if (cnt >= argc)
3326 throw "Use --jack-prop jackknife_proportion";
3327 params.jackknife_prop = convert_double(argv[cnt]);
3328 if (params.jackknife_prop <= 0.0 || params.jackknife_prop >= 1.0)
3329 throw "Jackknife proportion must be between 0.0 and 1.0";
3330 continue;
3331 }
3332
3333 if (strcmp(argv[cnt], "-mem") == 0 || strcmp(argv[cnt], "--mem") == 0) {
3334 cnt++;
3335 if (cnt >= argc)
3336 throw "Use -mem max_mem_size";
3337 params.lh_mem_save = LM_MEM_SAVE;
3338 int end_pos;
3339 double mem = convert_double(argv[cnt], end_pos);
3340 if (mem < 0)
3341 throw "-mem must be non-negative";
3342 if (argv[cnt][end_pos] == 'G') {
3343 params.max_mem_size = mem * 1073741824.0;
3344 } else if (argv[cnt][end_pos] == 'M') {
3345 params.max_mem_size = mem * 1048576.0;
3346 } else if (argv[cnt][end_pos] == '%'){
3347 params.max_mem_size = mem * 0.01;
3348 if (params.max_mem_size > 1)
3349 throw "-mem percentage must be between 0 and 100";
3350 } else {
3351 if (mem > 1)
3352 throw "Invalid -mem option. Example: -mem 200M, -mem 10G";
3353 params.max_mem_size = mem;
3354 }
3355 continue;
3356 }
3357 if (strcmp(argv[cnt], "--save-mem-buffer") == 0) {
3358 params.buffer_mem_save = true;
3359 continue;
3360 }
3361 if (strcmp(argv[cnt], "--no-save-mem-buffer") == 0) {
3362 params.buffer_mem_save = false;
3363 continue;
3364 }
3365 // if (strcmp(argv[cnt], "-storetrees") == 0) {
3366 // params.store_candidate_trees = true;
3367 // continue;
3368 // }
3369 if (strcmp(argv[cnt], "-nodiff") == 0) {
3370 params.distinct_trees = false;
3371 continue;
3372 }
3373 if (strcmp(argv[cnt], "-treediff") == 0) {
3374 params.distinct_trees = true;
3375 continue;
3376 }
3377 if (strcmp(argv[cnt], "-norell") == 0) {
3378 params.use_rell_method = false;
3379 continue;
3380 }
3381 if (strcmp(argv[cnt], "-elw") == 0) {
3382 params.use_elw_method = true;
3383 continue;
3384 }
3385 if (strcmp(argv[cnt], "-noweight") == 0) {
3386 params.use_weighted_bootstrap = false;
3387 continue;
3388 }
3389 if (strcmp(argv[cnt], "-nomore") == 0) {
3390 params.use_max_tree_per_bootstrap = true;
3391 continue;
3392 }
3393 if (strcmp(argv[cnt], "-bweight") == 0) {
3394 params.use_weighted_bootstrap = true;
3395 continue;
3396 }
3397 if (strcmp(argv[cnt], "-bmore") == 0) {
3398 params.use_max_tree_per_bootstrap = false;
3399 continue;
3400 }
3401 if (strcmp(argv[cnt], "-gz") == 0) {
3402 params.do_compression = true;
3403 continue;
3404 }
3405 if (strcmp(argv[cnt], "-newheu") == 0) {
3406 params.new_heuristic = true;
3407 // Enable RAxML kernel
3408 continue;
3409 }
3410 if (strcmp(argv[cnt], "-maxtime") == 0) {
3411 cnt++;
3412 if (cnt >= argc)
3413 throw "Use -maxtime <time_in_minutes>";
3414 params.maxtime = convert_double(argv[cnt]);
3415 params.min_iterations = 1000000;
3416 params.stop_condition = SC_REAL_TIME;
3417 continue;
3418 }
3419 if (strcmp(argv[cnt], "--ninit") == 0 || strcmp(argv[cnt], "-ninit") == 0) {
3420 cnt++;
3421 if (cnt >= argc)
3422 throw "Use -ninit <number_of_parsimony_trees>";
3423 params.numInitTrees = convert_int(argv[cnt]);
3424 if (params.numInitTrees < 0)
3425 throw "-ninit must be non-negative";
3426 if (params.numInitTrees < params.numNNITrees)
3427 params.numNNITrees = params.numInitTrees;
3428 continue;
3429 }
3430 if (strcmp(argv[cnt], "-fast") == 0 || strcmp(argv[cnt], "--fast") == 0) {
3431 // fast search option to resemble FastTree
3432 if (params.gbo_replicates != 0) {
3433 throw("Ultrafast bootstrap (-bb) does not work with -fast option");
3434 }
3435 params.numInitTrees = 2;
3436 if (params.min_iterations == -1)
3437 params.min_iterations = 2;
3438 params.stop_condition = SC_FIXED_ITERATION;
3439 params.modelEps = 0.05;
3440 continue;
3441 }
3442 if (strcmp(argv[cnt], "-fss") == 0) {
3443 params.fixStableSplits = true;
3444 // params.five_plus_five = true;
3445 continue;
3446 }
3447 if (strcmp(argv[cnt], "--stable-thres") == 0) {
3448 cnt++;
3449 if (cnt >= argc)
3450 throw "Use --stable-thres <support_value_threshold>";
3451 params.stableSplitThreshold = convert_double(argv[cnt]);
3452 continue;
3453 }
3454 if (strcmp(argv[cnt], "-ff") == 0) {
3455 params.five_plus_five = true;
3456 continue;
3457 }
3458
3459 if (strcmp(argv[cnt], "-tabu") == 0) {
3460 params.fixStableSplits = true;
3461 params.tabu = true;
3462 params.maxCandidates = params.numSupportTrees;
3463 continue;
3464 }
3465
3466 if (strcmp(argv[cnt], "--adt-pert") == 0) {
3467 if (params.tabu == true) {
3468 throw("option -tabu and --adt-pert cannot be combined");
3469 }
3470 params.adaptPertubation = true;
3471 params.stableSplitThreshold = 1.0;
3472 continue;
3473 }
3474
3475 if (strcmp(argv[cnt], "-memcheck") == 0) {
3476 params.memCheck = true;
3477 continue;
3478 }
3479
3480 if (strcmp(argv[cnt], "--ntop") == 0 || strcmp(argv[cnt], "-ntop") == 0) {
3481 cnt++;
3482 if (cnt >= argc)
3483 throw "Use -ntop <number_of_top_parsimony_trees>";
3484 params.numNNITrees = convert_int(argv[cnt]);
3485 continue;
3486 }
3487 if (strcmp(argv[cnt], "--num-sup-trees") == 0) {
3488 cnt++;
3489 if (cnt >= argc)
3490 throw "Use --num-sup-trees <number_of_support_trees>";
3491 params.numSupportTrees = convert_int(argv[cnt]);
3492 continue;
3493 }
3494 if (strcmp(argv[cnt], "-fixai") == 0) {
3495 cnt++;
3496 if (cnt >= argc)
3497 throw "Use -fixai <alpha_invar_file>";
3498 params.alpha_invar_file = argv[cnt];
3499 continue;
3500 }
3501
3502 if (strcmp(argv[cnt], "--opt-gamma-inv") == 0) {
3503 params.opt_gammai = true;
3504 continue;
3505 }
3506
3507 if (strcmp(argv[cnt], "--no-opt-gamma-inv") == 0) {
3508 params.opt_gammai = false;
3509 continue;
3510 }
3511
3512 if (strcmp(argv[cnt], "--opt-gammai-fast") == 0) {
3513 params.opt_gammai_fast = true;
3514 params.opt_gammai = true;
3515 continue;
3516 }
3517
3518 if (strcmp(argv[cnt], "--opt-gammai-kb") == 0) {
3519 params.opt_gammai_keep_bran = true;
3520 params.opt_gammai = true;
3521 continue;
3522 }
3523
3524 if (strcmp(argv[cnt], "--adaptive-eps") == 0) {
3525 params.testAlphaEpsAdaptive = true;
3526 continue;
3527 }
3528 if (strcmp(argv[cnt], "--rand-alpha") == 0) {
3529 params.randomAlpha = true;
3530 continue;
3531 }
3532
3533 if (strcmp(argv[cnt], "-eai") == 0) {
3534 params.exh_ai = true;
3535 continue;
3536 }
3537 if (strcmp(argv[cnt], "-poplim") == 0) {
3538 cnt++;
3539 if (cnt >= argc)
3540 throw "Use -poplim <max_pop_size>";
3541 params.maxCandidates = convert_int(argv[cnt]);
3542 continue;
3543 }
3544 if (strcmp(argv[cnt], "--nbest") == 0 ||strcmp(argv[cnt], "-nbest") == 0) {
3545 cnt++;
3546 if (cnt >= argc)
3547 throw "Use -nbest <number_of_candidate_trees>";
3548 params.popSize = convert_int(argv[cnt]);
3549 ASSERT(params.popSize < params.numInitTrees);
3550 continue;
3551 }
3552 if (strcmp(argv[cnt], "-beststart") == 0) {
3553 params.bestStart = true;
3554 cnt++;
3555 if (cnt >= argc)
3556 throw "Use -best_start <binary_alignment_file>";
3557 params.binary_aln_file = argv[cnt];
3558 continue;
3559 }
3560 if (strcmp(argv[cnt], "-pll") == 0) {
3561 throw("-pll option is discontinued.");
3562 params.pll = true;
3563 continue;
3564 }
3565 if (strcmp(argv[cnt], "-me") == 0 || strcmp(argv[cnt], "--epsilon") == 0) {
3566 cnt++;
3567 if (cnt >= argc)
3568 throw "Use -me <model_epsilon>";
3569 params.modelEps = convert_double(argv[cnt]);
3570 if (params.modelEps <= 0.0)
3571 throw "Model epsilon must be positive";
3572 if (params.modelEps > 1.0)
3573 throw "Model epsilon must not be larger than 1.0";
3574 continue;
3575 }
3576
3577 if (strcmp(argv[cnt], "--mf-epsilon") == 0) {
3578 cnt++;
3579 if (cnt >= argc)
3580 throw "Use --mf-epsilon <modelfinder_epsilon>";
3581 params.modelfinder_eps = convert_double(argv[cnt]);
3582 if (params.modelfinder_eps <= 0.0)
3583 throw "ModelFinder epsilon must be positive";
3584 if (params.modelEps > 1.0)
3585 throw "ModelFinder epsilon must not be larger than 1.0";
3586 continue;
3587 }
3588
3589 if (strcmp(argv[cnt], "-pars_ins") == 0) {
3590 params.reinsert_par = true;
3591 continue;
3592 }
3593 if (strcmp(argv[cnt], "-allnni") == 0 || strcmp(argv[cnt], "--allnni") == 0) {
3594 params.speednni = false;
3595 continue;
3596 }
3597
3598 if (strcmp(argv[cnt], "-snni") == 0) {
3599 params.snni = true;
3600 // dont need to turn this on here
3601 //params.autostop = true;
3602 //params.speednni = true;
3603 // Minh: why do you turn this on? it doubles curPerStrength at some point
3604 //params.adaptPert = true;
3605 continue;
3606 }
3607 if (strcmp(argv[cnt], "-iqpnni") == 0) {
3608 params.snni = false;
3609 params.start_tree = STT_BIONJ;
3610 params.numNNITrees = 1;
3611 // continue; } if (strcmp(argv[cnt], "-auto") == 0) {
3612 // params.autostop = true;
3613 continue;
3614 }
3615 if (strcmp(argv[cnt], "--nstop") == 0 || strcmp(argv[cnt], "-nstop") == 0) {
3616 if (params.stop_condition != SC_BOOTSTRAP_CORRELATION)
3617 params.stop_condition = SC_UNSUCCESS_ITERATION;
3618 cnt++;
3619 if (cnt >= argc)
3620 throw "Use -nstop <#iterations>";
3621 params.unsuccess_iteration = convert_int(argv[cnt]);
3622 if (params.unsuccess_iteration <= 0)
3623 throw "-nstop iterations must be positive";
3624 params.max_iterations = max(params.max_iterations, params.unsuccess_iteration*10);
3625 continue;
3626 }
3627 if (strcmp(argv[cnt], "-lsbran") == 0) {
3628 params.leastSquareBranch = true;
3629 continue;
3630 }
3631 if (strcmp(argv[cnt], "-manuel") == 0) {
3632 params.manuel_analytic_approx = true;
3633 continue;
3634 }
3635 if (strcmp(argv[cnt], "-parsbran") == 0) {
3636 params.pars_branch_length = true;
3637 continue;
3638 }
3639 if (strcmp(argv[cnt], "-bayesbran") == 0) {
3640 params.bayes_branch_length = true;
3641 continue;
3642 }
3643 if (strcmp(argv[cnt], "-fivebran") == 0
3644 || strcmp(argv[cnt], "-nni5") == 0) {
3645 params.nni5 = true;
3646 params.nni_type = NNI5;
3647 continue;
3648 }
3649 if (strcmp(argv[cnt], "-onebran") == 0
3650 || strcmp(argv[cnt], "-nni1") == 0) {
3651 params.nni_type = NNI1;
3652 params.nni5 = false;
3653 continue;
3654 }
3655
3656 if (strcmp(argv[cnt], "-nni-eval") == 0) {
3657 cnt++;
3658 if (cnt >= argc)
3659 throw "Use -nni-eval <num_evaluation>";
3660 params.nni5_num_eval = convert_int(argv[cnt]);
3661 if (params.nni5_num_eval < 1)
3662 throw("Positive -nni-eval expected");
3663 continue;
3664 }
3665
3666 if (strcmp(argv[cnt], "-bl-eval") == 0) {
3667 cnt++;
3668 if (cnt >= argc)
3669 throw "Use -bl-eval <num_evaluation>";
3670 params.brlen_num_traversal = convert_int(argv[cnt]);
3671 if (params.brlen_num_traversal < 1)
3672 throw("Positive -bl-eval expected");
3673 continue;
3674 }
3675
3676 if (strcmp(argv[cnt], "-smooth") == 0) {
3677 cnt++;
3678 if (cnt >= argc)
3679 throw "Use -smooth <num_iterations>";
3680 params.numSmoothTree = convert_int(argv[cnt]);
3681 continue;
3682 }
3683 if (strcmp(argv[cnt], "-lsnni") == 0) {
3684 params.leastSquareNNI = true;
3685 continue;
3686 }
3687 if (strcmp(argv[cnt], "-lsvar") == 0) {
3688 cnt++;
3689 if (cnt >= argc)
3690 throw "Use -lsvar <o|ft|fm|st|p>";
3691 if (strcmp(argv[cnt], "o") == 0
3692 || strcmp(argv[cnt], "ols") == 0) {
3693 params.ls_var_type = OLS;
3694 continue;
3695 }
3696 if (strcmp(argv[cnt], "ft") == 0
3697 || strcmp(argv[cnt], "first_taylor") == 0) {
3698 params.ls_var_type = WLS_FIRST_TAYLOR;
3699 continue;
3700 }
3701 if (strcmp(argv[cnt], "fm") == 0
3702 || strcmp(argv[cnt], "fitch_margoliash") == 0) {
3703 params.ls_var_type = WLS_FITCH_MARGOLIASH;
3704 continue;
3705 }
3706 if (strcmp(argv[cnt], "st") == 0
3707 || strcmp(argv[cnt], "second_taylor") == 0) {
3708 params.ls_var_type = WLS_SECOND_TAYLOR;
3709 continue;
3710 }
3711 if (strcmp(argv[cnt], "p") == 0
3712 || strcmp(argv[cnt], "pauplin") == 0) {
3713 params.ls_var_type = WLS_PAUPLIN;
3714 } else {
3715 throw "Use -lsvar <o|ft|fm|st|p>";
3716 }
3717 continue;
3718 }
3719 if (strcmp(argv[cnt], "-eps") == 0) {
3720 cnt++;
3721 if (cnt >= argc)
3722 throw "Use -eps <log-likelihood epsilon>";
3723 params.loglh_epsilon = convert_double(argv[cnt]);
3724 continue;
3725 }
3726 if (strcmp(argv[cnt], "-pb") == 0) { // Enable parsimony branch length estimation
3727 params.parbran = true;
3728 continue;
3729 }
3730 if (strcmp(argv[cnt], "-x") == 0) {
3731 cnt++;
3732 if (cnt >= argc)
3733 throw "Use -x <iteration_multiple>";
3734 params.iteration_multiple = convert_int(argv[cnt]);
3735 continue;
3736 }
3737 if (strcmp(argv[cnt], "-sp_iter") == 0) {
3738 cnt++;
3739 if (cnt >= argc)
3740 throw "Use -sp_iter <number_iteration>";
3741 params.speedup_iter = convert_int(argv[cnt]);
3742 continue;
3743 }
3744 if (strcmp(argv[cnt], "-avh") == 0) {
3745 cnt++;
3746 if (cnt >= argc)
3747 throw "Use -avh <arndt_#bootstrap>";
3748 params.avh_test = convert_int(argv[cnt]);
3749 continue;
3750 }
3751 if (strcmp(argv[cnt], "-bootlh") == 0) {
3752 cnt++;
3753 if (cnt >= argc)
3754 throw "Use -bootlh <#replicates>";
3755 params.bootlh_test = convert_int(argv[cnt]);
3756 continue;
3757 }
3758 if (strcmp(argv[cnt], "-bootpart") == 0) {
3759 cnt++;
3760 if (cnt >= argc)
3761 throw "Use -bootpart <part1_length,part2_length,...>";
3762 params.bootlh_partitions = argv[cnt];
3763 continue;
3764 }
3765 if (strcmp(argv[cnt], "-AIC") == 0) {
3766 params.model_test_criterion = MTC_AIC;
3767 continue;
3768 }
3769 if (strcmp(argv[cnt], "-AICc") == 0 || strcmp(argv[cnt], "-AICC") == 0) {
3770 params.model_test_criterion = MTC_AICC;
3771 continue;
3772 }
3773 if (strcmp(argv[cnt], "-merit") == 0 || strcmp(argv[cnt], "--merit") == 0) {
3774 cnt++;
3775 if (cnt >= argc)
3776 throw "Use -merit AIC|AICC|BIC";
3777 if (strcmp(argv[cnt], "AIC") == 0)
3778 params.model_test_criterion = MTC_AIC;
3779 else if (strcmp(argv[cnt], "AICc") == 0 || strcmp(argv[cnt], "AICC") == 0)
3780 params.model_test_criterion = MTC_AICC;
3781 else if (strcmp(argv[cnt], "BIC") == 0)
3782 params.model_test_criterion = MTC_BIC;
3783 else throw "Use -merit AIC|AICC|BIC";
3784 continue;
3785 }
3786 if (strcmp(argv[cnt], "-ms") == 0) {
3787 cnt++;
3788 if (cnt >= argc)
3789 throw "Use -ms <model_test_sample_size>";
3790 params.model_test_sample_size = convert_int(argv[cnt]);
3791 continue;
3792 }
3793 if (strcmp(argv[cnt], "-nt") == 0 || strcmp(argv[cnt], "-c") == 0 ||
3794 strcmp(argv[cnt], "-T") == 0 || strcmp(argv[cnt], "--threads") == 0) {
3795 cnt++;
3796 if (cnt >= argc)
3797 throw "Use -nt <num_threads|AUTO>";
3798 if (iEquals(argv[cnt], "AUTO"))
3799 params.num_threads = 0;
3800 else {
3801 params.num_threads = convert_int(argv[cnt]);
3802 if (params.num_threads < 1)
3803 throw "At least 1 thread please";
3804 }
3805 continue;
3806 }
3807
3808 if (strcmp(argv[cnt], "-ntmax") == 0 || strcmp(argv[cnt], "--threads-max") == 0) {
3809 cnt++;
3810 if (cnt >= argc)
3811 throw "Use -ntmax <num_threads_max>";
3812 params.num_threads_max = convert_int(argv[cnt]);
3813 if (params.num_threads_max < 1)
3814 throw "At least 1 thread please";
3815 continue;
3816 }
3817
3818 if (strcmp(argv[cnt], "--thread-model") == 0) {
3819 params.openmp_by_model = true;
3820 continue;
3821 }
3822
3823 if (strcmp(argv[cnt], "--thread-site") == 0) {
3824 params.openmp_by_model = false;
3825 continue;
3826 }
3827
3828 // if (strcmp(argv[cnt], "-rootstate") == 0) {
3829 // cnt++;
3830 // if (cnt >= argc)
3831 // throw "Use -rootstate <rootstate>";
3832 // params.root_state = argv[cnt];
3833 // params.SSE = LK_NORMAL;
3834 // continue;
3835 // }
3836 if (strcmp(argv[cnt], "-ct") == 0) {
3837 params.count_trees = true;
3838 continue;
3839 }
3840 if (strcmp(argv[cnt], "--sprrad") == 0 || strcmp(argv[cnt], "--radius") == 0) {
3841 cnt++;
3842 if (cnt >= argc)
3843 throw "Use -sprrad <SPR radius used in parsimony search>";
3844 params.sprDist = convert_int(argv[cnt]);
3845 continue;
3846 }
3847
3848 if (strcmp(argv[cnt], "--mpcost") == 0) {
3849 cnt++;
3850 if (cnt >= argc)
3851 throw "Use --mpcost <parsimony_cost_file>";
3852 params.sankoff_cost_file = argv[cnt];
3853 continue;
3854 }
3855
3856 if (strcmp(argv[cnt], "-no_rescale_gamma_invar") == 0) {
3857 params.no_rescale_gamma_invar = true;
3858 continue;
3859 }
3860
3861 if (strcmp(argv[cnt], "-wsi") == 0) {
3862 params.compute_seq_identity_along_tree = true;
3863 continue;
3864 }
3865
3866 if (strcmp(argv[cnt], "--no-seq-comp") == 0) {
3867 params.compute_seq_composition = false;
3868 continue;
3869 }
3870
3871 if (strcmp(argv[cnt], "-t") == 0 || strcmp(argv[cnt], "-te") == 0 || strcmp(argv[cnt], "--tree") == 0) {
3872 if (strcmp(argv[cnt], "-te") == 0) {
3873 if (params.gbo_replicates != 0) {
3874 throw("Ultrafast bootstrap does not work with -te option");
3875 }
3876 params.min_iterations = 0;
3877 params.stop_condition = SC_FIXED_ITERATION;
3878 }
3879 cnt++;
3880 if (cnt >= argc)
3881 throw "Use -t,-te <start_tree | BIONJ | PARS | PLLPARS>";
3882 if (strcmp(argv[cnt], "BIONJ") == 0 || strcmp(argv[cnt], "NJ") == 0)
3883 params.start_tree = STT_BIONJ;
3884 else if (strcmp(argv[cnt], "PARS") == 0)
3885 params.start_tree = STT_PARSIMONY;
3886 else if (strcmp(argv[cnt], "PLLPARS") == 0)
3887 params.start_tree = STT_PLL_PARSIMONY;
3888 else if (strcmp(argv[cnt], "RANDOM") == 0 || strcmp(argv[cnt], "RAND") == 0)
3889 params.start_tree = STT_RANDOM_TREE;
3890 else {
3891 params.user_file = argv[cnt];
3892 if (params.min_iterations == 0)
3893 params.start_tree = STT_USER_TREE;
3894 }
3895 continue;
3896 }
3897
3898 if (strcmp(argv[cnt], "--no-ml-tree") == 0) {
3899 params.modelfinder_ml_tree = false;
3900 continue;
3901 }
3902
3903 if (strcmp(argv[cnt], "--tree-fix") == 0) {
3904 if (params.gbo_replicates != 0) {
3905 outError("Ultrafast bootstrap does not work with -te option");
3906 }
3907 params.min_iterations = 0;
3908 params.stop_condition = SC_FIXED_ITERATION;
3909 }
3910
3911 if (strcmp(argv[cnt], "-g") == 0) {
3912 cnt++;
3913 if (cnt >= argc)
3914 throw "Use -g <constraint_tree>";
3915 params.constraint_tree_file = argv[cnt];
3916 continue;
3917 }
3918
3919 if (strcmp(argv[cnt], "-lmap") == 0 || strcmp(argv[cnt], "--lmap") == 0) {
3920 cnt++;
3921 if (cnt >= argc)
3922 throw "Use -lmap <likelihood_mapping_num_quartets>";
3923 if (iEquals(argv[cnt], "ALL")) {
3924 params.lmap_num_quartets = 0;
3925 } else {
3926 params.lmap_num_quartets = convert_int64(argv[cnt]);
3927 if (params.lmap_num_quartets < 0)
3928 throw "Number of quartets must be >= 1";
3929 }
3930 continue;
3931 }
3932
3933 if (strcmp(argv[cnt], "-lmclust") == 0 || strcmp(argv[cnt], "--lmclust") == 0) {
3934 cnt++;
3935 if (cnt >= argc)
3936 throw "Use -lmclust <likelihood_mapping_cluster_file>";
3937 params.lmap_cluster_file = argv[cnt];
3938 // '-keep_ident' is currently required to allow a 1-to-1 mapping of the
3939 // user-given groups (HAS) - possibly obsolete in the future versions
3940 params.ignore_identical_seqs = false;
3941 if (params.lmap_num_quartets < 0)
3942 params.lmap_num_quartets = 0;
3943 continue;
3944 }
3945
3946 if (strcmp(argv[cnt], "-wql") == 0 || strcmp(argv[cnt], "--quartetlh") == 0) {
3947 params.print_lmap_quartet_lh = true;
3948 continue;
3949 }
3950
3951 if (strcmp(argv[cnt], "-mixlen") == 0) {
3952 cnt++;
3953 if (cnt >= argc)
3954 throw "Use -mixlen <number of mixture branch lengths for heterotachy model>";
3955 params.num_mixlen = convert_int(argv[cnt]);
3956 if (params.num_mixlen < 1)
3957 throw("-mixlen must be >= 1");
3958 continue;
3959 }
3960
3961 if (strcmp(argv[cnt], "--link-alpha") == 0) {
3962 params.link_alpha = true;
3963 continue;
3964 }
3965
3966 if (strcmp(argv[cnt], "--link-model") == 0) {
3967 params.link_model = true;
3968 continue;
3969 }
3970
3971 if (strcmp(argv[cnt], "--model-joint") == 0) {
3972 cnt++;
3973 if (cnt >= argc)
3974 throw "Use --model-joint MODEL_NAME";
3975 params.model_joint = argv[cnt];
3976 params.link_model = true;
3977 continue;
3978 }
3979
3980 if (strcmp(argv[cnt], "--unlink-tree") == 0) {
3981 params.partition_type = TOPO_UNLINKED;
3982 params.ignore_identical_seqs = false;
3983 continue;
3984 }
3985
3986 if (strcmp(argv[cnt], "-redo") == 0 || strcmp(argv[cnt], "--redo") == 0) {
3987 params.ignore_checkpoint = true;
3988 // 2020-04-27: SEMANTIC CHANGE: also redo ModelFinder
3989 params.model_test_again = true;
3990 continue;
3991 }
3992
3993 if (strcmp(argv[cnt], "-tredo") == 0 || strcmp(argv[cnt], "--tredo") == 0 || strcmp(argv[cnt], "--redo-tree") == 0) {
3994 params.ignore_checkpoint = true;
3995 continue;
3996 }
3997
3998 if (strcmp(argv[cnt], "-undo") == 0 || strcmp(argv[cnt], "--undo") == 0) {
3999 params.force_unfinished = true;
4000 continue;
4001 }
4002
4003 if (strcmp(argv[cnt], "-cptime") == 0 || strcmp(argv[cnt], "--cptime") == 0) {
4004 cnt++;
4005 if (cnt >= argc)
4006 throw "Use -cptime <checkpoint_time_interval>";
4007 params.checkpoint_dump_interval = convert_int(argv[cnt]);
4008 continue;
4009 }
4010
4011 if (strcmp(argv[cnt], "--no-log") == 0) {
4012 params.suppress_output_flags |= OUT_LOG;
4013 continue;
4014 }
4015
4016 if (strcmp(argv[cnt], "--no-treefile") == 0) {
4017 params.suppress_output_flags |= OUT_TREEFILE;
4018 continue;
4019 }
4020 if (strcmp(argv[cnt], "--no-iqtree") == 0) {
4021 params.suppress_output_flags |= OUT_IQTREE;
4022 continue;
4023 }
4024 if (strcmp(argv[cnt], "--no-outfiles") == 0) {
4025 params.suppress_output_flags |= OUT_LOG + OUT_TREEFILE + OUT_IQTREE;
4026 continue;
4027 }
4028
4029 // -- Mon Apr 17 21:18:23 BST 2017
4030 // DONE Minh: merged correctly.
4031 if (strcmp(argv[cnt], "--scaling-squaring") == 0) {
4032 params.matrix_exp_technique = MET_SCALING_SQUARING;
4033 continue;
4034 }
4035 if (strcmp(argv[cnt], "--eigenlib") == 0) {
4036 params.matrix_exp_technique = MET_EIGEN3LIB_DECOMPOSITION;
4037 continue;
4038 }
4039 if (strcmp(argv[cnt], "--eigen") == 0) {
4040 params.matrix_exp_technique = MET_EIGEN_DECOMPOSITION;
4041 continue;
4042 }
4043 if (strcmp(argv[cnt], "--lie-markov") == 0) {
4044 params.matrix_exp_technique = MET_LIE_MARKOV_DECOMPOSITION;
4045 continue;
4046 }
4047 if (strcmp(argv[cnt], "--no-uniqueseq") == 0) {
4048 params.suppress_output_flags |= OUT_UNIQUESEQ;
4049 continue;
4050 }
4051 // --
4052
4053 if (strcmp(argv[cnt], "--dating") == 0) {
4054 cnt++;
4055 if (cnt >= argc)
4056 throw "Use --dating LSD";
4057 params.dating_method = argv[cnt];
4058 if (params.dating_method != "LSD")
4059 throw "Currently only LSD (least-square dating) method is supported";
4060 continue;
4061 }
4062
4063 if (strcmp(argv[cnt], "--date") == 0) {
4064 cnt++;
4065 if (cnt >= argc)
4066 throw "Use --date <date_file>|TAXNAME";
4067 if (params.dating_method == "")
4068 params.dating_method = "LSD";
4069 params.date_file = argv[cnt];
4070 continue;
4071 }
4072
4073 if (strcmp(argv[cnt], "--date-tip") == 0) {
4074 cnt++;
4075 if (cnt >= argc)
4076 throw "Use --date-tip <YYYY[-MM-DD]>";
4077 if (params.dating_method == "")
4078 params.dating_method = "LSD";
4079 params.date_tip = argv[cnt];
4080 continue;
4081 }
4082
4083 if (strcmp(argv[cnt], "--date-root") == 0) {
4084 cnt++;
4085 if (cnt >= argc)
4086 throw "Use --date-root <YYYY[-MM-DD]>";
4087 if (params.dating_method == "")
4088 params.dating_method = "LSD";
4089 params.date_root = argv[cnt];
4090 continue;
4091 }
4092
4093 if (strcmp(argv[cnt], "--date-no-outgroup") == 0) {
4094 params.date_with_outgroup = false;
4095 continue;
4096 }
4097
4098 if (strcmp(argv[cnt], "--date-outgroup") == 0) {
4099 params.date_with_outgroup = true;
4100 continue;
4101 }
4102
4103 if (strcmp(argv[cnt], "--date-ci") == 0) {
4104 cnt++;
4105 if (cnt >= argc)
4106 throw "Use --date-ci <number_of_replicates>";
4107 params.date_replicates = convert_int(argv[cnt]);
4108 if (params.date_replicates < 1)
4109 throw "--date-ci must be positive";
4110 continue;
4111 }
4112
4113 if (strcmp(argv[cnt], "--clock-sd") == 0) {
4114 cnt++;
4115 if (cnt >= argc)
4116 throw "Use --clock-sd <standard_dev_of_lognormal_relaxed_lock>";
4117 params.clock_stddev = convert_double(argv[cnt]);
4118 if (params.clock_stddev < 0)
4119 throw "--clock-sd must be non-negative";
4120 continue;
4121 }
4122
4123 if (strcmp(argv[cnt], "--date-outlier") == 0) {
4124 cnt++;
4125 if (cnt >= argc)
4126 throw "Use --date-outlier <z_score_for_removing_outlier_nodes>";
4127 params.date_outlier = convert_double(argv[cnt]);
4128 if (params.date_outlier < 0)
4129 throw "--date-outlier must be non-negative";
4130 continue;
4131 }
4132
4133 if (strcmp(argv[cnt], "--date-debug") == 0) {
4134 params.date_debug = true;
4135 continue;
4136 }
4137
4138
4139 if (strcmp(argv[cnt], "--date-options") == 0) {
4140 cnt++;
4141 if (cnt >= argc)
4142 throw "Use --date-options <extra_options_for_dating_method>";
4143 params.dating_options = argv[cnt];
4144 continue;
4145 }
4146
4147 if (argv[cnt][0] == '-') {
4148 string err = "Invalid \"";
4149 err += argv[cnt];
4150 err += "\" option.";
4151 throw err;
4152 } else {
4153 if (params.user_file == NULL)
4154 params.user_file = argv[cnt];
4155 else
4156 params.out_file = argv[cnt];
4157 }
4158 }
4159 // try
4160 catch (const char *str) {
4161 if (MPIHelper::getInstance().isMaster())
4162 outError(str);
4163 else
4164 exit(EXIT_SUCCESS);
4165 //} catch (char *str) {
4166 //outError(str);
4167 } catch (string str) {
4168 if (MPIHelper::getInstance().isMaster())
4169 outError(str);
4170 else
4171 exit(EXIT_SUCCESS);
4172 } catch (...) {
4173 string err = "Unknown argument \"";
4174 err += argv[cnt];
4175 err += "\"";
4176 if (MPIHelper::getInstance().isMaster())
4177 outError(err);
4178 else
4179 exit(EXIT_SUCCESS);
4180 }
4181
4182 } // for
4183 if (!params.user_file && !params.aln_file && !params.ngs_file && !params.ngs_mapped_reads && !params.partition_file) {
4184 #ifdef IQ_TREE
4185 quickStartGuide();
4186 // usage_iqtree(argv, false);
4187 #else
4188 usage(argv, false);
4189 #endif
4190 }
4191
4192 // if (params.do_au_test)
4193 // outError("The AU test is temporarily disabled due to numerical issue when bp-RELL=0");
4194
4195 if (params.root != NULL && params.is_rooted)
4196 outError("Not allowed to specify both -o <taxon> and -root");
4197
4198 if (params.model_test_and_tree && params.partition_type != BRLEN_OPTIMIZE)
4199 outError("-mtree not allowed with edge-linked partition model (-spp or -q)");
4200
4201 if (params.do_au_test && params.topotest_replicates == 0)
4202 outError("For AU test please specify number of bootstrap replicates via -zb option");
4203
4204 if (params.lh_mem_save == LM_MEM_SAVE && params.partition_file)
4205 outError("-mem option does not work with partition models yet");
4206
4207 if (params.gbo_replicates && params.num_bootstrap_samples)
4208 outError("UFBoot (-bb) and standard bootstrap (-b) must not be specified together");
4209
4210 if ((params.model_name.find("ONLY") != string::npos || (params.model_name.substr(0,2) == "MF" && params.model_name.substr(0,3) != "MFP")) && (params.gbo_replicates || params.num_bootstrap_samples))
4211 outError("ModelFinder only cannot be combined with bootstrap analysis");
4212
4213 if (params.num_runs > 1 && !params.treeset_file.empty())
4214 outError("Can't combine --runs and -z options");
4215
4216 if (params.num_runs > 1 && params.lmap_num_quartets >= 0)
4217 outError("Can't combine --runs and -lmap options");
4218
4219 if (params.terrace_analysis && !params.partition_file)
4220 params.terrace_analysis = false;
4221
4222 if (params.constraint_tree_file && params.partition_type == TOPO_UNLINKED)
4223 outError("-g constraint tree option does not work with -S yet.");
4224
4225 if (params.num_bootstrap_samples && params.partition_type == TOPO_UNLINKED)
4226 outError("-b bootstrap option does not work with -S yet.");
4227
4228 if (params.dating_method != "") {
4229 #ifndef USE_LSD2
4230 outError("IQ-TREE was not compiled with LSD2 library, rerun cmake with -DUSE_LSD2=ON option");
4231 #endif
4232 }
4233
4234 if (params.date_file.empty()) {
4235 if (params.date_root.empty() ^ params.date_tip.empty())
4236 outError("Both --date-root and --date-tip must be provided when --date file is absent");
4237 }
4238
4239 // Diep:
4240 if(params.ufboot2corr == true){
4241 if(params.gbo_replicates <= 0) params.ufboot2corr = false;
4242 else params.stop_condition = SC_UNSUCCESS_ITERATION;
4243
4244 params.print_ufboot_trees = 2; // 2017-09-25: fix bug regarding the order of -bb 1000 -bnni -wbt
4245 }
4246
4247 if (!params.out_prefix) {
4248 if (params.eco_dag_file)
4249 params.out_prefix = params.eco_dag_file;
4250 else if (params.user_file && params.consensus_type == CT_ASSIGN_SUPPORT_EXTENDED)
4251 params.out_prefix = params.user_file;
4252 else if (params.partition_file) {
4253 params.out_prefix = params.partition_file;
4254 if (params.out_prefix[strlen(params.out_prefix)-1] == '/' || params.out_prefix[strlen(params.out_prefix)-1] == '\\') {
4255 params.out_prefix[strlen(params.out_prefix)-1] = 0;
4256 }
4257 } else if (params.aln_file) {
4258 params.out_prefix = params.aln_file;
4259 if (params.out_prefix[strlen(params.out_prefix)-1] == '/' || params.out_prefix[strlen(params.out_prefix)-1] == '\\') {
4260 params.out_prefix[strlen(params.out_prefix)-1] = 0;
4261 }
4262 } else if (params.ngs_file)
4263 params.out_prefix = params.ngs_file;
4264 else if (params.ngs_mapped_reads)
4265 params.out_prefix = params.ngs_mapped_reads;
4266 else
4267 params.out_prefix = params.user_file;
4268 }
4269
4270 if (params.model_name.find("LINK") != string::npos || params.model_name.find("MERGE") != string::npos)
4271 if (params.partition_merge == MERGE_NONE)
4272 params.partition_merge = MERGE_RCLUSTERF;
4273
4274 // if (MPIHelper::getInstance().isWorker()) {
4275 // BUG: setting out_prefix this way cause access to stack, which is cleaned up after returning from this function
4276 // string newPrefix = string(params.out_prefix) + "." + NumberToString(MPIHelper::getInstance().getProcessID()) ;
4277 // params.out_prefix = (char *) newPrefix.c_str();
4278 // }
4279
4280 }
4281
usage(char * argv[])4282 void usage(char* argv[]) {
4283 printCopyright(cout);
4284 cout << "Usage: " << argv[0] << " [OPTIONS] <file_name> [<output_file>]" << endl;
4285 cout << "GENERAL OPTIONS:" << endl;
4286 cout << " -hh Print this help dialog" << endl;
4287 cout << " -h Print help options for phylogenetic inference" << endl;
4288 cout << " <file_name> User tree in NEWICK format or split network in NEXUS format" << endl;
4289 cout << " <output_file> Output file to store results, default is '<file_name>.pda'" << endl;
4290 cout << " -k <num_taxa> Find optimal set of size <num_taxa>" << endl;
4291 cout << " -k <min>:<max> Find optimal sets of size from <min> to <max>" << endl;
4292 cout << " -k <min>:<max>:<step>" << endl;
4293 cout << " Find optimal sets of size min, min+step, min+2*step,..." << endl;
4294 cout << " -o <taxon> Root name to compute rooted PD (default: unrooted)" << endl;
4295 cout << " -if <file> File containing taxa to be included into optimal sets" << endl;
4296 cout << " -e <file> File containing branch/split scale and taxa weights" << endl;
4297 cout << " -all Identify all multiple optimal sets" << endl;
4298 cout << " -lim <max_limit> The maximum number of optimal sets for each k if -a is specified" << endl;
4299 cout << " -min Compute minimal sets (default: maximal)" << endl;
4300 cout << " -1out Print taxa sets and scores to separate files" << endl;
4301 cout << " -oldout Print output compatible with version 0.3" << endl;
4302 cout << " -v Verbose mode" << endl;
4303 cout << endl;
4304 cout << "OPTIONS FOR PHYLOGENETIC DIVERSITY (PD):" << endl;
4305 cout << " -root Make the tree ROOTED, default is unrooted" << endl;
4306 cout << " NOTE: this option and -o <taxon> cannot be both specified" << endl;
4307 cout << " -g Run greedy algorithm only (default: auto)" << endl;
4308 cout << " -pr Run pruning algorithm only (default: auto)" << endl;
4309 cout << endl;
4310 /*
4311 cout << "OPTIONS FOR SPLIT DIVERSITY:" << endl;
4312 cout << " -exhaust Force to use exhaustive search" << endl;
4313 cout << " NOTE: by default, the program applies dynamic programming algorithm" << endl;
4314 cout << " on circular networks and exhaustive search on general networks" << endl;
4315 cout << endl;*/
4316 cout << "OPTIONS FOR BUDGET CONSTRAINTS:" << endl;
4317 cout << " -u <file> File containing total budget and taxa preservation costs" << endl;
4318 cout << " -b <budget> Total budget to conserve taxa" << endl;
4319 cout << " -b <min>:<max> Find all sets with budget from <min> to <max>" << endl;
4320 cout << " -b <min>:<max>:<step>" << endl;
4321 cout << " Find optimal sets with budget min, min+step, min+2*step,..." << endl;
4322 cout << endl;
4323 cout << "OPTIONS FOR AREA ANALYSIS:" << endl;
4324 cout << " -ts <taxa_file> Compute/maximize PD/SD of areas (combine with -k to maximize)" << endl;
4325 cout << " -excl Compute exclusive PD/SD" << endl;
4326 cout << " -endem Compute endemic PD/SD" << endl;
4327 cout << " -compl <areas> Compute complementary PD/SD given the listed <areas>" << endl;
4328 cout << endl;
4329
4330 cout << "OPTIONS FOR VIABILITY CONSTRAINTS:" << endl;
4331 cout << " -eco <food_web> File containing food web matrix" << endl;
4332 cout << " -k% <n> Find optimal set of size relative the total number of taxa" << endl;
4333 cout << " -diet <min_diet> Minimum diet portion (%) to be preserved for each predator" << endl;
4334 cout << endl;
4335 //if (!full_command) exit(0);
4336
4337 cout << "MISCELLANEOUS:" << endl;
4338 cout << " -dd <sample_size> Compute PD distribution of random sets of size k" << endl;
4339 /*
4340 cout << " -gbo <sitelh_file> Compute and output the alignment of (normalized)" << endl;
4341 cout << " expected frequencies given in site_ll_file" << endl;
4342 */
4343
4344 // cout << " -rep <times> Repeat algorithm a number of times." << endl;
4345 // cout << " -noout Print no output file." << endl;
4346 cout << endl;
4347 //cout << "HIDDEN OPTIONS: see the source code file pda.cpp::parseArg()" << endl;
4348
4349 exit(0);
4350 }
4351
usage_iqtree(char * argv[],bool full_command)4352 void usage_iqtree(char* argv[], bool full_command) {
4353 printCopyright(cout);
4354 cout << "Usage: iqtree [-s ALIGNMENT] [-p PARTITION] [-m MODEL] [-t TREE] ..." << endl << endl;
4355 cout << "GENERAL OPTIONS:" << endl
4356 << " -h, --help Print (more) help usages" << endl
4357 << " -s FILE[,...,FILE] PHYLIP/FASTA/NEXUS/CLUSTAL/MSF alignment file(s)" << endl
4358 << " -s DIR Directory of alignment files" << endl
4359 << " --seqtype STRING BIN, DNA, AA, NT2AA, CODON, MORPH (default: auto-detect)" << endl
4360 << " -t FILE|PARS|RAND Starting tree (default: 99 parsimony and BIONJ)" << endl
4361 << " -o TAX[,...,TAX] Outgroup taxon (list) for writing .treefile" << endl
4362 << " --prefix STRING Prefix for all output files (default: aln/partition)" << endl
4363 << " --seed NUM Random seed number, normally used for debugging purpose" << endl
4364 << " --safe Safe likelihood kernel to avoid numerical underflow" << endl
4365 << " --mem NUM[G|M|%] Maximal RAM usage in GB | MB | %" << endl
4366 << " --runs NUM Number of indepedent runs (default: 1)" << endl
4367 << " -v, --verbose Verbose mode, printing more messages to screen" << endl
4368 << " -V, --version Display version number" << endl
4369 << " --quiet Quiet mode, suppress printing to screen (stdout)" << endl
4370 << " -fconst f1,...,fN Add constant patterns into alignment (N=no. states)" << endl
4371 << " --epsilon NUM Likelihood epsilon for parameter estimate (default 0.01)" << endl
4372 #ifdef _OPENMP
4373 << " -T NUM|AUTO No. cores/threads or AUTO-detect (default: 1)" << endl
4374 << " --threads-max NUM Max number of threads for -T AUTO (default: all cores)" << endl
4375 #endif
4376 << endl << "CHECKPOINT:" << endl
4377 << " --redo Redo both ModelFinder and tree search" << endl
4378 << " --redo-tree Restore ModelFinder and only redo tree search" << endl
4379 << " --undo Revoke finished run, used when changing some options" << endl
4380 << " --cptime NUM Minimum checkpoint interval (default: 60 sec and adapt)" << endl
4381 << endl << "PARTITION MODEL:" << endl
4382 << " -p FILE|DIR NEXUS/RAxML partition file or directory with alignments" << endl
4383 << " Edge-linked proportional partition model" << endl
4384 << " -q FILE|DIR Like -p but edge-linked equal partition model " << endl
4385 << " -Q FILE|DIR Like -p but edge-unlinked partition model" << endl
4386 << " -S FILE|DIR Like -p but separate tree inference" << endl
4387 << " --subsample NUM Randomly sub-sample partitions (negative for complement)" << endl
4388 << " --subsample-seed NUM Random number seed for --subsample" << endl
4389 << endl << "LIKELIHOOD/QUARTET MAPPING:" << endl
4390 << " --lmap NUM Number of quartets for likelihood mapping analysis" << endl
4391 << " --lmclust FILE NEXUS file containing clusters for likelihood mapping" << endl
4392 << " --quartetlh Print quartet log-likelihoods to .quartetlh file" << endl
4393 << endl << "TREE SEARCH ALGORITHM:" << endl
4394 // << " -pll Use phylogenetic likelihood library (PLL) (default: off)" << endl
4395 << " --ninit NUM Number of initial parsimony trees (default: 100)" << endl
4396 << " --ntop NUM Number of top initial trees (default: 20)" << endl
4397 << " --nbest NUM Number of best trees retained during search (defaut: 5)" << endl
4398 << " -n NUM Fix number of iterations to stop (default: OFF)" << endl
4399 << " --nstop NUM Number of unsuccessful iterations to stop (default: 100)" << endl
4400 << " --perturb NUM Perturbation strength for randomized NNI (default: 0.5)" << endl
4401 << " --radius NUM Radius for parsimony SPR search (default: 6)" << endl
4402 << " --allnni Perform more thorough NNI search (default: OFF)" << endl
4403 << " -g FILE (Multifurcating) topological constraint tree file" << endl
4404 << " --fast Fast search to resemble FastTree" << endl
4405 << " --polytomy Collapse near-zero branches into polytomy" << endl
4406 << " --tree-fix Fix -t tree (no tree search performed)" << endl
4407 << " --treels Write locally optimal trees into .treels file" << endl
4408 << " --show-lh Compute tree likelihood without optimisation" << endl
4409 #ifdef IQTREE_TERRAPHAST
4410 << " --terrace Check if the tree lies on a phylogenetic terrace" << endl
4411 #endif
4412 // << " -iqp Use the IQP tree perturbation (default: randomized NNI)" << endl
4413 // << " -iqpnni Switch back to the old IQPNNI tree search algorithm" << endl
4414 << endl << "ULTRAFAST BOOTSTRAP/JACKKNIFE:" << endl
4415 << " -B, --ufboot NUM Replicates for ultrafast bootstrap (>=1000)" << endl
4416 << " -J, --ufjack NUM Replicates for ultrafast jackknife (>=1000)" << endl
4417 << " --jack-prop NUM Subsampling proportion for jackknife (default: 0.5)" << endl
4418 << " --sampling STRING GENE|GENESITE resampling for partitions (default: SITE)" << endl
4419 << " --boot-trees Write bootstrap trees to .ufboot file (default: none)" << endl
4420 << " --wbtl Like --boot-trees but also writing branch lengths" << endl
4421 // << " -n <#iterations> Minimum number of iterations (default: 100)" << endl
4422 << " --nmax NUM Maximum number of iterations (default: 1000)" << endl
4423 << " --nstep NUM Iterations for UFBoot stopping rule (default: 100)" << endl
4424 << " --bcor NUM Minimum correlation coefficient (default: 0.99)" << endl
4425 << " --beps NUM RELL epsilon to break tie (default: 0.5)" << endl
4426 << " --bnni Optimize UFBoot trees by NNI on bootstrap alignment" << endl
4427 << endl << "NON-PARAMETRIC BOOTSTRAP/JACKKNIFE:" << endl
4428 << " -b, --boot NUM Replicates for bootstrap + ML tree + consensus tree" << endl
4429 << " -j, --jack NUM Replicates for jackknife + ML tree + consensus tree" << endl
4430 << " --jack-prop NUM Subsampling proportion for jackknife (default: 0.5)" << endl
4431 << " --bcon NUM Replicates for bootstrap + consensus tree" << endl
4432 << " --bonly NUM Replicates for bootstrap only" << endl
4433 #ifdef USE_BOOSTER
4434 << " --tbe Transfer bootstrap expectation" << endl
4435 #endif
4436 // << " -t <threshold> Minimum bootstrap support [0...1) for consensus tree" << endl
4437 << endl << "SINGLE BRANCH TEST:" << endl
4438 << " --alrt NUM Replicates for SH approximate likelihood ratio test" << endl
4439 << " --alrt 0 Parametric aLRT test (Anisimova and Gascuel 2006)" << endl
4440 << " --abayes approximate Bayes test (Anisimova et al. 2011)" << endl
4441 << " --lbp NUM Replicates for fast local bootstrap probabilities" << endl
4442 << endl << "MODEL-FINDER:" << endl
4443 << " -m TESTONLY Standard model selection (like jModelTest, ProtTest)" << endl
4444 << " -m TEST Standard model selection followed by tree inference" << endl
4445 << " -m MF Extended model selection with FreeRate heterogeneity" << endl
4446 << " -m MFP Extended model selection followed by tree inference" << endl
4447 << " -m ...+LM Additionally test Lie Markov models" << endl
4448 << " -m ...+LMRY Additionally test Lie Markov models with RY symmetry" << endl
4449 << " -m ...+LMWS Additionally test Lie Markov models with WS symmetry" << endl
4450 << " -m ...+LMMK Additionally test Lie Markov models with MK symmetry" << endl
4451 << " -m ...+LMSS Additionally test strand-symmetric models" << endl
4452 << " --mset STRING Restrict search to models supported by other programs" << endl
4453 << " (raxml, phyml or mrbayes)" << endl
4454 << " --mset STR,... Comma-separated model list (e.g. -mset WAG,LG,JTT)" << endl
4455 << " --msub STRING Amino-acid model source" << endl
4456 << " (nuclear, mitochondrial, chloroplast or viral)" << endl
4457 << " --mfreq STR,... List of state frequencies" << endl
4458 << " --mrate STR,... List of rate heterogeneity among sites" << endl
4459 << " (e.g. -mrate E,I,G,I+G,R is used for -m MF)" << endl
4460 << " --cmin NUM Min categories for FreeRate model [+R] (default: 2)" << endl
4461 << " --cmax NUM Max categories for FreeRate model [+R] (default: 10)" << endl
4462 << " --merit AIC|AICc|BIC Akaike|Bayesian information criterion (default: BIC)" << endl
4463 // << " -msep Perform model selection and then rate selection" << endl
4464 << " --mtree Perform full tree search for every model" << endl
4465 << " --madd STR,... List of mixture models to consider" << endl
4466 << " --mdef FILE Model definition NEXUS file (see Manual)" << endl
4467 << " --modelomatic Find best codon/protein/DNA models (Whelan et al. 2015)" << endl
4468
4469 << endl << "PARTITION-FINDER:" << endl
4470 << " --merge Merge partitions to increase model fit" << endl
4471 << " --merge greedy|rcluster|rclusterf" << endl
4472 << " Set merging algorithm (default: rclusterf)" << endl
4473 << " --merge-model 1|all Use only 1 or all models for merging (default: 1)" << endl
4474 << " --merge-model STR,..." << endl
4475 << " Comma-separated model list for merging" << endl
4476 << " --merge-rate 1|all Use only 1 or all rate heterogeneity (default: 1)" << endl
4477 << " --merge-rate STR,..." << endl
4478 << " Comma-separated rate list for merging" << endl
4479 << " --rcluster NUM Percentage of partition pairs for rcluster algorithm" << endl
4480 << " --rclusterf NUM Percentage of partition pairs for rclusterf algorithm" << endl
4481 << " --rcluster-max NUM Max number of partition pairs (default: 10*partitions)" << endl
4482
4483 << endl << "SUBSTITUTION MODEL:" << endl
4484 << " -m STRING Model name string (e.g. GTR+F+I+G)" << endl
4485 << " DNA: HKY (default), JC, F81, K2P, K3P, K81uf, TN/TrN, TNef," << endl
4486 << " TIM, TIMef, TVM, TVMef, SYM, GTR, or 6-digit model" << endl
4487 << " specification (e.g., 010010 = HKY)" << endl
4488 << " Protein: LG (default), Poisson, cpREV, mtREV, Dayhoff, mtMAM," << endl
4489 << " JTT, WAG, mtART, mtZOA, VT, rtREV, DCMut, PMB, HIVb," << endl
4490 << " HIVw, JTTDCMut, FLU, Blosum62, GTR20, mtMet, mtVer, mtInv, FLAVI," << endl
4491 << " Q.LG, Q.pfam, Q.pfam_gb, Q.bird, Q.mammal, Q.insect, Q.plant, Q.yeast" << endl
4492 << " Protein mixture: C10,...,C60, EX2, EX3, EHO, UL2, UL3, EX_EHO, LG4M, LG4X" << endl
4493 << " Binary: JC2 (default), GTR2" << endl
4494 << " Empirical codon: KOSI07, SCHN05" << endl
4495 << " Mechanistic codon: GY (default), MG, MGK, GY0K, GY1KTS, GY1KTV, GY2K," << endl
4496 << " MG1KTS, MG1KTV, MG2K" << endl
4497 << "Semi-empirical codon: XX_YY where XX is empirical and YY is mechanistic model" << endl
4498 << " Morphology/SNP: MK (default), ORDERED, GTR" << endl
4499 << " Lie Markov DNA: 1.1, 2.2b, 3.3a, 3.3b, 3.3c, 3.4, 4.4a, 4.4b, 4.5a," << endl
4500 << " 4.5b, 5.6a, 5.6b, 5.7a, 5.7b, 5.7c, 5.11a, 5.11b, 5.11c," << endl
4501 << " 5.16, 6.6, 6.7a, 6.7b, 6.8a, 6.8b, 6.17a, 6.17b, 8.8," << endl
4502 << " 8.10a, 8.10b, 8.16, 8.17, 8.18, 9.20a, 9.20b, 10.12," << endl
4503 << " 10.34, 12.12 (optionally prefixed by RY, WS or MK)" << endl
4504 << " Non-reversible: STRSYM (strand symmetric model, equiv. WS6.6)," << endl
4505 << " NONREV, UNREST (unrestricted model, equiv. 12.12)" << endl
4506 << " Otherwise: Name of file containing user-model parameters" << endl
4507 << endl << "STATE FREQUENCY:" << endl
4508 << " -m ...+F Empirically counted frequencies from alignment" << endl
4509 << " -m ...+FO Optimized frequencies by maximum-likelihood" << endl
4510 << " -m ...+FQ Equal frequencies" << endl
4511 << " -m ...+FRY For DNA, freq(A+G)=1/2=freq(C+T)" << endl
4512 << " -m ...+FWS For DNA, freq(A+T)=1/2=freq(C+G)" << endl
4513 << " -m ...+FMK For DNA, freq(A+C)=1/2=freq(G+T)" << endl
4514 << " -m ...+Fabcd 4-digit constraint on ACGT frequency" << endl
4515 << " (e.g. +F1221 means f_A=f_T, f_C=f_G)" << endl
4516 << " -m ...+FU Amino-acid frequencies given protein matrix" << endl
4517 << " -m ...+F1x4 Equal NT frequencies over three codon positions" << endl
4518 << " -m ...+F3x4 Unequal NT frequencies over three codon positions" << endl
4519
4520 << endl << "RATE HETEROGENEITY AMONG SITES:" << endl
4521 << " -m ...+I A proportion of invariable sites" << endl
4522 << " -m ...+G[n] Discrete Gamma model with n categories (default n=4)" << endl
4523 << " -m ...*G[n] Discrete Gamma model with unlinked model parameters" << endl
4524 << " -m ...+I+G[n] Invariable sites plus Gamma model with n categories" << endl
4525 << " -m ...+R[n] FreeRate model with n categories (default n=4)" << endl
4526 << " -m ...*R[n] FreeRate model with unlinked model parameters" << endl
4527 << " -m ...+I+R[n] Invariable sites plus FreeRate model with n categories" << endl
4528 << " -m ...+Hn Heterotachy model with n classes" << endl
4529 << " -m ...*Hn Heterotachy model with n classes and unlinked parameters" << endl
4530 << " --alpha-min NUM Min Gamma shape parameter for site rates (default: 0.02)" << endl
4531 << " --gamma-median Median approximation for +G site rates (default: mean)" << endl
4532 << " --rate Write empirical Bayesian site rates to .rate file" << endl
4533 << " --mlrate Write maximum likelihood site rates to .mlrate file" << endl
4534 // << " --mhrate Computing site-specific rates to .mhrate file using" << endl
4535 // << " Meyer & von Haeseler (2003) method" << endl
4536
4537 << endl << "POLYMORPHISM AWARE MODELS (PoMo):" << endl
4538 << " -s FILE Input counts file (see manual)" << endl
4539 << " -m ...+P DNA substitution model (see above) used with PoMo" << endl
4540 << " -m ...+N<POPSIZE> Virtual population size (default: 9)" << endl
4541 // TODO DS: Maybe change default to +WH.
4542 << " -m ...+WB|WH|S] Weighted binomial sampling" << endl
4543 << " -m ...+WH Weighted hypergeometric sampling" << endl
4544 << " -m ...+S Sampled sampling" << endl
4545 << " -m ...+G[n] Discrete Gamma rate with n categories (default n=4)" << endl
4546 // TODO DS: Maybe change default to +WH.
4547
4548 << endl << "COMPLEX MODELS:" << endl
4549 << " -m \"MIX{m1,...,mK}\" Mixture model with K components" << endl
4550 << " -m \"FMIX{f1,...fK}\" Frequency mixture model with K components" << endl
4551 << " --mix-opt Optimize mixture weights (default: detect)" << endl
4552 << " -m ...+ASC Ascertainment bias correction" << endl
4553 << " --tree-freq FILE Input tree to infer site frequency model" << endl
4554 << " --site-freq FILE Input site frequency model file" << endl
4555 << " --freq-max Posterior maximum instead of mean approximation" << endl
4556
4557 << endl << "TREE TOPOLOGY TEST:" << endl
4558 << " --trees FILE Set of trees to evaluate log-likelihoods" << endl
4559 << " --test NUM Replicates for topology test" << endl
4560 << " --test-weight Perform weighted KH and SH tests" << endl
4561 << " --test-au Approximately unbiased (AU) test (Shimodaira 2002)" << endl
4562 << " --sitelh Write site log-likelihoods to .sitelh file" << endl
4563
4564 << endl << "ANCESTRAL STATE RECONSTRUCTION:" << endl
4565 << " --ancestral Ancestral state reconstruction by empirical Bayes" << endl
4566 << " --asr-min NUM Min probability of ancestral state (default: equil freq)" << endl
4567
4568 << endl << "TEST OF SYMMETRY:" << endl
4569 << " --symtest Perform three tests of symmetry" << endl
4570 << " --symtest-only Do --symtest then exist" << endl
4571 // << " --bisymtest Perform three binomial tests of symmetry" << endl
4572 // << " --symtest-perm NUM Replicates for permutation tests of symmetry" << endl
4573 << " --symtest-remove-bad Do --symtest and remove bad partitions" << endl
4574 << " --symtest-remove-good Do --symtest and remove good partitions" << endl
4575 << " --symtest-type MAR|INT Use MARginal/INTernal test when removing partitions" << endl
4576 << " --symtest-pval NUMER P-value cutoff (default: 0.05)" << endl
4577 << " --symtest-keep-zero Keep NAs in the tests" << endl
4578
4579 << endl << "CONCORDANCE FACTOR ANALYSIS:" << endl
4580 << " -t FILE Reference tree to assign concordance factor" << endl
4581 << " --gcf FILE Set of source trees for gene concordance factor (gCF)" << endl
4582 << " --df-tree Write discordant trees associated with gDF1" << endl
4583 << " --scf NUM Number of quartets for site concordance factor (sCF)" << endl
4584 << " -s FILE Sequence alignment for --scf" << endl
4585 << " -p FILE|DIR Partition file or directory for --scf" << endl
4586 << " --cf-verbose Write CF per tree/locus to cf.stat_tree/_loci" << endl
4587
4588 #ifdef USE_LSD2
4589 << endl << "TIME TREE RECONSTRUCTION:" << endl
4590 << " --date FILE File containing dates of tips or ancestral nodes" << endl
4591 << " --date TAXNAME Extract dates from taxon names after last '|'" << endl
4592 << " --date-tip STRING Tip dates as a real number or YYYY-MM-DD" << endl
4593 << " --date-root STRING Root date as a real number or YYYY-MM-DD" << endl
4594 << " --date-ci NUM Number of replicates to compute confidence interval" << endl
4595 << " --clock-sd NUM Std-dev for lognormal relaxed clock (default: 0.2)" << endl
4596 << " --date-no-outgroup Exclude outgroup from time tree" << endl
4597 << " --date-outlier NUM Z-score cutoff to remove outlier tips/nodes (e.g. 3)" << endl
4598 << " --date-options \"..\" Extra options passing directly to LSD2" << endl
4599 << " --dating STRING Dating method: LSD for least square dating (default)" << endl
4600 #endif
4601 << endl;
4602
4603
4604 // << endl << "TEST OF MODEL HOMOGENEITY:" << endl
4605 // << " -m WHTEST Testing model (GTR+G) homogeneity assumption using" << endl
4606 // << " Weiss & von Haeseler (2003) method" << endl
4607 // << " -ns <#simulations> #Simulations to obtain null-distribution (default: 1000)" << endl
4608
4609 if (full_command)
4610 cout
4611 << endl << "CONSENSUS RECONSTRUCTION:" << endl
4612 << " -t FILE Set of input trees for consensus reconstruction" << endl
4613 << " --sup-min NUM Min split support, 0.5 for majority-rule consensus" << endl
4614 << " (default: 0, extended consensus)" << endl
4615 << " --burnin NUM Burnin number of trees to ignore" << endl
4616 << " --con-tree Compute consensus tree to .contree file" << endl
4617 << " --con-net Computing consensus network to .nex file" << endl
4618 << " --support FILE Assign support values into this tree from -t trees" << endl
4619 //<< " -sup2 FILE Like -sup but -t trees can have unequal taxon sets" << endl
4620 << " --suptag STRING Node name (or ALL) to assign tree IDs where node occurs" << endl
4621 << endl << "TREE DISTANCE BY ROBINSON-FOULDS (RF) METRIC:" << endl
4622 << " --tree-dist-all Compute all-to-all RF distances for -t trees" << endl
4623 << " --tree-dist FILE Compute RF distances between -t trees and this set" << endl
4624 << " --tree-dist2 FILE Like -rf but trees can have unequal taxon sets" << endl
4625 // << " -rf_adj Computing RF distances of adjacent trees in <treefile>" << endl
4626 // << " -wja Write ancestral sequences by joint reconstruction" << endl
4627
4628
4629 << endl
4630
4631 << "GENERATING RANDOM TREES:" << endl
4632 << " -r NUM No. taxa for Yule-Harding random tree" << endl
4633 << " --rand UNI|CAT|BAL UNIform | CATerpillar | BALanced random tree" << endl
4634 //<< " --rand NET Random circular split network" << endl
4635 << " --rlen NUM NUM NUM min, mean, and max random branch lengths" << endl
4636
4637 << endl << "MISCELLANEOUS:" << endl
4638 << " --keep-ident Keep identical sequences (default: remove & finally add)" << endl
4639 << " -blfix Fix branch lengths of user tree passed via -te" << endl
4640 << " -blscale Scale branch lengths of user tree passed via -t" << endl
4641 << " -blmin Min branch length for optimization (default 0.000001)" << endl
4642 << " -blmax Max branch length for optimization (default 100)" << endl
4643 << " -wslr Write site log-likelihoods per rate category" << endl
4644 << " -wslm Write site log-likelihoods per mixture class" << endl
4645 << " -wslmr Write site log-likelihoods per mixture+rate class" << endl
4646 << " -wspr Write site probabilities per rate category" << endl
4647 << " -wspm Write site probabilities per mixture class" << endl
4648 << " -wspmr Write site probabilities per mixture+rate class" << endl
4649 << " --partlh Write partition log-likelihoods to .partlh file" << endl
4650 << " --no-outfiles Suppress printing output files" << endl
4651 << " --eigenlib Use Eigen3 library" << endl
4652 << " -alninfo Print alignment sites statistics to .alninfo" << endl
4653 // << " -d <file> Reading genetic distances from file (default: JC)" << endl
4654 // << " -d <outfile> Calculate the distance matrix inferred from tree" << endl
4655 // << " -stats <outfile> Output some statistics about branch lengths" << endl
4656 // << " -comp <treefile> Compare tree with each in the input trees" << endl;
4657
4658
4659 << endl;
4660
4661 if (full_command) {
4662 //TODO Print other options here (to be added)
4663 }
4664
4665 exit(0);
4666 }
4667
quickStartGuide()4668 void quickStartGuide() {
4669 printCopyright(cout);
4670 cout << "Command-line examples (replace 'iqtree2 ...' by actual path to executable):" << endl << endl
4671 << "1. Infer maximum-likelihood tree from a sequence alignment (example.phy)" << endl
4672 << " with the best-fit model automatically selected by ModelFinder:" << endl
4673 << " iqtree2 -s example.phy" << endl << endl
4674 << "2. Perform ModelFinder without subsequent tree inference:" << endl
4675 << " iqtree2 -s example.phy -m MF" << endl
4676 << " (use '-m TEST' to resemble jModelTest/ProtTest)" << endl << endl
4677 << "3. Combine ModelFinder, tree search, ultrafast bootstrap and SH-aLRT test:" << endl
4678 << " iqtree2 -s example.phy --alrt 1000 -B 1000" << endl << endl
4679 << "4. Perform edge-linked proportional partition model (example.nex):" << endl
4680 << " iqtree2 -s example.phy -p example.nex" << endl
4681 << " (replace '-p' by '-Q' for edge-unlinked model)" << endl << endl
4682 << "5. Find best partition scheme by possibly merging partitions:" << endl
4683 << " iqtree2 -s example.phy -p example.nex -m MF+MERGE" << endl
4684 << " (use '-m TESTMERGEONLY' to resemble PartitionFinder)" << endl << endl
4685 << "6. Find best partition scheme followed by tree inference and bootstrap:" << endl
4686 << " iqtree2 -s example.phy -p example.nex -m MFP+MERGE -B 1000" << endl << endl
4687 #ifdef _OPENMP
4688 << "7. Use 4 CPU cores to speed up computation: add '-T 4' option" << endl << endl
4689 #endif
4690 << "8. Polymorphism-aware model with HKY nucleotide model and Gamma rate:" << endl
4691 << " iqtree2 -s counts_file.cf -m HKY+P+G" << endl << endl
4692 << "9. PoMo mixture with virtual popsize 5 and weighted binomial sampling:" << endl
4693 << " iqtree2 -s counts_file.cf -m \"MIX{HKY+P{EMP},JC+P}+N5+WB\"" << endl << endl
4694 << "To show all available options: run 'iqtree2 -h'" << endl << endl
4695 << "Have a look at the tutorial and manual for more information:" << endl
4696 << " http://www.iqtree.org" << endl << endl;
4697 exit(0);
4698 }
4699
detectInputFile(const char * input_file)4700 InputType detectInputFile(const char *input_file) {
4701
4702 if (!fileExists(input_file))
4703 outError("File not found ", input_file);
4704
4705 try {
4706 igzstream in;
4707 in.exceptions(ios::failbit | ios::badbit);
4708 in.open(input_file);
4709
4710 unsigned char ch, ch2;
4711 int count = 0;
4712 do {
4713 in >> ch;
4714 } while (ch <= 32 && !in.eof() && count++ < 20);
4715 in >> ch2;
4716 in.close();
4717 switch (ch) {
4718 case '#': return IN_NEXUS;
4719 case '(': return IN_NEWICK;
4720 case '[': return IN_NEWICK;
4721 case '>': return IN_FASTA;
4722 case 'C': if (ch2 == 'L') return IN_CLUSTAL;
4723 else if (ch2 == 'O') return IN_COUNTS;
4724 else return IN_OTHER;
4725 case '!': if (ch2 == '!') return IN_MSF; else return IN_OTHER;
4726 default:
4727 if (isdigit(ch)) return IN_PHYLIP;
4728 return IN_OTHER;
4729 }
4730 } catch (ios::failure) {
4731 outError("Cannot read file ", input_file);
4732 } catch (...) {
4733 outError("Cannot read file ", input_file);
4734 }
4735 return IN_OTHER;
4736 }
4737
overwriteFile(char * filename)4738 bool overwriteFile(char *filename) {
4739 ifstream infile(filename);
4740 if (infile.is_open()) {
4741 cout << "Overwrite " << filename << " (y/n)? ";
4742 char ch;
4743 cin >> ch;
4744 if (ch != 'Y' && ch != 'y') {
4745 infile.close();
4746 return false;
4747 }
4748 }
4749 infile.close();
4750 return true;
4751 }
4752
parseAreaName(char * area_names,set<string> & areas)4753 void parseAreaName(char *area_names, set<string> &areas) {
4754 string all = area_names;
4755 int pos;
4756 while (!all.empty()) {
4757 pos = all.find(',');
4758 if (pos < 0) pos = all.length();
4759 areas.insert(all.substr(0, pos));
4760 if (pos >= (signed int) all.length())
4761 all = "";
4762 else
4763 all = all.substr(pos + 1);
4764 }
4765 }
4766
logFac(const int num)4767 double logFac(const int num) {
4768 if (num < 0) return -1.0;
4769 if (num == 0) return 0.0;
4770 double ret = 0;
4771 for (int i = 1; i <= num; i++)
4772 ret += log((double) i);
4773 return ret;
4774 }
4775
4776 template <typename I>
random_element(I begin,I end)4777 I random_element(I begin, I end)
4778 {
4779 const unsigned long n = std::distance(begin, end);
4780 const unsigned long divisor = (RAND_MAX + 1) / n;
4781
4782 unsigned long k;
4783 do { k = std::rand() / divisor; } while (k >= n);
4784
4785 return std::advance(begin, k);
4786 }
4787
4788 template <class T>
quantile(const vector<T> & v,const double q)4789 inline T quantile(const vector<T>& v, const double q) {
4790 unsigned int size = v.size();
4791 if (q <= 0) return *std::min_element(v.begin(), v.end());
4792 if (q >= 1) return *std::max_element(v.begin(), v.end());
4793 //double pos = (size - 1) * q;
4794 //unsigned int ind = (unsigned int)(pos);
4795 //double delta = pos - ind;
4796 vector<T> w(size);
4797 std::copy(v, v.begin() + size, w.begin());
4798 }
4799
4800 #define RAN_STANDARD 1
4801 #define RAN_SPRNG 2
4802 #define RAN_RAND4 3
4803
4804 #define RAN_TYPE 2
4805
4806 #if RAN_TYPE == RAN_STANDARD
4807
init_random(int seed)4808 int init_random(int seed) {
4809 srand(seed);
4810 cout << "(Using rand() - Standard Random Number Generator)" << endl;
4811 return seed;
4812 }
4813
finish_random()4814 int finish_random() {
4815 return 0;
4816 }
4817
4818
4819 #elif RAN_TYPE == RAN_RAND4
4820 /******************************************************************************/
4821 /* random numbers generator (Numerical recipes) */
4822 /******************************************************************************/
4823
4824 /* variable */
4825 long _idum;
4826
4827 /* definitions */
4828 #define IM1 2147483563
4829 #define IM2 2147483399
4830 #define AM (1.0/IM1)
4831 #define IMM1 (IM1-1)
4832 #define IA1 40014
4833 #define IA2 40692
4834 #define IQ1 53668
4835 #define IQ2 52774
4836 #define IR1 12211
4837 #define IR2 3791
4838 #define NTAB 32
4839 #define NDIV (1+IMM1/NTAB)
4840 #define EPS 1.2e-7
4841 #define RNMX (1.0-EPS)
4842
randomunitintervall()4843 double randomunitintervall()
4844 /* Long period (> 2e18) random number generator. Returns a uniform random
4845 deviate between 0.0 and 1.0 (exclusive of endpoint values).
4846
4847 Source:
4848 Press et al., "Numerical recipes in C", Cambridge University Press, 1992
4849 (chapter 7 "Random numbers", ran2 random number generator) */ {
4850 int j;
4851 long k;
4852 static long _idum2 = 123456789;
4853 static long iy = 0;
4854 static long iv[NTAB];
4855 double temp;
4856
4857 if (_idum <= 0) {
4858 if (-(_idum) < 1)
4859 _idum = 1;
4860 else
4861 _idum = -(_idum);
4862 _idum2 = (_idum);
4863 for (j = NTAB + 7; j >= 0; j--) {
4864 k = (_idum) / IQ1;
4865 _idum = IA1 * (_idum - k * IQ1) - k*IR1;
4866 if (_idum < 0)
4867 _idum += IM1;
4868 if (j < NTAB)
4869 iv[j] = _idum;
4870 }
4871 iy = iv[0];
4872 }
4873 k = (_idum) / IQ1;
4874 _idum = IA1 * (_idum - k * IQ1) - k*IR1;
4875 if (_idum < 0)
4876 _idum += IM1;
4877 k = _idum2 / IQ2;
4878 _idum2 = IA2 * (_idum2 - k * IQ2) - k*IR2;
4879 if (_idum2 < 0)
4880 _idum2 += IM2;
4881 j = iy / NDIV;
4882 iy = iv[j] - _idum2;
4883 iv[j] = _idum;
4884 if (iy < 1)
4885 iy += IMM1;
4886 if ((temp = AM * iy) > RNMX)
4887 return RNMX;
4888 else
4889 return temp;
4890 } /* randomunitintervall */
4891
4892 #undef IM1
4893 #undef IM2
4894 #undef AM
4895 #undef IMM1
4896 #undef IA1
4897 #undef IA2
4898 #undef IQ1
4899 #undef IQ2
4900 #undef IR1
4901 #undef IR2
4902 #undef NTAB
4903 #undef NDIV
4904 #undef EPS
4905 #undef RNMX
4906
init_random(int seed)4907 int init_random(int seed) /* RAND4 */ {
4908 // srand((unsigned) time(NULL));
4909 // if (seed < 0)
4910 // seed = rand();
4911 _idum = -(long) seed;
4912 #ifndef PARALLEL
4913 cout << "(Using RAND4 Random Number Generator)" << endl;
4914 #else /* PARALLEL */
4915 {
4916 int n;
4917 if (PP_IamMaster) {
4918 cout << "(Using RAND4 Random Number Generator with leapfrog method)" << endl;
4919 }
4920 for (n = 0; n < PP_Myid; n++)
4921 (void) randomunitintervall();
4922 if (verbose_mode >= VB_MED) {
4923 cout << "(" << PP_Myid << ") !!! random seed set to " << seed << ", " << n << " drawn !!!" << endl;
4924 }
4925 }
4926 #endif
4927 return (seed);
4928 } /* initrandom */
4929
finish_random()4930 int finish_random() {
4931 return 0;
4932 }
4933 /******************/
4934
4935 #else /* SPRNG */
4936
4937 /******************/
4938
4939 int *randstream;
4940
init_random(int seed,bool write_info,int ** rstream)4941 int init_random(int seed, bool write_info, int** rstream) {
4942 // srand((unsigned) time(NULL));
4943 if (seed < 0)
4944 seed = make_sprng_seed();
4945 #ifndef PARALLEL
4946 if (write_info)
4947 cout << "(Using SPRNG - Scalable Parallel Random Number Generator)" << endl;
4948 if (rstream) {
4949 *rstream = init_sprng(0, 1, seed, SPRNG_DEFAULT); /*init stream*/
4950 } else {
4951 randstream = init_sprng(0, 1, seed, SPRNG_DEFAULT); /*init stream*/
4952 if (verbose_mode >= VB_MED) {
4953 print_sprng(randstream);
4954 }
4955 }
4956 #else /* PARALLEL */
4957 if (PP_IamMaster && write_info) {
4958 cout << "(Using SPRNG - Scalable Parallel Random Number Generator)" << endl;
4959 }
4960 /* MPI_Bcast(&seed, 1, MPI_UNSIGNED, PP_MyMaster, MPI_COMM_WORLD); */
4961 if (rstream) {
4962 *rstream = init_sprng(PP_Myid, PP_NumProcs, seed, SPRNG_DEFAULT); /*initialize stream*/
4963 } else {
4964 randstream = init_sprng(PP_Myid, PP_NumProcs, seed, SPRNG_DEFAULT); /*initialize stream*/
4965 if (verbose_mode >= VB_MED) {
4966 cout << "(" << PP_Myid << ") !!! random seed set to " << seed << " !!!" << endl;
4967 print_sprng(randstream);
4968 }
4969 }
4970 #endif /* PARALLEL */
4971 return (seed);
4972 } /* initrandom */
4973
finish_random(int * rstream)4974 int finish_random(int *rstream) {
4975 if (rstream)
4976 return free_sprng(rstream);
4977 else
4978 return free_sprng(randstream);
4979 }
4980
4981 #endif /* USE_SPRNG */
4982
4983 /******************/
4984
4985 /* returns a random integer in the range [0; n - 1] */
random_int(int n,int * rstream)4986 int random_int(int n, int *rstream) {
4987 return (int) floor(random_double(rstream) * n);
4988 } /* randominteger */
4989
4990 /* returns a random integer in the range [a; b] */
random_int(int a,int b)4991 int random_int(int a, int b) {
4992 ASSERT(b > a);
4993 //return a + (RAND_MAX * rand() + rand()) % (b + 1 - a);
4994 return a + random_int(b - a);
4995 }
4996
random_double(int * rstream)4997 double random_double(int *rstream) {
4998 #ifndef FIXEDINTRAND
4999 #ifndef PARALLEL
5000 #if RAN_TYPE == RAN_STANDARD
5001 return ((double) rand()) / ((double) RAND_MAX + 1);
5002 #elif RAN_TYPE == RAN_SPRNG
5003 if (rstream)
5004 return sprng(rstream);
5005 else
5006 return sprng(randstream);
5007 #else /* NO_SPRNG */
5008 return randomunitintervall();
5009 #endif /* NO_SPRNG */
5010 #else /* NOT PARALLEL */
5011 #if RAN_TYPE == RAN_SPRNG
5012 if (rstream)
5013 return sprng(rstream);
5014 else
5015 return sprng(randstream);
5016 #else /* NO_SPRNG */
5017 int m;
5018 for (m = 1; m < PP_NumProcs; m++)
5019 (void) randomunitintervall();
5020 PP_randn += (m - 1);
5021 PP_rand++;
5022 return randomunitintervall();
5023 #endif /* NO_SPRNG */
5024 #endif /* NOT PARALLEL */
5025 #else /* FIXEDINTRAND */
5026 cerr << "!!! fixed \"random\" integers for testing purposes !!!" << endl;
5027 return 0.0;
5028 #endif /* FIXEDINTRAND */
5029
5030 }
5031
random_resampling(int n,IntVector & sample,int * rstream)5032 void random_resampling(int n, IntVector &sample, int *rstream) {
5033 sample.resize(n, 0);
5034 if (Params::getInstance().jackknife_prop == 0.0) {
5035 // boostrap resampling
5036 for (int i = 0; i < n; i++) {
5037 int j = random_int(n, rstream);
5038 sample[j]++;
5039 }
5040 } else {
5041 // jackknife resampling
5042 int total = floor((1.0 - Params::getInstance().jackknife_prop)*n);
5043 if (total <= 0)
5044 outError("Jackknife sample size is zero");
5045 // make sure jackknife samples have exacly the same size
5046 for (int num = 0; num < total; ) {
5047 for (int i = 0; i < n; i++) if (!sample[i]) {
5048 if (random_double(rstream) < Params::getInstance().jackknife_prop)
5049 continue;
5050 sample[i] = 1;
5051 num++;
5052 if (num >= total)
5053 break;
5054 }
5055 }
5056 }
5057 }
5058
5059
5060 /* Following part is taken from ModelTest software */
5061 #define BIGX 20.0 /* max value to represent exp (x) */
5062 #define LOG_SQRT_PI 0.5723649429247000870717135 /* log (sqrt (pi)) */
5063 #define I_SQRT_PI 0.5641895835477562869480795 /* 1 / sqrt (pi) */
5064 #define Z_MAX 6.0 /* maximum meaningful z value */
5065 #define ex(x) (((x) < -BIGX) ? 0.0 : exp (x))
5066
5067 /************** Normalz: probability of normal z value *********************/
5068
5069 /*
5070 ALGORITHM: Adapted from a polynomial approximation in:
5071 Ibbetson D, Algorithm 209
5072 Collected Algorithms of the CACM 1963 p. 616
5073 Note:
5074 This routine has six digit accuracy, so it is only useful for absolute
5075 z values < 6. For z values >= to 6.0, Normalz() returns 0.0.
5076 */
5077
Normalz(double z)5078 double Normalz(double z) /*VAR returns cumulative probability from -oo to z VAR normal z value */ {
5079 double y, x, w;
5080
5081 if (z == 0.0)
5082 x = 0.0;
5083 else {
5084 y = 0.5 * fabs(z);
5085 if (y >= (Z_MAX * 0.5))
5086 x = 1.0;
5087 else if (y < 1.0) {
5088 w = y*y;
5089 x = ((((((((0.000124818987 * w
5090 - 0.001075204047) * w + 0.005198775019) * w
5091 - 0.019198292004) * w + 0.059054035642) * w
5092 - 0.151968751364) * w + 0.319152932694) * w
5093 - 0.531923007300) * w + 0.797884560593) * y * 2.0;
5094 } else {
5095 y -= 2.0;
5096 x = (((((((((((((-0.000045255659 * y
5097 + 0.000152529290) * y - 0.000019538132) * y
5098 - 0.000676904986) * y + 0.001390604284) * y
5099 - 0.000794620820) * y - 0.002034254874) * y
5100 + 0.006549791214) * y - 0.010557625006) * y
5101 + 0.011630447319) * y - 0.009279453341) * y
5102 + 0.005353579108) * y - 0.002141268741) * y
5103 + 0.000535310849) * y + 0.999936657524;
5104 }
5105 }
5106 return (z > 0.0 ? ((x + 1.0) * 0.5) : ((1.0 - x) * 0.5));
5107 }
5108
5109
5110 /************** ChiSquare: probability of chi square value *************/
5111
5112 /*ALGORITHM Compute probability of chi square value.
5113 Adapted from: Hill, I. D. and Pike, M. C. Algorithm 299.Collected Algorithms for the CACM 1967 p. 243
5114 Updated for rounding errors based on remark inACM TOMS June 1985, page 185. Found in Perlman.lib*/
5115
computePValueChiSquare(double x,int df)5116 double computePValueChiSquare(double x, int df) /* x: obtained chi-square value, df: degrees of freedom */ {
5117 double a, y, s;
5118 double e, c, z;
5119 int even; /* true if df is an even number */
5120
5121 if (x <= 0.0 || df < 1)
5122 return (1.0);
5123
5124 y = 1;
5125
5126 a = 0.5 * x;
5127 even = (2 * (df / 2)) == df;
5128 if (df > 1)
5129 y = ex(-a);
5130 s = (even ? y : (2.0 * Normalz(-sqrt(x))));
5131 if (df > 2) {
5132 x = 0.5 * (df - 1.0);
5133 z = (even ? 1.0 : 0.5);
5134 if (a > BIGX) {
5135 e = (even ? 0.0 : LOG_SQRT_PI);
5136 c = log(a);
5137 while (z <= x) {
5138 e = log(z) + e;
5139 s += ex(c * z - a - e);
5140 z += 1.0;
5141 }
5142 return (s);
5143 } else {
5144 e = (even ? 1.0 : (I_SQRT_PI / sqrt(a)));
5145 c = 0.0;
5146 while (z <= x) {
5147 e = e * (a / z);
5148 c = c + e;
5149 z += 1.0;
5150 }
5151 return (c * y + s);
5152 }
5153 } else
5154 return (s);
5155 }
5156
trimString(string & str)5157 void trimString(string &str) {
5158 str.erase(0, str.find_first_not_of(" \n\r\t"));
5159 str.erase(str.find_last_not_of(" \n\r\t")+1);
5160 }
5161
5162
5163
getInstance()5164 Params& Params::getInstance() {
5165 static Params instance;
5166 return instance;
5167 }
5168
5169
countPhysicalCPUCores()5170 int countPhysicalCPUCores() {
5171 #ifdef _OPENMP
5172 return omp_get_num_procs();
5173 #else
5174 return std::thread::hardware_concurrency();
5175 #endif
5176 /*
5177 uint32_t registers[4];
5178 unsigned logicalcpucount;
5179 unsigned physicalcpucount;
5180 #if defined(_WIN32) || defined(WIN32)
5181 SYSTEM_INFO systeminfo;
5182 GetSystemInfo( &systeminfo );
5183 logicalcpucount = systeminfo.dwNumberOfProcessors;
5184 #else
5185 logicalcpucount = sysconf( _SC_NPROCESSORS_ONLN );
5186 #endif
5187 if (logicalcpucount < 1) logicalcpucount = 1;
5188 return logicalcpucount;
5189
5190 if (logicalcpucount % 2 != 0)
5191 return logicalcpucount;
5192 __asm__ __volatile__ ("cpuid " :
5193 "=a" (registers[0]),
5194 "=b" (registers[1]),
5195 "=c" (registers[2]),
5196 "=d" (registers[3])
5197 : "a" (1), "c" (0));
5198
5199 unsigned CPUFeatureSet = registers[3];
5200 bool hyperthreading = CPUFeatureSet & (1 << 28);
5201 if (hyperthreading){
5202 physicalcpucount = logicalcpucount / 2;
5203 } else {
5204 physicalcpucount = logicalcpucount;
5205 }
5206 if (physicalcpucount < 1) physicalcpucount = 1;
5207 return physicalcpucount;
5208 */
5209 }
5210
5211 // stacktrace.h (c) 2008, Timo Bingmann from http://idlebox.net/
5212 // published under the WTFPL v2.0
5213
5214 /** Print a demangled stack backtrace of the caller function to FILE* out. */
5215
5216 #if !defined(Backtrace_FOUND)
5217
5218 // donothing for WIN32
print_stacktrace(ostream & out,unsigned int max_frames)5219 void print_stacktrace(ostream &out, unsigned int max_frames) {}
5220
5221 #else
5222
print_stacktrace(ostream & out,unsigned int max_frames)5223 void print_stacktrace(ostream &out, unsigned int max_frames)
5224 {
5225 #ifdef _OPENMP
5226 #pragma omp master
5227 {
5228 #endif
5229 out << "STACK TRACE FOR DEBUGGING:" << endl;
5230
5231 // storage array for stack trace address data
5232 void* addrlist[max_frames+1];
5233
5234 // retrieve current stack addresses
5235 int addrlen = backtrace(addrlist, sizeof(addrlist) / sizeof(void*));
5236
5237 // if (addrlen == 0) {
5238 // out << " <empty, possibly corrupt>" << endl;
5239 // return;
5240 // }
5241
5242 // resolve addresses into strings containing "filename(function+address)",
5243 // this array must be free()-ed
5244 char** symbollist = backtrace_symbols(addrlist, addrlen);
5245
5246 // allocate string which will be filled with the demangled function name
5247 size_t funcnamesize = 256;
5248 char* funcname = (char*)malloc(funcnamesize);
5249
5250 // iterate over the returned symbol lines. skip the first, it is the
5251 // address of this function.
5252 for (int i = 1; i < addrlen; i++)
5253 {
5254 char *begin_name = 0, *begin_offset = 0;
5255
5256 // find parentheses and +address offset surrounding the mangled name:
5257 #ifdef __clang__
5258 // OSX style stack trace
5259 for ( char *p = symbollist[i]; *p; ++p )
5260 {
5261 if (( *p == '_' ) && ( *(p-1) == ' ' ))
5262 begin_name = p-1;
5263 else if ( *p == '+' )
5264 begin_offset = p-1;
5265 }
5266
5267 if ( begin_name && begin_offset && ( begin_name < begin_offset ))
5268 {
5269 *begin_name++ = '\0';
5270 *begin_offset++ = '\0';
5271
5272 // mangled name is now in [begin_name, begin_offset) and caller
5273 // offset in [begin_offset, end_offset). now apply
5274 // __cxa_demangle():
5275 int status;
5276 char* ret = abi::__cxa_demangle( begin_name, &funcname[0],
5277 &funcnamesize, &status );
5278 if ( status == 0 )
5279 {
5280 funcname = ret; // use possibly realloc()-ed string
5281 // out << " " << symbollist[i] << " : " << funcname << "+"<< begin_offset << endl;
5282 out << i << " " << funcname << endl;
5283 } else {
5284 // demangling failed. Output function name as a C function with
5285 // no arguments.
5286 // out << " " << symbollist[i] << " : " << begin_name << "()+"<< begin_offset << endl;
5287 out << i << " " << begin_name << "()" << endl;
5288 }
5289
5290 #else // !DARWIN - but is posix
5291 // ./module(function+0x15c) [0x8048a6d]
5292 char *end_offset = 0;
5293 for (char *p = symbollist[i]; *p; ++p)
5294 {
5295 if (*p == '(')
5296 begin_name = p;
5297 else if (*p == '+')
5298 begin_offset = p;
5299 else if (*p == ')' && begin_offset) {
5300 end_offset = p;
5301 break;
5302 }
5303 }
5304
5305 if (begin_name && begin_offset && end_offset
5306 && begin_name < begin_offset)
5307 {
5308 *begin_name++ = '\0';
5309 *begin_offset++ = '\0';
5310 *end_offset = '\0';
5311
5312 // mangled name is now in [begin_name, begin_offset) and caller
5313 // offset in [begin_offset, end_offset). now apply
5314 // __cxa_demangle():
5315
5316 int status;
5317 char* ret = abi::__cxa_demangle(begin_name,
5318 funcname, &funcnamesize, &status);
5319 if (status == 0) {
5320 funcname = ret; // use possibly realloc()-ed string
5321 // out << " " << symbollist[i] << " : " << funcname << "+"<< begin_offset << endl;
5322 out << i << " " << funcname << endl;
5323 }
5324 else {
5325 // demangling failed. Output function name as a C function with
5326 // no arguments.
5327 // out << " " << symbollist[i] << " : " << begin_name << "()+"<< begin_offset << endl;
5328 out << i << " " << begin_name << "()" << endl;
5329 }
5330 #endif
5331 }
5332 else
5333 {
5334 // couldn't parse the line? print the whole line.
5335 // out << i << ". " << symbollist[i] << endl;
5336 }
5337 }
5338
5339 free(funcname);
5340 free(symbollist);
5341 #ifdef _OPENMP
5342 }
5343 #endif
5344
5345 }
5346
5347 #endif // Backtrace_FOUND
5348
5349 bool memcmpcpy(void * destination, const void * source, size_t num) {
5350 bool diff = (memcmp(destination, source, num) != 0);
5351 memcpy(destination, source, num);
5352 return diff;
5353 }
5354
5355 // Pairing function: see https://en.wikipedia.org/wiki/Pairing_function
5356 int pairInteger(int int1, int int2) {
5357 if (int1 <= int2) {
5358 return ((int1 + int2)*(int1 + int2 + 1)/2 + int2);
5359 } else {
5360 return ((int1 + int2)*(int1 + int2 + 1)/2 + int1);
5361 }
5362 }
5363
5364 /*
5365 * Given a model name, look in it for "+F..." and
5366 * determine the StateFreqType. Returns FREQ_EMPIRICAL if
5367 * unable to find a good +F... specifier
5368 */
5369 StateFreqType parseStateFreqFromPlusF(string model_name) {
5370 // StateFreqType freq_type = FREQ_EMPIRICAL;
5371
5372 // BQM 2017-05-02: change back to FREQ_UNKNOWN to resemble old behavior
5373 StateFreqType freq_type = FREQ_UNKNOWN;
5374 size_t plusFPos;
5375 if (model_name.find("+F1X4") != string::npos)
5376 freq_type = FREQ_CODON_1x4;
5377 else if (model_name.find("+F3X4C") != string::npos)
5378 freq_type = FREQ_CODON_3x4C;
5379 else if (model_name.find("+F3X4") != string::npos)
5380 freq_type = FREQ_CODON_3x4;
5381 else if (model_name.find("+FQ") != string::npos)
5382 freq_type = FREQ_EQUAL;
5383 else if (model_name.find("+FO") != string::npos)
5384 freq_type = FREQ_ESTIMATE;
5385 else if (model_name.find("+FU") != string::npos)
5386 freq_type = FREQ_USER_DEFINED;
5387 else if (model_name.find("+FRY") != string::npos)
5388 freq_type = FREQ_DNA_RY;
5389 else if (model_name.find("+FWS") != string::npos)
5390 freq_type = FREQ_DNA_WS;
5391 else if (model_name.find("+FMK") != string::npos)
5392 freq_type = FREQ_DNA_MK;
5393 else if ((plusFPos = model_name.find("+F")) != string::npos) {
5394 freq_type = FREQ_EMPIRICAL;
5395 // Now look for +F#### where #s are digits
5396 if (model_name.length() > plusFPos+2 && isdigit(model_name[plusFPos+2]))
5397 try {
5398 // throws if string is not 4 digits
5399 freq_type = parseStateFreqDigits(model_name.substr(plusFPos+2,4));
5400 } catch (const char *str) {
5401 // +F exists, but can't parse it as anything else
5402 outError(str);
5403 }
5404 }
5405 return(freq_type);
5406 }
5407
5408 /*
5409 * Given a string of 4 digits, return a StateFreqType according to
5410 * equality constraints expressed by those digits.
5411 * E.g. "1233" constrains pi_G=pi_T (ACGT order, 3rd and 4th equal)
5412 * which results in FREQ_DNA_2311. "5288" would give the same result.
5413 */
5414
5415 StateFreqType parseStateFreqDigits(string digits) {
5416 bool good = true;
5417 if (digits.length()!=4) {
5418 good = false;
5419 } else {
5420 // Convert digits to canonical form, first occuring digit becomes 1 etc.
5421 int digit_order[] = {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
5422 int first_found = 0;
5423 for (int i=0; i<4; i++) {
5424 int digit = digits[i]-'0';
5425 if (digit<0 || digit>9) {
5426 good = false; // found a non-digit
5427 break;
5428 }
5429 if (digit_order[digit]==-1) {
5430 // haven't seen this digit before
5431 digit_order[digit]=++first_found;
5432 }
5433 // rewrite digit in canonical form
5434 digits[i] = '0'+digit_order[digit];
5435 }
5436 // e.g. if digits was "5288", digit_order will end up as {-1,-1,2,-1,-1,1,-1,-1,3,-1}
5437 }
5438 if (!good) throw "Use -f <c | o | u | q | ry | ws | mk | <digit><digit><digit><digit>>";
5439 StateFreqType freq_type = FREQ_UNKNOWN;
5440 // Now just exhaustively list all canonical digit possibilities
5441 if (digits.compare("1111")==0) {
5442 freq_type = FREQ_EQUAL;
5443 } else if (digits.compare("1112")==0) {
5444 freq_type = FREQ_DNA_1112;
5445 } else if (digits.compare("1121")==0) {
5446 freq_type = FREQ_DNA_1121;
5447 } else if (digits.compare("1211")==0) {
5448 freq_type = FREQ_DNA_1211;
5449 } else if (digits.compare("1222")==0) {
5450 freq_type = FREQ_DNA_2111;
5451 } else if (digits.compare("1122")==0) {
5452 freq_type = FREQ_DNA_1122;
5453 } else if (digits.compare("1212")==0) {
5454 freq_type = FREQ_DNA_1212;
5455 } else if (digits.compare("1221")==0) {
5456 freq_type = FREQ_DNA_1221;
5457 } else if (digits.compare("1123")==0) {
5458 freq_type = FREQ_DNA_1123;
5459 } else if (digits.compare("1213")==0) {
5460 freq_type = FREQ_DNA_1213;
5461 } else if (digits.compare("1231")==0) {
5462 freq_type = FREQ_DNA_1231;
5463 } else if (digits.compare("1223")==0) {
5464 freq_type = FREQ_DNA_2113;
5465 } else if (digits.compare("1232")==0) {
5466 freq_type = FREQ_DNA_2131;
5467 } else if (digits.compare("1233")==0) {
5468 freq_type = FREQ_DNA_2311;
5469 } else if (digits.compare("1234")==0) {
5470 freq_type = FREQ_ESTIMATE;
5471 } else
5472 throw ("Unrecognized canonical digits - Can't happen"); // paranoia is good.
5473 return freq_type;
5474 }
5475
5476
5477 /*
5478 * All params in range [0,1]
5479 * returns true if base frequencies have changed as a result of this call
5480 */
5481
5482 bool freqsFromParams(double *freq_vec, double *params, StateFreqType freq_type) {
5483
5484 // BQM 2017-05-02: Note that only freq for A, C, G are free parameters and stored
5485 // in params, whereas freq_T is not free and should be handled properly
5486
5487 double pA, pC, pG, pT; // base freqs
5488 switch (freq_type) {
5489 case FREQ_EQUAL:
5490 case FREQ_USER_DEFINED:
5491 case FREQ_EMPIRICAL:
5492 return false;
5493 case FREQ_ESTIMATE:
5494 // Minh: in code review, please pay extra attention to ensure my treadment of FREQ_ESTIMATE is equivalent to old treatment.
5495 // BQM: DONE!
5496 pA=params[0];
5497 pC=params[1];
5498 pG=params[2];
5499 //pT=1-pA-pC-pG;
5500 pT=freq_vec[3];
5501 break;
5502 case FREQ_DNA_RY:
5503 pA = params[0]/2;
5504 pG = 0.5-pA;
5505 pC = params[1]/2;
5506 pT = 0.5-pC;
5507 break;
5508 case FREQ_DNA_WS:
5509 pA = params[0]/2;
5510 pT = 0.5-pA;
5511 pC = params[1]/2;
5512 pG = 0.5-pC;
5513 break;
5514 case FREQ_DNA_MK:
5515 pA = params[0]/2;
5516 pC = 0.5-pA;
5517 pG = params[1]/2;
5518 pT = 0.5-pG;
5519 break;
5520 case FREQ_DNA_1112:
5521 pA = pC = pG = params[0]/3;
5522 pT = 1-3*pA;
5523 break;
5524 case FREQ_DNA_1121:
5525 pA = pC = pT = params[0]/3;
5526 pG = 1-3*pA;
5527 break;
5528 case FREQ_DNA_1211:
5529 pA = pG = pT = params[0]/3;
5530 pC = 1-3*pA;
5531 break;
5532 case FREQ_DNA_2111:
5533 pC = pG = pT = params[0]/3;
5534 pA = 1-3*pC;
5535 break;
5536 case FREQ_DNA_1122:
5537 pA = params[0]/2;
5538 pC = pA;
5539 pG = 0.5-pA;
5540 pT = pG;
5541 break;
5542 case FREQ_DNA_1212:
5543 pA = params[0]/2;
5544 pG = pA;
5545 pC = 0.5-pA;
5546 pT = pC;
5547 break;
5548 case FREQ_DNA_1221:
5549 pA = params[0]/2;
5550 pT = pA;
5551 pC = 0.5-pA;
5552 pG = pC;
5553 break;
5554 case FREQ_DNA_1123:
5555 pA = params[0]/2;
5556 pC = pA;
5557 pG = params[1]*(1-2*pA);
5558 pT = 1-pG-2*pA;
5559 break;
5560 case FREQ_DNA_1213:
5561 pA = params[0]/2;
5562 pG = pA;
5563 pC = params[1]*(1-2*pA);
5564 pT = 1-pC-2*pA;
5565 break;
5566 case FREQ_DNA_1231:
5567 pA = params[0]/2;
5568 pT = pA;
5569 pC = params[1]*(1-2*pA);
5570 pG = 1-pC-2*pA;
5571 break;
5572 case FREQ_DNA_2113:
5573 pC = params[0]/2;
5574 pG = pC;
5575 pA = params[1]*(1-2*pC);
5576 pT = 1-pA-2*pC;
5577 break;
5578 case FREQ_DNA_2131:
5579 pC = params[0]/2;
5580 pT = pC;
5581 pA = params[1]*(1-2*pC);
5582 pG = 1-pA-2*pC;
5583 break;
5584 case FREQ_DNA_2311:
5585 pG = params[0]/2;
5586 pT = pG;
5587 pA = params[1]*(1-2*pG);
5588 pC = 1-pA-2*pG;
5589 break;
5590 default:
5591 throw("Unrecognized freq_type in freqsFromParams - can't happen");
5592 }
5593
5594 // To MDW, 2017-05-02: please make sure that frequencies are positive!
5595 // Otherwise, numerical issues will occur.
5596
5597 bool changed = freq_vec[0]!=pA || freq_vec[1]!=pC || freq_vec[2]!=pG || freq_vec[3]!=pT;
5598 if (changed) {
5599 freq_vec[0]=pA;
5600 freq_vec[1]=pC;
5601 freq_vec[2]=pG;
5602 freq_vec[3]=pT;
5603 }
5604 return(changed);
5605 }
5606
5607 /*
5608 * For given freq_type, derives frequency parameters from freq_vec
5609 * All parameters are in range [0,1] (assuming freq_vec is valid)
5610 */
5611
5612 void paramsFromFreqs(double *params, double *freq_vec, StateFreqType freq_type) {
5613 double pA = freq_vec[0]; // These just improve code readability
5614 double pC = freq_vec[1];
5615 double pG = freq_vec[2];
5616 // double pT = freq_vec[3]; // pT is not used below
5617 switch (freq_type) {
5618 case FREQ_EQUAL:
5619 case FREQ_USER_DEFINED:
5620 case FREQ_EMPIRICAL:
5621 break; // freq_vec never changes
5622 case FREQ_ESTIMATE:
5623 params[0]=pA;
5624 params[1]=pC;
5625 params[2]=pG;
5626 break;
5627 case FREQ_DNA_RY:
5628 params[0]=2*pA;
5629 params[1]=2*pC;
5630 break;
5631 case FREQ_DNA_WS:
5632 params[0]=2*pA;
5633 params[1]=2*pC;
5634 break;
5635 case FREQ_DNA_MK:
5636 params[0]=2*pA;
5637 params[1]=2*pG;
5638 break;
5639 case FREQ_DNA_1112:
5640 params[0]=3*pA;
5641 break;
5642 case FREQ_DNA_1121:
5643 params[0]=3*pA;
5644 break;
5645 case FREQ_DNA_1211:
5646 params[0]=3*pA;
5647 break;
5648 case FREQ_DNA_2111:
5649 params[0]=3*pC;
5650 break;
5651 case FREQ_DNA_1122:
5652 params[0]=2*pA;
5653 break;
5654 case FREQ_DNA_1212:
5655 params[0]=2*pA;
5656 break;
5657 case FREQ_DNA_1221:
5658 params[0]=2*pA;
5659 break;
5660 case FREQ_DNA_1123:
5661 params[0]=2*pA;
5662 params[1]=pG/(1-params[0]);
5663 break;
5664 case FREQ_DNA_1213:
5665 params[0]=2*pA;
5666 params[1]=pC/(1-params[0]);
5667 break;
5668 case FREQ_DNA_1231:
5669 params[0]=2*pA;
5670 params[1]=pC/(1-params[0]);
5671 break;
5672 case FREQ_DNA_2113:
5673 params[0]=2*pC;
5674 params[1]=pA/(1-params[0]);
5675 break;
5676 case FREQ_DNA_2131:
5677 params[0]=2*pC;
5678 params[1]=pA/(1-params[0]);
5679 break;
5680 case FREQ_DNA_2311:
5681 params[0]=2*pG;
5682 params[1]=pA/(1-params[0]);
5683 break;
5684 default:
5685 throw("Unrecognized freq_type in paramsFromFreqs - can't happen");
5686 }
5687 }
5688
5689 /*
5690 * Given a DNA freq_type and a base frequency vector, alter the
5691 * base freq vector to conform with the constraints of freq_type
5692 */
5693 void forceFreqsConform(double *base_freq, StateFreqType freq_type) {
5694 double pA = base_freq[0]; // These just improve code readability
5695 double pC = base_freq[1];
5696 double pG = base_freq[2];
5697 double pT = base_freq[3];
5698 double scale;
5699 switch (freq_type) {
5700 case FREQ_EQUAL:
5701 // this was already handled, thus not necessary to check here
5702 // base_freq[0] = base_freq[1] = base_freq[2] = base_freq[3] = 0.25;
5703 // break;
5704 case FREQ_USER_DEFINED:
5705 case FREQ_EMPIRICAL:
5706 case FREQ_ESTIMATE:
5707 break; // any base_freq is legal
5708 case FREQ_DNA_RY:
5709 scale = 0.5/(pA+pG);
5710 base_freq[0] = pA*scale;
5711 base_freq[2] = pG*scale;
5712 scale = 0.5/(pC+pT);
5713 base_freq[1] = pC*scale;
5714 base_freq[3] = pT*scale;
5715 break;
5716 case FREQ_DNA_WS:
5717 scale = 0.5/(pA+pT);
5718 base_freq[0] = pA*scale;
5719 base_freq[3] = pT*scale;
5720 scale = 0.5/(pC+pG);
5721 base_freq[1] = pC*scale;
5722 base_freq[2] = pG*scale;
5723 break;
5724 case FREQ_DNA_MK:
5725 scale = 0.5/(pA+pC);
5726 base_freq[0] = pA*scale;
5727 base_freq[1] = pC*scale;
5728 scale = 0.5/(pG+pT);
5729 base_freq[2] = pG*scale;
5730 base_freq[3] = pT*scale;
5731 break;
5732 case FREQ_DNA_1112:
5733 base_freq[0]=base_freq[1]=base_freq[2]=(pA+pC+pG)/3;
5734 break;
5735 case FREQ_DNA_1121:
5736 base_freq[0]=base_freq[1]=base_freq[3]=(pA+pC+pT)/3;
5737 break;
5738 case FREQ_DNA_1211:
5739 base_freq[0]=base_freq[2]=base_freq[3]=(pA+pG+pT)/3;
5740 break;
5741 case FREQ_DNA_2111:
5742 base_freq[1]=base_freq[2]=base_freq[3]=(pC+pG+pT)/3;
5743 break;
5744 case FREQ_DNA_1122:
5745 base_freq[0]=base_freq[1]=(pA+pC)/2;
5746 base_freq[2]=base_freq[3]=(pG+pT)/2;
5747 break;
5748 case FREQ_DNA_1212:
5749 base_freq[0]=base_freq[2]=(pA+pG)/2;
5750 base_freq[1]=base_freq[3]=(pC+pT)/2;
5751 break;
5752 case FREQ_DNA_1221:
5753 base_freq[0]=base_freq[3]=(pA+pT)/2;
5754 base_freq[1]=base_freq[2]=(pC+pG)/2;
5755 break;
5756 case FREQ_DNA_1123:
5757 base_freq[0]=base_freq[1]=(pA+pC)/2;
5758 break;
5759 case FREQ_DNA_1213:
5760 base_freq[0]=base_freq[2]=(pA+pG)/2;
5761 break;
5762 case FREQ_DNA_1231:
5763 base_freq[0]=base_freq[3]=(pA+pT)/2;
5764 break;
5765 case FREQ_DNA_2113:
5766 base_freq[1]=base_freq[2]=(pC+pG)/2;
5767 break;
5768 case FREQ_DNA_2131:
5769 base_freq[1]=base_freq[3]=(pC+pT)/2;
5770 break;
5771 case FREQ_DNA_2311:
5772 base_freq[2]=base_freq[3]=(pG+pT)/2;
5773 break;
5774 default:
5775 throw("Unrecognized freq_type in forceFreqsConform - can't happen");
5776 }
5777 ASSERT(base_freq[0]>=0 && base_freq[1]>=0 && base_freq[2]>=0 && base_freq[3]>=0 && fabs(base_freq[0]+base_freq[1]+base_freq[2]+base_freq[3]-1)<1e-7);
5778 }
5779
5780 /*
5781 * For given freq_type, how many parameters are needed to
5782 * determine frequenc vector?
5783 * Currently, this is for DNA StateFreqTypes only.
5784 */
5785
5786 int nFreqParams(StateFreqType freq_type) {
5787 switch (freq_type) {
5788 case FREQ_DNA_1112:
5789 case FREQ_DNA_1121:
5790 case FREQ_DNA_1211:
5791 case FREQ_DNA_2111:
5792 case FREQ_DNA_1122:
5793 case FREQ_DNA_1212:
5794 case FREQ_DNA_1221:
5795 return(1);
5796 case FREQ_DNA_RY:
5797 case FREQ_DNA_WS:
5798 case FREQ_DNA_MK:
5799 case FREQ_DNA_1123:
5800 case FREQ_DNA_1213:
5801 case FREQ_DNA_1231:
5802 case FREQ_DNA_2113:
5803 case FREQ_DNA_2131:
5804 case FREQ_DNA_2311:
5805 return(2);
5806 default:
5807 return 0; // BQM: don't care about other cases
5808 }
5809 }
5810
5811 /*
5812 * For freq_type, and given every base must have frequency >= min_freq, set upper
5813 * and lower bounds for parameters.
5814 */
5815 void setBoundsForFreqType(double *lower_bound,
5816 double *upper_bound,
5817 bool *bound_check,
5818 double min_freq,
5819 StateFreqType freq_type) {
5820 // Sanity check: if min_freq==0, lower_bound=0 and upper_bound=1
5821 // (except FREQ_ESTIMATE, which follows legacy code way of doing things.)
5822 switch (freq_type) {
5823 case FREQ_EQUAL:
5824 case FREQ_USER_DEFINED:
5825 case FREQ_EMPIRICAL:
5826 break; // There are no frequency determining parameters
5827 case FREQ_DNA_1112:
5828 case FREQ_DNA_1121:
5829 case FREQ_DNA_1211:
5830 case FREQ_DNA_2111:
5831 // one frequency determining parameter
5832 lower_bound[0] = 3*min_freq;
5833 upper_bound[0] = 1-min_freq;
5834 bound_check[0] = true;
5835 break;
5836 case FREQ_DNA_1122:
5837 case FREQ_DNA_1212:
5838 case FREQ_DNA_1221:
5839 // one frequency determining parameter
5840 lower_bound[0] = 2*min_freq;
5841 upper_bound[0] = 1-2*min_freq;
5842 bound_check[0] = true;
5843 break;
5844 case FREQ_DNA_RY:
5845 case FREQ_DNA_WS:
5846 case FREQ_DNA_MK:
5847 // two frequency determining parameters
5848 lower_bound[0] = lower_bound[1] = 2*min_freq;
5849 upper_bound[0] = upper_bound[1] = 1-2*min_freq;
5850 bound_check[0] = bound_check[1] = true;
5851 break;
5852 case FREQ_DNA_1123:
5853 case FREQ_DNA_1213:
5854 case FREQ_DNA_1231:
5855 case FREQ_DNA_2113:
5856 case FREQ_DNA_2131:
5857 case FREQ_DNA_2311:
5858 // two frequency determining parameters
5859 lower_bound[0] = 2*min_freq;
5860 upper_bound[0] = 1-2*min_freq;
5861 lower_bound[1] = min_freq/(1-2*min_freq);
5862 upper_bound[1] = (1-3*min_freq)/(1-2*min_freq);
5863 bound_check[0] = bound_check[1] = true;
5864 break;
5865 /* NOTE:
5866 * upper_bound[1] and lower_bound[1] are not perfect. Some in-bounds parameters
5867 * will give base freqs for '2' or '3' base below minimum. This is
5868 * the best that can be done without passing min_freq to freqsFromParams
5869 */
5870 case FREQ_ESTIMATE:
5871 lower_bound[0] = lower_bound[1] = lower_bound[2] = min_freq;
5872 upper_bound[0] = upper_bound[1] = upper_bound[2] = 1;
5873 bound_check[0] = bound_check[1] = bound_check[2] = false;
5874 break;
5875 default:
5876 throw("Unrecognized freq_type in setBoundsForFreqType - can't happen");
5877 }
5878 }
5879
5880 double binomial_coefficient_log(unsigned int N, unsigned int n) {
5881 static DoubleVector logv;
5882 if (logv.size() <= 0) {
5883 logv.push_back(0.0);
5884 logv.push_back(0.0);
5885 }
5886 if (n < N-n)
5887 n = N-n;
5888 if (n==0)
5889 return 0.0;
5890 if (N >= logv.size()) {
5891 for (unsigned int i = logv.size(); i <= N; i++)
5892 logv.push_back(log((double) i));
5893 }
5894 double binom_log = 0.0;
5895 for (unsigned int i = n+1; i <= N; i++)
5896 binom_log += logv[i] - logv[i-n];
5897 return binom_log;
5898 }
5899
5900 double binomial_dist(unsigned int k, unsigned int N, double p) {
5901 double binom_log = binomial_coefficient_log(N, k);
5902 double res_log = binom_log + log(p)*k + log(1-p)*(N-k);
5903 return exp(res_log);
5904 }
5905
5906 double hypergeometric_dist(unsigned int k, unsigned int n, unsigned int K, unsigned int N) {
5907 if (n > N)
5908 outError("Invalid parameters for hypergeometric distribution.");
5909 if (k > K || (n-k) > (N-K))
5910 return 0.0;
5911 double num_successes_log = binomial_coefficient_log(K, k);
5912 double num_failures_log = binomial_coefficient_log(N-K, n-k);
5913 double num_total_log = binomial_coefficient_log(N,n);
5914 return exp(num_successes_log + num_failures_log - num_total_log);
5915 }
5916
5917 // Calculate the Frobenius norm of an N x N matrix M (flattened, rows
5918 // concatenated) and linearly scaled by SCALE.
5919 double frob_norm(double m[], int n, double scale) {
5920 double sum = 0;
5921 for (int i = 0; i < n; i++) {
5922 for (int j = 0; j < n; j++) {
5923 sum += m[i*n + j] * m[i*n + j] * scale * scale;
5924 }
5925 }
5926 return sqrt(sum);
5927 }
5928